From ab85c82356ad23d0c9c4b1afc8571ffb4178c40f Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Thu, 24 Nov 2011 06:11:57 +1100 Subject: [PATCH] Use Tumblr's JSON format instead of XML, to correctly preserve white-space when converting to markdown. --- lib/jekyll/migrators/tumblr.rb | 130 +++++++++++++-------------------- 1 file changed, 49 insertions(+), 81 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 332c5fb0..3fd15642 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -1,10 +1,8 @@ require 'rubygems' -require 'nokogiri' require 'open-uri' require 'fileutils' -require 'cgi' -require 'iconv' require 'date' +require 'json' module Jekyll module Tumblr @@ -12,97 +10,67 @@ module Jekyll current_page = 0 while true - f = open(url + "/api/read?num=50&start=#{current_page * 50}") - doc = Nokogiri::HTML(Iconv.conv("utf-8", f.charset, f.readlines.join("\n"))) - - puts "Page: #{current_page + 1} - Posts: #{(doc/:tumblr/:posts/:post).size}" + f = open(url + "/api/read/json/?num=50&start=#{current_page * 50}") + # [21...-2] strips Tumblr's Javascript/JSONP start/end chars + json = f.readlines.join("\n")[21...-2] + blog = JSON.parse(json) + puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}" FileUtils.mkdir_p "_posts/tumblr" - (doc/:tumblr/:posts/:post).each do |post| - title = "" - content = nil - name = nil + blog["posts"].each do |post| - if post['type'] == "regular" - title_element = post.at("regular-title") - title = title_element.inner_text unless title_element == nil - content = CGI::unescapeHTML post.at("regular-body").inner_html unless post.at("regular-body") == nil - elsif post['type'] == "link" - title = post.at("link-text").inner_html unless post.at("link-text") == nil - - if post.at("link-text") != nil - content = "#{post.at("link-text").inner_html}" - else - content = "#{post.at("link-url").inner_html}" - end - - content << "
" + CGI::unescapeHTML(post.at("link-description").inner_html) unless post.at("link-description") == nil - elsif post['type'] == "photo" - content = "" - - if post.at("photo-link-url") != nil - content = "" - else - content = "" - end - - if post.at("photo-caption") != nil - content << "
" unless content == nil - content << CGI::unescapeHTML(post.at("photo-caption").inner_html) - end - elsif post['type'] == "audio" - content = CGI::unescapeHTML(post.at("audio-player").inner_html) - content << CGI::unescapeHTML(post.at("audio-caption").inner_html) unless post.at("audio-caption") == nil - elsif post['type'] == "quote" - content = "
" + CGI::unescapeHTML(post.at("quote-text").inner_html) + "
" - content << "—" + CGI::unescapeHTML(post.at("quote-source").inner_html) unless post.at("quote-source") == nil - elsif post['type'] == "conversation" - title = post.at("conversation-title").inner_html unless post.at("conversation-title") == nil - content = "
" - - (post/:conversation/:line).each do |line| - content << "
" + line['label'] + "
" + line.inner_html + "
" unless line['label'] == nil || line == nil - end - - content << "
" - elsif post['type'] == "video" - title = post.at("video-title").inner_html unless post.at("video-title") == nil - content = CGI::unescapeHTML(post.at("video-player").inner_html) - content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil + case post['type'] + when "regular" + title = post["regular-title"] + content = post["regular-body"] + when "link" + title = post["link-text"] || post["link-url"] + content = "#{title}" + content << "
" + post["link-description"] unless post["link-description"].nil? + when "photo" + title = post["photo-caption"] + content = "" + content = "#{content}" unless post["photo-link-url"].nil? + when "audio" + if !post["id3-title"].nil? + title = post["id3-title"] + content = post.at["audio-player"] + "
" + post["audio-caption"] + else + title = post["audio-caption"] + content = post.at["audio-player"] + end + when "quote" + title = post["quote-text"] + content = "
#{post["quote-text"]}
" + content << "—" + post["quote-source"] unless post["quote-source"].nil? + when "conversation" + title = post["conversation-title"] + content = "
" + post["conversation"]["line"].each do |line| + content << "
#{line['label']}
#{line}
" + end + content << "
" + when "video" + title = post["video-title"] + content = post["video-player"] + content << "
" + post["video-caption"] unless post["video-caption"].nil? end # End post types name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" - if title != nil || content != nil && name != nil - if format == "md" - content = %x[echo '#{content.gsub("'", "''")}' | html2text] - # html2text leaves extra blank lines in code blocks - clean them up. - begin - content.gsub!("\n \n", "\n") - end until !content.include? "\n \n" - end - File.open("_posts/tumblr/#{name}", "w") do |f| - - f.puts <<-HEADER ---- -layout: post -title: "#{title.gsub('"', '\"')}" ---- - -HEADER - - f.puts content - end # End file - end + File.open("_posts/tumblr/#{name}", "w") do |f| + content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md" + header = {"layout" => "post", "title" => title, "tags" => post["tags"]} + f.puts header.to_yaml + "---\n" + content + end # End file end # End post XML - if (doc/:tumblr/:posts/:post).size < 50 + if blog["posts"].size < 50 break - else - current_page = current_page + 1 end + current_page += 1 end # End while loop end # End method