diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index d7cb3969..367a83c9 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -1,119 +1,195 @@
require 'rubygems'
-require 'nokogiri'
require 'open-uri'
require 'fileutils'
-require 'CGI'
-require 'iconv'
+require 'nokogiri'
require 'date'
+require 'json'
+require 'uri'
+require 'jekyll'
module Jekyll
module Tumblr
- def self.process(url, grab_images = false)
- current_page = 0
-
- while true
- f = open(url + "/api/read?num=50&start=#{current_page * 50}")
- doc = Nokogiri::HTML(Iconv.conv("utf-8", f.charset, f.readlines.join("\n")))
-
- puts "Page: #{current_page + 1} - Posts: #{(doc/:tumblr/:posts/:post).size}"
-
- FileUtils.mkdir_p "_posts/tumblr"
-
- (doc/:tumblr/:posts/:post).each do |post|
- title = ""
- content = nil
- name = nil
-
- if post['type'] == "regular"
- title_element = post.at("regular-title")
- title = title_element.inner_text unless title_element == nil
- content = CGI::unescapeHTML post.at("regular-body").inner_html unless post.at("regular-body") == nil
- elsif post['type'] == "link"
- title = post.at("link-text").inner_html unless post.at("link-text") == nil
-
- if post.at("link-text") != nil
- content = "#{post.at("link-text").inner_html}"
- else
- content = "#{post.at("link-url").inner_html}"
- end
-
- content << "
" + CGI::unescapeHTML(post.at("link-description").inner_html) unless post.at("link-description") == nil
- elsif post['type'] == "photo"
- content = ""
-
- if post.at("photo-link-url") != nil
- content = ""
- else
- content = "
"
- end
-
- if post.at("photo-caption") != nil
- content << "
" unless content == nil
- content << CGI::unescapeHTML(post.at("photo-caption").inner_html)
- end
- elsif post['type'] == "audio"
- content = CGI::unescapeHTML(post.at("audio-player").inner_html)
- content << CGI::unescapeHTML(post.at("audio-caption").inner_html) unless post.at("audio-caption") == nil
- elsif post['type'] == "quote"
- content = "
" + CGI::unescapeHTML(post.at("quote-text").inner_html) + "" - content << "—" + CGI::unescapeHTML(post.at("quote-source").inner_html) unless post.at("quote-source") == nil - elsif post['type'] == "conversation" - title = post.at("conversation-title").inner_html unless post.at("conversation-title") == nil - content = "" - elsif post['type'] == "video" - title = post.at("video-title").inner_html unless post.at("video-title") == nil - content = CGI::unescapeHTML(post.at("video-player").inner_html) - content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil - end # End post types - - name = "#{Date.parse(post['date']).to_s}-#{post['id'].downcase.gsub(/[^a-z0-9]/, '-')}.html" - - if title != nil || content != nil && name != nil - File.open("_posts/tumblr/#{name}", "w") do |f| - - f.puts <<-HEADER ---- -layout: post -title: #{title} ---- - -HEADER - - f.puts content - end # End file - end - - end # End post XML - - if (doc/:tumblr/:posts/:post).size < 50 - break - else - current_page = current_page + 1 + def self.process(url, format = "html", grab_images = false, + add_highlights = false, rewrite_urls = true) + @grab_images = grab_images + FileUtils.mkdir_p "_posts/tumblr" + url += "/api/read/json/" + per_page = 50 + posts = [] + # Two passes are required so that we can rewrite URLs. + # First pass builds up an array of each post as a hash. + begin + current_page = (current_page || -1) + 1 + feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}") + json = feed.readlines.join("\n")[21...-2] # Strip Tumblr's JSONP chars. + blog = JSON.parse(json) + puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}" + posts += blog["posts"].map { |post| post_to_hash(post, format) } + end until blog["posts"].size < per_page + # Rewrite URLs and create redirects. + posts = rewrite_urls_and_redirects posts if rewrite_urls + # Second pass for writing post files. + posts.each do |post| + if format == "md" + post[:content] = html_to_markdown post[:content] + post[:content] = add_syntax_highlights post[:content] if add_highlights end - - end # End while loop - end # End method + File.open("_posts/tumblr/#{post[:name]}", "w") do |f| + f.puts post[:header].to_yaml + "---\n" + post[:content] + end + end + end private - def self.save_file(url, grab_image = false) - unless grab_image == false - FileUtils.mkdir_p "tumblr_files" - - File.open("tumblr_files/#{url.split('/').last}", "w") do |f| - f.write(open(url).read) - end - - return "/tumblr_files/#{url.split('/').last}" - else - return url + # Converts each type of Tumblr post to a hash with all required + # data for Jekyll. + def self.post_to_hash(post, format) + case post['type'] + when "regular" + title = post["regular-title"] + content = post["regular-body"] + when "link" + title = post["link-text"] || post["link-url"] + content = "#{title}" + unless post["link-description"].nil? + content << "
#{post["quote-text"]}" + unless post["quote-source"].nil? + content << "—" + post["quote-source"] + end + when "conversation" + title = post["conversation-title"] + content = "" + when "video" + title = post["video-title"] + content = post["video-player"] + unless post["video-caption"].nil? + content << "