From 06f5b7d2e341971f1484af41c0ea64765c7b7669 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Sun, 20 Nov 2011 22:07:56 +1100
Subject: [PATCH 01/14] Fixed cgi module name.

---
 lib/jekyll/migrators/tumblr.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index d7cb3969..602f4169 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -2,7 +2,7 @@ require 'rubygems'
 require 'nokogiri'
 require 'open-uri'
 require 'fileutils'
-require 'CGI'
+require 'cgi'
 require 'iconv'
 require 'date'
 

From 6826317e00fac0adbbe80ee341fea3eaaaa3e2e0 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Sun, 20 Nov 2011 22:09:43 +1100
Subject: [PATCH 02/14] Use the post's title for the filename rather than its
 ID, as per Jekyll's naming convention.

---
 lib/jekyll/migrators/tumblr.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 602f4169..97105eb4 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -72,7 +72,7 @@ module Jekyll
             content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil
           end # End post types
 
-          name = "#{Date.parse(post['date']).to_s}-#{post['id'].downcase.gsub(/[^a-z0-9]/, '-')}.html"
+          name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
 
           if title != nil || content != nil && name != nil
             File.open("_posts/tumblr/#{name}", "w") do |f|

From 87316894cc6179b36f91cfa97387ea967f793ff2 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Sun, 20 Nov 2011 22:13:01 +1100
Subject: [PATCH 03/14] Quote the post's title so reserved yaml chars don't
 blow up.

---
 lib/jekyll/migrators/tumblr.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 97105eb4..24f5ffe0 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -80,7 +80,7 @@ module Jekyll
               f.puts <<-HEADER
 ---
 layout: post
-title: #{title}
+title: "#{title.gsub('"', '\"')}"
 ---
 
 HEADER

From b762a1d5c5b10b307df9a7c879f5f1540d21cf96 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Sun, 20 Nov 2011 22:15:36 +1100
Subject: [PATCH 04/14] Add a format option for converting posts to markdown
 via Python's html2text.

---
 lib/jekyll/migrators/tumblr.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 24f5ffe0..1cbef57f 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -8,7 +8,7 @@ require 'date'
 
 module Jekyll
   module Tumblr
-    def self.process(url, grab_images = false)
+    def self.process(url, grab_images = false, format = "html")
       current_page = 0
 
       while true
@@ -75,6 +75,7 @@ module Jekyll
           name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
 
           if title != nil || content != nil && name != nil
+            content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md"
             File.open("_posts/tumblr/#{name}", "w") do |f|
 
               f.puts <<-HEADER

From 1b3abb61d8b7bab0932d0d8cc13a11c69433cebf Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Mon, 21 Nov 2011 05:49:49 +1100
Subject: [PATCH 05/14] Clean up extraneous newlines left by html2text.

---
 lib/jekyll/migrators/tumblr.rb | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 1cbef57f..332c5fb0 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -75,7 +75,13 @@ module Jekyll
           name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
 
           if title != nil || content != nil && name != nil
-            content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md"
+            if format == "md"
+              content = %x[echo '#{content.gsub("'", "''")}' | html2text]
+              # html2text leaves extra blank lines in code blocks - clean them up.
+              begin
+                content.gsub!("\n    \n", "\n")
+              end until !content.include? "\n    \n"
+            end
             File.open("_posts/tumblr/#{name}", "w") do |f|
 
               f.puts <<-HEADER

From ab85c82356ad23d0c9c4b1afc8571ffb4178c40f Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Thu, 24 Nov 2011 06:11:57 +1100
Subject: [PATCH 06/14] Use Tumblr's JSON format instead of XML, to correctly
 preserve white-space when converting to markdown.

---
 lib/jekyll/migrators/tumblr.rb | 130 +++++++++++++--------------------
 1 file changed, 49 insertions(+), 81 deletions(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 332c5fb0..3fd15642 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -1,10 +1,8 @@
 require 'rubygems'
-require 'nokogiri'
 require 'open-uri'
 require 'fileutils'
-require 'cgi'
-require 'iconv'
 require 'date'
+require 'json'
 
 module Jekyll
   module Tumblr
@@ -12,97 +10,67 @@ module Jekyll
       current_page = 0
 
       while true
-        f = open(url + "/api/read?num=50&start=#{current_page * 50}")
-        doc = Nokogiri::HTML(Iconv.conv("utf-8", f.charset, f.readlines.join("\n")))
-
-        puts "Page: #{current_page + 1} - Posts: #{(doc/:tumblr/:posts/:post).size}"
 
+        f = open(url + "/api/read/json/?num=50&start=#{current_page * 50}")
+        # [21...-2] strips Tumblr's Javascript/JSONP start/end chars
+        json = f.readlines.join("\n")[21...-2]
+        blog = JSON.parse(json)
+        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
         FileUtils.mkdir_p "_posts/tumblr"
 
-        (doc/:tumblr/:posts/:post).each do |post|
-          title = ""
-          content = nil
-          name = nil
+        blog["posts"].each do |post|
 
-          if post['type'] == "regular"
-            title_element = post.at("regular-title")
-            title = title_element.inner_text unless title_element == nil
-            content = CGI::unescapeHTML post.at("regular-body").inner_html unless post.at("regular-body") == nil
-          elsif post['type'] == "link"
-            title = post.at("link-text").inner_html unless post.at("link-text") == nil
-
-            if post.at("link-text") != nil
-              content = "<a href=\"#{post.at("link-url").inner_html}\">#{post.at("link-text").inner_html}</a>"
-            else
-              content = "<a href=\"#{post.at("link-url").inner_html}\">#{post.at("link-url").inner_html}</a>"
-            end
-
-            content << "<br/>" + CGI::unescapeHTML(post.at("link-description").inner_html) unless post.at("link-description") == nil
-          elsif post['type'] == "photo"
-            content = ""
-
-            if post.at("photo-link-url") != nil
-              content = "<a href=\"#{post.at("photo-link-url").inner_html}\"><img src=\"#{save_file((post/"photo-url")[1].inner_html, grab_images)}\"/></a>"
-            else
-              content = "<img src=\"#{save_file((post/"photo-url")[1].inner_html, grab_images)}\"/>"
-            end
-
-            if post.at("photo-caption") != nil
-              content << "<br/>" unless content == nil
-              content << CGI::unescapeHTML(post.at("photo-caption").inner_html)
-            end
-          elsif post['type'] == "audio"
-            content = CGI::unescapeHTML(post.at("audio-player").inner_html)
-            content << CGI::unescapeHTML(post.at("audio-caption").inner_html) unless post.at("audio-caption") == nil
-          elsif post['type'] == "quote"
-            content = "<blockquote>" + CGI::unescapeHTML(post.at("quote-text").inner_html) + "</blockquote>"
-            content << "&#8212;" + CGI::unescapeHTML(post.at("quote-source").inner_html) unless post.at("quote-source") == nil
-          elsif post['type'] == "conversation"
-            title = post.at("conversation-title").inner_html unless post.at("conversation-title") == nil
-            content = "<section><dialog>"
-
-            (post/:conversation/:line).each do |line|
-              content << "<dt>" + line['label'] + "</dt><dd>" + line.inner_html + "</dd>" unless line['label'] == nil || line == nil
-            end
-
-            content << "</section></dialog>"
-          elsif post['type'] == "video"
-            title = post.at("video-title").inner_html unless post.at("video-title") == nil
-            content = CGI::unescapeHTML(post.at("video-player").inner_html)
-            content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil
+          case post['type']
+            when "regular"
+              title = post["regular-title"]
+              content = post["regular-body"]
+            when "link"
+              title = post["link-text"] || post["link-url"]
+              content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
+              content << "<br/>" + post["link-description"] unless post["link-description"].nil?
+            when "photo"
+              title = post["photo-caption"]
+              content = "<img src=\"#{save_file(post["photo-url"], grab_images)}\"/>"
+              content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>" unless post["photo-link-url"].nil?
+            when "audio"
+              if !post["id3-title"].nil?
+                title = post["id3-title"]
+                content = post.at["audio-player"] + "<br/>" + post["audio-caption"]
+              else
+                title = post["audio-caption"]
+                content = post.at["audio-player"]
+              end
+            when "quote"
+              title = post["quote-text"]
+              content = "<blockquote>#{post["quote-text"]}</blockquote>"
+              content << "&#8212;" + post["quote-source"] unless post["quote-source"].nil?
+            when "conversation"
+              title = post["conversation-title"]
+              content = "<section><dialog>"
+              post["conversation"]["line"].each do |line|
+                content << "<dt>#{line['label']}</dt><dd>#{line}</dd>"
+              end
+              content << "</section></dialog>"
+            when "video"
+              title = post["video-title"]
+              content = post["video-player"]
+              content << "<br/>" + post["video-caption"] unless post["video-caption"].nil?
           end # End post types
 
           name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
 
-          if title != nil || content != nil && name != nil
-            if format == "md"
-              content = %x[echo '#{content.gsub("'", "''")}' | html2text]
-              # html2text leaves extra blank lines in code blocks - clean them up.
-              begin
-                content.gsub!("\n    \n", "\n")
-              end until !content.include? "\n    \n"
-            end
-            File.open("_posts/tumblr/#{name}", "w") do |f|
-
-              f.puts <<-HEADER
----
-layout: post
-title: "#{title.gsub('"', '\"')}"
----
-
-HEADER
-
-              f.puts content
-            end # End file
-          end
+          File.open("_posts/tumblr/#{name}", "w") do |f|
+            content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md"
+            header = {"layout" => "post", "title" => title, "tags" => post["tags"]}
+            f.puts header.to_yaml + "---\n" + content
+          end # End file
 
         end # End post XML
 
-        if (doc/:tumblr/:posts/:post).size < 50
+        if blog["posts"].size < 50
           break
-        else
-          current_page = current_page + 1
         end
+        current_page += 1
 
       end # End while loop
     end # End method

From bc20ba9be961ece9d5e49283366956d23c71bdd1 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Fri, 25 Nov 2011 06:10:47 +1100
Subject: [PATCH 07/14] Preserve HTML tables as per markdown's support for
 tables.

---
 lib/jekyll/migrators/tumblr.rb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 3fd15642..10f41601 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -60,7 +60,12 @@ module Jekyll
           name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
 
           File.open("_posts/tumblr/#{name}", "w") do |f|
-            content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md"
+            if format == "md"
+                preserve = ["table", "tr", "th", "td"]
+                preserve.each { |tag| content = content.gsub(/<#{tag}/i, "$$" + tag).gsub(/<\/#{tag}/i, "||" + tag) }
+                content = %x[echo '#{content.gsub("'", "''")}' | html2text]
+                preserve.each { |tag| content = content.gsub("$$" + tag, "<" + tag).gsub("||" + tag, "</" + tag) }
+            end
             header = {"layout" => "post", "title" => title, "tags" => post["tags"]}
             f.puts header.to_yaml + "---\n" + content
           end # End file

From 0f51c81cfab6fcc3fbf6cd2720ad6da216a113d1 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Sun, 4 Dec 2011 12:01:37 +1100
Subject: [PATCH 08/14] Added support for rewriting Tumblr URLs to Jekyll URLs
 in posts, meta redirects for Github pages, and automatic addition of Pygments
 highlight tags.

---
 lib/jekyll/migrators/tumblr.rb | 230 ++++++++++++++++++++++-----------
 1 file changed, 158 insertions(+), 72 deletions(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 10f41601..0f9c227c 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -3,93 +3,179 @@ require 'open-uri'
 require 'fileutils'
 require 'date'
 require 'json'
+require 'uri'
+require 'jekyll'
 
 module Jekyll
   module Tumblr
-    def self.process(url, grab_images = false, format = "html")
-      current_page = 0
-
-      while true
-
-        f = open(url + "/api/read/json/?num=50&start=#{current_page * 50}")
-        # [21...-2] strips Tumblr's Javascript/JSONP start/end chars
-        json = f.readlines.join("\n")[21...-2]
+    def self.process(url, format = "html", grab_images = false,
+                     add_highlights = false, rewrite_urls = true)
+      FileUtils.mkdir_p "_posts/tumblr"
+      url += "/api/read/json/"
+      per_page = 50
+      posts = []
+      # Two passes are required so that we can rewrite URLs.
+      # First pass builds up an array of each post as a hash.
+      begin
+        current_page = (current_page || -1) + 1
+        feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}")
+        json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
         blog = JSON.parse(json)
         puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
-        FileUtils.mkdir_p "_posts/tumblr"
-
-        blog["posts"].each do |post|
-
-          case post['type']
-            when "regular"
-              title = post["regular-title"]
-              content = post["regular-body"]
-            when "link"
-              title = post["link-text"] || post["link-url"]
-              content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
-              content << "<br/>" + post["link-description"] unless post["link-description"].nil?
-            when "photo"
-              title = post["photo-caption"]
-              content = "<img src=\"#{save_file(post["photo-url"], grab_images)}\"/>"
-              content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>" unless post["photo-link-url"].nil?
-            when "audio"
-              if !post["id3-title"].nil?
-                title = post["id3-title"]
-                content = post.at["audio-player"] + "<br/>" + post["audio-caption"]
-              else
-                title = post["audio-caption"]
-                content = post.at["audio-player"]
-              end
-            when "quote"
-              title = post["quote-text"]
-              content = "<blockquote>#{post["quote-text"]}</blockquote>"
-              content << "&#8212;" + post["quote-source"] unless post["quote-source"].nil?
-            when "conversation"
-              title = post["conversation-title"]
-              content = "<section><dialog>"
-              post["conversation"]["line"].each do |line|
-                content << "<dt>#{line['label']}</dt><dd>#{line}</dd>"
-              end
-              content << "</section></dialog>"
-            when "video"
-              title = post["video-title"]
-              content = post["video-player"]
-              content << "<br/>" + post["video-caption"] unless post["video-caption"].nil?
-          end # End post types
-
-          name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}"
-
-          File.open("_posts/tumblr/#{name}", "w") do |f|
-            if format == "md"
-                preserve = ["table", "tr", "th", "td"]
-                preserve.each { |tag| content = content.gsub(/<#{tag}/i, "$$" + tag).gsub(/<\/#{tag}/i, "||" + tag) }
-                content = %x[echo '#{content.gsub("'", "''")}' | html2text]
-                preserve.each { |tag| content = content.gsub("$$" + tag, "<" + tag).gsub("||" + tag, "</" + tag) }
-            end
-            header = {"layout" => "post", "title" => title, "tags" => post["tags"]}
-            f.puts header.to_yaml + "---\n" + content
-          end # End file
-
-        end # End post XML
-
-        if blog["posts"].size < 50
-          break
+        posts += blog["posts"].map { |post| post_to_hash(post, format) }
+      end until blog["posts"].size < per_page
+      # Rewrite URLs and create redirects.
+      posts = rewrite_urls_and_redirects posts if rewrite_urls
+      # Second pass for writing post files.
+      posts.each do |post|
+        if format == "md"
+          post[:content] = html_to_markdown post[:content]
+          post[:content] = add_syntax_highlights post[:content] if add_highlights
         end
-        current_page += 1
-
-      end # End while loop
-    end # End method
+        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+          f.puts post[:header].to_yaml + "---\n" + post[:content]
+        end
+      end
+    end
 
     private
 
+    # Converts each type of Tumblr post to a hash with all required
+    # data for Jekyll.
+    def self.post_to_hash(post, format)
+      case post['type']
+        when "regular"
+          title = post["regular-title"]
+          content = post["regular-body"]
+        when "link"
+          title = post["link-text"] || post["link-url"]
+          content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
+          unless post["link-description"].nil?
+            content << "<br/>" + post["link-description"]
+          end
+        when "photo"
+          title = post["photo-caption"]
+          content = "<img src=\"#{save_file(post["photo-url"], grab_images)}\"/>"
+          unless post["photo-link-url"].nil?
+            content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
+          end
+        when "audio"
+          if !post["id3-title"].nil?
+            title = post["id3-title"]
+            content = post.at["audio-player"] + "<br/>" + post["audio-caption"]
+          else
+            title = post["audio-caption"]
+            content = post.at["audio-player"]
+          end
+        when "quote"
+          title = post["quote-text"]
+          content = "<blockquote>#{post["quote-text"]}</blockquote>"
+          unless post["quote-source"].nil?
+            content << "&#8212;" + post["quote-source"]
+          end
+        when "conversation"
+          title = post["conversation-title"]
+          content = "<section><dialog>"
+          post["conversation"]["line"].each do |line|
+            content << "<dt>#{line['label']}</dt><dd>#{line}</dd>"
+          end
+          content << "</section></dialog>"
+        when "video"
+          title = post["video-title"]
+          content = post["video-player"]
+          unless post["video-caption"].nil?
+            content << "<br/>" + post["video-caption"]
+          end
+      end
+      date = Date.parse(post['date']).to_s
+      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+      {
+        :name => "#{date}-#{slug}.#{format}",
+        :header => {
+          "layout" => "post",
+          "title" => title,
+          "tags" => post["tags"],
+        },
+        :content => content,
+        :url => post["url"],
+        :slug => post["url-with-slug"],
+      }
+    end
+
+    # Create a Hash of old urls => new urls, for rewriting and
+    # redirects, and replace urls in each post. Instantiate Jekyll
+    # site/posts to get the correct permalink format.
+    def self.rewrite_urls_and_redirects(posts)
+      site = Jekyll::Site.new(Jekyll.configuration({}))
+      dir = File.join(File.dirname(__FILE__), "..")
+      urls = Hash[posts.map { |post|
+        tumblr_url = URI.parse(post[:slug]).path
+        jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url
+        redirect_dir = tumblr_url.sub(/\//, "") + "/"
+        FileUtils.mkdir_p redirect_dir
+        File.open(redirect_dir + "index.html", "w") do |f|
+          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
+                 "url=#{jekyll_url}'></head><body></body></html>"
+        end
+        [tumblr_url, jekyll_url]
+      }]
+      posts.map { |post|
+        urls.each do |tumblr_url, jekyll_url|
+          post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
+        end
+        post
+      }
+    end
+
+    # Uses Python's html2text to convert a post's content to
+    # markdown. Preserve HTML tables as per the markdown docs.
+    def self.html_to_markdown(content)
+      preserve = ["table", "tr", "th", "td"]
+      preserve.each do |tag|
+        content.gsub!(/<#{tag}/i, "$$" + tag)
+        content.gsub!(/<\/#{tag}/i, "||" + tag)
+      end
+      content = %x[echo '#{content.gsub("'", "''")}' | html2text]
+      preserve.each do |tag|
+        content.gsub!("$$" + tag, "<" + tag)
+        content.gsub!("||" + tag, "</" + tag)
+      end
+      content
+    end
+
+    # Adds pygments highlight tags to code blocks in posts that use
+    # markdown format. This doesn't guess the language of the code
+    # block, so you should modify this to suit your own content.
+    # For example, my code blocks only contain Python and JavaScript,
+    # so I can assume the block is JavaScript if it contains a
+    # semi-colon.
+    def self.add_syntax_highlights(content)
+      lines = content.split("\n")
+      block, indent, lang, start = false, /^    /, nil, nil
+      lines.each_with_index do |line, i|
+        if !block && line =~ indent
+          block = true
+          lang = "python"
+          start = i
+        elsif block
+          lang = "javascript" if line =~ /;$/
+          block = line =~ indent && i < lines.size - 1 # Also handle EOF
+          if !block
+            lines[start] = "{% highlight #{lang} %}"
+            lines[i - 1] = "{% endhighlight %}"
+          end
+          lines[i] = lines[i].sub(indent, "")
+        end
+      end
+      lines.join("\n")
+    end
+
     def self.save_file(url, grab_image = false)
       unless grab_image == false
         FileUtils.mkdir_p "tumblr_files"
-
         File.open("tumblr_files/#{url.split('/').last}", "w") do |f|
           f.write(open(url).read)
         end
-
         return "/tumblr_files/#{url.split('/').last}"
       else
         return url

From 886b9740f2635334d83251875447a6f2bcb7cdce Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Tue, 27 Dec 2011 10:09:59 +1100
Subject: [PATCH 09/14] Fixed grab_images handling.

---
 lib/jekyll/migrators/tumblr.rb | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 0f9c227c..0c6402ab 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -10,6 +10,7 @@ module Jekyll
   module Tumblr
     def self.process(url, format = "html", grab_images = false,
                      add_highlights = false, rewrite_urls = true)
+      @grab_images = grab_images
       FileUtils.mkdir_p "_posts/tumblr"
       url += "/api/read/json/"
       per_page = 50
@@ -55,7 +56,7 @@ module Jekyll
           end
         when "photo"
           title = post["photo-caption"]
-          content = "<img src=\"#{save_file(post["photo-url"], grab_images)}\"/>"
+          content = "<img src=\"#{save_file(post["photo-url"])}\"/>"
           unless post["photo-link-url"].nil?
             content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
           end
@@ -170,16 +171,15 @@ module Jekyll
       lines.join("\n")
     end
 
-    def self.save_file(url, grab_image = false)
-      unless grab_image == false
+    def self.save_file(url)
+      if @grab_images
         FileUtils.mkdir_p "tumblr_files"
         File.open("tumblr_files/#{url.split('/').last}", "w") do |f|
           f.write(open(url).read)
         end
-        return "/tumblr_files/#{url.split('/').last}"
-      else
-        return url
+        url = "/tumblr_files/#{url.split('/').last}"
       end
+      url
     end
   end
 end

From 3ab7658d1f62b75265d3f3cd03a561c54e1be3ab Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Wed, 28 Dec 2011 08:16:19 +1100
Subject: [PATCH 10/14] Fixed Tumblr URL redirects.

---
 lib/jekyll/migrators/tumblr.rb | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 0c6402ab..2a2a3034 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -110,6 +110,9 @@ module Jekyll
       site = Jekyll::Site.new(Jekyll.configuration({}))
       dir = File.join(File.dirname(__FILE__), "..")
       urls = Hash[posts.map { |post|
+        # Create an initial empty file for the post so that
+        # we can instantiate a post object.
+        File.open("_posts/tumblr/#{post[:name]}", "w")
         tumblr_url = URI.parse(post[:slug]).path
         jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url
         redirect_dir = tumblr_url.sub(/\//, "") + "/"

From 743e01f231649529350edaff6728f1bce87eae27 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Thu, 29 Dec 2011 11:37:06 +1100
Subject: [PATCH 11/14] Fixed a change in Tumblr's API whereby photos no longer
 have a single URL field.

---
 lib/jekyll/migrators/tumblr.rb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index 2a2a3034..cc318ca0 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -56,10 +56,10 @@ module Jekyll
           end
         when "photo"
           title = post["photo-caption"]
-          content = "<img src=\"#{save_file(post["photo-url"])}\"/>"
-          unless post["photo-link-url"].nil?
-            content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
-          end
+          max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
+          url = post["photo-url"] || post["photo-url-#{max_size}"]
+          content = "<img src=\"#{save_file(url)}\"/>"
+          content = "<a href=\"#{url}\">#{content}</a>" unless url.nil?
         when "audio"
           if !post["id3-title"].nil?
             title = post["id3-title"]

From bab178d9f8502e6941e75dbba1631d26f246fab4 Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Thu, 29 Dec 2011 11:37:46 +1100
Subject: [PATCH 12/14] Strip HTML from any caption-based titles.

---
 lib/jekyll/migrators/tumblr.rb | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index cc318ca0..d1df9678 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -1,6 +1,7 @@
 require 'rubygems'
 require 'open-uri'
 require 'fileutils'
+require 'nokogiri'
 require 'date'
 require 'json'
 require 'uri'
@@ -89,6 +90,7 @@ module Jekyll
           end
       end
       date = Date.parse(post['date']).to_s
+      title = Nokogiri::HTML(title).text
       slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
       {
         :name => "#{date}-#{slug}.#{format}",

From c26bd30318aae14442fc4f7565d2d6458e10808d Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Thu, 29 Dec 2011 11:58:11 +1100
Subject: [PATCH 13/14] Fix photo link URL.

---
 lib/jekyll/migrators/tumblr.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index d1df9678..e2758ba8 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -60,7 +60,9 @@ module Jekyll
           max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
           url = post["photo-url"] || post["photo-url-#{max_size}"]
           content = "<img src=\"#{save_file(url)}\"/>"
-          content = "<a href=\"#{url}\">#{content}</a>" unless url.nil?
+          unless post["photo-link-url"].nil?
+            content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
+          end
         when "audio"
           if !post["id3-title"].nil?
             title = post["id3-title"]

From f2502dfab2da77f916731edcc4ac22022aeabf4b Mon Sep 17 00:00:00 2001
From: Stephen McDonald <steve@jupo.org>
Date: Fri, 30 Dec 2011 23:25:09 +1100
Subject: [PATCH 14/14] Added handling for Tumblr missing image extensions.

---
 lib/jekyll/migrators/tumblr.rb | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb
index e2758ba8..367a83c9 100644
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@@ -59,7 +59,10 @@ module Jekyll
           title = post["photo-caption"]
           max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
           url = post["photo-url"] || post["photo-url-#{max_size}"]
-          content = "<img src=\"#{save_file(url)}\"/>"
+          ext = "." + post[post.keys.select { |k|
+            k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
+          }.first].split(".").last
+          content = "<img src=\"#{save_file(url, ext)}\"/>"
           unless post["photo-link-url"].nil?
             content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
           end
@@ -178,13 +181,13 @@ module Jekyll
       lines.join("\n")
     end
 
-    def self.save_file(url)
+    def self.save_file(url, ext)
       if @grab_images
+        path = "tumblr_files/#{url.split('/').last}"
+        path += ext unless path =~ /#{ext}$/
         FileUtils.mkdir_p "tumblr_files"
-        File.open("tumblr_files/#{url.split('/').last}", "w") do |f|
-          f.write(open(url).read)
-        end
-        url = "/tumblr_files/#{url.split('/').last}"
+        File.open(path, "w") { |f| f.write(open(url).read) }
+        url = "/" + path
       end
       url
     end