From 06f5b7d2e341971f1484af41c0ea64765c7b7669 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Sun, 20 Nov 2011 22:07:56 +1100 Subject: [PATCH 01/14] Fixed cgi module name. --- lib/jekyll/migrators/tumblr.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index d7cb3969..602f4169 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -2,7 +2,7 @@ require 'rubygems' require 'nokogiri' require 'open-uri' require 'fileutils' -require 'CGI' +require 'cgi' require 'iconv' require 'date' From 6826317e00fac0adbbe80ee341fea3eaaaa3e2e0 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Sun, 20 Nov 2011 22:09:43 +1100 Subject: [PATCH 02/14] Use the post's title for the filename rather than its ID, as per Jekyll's naming convention. --- lib/jekyll/migrators/tumblr.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 602f4169..97105eb4 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -72,7 +72,7 @@ module Jekyll content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil end # End post types - name = "#{Date.parse(post['date']).to_s}-#{post['id'].downcase.gsub(/[^a-z0-9]/, '-')}.html" + name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" if title != nil || content != nil && name != nil File.open("_posts/tumblr/#{name}", "w") do |f| From 87316894cc6179b36f91cfa97387ea967f793ff2 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Sun, 20 Nov 2011 22:13:01 +1100 Subject: [PATCH 03/14] Quote the post's title so reserved yaml chars don't blow up. --- lib/jekyll/migrators/tumblr.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 97105eb4..24f5ffe0 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -80,7 +80,7 @@ module Jekyll f.puts <<-HEADER --- layout: post -title: #{title} +title: "#{title.gsub('"', '\"')}" --- HEADER From b762a1d5c5b10b307df9a7c879f5f1540d21cf96 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Sun, 20 Nov 2011 22:15:36 +1100 Subject: [PATCH 04/14] Add a format option for converting posts to markdown via Python's html2text. --- lib/jekyll/migrators/tumblr.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 24f5ffe0..1cbef57f 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -8,7 +8,7 @@ require 'date' module Jekyll module Tumblr - def self.process(url, grab_images = false) + def self.process(url, grab_images = false, format = "html") current_page = 0 while true @@ -75,6 +75,7 @@ module Jekyll name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" if title != nil || content != nil && name != nil + content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md" File.open("_posts/tumblr/#{name}", "w") do |f| f.puts <<-HEADER From 1b3abb61d8b7bab0932d0d8cc13a11c69433cebf Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Mon, 21 Nov 2011 05:49:49 +1100 Subject: [PATCH 05/14] Clean up extraneous newlines left by html2text. --- lib/jekyll/migrators/tumblr.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 1cbef57f..332c5fb0 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -75,7 +75,13 @@ module Jekyll name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" if title != nil || content != nil && name != nil - content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md" + if format == "md" + content = %x[echo '#{content.gsub("'", "''")}' | html2text] + # html2text leaves extra blank lines in code blocks - clean them up. + begin + content.gsub!("\n \n", "\n") + end until !content.include? "\n \n" + end File.open("_posts/tumblr/#{name}", "w") do |f| f.puts <<-HEADER From ab85c82356ad23d0c9c4b1afc8571ffb4178c40f Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Thu, 24 Nov 2011 06:11:57 +1100 Subject: [PATCH 06/14] Use Tumblr's JSON format instead of XML, to correctly preserve white-space when converting to markdown. --- lib/jekyll/migrators/tumblr.rb | 130 +++++++++++++-------------------- 1 file changed, 49 insertions(+), 81 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 332c5fb0..3fd15642 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -1,10 +1,8 @@ require 'rubygems' -require 'nokogiri' require 'open-uri' require 'fileutils' -require 'cgi' -require 'iconv' require 'date' +require 'json' module Jekyll module Tumblr @@ -12,97 +10,67 @@ module Jekyll current_page = 0 while true - f = open(url + "/api/read?num=50&start=#{current_page * 50}") - doc = Nokogiri::HTML(Iconv.conv("utf-8", f.charset, f.readlines.join("\n"))) - - puts "Page: #{current_page + 1} - Posts: #{(doc/:tumblr/:posts/:post).size}" + f = open(url + "/api/read/json/?num=50&start=#{current_page * 50}") + # [21...-2] strips Tumblr's Javascript/JSONP start/end chars + json = f.readlines.join("\n")[21...-2] + blog = JSON.parse(json) + puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}" FileUtils.mkdir_p "_posts/tumblr" - (doc/:tumblr/:posts/:post).each do |post| - title = "" - content = nil - name = nil + blog["posts"].each do |post| - if post['type'] == "regular" - title_element = post.at("regular-title") - title = title_element.inner_text unless title_element == nil - content = CGI::unescapeHTML post.at("regular-body").inner_html unless post.at("regular-body") == nil - elsif post['type'] == "link" - title = post.at("link-text").inner_html unless post.at("link-text") == nil - - if post.at("link-text") != nil - content = "#{post.at("link-text").inner_html}" - else - content = "#{post.at("link-url").inner_html}" - end - - content << "
" + CGI::unescapeHTML(post.at("link-description").inner_html) unless post.at("link-description") == nil - elsif post['type'] == "photo" - content = "" - - if post.at("photo-link-url") != nil - content = "" - else - content = "" - end - - if post.at("photo-caption") != nil - content << "
" unless content == nil - content << CGI::unescapeHTML(post.at("photo-caption").inner_html) - end - elsif post['type'] == "audio" - content = CGI::unescapeHTML(post.at("audio-player").inner_html) - content << CGI::unescapeHTML(post.at("audio-caption").inner_html) unless post.at("audio-caption") == nil - elsif post['type'] == "quote" - content = "
" + CGI::unescapeHTML(post.at("quote-text").inner_html) + "
" - content << "—" + CGI::unescapeHTML(post.at("quote-source").inner_html) unless post.at("quote-source") == nil - elsif post['type'] == "conversation" - title = post.at("conversation-title").inner_html unless post.at("conversation-title") == nil - content = "
" - - (post/:conversation/:line).each do |line| - content << "
" + line['label'] + "
" + line.inner_html + "
" unless line['label'] == nil || line == nil - end - - content << "
" - elsif post['type'] == "video" - title = post.at("video-title").inner_html unless post.at("video-title") == nil - content = CGI::unescapeHTML(post.at("video-player").inner_html) - content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil + case post['type'] + when "regular" + title = post["regular-title"] + content = post["regular-body"] + when "link" + title = post["link-text"] || post["link-url"] + content = "#{title}" + content << "
" + post["link-description"] unless post["link-description"].nil? + when "photo" + title = post["photo-caption"] + content = "" + content = "#{content}" unless post["photo-link-url"].nil? + when "audio" + if !post["id3-title"].nil? + title = post["id3-title"] + content = post.at["audio-player"] + "
" + post["audio-caption"] + else + title = post["audio-caption"] + content = post.at["audio-player"] + end + when "quote" + title = post["quote-text"] + content = "
#{post["quote-text"]}
" + content << "—" + post["quote-source"] unless post["quote-source"].nil? + when "conversation" + title = post["conversation-title"] + content = "
" + post["conversation"]["line"].each do |line| + content << "
#{line['label']}
#{line}
" + end + content << "
" + when "video" + title = post["video-title"] + content = post["video-player"] + content << "
" + post["video-caption"] unless post["video-caption"].nil? end # End post types name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" - if title != nil || content != nil && name != nil - if format == "md" - content = %x[echo '#{content.gsub("'", "''")}' | html2text] - # html2text leaves extra blank lines in code blocks - clean them up. - begin - content.gsub!("\n \n", "\n") - end until !content.include? "\n \n" - end - File.open("_posts/tumblr/#{name}", "w") do |f| - - f.puts <<-HEADER ---- -layout: post -title: "#{title.gsub('"', '\"')}" ---- - -HEADER - - f.puts content - end # End file - end + File.open("_posts/tumblr/#{name}", "w") do |f| + content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md" + header = {"layout" => "post", "title" => title, "tags" => post["tags"]} + f.puts header.to_yaml + "---\n" + content + end # End file end # End post XML - if (doc/:tumblr/:posts/:post).size < 50 + if blog["posts"].size < 50 break - else - current_page = current_page + 1 end + current_page += 1 end # End while loop end # End method From bc20ba9be961ece9d5e49283366956d23c71bdd1 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Fri, 25 Nov 2011 06:10:47 +1100 Subject: [PATCH 07/14] Preserve HTML tables as per markdown's support for tables. --- lib/jekyll/migrators/tumblr.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 3fd15642..10f41601 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -60,7 +60,12 @@ module Jekyll name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" File.open("_posts/tumblr/#{name}", "w") do |f| - content = %x[echo '#{content.gsub("'", "''")}' | html2text] if format == "md" + if format == "md" + preserve = ["table", "tr", "th", "td"] + preserve.each { |tag| content = content.gsub(/<#{tag}/i, "$$" + tag).gsub(/<\/#{tag}/i, "||" + tag) } + content = %x[echo '#{content.gsub("'", "''")}' | html2text] + preserve.each { |tag| content = content.gsub("$$" + tag, "<" + tag).gsub("||" + tag, " "post", "title" => title, "tags" => post["tags"]} f.puts header.to_yaml + "---\n" + content end # End file From 0f51c81cfab6fcc3fbf6cd2720ad6da216a113d1 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Sun, 4 Dec 2011 12:01:37 +1100 Subject: [PATCH 08/14] Added support for rewriting Tumblr URLs to Jekyll URLs in posts, meta redirects for Github pages, and automatic addition of Pygments highlight tags. --- lib/jekyll/migrators/tumblr.rb | 230 ++++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 72 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 10f41601..0f9c227c 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -3,93 +3,179 @@ require 'open-uri' require 'fileutils' require 'date' require 'json' +require 'uri' +require 'jekyll' module Jekyll module Tumblr - def self.process(url, grab_images = false, format = "html") - current_page = 0 - - while true - - f = open(url + "/api/read/json/?num=50&start=#{current_page * 50}") - # [21...-2] strips Tumblr's Javascript/JSONP start/end chars - json = f.readlines.join("\n")[21...-2] + def self.process(url, format = "html", grab_images = false, + add_highlights = false, rewrite_urls = true) + FileUtils.mkdir_p "_posts/tumblr" + url += "/api/read/json/" + per_page = 50 + posts = [] + # Two passes are required so that we can rewrite URLs. + # First pass builds up an array of each post as a hash. + begin + current_page = (current_page || -1) + 1 + feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}") + json = feed.readlines.join("\n")[21...-2] # Strip Tumblr's JSONP chars. blog = JSON.parse(json) puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}" - FileUtils.mkdir_p "_posts/tumblr" - - blog["posts"].each do |post| - - case post['type'] - when "regular" - title = post["regular-title"] - content = post["regular-body"] - when "link" - title = post["link-text"] || post["link-url"] - content = "#{title}" - content << "
" + post["link-description"] unless post["link-description"].nil? - when "photo" - title = post["photo-caption"] - content = "" - content = "#{content}" unless post["photo-link-url"].nil? - when "audio" - if !post["id3-title"].nil? - title = post["id3-title"] - content = post.at["audio-player"] + "
" + post["audio-caption"] - else - title = post["audio-caption"] - content = post.at["audio-player"] - end - when "quote" - title = post["quote-text"] - content = "
#{post["quote-text"]}
" - content << "—" + post["quote-source"] unless post["quote-source"].nil? - when "conversation" - title = post["conversation-title"] - content = "
" - post["conversation"]["line"].each do |line| - content << "
#{line['label']}
#{line}
" - end - content << "
" - when "video" - title = post["video-title"] - content = post["video-player"] - content << "
" + post["video-caption"] unless post["video-caption"].nil? - end # End post types - - name = "#{Date.parse(post['date']).to_s}-#{title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}.#{format}" - - File.open("_posts/tumblr/#{name}", "w") do |f| - if format == "md" - preserve = ["table", "tr", "th", "td"] - preserve.each { |tag| content = content.gsub(/<#{tag}/i, "$$" + tag).gsub(/<\/#{tag}/i, "||" + tag) } - content = %x[echo '#{content.gsub("'", "''")}' | html2text] - preserve.each { |tag| content = content.gsub("$$" + tag, "<" + tag).gsub("||" + tag, " "post", "title" => title, "tags" => post["tags"]} - f.puts header.to_yaml + "---\n" + content - end # End file - - end # End post XML - - if blog["posts"].size < 50 - break + posts += blog["posts"].map { |post| post_to_hash(post, format) } + end until blog["posts"].size < per_page + # Rewrite URLs and create redirects. + posts = rewrite_urls_and_redirects posts if rewrite_urls + # Second pass for writing post files. + posts.each do |post| + if format == "md" + post[:content] = html_to_markdown post[:content] + post[:content] = add_syntax_highlights post[:content] if add_highlights end - current_page += 1 - - end # End while loop - end # End method + File.open("_posts/tumblr/#{post[:name]}", "w") do |f| + f.puts post[:header].to_yaml + "---\n" + post[:content] + end + end + end private + # Converts each type of Tumblr post to a hash with all required + # data for Jekyll. + def self.post_to_hash(post, format) + case post['type'] + when "regular" + title = post["regular-title"] + content = post["regular-body"] + when "link" + title = post["link-text"] || post["link-url"] + content = "#{title}" + unless post["link-description"].nil? + content << "
" + post["link-description"] + end + when "photo" + title = post["photo-caption"] + content = "" + unless post["photo-link-url"].nil? + content = "#{content}" + end + when "audio" + if !post["id3-title"].nil? + title = post["id3-title"] + content = post.at["audio-player"] + "
" + post["audio-caption"] + else + title = post["audio-caption"] + content = post.at["audio-player"] + end + when "quote" + title = post["quote-text"] + content = "
#{post["quote-text"]}
" + unless post["quote-source"].nil? + content << "—" + post["quote-source"] + end + when "conversation" + title = post["conversation-title"] + content = "
" + post["conversation"]["line"].each do |line| + content << "
#{line['label']}
#{line}
" + end + content << "
" + when "video" + title = post["video-title"] + content = post["video-player"] + unless post["video-caption"].nil? + content << "
" + post["video-caption"] + end + end + date = Date.parse(post['date']).to_s + slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') + { + :name => "#{date}-#{slug}.#{format}", + :header => { + "layout" => "post", + "title" => title, + "tags" => post["tags"], + }, + :content => content, + :url => post["url"], + :slug => post["url-with-slug"], + } + end + + # Create a Hash of old urls => new urls, for rewriting and + # redirects, and replace urls in each post. Instantiate Jekyll + # site/posts to get the correct permalink format. + def self.rewrite_urls_and_redirects(posts) + site = Jekyll::Site.new(Jekyll.configuration({})) + dir = File.join(File.dirname(__FILE__), "..") + urls = Hash[posts.map { |post| + tumblr_url = URI.parse(post[:slug]).path + jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url + redirect_dir = tumblr_url.sub(/\//, "") + "/" + FileUtils.mkdir_p redirect_dir + File.open(redirect_dir + "index.html", "w") do |f| + f.puts "" + end + [tumblr_url, jekyll_url] + }] + posts.map { |post| + urls.each do |tumblr_url, jekyll_url| + post[:content].gsub!(/#{tumblr_url}/i, jekyll_url) + end + post + } + end + + # Uses Python's html2text to convert a post's content to + # markdown. Preserve HTML tables as per the markdown docs. + def self.html_to_markdown(content) + preserve = ["table", "tr", "th", "td"] + preserve.each do |tag| + content.gsub!(/<#{tag}/i, "$$" + tag) + content.gsub!(/<\/#{tag}/i, "||" + tag) + end + content = %x[echo '#{content.gsub("'", "''")}' | html2text] + preserve.each do |tag| + content.gsub!("$$" + tag, "<" + tag) + content.gsub!("||" + tag, " Date: Tue, 27 Dec 2011 10:09:59 +1100 Subject: [PATCH 09/14] Fixed grab_images handling. --- lib/jekyll/migrators/tumblr.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 0f9c227c..0c6402ab 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -10,6 +10,7 @@ module Jekyll module Tumblr def self.process(url, format = "html", grab_images = false, add_highlights = false, rewrite_urls = true) + @grab_images = grab_images FileUtils.mkdir_p "_posts/tumblr" url += "/api/read/json/" per_page = 50 @@ -55,7 +56,7 @@ module Jekyll end when "photo" title = post["photo-caption"] - content = "" + content = "" unless post["photo-link-url"].nil? content = "#{content}" end @@ -170,16 +171,15 @@ module Jekyll lines.join("\n") end - def self.save_file(url, grab_image = false) - unless grab_image == false + def self.save_file(url) + if @grab_images FileUtils.mkdir_p "tumblr_files" File.open("tumblr_files/#{url.split('/').last}", "w") do |f| f.write(open(url).read) end - return "/tumblr_files/#{url.split('/').last}" - else - return url + url = "/tumblr_files/#{url.split('/').last}" end + url end end end From 3ab7658d1f62b75265d3f3cd03a561c54e1be3ab Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Wed, 28 Dec 2011 08:16:19 +1100 Subject: [PATCH 10/14] Fixed Tumblr URL redirects. --- lib/jekyll/migrators/tumblr.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 0c6402ab..2a2a3034 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -110,6 +110,9 @@ module Jekyll site = Jekyll::Site.new(Jekyll.configuration({})) dir = File.join(File.dirname(__FILE__), "..") urls = Hash[posts.map { |post| + # Create an initial empty file for the post so that + # we can instantiate a post object. + File.open("_posts/tumblr/#{post[:name]}", "w") tumblr_url = URI.parse(post[:slug]).path jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url redirect_dir = tumblr_url.sub(/\//, "") + "/" From 743e01f231649529350edaff6728f1bce87eae27 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Thu, 29 Dec 2011 11:37:06 +1100 Subject: [PATCH 11/14] Fixed a change in Tumblr's API whereby photos no longer have a single URL field. --- lib/jekyll/migrators/tumblr.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index 2a2a3034..cc318ca0 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -56,10 +56,10 @@ module Jekyll end when "photo" title = post["photo-caption"] - content = "" - unless post["photo-link-url"].nil? - content = "#{content}" - end + max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max + url = post["photo-url"] || post["photo-url-#{max_size}"] + content = "" + content = "#{content}" unless url.nil? when "audio" if !post["id3-title"].nil? title = post["id3-title"] From bab178d9f8502e6941e75dbba1631d26f246fab4 Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Thu, 29 Dec 2011 11:37:46 +1100 Subject: [PATCH 12/14] Strip HTML from any caption-based titles. --- lib/jekyll/migrators/tumblr.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index cc318ca0..d1df9678 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -1,6 +1,7 @@ require 'rubygems' require 'open-uri' require 'fileutils' +require 'nokogiri' require 'date' require 'json' require 'uri' @@ -89,6 +90,7 @@ module Jekyll end end date = Date.parse(post['date']).to_s + title = Nokogiri::HTML(title).text slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') { :name => "#{date}-#{slug}.#{format}", From c26bd30318aae14442fc4f7565d2d6458e10808d Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Thu, 29 Dec 2011 11:58:11 +1100 Subject: [PATCH 13/14] Fix photo link URL. --- lib/jekyll/migrators/tumblr.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index d1df9678..e2758ba8 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -60,7 +60,9 @@ module Jekyll max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max url = post["photo-url"] || post["photo-url-#{max_size}"] content = "" - content = "#{content}" unless url.nil? + unless post["photo-link-url"].nil? + content = "#{content}" + end when "audio" if !post["id3-title"].nil? title = post["id3-title"] From f2502dfab2da77f916731edcc4ac22022aeabf4b Mon Sep 17 00:00:00 2001 From: Stephen McDonald Date: Fri, 30 Dec 2011 23:25:09 +1100 Subject: [PATCH 14/14] Added handling for Tumblr missing image extensions. --- lib/jekyll/migrators/tumblr.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb index e2758ba8..367a83c9 100644 --- a/lib/jekyll/migrators/tumblr.rb +++ b/lib/jekyll/migrators/tumblr.rb @@ -59,7 +59,10 @@ module Jekyll title = post["photo-caption"] max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max url = post["photo-url"] || post["photo-url-#{max_size}"] - content = "" + ext = "." + post[post.keys.select { |k| + k =~ /^photo-url-/ && post[k].split("/").last =~ /\./ + }.first].split(".").last + content = "" unless post["photo-link-url"].nil? content = "#{content}" end @@ -178,13 +181,13 @@ module Jekyll lines.join("\n") end - def self.save_file(url) + def self.save_file(url, ext) if @grab_images + path = "tumblr_files/#{url.split('/').last}" + path += ext unless path =~ /#{ext}$/ FileUtils.mkdir_p "tumblr_files" - File.open("tumblr_files/#{url.split('/').last}", "w") do |f| - f.write(open(url).read) - end - url = "/tumblr_files/#{url.split('/').last}" + File.open(path, "w") { |f| f.write(open(url).read) } + url = "/" + path end url end