From aa0d82fa9679843e4356aacd7cb737bb98f16ba3 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Sun, 8 May 2011 15:08:53 -0700 Subject: [PATCH] Clean up migrators formatting. --- lib/jekyll/migrators/csv.rb | 6 +-- lib/jekyll/migrators/drupal.rb | 18 ++++++--- lib/jekyll/migrators/marley.rb | 1 - lib/jekyll/migrators/mephisto.rb | 29 +++++++++------ lib/jekyll/migrators/mt.rb | 43 +++++++++++++--------- lib/jekyll/migrators/posterous.rb | 27 ++++++-------- lib/jekyll/migrators/textpattern.rb | 23 ++++++++---- lib/jekyll/migrators/typo.rb | 11 +++--- lib/jekyll/migrators/wordpress.rb | 28 ++++++++------ lib/jekyll/migrators/wordpressdotcom.rb | 49 ++++++++++++------------- 10 files changed, 130 insertions(+), 105 deletions(-) diff --git a/lib/jekyll/migrators/csv.rb b/lib/jekyll/migrators/csv.rb index 657b35b7..ce5203b7 100644 --- a/lib/jekyll/migrators/csv.rb +++ b/lib/jekyll/migrators/csv.rb @@ -1,7 +1,7 @@ module Jekyll module CSV - #Reads a csv with title, permalink, body, published_at, and filter. - #It creates a post file for each row in the csv + # Reads a csv with title, permalink, body, published_at, and filter. + # It creates a post file for each row in the csv def self.process(file = "posts.csv") FileUtils.mkdir_p "_posts" posts = 0 @@ -23,4 +23,4 @@ title: #{row[0]} "Created #{posts} posts!" end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/drupal.rb b/lib/jekyll/migrators/drupal.rb index be198197..32e335cc 100644 --- a/lib/jekyll/migrators/drupal.rb +++ b/lib/jekyll/migrators/drupal.rb @@ -11,12 +11,18 @@ require 'yaml' module Jekyll module Drupal - - # Reads a MySQL database via Sequel and creates a post file for each - # post in wp_posts that has post_status = 'publish'. - # This restriction is made because 'draft' posts are not guaranteed to - # have valid dates. - QUERY = "SELECT node.nid, node.title, node_revisions.body, node.created, node.status FROM node, node_revisions WHERE (node.type = 'blog' OR node.type = 'story') AND node.vid = node_revisions.vid" + # Reads a MySQL database via Sequel and creates a post file for each post + # in wp_posts that has post_status = 'publish'. This restriction is made + # because 'draft' posts are not guaranteed to have valid dates. + QUERY = "SELECT node.nid, \ + node.title, \ + node_revisions.body, \ + node.created, \ + node.status \ + FROM node, \ + node_revisions \ + WHERE (node.type = 'blog' OR node.type = 'story') \ + AND node.vid = node_revisions.vid" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') diff --git a/lib/jekyll/migrators/marley.rb b/lib/jekyll/migrators/marley.rb index 390a5a28..21bcead5 100644 --- a/lib/jekyll/migrators/marley.rb +++ b/lib/jekyll/migrators/marley.rb @@ -3,7 +3,6 @@ require 'fileutils' module Jekyll module Marley - def self.regexp { :id => /^\d{0,4}-{0,1}(.*)$/, :title => /^#\s*(.*)\s+$/, diff --git a/lib/jekyll/migrators/mephisto.rb b/lib/jekyll/migrators/mephisto.rb index 9a3e33ae..7622c722 100644 --- a/lib/jekyll/migrators/mephisto.rb +++ b/lib/jekyll/migrators/mephisto.rb @@ -36,11 +36,22 @@ module Jekyll # This query will pull blog posts from all entries across all blogs. If # you've got unpublished, deleted or otherwise hidden posts please sift # through the created posts to make sure nothing is accidently published. - - QUERY = "SELECT id, permalink, body, published_at, title FROM contents WHERE user_id = 1 AND type = 'Article' AND published_at IS NOT NULL ORDER BY published_at" + QUERY = "SELECT id, \ + permalink, \ + body, \ + published_at, \ + title \ + FROM contents \ + WHERE user_id = 1 AND \ + type = 'Article' AND \ + published_at IS NOT NULL \ + ORDER BY published_at" def self.process(dbname, user, pass, host = 'localhost') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') + db = Sequel.mysql(dbname, :user => user, + :password => pass, + :host => host, + :encoding => 'utf8') FileUtils.mkdir_p "_posts" @@ -49,16 +60,10 @@ module Jekyll slug = post[:permalink] date = post[:published_at] content = post[:body] -# more_content = '' - # Be sure to include the body and extended body. -# if more_content != nil -# content = content + " \n" + more_content -# end - - # Ideally, this script would determine the post format (markdown, html - # , etc) and create files with proper extensions. At this point it - # just assumes that markdown will be acceptable. + # Ideally, this script would determine the post format (markdown, + # html, etc) and create files with proper extensions. At this point + # it just assumes that markdown will be acceptable. name = [date.year, date.month, date.day, slug].join('-') + ".markdown" data = { diff --git a/lib/jekyll/migrators/mt.rb b/lib/jekyll/migrators/mt.rb index c9d79b7d..048c84db 100644 --- a/lib/jekyll/migrators/mt.rb +++ b/lib/jekyll/migrators/mt.rb @@ -18,7 +18,14 @@ module Jekyll # This query will pull blog posts from all entries across all blogs. If # you've got unpublished, deleted or otherwise hidden posts please sift # through the created posts to make sure nothing is accidently published. - QUERY = "SELECT entry_id, entry_basename, entry_text, entry_text_more, entry_authored_on, entry_title, entry_convert_breaks FROM mt_entry" + QUERY = "SELECT entry_id, \ + entry_basename, \ + entry_text, \ + entry_text_more, \ + entry_authored_on, \ + entry_title, \ + entry_convert_breaks \ + FROM mt_entry" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') @@ -38,17 +45,18 @@ module Jekyll content = content + " \n" + more_content end - # Ideally, this script would determine the post format (markdown, html - # , etc) and create files with proper extensions. At this point it - # just assumes that markdown will be acceptable. - name = [date.year, date.month, date.day, slug].join('-') + '.' + self.suffix(entry_convert_breaks) + # Ideally, this script would determine the post format (markdown, + # html, etc) and create files with proper extensions. At this point + # it just assumes that markdown will be acceptable. + name = [date.year, date.month, date.day, slug].join('-') + '.' + + self.suffix(entry_convert_breaks) data = { 'layout' => 'post', 'title' => title.to_s, 'mt_id' => post[:entry_id], 'date' => date - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml + }.delete_if { |k,v| v.nil? || v == '' }.to_yaml File.open("_posts/#{name}", "w") do |f| f.puts data @@ -60,17 +68,18 @@ module Jekyll def self.suffix(entry_type) if entry_type.nil? || entry_type.include?("markdown") - # The markdown plugin I have saves this as "markdown_with_smarty_pants", so I just look for "markdown". - "markdown" - elsif entry_type.include?("textile") - # This is saved as "textile_2" on my installation of MT 5.1. - "textile" - elsif entry_type == "0" || entry_type.include?("richtext") - # richtext looks to me like it's saved as HTML, so I include it here. - "html" - else - # Other values might need custom work. - entry_type + # The markdown plugin I have saves this as + # "markdown_with_smarty_pants", so I just look for "markdown". + "markdown" + elsif entry_type.include?("textile") + # This is saved as "textile_2" on my installation of MT 5.1. + "textile" + elsif entry_type == "0" || entry_type.include?("richtext") + # Richtext looks to me like it's saved as HTML, so I include it here. + "html" + else + # Other values might need custom work. + entry_type end end end diff --git a/lib/jekyll/migrators/posterous.rb b/lib/jekyll/migrators/posterous.rb index e9cfe8f2..6cfc2430 100644 --- a/lib/jekyll/migrators/posterous.rb +++ b/lib/jekyll/migrators/posterous.rb @@ -9,17 +9,16 @@ require "json" module Jekyll module Posterous - def self.fetch(uri_str, limit = 10) # You should choose better exception. raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0 - + response = nil - Net::HTTP.start('posterous.com') {|http| + Net::HTTP.start('posterous.com') do |http| req = Net::HTTP::Get.new(uri_str) req.basic_auth @email, @pass response = http.request(req) - } + end case response when Net::HTTPSuccess then response @@ -27,26 +26,24 @@ module Jekyll else response.error! end end - - + def self.process(email, pass, blog = 'primary') @email, @pass = email, pass @api_token = JSON.parse(self.fetch("/api/2/auth/token").body)['api_token'] FileUtils.mkdir_p "_posts" - + posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}").body) page = 1 - + while posts.any? - posts.each do |post| title = post["title"] - slug = title.gsub(/[^[:alnum:]]+/, '-').downcase + slug = title.gsub(/[^[:alnum:]]+/, '-').downcase date = Date.parse(post["display_date"]) content = post["body_html"] published = !post["is_private"] name = "%02d-%02d-%02d-%s.html" % [date.year, date.month, date.day, slug] - + # Get the relevant fields as a hash, delete empty fields and convert # to YAML for the header data = { @@ -61,13 +58,11 @@ module Jekyll f.puts "---" f.puts content end - end - + page += 1 - posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body) + posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body) end - end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/textpattern.rb b/lib/jekyll/migrators/textpattern.rb index 4b371197..58634c48 100644 --- a/lib/jekyll/migrators/textpattern.rb +++ b/lib/jekyll/migrators/textpattern.rb @@ -11,10 +11,17 @@ require 'fileutils' module Jekyll module TextPattern # Reads a MySQL database via Sequel and creates a post file for each post. - # The only posts selected are those with a status of 4 or 5, which means "live" - # and "sticky" respectively. - # Other statuses is 1 => draft, 2 => hidden and 3 => pending - QUERY = "select Title, url_title, Posted, Body, Keywords from textpattern where Status = '4' or Status = '5'" + # The only posts selected are those with a status of 4 or 5, which means + # "live" and "sticky" respectively. + # Other statuses are 1 => draft, 2 => hidden and 3 => pending. + QUERY = "SELECT Title, \ + url_title, \ + Posted, \ + Body, \ + Keywords \ + FROM textpattern \ + WHERE Status = '4' OR \ + Status = '5'" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') @@ -22,7 +29,7 @@ module Jekyll FileUtils.mkdir_p "_posts" db[QUERY].each do |post| - # Get required fields and construct Jekyll compatible name + # Get required fields and construct Jekyll compatible name. title = post[:Title] slug = post[:url_title] date = post[:Posted] @@ -31,14 +38,14 @@ module Jekyll name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile" # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header + # to YAML for the header. data = { 'layout' => 'post', 'title' => title.to_s, 'tags' => post[:Keywords].split(',') }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - # Write out the data and content to file + # Write out the data and content to file. File.open("_posts/#{name}", "w") do |f| f.puts data f.puts "---" @@ -47,4 +54,4 @@ module Jekyll end end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/typo.rb b/lib/jekyll/migrators/typo.rb index 1795a6bb..3de5130a 100644 --- a/lib/jekyll/migrators/typo.rb +++ b/lib/jekyll/migrators/typo.rb @@ -5,8 +5,8 @@ require 'sequel' module Jekyll module Typo - # this SQL *should* work for both MySQL and PostgreSQL, but I haven't - # tested PostgreSQL yet (as of 2008-12-16) + # This SQL *should* work for both MySQL and PostgreSQL, but I haven't + # tested PostgreSQL yet (as of 2008-12-16). SQL = <<-EOS SELECT c.id id, c.title title, @@ -30,8 +30,9 @@ module Jekyll sprintf("%.02d", post[:date].month), sprintf("%.02d", post[:date].day), post[:slug].strip ].join('-') + # Can have more than one text filter in this field, but we just want - # the first one for this + # the first one for this. name += '.' + post[:filter].split(' ')[0] File.open("_posts/#{name}", 'w') do |f| @@ -45,5 +46,5 @@ module Jekyll end end - end # module Typo -end # module Jekyll + end +end diff --git a/lib/jekyll/migrators/wordpress.rb b/lib/jekyll/migrators/wordpress.rb index cb61f97a..535859c9 100644 --- a/lib/jekyll/migrators/wordpress.rb +++ b/lib/jekyll/migrators/wordpress.rb @@ -11,20 +11,27 @@ require 'yaml' module Jekyll module WordPress - def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - FileUtils.mkdir_p "_posts" - + FileUtils.mkdir_p("_posts") + # Reads a MySQL database via Sequel and creates a post file for each - # post in wp_posts that has post_status = 'publish'. - # This restriction is made because 'draft' posts are not guaranteed to - # have valid dates. - query = "select post_title, post_name, post_date, post_content, post_excerpt, ID, guid from #{table_prefix}posts where post_status = 'publish' and post_type = 'post'" + # post in wp_posts that has post_status = 'publish'. This restriction is + # made because 'draft' posts are not guaranteed to have valid dates. + query = "SELECT post_title, \ + post_name, \ + post_date, \ + post_content, \ + post_excerpt, \ + ID, \ + guid \ + FROM #{table_prefix}posts \ + WHERE post_status = 'publish' AND \ + post_type = 'post'" db[query].each do |post| - # Get required fields and construct Jekyll compatible name + # Get required fields and construct Jekyll compatible name. title = post[:post_title] slug = post[:post_name] date = post[:post_date] @@ -33,7 +40,7 @@ module Jekyll slug] # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header + # to YAML for the header. data = { 'layout' => 'post', 'title' => title.to_s, @@ -41,7 +48,7 @@ module Jekyll 'wordpress_id' => post[:ID], 'wordpress_url' => post[:guid], 'date' => date - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml + }.delete_if { |k,v| v.nil? || v == '' }.to_yaml # Write out the data and content to file File.open("_posts/#{name}", "w") do |f| @@ -50,7 +57,6 @@ module Jekyll f.puts content end end - end end end diff --git a/lib/jekyll/migrators/wordpressdotcom.rb b/lib/jekyll/migrators/wordpressdotcom.rb index 069d6118..53218e50 100644 --- a/lib/jekyll/migrators/wordpressdotcom.rb +++ b/lib/jekyll/migrators/wordpressdotcom.rb @@ -6,40 +6,37 @@ require 'fileutils' require 'yaml' module Jekyll - - # This importer takes a wordpress.xml file, - # which can be exported from your - # wordpress.com blog (/wp-admin/export.php) + # This importer takes a wordpress.xml file, which can be exported from your + # wordpress.com blog (/wp-admin/export.php). module WordpressDotCom def self.process(filename = "wordpress.xml") FileUtils.mkdir_p "_posts" posts = 0 - doc = Hpricot::XML(File.read(filename)) - - (doc/:channel/:item).each do |item| - title = item.at(:title).inner_text.strip - permalink_title = item.at('wp:post_name').inner_text - date = Time.parse(item.at(:pubDate).inner_text) - tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq - name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html" - header = { - 'layout' => 'post', - 'title' => title, - 'tags' => tags - } - - File.open("_posts/#{name}", "w") do |f| - f.puts header.to_yaml - f.puts '---' + doc = Hpricot::XML(File.read(filename)) + + (doc/:channel/:item).each do |item| + title = item.at(:title).inner_text.strip + permalink_title = item.at('wp:post_name').inner_text + date = Time.parse(item.at(:pubDate).inner_text) + tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq + name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html" + header = { + 'layout' => 'post', + 'title' => title, + 'tags' => tags + } + + File.open("_posts/#{name}", "w") do |f| + f.puts header.to_yaml + f.puts '---' f.puts item.at('content:encoded').inner_text end - posts += 1 - end + posts += 1 + end - puts "Imported #{posts} posts" + puts "Imported #{posts} posts" end end - -end \ No newline at end of file +end