Remove migrators

2012-12-22 17:49:33 +00:00 · 2012-12-22 17:49:33 +00:00 · b9da30bc8f
parent a151a16f09
commit b9da30bc8f
15 changed files with 0 additions and 1250 deletions
--- a/15
+++ b/15
@ -82,21 +82,6 @@ end
 #
 #############################################################################
 namespace :migrate do
  # Each entry is [task name, human-readable source, shell command]. The
  # commands are non-interpolating %q literals on purpose: the ENV lookups
  # inside them must reach the child ruby process verbatim and be expanded
  # there, not by rake.
  migrators = [
    [:mephisto, "mephisto",
     %q|ruby -r './lib/jekyll/migrators/mephisto' -e 'Jekyll::Mephisto.postgres(:database => "#{ENV["DB"]}")'|],
    [:mt, "Movable Type",
     %q|ruby -r './lib/jekyll/migrators/mt' -e 'Jekyll::MT.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'|],
    [:typo, "Typo",
     %q|ruby -r './lib/jekyll/migrators/typo' -e 'Jekyll::Typo.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'|]
  ]
  migrators.each do |task_name, source_name, command|
    desc "Migrate from #{source_name} in the current directory"
    task(task_name) { sh command }
  end
 end
 begin
  require 'cucumber/rake/task'
  Cucumber::Rake::Task.new(:features) do |t|
--- a/lib/jekyll/migrators/csv.rb
+++ b/lib/jekyll/migrators/csv.rb
@ -1,26 +0,0 @@
 module Jekyll
  module CSV
    # Turns every data row of a CSV export into a post file under _posts/.
    #
    # Cells are read by position: 0 = title, 1 = permalink, 2 = body,
    # 3 = published_at, 4 = filter. A row whose first cell is the literal
    # "title" is treated as the header row and skipped.
    #
    # file - path of the CSV file to read (default: "posts.csv").
    #
    # Returns a summary String naming how many posts were written.
    def self.process(file = "posts.csv")
      FileUtils.mkdir_p "_posts"
      written = 0
      FasterCSV.foreach(file) do |row|
        title = row[0]
        next if title == "title"
        written += 1
        # Only the date part of published_at goes into the file name.
        day = row[3].split(" ")[0]
        extension = row[4] =~ /markdown/ ? ".markdown" : ".textile"
        name = day + "-" + row[1] + extension
        File.open("_posts/#{name}", "w") do |out|
          out.puts <<-HEADER
 ---
 layout: post
 title: #{title}
 ---
          HEADER
          out.puts row[2]
        end
      end
      "Created #{written} posts!"
    end
  end
 end
--- a/lib/jekyll/migrators/drupal.rb
+++ b/lib/jekyll/migrators/drupal.rb
@ -1,103 +0,0 @@
 require 'rubygems'
 require 'sequel'
 require 'fileutils'
 require 'yaml'
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module Drupal
    # Selects every node of type 'blog' or 'story' together with the
    # node_revisions row that shares its vid. Unpublished nodes are included;
    # process routes them to _drafts. Table names are unprefixed here; use
    # query_for to get a copy adjusted for a table prefix.
    QUERY = "SELECT n.nid, \
                    n.title, \
                    nr.body, \
                    n.created, \
                    n.status \
             FROM node AS n, \
                  node_revisions AS nr \
             WHERE (n.type = 'blog' OR n.type = 'story') \
             AND n.vid = nr.vid"

    # Builds the node query for a table prefix without modifying QUERY, so
    # process can be called any number of times.
    #
    # prefix - String prepended to the node and node_revisions table names.
    #
    # Returns the SQL String to run.
    def self.query_for(prefix)
      return QUERY if prefix.to_s.empty?
      # Block form keeps the prefix literal (no backslash substitution).
      QUERY.sub(" node ") { " #{prefix}node " }.
            sub(" node_revisions ") { " #{prefix}node_revisions " }
    end

    # Derives a file-name friendly slug from a node title.
    #
    # "&amp;" is listed before "&" so an escaped ampersand is replaced as a
    # whole instead of leaving "amp" behind in the slug.
    def self.slugify(title)
      title.strip.downcase.
        gsub(/&amp;|&/, ' and ').
        gsub(/[\s\.\/\\]/, '-').
        gsub(/[^\w-]/, '').
        gsub(/[-_]{2,}/, '-').
        gsub(/^[-_]/, '').
        gsub(/[-_]$/, '')
    end

    # Reads a Drupal MySQL database via Sequel and writes one file per
    # blog/story node: nodes with status 1 go to _posts, all others to
    # _drafts. For each published node a refresh page is also written under
    # node/<nid> and under every url_alias destination.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    # prefix - table name prefix (default: '').
    def self.process(dbname, user, pass, host = 'localhost', prefix = '')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
      query = query_for(prefix)
      FileUtils.mkdir_p "_posts"
      FileUtils.mkdir_p "_drafts"
      # The refresh layout is written below, so its directory must exist.
      FileUtils.mkdir_p "_layouts"
      # Create the refresh layout
      # Change the refresh url if you customized your permalink config
      File.open("_layouts/refresh.html", "w") do |f|
        f.puts <<-EOF
 <!DOCTYPE html>
 <html>
 <head>
 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 <meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
 </head>
 </html>
 EOF
      end
      db[query].each do |post|
        # Get required fields and construct Jekyll compatible name
        node_id = post[:nid]
        title = post[:title]
        content = post[:body]
        created = post[:created]
        time = Time.at(created)
        is_published = post[:status] == 1
        dir = is_published ? "_posts" : "_drafts"
        slug = slugify(title)
        name = time.strftime("%Y-%m-%d-") + slug + '.md'
        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header
        data = {
           'layout' => 'post',
           'title' => title.to_s,
           'created' => created,
         }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
        # Write out the data and content to file
        File.open("#{dir}/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
        # Make a file to redirect from the old Drupal URL
        if is_published
          aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
          aliases.push(:dst => "node/#{node_id}")
          aliases.each do |url_alias|
            FileUtils.mkdir_p url_alias[:dst]
            File.open("#{url_alias[:dst]}/index.md", "w") do |f|
              f.puts "---"
              f.puts "layout: refresh"
              f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
              f.puts "---"
            end
          end
        end
      end
      # TODO: Make dirs & files for nodes of type 'page'
        # Make refresh pages for these as well
      # TODO: Make refresh dirs & files according to entries in url_alias table
    end
  end
 end
--- a/lib/jekyll/migrators/enki.rb
+++ b/lib/jekyll/migrators/enki.rb
@ -1,49 +0,0 @@
 # Adapted by Rodrigo Pinto <rodrigopqn@gmail.com>
 # Based on typo.rb by Toby DiPasquale
 require 'fileutils'
 require 'rubygems'
 require 'sequel'
 module Jekyll
  module Enki
    SQL = <<-EOS
      SELECT p.id,
             p.title,
             p.slug,
             p.body,
             p.published_at as date,
             p.cached_tag_list as tags
      FROM posts p
    EOS
    # Exports every row returned by SQL as a textile post under _posts/.
    # Only PostgreSQL is supported as written; the connection call is the
    # single place that would need changing for another database.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p('_posts')
      db = Sequel.postgres(:database => dbname,
                           :user => user,
                           :password => pass,
                           :host => host,
                           :encoding => 'utf8')
      db[SQL].each do |post|
        published = post[:date]
        # File name is the zero-padded publication date followed by the slug.
        stamp = format("%.04d-%.02d-%.02d", published.year, published.month, published.day)
        name = stamp + '-' + post[:slug].strip + '.textile'
        front_matter = {
          'layout'     => 'post',
          'title'      => post[:title].to_s,
          'enki_id'    => post[:id],
          'categories' => post[:tags]
        }.reject { |_key, value| value.nil? || value == '' }
        File.open("_posts/#{name}", 'w') do |out|
          out.puts(front_matter.to_yaml)
          out.puts '---'
          # Carriage returns are dropped from the body.
          out.puts post[:body].delete("\r")
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/joomla.rb
+++ b/lib/jekyll/migrators/joomla.rb
@ -1,53 +0,0 @@
 require 'rubygems'
 require 'sequel'
 require 'fileutils'
 require 'yaml'
 # NOTE: This migrator is made for Joomla 1.5 databases.
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module Joomla
    # Reads a Joomla 1.5 MySQL database via Sequel and writes one markdown
    # post under _posts/ for every content row of the given section whose
    # state is '0' or '1'.
    #
    # dbname       - name of the database.
    # user         - database user name.
    # pass         - database password.
    # host         - database host (default: 'localhost').
    # table_prefix - prefix of the Joomla tables (default: 'jos_').
    # section      - sectionid of the content to export (default: '1').
    def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'jos_', section = '1')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
      FileUtils.mkdir_p("_posts")
      # The state test is parenthesised: AND binds tighter than OR, so without
      # the parentheses every state '0' row of every section would be selected.
      query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` FROM #{table_prefix}content WHERE (state = '0' OR state = '1') AND sectionid = '#{section}'"
      db[query].each do |post|
        # Get required fields and construct Jekyll compatible name.
        title = post[:title]
        slug = post[:alias]
        date = post[:created]
        content = post[:content]
        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
                                               slug]
        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header.
        data = {
           'layout' => 'post',
           'title' => title.to_s,
           'joomla_id' => post[:id],
           'joomla_url' => post[:alias],
           'date' => date
         }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
        # Write out the data and content to file
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/marley.rb
+++ b/lib/jekyll/migrators/marley.rb
@ -1,52 +0,0 @@
 require 'date'
 require 'fileutils'
 require 'yaml'
 module Jekyll
  module Marley
    # Patterns used to take a Marley post file apart.
    #
    # Returns a Hash of Symbol => Regexp.
    def self.regexp
      { :id    => /^\d{0,4}-{0,1}(.*)$/,
        :title => /^#\s*(.*)\s+$/,
        :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
        :published_on => /.*\s+\(([0-9\/]+)\)$/,
        :perex => /^([^\#\n]+\n)$/,
        :meta  => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp 
      }
    end

    # Parses the date that may follow a post title.
    #
    # text - the date String captured from the title line, or nil.
    #
    # Returns a Date, or nil when text is missing or not a valid date.
    def self.parse_date(text)
      Date.parse(text) if text
    rescue ArgumentError
      nil
    end

    # Converts every *.txt file below marley_data_dir into a markdown post
    # under _posts/. The file name is built from the publication date and the
    # name of the directory holding the file (minus a leading "NNN-").
    #
    # The date comes from a "(YYYY/MM/DD)" suffix on the title line when one
    # is present and parseable; otherwise the mtime of the containing
    # directory is used. A date suffix is not kept as part of the title.
    #
    # marley_data_dir - directory holding the Marley post directories.
    #
    # Returns a summary String naming how many posts were written.
    # Raises ArgumentError if marley_data_dir is not a directory.
    def self.process(marley_data_dir)
      raise ArgumentError, "marley dir #{marley_data_dir} not found" unless File.directory?(marley_data_dir)
      FileUtils.mkdir_p "_posts"
      patterns = regexp
      posts = 0
      Dir["#{marley_data_dir}/**/*.txt"].each do |path|
        next unless File.exist?(path)
        file_content = File.read(path)
        # slice! removes the {{ ... }} meta block so it cannot leak into the body.
        meta_content = file_content.slice!(patterns[:meta])
        body = file_content.sub(patterns[:title], '').sub(patterns[:perex], '').strip
        title, date_string = file_content.scan(patterns[:title_with_date]).first
        title = file_content[patterns[:title], 1] if title.nil?
        title = title.to_s.strip
        perex = file_content[patterns[:perex], 1].to_s.strip
        published_on = parse_date(date_string) || File.mtime(File.dirname(path))
        # NOTE(review): the meta block is author-supplied YAML passed to
        # YAML.load; confirm the post files are trusted.
        meta = meta_content ? YAML.load(meta_content[patterns[:meta], 1]) : {}
        meta = {} unless meta.is_a?(Hash)
        meta['title'] = title
        meta['layout'] = 'post'
        formatted_date = published_on.strftime('%Y-%m-%d')
        post_name = File.dirname(path).split(%r{/}).last.gsub(/\A\d+-/, '')
        name = "#{formatted_date}-#{post_name}"
        File.open("_posts/#{name}.markdown", "w") do |out|
          out.puts meta.to_yaml
          out.puts "---\n"
          out.puts "\n#{perex}\n\n" unless perex.empty?
          out.puts body
        end
        posts += 1
      end
      "Created #{posts} posts!"
    end
  end
 end
--- a/lib/jekyll/migrators/mephisto.rb
+++ b/lib/jekyll/migrators/mephisto.rb
@ -1,84 +0,0 @@
 # Quickly hacked together by Michael Ivey
 # Based on mt.rb by Nick Gerakines, open source and publicly
 # available under the MIT license. Use this module at your own risk.
 require 'rubygems'
 require 'sequel'
 require 'fastercsv'
 require 'fileutils'
 require File.join(File.dirname(__FILE__),"csv.rb")
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module Mephisto
    # Dumps Mephisto articles from PostgreSQL into a CSV file by shelling out
    # to psql, then converts that same file with CSV.process.
    # Export PGPASSWORD if you must.
    #
    # c - Hash of database settings: :database, :username, optional :host
    #     (default "localhost") and :filename (default "posts.csv").
    #
    # Returns whatever CSV.process returns.
    #
    # NOTE(review): the settings are interpolated into a shell command
    # unescaped; only pass trusted values.
    def self.postgres(c)
      sql = <<-SQL
      BEGIN;
      CREATE TEMP TABLE jekyll AS
        SELECT title, permalink, body, published_at, filter FROM contents
        WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
      COPY jekyll TO STDOUT WITH CSV HEADER;
      ROLLBACK;
      SQL
      filename = c[:filename] || "posts.csv"
      command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{filename})
      puts command
      `#{command}`
      # Read back the file psql just wrote, not the default name.
      CSV.process(filename)
    end
    # This query pulls every article of user 1 that has a publication date.
    # If you've got deleted or otherwise hidden posts please sift through the
    # created posts to make sure nothing is accidentally published.
    QUERY = "SELECT id, \
                    permalink, \
                    body, \
                    published_at, \
                    title \
             FROM contents \
             WHERE user_id = 1 AND \
                   type = 'Article' AND \
                   published_at IS NOT NULL \
             ORDER BY published_at"
    # Reads a Mephisto MySQL database via Sequel and writes one markdown post
    # under _posts/ for every row returned by QUERY.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user,
                                :password => pass,
                                :host => host,
                                :encoding => 'utf8')
      FileUtils.mkdir_p "_posts"
      db[QUERY].each do |post|
        title = post[:title]
        slug = post[:permalink]
        date = post[:published_at]
        content = post[:body]
        # Ideally, this script would determine the post format (markdown,
        # html, etc) and create files with proper extensions. At this point
        # it just assumes that markdown will be acceptable.
        name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
        # QUERY selects the column "id", so that is the key to read here.
        data = {
           'layout' => 'post',
           'title' => title.to_s,
           'mephisto_id' => post[:id],
         }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/mt.rb
+++ b/lib/jekyll/migrators/mt.rb
@ -1,86 +0,0 @@
 # Created by Nick Gerakines, open source and publicly available under the
 # MIT license. Use this module at your own risk.
 # I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
 require 'rubygems'
 require 'sequel'
 require 'fileutils'
 require 'yaml'
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module MT
    # Pulls every row of mt_entry, across all blogs and regardless of status.
    # Sift through the generated posts afterwards so that nothing unpublished,
    # deleted or hidden ends up on the site.
    QUERY = "SELECT entry_id, \
                    entry_basename, \
                    entry_text, \
                    entry_text_more, \
                    entry_authored_on, \
                    entry_title, \
                    entry_convert_breaks \
             FROM mt_entry"
    # Reads a Movable Type MySQL database via Sequel and writes one post
    # under _posts/ per entry. The file extension is chosen by suffix from
    # the entry's entry_convert_breaks value.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
      FileUtils.mkdir_p "_posts"
      db[QUERY].each do |post|
        slug = post[:entry_basename].gsub(/_/, '-')
        date = post[:entry_authored_on]
        extended = post[:entry_text_more]
        # The extended body, when present, is appended to the main body.
        content = extended.nil? ? post[:entry_text] : post[:entry_text] + " \n" + extended
        extension = suffix(post[:entry_convert_breaks])
        name = "#{date.year}-#{date.month}-#{date.day}-#{slug}" + '.' + extension
        front_matter = {
          'layout' => 'post',
          'title'  => post[:entry_title].to_s,
          'mt_id'  => post[:entry_id],
          'date'   => date
        }.reject { |_key, value| value.nil? || value == '' }
        File.open("_posts/#{name}", "w") do |out|
          out.puts front_matter.to_yaml
          out.puts "---"
          out.puts content
        end
      end
    end
    # Maps an entry_convert_breaks value to a file extension.
    #
    # entry_type - the stored value, or nil.
    #
    # Returns "markdown" for nil or anything containing "markdown",
    # "textile" for anything containing "textile", "html" for "0" or anything
    # containing "richtext", and the value itself otherwise.
    def self.suffix(entry_type)
      return "markdown" if entry_type.nil? || entry_type.include?("markdown")
      return "textile" if entry_type.include?("textile")
      return "html" if entry_type == "0" || entry_type.include?("richtext")
      # Other values might need custom work.
      entry_type
    end
  end
 end
--- a/lib/jekyll/migrators/posterous.rb
+++ b/lib/jekyll/migrators/posterous.rb
@ -1,67 +0,0 @@
 require 'rubygems'
 require 'jekyll'
 require 'fileutils'
 require 'net/http'
 require 'uri'
 require "json"
 # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key, blog)'
 module Jekyll
  module Posterous
    # Performs an authenticated GET against posterous.com, following
    # redirects up to limit times.
    #
    # uri_str - path (or redirect location) to request.
    # limit   - remaining number of redirects to follow (default: 10).
    #
    # Returns the successful response. Raises ArgumentError once limit
    # reaches 0; any non-success, non-redirect response is raised through
    # response.error!.
    def self.fetch(uri_str, limit = 10)
      # You should choose better exception.
      if limit == 0
        raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password'
      end
      response = Net::HTTP.start('posterous.com') do |http|
        request = Net::HTTP::Get.new(uri_str)
        request.basic_auth @email, @pass
        http.request(request)
      end
      return response if response.is_a?(Net::HTTPSuccess)
      return fetch(response['location'], limit - 1) if response.is_a?(Net::HTTPRedirection)
      response.error!
    end
    # Downloads every post of a Posterous site page by page and writes each
    # one as an HTML post under _posts/. Paging stops at the first page that
    # comes back without posts.
    #
    # email     - account email used for basic auth.
    # pass      - account password used for basic auth.
    # api_token - Posterous API token.
    # blog      - site identifier (default: 'primary').
    def self.process(email, pass, api_token, blog = 'primary')
      @email = email
      @pass = pass
      @api_token = api_token
      FileUtils.mkdir_p "_posts"
      endpoint = "/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}"
      page = 1
      batch = JSON.parse(fetch(endpoint).body)
      while batch.any?
        batch.each do |post|
          title = post["title"]
          slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
          date = Date.parse(post["display_date"])
          name = format("%02d-%02d-%02d-%s.html", date.year, date.month, date.day, slug)
          # Empty fields are left out of the YAML header; `false` is kept.
          front_matter = {
            'layout'    => 'post',
            'title'     => title.to_s,
            'published' => !post["is_private"]
          }.reject { |_key, value| value.nil? || value == '' }
          File.open("_posts/#{name}", "w") do |out|
            out.puts front_matter.to_yaml
            out.puts "---"
            out.puts post["body_html"]
          end
        end
        page += 1
        batch = JSON.parse(fetch("#{endpoint}&page=#{page}").body)
      end
    end
  end
 end
--- a/lib/jekyll/migrators/rss.rb
+++ b/lib/jekyll/migrators/rss.rb
@ -1,47 +0,0 @@
 # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
 # Use at your own risk. The end.
 #
 # Usage:
 #   (URL)
 #   ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('http://yourdomain.com/your-favorite-feed.xml')"
 #
 #   (Local file)
 #   ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('./somefile/on/your/computer.xml')"
 require 'rubygems'
 require 'rss/1.0'
 require 'rss/2.0'
 require 'open-uri'
 require 'fileutils'
 require 'yaml'
 module Jekyll
  module MigrateRSS
    # Reads the raw feed text from a URL or a local file.
    #
    # URLs go through open-uri (URI.open where available, Kernel#open on
    # rubies that predate it). Anything else is read as a plain file, so a
    # source string is never handed to Kernel#open's pipe handling.
    #
    # source - http(s)/ftp URL or local path.
    #
    # Returns the feed content as a String.
    def self.read_source(source)
      if source =~ %r{\A(?:https?|ftp)://}i
        URI.respond_to?(:open) ? URI.open(source) { |io| io.read } : open(source) { |io| io.read }
      else
        File.read(source)
      end
    end

    # Writes one HTML post under _posts/ for every item of an RSS feed. The
    # file name is the item date followed by the downcased title words.
    #
    # The `source` argument may be a URL or a local file.
    #
    # Raises RuntimeError when the source cannot be parsed as RSS.
    def self.process(source)
      content = read_source(source)
      rss = RSS::Parser.parse(content, false)
      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
      # Posts are written below, so the target directory must exist.
      FileUtils.mkdir_p "_posts"
      rss.items.each do |item|
        formatted_date = item.date.strftime('%Y-%m-%d')
        post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map { |i| i.downcase if i != '' }.compact.join('-')
        name = "#{formatted_date}-#{post_name}"
        header = {
          'layout' => 'post',
          'title' => item.title
        }
        File.open("_posts/#{name}.html", "w") do |f|
          f.puts header.to_yaml
          f.puts "---\n"
          f.puts item.description
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/textpattern.rb
+++ b/lib/jekyll/migrators/textpattern.rb
@ -1,58 +0,0 @@
 require 'rubygems'
 require 'sequel'
 require 'fileutils'
 require 'yaml'
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module TextPattern
    # Selects the rows of the textpattern table whose Status is '4' or '5'.
    # Per the original author's note these mean "live" and "sticky"; the
    # other statuses are 1 => draft, 2 => hidden and 3 => pending.
    QUERY = "SELECT Title, \
                    url_title, \
                    Posted, \
                    Body, \
                    Keywords \
             FROM textpattern \
             WHERE Status = '4' OR \
                   Status = '5'"
    # Reads a TextPattern MySQL database via Sequel and writes one textile
    # post under _posts/ for every row returned by QUERY.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
      FileUtils.mkdir_p "_posts"
      db[QUERY].each do |post|
        # Jekyll-compatible file name: posting date, then the url title.
        name = "#{post[:Posted].strftime("%Y-%m-%d")}-#{post[:url_title]}.textile"
        # Keywords are stored comma separated and become the tag list.
        front_matter = {
          'layout' => 'post',
          'title'  => post[:Title].to_s,
          'tags'   => post[:Keywords].split(',')
        }.reject { |_key, value| value.nil? || value == '' }
        File.open("_posts/#{name}", "w") do |out|
          out.puts front_matter.to_yaml
          out.puts "---"
          out.puts post[:Body]
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/tumblr.rb
+++ b/lib/jekyll/migrators/tumblr.rb
@ -1,195 +0,0 @@
 require 'rubygems'
 require 'open-uri'
 require 'fileutils'
 require 'nokogiri'
 require 'date'
 require 'json'
 require 'uri'
 require 'jekyll'
 module Jekyll
  module Tumblr
    # Imports a Tumblr blog through its JSON read API and writes one post
    # per entry under _posts/tumblr/.
    #
    # url            - base URL of the blog.
    # format         - "html" (default) or "md"; also used as file extension.
    # grab_images    - when true, photos are downloaded to tumblr_files/.
    # add_highlights - when true (and format is "md"), wrap indented code
    #                  blocks in highlight tags.
    # rewrite_urls   - when true (default), rewrite links between posts and
    #                  create redirect pages for the old URLs.
    def self.process(url, format = "html", grab_images = false,
                     add_highlights = false, rewrite_urls = true)
      @grab_images = grab_images
      FileUtils.mkdir_p "_posts/tumblr"
      url += "/api/read/json/"
      per_page = 50
      posts = []
      current_page = 0
      # Two passes are required so that we can rewrite URLs.
      # First pass builds up an array of each post as a hash.
      loop do
        feed = open_url(url + "?num=#{per_page}&start=#{current_page * per_page}")
        begin
          json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
        ensure
          feed.close
        end
        blog = JSON.parse(json)
        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
        posts += blog["posts"].map { |post| post_to_hash(post, format) }
        # A short page means there is nothing left to fetch.
        break if blog["posts"].size < per_page
        current_page += 1
      end
      # Rewrite URLs and create redirects.
      posts = rewrite_urls_and_redirects posts if rewrite_urls
      # Second pass for writing post files.
      posts.each do |post|
        if format == "md"
          post[:content] = html_to_markdown post[:content]
          post[:content] = add_syntax_highlights post[:content] if add_highlights
        end
        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
          f.puts post[:header].to_yaml + "---\n" + post[:content]
        end
      end
    end
    # NOTE: `private` does not affect methods defined with `def self.`, so
    # the helpers below remain callable from outside the module.
    private
    # Opens a remote URL through open-uri. Kernel#open stopped handling URLs
    # in Ruby 3.0, so URI.open is used whenever it exists.
    def self.open_url(url)
      URI.respond_to?(:open) ? URI.open(url) : open(url)
    end
    # Converts each type of Tumblr post to a hash with all required
    # data for Jekyll.
    #
    # post   - Hash for one post as parsed from the API response.
    # format - file extension to use in the post name.
    #
    # Returns a Hash with :name, :header, :content, :url and :slug.
    def self.post_to_hash(post, format)
      case post['type']
        when "regular"
          title = post["regular-title"]
          content = post["regular-body"]
        when "link"
          title = post["link-text"] || post["link-url"]
          content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
          unless post["link-description"].nil?
            content << "<br/>" + post["link-description"]
          end
        when "photo"
          title = post["photo-caption"]
          max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
          url = post["photo-url"] || post["photo-url-#{max_size}"]
          ext = "." + post[post.keys.select { |k|
            k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
          }.first].split(".").last
          content = "<img src=\"#{save_file(url, ext)}\"/>"
          unless post["photo-link-url"].nil?
            content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
          end
        when "audio"
          # post is a plain Hash, so the fields are read with [] like every
          # other branch.
          player = post["audio-player"].to_s
          if !post["id3-title"].nil?
            title = post["id3-title"]
            content = player + "<br/>" + post["audio-caption"].to_s
          else
            title = post["audio-caption"]
            content = player
          end
        when "quote"
          title = post["quote-text"]
          content = "<blockquote>#{post["quote-text"]}</blockquote>"
          unless post["quote-source"].nil?
            content << "&#8212;" + post["quote-source"]
          end
        when "conversation"
          title = post["conversation-title"]
          # NOTE(review): assumes each line is a Hash with "label" and
          # "phrase" keys, listed directly or under a "line" key; confirm
          # against a real API response.
          lines = post["conversation"]
          lines = lines["line"] if lines.is_a?(Hash)
          content = "<section><dialog>"
          (lines || []).each do |line|
            content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
          end
          content << "</dialog></section>"
        when "video"
          title = post["video-title"]
          content = post["video-player"]
          unless post["video-caption"].nil?
            content << "<br/>" + post["video-caption"]
          end
      end
      date = Date.parse(post['date']).to_s
      # to_s guards against posts that carry no title field at all.
      title = Nokogiri::HTML(title.to_s).text
      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      {
        :name => "#{date}-#{slug}.#{format}",
        :header => {
          "layout" => "post",
          "title" => title,
          "tags" => post["tags"],
        },
        :content => content.to_s,
        :url => post["url"],
        :slug => post["url-with-slug"],
      }
    end
    # Create a Hash of old urls => new urls, for rewriting and
    # redirects, and replace urls in each post. Instantiate Jekyll
    # site/posts to get the correct permalink format.
    #
    # posts - Array of hashes as built by post_to_hash.
    #
    # Returns the same posts with their :content rewritten in place.
    def self.rewrite_urls_and_redirects(posts)
      site = Jekyll::Site.new(Jekyll.configuration({}))
      dir = File.join(File.dirname(__FILE__), "..")
      urls = Hash[posts.map { |post|
        # Create an initial empty file for the post so that
        # we can instantiate a post object. The empty block makes
        # File.open close the handle straight away.
        File.open("_posts/tumblr/#{post[:name]}", "w") { }
        tumblr_url = URI.parse(post[:slug]).path
        jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url
        redirect_dir = tumblr_url.sub(/\//, "") + "/"
        FileUtils.mkdir_p redirect_dir
        File.open(redirect_dir + "index.html", "w") do |f|
          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
                 "url=#{jekyll_url}'></head><body></body></html>"
        end
        [tumblr_url, jekyll_url]
      }]
      posts.map { |post|
        urls.each do |tumblr_url, jekyll_url|
          # Escaped so that "." and friends in the path match literally.
          post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i, jekyll_url)
        end
        post
      }
    end
    # Uses Python's html2text to convert a post's content to
    # markdown. Preserve HTML tables as per the markdown docs.
    #
    # content - HTML String; the table placeholders are substituted in place.
    #
    # Returns the markdown String produced by html2text.
    def self.html_to_markdown(content)
      preserve = ["table", "tr", "th", "td"]
      preserve.each do |tag|
        content.gsub!(/<#{tag}/i, "$$" + tag)
        content.gsub!(/<\/#{tag}/i, "||" + tag)
      end
      # The HTML is written to html2text's stdin instead of being quoted into
      # a shell command, so quotes in the content survive and nothing in the
      # content is interpreted by a shell.
      content = IO.popen("html2text", "r+") do |io|
        io.write(content)
        io.close_write
        io.read
      end
      preserve.each do |tag|
        content.gsub!("$$" + tag, "<" + tag)
        content.gsub!("||" + tag, "</" + tag)
      end
      content
    end
    # Adds pygments highlight tags to code blocks in posts that use
    # markdown format. This doesn't guess the language of the code
    # block, so you should modify this to suit your own content.
    # For example, my code block only contain Python and JavaScript,
    # so I can assume the block is JavaScript if it contains a
    # semi-colon.
    #
    # NOTE(review): the opening and closing tags overwrite the first and
    # last lines of the indented run; confirm that html2text pads code
    # blocks so no code is lost.
    def self.add_syntax_highlights(content)
      lines = content.split("\n")
      block, indent, lang, start = false, /^    /, nil, nil
      lines.each_with_index do |line, i|
        if !block && line =~ indent
          block = true
          lang = "python"
          start = i
        elsif block
          lang = "javascript" if line =~ /;$/
          block = line =~ indent && i < lines.size - 1 # Also handle EOF
          if !block
            lines[start] = "{% highlight #{lang} %}"
            lines[i - 1] = "{% endhighlight %}"
          end
          lines[i] = lines[i].sub(indent, "")
        end
      end
      lines.join("\n")
    end
    # Downloads an image into tumblr_files/ when image grabbing is enabled.
    #
    # url - remote URL of the image.
    # ext - extension (with leading dot) to append if the name lacks it.
    #
    # Returns the site-relative path of the saved copy, or url unchanged when
    # image grabbing is off.
    def self.save_file(url, ext)
      if @grab_images
        path = "tumblr_files/#{url.split('/').last}"
        path += ext unless path.end_with?(ext)
        FileUtils.mkdir_p "tumblr_files"
        remote = open_url(url)
        begin
          # Binary mode keeps image bytes intact on every platform.
          File.open(path, "wb") { |f| f.write(remote.read) }
        ensure
          remote.close
        end
        url = "/" + path
      end
      url
    end
  end
 end
--- a/lib/jekyll/migrators/typo.rb
+++ b/lib/jekyll/migrators/typo.rb
@ -1,51 +0,0 @@
 # Author: Toby DiPasquale <toby@cbcg.net>
 require 'fileutils'
 require 'rubygems'
 require 'sequel'
 require 'yaml'
 module Jekyll
  module Typo
    # This SQL *should* work for both MySQL and PostgreSQL, but I haven't
    # tested PostgreSQL yet (as of 2008-12-16).
    SQL = <<-EOS
    SELECT c.id id,
           c.title title,
           c.permalink slug,
           c.body body,
           c.published_at date,
           c.state state,
           COALESCE(tf.name, 'html') filter
      FROM contents c
           LEFT OUTER JOIN text_filters tf
                        ON c.text_filter_id = tf.id
    EOS
    # Reads a Typo MySQL database via Sequel and writes one post under
    # _posts/ for every row whose state matches /published/. The file
    # extension is the first word of the row's text filter name.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p '_posts'
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
      db[SQL].each do |post|
        next unless post[:state] =~ /published/
        published = post[:date]
        stamp = format("%.04d-%.02d-%.02d", published.year, published.month, published.day)
        slug = post[:slug].strip
        # Can have more than one text filter in this field, but we just want
        # the first one for this.
        extension = post[:filter].split(' ').first
        name = stamp + '-' + slug + '.' + extension
        front_matter = {
          'layout'  => 'post',
          'title'   => post[:title].to_s,
          'typo_id' => post[:id]
        }.reject { |_key, value| value.nil? || value == '' }
        File.open("_posts/#{name}", 'w') do |out|
          out.puts(front_matter.to_yaml)
          out.puts '---'
          out.puts post[:body].delete("\r")
        end
      end
    end
  end
 end
--- a/lib/jekyll/migrators/wordpress.rb
+++ b/lib/jekyll/migrators/wordpress.rb
@ -1,294 +0,0 @@
 require 'rubygems'
 require 'sequel'
 require 'fileutils'
 require 'yaml'
 # NOTE: This converter requires Sequel and the MySQL gems.
 # The MySQL gem can be difficult to install on OS X. Once you have MySQL
 # installed, running the following commands should work:
 # $ sudo gem install sequel
 # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 module Jekyll
  module WordPress
    # Main migrator function. Call this to perform the migration.
    #
    # dbname::  The name of the database
    # user::    The database user name
    # pass::    The database user's password
    # host::    The address of the MySQL database host. Default: 'localhost'
    # options:: A hash table of configuration options.
    #
    # Supported options are:
    #
    # :table_prefix::   Prefix of database tables used by WordPress.
    #                   Default: 'wp_'
    # :clean_entities:: If true, convert non-ASCII characters to HTML
    #                   entities in the posts, comments, titles, and
    #                   names. Requires the 'htmlentities' gem to
    #                   work. Default: true.
    # :comments::       If true, migrate post comments too. Comments
    #                   are saved in the post's YAML front matter.
    #                   Default: true.
    # :categories::     If true, save the post's categories in its
    #                   YAML front matter.
    # :tags::           If true, save the post's tags in its
    #                   YAML front matter.
    # :more_excerpt::   If true, when a post has no excerpt but
    #                   does have a <!-- more --> tag, use the
    #                   preceding post content as the excerpt.
    #                   Default: true.
    # :more_anchor::    If true, convert a <!-- more --> tag into
    #                   two HTML anchors with ids "more" and
    #                   "more-NNN" (where NNN is the post number).
    #                   Default: true.
    # :status::         Array of allowed post statuses (a single
    #                   status is also accepted). Only posts with
    #                   matching status will be migrated.
    #                   Known statuses are :publish, :draft, :private,
    #                   and :revision. If this is nil or an empty
    #                   array, all posts are migrated regardless of
    #                   status. Default: [:publish].
    #
    # NOTE: :table_prefix and the :status values are interpolated into the
    # SQL text verbatim, so they must come from trusted configuration.
    def self.process(dbname, user, pass, host='localhost', options={})
      options = {
        :table_prefix   => 'wp_',
        :clean_entities => true,
        :comments       => true,
        :categories     => true,
        :tags           => true,
        :more_excerpt   => true,
        :more_anchor    => true,
        :status         => [:publish] # :draft, :private, :revision
      }.merge(options)

      if options[:clean_entities]
        begin
          require 'htmlentities'
        rescue LoadError
          STDERR.puts "Could not require 'htmlentities', so the " +
                      ":clean_entities option is now disabled."
          options[:clean_entities] = false
        end
      end

      FileUtils.mkdir_p("_posts")

      db = Sequel.mysql(dbname, :user => user, :password => pass,
                        :host => host, :encoding => 'utf8')

      px = options[:table_prefix]

      posts_query = "
         SELECT
           posts.ID            AS `id`,
           posts.guid          AS `guid`,
           posts.post_type     AS `type`,
           posts.post_status   AS `status`,
           posts.post_title    AS `title`,
           posts.post_name     AS `slug`,
           posts.post_date     AS `date`,
           posts.post_content  AS `content`,
           posts.post_excerpt  AS `excerpt`,
           posts.comment_count AS `comment_count`,
           users.display_name  AS `author`,
           users.user_login    AS `author_login`,
           users.user_email    AS `author_email`,
           users.user_url      AS `author_url`
         FROM #{px}posts AS `posts`
           LEFT JOIN #{px}users AS `users`
             ON posts.post_author = users.ID"

      # Array() lets callers pass one status (:draft or "draft") as well as a
      # list; indexing a bare symbol or string would pick out characters.
      statuses = Array(options[:status] || [])
      unless statuses.empty?
        conditions = statuses.map { |status| "posts.post_status = '#{status}'" }
        posts_query << "\n         WHERE " << conditions.join(" OR\n           ")
      end

      db[posts_query].each do |post|
        process_post(post, db, options)
      end
    end
    # Converts one WordPress post row into a file under _posts/.
    #
    # post::    Row from the posts query, read through the keys :id, :guid,
    #           :type, :status, :title, :slug, :date, :content, :excerpt,
    #           :comment_count, :author, :author_login, :author_email and
    #           :author_url.
    # db::      Database handle; indexed with a SQL string to fetch the
    #           post's terms and comments.
    # options:: The merged options hash built by process.
    #
    # The file is named YYYY-MM-DD-slug.markdown and contains the YAML
    # front matter, a "---" line, and the post content.
    def self.process_post(post, db, options)
      px = options[:table_prefix]
      clean = options[:clean_entities]

      title = clean ? clean_entities(post[:title]) : post[:title]

      # Fall back to a slug derived from the title when WordPress has none.
      slug = post[:slug]
      slug = sluggify(title) if !slug || slug.empty?

      date = post[:date] || Time.now
      name = format("%02d-%02d-%02d-%s.markdown",
                    date.year, date.month, date.day, slug)

      content = post[:content].to_s
      content = clean_entities(content) if clean

      excerpt = post[:excerpt].to_s

      more_tag = /<!-- *more *-->/
      more_index = content.index(more_tag)
      # Never assigned below, so the 'more_anchor' key is always dropped
      # from the front matter.
      more_anchor = nil

      if more_index
        if options[:more_excerpt] && (post[:excerpt].nil? || post[:excerpt].empty?)
          excerpt = content[0...more_index]
        end

        if options[:more_anchor]
          content.sub!(more_tag,
                       "<a id=\"more\"></a>" +
                       "<a id=\"more-#{post[:id]}\"></a>")
        end
      end

      categories = []
      tags = []

      if options[:categories] || options[:tags]
        terms_query =
          "SELECT
             terms.name AS `name`,
             ttax.taxonomy AS `type`
           FROM
             #{px}terms AS `terms`,
             #{px}term_relationships AS `trels`,
             #{px}term_taxonomy AS `ttax`
           WHERE
             trels.object_id = '#{post[:id]}' AND
             trels.term_taxonomy_id = ttax.term_taxonomy_id AND
             terms.term_id = ttax.term_id"

        db[terms_query].each do |term|
          # Pick the list this term belongs to; other taxonomies are ignored.
          bucket =
            if options[:categories] && term[:type] == "category"
              categories
            elsif options[:tags] && term[:type] == "post_tag"
              tags
            end
          next unless bucket

          bucket << (clean ? clean_entities(term[:name]) : term[:name])
        end
      end

      comments = []

      if options[:comments] && post[:comment_count].to_i > 0
        comments_query =
          "SELECT
             comment_ID           AS `id`,
             comment_author       AS `author`,
             comment_author_email AS `author_email`,
             comment_author_url   AS `author_url`,
             comment_date         AS `date`,
             comment_date_gmt     AS `date_gmt`,
             comment_content      AS `content`
           FROM #{px}comments
           WHERE
             comment_post_ID = '#{post[:id]}' AND
             comment_approved != 'spam'"

        db[comments_query].each do |comment|
          body = comment[:content].to_s
          body.force_encoding("UTF-8") if body.respond_to?(:force_encoding)
          body = clean_entities(body) if clean

          author = comment[:author].to_s
          author = clean_entities(author) if clean

          comments << {
            'id'           => comment[:id].to_i,
            'author'       => author,
            'author_email' => comment[:author_email].to_s,
            'author_url'   => comment[:author_url].to_s,
            'date'         => comment[:date].to_s,
            'date_gmt'     => comment[:date_gmt].to_s,
            'content'      => body,
          }
        end

        comments.sort! { |left, right| left['id'] <=> right['id'] }
      end

      # Collect the header fields, drop the ones that are nil or empty
      # strings, and serialise what is left as YAML.
      front_matter = {
        'layout'        => post[:type].to_s,
        'status'        => post[:status].to_s,
        'published'     => (post[:status].to_s == "publish"),
        'title'         => title.to_s,
        'author'        => post[:author].to_s,
        'author_login'  => post[:author_login].to_s,
        'author_email'  => post[:author_email].to_s,
        'author_url'    => post[:author_url].to_s,
        'excerpt'       => excerpt,
        'more_anchor'   => more_anchor,
        'wordpress_id'  => post[:id],
        'wordpress_url' => post[:guid].to_s,
        'date'          => date,
        'categories'    => options[:categories] ? categories : nil,
        'tags'          => options[:tags] ? tags : nil,
        'comments'      => options[:comments] ? comments : nil,
      }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

      # Header first, then the separator, then the post body.
      File.open("_posts/#{name}", "w") do |f|
        f.puts front_matter
        f.puts "---"
        f.puts content
      end
    end
    # Encodes +text+ with HTMLEntities (named entities) and then restores the
    # markup-significant characters, so that existing HTML in posts and
    # comments keeps working.
    #
    # text:: The String to clean. It is not modified; a UTF-8 tagged copy is
    #        encoded instead, so frozen strings are accepted.
    #
    # Returns the cleaned String.
    def self.clean_entities( text )
      if text.respond_to?(:force_encoding)
        text = text.dup.force_encoding("UTF-8")
      end
      encoded = HTMLEntities.new.encode(text, :named)
      # We don't want to convert these, it would break all
      # HTML tags in the post and comments.
      restore = {
        "&amp;"  => "&",
        "&lt;"   => "<",
        "&gt;"   => ">",
        "&quot;" => '"',
        "&apos;" => "'"
      }
      # Undo the encoding in a single pass. Replacing "&amp;" first and the
      # others afterwards would decode twice: a source "&lt;" is encoded as
      # "&amp;lt;" and must come back as "&lt;", not as a raw "<".
      encoded.gsub(/&(?:amp|lt|gt|quot|apos);/) { |entity| restore[entity] }
    end
    # Builds a permalink slug from +title+: lower-cased runs of ASCII letters
    # and digits joined by single hyphens. When the optional 'unidecode'
    # library can be loaded the title is transliterated with to_ascii first;
    # otherwise a hint is printed to STDERR and the title is used as is.
    def self.sluggify( title )
      ascii_title =
        begin
          require 'unidecode'
          title.to_ascii
        rescue LoadError
          STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
          title
        end

      ascii_title.downcase.scan(/[0-9A-Za-z]+/).join("-")
    end
  end
 end
--- a/lib/jekyll/migrators/wordpressdotcom.rb
+++ b/lib/jekyll/migrators/wordpressdotcom.rb
@ -1,70 +0,0 @@
 # coding: utf-8
 require 'rubygems'
 require 'hpricot'
 require 'fileutils'
 require 'yaml'
 require 'time'
 module Jekyll
  # Importer for a wordpress.xml export file, which can be downloaded from
  # your wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Reads +filename+ and writes every <item> of its channel to
    # _<type>s/YYYY-MM-DD-<name>.html (for example _posts/ or _pages/),
    # with a YAML header followed by the item's encoded content. Prints a
    # per-type summary when done.
    #
    # filename:: Path of the export file. Default: "wordpress.xml"
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip

        # Fallback to "prettified" title if post_name is empty (can happen)
        permalink_title = item.at('wp:post_name').inner_text
        permalink_title = title.downcase.split.join('-') if permalink_title == ""

        date = Time.parse(item.at('wp:post_date').inner_text)
        status = item.at('wp:status').inner_text
        published = (status == "publish")
        type = item.at('wp:post_type').inner_text

        category_names = (item/:category).map { |category| category.inner_text }
        tags = category_names.reject { |category| category == 'Uncategorized' }.uniq

        metas = {}
        item.search("wp:postmeta").each do |meta|
          metas[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
        end

        header = {
          'layout' => type,
          'title'  => title,
          'tags'   => tags,
          'status'   => status,
          'type'   => type,
          'published' => published,
          'meta'   => metas
        }

        target_dir = "_#{type}s"
        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"

        FileUtils.mkdir_p target_dir
        File.open("#{target_dir}/#{name}", "w") do |f|
          f.puts header.to_yaml
          f.puts '---'
          f.puts item.at('content:encoded').inner_text
        end

        import_count[type] += 1
      end

      import_count.each { |type, total| puts "Imported #{total} #{type}s" }
    end
  end
 end