From b9da30bc8ffa4a2d0a9280e755d0904609d4fc97 Mon Sep 17 00:00:00 2001 From: Tom Bell Date: Sat, 22 Dec 2012 17:49:33 +0000 Subject: [PATCH] Remove migrators --- Rakefile | 15 -- lib/jekyll/migrators/csv.rb | 26 --- lib/jekyll/migrators/drupal.rb | 103 --------- lib/jekyll/migrators/enki.rb | 49 ---- lib/jekyll/migrators/joomla.rb | 53 ----- lib/jekyll/migrators/marley.rb | 52 ----- lib/jekyll/migrators/mephisto.rb | 84 ------- lib/jekyll/migrators/mt.rb | 86 ------- lib/jekyll/migrators/posterous.rb | 67 ------ lib/jekyll/migrators/rss.rb | 47 ---- lib/jekyll/migrators/textpattern.rb | 58 ----- lib/jekyll/migrators/tumblr.rb | 195 ---------------- lib/jekyll/migrators/typo.rb | 51 ---- lib/jekyll/migrators/wordpress.rb | 294 ------------------------ lib/jekyll/migrators/wordpressdotcom.rb | 70 ------ 15 files changed, 1250 deletions(-) delete mode 100644 lib/jekyll/migrators/csv.rb delete mode 100644 lib/jekyll/migrators/drupal.rb delete mode 100644 lib/jekyll/migrators/enki.rb delete mode 100644 lib/jekyll/migrators/joomla.rb delete mode 100644 lib/jekyll/migrators/marley.rb delete mode 100644 lib/jekyll/migrators/mephisto.rb delete mode 100644 lib/jekyll/migrators/mt.rb delete mode 100644 lib/jekyll/migrators/posterous.rb delete mode 100644 lib/jekyll/migrators/rss.rb delete mode 100644 lib/jekyll/migrators/textpattern.rb delete mode 100644 lib/jekyll/migrators/tumblr.rb delete mode 100644 lib/jekyll/migrators/typo.rb delete mode 100644 lib/jekyll/migrators/wordpress.rb delete mode 100644 lib/jekyll/migrators/wordpressdotcom.rb diff --git a/Rakefile b/Rakefile index 8eb1360d..a267c50d 100644 --- a/Rakefile +++ b/Rakefile @@ -82,21 +82,6 @@ end # ############################################################################# -namespace :migrate do - desc "Migrate from mephisto in the current directory" - task :mephisto do - sh %q(ruby -r './lib/jekyll/migrators/mephisto' -e 'Jekyll::Mephisto.postgres(:database => "#{ENV["DB"]}")') - end - desc "Migrate from Movable Type in the current directory" - task :mt do - sh %q(ruby -r './lib/jekyll/migrators/mt' -e 'Jekyll::MT.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")') - end - desc "Migrate from Typo in the current directory" - task :typo do - sh %q(ruby -r './lib/jekyll/migrators/typo' -e 'Jekyll::Typo.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")') - end -end - begin require 'cucumber/rake/task' Cucumber::Rake::Task.new(:features) do |t| diff --git a/lib/jekyll/migrators/csv.rb b/lib/jekyll/migrators/csv.rb deleted file mode 100644 index ce5203b7..00000000 --- a/lib/jekyll/migrators/csv.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Jekyll - module CSV - # Reads a csv with title, permalink, body, published_at, and filter. - # It creates a post file for each row in the csv - def self.process(file = "posts.csv") - FileUtils.mkdir_p "_posts" - posts = 0 - FasterCSV.foreach(file) do |row| - next if row[0] == "title" - posts += 1 - name = row[3].split(" ")[0]+"-"+row[1]+(row[4] =~ /markdown/ ? ".markdown" : ".textile") - File.open("_posts/#{name}", "w") do |f| - f.puts <<-HEADER ---- -layout: post -title: #{row[0]} ---- - - HEADER - f.puts row[2] - end - end - "Created #{posts} posts!" - end - end -end diff --git a/lib/jekyll/migrators/drupal.rb b/lib/jekyll/migrators/drupal.rb deleted file mode 100644 index 7fd16aef..00000000 --- a/lib/jekyll/migrators/drupal.rb +++ /dev/null @@ -1,103 +0,0 @@ -require 'rubygems' -require 'sequel' -require 'fileutils' -require 'yaml' - -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module Drupal - # Reads a MySQL database via Sequel and creates a post file for each post - # in wp_posts that has post_status = 'publish'. This restriction is made - # because 'draft' posts are not guaranteed to have valid dates. - QUERY = "SELECT n.nid, \ - n.title, \ - nr.body, \ - n.created, \ - n.status \ - FROM node AS n, \ - node_revisions AS nr \ - WHERE (n.type = 'blog' OR n.type = 'story') \ - AND n.vid = nr.vid" - - def self.process(dbname, user, pass, host = 'localhost', prefix = '') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - - if prefix != '' - QUERY[" node "] = " " + prefix + "node " - QUERY[" node_revisions "] = " " + prefix + "node_revisions " - end - - FileUtils.mkdir_p "_posts" - FileUtils.mkdir_p "_drafts" - - # Create the refresh layout - # Change the refresh url if you customized your permalink config - File.open("_layouts/refresh.html", "w") do |f| - f.puts < - - - - - - -EOF - end - - db[QUERY].each do |post| - # Get required fields and construct Jekyll compatible name - node_id = post[:nid] - title = post[:title] - content = post[:body] - created = post[:created] - time = Time.at(created) - is_published = post[:status] == 1 - dir = is_published ? "_posts" : "_drafts" - slug = title.strip.downcase.gsub(/(&|&)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '') - name = time.strftime("%Y-%m-%d-") + slug + '.md' - - # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'created' => created, - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - - # Write out the data and content to file - File.open("#{dir}/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - - # Make a file to redirect from the old Drupal URL - if is_published - aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all - - aliases.push(:dst => "node/#{node_id}") - - aliases.each do |url_alias| - FileUtils.mkdir_p url_alias[:dst] - File.open("#{url_alias[:dst]}/index.md", "w") do |f| - f.puts "---" - f.puts "layout: refresh" - f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}" - f.puts "---" - end - end - end - end - - # TODO: Make dirs & files for nodes of type 'page' - # Make refresh pages for these as well - - # TODO: Make refresh dirs & files according to entries in url_alias table - end - end -end diff --git a/lib/jekyll/migrators/enki.rb b/lib/jekyll/migrators/enki.rb deleted file mode 100644 index 61cb2562..00000000 --- a/lib/jekyll/migrators/enki.rb +++ /dev/null @@ -1,49 +0,0 @@ -# Adapted by Rodrigo Pinto -# Based on typo.rb by Toby DiPasquale - -require 'fileutils' -require 'rubygems' -require 'sequel' - -module Jekyll - module Enki - SQL = <<-EOS - SELECT p.id, - p.title, - p.slug, - p.body, - p.published_at as date, - p.cached_tag_list as tags - FROM posts p - EOS - - # Just working with postgres, but can be easily adapted - # to work with both mysql and postgres. - def self.process(dbname, user, pass, host = 'localhost') - FileUtils.mkdir_p('_posts') - db = Sequel.postgres(:database => dbname, - :user => user, - :password => pass, - :host => host, - :encoding => 'utf8') - - db[SQL].each do |post| - name = [ sprintf("%.04d", post[:date].year), - sprintf("%.02d", post[:date].month), - sprintf("%.02d", post[:date].day), - post[:slug].strip ].join('-') - name += '.textile' - - File.open("_posts/#{name}", 'w') do |f| - f.puts({ 'layout' => 'post', - 'title' => post[:title].to_s, - 'enki_id' => post[:id], - 'categories' => post[:tags] - }.delete_if { |k, v| v.nil? || v == '' }.to_yaml) - f.puts '---' - f.puts post[:body].delete("\r") - end - end - end - end -end diff --git a/lib/jekyll/migrators/joomla.rb b/lib/jekyll/migrators/joomla.rb deleted file mode 100644 index 87f1e105..00000000 --- a/lib/jekyll/migrators/joomla.rb +++ /dev/null @@ -1,53 +0,0 @@ -require 'rubygems' -require 'sequel' -require 'fileutils' -require 'yaml' - -# NOTE: This migrator is made for Joomla 1.5 databases. -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module Joomla - def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'jos_', section = '1') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - - FileUtils.mkdir_p("_posts") - - # Reads a MySQL database via Sequel and creates a post file for each - # post in wp_posts that has post_status = 'publish'. This restriction is - # made because 'draft' posts are not guaranteed to have valid dates. - query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` FROM #{table_prefix}content WHERE state = '0' OR state = '1' AND sectionid = '#{section}'" - - db[query].each do |post| - # Get required fields and construct Jekyll compatible name. - title = post[:title] - slug = post[:alias] - date = post[:created] - content = post[:content] - name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, - slug] - - # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header. - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'joomla_id' => post[:id], - 'joomla_url' => post[:alias], - 'date' => date - }.delete_if { |k,v| v.nil? || v == '' }.to_yaml - - # Write out the data and content to file - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - end - end -end diff --git a/lib/jekyll/migrators/marley.rb b/lib/jekyll/migrators/marley.rb deleted file mode 100644 index 21bcead5..00000000 --- a/lib/jekyll/migrators/marley.rb +++ /dev/null @@ -1,52 +0,0 @@ -require 'yaml' -require 'fileutils' - -module Jekyll - module Marley - def self.regexp - { :id => /^\d{0,4}-{0,1}(.*)$/, - :title => /^#\s*(.*)\s+$/, - :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/, - :published_on => /.*\s+\(([0-9\/]+)\)$/, - :perex => /^([^\#\n]+\n)$/, - :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp - } - end - - def self.process(marley_data_dir) - raise ArgumentError, "marley dir #{marley_data_dir} not found" unless File.directory?(marley_data_dir) - - FileUtils.mkdir_p "_posts" - - posts = 0 - Dir["#{marley_data_dir}/**/*.txt"].each do |f| - next unless File.exists?(f) - - #copied over from marley's app/lib/post.rb - file_content = File.read(f) - meta_content = file_content.slice!( self.regexp[:meta] ) - body = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip - - title = file_content.scan( self.regexp[:title] ).first.to_s.strip - prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip - published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) ) - meta = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {} - meta['title'] = title - meta['layout'] = 'post' - - formatted_date = published_on.strftime('%Y-%m-%d') - post_name = File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '') - - name = "#{formatted_date}-#{post_name}" - File.open("_posts/#{name}.markdown", "w") do |f| - f.puts meta.to_yaml - f.puts "---\n" - f.puts "\n#{prerex}\n\n" if prerex - f.puts body - end - posts += 1 - end - "Created #{posts} posts!" - end - end -end diff --git a/lib/jekyll/migrators/mephisto.rb b/lib/jekyll/migrators/mephisto.rb deleted file mode 100644 index 7622c722..00000000 --- a/lib/jekyll/migrators/mephisto.rb +++ /dev/null @@ -1,84 +0,0 @@ -# Quickly hacked together my Michael Ivey -# Based on mt.rb by Nick Gerakines, open source and publically -# available under the MIT license. Use this module at your own risk. - -require 'rubygems' -require 'sequel' -require 'fastercsv' -require 'fileutils' -require File.join(File.dirname(__FILE__),"csv.rb") - -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module Mephisto - #Accepts a hash with database config variables, exports mephisto posts into a csv - #export PGPASSWORD if you must - def self.postgres(c) - sql = <<-SQL - BEGIN; - CREATE TEMP TABLE jekyll AS - SELECT title, permalink, body, published_at, filter FROM contents - WHERE user_id = 1 AND type = 'Article' ORDER BY published_at; - COPY jekyll TO STDOUT WITH CSV HEADER; - ROLLBACK; - SQL - command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"}) - puts command - `#{command}` - CSV.process - end - - # This query will pull blog posts from all entries across all blogs. If - # you've got unpublished, deleted or otherwise hidden posts please sift - # through the created posts to make sure nothing is accidently published. - QUERY = "SELECT id, \ - permalink, \ - body, \ - published_at, \ - title \ - FROM contents \ - WHERE user_id = 1 AND \ - type = 'Article' AND \ - published_at IS NOT NULL \ - ORDER BY published_at" - - def self.process(dbname, user, pass, host = 'localhost') - db = Sequel.mysql(dbname, :user => user, - :password => pass, - :host => host, - :encoding => 'utf8') - - FileUtils.mkdir_p "_posts" - - db[QUERY].each do |post| - title = post[:title] - slug = post[:permalink] - date = post[:published_at] - content = post[:body] - - # Ideally, this script would determine the post format (markdown, - # html, etc) and create files with proper extensions. At this point - # it just assumes that markdown will be acceptable. - name = [date.year, date.month, date.day, slug].join('-') + ".markdown" - - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'mt_id' => post[:entry_id], - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - - end - end -end diff --git a/lib/jekyll/migrators/mt.rb b/lib/jekyll/migrators/mt.rb deleted file mode 100644 index 048c84db..00000000 --- a/lib/jekyll/migrators/mt.rb +++ /dev/null @@ -1,86 +0,0 @@ -# Created by Nick Gerakines, open source and publically available under the -# MIT license. Use this module at your own risk. -# I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby. - -require 'rubygems' -require 'sequel' -require 'fileutils' -require 'yaml' - -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module MT - # This query will pull blog posts from all entries across all blogs. If - # you've got unpublished, deleted or otherwise hidden posts please sift - # through the created posts to make sure nothing is accidently published. - QUERY = "SELECT entry_id, \ - entry_basename, \ - entry_text, \ - entry_text_more, \ - entry_authored_on, \ - entry_title, \ - entry_convert_breaks \ - FROM mt_entry" - - def self.process(dbname, user, pass, host = 'localhost') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - - FileUtils.mkdir_p "_posts" - - db[QUERY].each do |post| - title = post[:entry_title] - slug = post[:entry_basename].gsub(/_/, '-') - date = post[:entry_authored_on] - content = post[:entry_text] - more_content = post[:entry_text_more] - entry_convert_breaks = post[:entry_convert_breaks] - - # Be sure to include the body and extended body. - if more_content != nil - content = content + " \n" + more_content - end - - # Ideally, this script would determine the post format (markdown, - # html, etc) and create files with proper extensions. At this point - # it just assumes that markdown will be acceptable. - name = [date.year, date.month, date.day, slug].join('-') + '.' + - self.suffix(entry_convert_breaks) - - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'mt_id' => post[:entry_id], - 'date' => date - }.delete_if { |k,v| v.nil? || v == '' }.to_yaml - - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - end - - def self.suffix(entry_type) - if entry_type.nil? || entry_type.include?("markdown") - # The markdown plugin I have saves this as - # "markdown_with_smarty_pants", so I just look for "markdown". - "markdown" - elsif entry_type.include?("textile") - # This is saved as "textile_2" on my installation of MT 5.1. - "textile" - elsif entry_type == "0" || entry_type.include?("richtext") - # Richtext looks to me like it's saved as HTML, so I include it here. - "html" - else - # Other values might need custom work. - entry_type - end - end - end -end diff --git a/lib/jekyll/migrators/posterous.rb b/lib/jekyll/migrators/posterous.rb deleted file mode 100644 index 0a2280f2..00000000 --- a/lib/jekyll/migrators/posterous.rb +++ /dev/null @@ -1,67 +0,0 @@ -require 'rubygems' -require 'jekyll' -require 'fileutils' -require 'net/http' -require 'uri' -require "json" - -# ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key, blog)' - -module Jekyll - module Posterous - def self.fetch(uri_str, limit = 10) - # You should choose better exception. - raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0 - - response = nil - Net::HTTP.start('posterous.com') do |http| - req = Net::HTTP::Get.new(uri_str) - req.basic_auth @email, @pass - response = http.request(req) - end - - case response - when Net::HTTPSuccess then response - when Net::HTTPRedirection then fetch(response['location'], limit - 1) - else response.error! - end - end - - def self.process(email, pass, api_token, blog = 'primary') - @email, @pass, @api_token = email, pass, api_token - FileUtils.mkdir_p "_posts" - - posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}").body) - page = 1 - - while posts.any? - posts.each do |post| - title = post["title"] - slug = title.gsub(/[^[:alnum:]]+/, '-').downcase - date = Date.parse(post["display_date"]) - content = post["body_html"] - published = !post["is_private"] - name = "%02d-%02d-%02d-%s.html" % [date.year, date.month, date.day, slug] - - # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'published' => published - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - - # Write out the data and content to file - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - - page += 1 - posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body) - end - end - end -end diff --git a/lib/jekyll/migrators/rss.rb b/lib/jekyll/migrators/rss.rb deleted file mode 100644 index 461abd35..00000000 --- a/lib/jekyll/migrators/rss.rb +++ /dev/null @@ -1,47 +0,0 @@ -# Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22. -# Use at your own risk. The end. -# -# Usage: -# (URL) -# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('http://yourdomain.com/your-favorite-feed.xml')" -# -# (Local file) -# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('./somefile/on/your/computer.xml')" - -require 'rubygems' -require 'rss/1.0' -require 'rss/2.0' -require 'open-uri' -require 'fileutils' -require 'yaml' - -module Jekyll - module MigrateRSS - - # The `source` argument may be a URL or a local file. - def self.process(source) - content = "" - open(source) { |s| content = s.read } - rss = RSS::Parser.parse(content, false) - - raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss - - rss.items.each do |item| - formatted_date = item.date.strftime('%Y-%m-%d') - post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map { |i| i.downcase if i != '' }.compact.join('-') - name = "#{formatted_date}-#{post_name}" - - header = { - 'layout' => 'post', - 'title' => item.title - } - - File.open("_posts/#{name}.html", "w") do |f| - f.puts header.to_yaml - f.puts "---\n" - f.puts item.description - end - end - end - end -end \ No newline at end of file diff --git a/lib/jekyll/migrators/textpattern.rb b/lib/jekyll/migrators/textpattern.rb deleted file mode 100644 index 3b370ed9..00000000 --- a/lib/jekyll/migrators/textpattern.rb +++ /dev/null @@ -1,58 +0,0 @@ -require 'rubygems' -require 'sequel' -require 'fileutils' -require 'yaml' - -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module TextPattern - # Reads a MySQL database via Sequel and creates a post file for each post. - # The only posts selected are those with a status of 4 or 5, which means - # "live" and "sticky" respectively. - # Other statuses are 1 => draft, 2 => hidden and 3 => pending. - QUERY = "SELECT Title, \ - url_title, \ - Posted, \ - Body, \ - Keywords \ - FROM textpattern \ - WHERE Status = '4' OR \ - Status = '5'" - - def self.process(dbname, user, pass, host = 'localhost') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - - FileUtils.mkdir_p "_posts" - - db[QUERY].each do |post| - # Get required fields and construct Jekyll compatible name. - title = post[:Title] - slug = post[:url_title] - date = post[:Posted] - content = post[:Body] - - name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile" - - # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header. - data = { - 'layout' => 'post', - 'title' => title.to_s, - 'tags' => post[:Keywords].split(',') - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - - # Write out the data and content to file. - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - end - end -end diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb deleted file mode 100644 index 367a83c9..00000000 --- a/lib/jekyll/migrators/tumblr.rb +++ /dev/null @@ -1,195 +0,0 @@ -require 'rubygems' -require 'open-uri' -require 'fileutils' -require 'nokogiri' -require 'date' -require 'json' -require 'uri' -require 'jekyll' - -module Jekyll - module Tumblr - def self.process(url, format = "html", grab_images = false, - add_highlights = false, rewrite_urls = true) - @grab_images = grab_images - FileUtils.mkdir_p "_posts/tumblr" - url += "/api/read/json/" - per_page = 50 - posts = [] - # Two passes are required so that we can rewrite URLs. - # First pass builds up an array of each post as a hash. - begin - current_page = (current_page || -1) + 1 - feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}") - json = feed.readlines.join("\n")[21...-2] # Strip Tumblr's JSONP chars. - blog = JSON.parse(json) - puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}" - posts += blog["posts"].map { |post| post_to_hash(post, format) } - end until blog["posts"].size < per_page - # Rewrite URLs and create redirects. - posts = rewrite_urls_and_redirects posts if rewrite_urls - # Second pass for writing post files. - posts.each do |post| - if format == "md" - post[:content] = html_to_markdown post[:content] - post[:content] = add_syntax_highlights post[:content] if add_highlights - end - File.open("_posts/tumblr/#{post[:name]}", "w") do |f| - f.puts post[:header].to_yaml + "---\n" + post[:content] - end - end - end - - private - - # Converts each type of Tumblr post to a hash with all required - # data for Jekyll. - def self.post_to_hash(post, format) - case post['type'] - when "regular" - title = post["regular-title"] - content = post["regular-body"] - when "link" - title = post["link-text"] || post["link-url"] - content = "#{title}" - unless post["link-description"].nil? - content << "
" + post["link-description"] - end - when "photo" - title = post["photo-caption"] - max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max - url = post["photo-url"] || post["photo-url-#{max_size}"] - ext = "." + post[post.keys.select { |k| - k =~ /^photo-url-/ && post[k].split("/").last =~ /\./ - }.first].split(".").last - content = "" - unless post["photo-link-url"].nil? - content = "#{content}" - end - when "audio" - if !post["id3-title"].nil? - title = post["id3-title"] - content = post.at["audio-player"] + "
" + post["audio-caption"] - else - title = post["audio-caption"] - content = post.at["audio-player"] - end - when "quote" - title = post["quote-text"] - content = "
#{post["quote-text"]}
" - unless post["quote-source"].nil? - content << "—" + post["quote-source"] - end - when "conversation" - title = post["conversation-title"] - content = "
" - post["conversation"]["line"].each do |line| - content << "
#{line['label']}
#{line}
" - end - content << "
" - when "video" - title = post["video-title"] - content = post["video-player"] - unless post["video-caption"].nil? - content << "
" + post["video-caption"] - end - end - date = Date.parse(post['date']).to_s - title = Nokogiri::HTML(title).text - slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') - { - :name => "#{date}-#{slug}.#{format}", - :header => { - "layout" => "post", - "title" => title, - "tags" => post["tags"], - }, - :content => content, - :url => post["url"], - :slug => post["url-with-slug"], - } - end - - # Create a Hash of old urls => new urls, for rewriting and - # redirects, and replace urls in each post. Instantiate Jekyll - # site/posts to get the correct permalink format. - def self.rewrite_urls_and_redirects(posts) - site = Jekyll::Site.new(Jekyll.configuration({})) - dir = File.join(File.dirname(__FILE__), "..") - urls = Hash[posts.map { |post| - # Create an initial empty file for the post so that - # we can instantiate a post object. - File.open("_posts/tumblr/#{post[:name]}", "w") - tumblr_url = URI.parse(post[:slug]).path - jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url - redirect_dir = tumblr_url.sub(/\//, "") + "/" - FileUtils.mkdir_p redirect_dir - File.open(redirect_dir + "index.html", "w") do |f| - f.puts "" - end - [tumblr_url, jekyll_url] - }] - posts.map { |post| - urls.each do |tumblr_url, jekyll_url| - post[:content].gsub!(/#{tumblr_url}/i, jekyll_url) - end - post - } - end - - # Uses Python's html2text to convert a post's content to - # markdown. Preserve HTML tables as per the markdown docs. - def self.html_to_markdown(content) - preserve = ["table", "tr", "th", "td"] - preserve.each do |tag| - content.gsub!(/<#{tag}/i, "$$" + tag) - content.gsub!(/<\/#{tag}/i, "||" + tag) - end - content = %x[echo '#{content.gsub("'", "''")}' | html2text] - preserve.each do |tag| - content.gsub!("$$" + tag, "<" + tag) - content.gsub!("||" + tag, " -require 'fileutils' -require 'rubygems' -require 'sequel' -require 'yaml' - -module Jekyll - module Typo - # This SQL *should* work for both MySQL and PostgreSQL, but I haven't - # tested PostgreSQL yet (as of 2008-12-16). - SQL = <<-EOS - SELECT c.id id, - c.title title, - c.permalink slug, - c.body body, - c.published_at date, - c.state state, - COALESCE(tf.name, 'html') filter - FROM contents c - LEFT OUTER JOIN text_filters tf - ON c.text_filter_id = tf.id - EOS - - def self.process dbname, user, pass, host='localhost' - FileUtils.mkdir_p '_posts' - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - db[SQL].each do |post| - next unless post[:state] =~ /published/ - - name = [ sprintf("%.04d", post[:date].year), - sprintf("%.02d", post[:date].month), - sprintf("%.02d", post[:date].day), - post[:slug].strip ].join('-') - - # Can have more than one text filter in this field, but we just want - # the first one for this. - name += '.' + post[:filter].split(' ')[0] - - File.open("_posts/#{name}", 'w') do |f| - f.puts({ 'layout' => 'post', - 'title' => post[:title].to_s, - 'typo_id' => post[:id] - }.delete_if { |k, v| v.nil? || v == '' }.to_yaml) - f.puts '---' - f.puts post[:body].delete("\r") - end - end - end - - end -end diff --git a/lib/jekyll/migrators/wordpress.rb b/lib/jekyll/migrators/wordpress.rb deleted file mode 100644 index d2d19039..00000000 --- a/lib/jekyll/migrators/wordpress.rb +++ /dev/null @@ -1,294 +0,0 @@ -require 'rubygems' -require 'sequel' -require 'fileutils' -require 'yaml' - -# NOTE: This converter requires Sequel and the MySQL gems. -# The MySQL gem can be difficult to install on OS X. Once you have MySQL -# installed, running the following commands should work: -# $ sudo gem install sequel -# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config - -module Jekyll - module WordPress - - # Main migrator function. Call this to perform the migration. - # - # dbname:: The name of the database - # user:: The database user name - # pass:: The database user's password - # host:: The address of the MySQL database host. Default: 'localhost' - # options:: A hash table of configuration options. - # - # Supported options are: - # - # :table_prefix:: Prefix of database tables used by WordPress. - # Default: 'wp_' - # :clean_entities:: If true, convert non-ASCII characters to HTML - # entities in the posts, comments, titles, and - # names. Requires the 'htmlentities' gem to - # work. Default: true. - # :comments:: If true, migrate post comments too. Comments - # are saved in the post's YAML front matter. - # Default: true. - # :categories:: If true, save the post's categories in its - # YAML front matter. - # :tags:: If true, save the post's tags in its - # YAML front matter. - # :more_excerpt:: If true, when a post has no excerpt but - # does have a tag, use the - # preceding post content as the excerpt. - # Default: true. - # :more_anchor:: If true, convert a tag into - # two HTML anchors with ids "more" and - # "more-NNN" (where NNN is the post number). - # Default: true. - # :status:: Array of allowed post statuses. Only - # posts with matching status will be migrated. - # Known statuses are :publish, :draft, :private, - # and :revision. If this is nil or an empty - # array, all posts are migrated regardless of - # status. Default: [:publish]. - # - def self.process(dbname, user, pass, host='localhost', options={}) - options = { - :table_prefix => 'wp_', - :clean_entities => true, - :comments => true, - :categories => true, - :tags => true, - :more_excerpt => true, - :more_anchor => true, - :status => [:publish] # :draft, :private, :revision - }.merge(options) - - if options[:clean_entities] - begin - require 'htmlentities' - rescue LoadError - STDERR.puts "Could not require 'htmlentities', so the " + - ":clean_entities option is now disabled." - options[:clean_entities] = false - end - end - - FileUtils.mkdir_p("_posts") - - db = Sequel.mysql(dbname, :user => user, :password => pass, - :host => host, :encoding => 'utf8') - - px = options[:table_prefix] - - posts_query = " - SELECT - posts.ID AS `id`, - posts.guid AS `guid`, - posts.post_type AS `type`, - posts.post_status AS `status`, - posts.post_title AS `title`, - posts.post_name AS `slug`, - posts.post_date AS `date`, - posts.post_content AS `content`, - posts.post_excerpt AS `excerpt`, - posts.comment_count AS `comment_count`, - users.display_name AS `author`, - users.user_login AS `author_login`, - users.user_email AS `author_email`, - users.user_url AS `author_url` - FROM #{px}posts AS `posts` - LEFT JOIN #{px}users AS `users` - ON posts.post_author = users.ID" - - if options[:status] and not options[:status].empty? - status = options[:status][0] - posts_query << " - WHERE posts.post_status = '#{status.to_s}'" - options[:status][1..-1].each do |status| - posts_query << " OR - posts.post_status = '#{status.to_s}'" - end - end - - db[posts_query].each do |post| - process_post(post, db, options) - end - end - - - def self.process_post(post, db, options) - px = options[:table_prefix] - - title = post[:title] - if options[:clean_entities] - title = clean_entities(title) - end - - slug = post[:slug] - if !slug or slug.empty? - slug = sluggify(title) - end - - date = post[:date] || Time.now - name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, - date.day, slug] - content = post[:content].to_s - if options[:clean_entities] - content = clean_entities(content) - end - - excerpt = post[:excerpt].to_s - - more_index = content.index(//) - more_anchor = nil - if more_index - if options[:more_excerpt] and - (post[:excerpt].nil? or post[:excerpt].empty?) - excerpt = content[0...more_index] - end - if options[:more_anchor] - more_link = "more" - content.sub!(//, - "" + - "") - end - end - - categories = [] - tags = [] - - if options[:categories] or options[:tags] - - cquery = - "SELECT - terms.name AS `name`, - ttax.taxonomy AS `type` - FROM - #{px}terms AS `terms`, - #{px}term_relationships AS `trels`, - #{px}term_taxonomy AS `ttax` - WHERE - trels.object_id = '#{post[:id]}' AND - trels.term_taxonomy_id = ttax.term_taxonomy_id AND - terms.term_id = ttax.term_id" - - db[cquery].each do |term| - if options[:categories] and term[:type] == "category" - if options[:clean_entities] - categories << clean_entities(term[:name]) - else - categories << term[:name] - end - elsif options[:tags] and term[:type] == "post_tag" - if options[:clean_entities] - tags << clean_entities(term[:name]) - else - tags << term[:name] - end - end - end - end - - comments = [] - - if options[:comments] and post[:comment_count].to_i > 0 - cquery = - "SELECT - comment_ID AS `id`, - comment_author AS `author`, - comment_author_email AS `author_email`, - comment_author_url AS `author_url`, - comment_date AS `date`, - comment_date_gmt AS `date_gmt`, - comment_content AS `content` - FROM #{px}comments - WHERE - comment_post_ID = '#{post[:id]}' AND - comment_approved != 'spam'" - - - db[cquery].each do |comment| - - comcontent = comment[:content].to_s - if comcontent.respond_to?(:force_encoding) - comcontent.force_encoding("UTF-8") - end - if options[:clean_entities] - comcontent = clean_entities(comcontent) - end - comauthor = comment[:author].to_s - if options[:clean_entities] - comauthor = clean_entities(comauthor) - end - - comments << { - 'id' => comment[:id].to_i, - 'author' => comauthor, - 'author_email' => comment[:author_email].to_s, - 'author_url' => comment[:author_url].to_s, - 'date' => comment[:date].to_s, - 'date_gmt' => comment[:date_gmt].to_s, - 'content' => comcontent, - } - end - - comments.sort!{ |a,b| a['id'] <=> b['id'] } - end - - # Get the relevant fields as a hash, delete empty fields and - # convert to YAML for the header. - data = { - 'layout' => post[:type].to_s, - 'status' => post[:status].to_s, - 'published' => (post[:status].to_s == "publish"), - 'title' => title.to_s, - 'author' => post[:author].to_s, - 'author_login' => post[:author_login].to_s, - 'author_email' => post[:author_email].to_s, - 'author_url' => post[:author_url].to_s, - 'excerpt' => excerpt, - 'more_anchor' => more_anchor, - 'wordpress_id' => post[:id], - 'wordpress_url' => post[:guid].to_s, - 'date' => date, - 'categories' => options[:categories] ? categories : nil, - 'tags' => options[:tags] ? tags : nil, - 'comments' => options[:comments] ? comments : nil, - }.delete_if { |k,v| v.nil? || v == '' }.to_yaml - - # Write out the data and content to file - File.open("_posts/#{name}", "w") do |f| - f.puts data - f.puts "---" - f.puts content - end - end - - - def self.clean_entities( text ) - if text.respond_to?(:force_encoding) - text.force_encoding("UTF-8") - end - text = HTMLEntities.new.encode(text, :named) - # We don't want to convert these, it would break all - # HTML tags in the post and comments. - text.gsub!("&", "&") - text.gsub!("<", "<") - text.gsub!(">", ">") - text.gsub!(""", '"') - text.gsub!("'", "'") - text - end - - - def self.sluggify( title ) - begin - require 'unidecode' - title = title.to_ascii - rescue LoadError - STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode." - end - title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-") - end - - end -end diff --git a/lib/jekyll/migrators/wordpressdotcom.rb b/lib/jekyll/migrators/wordpressdotcom.rb deleted file mode 100644 index 701c2af4..00000000 --- a/lib/jekyll/migrators/wordpressdotcom.rb +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 - -require 'rubygems' -require 'hpricot' -require 'fileutils' -require 'yaml' -require 'time' - -module Jekyll - # This importer takes a wordpress.xml file, which can be exported from your - # wordpress.com blog (/wp-admin/export.php). - module WordpressDotCom - def self.process(filename = "wordpress.xml") - import_count = Hash.new(0) - doc = Hpricot::XML(File.read(filename)) - - (doc/:channel/:item).each do |item| - title = item.at(:title).inner_text.strip - permalink_title = item.at('wp:post_name').inner_text - # Fallback to "prettified" title if post_name is empty (can happen) - if permalink_title == "" - permalink_title = title.downcase.split.join('-') - end - - date = Time.parse(item.at('wp:post_date').inner_text) - status = item.at('wp:status').inner_text - - if status == "publish" - published = true - else - published = false - end - - type = item.at('wp:post_type').inner_text - tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq - - metas = Hash.new - item.search("wp:postmeta").each do |meta| - key = meta.at('wp:meta_key').inner_text - value = meta.at('wp:meta_value').inner_text - metas[key] = value; - end - - name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html" - header = { - 'layout' => type, - 'title' => title, - 'tags' => tags, - 'status' => status, - 'type' => type, - 'published' => published, - 'meta' => metas - } - - FileUtils.mkdir_p "_#{type}s" - File.open("_#{type}s/#{name}", "w") do |f| - f.puts header.to_yaml - f.puts '---' - f.puts item.at('content:encoded').inner_text - end - - import_count[type] += 1 - end - - import_count.each do |key, value| - puts "Imported #{value} #{key}s" - end - end - end -end