Remove migrators

This commit is contained in:
Tom Bell 2012-12-22 17:49:33 +00:00
parent a151a16f09
commit b9da30bc8f
15 changed files with 0 additions and 1250 deletions

View File

@ -82,21 +82,6 @@ end
# #
############################################################################# #############################################################################
# Rake tasks that shell out to the bundled migrators. Each command string is
# written with %q, which does not interpolate, so the `#{ENV[...]}` text is
# passed through verbatim and expanded by the child ruby process instead.
namespace(:migrate) do
  desc("Migrate from mephisto in the current directory")
  task(:mephisto) {
    sh(%q(ruby -r './lib/jekyll/migrators/mephisto' -e 'Jekyll::Mephisto.postgres(:database => "#{ENV["DB"]}")'))
  }

  desc("Migrate from Movable Type in the current directory")
  task(:mt) {
    sh(%q(ruby -r './lib/jekyll/migrators/mt' -e 'Jekyll::MT.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'))
  }

  desc("Migrate from Typo in the current directory")
  task(:typo) {
    sh(%q(ruby -r './lib/jekyll/migrators/typo' -e 'Jekyll::Typo.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'))
  }
end
begin begin
require 'cucumber/rake/task' require 'cucumber/rake/task'
Cucumber::Rake::Task.new(:features) do |t| Cucumber::Rake::Task.new(:features) do |t|

View File

@ -1,26 +0,0 @@
module Jekyll
  module CSV
    # Turns every data row of a CSV export into a post file under _posts/.
    #
    # Columns are read by position: 0 title, 1 permalink, 2 body,
    # 3 published_at, 4 filter. A row whose first cell is the literal
    # "title" is treated as the header line and skipped.
    #
    # file - path of the CSV file to read (default: "posts.csv").
    #
    # Returns a summary String, e.g. "Created 3 posts!".
    def self.process(file = "posts.csv")
      FileUtils.mkdir_p "_posts"
      written = 0
      FasterCSV.foreach(file) do |row|
        title, permalink, body, published_at, filter = row
        next if title == "title"
        written += 1
        # Only the date part (text before the first space) of published_at
        # goes into the file name; the filter column picks the extension.
        extension = filter =~ /markdown/ ? ".markdown" : ".textile"
        name = published_at.split(" ").first + "-" + permalink + extension
        File.open("_posts/#{name}", "w") do |out|
          out.puts <<-HEADER
---
layout: post
title: #{title}
---
          HEADER
          out.puts body
        end
      end
      "Created #{written} posts!"
    end
  end
end

View File

@ -1,103 +0,0 @@
require 'rubygems'
require 'sequel'
require 'fileutils'
require 'yaml'
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module Drupal
    # SQL template selecting every node of type 'blog' or 'story' joined to
    # its revision body. Both published and unpublished nodes are returned;
    # .process sorts them into _posts and _drafts by the status column.
    # Table names are unprefixed here; .process derives a prefixed copy per
    # call and never modifies this constant.
    QUERY = "SELECT n.nid, \
                    n.title, \
                    nr.body, \
                    n.created, \
                    n.status \
             FROM node AS n, \
                  node_revisions AS nr \
             WHERE (n.type = 'blog' OR n.type = 'story') \
             AND n.vid = nr.vid".freeze

    # Reads a Drupal MySQL database via Sequel and writes one Markdown file
    # per node, plus "refresh" redirect pages for the old Drupal URLs.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    # prefix - table name prefix (default: '', meaning no prefix).
    #
    # Files are written relative to the current directory: nodes with
    # status == 1 go to _posts/, all others to _drafts/, the redirect layout
    # to _layouts/refresh.html, and one <alias>/index.md per URL alias of
    # each published node.
    def self.process(dbname, user, pass, host = 'localhost', prefix = '')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      # Build the prefixed query on a copy. Editing QUERY in place would make
      # a second call fail, because the unprefixed table names are gone.
      query = QUERY
      if prefix != ''
        query = query.sub(" node ") { " " + prefix + "node " }
        query = query.sub(" node_revisions ") { " " + prefix + "node_revisions " }
      end

      FileUtils.mkdir_p "_posts"
      FileUtils.mkdir_p "_drafts"
      # The layout file below is opened for writing, so its directory must
      # exist first.
      FileUtils.mkdir_p "_layouts"

      # Create the refresh layout
      # Change the refresh url if you customized your permalink config
      File.open("_layouts/refresh.html", "w") do |f|
        f.puts <<EOF
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
</head>
</html>
EOF
      end

      db[query].each do |post|
        # Get required fields and construct Jekyll compatible name
        node_id = post[:nid]
        title = post[:title]
        content = post[:body]
        created = post[:created]
        time = Time.at(created)
        is_published = post[:status] == 1
        dir = is_published ? "_posts" : "_drafts"
        slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
        name = time.strftime("%Y-%m-%d-") + slug + '.md'

        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header
        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'created' => created,
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("#{dir}/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end

        # Make a file to redirect from the old Drupal URL
        if is_published
          aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
          # The canonical node/<nid> path is always redirected as well.
          aliases.push(:dst => "node/#{node_id}")
          aliases.each do |url_alias|
            FileUtils.mkdir_p url_alias[:dst]
            File.open("#{url_alias[:dst]}/index.md", "w") do |f|
              f.puts "---"
              f.puts "layout: refresh"
              f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
              f.puts "---"
            end
          end
        end
      end
      # TODO: Make dirs & files for nodes of type 'page'
      # Make refresh pages for these as well
      # TODO: Make refresh dirs & files according to entries in url_alias table
    end
  end
end

View File

@ -1,49 +0,0 @@
# Adapted by Rodrigo Pinto <rodrigopqn@gmail.com>
# Based on typo.rb by Toby DiPasquale
require 'fileutils'
require 'rubygems'
require 'sequel'
module Jekyll
  module Enki
    # Query returning every row of Enki's posts table, aliasing the
    # published_at and cached_tag_list columns to :date and :tags.
    SQL = <<-EOS
SELECT p.id,
p.title,
p.slug,
p.body,
p.published_at as date,
p.cached_tag_list as tags
FROM posts p
    EOS

    # Connects to a PostgreSQL Enki database through Sequel and writes one
    # Textile post per row into _posts/.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    #
    # Each file is named YYYY-MM-DD-<slug>.textile and holds a YAML header
    # (empty values dropped), a '---' line, then the body with carriage
    # returns removed.
    def self.process(dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p('_posts')
      connection = { :database => dbname,
                     :user => user,
                     :password => pass,
                     :host => host,
                     :encoding => 'utf8' }
      db = Sequel.postgres(connection)

      db[SQL].each do |post|
        published = post[:date]
        name = format('%.04d-%.02d-%.02d-%s',
                      published.year, published.month, published.day,
                      post[:slug].strip) + '.textile'

        File.open("_posts/#{name}", 'w') do |out|
          front_matter = {
            'layout' => 'post',
            'title' => post[:title].to_s,
            'enki_id' => post[:id],
            'categories' => post[:tags]
          }
          front_matter.delete_if { |_key, value| value.nil? || value == '' }
          out.puts(front_matter.to_yaml)
          out.puts '---'
          out.puts post[:body].delete("\r")
        end
      end
    end
  end
end

View File

@ -1,53 +0,0 @@
require 'rubygems'
require 'sequel'
require 'fileutils'
require 'yaml'
# NOTE: This migrator is made for Joomla 1.5 databases.
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module Joomla
    # Reads a Joomla MySQL database via Sequel and writes one Markdown post
    # per content row into _posts/.
    #
    # dbname       - name of the database.
    # user         - database user name.
    # pass         - database password.
    # host         - database host (default: 'localhost').
    # table_prefix - prefix of the Joomla tables (default: 'jos_').
    # section      - sectionid whose articles are migrated (default: '1').
    #
    # Only rows of the requested section whose state is '0' or '1' are
    # selected. The body is the concatenation of introtext and fulltext.
    def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'jos_', section = '1')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p("_posts")

      # The state alternatives are parenthesised so the section filter applies
      # to both of them (AND binds tighter than OR). The section value is
      # passed as a bound placeholder instead of being spliced into the SQL.
      query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` " \
              "FROM #{table_prefix}content " \
              "WHERE (state = '0' OR state = '1') AND sectionid = ?"

      db[query, section].each do |post|
        # Get required fields and construct Jekyll compatible name.
        title = post[:title]
        slug = post[:alias]
        date = post[:created]
        content = post[:content]
        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
                                               slug]

        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header.
        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'joomla_id' => post[:id],
          'joomla_url' => post[:alias],
          'date' => date
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end

View File

@ -1,52 +0,0 @@
require 'date'
require 'fileutils'
require 'yaml'
module Jekyll
  module Marley
    # Patterns used to pick a Marley post file apart. :title_with_date
    # captures the heading text and a trailing "(digits/slashes)" date
    # separately, :perex captures one line containing no '#', and :meta
    # captures the text between "{{" and "}}" lines.
    def self.regexp
      { :id => /^\d{0,4}-{0,1}(.*)$/,
        :title => /^#\s*(.*)\s+$/,
        :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
        :published_on => /.*\s+\(([0-9\/]+)\)$/,
        :perex => /^([^\#\n]+\n)$/,
        :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
      }
    end

    # Converts every *.txt file below marley_data_dir into a Markdown post
    # under _posts/.
    #
    # marley_data_dir - directory holding the Marley post folders.
    #
    # The post date comes from a "(YYYY/MM/DD)"-style suffix on the heading
    # when one is present and parseable, otherwise from the modification
    # time of the post's directory. The file name is that date plus the
    # directory name with any leading "<digits>-" removed.
    #
    # Returns a summary String, e.g. "Created 3 posts!".
    # Raises ArgumentError if marley_data_dir is not a directory.
    def self.process(marley_data_dir)
      raise ArgumentError, "marley dir #{marley_data_dir} not found" unless File.directory?(marley_data_dir)

      FileUtils.mkdir_p "_posts"

      patterns = regexp
      posts = 0
      Dir["#{marley_data_dir}/**/*.txt"].each do |path|
        next unless File.exist?(path)

        post_dir = File.dirname(path)
        file_content = File.read(path)
        # slice! removes the meta block from file_content so it cannot leak
        # into the body.
        meta_content = file_content.slice!(patterns[:meta])
        body = file_content.sub(patterns[:title], '').sub(patterns[:perex], '').strip

        # Prefer the heading form that carries a date; fall back to the
        # plain heading. Captures are read directly, because Array#to_s on
        # scan results would yield inspect-style text.
        title, date_text = file_content.scan(patterns[:title_with_date]).first
        title = file_content[patterns[:title], 1] if title.nil?
        title = title.to_s.strip
        perex = file_content[patterns[:perex], 1].to_s.strip

        published_on = parse_published_on(date_text) || File.mtime(post_dir)

        meta = load_meta(meta_content, patterns[:meta])
        meta['title'] = title
        meta['layout'] = 'post'

        formatted_date = published_on.strftime('%Y-%m-%d')
        post_name = post_dir.split(%r{/}).last.gsub(/\A\d+-/, '')
        name = "#{formatted_date}-#{post_name}"

        File.open("_posts/#{name}.markdown", "w") do |out|
          out.puts meta.to_yaml
          out.puts "---\n"
          out.puts "\n#{perex}\n\n" unless perex.empty?
          out.puts body
        end
        posts += 1
      end
      "Created #{posts} posts!"
    end

    # Parses the date text captured from a heading.
    # Returns a Date, or nil when text is nil or cannot be parsed.
    def self.parse_published_on(text)
      return nil if text.nil?
      Date.parse(text)
    rescue ArgumentError
      nil
    end
    private_class_method :parse_published_on

    # Loads the YAML between the "{{" and "}}" lines of a meta block.
    # Returns a Hash; anything that does not load as a Hash yields {}.
    # NOTE(review): YAML.load runs on the post file's own content; only
    # migrate data you trust.
    def self.load_meta(meta_content, pattern)
      return {} unless meta_content
      loaded = YAML.load(meta_content[pattern, 1].to_s)
      loaded.is_a?(Hash) ? loaded : {}
    end
    private_class_method :load_meta
  end
end

View File

@ -1,84 +0,0 @@
# Quickly hacked together by Michael Ivey
# Based on mt.rb by Nick Gerakines, open source and publicly
# available under the MIT license. Use this module at your own risk.
require 'rubygems'
require 'sequel'
require 'fastercsv'
require 'fileutils'
require File.join(File.dirname(__FILE__),"csv.rb")
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module Mephisto
    # Exports Mephisto articles from PostgreSQL to a CSV file by shelling out
    # to psql, then converts that CSV into posts with CSV.process.
    #
    # c - Hash of settings: :database and :username are passed to psql as
    #     given; :host defaults to "localhost" and :filename to "posts.csv".
    #
    # Export PGPASSWORD if a password is required. The values in c are placed
    # on a shell command line unescaped, so pass trusted values only.
    #
    # Returns whatever CSV.process returns.
    def self.postgres(c)
      sql = <<-SQL
BEGIN;
CREATE TEMP TABLE jekyll AS
SELECT title, permalink, body, published_at, filter FROM contents
WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
COPY jekyll TO STDOUT WITH CSV HEADER;
ROLLBACK;
      SQL
      filename = c[:filename] || "posts.csv"
      command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{filename})
      puts command
      `#{command}`
      # Read back the same file psql was told to write; otherwise a custom
      # :filename would be exported but never imported.
      CSV.process(filename)
    end

    # This query pulls every article of user 1 that has a publication date.
    # If you've got deleted or otherwise hidden posts, please sift through
    # the created posts to make sure nothing is accidentally published.
    QUERY = "SELECT id, \
                    permalink, \
                    body, \
                    published_at, \
                    title \
             FROM contents \
             WHERE user_id = 1 AND \
                   type = 'Article' AND \
                   published_at IS NOT NULL \
             ORDER BY published_at"

    # Reads a Mephisto MySQL database via Sequel and writes one Markdown post
    # per QUERY row into _posts/.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user,
                                :password => pass,
                                :host => host,
                                :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        title = post[:title]
        slug = post[:permalink]
        date = post[:published_at]
        content = post[:body]

        # Ideally, this script would determine the post format (markdown,
        # html, etc) and create files with proper extensions. At this point
        # it just assumes that markdown will be acceptable.
        name = [date.year, date.month, date.day, slug].join('-') + ".markdown"

        # QUERY selects the column as `id`. Reading a column that is not
        # selected would always give nil and the key would be dropped.
        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'mephisto_id' => post[:id],
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end

View File

@ -1,86 +0,0 @@
# Created by Nick Gerakines, open source and publicly available under the
# MIT license. Use this module at your own risk.
# I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
require 'rubygems'
require 'sequel'
require 'fileutils'
require 'yaml'
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module MT
    # Selects every row of mt_entry with no filtering at all, so
    # unpublished, deleted or otherwise hidden entries are migrated too.
    # Review the generated posts before publishing them.
    QUERY = "SELECT entry_id, " \
            "entry_basename, " \
            "entry_text, " \
            "entry_text_more, " \
            "entry_authored_on, " \
            "entry_title, " \
            "entry_convert_breaks " \
            "FROM mt_entry"

    # Reads a Movable Type MySQL database via Sequel and writes one post per
    # mt_entry row into _posts/.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    #
    # The file extension is chosen by .suffix from entry_convert_breaks.
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        title = post[:entry_title]
        slug = post[:entry_basename].gsub(/_/, '-')
        authored_on = post[:entry_authored_on]
        content = post[:entry_text]
        extended = post[:entry_text_more]

        # Append the extended body, when there is one, after the main body.
        content = content + " \n" + extended unless extended.nil?

        date_and_slug = [authored_on.year, authored_on.month, authored_on.day, slug]
        name = date_and_slug.join('-') + '.' + self.suffix(post[:entry_convert_breaks])

        front_matter = {
          'layout' => 'post',
          'title' => title.to_s,
          'mt_id' => post[:entry_id],
          'date' => authored_on
        }
        front_matter.delete_if { |_key, value| value.nil? || value == '' }

        File.open("_posts/#{name}", "w") do |out|
          out.puts front_matter.to_yaml
          out.puts "---"
          out.puts content
        end
      end
    end

    # Maps an entry_convert_breaks value to a file extension.
    #
    # entry_type - the stored text-format name, or nil.
    #
    # Returns "markdown" for nil or any value containing "markdown",
    # "textile" for any value containing "textile", "html" for "0" or any
    # value containing "richtext", and the value itself otherwise.
    def self.suffix(entry_type)
      return "markdown" if entry_type.nil? || entry_type.include?("markdown")
      return "textile" if entry_type.include?("textile")
      return "html" if entry_type == "0" || entry_type.include?("richtext")

      # Other values might need custom work.
      entry_type
    end
  end
end

View File

@ -1,67 +0,0 @@
require 'rubygems'
require 'jekyll'
require 'fileutils'
require 'net/http'
require 'uri'
require "json"
# ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key, blog)'
module Jekyll
  module Posterous
    # Performs an authenticated GET against posterous.com, following
    # redirects.
    #
    # uri_str - request path (or redirect location) to fetch.
    # limit   - number of redirects still allowed (default: 10).
    #
    # Uses the @email and @pass values stored by .process for basic auth.
    #
    # Returns the successful response object.
    # Raises ArgumentError once limit reaches 0; any response that is
    # neither a success nor a redirect is raised through response.error!.
    def self.fetch(uri_str, limit = 10)
      # A more specific exception class would be a better choice here.
      if limit == 0
        raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password'
      end

      response = Net::HTTP.start('posterous.com') do |http|
        request = Net::HTTP::Get.new(uri_str)
        request.basic_auth @email, @pass
        http.request(request)
      end

      if response.is_a?(Net::HTTPSuccess)
        response
      elsif response.is_a?(Net::HTTPRedirection)
        fetch(response['location'], limit - 1)
      else
        response.error!
      end
    end

    # Downloads every post of a Posterous site, page by page, and writes
    # each one as an HTML post into _posts/.
    #
    # email     - account e-mail used for basic auth.
    # pass      - account password.
    # api_token - Posterous API token.
    # blog      - site identifier (default: 'primary').
    #
    # Paging stops at the first page for which posts.any? is false.
    def self.process(email, pass, api_token, blog = 'primary')
      @email, @pass, @api_token = email, pass, api_token
      FileUtils.mkdir_p "_posts"

      endpoint = "/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}"
      posts = JSON.parse(self.fetch(endpoint).body)
      page = 1

      while posts.any?
        posts.each do |post|
          title = post["title"]
          slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
          date = Date.parse(post["display_date"])
          content = post["body_html"]
          published = !post["is_private"]
          name = "%02d-%02d-%02d-%s.html" % [date.year, date.month, date.day, slug]

          # Header fields; nil and empty-string values are dropped before
          # serialising to YAML.
          front_matter = {
            'layout' => 'post',
            'title' => title.to_s,
            'published' => published
          }
          front_matter.delete_if { |_key, value| value.nil? || value == '' }

          File.open("_posts/#{name}", "w") do |out|
            out.puts front_matter.to_yaml
            out.puts "---"
            out.puts content
          end
        end

        page += 1
        posts = JSON.parse(self.fetch("#{endpoint}&page=#{page}").body)
      end
    end
  end
end

View File

@ -1,47 +0,0 @@
# Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
# Use at your own risk. The end.
#
# Usage:
# (URL)
# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('http://yourdomain.com/your-favorite-feed.xml')"
#
# (Local file)
# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('./somefile/on/your/computer.xml')"
require 'rubygems'
require 'rss/1.0'
require 'rss/2.0'
require 'open-uri'
require 'fileutils'
require 'yaml'
module Jekyll
  module MigrateRSS
    # Converts every item of an RSS feed into an HTML post under _posts/.
    #
    # source - a URL or the path of a local file.
    #
    # Each file is named YYYY-MM-DD-<slug>.html, where the slug is the item
    # title split on spaces and the characters ! / : & - $ , then lower-cased
    # and joined with '-'. The file holds a YAML header, a '---' line and
    # the item description.
    #
    # Raises RuntimeError when the parser returns nothing for the source.
    def self.process(source)
      content = read_source(source)
      rss = RSS::Parser.parse(content, false)

      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

      # The output directory must exist before any post file is opened.
      FileUtils.mkdir_p "_posts"

      rss.items.each do |item|
        formatted_date = item.date.strftime('%Y-%m-%d')
        # `\$` matches a literal dollar sign. An unescaped `$` would be an
        # end-of-line anchor and never remove the character.
        post_name = item.title.split(%r{ |!|/|:|&|-|\$|,}).map { |i| i.downcase if i != '' }.compact.join('-')
        name = "#{formatted_date}-#{post_name}"

        header = {
          'layout' => 'post',
          'title' => item.title
        }

        File.open("_posts/#{name}.html", "w") do |f|
          f.puts header.to_yaml
          f.puts "---\n"
          f.puts item.description
        end
      end
    end

    # Reads the feed text. Anything that looks like "scheme://..." and parses
    # to a URI that open-uri can read is fetched remotely. Everything else is
    # read as a local file, so a leading "|" is never run as a command.
    def self.read_source(source)
      uri = URI.parse(source) if source =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
      return uri.read if uri && uri.respond_to?(:read)
      File.read(source)
    end
    private_class_method :read_source
  end
end

View File

@ -1,58 +0,0 @@
require 'rubygems'
require 'sequel'
require 'fileutils'
require 'yaml'
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module TextPattern
    # Selects the rows of the textpattern table whose Status is '4' or '5',
    # i.e. "live" and "sticky". The other statuses are 1 => draft,
    # 2 => hidden and 3 => pending.
    QUERY = "SELECT Title, " \
            "url_title, " \
            "Posted, " \
            "Body, " \
            "Keywords " \
            "FROM textpattern " \
            "WHERE Status = '4' OR " \
            "Status = '5'"

    # Reads a TextPattern MySQL database via Sequel and writes one Textile
    # post per QUERY row into _posts/.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    #
    # Keywords is split on commas to form the tags list.
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        posted = post[:Posted]
        name = "#{posted.strftime("%Y-%m-%d")}-#{post[:url_title]}.textile"

        # Header fields; nil and empty-string values are dropped before
        # serialising to YAML.
        front_matter = {
          'layout' => 'post',
          'title' => post[:Title].to_s,
          'tags' => post[:Keywords].split(',')
        }
        front_matter.delete_if { |_key, value| value.nil? || value == '' }

        File.open("_posts/#{name}", "w") do |out|
          out.puts front_matter.to_yaml
          out.puts "---"
          out.puts post[:Body]
        end
      end
    end
  end
end

View File

@ -1,195 +0,0 @@
require 'rubygems'
require 'open-uri'
require 'fileutils'
require 'nokogiri'
require 'date'
require 'json'
require 'uri'
require 'jekyll'
module Jekyll
  # Note: every method here is a `def self.` singleton method, so a bare
  # `private` keyword would not hide any of them; they are all public.
  module Tumblr
    # Downloads every post of a Tumblr blog and writes it under
    # _posts/tumblr/.
    #
    # url            - base URL of the blog.
    # format         - "html" or "md"; used as the file extension and, for
    #                  "md", triggers conversion through html2text.
    # grab_images    - when true, photos are saved under tumblr_files/.
    # add_highlights - when true (and format is "md"), code blocks are
    #                  wrapped in highlight tags.
    # rewrite_urls   - when true, links between posts are rewritten to the
    #                  new Jekyll URLs and redirect pages are created.
    def self.process(url, format = "html", grab_images = false,
                     add_highlights = false, rewrite_urls = true)
      @grab_images = grab_images
      FileUtils.mkdir_p "_posts/tumblr"
      url += "/api/read/json/"
      per_page = 50
      posts = []

      # Two passes are required so that we can rewrite URLs.
      # First pass builds up an array of each post as a hash.
      current_page = 0
      loop do
        page_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
        # Open through the parsed URI (open-uri) and close the stream when
        # done. The slice drops the first 21 and last 2 characters, which
        # strips Tumblr's JSONP wrapper.
        json = URI.parse(page_url).open do |feed|
          feed.readlines.join("\n")[21...-2]
        end
        blog = JSON.parse(json)
        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
        posts += blog["posts"].map { |post| post_to_hash(post, format) }
        # A short page means there is nothing left to fetch.
        break if blog["posts"].size < per_page
        current_page += 1
      end

      # Rewrite URLs and create redirects.
      posts = rewrite_urls_and_redirects posts if rewrite_urls

      # Second pass for writing post files.
      posts.each do |post|
        if format == "md"
          post[:content] = html_to_markdown post[:content]
          post[:content] = add_syntax_highlights post[:content] if add_highlights
        end
        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
          f.puts post[:header].to_yaml + "---\n" + post[:content]
        end
      end
    end

    # Converts each type of Tumblr post to a hash with all required
    # data for Jekyll.
    #
    # post   - Hash for one post as parsed from the Tumblr JSON.
    # format - file extension for the generated name.
    #
    # Returns a Hash with :name, :header, :content, :url and :slug.
    def self.post_to_hash(post, format)
      case post['type']
      when "regular"
        title = post["regular-title"]
        content = post["regular-body"]
      when "link"
        title = post["link-text"] || post["link-url"]
        content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
        unless post["link-description"].nil?
          content += "<br/>" + post["link-description"]
        end
      when "photo"
        title = post["photo-caption"]
        max_size = post.keys.map { |k| k.gsub("photo-url-", "").to_i }.max
        url = post["photo-url"] || post["photo-url-#{max_size}"]
        # Take the extension from the first photo URL whose last path
        # segment contains a dot.
        ext = "." + post[post.keys.select { |k|
          k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
        }.first].split(".").last
        content = "<img src=\"#{save_file(url, ext)}\"/>"
        unless post["photo-link-url"].nil?
          content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
        end
      when "audio"
        # The post is a plain Hash, so fields are read with [].
        if !post["id3-title"].nil?
          title = post["id3-title"]
          content = post["audio-player"] + "<br/>" + post["audio-caption"]
        else
          title = post["audio-caption"]
          content = post["audio-player"]
        end
      when "quote"
        title = post["quote-text"]
        content = "<blockquote>#{post["quote-text"]}</blockquote>"
        unless post["quote-source"].nil?
          content += "&#8212;" + post["quote-source"]
        end
      when "conversation"
        title = post["conversation-title"]
        # Accept either a list of lines or a hash wrapping it under "line".
        lines = post["conversation"]
        lines = lines["line"] if lines.is_a?(Hash)
        content = "<section><dialog>"
        lines.each do |line|
          content += "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
        end
        content += "</dialog></section>"
      when "video"
        title = post["video-title"]
        content = post["video-player"]
        unless post["video-caption"].nil?
          content += "<br/>" + post["video-caption"]
        end
      end
      date = Date.parse(post['date']).to_s
      title = Nokogiri::HTML(title).text
      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      {
        :name => "#{date}-#{slug}.#{format}",
        :header => {
          "layout" => "post",
          "title" => title,
          "tags" => post["tags"],
        },
        :content => content,
        :url => post["url"],
        :slug => post["url-with-slug"],
      }
    end

    # Create a Hash of old urls => new urls, for rewriting and
    # redirects, and replace urls in each post. Instantiate Jekyll
    # site/posts to get the correct permalink format.
    #
    # posts - Array of hashes produced by .post_to_hash.
    #
    # Returns the same posts with their :content rewritten in place.
    def self.rewrite_urls_and_redirects(posts)
      site = Jekyll::Site.new(Jekyll.configuration({}))
      dir = File.join(File.dirname(__FILE__), "..")
      urls = Hash[posts.map { |post|
        # Create an initial empty file for the post so that
        # we can instantiate a post object. The block form closes the
        # handle immediately.
        File.open("_posts/tumblr/#{post[:name]}", "w") { }
        tumblr_url = URI.parse(post[:slug]).path
        jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url
        redirect_dir = tumblr_url.sub(/\//, "") + "/"
        FileUtils.mkdir_p redirect_dir
        File.open(redirect_dir + "index.html", "w") do |f|
          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
                 "url=#{jekyll_url}'></head><body></body></html>"
        end
        [tumblr_url, jekyll_url]
      }]
      posts.map { |post|
        urls.each do |tumblr_url, jekyll_url|
          # Escape the path so characters such as '.' or '?' match literally.
          post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i, jekyll_url)
        end
        post
      }
    end

    # Uses Python's html2text to convert a post's content to
    # markdown. Preserve HTML tables as per the markdown docs.
    #
    # content - HTML String; table tags are masked in place before the
    #           conversion and restored afterwards.
    #
    # Returns the converted String.
    def self.html_to_markdown(content)
      preserve = ["table", "tr", "th", "td"]
      preserve.each do |tag|
        content.gsub!(/<#{tag}/i, "$$" + tag)
        content.gsub!(/<\/#{tag}/i, "||" + tag)
      end
      # Feed the HTML to html2text on stdin without going through a shell,
      # so quotes or shell metacharacters in remote content are never
      # interpreted. The writer thread avoids a pipe deadlock on large posts.
      html = content
      content = IO.popen(["html2text"], "r+") do |io|
        writer = Thread.new do
          io.write(html)
          io.close_write
        end
        converted = io.read
        writer.join
        converted
      end
      preserve.each do |tag|
        content.gsub!("$$" + tag, "<" + tag)
        content.gsub!("||" + tag, "</" + tag)
      end
      content
    end

    # Adds pygments highlight tags to code blocks in posts that use
    # markdown format. This doesn't guess the language of the code
    # block, so you should modify this to suit your own content.
    # For example, my code block only contain Python and JavaScript,
    # so I can assume the block is JavaScript if it contains a
    # semi-colon.
    #
    # NOTE(review): a line counts as code when it starts with the `indent`
    # pattern below. Markdown code blocks are normally indented four
    # spaces; confirm the pattern's width is what was intended.
    def self.add_syntax_highlights(content)
      lines = content.split("\n")
      block, indent, lang, start = false, /^ /, nil, nil
      lines.each_with_index do |line, i|
        if !block && line =~ indent
          block = true
          lang = "python"
          start = i
        elsif block
          lang = "javascript" if line =~ /;$/
          block = line =~ indent && i < lines.size - 1 # Also handle EOF
          if !block
            lines[start] = "{% highlight #{lang} %}"
            lines[i - 1] = "{% endhighlight %}"
          end
          lines[i] = lines[i].sub(indent, "")
        end
      end
      lines.join("\n")
    end

    # Saves a remote file under tumblr_files/ when image grabbing is on.
    #
    # url - address of the file.
    # ext - extension (with leading dot) to append when the name lacks it.
    #
    # Returns the local path with a leading "/" when the file was saved,
    # otherwise the url unchanged.
    def self.save_file(url, ext)
      if @grab_images
        path = "tumblr_files/#{url.split('/').last}"
        path += ext unless path.end_with?(ext)
        FileUtils.mkdir_p "tumblr_files"
        # Binary mode keeps image bytes intact on every platform.
        File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
        url = "/" + path
      end
      url
    end
  end
end

View File

@ -1,51 +0,0 @@
# Author: Toby DiPasquale <toby@cbcg.net>
require 'fileutils'
require 'rubygems'
require 'sequel'
require 'yaml'
module Jekyll
  module Typo
    # Query joining Typo's contents table to text_filters so each row
    # carries its filter name, with 'html' used when no filter is set.
    # It is meant to work on both MySQL and PostgreSQL, but PostgreSQL
    # had not been tested as of 2008-12-16.
    SQL = <<-EOS
SELECT c.id id,
c.title title,
c.permalink slug,
c.body body,
c.published_at date,
c.state state,
COALESCE(tf.name, 'html') filter
FROM contents c
LEFT OUTER JOIN text_filters tf
ON c.text_filter_id = tf.id
    EOS

    # Reads a Typo MySQL database via Sequel and writes one post per
    # published row into _posts/.
    #
    # dbname - name of the database.
    # user   - database user name.
    # pass   - database password.
    # host   - database host (default: 'localhost').
    #
    # Rows whose state does not match /published/ are skipped. The file
    # extension is the first word of the row's filter value.
    def self.process(dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p '_posts'
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      db[SQL].each do |post|
        next unless post[:state] =~ /published/

        published = post[:date]
        name = format('%.04d-%.02d-%.02d-%s',
                      published.year, published.month, published.day,
                      post[:slug].strip)
        # The filter field can list several text filters; only the first
        # one is used.
        name += '.' + post[:filter].split(' ').first

        File.open("_posts/#{name}", 'w') do |out|
          front_matter = {
            'layout' => 'post',
            'title' => post[:title].to_s,
            'typo_id' => post[:id]
          }
          front_matter.delete_if { |_key, value| value.nil? || value == '' }
          out.puts(front_matter.to_yaml)
          out.puts '---'
          out.puts post[:body].delete("\r")
        end
      end
    end
  end
end

View File

@ -1,294 +0,0 @@
require 'rubygems'
require 'sequel'
require 'fileutils'
require 'yaml'
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  module WordPress
    # Main migrator function. Call this to perform the migration.
    #
    # dbname::  The name of the database
    # user::    The database user name
    # pass::    The database user's password
    # host::    The address of the MySQL database host. Default: 'localhost'
    # options:: A hash table of configuration options.
    #
    # Supported options are:
    #
    # :table_prefix::   Prefix of database tables used by WordPress.
    #                   Default: 'wp_'
    # :clean_entities:: If true, convert non-ASCII characters to HTML
    #                   entities in the posts, comments, titles, and
    #                   names. Requires the 'htmlentities' gem to
    #                   work. Default: true.
    # :comments::       If true, migrate post comments too. Comments
    #                   are saved in the post's YAML front matter.
    #                   Default: true.
    # :categories::     If true, save the post's categories in its
    #                   YAML front matter. Default: true.
    # :tags::           If true, save the post's tags in its
    #                   YAML front matter. Default: true.
    # :more_excerpt::   If true, when a post has no excerpt but
    #                   does have a <!-- more --> tag, use the
    #                   preceding post content as the excerpt.
    #                   Default: true.
    # :more_anchor::    If true, convert a <!-- more --> tag into
    #                   two HTML anchors with ids "more" and
    #                   "more-NNN" (where NNN is the post number),
    #                   and record "more" under the 'more_anchor'
    #                   front matter key. Default: true.
    # :status::         Array of allowed post statuses. Only
    #                   posts with matching status will be migrated.
    #                   Known statuses are :publish, :draft, :private,
    #                   and :revision. If this is nil or an empty
    #                   array, all posts are migrated regardless of
    #                   status. Default: [:publish].
    #
    def self.process(dbname, user, pass, host = 'localhost', options = {})
      options = {
        :table_prefix => 'wp_',
        :clean_entities => true,
        :comments => true,
        :categories => true,
        :tags => true,
        :more_excerpt => true,
        :more_anchor => true,
        :status => [:publish] # :draft, :private, :revision
      }.merge(options)

      if options[:clean_entities]
        begin
          require 'htmlentities'
        rescue LoadError
          STDERR.puts "Could not require 'htmlentities', so the " +
                      ":clean_entities option is now disabled."
          options[:clean_entities] = false
        end
      end

      FileUtils.mkdir_p("_posts")

      db = Sequel.mysql(dbname, :user => user, :password => pass,
                                :host => host, :encoding => 'utf8')

      px = options[:table_prefix]

      posts_query = "
        SELECT
          posts.ID AS `id`,
          posts.guid AS `guid`,
          posts.post_type AS `type`,
          posts.post_status AS `status`,
          posts.post_title AS `title`,
          posts.post_name AS `slug`,
          posts.post_date AS `date`,
          posts.post_content AS `content`,
          posts.post_excerpt AS `excerpt`,
          posts.comment_count AS `comment_count`,
          users.display_name AS `author`,
          users.user_login AS `author_login`,
          users.user_email AS `author_email`,
          users.user_url AS `author_url`
        FROM #{px}posts AS `posts`
        LEFT JOIN #{px}users AS `users`
          ON posts.post_author = users.ID"

      # One equality test per allowed status, OR-ed together. With no
      # statuses configured the query is left unfiltered.
      statuses = options[:status]
      if statuses && !statuses.empty?
        conditions = statuses.map { |status| "posts.post_status = '#{status}'" }
        posts_query << "\n        WHERE " << conditions.join(" OR\n          ")
      end

      db[posts_query].each do |post|
        process_post(post, db, options)
      end
    end

    # Writes a single post row to _posts/ as a Markdown file.
    #
    # post::    Hash of one row from the posts query built in .process.
    # db::      The Sequel database, used to look up terms and comments.
    # options:: The merged options hash described on .process.
    def self.process_post(post, db, options)
      title = post[:title]
      title = clean_entities(title) if options[:clean_entities]

      slug = post[:slug]
      slug = sluggify(title) if !slug || slug.empty?

      date = post[:date] || Time.now
      name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                             date.day, slug]
      content = post[:content].to_s
      content = clean_entities(content) if options[:clean_entities]

      excerpt = post[:excerpt].to_s

      more_index = content.index(/<!-- *more *-->/)
      more_anchor = nil
      if more_index
        if options[:more_excerpt] &&
           (post[:excerpt].nil? || post[:excerpt].empty?)
          excerpt = content[0...more_index]
        end
        if options[:more_anchor]
          # Record the anchor id so it reaches the front matter below.
          more_anchor = "more"
          content.sub!(/<!-- *more *-->/,
                       "<a id=\"more\"></a>" +
                       "<a id=\"more-#{post[:id]}\"></a>")
        end
      end

      categories, tags = fetch_terms(post, db, options)
      comments = fetch_comments(post, db, options)

      # Get the relevant fields as a hash, delete empty fields and
      # convert to YAML for the header.
      data = {
        'layout' => post[:type].to_s,
        'status' => post[:status].to_s,
        'published' => (post[:status].to_s == "publish"),
        'title' => title.to_s,
        'author' => post[:author].to_s,
        'author_login' => post[:author_login].to_s,
        'author_email' => post[:author_email].to_s,
        'author_url' => post[:author_url].to_s,
        'excerpt' => excerpt,
        'more_anchor' => more_anchor,
        'wordpress_id' => post[:id],
        'wordpress_url' => post[:guid].to_s,
        'date' => date,
        'categories' => options[:categories] ? categories : nil,
        'tags' => options[:tags] ? tags : nil,
        'comments' => options[:comments] ? comments : nil,
      }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end

    # Looks up the category and tag names attached to a post.
    # Returns a two-element Array: [categories, tags]. Both are empty when
    # neither :categories nor :tags is enabled.
    def self.fetch_terms(post, db, options)
      categories = []
      tags = []
      return [categories, tags] unless options[:categories] || options[:tags]

      px = options[:table_prefix]
      cquery =
        "SELECT
           terms.name AS `name`,
           ttax.taxonomy AS `type`
         FROM
           #{px}terms AS `terms`,
           #{px}term_relationships AS `trels`,
           #{px}term_taxonomy AS `ttax`
         WHERE
           trels.object_id = '#{post[:id]}' AND
           trels.term_taxonomy_id = ttax.term_taxonomy_id AND
           terms.term_id = ttax.term_id"

      db[cquery].each do |term|
        bucket = if options[:categories] && term[:type] == "category"
                   categories
                 elsif options[:tags] && term[:type] == "post_tag"
                   tags
                 end
        # Terms of any other taxonomy are ignored.
        next unless bucket
        bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
      end

      [categories, tags]
    end
    private_class_method :fetch_terms

    # Loads a post's comments, excluding those marked as spam, sorted by
    # comment id. Returns an empty Array when :comments is disabled or the
    # post's comment_count is zero.
    def self.fetch_comments(post, db, options)
      comments = []
      return comments unless options[:comments] && post[:comment_count].to_i > 0

      px = options[:table_prefix]
      cquery =
        "SELECT
           comment_ID AS `id`,
           comment_author AS `author`,
           comment_author_email AS `author_email`,
           comment_author_url AS `author_url`,
           comment_date AS `date`,
           comment_date_gmt AS `date_gmt`,
           comment_content AS `content`
         FROM #{px}comments
         WHERE
           comment_post_ID = '#{post[:id]}' AND
           comment_approved != 'spam'"

      db[cquery].each do |comment|
        comcontent = comment[:content].to_s
        if comcontent.respond_to?(:force_encoding)
          comcontent.force_encoding("UTF-8")
        end
        comcontent = clean_entities(comcontent) if options[:clean_entities]

        comauthor = comment[:author].to_s
        comauthor = clean_entities(comauthor) if options[:clean_entities]

        comments << {
          'id' => comment[:id].to_i,
          'author' => comauthor,
          'author_email' => comment[:author_email].to_s,
          'author_url' => comment[:author_url].to_s,
          'date' => comment[:date].to_s,
          'date_gmt' => comment[:date_gmt].to_s,
          'content' => comcontent,
        }
      end

      comments.sort! { |a, b| a['id'] <=> b['id'] }
      comments
    end
    private_class_method :fetch_comments

    # Encodes text as named HTML entities, then restores the five
    # characters that HTML markup itself needs (& < > " ').
    # Requires the HTMLEntities class to be loaded.
    def self.clean_entities(text)
      if text.respond_to?(:force_encoding)
        text.force_encoding("UTF-8")
      end
      text = HTMLEntities.new.encode(text, :named)
      # We don't want to convert these, it would break all
      # HTML tags in the post and comments.
      text.gsub!("&amp;", "&")
      text.gsub!("&lt;", "<")
      text.gsub!("&gt;", ">")
      text.gsub!("&quot;", '"')
      text.gsub!("&apos;", "'")
      text
    end

    # Builds a URL slug from a title: lower-cased, with every run of
    # characters other than 0-9, A-Z, a-z collapsed to a single '-'.
    # Transliterates to ASCII first when the 'unidecode' gem is available.
    def self.sluggify(title)
      begin
        require 'unidecode'
        title = title.to_ascii
      rescue LoadError
        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      end
      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
    end
  end
end

View File

@ -1,70 +0,0 @@
# coding: utf-8
require 'rubygems'
require 'hpricot'
require 'fileutils'
require 'yaml'
require 'time'
module Jekyll
  # Importer for a wordpress.xml export file, which can be downloaded from a
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Writes every <item> of the export as an HTML file, grouped into one
    # "_<type>s" directory per post type, and prints a per-type count.
    #
    # filename - path of the export file (default: "wordpress.xml").
    #
    # The file name is the post date plus wp:post_name, falling back to
    # the lower-cased title joined with '-' when the post name is empty.
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip
        permalink_title = item.at('wp:post_name').inner_text
        # Fall back to a "prettified" title if post_name is empty (can happen)
        permalink_title = title.downcase.split.join('-') if permalink_title == ""

        date = Time.parse(item.at('wp:post_date').inner_text)
        status = item.at('wp:status').inner_text
        published = (status == "publish")

        type = item.at('wp:post_type').inner_text
        category_names = (item/:category).map { |c| c.inner_text }
        tags = (category_names - ['Uncategorized']).uniq

        metas = {}
        item.search("wp:postmeta").each do |meta|
          metas[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
        end

        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
        header = {
          'layout' => type,
          'title' => title,
          'tags' => tags,
          'status' => status,
          'type' => type,
          'published' => published,
          'meta' => metas
        }

        target_dir = "_#{type}s"
        FileUtils.mkdir_p target_dir
        File.open("#{target_dir}/#{name}", "w") do |out|
          out.puts header.to_yaml
          out.puts '---'
          out.puts item.at('content:encoded').inner_text
        end

        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end
  end
end