Clean up migrators formatting.

This commit is contained in:
Tom Preston-Werner 2011-05-08 15:08:53 -07:00
parent a428acec1c
commit aa0d82fa96
10 changed files with 130 additions and 105 deletions

View File

@ -1,7 +1,7 @@
module Jekyll module Jekyll
module CSV module CSV
#Reads a csv with title, permalink, body, published_at, and filter. # Reads a csv with title, permalink, body, published_at, and filter.
#It creates a post file for each row in the csv # It creates a post file for each row in the csv
def self.process(file = "posts.csv") def self.process(file = "posts.csv")
FileUtils.mkdir_p "_posts" FileUtils.mkdir_p "_posts"
posts = 0 posts = 0

View File

@ -11,12 +11,18 @@ require 'yaml'
module Jekyll module Jekyll
module Drupal module Drupal
# Reads a MySQL database via Sequel and creates a post file for each post
# Reads a MySQL database via Sequel and creates a post file for each # in wp_posts that has post_status = 'publish'. This restriction is made
# post in wp_posts that has post_status = 'publish'. # because 'draft' posts are not guaranteed to have valid dates.
# This restriction is made because 'draft' posts are not guaranteed to QUERY = "SELECT node.nid, \
# have valid dates. node.title, \
QUERY = "SELECT node.nid, node.title, node_revisions.body, node.created, node.status FROM node, node_revisions WHERE (node.type = 'blog' OR node.type = 'story') AND node.vid = node_revisions.vid" node_revisions.body, \
node.created, \
node.status \
FROM node, \
node_revisions \
WHERE (node.type = 'blog' OR node.type = 'story') \
AND node.vid = node_revisions.vid"
def self.process(dbname, user, pass, host = 'localhost') def self.process(dbname, user, pass, host = 'localhost')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

View File

@ -3,7 +3,6 @@ require 'fileutils'
module Jekyll module Jekyll
module Marley module Marley
def self.regexp def self.regexp
{ :id => /^\d{0,4}-{0,1}(.*)$/, { :id => /^\d{0,4}-{0,1}(.*)$/,
:title => /^#\s*(.*)\s+$/, :title => /^#\s*(.*)\s+$/,

View File

@ -36,11 +36,22 @@ module Jekyll
# This query will pull blog posts from all entries across all blogs. If # This query will pull blog posts from all entries across all blogs. If
# you've got unpublished, deleted or otherwise hidden posts please sift # you've got unpublished, deleted or otherwise hidden posts please sift
# through the created posts to make sure nothing is accidentally published. # through the created posts to make sure nothing is accidentally published.
QUERY = "SELECT id, \
QUERY = "SELECT id, permalink, body, published_at, title FROM contents WHERE user_id = 1 AND type = 'Article' AND published_at IS NOT NULL ORDER BY published_at" permalink, \
body, \
published_at, \
title \
FROM contents \
WHERE user_id = 1 AND \
type = 'Article' AND \
published_at IS NOT NULL \
ORDER BY published_at"
def self.process(dbname, user, pass, host = 'localhost') def self.process(dbname, user, pass, host = 'localhost')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') db = Sequel.mysql(dbname, :user => user,
:password => pass,
:host => host,
:encoding => 'utf8')
FileUtils.mkdir_p "_posts" FileUtils.mkdir_p "_posts"
@ -49,16 +60,10 @@ module Jekyll
slug = post[:permalink] slug = post[:permalink]
date = post[:published_at] date = post[:published_at]
content = post[:body] content = post[:body]
# more_content = ''
# Be sure to include the body and extended body. # Ideally, this script would determine the post format (markdown,
# if more_content != nil # html, etc) and create files with proper extensions. At this point
# content = content + " \n" + more_content # it just assumes that markdown will be acceptable.
# end
# Ideally, this script would determine the post format (markdown, html
# , etc) and create files with proper extensions. At this point it
# just assumes that markdown will be acceptable.
name = [date.year, date.month, date.day, slug].join('-') + ".markdown" name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
data = { data = {

View File

@ -18,7 +18,14 @@ module Jekyll
# This query will pull blog posts from all entries across all blogs. If # This query will pull blog posts from all entries across all blogs. If
# you've got unpublished, deleted or otherwise hidden posts please sift # you've got unpublished, deleted or otherwise hidden posts please sift
# through the created posts to make sure nothing is accidentally published. # through the created posts to make sure nothing is accidentally published.
QUERY = "SELECT entry_id, entry_basename, entry_text, entry_text_more, entry_authored_on, entry_title, entry_convert_breaks FROM mt_entry" QUERY = "SELECT entry_id, \
entry_basename, \
entry_text, \
entry_text_more, \
entry_authored_on, \
entry_title, \
entry_convert_breaks \
FROM mt_entry"
def self.process(dbname, user, pass, host = 'localhost') def self.process(dbname, user, pass, host = 'localhost')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
@ -38,17 +45,18 @@ module Jekyll
content = content + " \n" + more_content content = content + " \n" + more_content
end end
# Ideally, this script would determine the post format (markdown, html # Ideally, this script would determine the post format (markdown,
# , etc) and create files with proper extensions. At this point it # html, etc) and create files with proper extensions. At this point
# just assumes that markdown will be acceptable. # it just assumes that markdown will be acceptable.
name = [date.year, date.month, date.day, slug].join('-') + '.' + self.suffix(entry_convert_breaks) name = [date.year, date.month, date.day, slug].join('-') + '.' +
self.suffix(entry_convert_breaks)
data = { data = {
'layout' => 'post', 'layout' => 'post',
'title' => title.to_s, 'title' => title.to_s,
'mt_id' => post[:entry_id], 'mt_id' => post[:entry_id],
'date' => date 'date' => date
}.delete_if { |k,v| v.nil? || v == ''}.to_yaml }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
File.open("_posts/#{name}", "w") do |f| File.open("_posts/#{name}", "w") do |f|
f.puts data f.puts data
@ -60,17 +68,18 @@ module Jekyll
def self.suffix(entry_type) def self.suffix(entry_type)
if entry_type.nil? || entry_type.include?("markdown") if entry_type.nil? || entry_type.include?("markdown")
# The markdown plugin I have saves this as "markdown_with_smarty_pants", so I just look for "markdown". # The markdown plugin I have saves this as
"markdown" # "markdown_with_smarty_pants", so I just look for "markdown".
elsif entry_type.include?("textile") "markdown"
# This is saved as "textile_2" on my installation of MT 5.1. elsif entry_type.include?("textile")
"textile" # This is saved as "textile_2" on my installation of MT 5.1.
elsif entry_type == "0" || entry_type.include?("richtext") "textile"
# richtext looks to me like it's saved as HTML, so I include it here. elsif entry_type == "0" || entry_type.include?("richtext")
"html" # Richtext looks to me like it's saved as HTML, so I include it here.
else "html"
# Other values might need custom work. else
entry_type # Other values might need custom work.
entry_type
end end
end end
end end

View File

@ -9,17 +9,16 @@ require "json"
module Jekyll module Jekyll
module Posterous module Posterous
def self.fetch(uri_str, limit = 10) def self.fetch(uri_str, limit = 10)
# You should choose a better exception. # You should choose a better exception.
raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0 raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0
response = nil response = nil
Net::HTTP.start('posterous.com') {|http| Net::HTTP.start('posterous.com') do |http|
req = Net::HTTP::Get.new(uri_str) req = Net::HTTP::Get.new(uri_str)
req.basic_auth @email, @pass req.basic_auth @email, @pass
response = http.request(req) response = http.request(req)
} end
case response case response
when Net::HTTPSuccess then response when Net::HTTPSuccess then response
@ -28,7 +27,6 @@ module Jekyll
end end
end end
def self.process(email, pass, blog = 'primary') def self.process(email, pass, blog = 'primary')
@email, @pass = email, pass @email, @pass = email, pass
@api_token = JSON.parse(self.fetch("/api/2/auth/token").body)['api_token'] @api_token = JSON.parse(self.fetch("/api/2/auth/token").body)['api_token']
@ -38,7 +36,6 @@ module Jekyll
page = 1 page = 1
while posts.any? while posts.any?
posts.each do |post| posts.each do |post|
title = post["title"] title = post["title"]
slug = title.gsub(/[^[:alnum:]]+/, '-').downcase slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
@ -61,13 +58,11 @@ module Jekyll
f.puts "---" f.puts "---"
f.puts content f.puts content
end end
end end
page += 1 page += 1
posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body) posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body)
end end
end end
end end
end end

View File

@ -11,10 +11,17 @@ require 'fileutils'
module Jekyll module Jekyll
module TextPattern module TextPattern
# Reads a MySQL database via Sequel and creates a post file for each post. # Reads a MySQL database via Sequel and creates a post file for each post.
# The only posts selected are those with a status of 4 or 5, which means "live" # The only posts selected are those with a status of 4 or 5, which means
# and "sticky" respectively. # "live" and "sticky" respectively.
# Other statuses is 1 => draft, 2 => hidden and 3 => pending # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
QUERY = "select Title, url_title, Posted, Body, Keywords from textpattern where Status = '4' or Status = '5'" QUERY = "SELECT Title, \
url_title, \
Posted, \
Body, \
Keywords \
FROM textpattern \
WHERE Status = '4' OR \
Status = '5'"
def self.process(dbname, user, pass, host = 'localhost') def self.process(dbname, user, pass, host = 'localhost')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
@ -22,7 +29,7 @@ module Jekyll
FileUtils.mkdir_p "_posts" FileUtils.mkdir_p "_posts"
db[QUERY].each do |post| db[QUERY].each do |post|
# Get required fields and construct Jekyll compatible name # Get required fields and construct Jekyll compatible name.
title = post[:Title] title = post[:Title]
slug = post[:url_title] slug = post[:url_title]
date = post[:Posted] date = post[:Posted]
@ -31,14 +38,14 @@ module Jekyll
name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile" name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
# Get the relevant fields as a hash, delete empty fields and convert # Get the relevant fields as a hash, delete empty fields and convert
# to YAML for the header # to YAML for the header.
data = { data = {
'layout' => 'post', 'layout' => 'post',
'title' => title.to_s, 'title' => title.to_s,
'tags' => post[:Keywords].split(',') 'tags' => post[:Keywords].split(',')
}.delete_if { |k,v| v.nil? || v == ''}.to_yaml }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
# Write out the data and content to file # Write out the data and content to file.
File.open("_posts/#{name}", "w") do |f| File.open("_posts/#{name}", "w") do |f|
f.puts data f.puts data
f.puts "---" f.puts "---"

View File

@ -5,8 +5,8 @@ require 'sequel'
module Jekyll module Jekyll
module Typo module Typo
# this SQL *should* work for both MySQL and PostgreSQL, but I haven't # This SQL *should* work for both MySQL and PostgreSQL, but I haven't
# tested PostgreSQL yet (as of 2008-12-16) # tested PostgreSQL yet (as of 2008-12-16).
SQL = <<-EOS SQL = <<-EOS
SELECT c.id id, SELECT c.id id,
c.title title, c.title title,
@ -30,8 +30,9 @@ module Jekyll
sprintf("%.02d", post[:date].month), sprintf("%.02d", post[:date].month),
sprintf("%.02d", post[:date].day), sprintf("%.02d", post[:date].day),
post[:slug].strip ].join('-') post[:slug].strip ].join('-')
# Can have more than one text filter in this field, but we just want # Can have more than one text filter in this field, but we just want
# the first one for this # the first one for this.
name += '.' + post[:filter].split(' ')[0] name += '.' + post[:filter].split(' ')[0]
File.open("_posts/#{name}", 'w') do |f| File.open("_posts/#{name}", 'w') do |f|
@ -45,5 +46,5 @@ module Jekyll
end end
end end
end # module Typo end
end # module Jekyll end

View File

@ -11,20 +11,27 @@ require 'yaml'
module Jekyll module Jekyll
module WordPress module WordPress
def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_') def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
FileUtils.mkdir_p "_posts" FileUtils.mkdir_p("_posts")
# Reads a MySQL database via Sequel and creates a post file for each # Reads a MySQL database via Sequel and creates a post file for each
# post in wp_posts that has post_status = 'publish'. # post in wp_posts that has post_status = 'publish'. This restriction is
# This restriction is made because 'draft' posts are not guaranteed to # made because 'draft' posts are not guaranteed to have valid dates.
# have valid dates. query = "SELECT post_title, \
query = "select post_title, post_name, post_date, post_content, post_excerpt, ID, guid from #{table_prefix}posts where post_status = 'publish' and post_type = 'post'" post_name, \
post_date, \
post_content, \
post_excerpt, \
ID, \
guid \
FROM #{table_prefix}posts \
WHERE post_status = 'publish' AND \
post_type = 'post'"
db[query].each do |post| db[query].each do |post|
# Get required fields and construct Jekyll compatible name # Get required fields and construct Jekyll compatible name.
title = post[:post_title] title = post[:post_title]
slug = post[:post_name] slug = post[:post_name]
date = post[:post_date] date = post[:post_date]
@ -33,7 +40,7 @@ module Jekyll
slug] slug]
# Get the relevant fields as a hash, delete empty fields and convert # Get the relevant fields as a hash, delete empty fields and convert
# to YAML for the header # to YAML for the header.
data = { data = {
'layout' => 'post', 'layout' => 'post',
'title' => title.to_s, 'title' => title.to_s,
@ -41,7 +48,7 @@ module Jekyll
'wordpress_id' => post[:ID], 'wordpress_id' => post[:ID],
'wordpress_url' => post[:guid], 'wordpress_url' => post[:guid],
'date' => date 'date' => date
}.delete_if { |k,v| v.nil? || v == ''}.to_yaml }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
# Write out the data and content to file # Write out the data and content to file
File.open("_posts/#{name}", "w") do |f| File.open("_posts/#{name}", "w") do |f|
@ -50,7 +57,6 @@ module Jekyll
f.puts content f.puts content
end end
end end
end end
end end
end end

View File

@ -6,40 +6,37 @@ require 'fileutils'
require 'yaml' require 'yaml'
module Jekyll module Jekyll
# This importer takes a wordpress.xml file, which can be exported from your
# This importer takes a wordpress.xml file, # wordpress.com blog (/wp-admin/export.php).
# which can be exported from your
# wordpress.com blog (/wp-admin/export.php)
module WordpressDotCom module WordpressDotCom
def self.process(filename = "wordpress.xml") def self.process(filename = "wordpress.xml")
FileUtils.mkdir_p "_posts" FileUtils.mkdir_p "_posts"
posts = 0 posts = 0
doc = Hpricot::XML(File.read(filename)) doc = Hpricot::XML(File.read(filename))
(doc/:channel/:item).each do |item| (doc/:channel/:item).each do |item|
title = item.at(:title).inner_text.strip title = item.at(:title).inner_text.strip
permalink_title = item.at('wp:post_name').inner_text permalink_title = item.at('wp:post_name').inner_text
date = Time.parse(item.at(:pubDate).inner_text) date = Time.parse(item.at(:pubDate).inner_text)
tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq
name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html" name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
header = { header = {
'layout' => 'post', 'layout' => 'post',
'title' => title, 'title' => title,
'tags' => tags 'tags' => tags
} }
File.open("_posts/#{name}", "w") do |f| File.open("_posts/#{name}", "w") do |f|
f.puts header.to_yaml f.puts header.to_yaml
f.puts '---' f.puts '---'
f.puts item.at('content:encoded').inner_text f.puts item.at('content:encoded').inner_text
end end
posts += 1 posts += 1
end end
puts "Imported #{posts} posts" puts "Imported #{posts} posts"
end end
end end
end end