Remove migrators
This commit is contained in:
parent
a151a16f09
commit
b9da30bc8f
15
Rakefile
15
Rakefile
|
@ -82,21 +82,6 @@ end
|
||||||
#
|
#
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
|
||||||
# Rake tasks that import posts from other blogging engines into the current
# directory. Each task shells out to a child Ruby process that loads the
# matching migrator.
#
# The commands are built with %q(), which does not interpolate: the
# "#{ENV[...]}" text is handed to the child Ruby verbatim and expanded there,
# so DB / USER / PASS are read from the environment of the child process.
namespace :migrate do
  desc "Migrate from mephisto in the current directory"
  task :mephisto do
    mephisto_command = %q{ruby -r './lib/jekyll/migrators/mephisto' -e 'Jekyll::Mephisto.postgres(:database => "#{ENV["DB"]}")'}
    sh mephisto_command
  end

  desc "Migrate from Movable Type in the current directory"
  task :mt do
    mt_command = %q{ruby -r './lib/jekyll/migrators/mt' -e 'Jekyll::MT.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'}
    sh mt_command
  end

  desc "Migrate from Typo in the current directory"
  task :typo do
    typo_command = %q{ruby -r './lib/jekyll/migrators/typo' -e 'Jekyll::Typo.process("#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'}
    sh typo_command
  end
end
|
|
||||||
|
|
||||||
begin
|
begin
|
||||||
require 'cucumber/rake/task'
|
require 'cucumber/rake/task'
|
||||||
Cucumber::Rake::Task.new(:features) do |t|
|
Cucumber::Rake::Task.new(:features) do |t|
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
require 'yaml'

module Jekyll
  module CSV
    # Reads a CSV whose columns are title, permalink, body, published_at and
    # filter, and writes one post file per data row into _posts/.
    #
    # file - path of the CSV file to read (default: "posts.csv").
    #
    # The header row (first column equal to "title") is skipped. Rows without
    # a published_at value are skipped as well, because the file name is
    # derived from that date. The front matter is serialised with to_yaml so
    # that titles containing ':' or quotes still produce valid YAML.
    #
    # This file does not load FasterCSV or FileUtils itself; the caller is
    # expected to have required them.
    #
    # Returns a summary String such as "Created 3 posts!".
    def self.process(file = "posts.csv")
      FileUtils.mkdir_p "_posts"
      posts = 0
      FasterCSV.foreach(file) do |row|
        next if row[0] == "title"
        # Unpublished rows carry no date to build a file name from.
        next if row[3].to_s.strip.empty?

        posts += 1
        extension = row[4] =~ /markdown/ ? ".markdown" : ".textile"
        # published_at looks like "YYYY-MM-DD HH:MM:SS"; keep the date part.
        name = row[3].split(" ")[0] + "-" + row[1] + extension
        front_matter = { 'layout' => 'post', 'title' => row[0].to_s }.to_yaml

        File.open("_posts/#{name}", "w") do |f|
          f.puts front_matter
          f.puts "---"
          f.puts ""
          f.puts row[2]
        end
      end
      "Created #{posts} posts!"
    end
  end
end
|
|
|
@ -1,103 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
  module Drupal
    # Selects every node of type 'blog' or 'story' together with the body of
    # the revision it points at. Table names are unprefixed here; process
    # derives a prefixed copy when a table prefix is supplied, so this
    # constant is never modified.
    QUERY = "SELECT n.nid, \
                    n.title, \
                    nr.body, \
                    n.created, \
                    n.status \
             FROM node AS n, \
                  node_revisions AS nr \
             WHERE (n.type = 'blog' OR n.type = 'story') \
             AND n.vid = nr.vid"

    # Layout used by the redirect stubs written for each published post.
    # Change the refresh url if you customized your permalink config.
    REFRESH_LAYOUT = [
      '<!DOCTYPE html>',
      '<html>',
      '<head>',
      '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
      '<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />',
      '</head>',
      '</html>'
    ]

    # Reads a Drupal MySQL database via Sequel and writes one file per node:
    # published nodes (status == 1) go to _posts/, everything else to
    # _drafts/. For each published node a redirect stub is written under
    # node/<nid>/ and under every alias found in the url_alias table.
    #
    # dbname - database name
    # user   - database user
    # pass   - database password
    # host   - database host (default: 'localhost')
    # prefix - table-name prefix (default: '', nil is treated as '')
    def self.process(dbname, user, pass, host = 'localhost', prefix = '')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      # Build a prefixed copy instead of editing QUERY in place: in-place
      # replacement broke every run after the first one with a prefix.
      prefix = prefix.to_s
      query = QUERY.sub(" node ", " #{prefix}node ").
                    sub(" node_revisions ", " #{prefix}node_revisions ")

      FileUtils.mkdir_p "_posts"
      FileUtils.mkdir_p "_drafts"
      # The refresh layout is written below, so its directory must exist.
      FileUtils.mkdir_p "_layouts"

      File.open("_layouts/refresh.html", "w") { |f| f.puts REFRESH_LAYOUT }

      db[query].each do |post|
        # Get required fields and construct Jekyll compatible name
        node_id = post[:nid]
        title = post[:title].to_s
        content = post[:body]
        created = post[:created]
        time = Time.at(created)
        is_published = post[:status] == 1
        dir = is_published ? "_posts" : "_drafts"
        slug = slugify(title)
        name = time.strftime("%Y-%m-%d-") + slug + '.md'

        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header
        data = {
          'layout' => 'post',
          'title' => title,
          'created' => created,
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("#{dir}/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end

        # Make a file to redirect from the old Drupal URL
        next unless is_published

        aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
        aliases.push(:dst => "node/#{node_id}")

        aliases.each do |url_alias|
          FileUtils.mkdir_p url_alias[:dst]
          File.open("#{url_alias[:dst]}/index.md", "w") do |f|
            f.puts "---"
            f.puts "layout: refresh"
            f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
            f.puts "---"
          end
        end
      end

      # TODO: Make dirs & files for nodes of type 'page'
      # Make refresh pages for these as well

      # TODO: Make refresh dirs & files according to entries in url_alias table
    end

    # Turns a node title into a URL slug: lower-cased, ampersands (raw or
    # HTML-escaped) spelled out as "and", whitespace and path separators
    # turned into hyphens, other punctuation dropped, and runs or edge
    # hyphens/underscores collapsed.
    def self.slugify(title)
      title.to_s.strip.downcase.
        gsub(/&amp;|&/, ' and ').
        gsub(/[\s\.\/\\]/, '-').
        gsub(/[^\w-]/, '').
        gsub(/[-_]{2,}/, '-').
        gsub(/^[-_]/, '').
        gsub(/[-_]$/, '')
    end
  end
end
|
|
|
@ -1,49 +0,0 @@
|
||||||
# Adapted by Rodrigo Pinto <rodrigopqn@gmail.com>
|
|
||||||
# Based on typo.rb by Toby DiPasquale
|
|
||||||
|
|
||||||
require 'fileutils'
|
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
|
|
||||||
require 'yaml'

module Jekyll
  module Enki
    # Selects every row of the posts table; published_at is exposed as
    # `date` and cached_tag_list as `tags`.
    SQL = <<-EOS
      SELECT p.id,
             p.title,
             p.slug,
             p.body,
             p.published_at as date,
             p.cached_tag_list as tags
      FROM posts p
    EOS

    # Reads an Enki database via Sequel and writes one Textile post per row
    # into _posts/.
    #
    # Just working with postgres, but can be easily adapted
    # to work with both mysql and postgres.
    #
    # dbname - database name
    # user   - database user
    # pass   - database password
    # host   - database host (default: 'localhost')
    #
    # Rows without a published_at value are skipped: the file name is built
    # from that date, so such rows previously aborted the whole import.
    def self.process(dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p('_posts')
      db = Sequel.postgres(:database => dbname,
                           :user => user,
                           :password => pass,
                           :host => host,
                           :encoding => 'utf8')

      db[SQL].each do |post|
        published_at = post[:date]
        next if published_at.nil?

        name = format('%04d-%02d-%02d-%s.textile',
                      published_at.year, published_at.month, published_at.day,
                      post[:slug].strip)

        front_matter = {
          'layout' => 'post',
          'title' => post[:title].to_s,
          'enki_id' => post[:id],
          'categories' => post[:tags]
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        File.open("_posts/#{name}", 'w') do |f|
          f.puts front_matter
          f.puts '---'
          # Strip carriage returns; a NULL body is written as empty.
          f.puts post[:body].to_s.delete("\r")
        end
      end
    end
  end
end
|
|
|
@ -1,53 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
# NOTE: This migrator is made for Joomla 1.5 databases.
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
  module Joomla
    # Reads a Joomla 1.5 MySQL database via Sequel and writes one Markdown
    # post into _posts/ for each row of <table_prefix>content that belongs to
    # the given section and whose state is '0' or '1'.
    #
    # dbname       - database name
    # user         - database user
    # pass         - database password
    # host         - database host (default: 'localhost')
    # table_prefix - table-name prefix (default: 'jos_')
    # section      - sectionid to export (default: '1')
    def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'jos_', section = '1')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p("_posts")

      # The state test is parenthesised: AND binds tighter than OR, so
      # without the parentheses every state = '0' row of every section was
      # exported. The section is bound as a placeholder rather than being
      # spliced into the SQL text.
      query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` " \
              "FROM #{table_prefix}content " \
              "WHERE (state = '0' OR state = '1') AND sectionid = ?"

      db[query, section.to_s].each do |post|
        # Get required fields and construct Jekyll compatible name.
        title = post[:title]
        slug = post[:alias]
        date = post[:created]
        content = post[:content]
        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header.
        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'joomla_id' => post[:id],
          'joomla_url' => post[:alias],
          'date' => date
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
|
|
|
@ -1,52 +0,0 @@
|
||||||
require 'yaml'
|
|
||||||
require 'fileutils'
|
|
||||||
|
|
||||||
require 'date'

module Jekyll
  module Marley
    # Patterns used to pick a Marley post file apart (copied over from
    # marley's app/lib/post.rb). The title is a leading "# ..." line which
    # may end in a "(yyyy/mm/dd)" date; the perex is the first line without
    # '#'; the meta block is wrapped in "{{" / "}}" lines.
    def self.regexp
      { :id => /^\d{0,4}-{0,1}(.*)$/,
        :title => /^#\s*(.*)\s+$/,
        :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
        :published_on => /.*\s+\(([0-9\/]+)\)$/,
        :perex => /^([^\#\n]+\n)$/,
        :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
      }
    end

    # Converts every *.txt file below marley_data_dir into a Markdown post
    # in _posts/.
    #
    # marley_data_dir - directory holding the Marley posts
    #
    # The publication date is taken from a "(yyyy/mm/dd)" suffix on the title
    # line when there is one (it is then removed from the title); otherwise
    # the modification time of the post's directory is used. The post name is
    # the directory name without its leading "<digits>-".
    #
    # Returns a summary String such as "Created 3 posts!".
    # Raises ArgumentError if marley_data_dir is not a directory.
    def self.process(marley_data_dir)
      raise ArgumentError, "marley dir #{marley_data_dir} not found" unless File.directory?(marley_data_dir)

      FileUtils.mkdir_p "_posts"

      posts = 0
      Dir["#{marley_data_dir}/**/*.txt"].each do |path|
        next unless File.exist?(path)

        file_content = File.read(path)
        # slice! removes the meta block from file_content and returns it.
        meta_content = file_content.slice!(regexp[:meta])
        body = file_content.sub(regexp[:title], '').sub(regexp[:perex], '').strip

        published_on = nil
        dated_title = file_content.match(regexp[:title_with_date])
        if dated_title
          title = dated_title[1].strip
          published_on = parse_date(dated_title[2])
        else
          title = file_content[regexp[:title], 1].to_s.strip
        end
        published_on ||= File.mtime(File.dirname(path))

        perex = file_content[regexp[:perex], 1].to_s.strip

        meta = meta_content ? YAML.load(meta_content[regexp[:meta], 1].to_s) : {}
        # An empty or non-mapping meta block must not break the header.
        meta = {} unless meta.is_a?(Hash)
        meta['title'] = title
        meta['layout'] = 'post'

        formatted_date = published_on.strftime('%Y-%m-%d')
        post_name = File.dirname(path).split(%r{/}).last.gsub(/\A\d+-/, '')

        name = "#{formatted_date}-#{post_name}"
        File.open("_posts/#{name}.markdown", "w") do |out|
          out.puts meta.to_yaml
          out.puts "---\n"
          out.puts "\n#{perex}\n\n" unless perex.empty?
          out.puts body
        end
        posts += 1
      end
      "Created #{posts} posts!"
    end

    # Parses the date text captured from a title line. Returns nil when the
    # text is not a valid date so the caller can fall back to the file time.
    def self.parse_date(text)
      Date.parse(text)
    rescue ArgumentError
      nil
    end
  end
end
|
|
|
@ -1,84 +0,0 @@
|
||||||
# Quickly hacked together by Michael Ivey
|
|
||||||
# Based on mt.rb by Nick Gerakines, open source and publicly
|
|
||||||
# available under the MIT license. Use this module at your own risk.
|
|
||||||
|
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fastercsv'
|
|
||||||
require 'fileutils'
|
|
||||||
require File.join(File.dirname(__FILE__),"csv.rb")
|
|
||||||
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
  module Mephisto
    # Exports Mephisto articles from PostgreSQL into a CSV file by running
    # psql, then converts that CSV into posts with CSV.process.
    #
    # c - Hash of settings: :database, :username, :host (default
    #     "localhost") and :filename (default "posts.csv").
    #
    # Export PGPASSWORD if you must.
    #
    # psql is invoked with an argument list, so no value passes through a
    # shell. The file that psql writes is the file handed to CSV.process;
    # previously a custom :filename was written but "posts.csv" was read.
    #
    # Returns the summary String produced by CSV.process.
    def self.postgres(c)
      sql = <<-SQL
        BEGIN;
        CREATE TEMP TABLE jekyll AS
          SELECT title, permalink, body, published_at, filter FROM contents
          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
        COPY jekyll TO STDOUT WITH CSV HEADER;
        ROLLBACK;
      SQL
      filename = (c[:filename] || "posts.csv").to_s
      command = ["psql", "-h", (c[:host] || "localhost").to_s, "-c", sql.strip, "-o", filename]
      # psql takes [dbname [username]] as trailing positional arguments.
      command << c[:database].to_s unless c[:database].to_s.empty?
      command << c[:username].to_s unless c[:username].to_s.empty?
      puts command.join(" ")
      system(*command)
      CSV.process(filename)
    end

    # This query will pull blog posts from all entries across all blogs. If
    # you've got unpublished, deleted or otherwise hidden posts please sift
    # through the created posts to make sure nothing is accidentally published.
    QUERY = "SELECT id, \
                    permalink, \
                    body, \
                    published_at, \
                    title \
             FROM contents \
             WHERE user_id = 1 AND \
                   type = 'Article' AND \
                   published_at IS NOT NULL \
             ORDER BY published_at"

    # Reads a Mephisto MySQL database via Sequel and writes one Markdown post
    # per article into _posts/.
    #
    # dbname - database name
    # user   - database user
    # pass   - database password
    # host   - database host (default: 'localhost')
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user,
                                :password => pass,
                                :host => host,
                                :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        title = post[:title]
        slug = post[:permalink]
        date = post[:published_at]
        content = post[:body]

        # Ideally, this script would determine the post format (markdown,
        # html, etc) and create files with proper extensions. At this point
        # it just assumes that markdown will be acceptable.
        # The date is zero-padded (YYYY-MM-DD).
        name = date.strftime("%Y-%m-%d") + "-" + slug.to_s + ".markdown"

        data = {
          'layout' => 'post',
          'title' => title.to_s,
          # QUERY selects `id`; the :entry_id column read here before does
          # not exist in the result, so no id was ever recorded.
          'mephisto_id' => post[:id],
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
|
|
|
@ -1,86 +0,0 @@
|
||||||
# Created by Nick Gerakines, open source and publicly available under the
|
|
||||||
# MIT license. Use this module at your own risk.
|
|
||||||
# I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
|
|
||||||
|
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
  module MT
    # This query will pull blog posts from all entries across all blogs. If
    # you've got unpublished, deleted or otherwise hidden posts please sift
    # through the created posts to make sure nothing is accidentally published.
    QUERY = "SELECT entry_id, \
                    entry_basename, \
                    entry_text, \
                    entry_text_more, \
                    entry_authored_on, \
                    entry_title, \
                    entry_convert_breaks \
             FROM mt_entry"

    # Reads a Movable Type MySQL database via Sequel and writes one post per
    # mt_entry row into _posts/.
    #
    # dbname - database name
    # user   - database user
    # pass   - database password
    # host   - database host (default: 'localhost')
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        title = post[:entry_title]
        slug = post[:entry_basename].gsub(/_/, '-')
        date = post[:entry_authored_on]
        # A NULL body is treated as empty so the extended body can still be
        # appended to it.
        content = post[:entry_text].to_s
        more_content = post[:entry_text_more]
        entry_convert_breaks = post[:entry_convert_breaks]

        # Be sure to include the body and extended body.
        content = content + " \n" + more_content unless more_content.nil?

        # The extension is chosen by suffix from the entry's text format.
        # The date is zero-padded (YYYY-MM-DD).
        name = date.strftime("%Y-%m-%d") + "-" + slug + '.' +
               suffix(entry_convert_breaks)

        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'mt_id' => post[:entry_id],
          'date' => date
        }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end

    # Maps an entry_convert_breaks value to a file extension.
    #
    # entry_type - the column value, or nil
    #
    # Returns "markdown" for nil or any value containing "markdown", "textile"
    # for any value containing "textile", "html" for "0" or any value
    # containing "richtext", and the value itself otherwise.
    def self.suffix(entry_type)
      if entry_type.nil? || entry_type.include?("markdown")
        # The markdown plugin I have saves this as
        # "markdown_with_smarty_pants", so I just look for "markdown".
        "markdown"
      elsif entry_type.include?("textile")
        # This is saved as "textile_2" on my installation of MT 5.1.
        "textile"
      elsif entry_type == "0" || entry_type.include?("richtext")
        # Richtext looks to me like it's saved as HTML, so I include it here.
        "html"
      else
        # Other values might need custom work.
        entry_type
      end
    end
  end
end
|
|
|
@ -1,67 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'jekyll'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'net/http'
|
|
||||||
require 'uri'
|
|
||||||
require "json"
|
|
||||||
|
|
||||||
# ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key, blog)'
|
|
||||||
|
|
||||||
module Jekyll
  module Posterous
    # Performs an authenticated GET against posterous.com and follows
    # redirects.
    #
    # uri_str - request path (or redirect target)
    # limit   - number of redirects still allowed (default: 10)
    #
    # Credentials are taken from @email / @pass, which process sets.
    #
    # Returns the successful response. Raises ArgumentError when the redirect
    # limit is exhausted; any other non-success response is raised through
    # response.error!.
    def self.fetch(uri_str, limit = 10)
      # You should choose better exception.
      raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0

      response = Net::HTTP.start('posterous.com') do |http|
        req = Net::HTTP::Get.new(uri_str)
        req.basic_auth @email, @pass
        http.request(req)
      end

      case response
      when Net::HTTPSuccess     then response
      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
      else response.error!
      end
    end

    # Downloads every post of a Posterous site and writes each one as an HTML
    # post into _posts/.
    #
    # email     - account e-mail
    # pass      - account password
    # api_token - API token
    # blog      - site identifier (default: 'primary')
    #
    # Pages are requested one after another until a page comes back empty.
    def self.process(email, pass, api_token, blog = 'primary')
      @email, @pass, @api_token = email, pass, api_token
      FileUtils.mkdir_p "_posts"

      page = 1
      posts = fetch_posts(blog, page)

      while posts.any?
        posts.each { |post| write_post(post) }

        page += 1
        posts = fetch_posts(blog, page)
      end
    end

    # Fetches and parses one page of posts. The first page is requested
    # without a page parameter, later pages with "&page=N".
    def self.fetch_posts(blog, page)
      path = "/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}"
      path += "&page=#{page}" if page > 1
      JSON.parse(fetch(path).body)
    end

    # Writes a single post Hash (as parsed from the API) to _posts/.
    def self.write_post(post)
      # A post without a title is given an empty one instead of raising.
      title = post["title"].to_s
      slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
      date = Date.parse(post["display_date"])
      content = post["body_html"]
      published = !post["is_private"]
      name = "%02d-%02d-%02d-%s.html" % [date.year, date.month, date.day, slug]

      # Get the relevant fields as a hash, delete empty fields and convert
      # to YAML for the header
      data = {
        'layout' => 'post',
        'title' => title,
        'published' => published
      }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end
  end
end
|
|
|
@ -1,47 +0,0 @@
|
||||||
# Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
|
|
||||||
# Use at your own risk. The end.
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# (URL)
|
|
||||||
# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('http://yourdomain.com/your-favorite-feed.xml')"
|
|
||||||
#
|
|
||||||
# (Local file)
|
|
||||||
# ruby -r '_import/rss.rb' -e "Jekyll::MigrateRSS.process('./somefile/on/your/computer.xml')"
|
|
||||||
|
|
||||||
require 'rubygems'
|
|
||||||
require 'rss/1.0'
|
|
||||||
require 'rss/2.0'
|
|
||||||
require 'open-uri'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
module Jekyll
  module MigrateRSS
    # Converts every item of an RSS feed into an HTML post in _posts/.
    #
    # source - a URL or the path of a local file.
    #
    # Each post is named "<item date>-<slugged title>.html", carries the item
    # title in its front matter and the item description as its body.
    #
    # Raises RuntimeError when the source cannot be parsed as RSS.
    def self.process(source)
      content = read_source(source)
      rss = RSS::Parser.parse(content, false)

      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

      # Posts are written below, so the target directory must exist.
      FileUtils.mkdir_p "_posts"

      rss.items.each do |item|
        formatted_date = item.date.strftime('%Y-%m-%d')
        # Split the title on spaces and punctuation ('$' is matched literally)
        # and join the non-empty, lower-cased pieces with hyphens.
        post_name = item.title.split(%r{ |!|/|:|&|-|\$|,}).map { |i| i.downcase if i != '' }.compact.join('-')
        name = "#{formatted_date}-#{post_name}"

        header = {
          'layout' => 'post',
          'title' => item.title
        }

        File.open("_posts/#{name}.html", "w") do |f|
          f.puts header.to_yaml
          f.puts "---\n"
          f.puts item.description
        end
      end
    end

    # Reads the feed text. http, https and ftp sources are fetched through
    # open-uri; anything else is read as a local file. Kernel#open is
    # deliberately not used: it would run a source starting with "|" as a
    # command.
    def self.read_source(source)
      if source =~ %r{\A(?:https?|ftp)://}i
        URI.parse(source).open { |remote| remote.read }
      else
        File.open(source) { |local| local.read }
      end
    end
  end
end
|
|
|
@ -1,58 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
  module TextPattern
    # Reads a MySQL database via Sequel and creates a post file for each post.
    # The only posts selected are those with a status of 4 or 5, which means
    # "live" and "sticky" respectively.
    # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
    QUERY = "SELECT Title, \
                    url_title, \
                    Posted, \
                    Body, \
                    Keywords \
             FROM textpattern \
             WHERE Status = '4' OR \
                   Status = '5'"

    # Writes one Textile post into _posts/ for every row returned by QUERY.
    #
    # dbname - database name
    # user   - database user
    # pass   - database password
    # host   - database host (default: 'localhost')
    #
    # Keywords become the post's tags: the value is split on commas, each tag
    # is trimmed, and the tags entry is left out when nothing remains (this
    # includes a NULL Keywords column).
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        # Get required fields and construct Jekyll compatible name.
        title = post[:Title]
        slug = post[:url_title]
        date = post[:Posted]
        content = post[:Body]

        name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

        tags = post[:Keywords].to_s.split(',').map { |tag| tag.strip }.reject { |tag| tag.empty? }

        # Get the relevant fields as a hash, delete empty fields and convert
        # to YAML for the header.
        data = {
          'layout' => 'post',
          'title' => title.to_s,
          'tags' => tags
        }.delete_if { |k, v| v.nil? || v == '' || v == [] }.to_yaml

        # Write out the data and content to file.
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
|
|
|
@ -1,195 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'open-uri'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'nokogiri'
|
|
||||||
require 'date'
|
|
||||||
require 'json'
|
|
||||||
require 'uri'
|
|
||||||
require 'jekyll'
|
|
||||||
|
|
||||||
module Jekyll
|
|
||||||
module Tumblr
|
|
||||||
def self.process(url, format = "html", grab_images = false,
|
|
||||||
add_highlights = false, rewrite_urls = true)
|
|
||||||
@grab_images = grab_images
|
|
||||||
FileUtils.mkdir_p "_posts/tumblr"
|
|
||||||
url += "/api/read/json/"
|
|
||||||
per_page = 50
|
|
||||||
posts = []
|
|
||||||
# Two passes are required so that we can rewrite URLs.
|
|
||||||
# First pass builds up an array of each post as a hash.
|
|
||||||
begin
|
|
||||||
current_page = (current_page || -1) + 1
|
|
||||||
feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}")
|
|
||||||
json = feed.readlines.join("\n")[21...-2] # Strip Tumblr's JSONP chars.
|
|
||||||
blog = JSON.parse(json)
|
|
||||||
puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
|
|
||||||
posts += blog["posts"].map { |post| post_to_hash(post, format) }
|
|
||||||
end until blog["posts"].size < per_page
|
|
||||||
# Rewrite URLs and create redirects.
|
|
||||||
posts = rewrite_urls_and_redirects posts if rewrite_urls
|
|
||||||
# Second pass for writing post files.
|
|
||||||
posts.each do |post|
|
|
||||||
if format == "md"
|
|
||||||
post[:content] = html_to_markdown post[:content]
|
|
||||||
post[:content] = add_syntax_highlights post[:content] if add_highlights
|
|
||||||
end
|
|
||||||
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
|
||||||
f.puts post[:header].to_yaml + "---\n" + post[:content]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
# Converts each type of Tumblr post to a hash with all required
|
|
||||||
# data for Jekyll.
|
|
||||||
def self.post_to_hash(post, format)
|
|
||||||
case post['type']
|
|
||||||
when "regular"
|
|
||||||
title = post["regular-title"]
|
|
||||||
content = post["regular-body"]
|
|
||||||
when "link"
|
|
||||||
title = post["link-text"] || post["link-url"]
|
|
||||||
content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
|
|
||||||
unless post["link-description"].nil?
|
|
||||||
content << "<br/>" + post["link-description"]
|
|
||||||
end
|
|
||||||
when "photo"
|
|
||||||
title = post["photo-caption"]
|
|
||||||
max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
|
|
||||||
url = post["photo-url"] || post["photo-url-#{max_size}"]
|
|
||||||
ext = "." + post[post.keys.select { |k|
|
|
||||||
k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
|
|
||||||
}.first].split(".").last
|
|
||||||
content = "<img src=\"#{save_file(url, ext)}\"/>"
|
|
||||||
unless post["photo-link-url"].nil?
|
|
||||||
content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
|
|
||||||
end
|
|
||||||
when "audio"
|
|
||||||
if !post["id3-title"].nil?
|
|
||||||
title = post["id3-title"]
|
|
||||||
content = post.at["audio-player"] + "<br/>" + post["audio-caption"]
|
|
||||||
else
|
|
||||||
title = post["audio-caption"]
|
|
||||||
content = post.at["audio-player"]
|
|
||||||
end
|
|
||||||
when "quote"
|
|
||||||
title = post["quote-text"]
|
|
||||||
content = "<blockquote>#{post["quote-text"]}</blockquote>"
|
|
||||||
unless post["quote-source"].nil?
|
|
||||||
content << "—" + post["quote-source"]
|
|
||||||
end
|
|
||||||
when "conversation"
|
|
||||||
title = post["conversation-title"]
|
|
||||||
content = "<section><dialog>"
|
|
||||||
post["conversation"]["line"].each do |line|
|
|
||||||
content << "<dt>#{line['label']}</dt><dd>#{line}</dd>"
|
|
||||||
end
|
|
||||||
content << "</section></dialog>"
|
|
||||||
when "video"
|
|
||||||
title = post["video-title"]
|
|
||||||
content = post["video-player"]
|
|
||||||
unless post["video-caption"].nil?
|
|
||||||
content << "<br/>" + post["video-caption"]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
date = Date.parse(post['date']).to_s
|
|
||||||
title = Nokogiri::HTML(title).text
|
|
||||||
slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
|
|
||||||
{
|
|
||||||
:name => "#{date}-#{slug}.#{format}",
|
|
||||||
:header => {
|
|
||||||
"layout" => "post",
|
|
||||||
"title" => title,
|
|
||||||
"tags" => post["tags"],
|
|
||||||
},
|
|
||||||
:content => content,
|
|
||||||
:url => post["url"],
|
|
||||||
:slug => post["url-with-slug"],
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
# Create a Hash of old urls => new urls, for rewriting and
|
|
||||||
# redirects, and replace urls in each post. Instantiate Jekyll
|
|
||||||
# site/posts to get the correct permalink format.
|
|
||||||
def self.rewrite_urls_and_redirects(posts)
|
|
||||||
site = Jekyll::Site.new(Jekyll.configuration({}))
|
|
||||||
dir = File.join(File.dirname(__FILE__), "..")
|
|
||||||
urls = Hash[posts.map { |post|
|
|
||||||
# Create an initial empty file for the post so that
|
|
||||||
# we can instantiate a post object.
|
|
||||||
File.open("_posts/tumblr/#{post[:name]}", "w")
|
|
||||||
tumblr_url = URI.parse(post[:slug]).path
|
|
||||||
jekyll_url = Jekyll::Post.new(site, dir, "", "tumblr/" + post[:name]).url
|
|
||||||
redirect_dir = tumblr_url.sub(/\//, "") + "/"
|
|
||||||
FileUtils.mkdir_p redirect_dir
|
|
||||||
File.open(redirect_dir + "index.html", "w") do |f|
|
|
||||||
f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
|
|
||||||
"url=#{jekyll_url}'></head><body></body></html>"
|
|
||||||
end
|
|
||||||
[tumblr_url, jekyll_url]
|
|
||||||
}]
|
|
||||||
posts.map { |post|
|
|
||||||
urls.each do |tumblr_url, jekyll_url|
|
|
||||||
post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
|
|
||||||
end
|
|
||||||
post
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
# Uses Python's html2text to convert a post's content to
|
|
||||||
# markdown. Preserve HTML tables as per the markdown docs.
|
|
||||||
def self.html_to_markdown(content)
|
|
||||||
preserve = ["table", "tr", "th", "td"]
|
|
||||||
preserve.each do |tag|
|
|
||||||
content.gsub!(/<#{tag}/i, "$$" + tag)
|
|
||||||
content.gsub!(/<\/#{tag}/i, "||" + tag)
|
|
||||||
end
|
|
||||||
content = %x[echo '#{content.gsub("'", "''")}' | html2text]
|
|
||||||
preserve.each do |tag|
|
|
||||||
content.gsub!("$$" + tag, "<" + tag)
|
|
||||||
content.gsub!("||" + tag, "</" + tag)
|
|
||||||
end
|
|
||||||
content
|
|
||||||
end
|
|
||||||
|
|
||||||
# Wraps indented runs of lines in pygments highlight tags for posts that
# use markdown format. The language is not detected: a run is labelled
# "python" unless one of its lines (after the first) ends in a semi-colon,
# in which case it is labelled "javascript". Adapt this to your own content.
#
# Note that the line which opens a run is overwritten by the
# `{% highlight %}` tag and the line just before the one that ends the run
# is overwritten by `{% endhighlight %}`; the lines in between have their
# leading indent removed.
#
# content:: The post text.
#
# Returns the rewritten text, joined with "\n".
def self.add_syntax_highlights(content)
  rows = content.split("\n")
  indented = /^ /
  final_index = rows.size - 1
  in_code = false
  language = nil
  opened_at = nil

  rows.each_index do |idx|
    row = rows[idx]

    if in_code
      language = "javascript" if row =~ /;$/
      # The run ends on the first non-indented line, or on the last line of
      # the text even when that line is still indented.
      still_indented = !(row =~ indented).nil?
      in_code = still_indented && idx < final_index
      unless in_code
        rows[opened_at] = "{% highlight #{language} %}"
        rows[idx - 1] = "{% endhighlight %}"
      end
      rows[idx] = row.sub(indented, "")
    elsif row =~ indented
      in_code = true
      language = "python"
      opened_at = idx
    end
  end

  rows.join("\n")
end
|
|
||||||
|
|
||||||
# Optionally mirrors a remote file into the local tumblr_files/ directory.
#
# url:: Address of the file to fetch.
# ext:: Extension to enforce on the saved file name. It is appended
#       verbatim, so it presumably includes the leading dot (e.g. ".jpg").
#
# Returns +url+ untouched when @grab_images is not set; otherwise downloads
# the file and returns its site-relative path ("/tumblr_files/<name>").
def self.save_file(url, ext)
  return url unless @grab_images

  suffix = ext.to_s
  path = "tumblr_files/#{url.split('/').last}"
  # Literal suffix comparison: interpolating ext into a regex made "." match
  # any character, so "photo_jpg" was treated as already ending in ".jpg".
  path += suffix unless path.end_with?(suffix)

  FileUtils.mkdir_p "tumblr_files"
  # Block form so the remote handle is closed once the data has been read.
  # NOTE(review): relies on open-uri having patched Kernel#open, as the
  # original call did; newer Rubies need URI.open instead. Confirm the
  # target Ruby version before switching.
  data = open(url) { |remote| remote.read }
  # Binary mode: the payload is image data, not text.
  File.open(path, "wb") { |f| f.write(data) }

  "/" + path
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,51 +0,0 @@
|
||||||
# Author: Toby DiPasquale <toby@cbcg.net>
|
|
||||||
require 'fileutils'
|
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
module Jekyll
  # Migrates the published posts of a Typo blog (MySQL) into Jekyll post
  # files under _posts/.
  module Typo
    # This SQL *should* work for both MySQL and PostgreSQL, but I haven't
    # tested PostgreSQL yet (as of 2008-12-16).
    SQL = <<-EOS
    SELECT c.id id,
           c.title title,
           c.permalink slug,
           c.body body,
           c.published_at date,
           c.state state,
           COALESCE(tf.name, 'html') filter
      FROM contents c
           LEFT OUTER JOIN text_filters tf
                        ON c.text_filter_id = tf.id
    EOS

    # Writes one file per published row returned by SQL.
    #
    # dbname:: Name of the Typo database.
    # user::   Database user name.
    # pass::   Database user's password.
    # host::   Database host. Default: 'localhost'.
    def self.process(dbname, user, pass, host='localhost')
      FileUtils.mkdir_p '_posts'
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      db[SQL].each do |post|
        next unless post[:state] =~ /published/

        File.open("_posts/#{post_filename(post)}", 'w') do |f|
          f.puts front_matter(post)
          f.puts '---'
          # to_s: a NULL body would otherwise raise NoMethodError here.
          f.puts post[:body].to_s.delete("\r")
        end
      end
    end

    # Builds "YYYY-MM-DD-slug.filter" for a row.
    def self.post_filename(post)
      date = post[:date]
      # Can have more than one text filter in this field, but we just want
      # the first one for this. Fall back to 'html' (the SQL default) when
      # the field is blank instead of failing on `'.' + nil`.
      filter = post[:filter].to_s.split(' ').first || 'html'
      format('%04d-%02d-%02d-%s.%s', date.year, date.month, date.day,
             post[:slug].strip, filter)
    end

    # Builds the YAML header for a row, leaving out nil and empty values.
    def self.front_matter(post)
      { 'layout'  => 'post',
        'title'   => post[:title].to_s,
        'typo_id' => post[:id]
      }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml
    end
  end
end
|
|
|
@ -1,294 +0,0 @@
|
||||||
require 'rubygems'
|
|
||||||
require 'sequel'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
|
|
||||||
# NOTE: This converter requires Sequel and the MySQL gems.
|
|
||||||
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
|
||||||
# installed, running the following commands should work:
|
|
||||||
# $ sudo gem install sequel
|
|
||||||
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
|
||||||
|
|
||||||
module Jekyll
|
|
||||||
module WordPress
|
|
||||||
|
|
||||||
# Main migrator function. Call this to perform the migration.
#
# dbname::  The name of the database
# user::    The database user name
# pass::    The database user's password
# host::    The address of the MySQL database host. Default: 'localhost'
# options:: A hash table of configuration options.
#
# Supported options are:
#
# :table_prefix::   Prefix of database tables used by WordPress.
#                   Default: 'wp_'
# :clean_entities:: If true, convert non-ASCII characters to HTML
#                   entities in the posts, comments, titles, and
#                   names. Requires the 'htmlentities' gem to
#                   work. Default: true.
# :comments::       If true, migrate post comments too. Comments
#                   are saved in the post's YAML front matter.
#                   Default: true.
# :categories::     If true, save the post's categories in its
#                   YAML front matter. Default: true.
# :tags::           If true, save the post's tags in its
#                   YAML front matter. Default: true.
# :more_excerpt::   If true, when a post has no excerpt but
#                   does have a <!-- more --> tag, use the
#                   preceding post content as the excerpt.
#                   Default: true.
# :more_anchor::    If true, convert a <!-- more --> tag into
#                   two HTML anchors with ids "more" and
#                   "more-NNN" (where NNN is the post number).
#                   Default: true.
# :status::         Allowed post status, or an array of them. Only
#                   posts with matching status will be migrated.
#                   Known statuses are :publish, :draft, :private,
#                   and :revision. If this is nil or an empty
#                   array, all posts are migrated regardless of
#                   status. Default: [:publish].
#
def self.process(dbname, user, pass, host='localhost', options={})
  options = {
    :table_prefix   => 'wp_',
    :clean_entities => true,
    :comments       => true,
    :categories     => true,
    :tags           => true,
    :more_excerpt   => true,
    :more_anchor    => true,
    :status         => [:publish] # :draft, :private, :revision
  }.merge(options)

  if options[:clean_entities]
    # Loaded lazily on purpose: the gem is optional, and the option is
    # switched off when it is missing.
    begin
      require 'htmlentities'
    rescue LoadError
      STDERR.puts "Could not require 'htmlentities', so the " +
                  ":clean_entities option is now disabled."
      options[:clean_entities] = false
    end
  end

  FileUtils.mkdir_p("_posts")

  db = Sequel.mysql(dbname, :user => user, :password => pass,
                    :host => host, :encoding => 'utf8')

  px = options[:table_prefix]

  posts_query = "
     SELECT
       posts.ID            AS `id`,
       posts.guid          AS `guid`,
       posts.post_type     AS `type`,
       posts.post_status   AS `status`,
       posts.post_title    AS `title`,
       posts.post_name     AS `slug`,
       posts.post_date     AS `date`,
       posts.post_content  AS `content`,
       posts.post_excerpt  AS `excerpt`,
       posts.comment_count AS `comment_count`,
       users.display_name  AS `author`,
       users.user_login    AS `author_login`,
       users.user_email    AS `author_email`,
       users.user_url      AS `author_url`
     FROM #{px}posts AS `posts`
       LEFT JOIN #{px}users AS `users`
         ON posts.post_author = users.ID"

  # Array() accepts a single status as well as a list. Indexing a bare
  # symbol or string used to yield its first character, i.e. a filter such
  # as post_status = 'd'.
  statuses = Array(options[:status])
  unless statuses.empty?
    conditions = statuses.map { |status| "posts.post_status = '#{status}'" }
    posts_query << "
     WHERE " << conditions.join(" OR
           ")
  end

  db[posts_query].each do |post|
    process_post(post, db, options)
  end
end
|
|
||||||
|
|
||||||
|
|
||||||
# Converts one row of the posts query into a file under _posts/.
#
# post::    Row hash with the aliases selected by the posts query
#           (:id, :title, :slug, :date, :content, :excerpt, ...).
# db::      Database handle; db[sql] must yield row hashes.
# options:: The merged option hash built by process.
#
# The file is named "YYYY-MM-DD-slug.markdown" and contains a YAML front
# matter block (nil and empty values omitted), a "---" line, and the post
# content. The _posts directory must already exist.
def self.process_post(post, db, options)
  title = post[:title].to_s
  title = clean_entities(title) if options[:clean_entities]

  slug = post[:slug]
  slug = sluggify(title) if !slug || slug.empty?

  date = post[:date] || Time.now
  name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                         date.day, slug]

  # dup: the <!-- more --> substitution below must not modify the row's
  # own string.
  content = post[:content].to_s.dup
  content = clean_entities(content) if options[:clean_entities]

  excerpt = post[:excerpt].to_s

  more_tag = /<!-- *more *-->/
  more_index = content.index(more_tag)
  more_anchor = nil
  if more_index
    if options[:more_excerpt] && excerpt.empty?
      excerpt = content[0...more_index]
    end
    if options[:more_anchor]
      # Previously this value was stored in an unused local, so the
      # 'more_anchor' front matter key could never be written.
      more_anchor = "more"
      content.sub!(more_tag,
                   "<a id=\"more\"></a>" +
                   "<a id=\"more-#{post[:id]}\"></a>")
    end
  end

  categories, tags = post_terms(post, db, options)
  comments = post_comments(post, db, options)

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'        => post[:type].to_s,
    'status'        => post[:status].to_s,
    'published'     => (post[:status].to_s == "publish"),
    'title'         => title.to_s,
    'author'        => post[:author].to_s,
    'author_login'  => post[:author_login].to_s,
    'author_email'  => post[:author_email].to_s,
    'author_url'    => post[:author_url].to_s,
    'excerpt'       => excerpt,
    'more_anchor'   => more_anchor,
    'wordpress_id'  => post[:id],
    'wordpress_url' => post[:guid].to_s,
    'date'          => date,
    'categories'    => options[:categories] ? categories : nil,
    'tags'          => options[:tags] ? tags : nil,
    'comments'      => options[:comments] ? comments : nil
  }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

  # Write out the data and content to file
  File.open("_posts/#{name}", "w") do |f|
    f.puts data
    f.puts "---"
    f.puts content
  end
end

# Internal helper for process_post: looks up the post's taxonomy terms.
# Returns [categories, tags]; both are empty when neither option is set.
def self.post_terms(post, db, options)
  categories = []
  tags = []
  return [categories, tags] unless options[:categories] || options[:tags]

  px = options[:table_prefix]
  cquery =
    "SELECT
       terms.name AS `name`,
       ttax.taxonomy AS `type`
     FROM
       #{px}terms AS `terms`,
       #{px}term_relationships AS `trels`,
       #{px}term_taxonomy AS `ttax`
     WHERE
       trels.object_id = '#{post[:id]}' AND
       trels.term_taxonomy_id = ttax.term_taxonomy_id AND
       terms.term_id = ttax.term_id"

  db[cquery].each do |term|
    bucket =
      if options[:categories] && term[:type] == "category"
        categories
      elsif options[:tags] && term[:type] == "post_tag"
        tags
      end
    next unless bucket

    bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
  end

  [categories, tags]
end

# Internal helper for process_post: loads the post's non-spam comments,
# ordered by comment id. Returns [] when comments are disabled or the post
# reports a comment count of zero.
def self.post_comments(post, db, options)
  return [] unless options[:comments] && post[:comment_count].to_i > 0

  px = options[:table_prefix]
  cquery =
    "SELECT
       comment_ID           AS `id`,
       comment_author       AS `author`,
       comment_author_email AS `author_email`,
       comment_author_url   AS `author_url`,
       comment_date         AS `date`,
       comment_date_gmt     AS `date_gmt`,
       comment_content      AS `content`
     FROM #{px}comments
     WHERE
       comment_post_ID = '#{post[:id]}' AND
       comment_approved != 'spam'"

  comments = []
  db[cquery].each do |comment|
    comcontent = comment[:content].to_s
    if comcontent.respond_to?(:force_encoding)
      comcontent.force_encoding("UTF-8")
    end
    comcontent = clean_entities(comcontent) if options[:clean_entities]

    comauthor = comment[:author].to_s
    comauthor = clean_entities(comauthor) if options[:clean_entities]

    comments << {
      'id'           => comment[:id].to_i,
      'author'       => comauthor,
      'author_email' => comment[:author_email].to_s,
      'author_url'   => comment[:author_url].to_s,
      'date'         => comment[:date].to_s,
      'date_gmt'     => comment[:date_gmt].to_s,
      'content'      => comcontent
    }
  end

  comments.sort_by { |comment| comment['id'] }
end
|
|
||||||
|
|
||||||
|
|
||||||
# Replaces characters in +text+ with named HTML entities (via the
# htmlentities gem) while leaving HTML markup usable.
#
# text:: String to convert; nil is treated as an empty string.
#
# Returns a new string. The argument itself is left untouched.
def self.clean_entities( text )
  # dup so force_encoding never touches (or fails on) the caller's string.
  text = text.to_s.dup
  if text.respond_to?(:force_encoding)
    text.force_encoding("UTF-8")
  end
  text = HTMLEntities.new.encode(text, :named)
  # We don't want to convert these, it would break all
  # HTML tags in the post and comments.
  # "&amp;" must be undone last: a literal "&lt;" in the source is encoded
  # as "&amp;lt;", and decoding "&amp;" first would turn it into a real "<".
  # Both spellings of the apostrophe entity are handled.
  [
    ["&lt;",   "<"],
    ["&gt;",   ">"],
    ["&quot;", '"'],
    ["&apos;", "'"],
    ["&#39;",  "'"],
    ["&amp;",  "&"]
  ].each do |entity, char|
    text.gsub!(entity, char)
  end
  text
end
|
|
||||||
|
|
||||||
|
|
||||||
# Turns a post title into a permalink slug: lower-cased, with every run of
# characters other than ASCII letters and digits collapsed into one hyphen
# and no leading or trailing hyphen.
#
# When the optional 'unidecode' gem can be loaded the title is first
# transliterated with to_ascii; otherwise a hint is printed to STDERR and
# the title is used as is.
def self.sluggify( title )
  ascii_title =
    begin
      require 'unidecode'
      title.to_ascii
    rescue LoadError
      STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      title
    end

  words = ascii_title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip
  words.tr(" ", "-")
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,70 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
require 'rubygems'
|
|
||||||
require 'hpricot'
|
|
||||||
require 'fileutils'
|
|
||||||
require 'yaml'
|
|
||||||
require 'time'
|
|
||||||
|
|
||||||
module Jekyll
  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Writes one file per <item> of the export into "_<post_type>s/" and
    # prints how many items of each type were imported.
    #
    # filename:: Path of the export file. Default: "wordpress.xml".
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip
        permalink_title = item.at('wp:post_name').inner_text
        # Fallback to "prettified" title if post_name is empty (can happen)
        if permalink_title == ""
          # Slashes are folded into the hyphens as well: left in place they
          # would act as directory separators in the output path.
          permalink_title = title.downcase.gsub(%r{[\s/\\]+}, '-')
        end

        date = Time.parse(item.at('wp:post_date').inner_text)
        status = item.at('wp:status').inner_text
        published = (status == "publish")

        type = item.at('wp:post_type').inner_text
        tags = (item/:category).map { |c| c.inner_text }.reject { |c| c == 'Uncategorized' }.uniq

        metas = {}
        item.search("wp:postmeta").each do |meta|
          key = meta.at('wp:meta_key').inner_text
          metas[key] = meta.at('wp:meta_value').inner_text
        end

        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
        header = {
          'layout'    => type,
          'title'     => title,
          'tags'      => tags,
          'status'    => status,
          'type'      => type,
          'published' => published,
          'meta'      => metas
        }

        FileUtils.mkdir_p "_#{type}s"
        File.open("_#{type}s/#{name}", "w") do |f|
          f.puts header.to_yaml
          f.puts '---'
          f.puts item.at('content:encoded').inner_text
        end

        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end
  end
end
|
|
Loading…
Reference in New Issue