diff --git a/.gitignore b/.gitignore index 68bbac59..66fa741f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ +Gemfile.lock test/dest *.gem pkg/ *.swp *~ _site/ +.bundle/ diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..e45e65f8 --- /dev/null +++ b/Gemfile @@ -0,0 +1,2 @@ +source :rubygems +gemspec diff --git a/History.txt b/History.txt index 0034195a..ef9a75ee 100644 --- a/History.txt +++ b/History.txt @@ -1,3 +1,19 @@ +== HEAD + * Major Enhancements + * Add command line importer functionality (#253) + * Minor Enhancements + * Switch to Albino gem + * Bundler support + * Use English library to avoid hoops (#292) + * Add Posterous importer (#254) + * Fixes for Wordpress importer (#274, #252, #271) + * Better error message for invalid post date (#291) + * Print formatted fatal exceptions to stdout on build failure + * Add Tumblr importer (#323) + * Add Enki importer (#320) + * Bug Fixes + * Secure additional path exploits + == 0.10.0 / 2010-12-16 * Bug Fixes * Add --no-server option. @@ -75,7 +91,7 @@ * Empty tags causes error in read_posts (#84) * Fix pagination to adhere to read/render/write paradigm * Test Enhancement - * cucumber features no longer use site.ports.first where a better + * cucumber features no longer use site.posts.first where a better alternative is available == 0.5.6 / 2010-01-08 diff --git a/README.textile b/README.textile index ebc4bf63..4cefb9ff 100644 --- a/README.textile +++ b/README.textile @@ -27,7 +27,6 @@ h2. Runtime Dependencies * Classifier: Generating related posts (Ruby) * Maruku: Default markdown engine (Ruby) * Directory Watcher: Auto-regeneration of sites (Ruby) -* Open4: Talking to pygments for syntax highlighting (Ruby) * Pygments: Syntax highlighting (Python) h2. Developer Dependencies @@ -35,6 +34,7 @@ h2. Developer Dependencies * Shoulda: Test framework (Ruby) * RR: Mocking (Ruby) * RedGreen: Nicer test output (Ruby) +* RDiscount: Discount Markdown Processor (Ruby) h2. 
License diff --git a/Rakefile b/Rakefile index 11551f90..923816c0 100644 --- a/Rakefile +++ b/Rakefile @@ -2,6 +2,8 @@ require 'rubygems' require 'rake' require 'date' +$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), *%w[lib])) + ############################################################################# # # Helper functions diff --git a/bin/jekyll b/bin/jekyll index 984aa43b..753eedad 100755 --- a/bin/jekyll +++ b/bin/jekyll @@ -9,7 +9,8 @@ Basic Command Line Usage: jekyll # . -> ./_site jekyll # . -> jekyll # -> - + jekyll import # imports posts using named import script + Configuration is read from '/_config.yml' but can be overriden using the following options: @@ -18,11 +19,37 @@ HELP require 'optparse' require 'jekyll' + exec = {} options = {} opts = OptionParser.new do |opts| opts.banner = help + opts.on("--file [PATH]", "File to import from") do |import_file| + options['file'] = import_file + end + + opts.on("--dbname [TEXT]", "DB to import from") do |import_dbname| + options['dbname'] = import_dbname + end + + opts.on("--user [TEXT]", "Username to use when importing") do |import_user| + options['user'] = import_user + end + + opts.on("--pass [TEXT]", "Password to use when importing") do |import_pass| + options['pass'] = import_pass + end + + opts.on("--host [HOST ADDRESS]", "Host to import from") do |import_host| + options['host'] = import_host + end + + opts.on("--site [SITE NAME]", "Site to import from") do |import_site| + options['site'] = import_site + end + + opts.on("--[no-]safe", "Safe mode (default unsafe)") do |safe| options['safe'] = safe end @@ -105,6 +132,61 @@ end # Read command line options into `options` hash opts.parse! + +# Check for import stuff +if ARGV.size > 0 + if ARGV[0] == 'import' + migrator = ARGV[1] + + if migrator.nil? + puts "Invalid options. Run `jekyll --help` for assistance." 
+ exit(1) + else + migrator = migrator.downcase + end + + cmd_options = [] + ['file', 'dbname', 'user', 'pass', 'host', 'site'].each do |p| + cmd_options << "\"#{options[p]}\"" unless options[p].nil? + end + + # It's import time + puts "Importing..." + + # Ideally, this shouldn't be necessary. Maybe parse the actual + # src files for the migrator name? + migrators = { + :posterous => 'Posterous', + :wordpressdotcom => 'WordpressDotCom', + :wordpress => 'Wordpress', + :csv => 'CSV', + :drupal => 'Drupal', + :enki => 'Enki', + :mephisto => 'Mephisto', + :mt => 'MT', + :textpattern => 'TextPattern', + :tumblr => 'Tumblr', + :typo => 'Typo' + } + + app_root = File.join(File.dirname(__FILE__), '..') + + require "#{app_root}/lib/jekyll/migrators/#{migrator}" + + if Jekyll.const_defined?(migrators[migrator.to_sym]) + migrator_class = Jekyll.const_get(migrators[migrator.to_sym]) + migrator_class.process(*cmd_options) + else + puts "Invalid migrator. Run `jekyll --help` for assistance." + exit(1) + end + + exit(0) + end +end + + + # Get source and destintation from command line case ARGV.size when 0 @@ -162,7 +244,11 @@ else puts "Building site: #{source} -> #{destination}" begin site.process - rescue Jekyll::FatalException + rescue Jekyll::FatalException => e + puts + puts "ERROR: YOUR SITE COULD NOT BE BUILT:" + puts "------------------------------------" + puts e.message exit(1) end puts "Successfully generated site: #{source} -> #{destination}" diff --git a/features/post_data.feature b/features/post_data.feature index 36fe9382..c6d45b04 100644 --- a/features/post_data.feature +++ b/features/post_data.feature @@ -31,10 +31,10 @@ Feature: Post data And I have the following post: | title | date | layout | content | | Star Wars | 3/27/2009 | simple | Luke, I am your father. 
| - And I have a simple layout that contains "Post date: {{ page.date }}" + And I have a simple layout that contains "Post date: {{ page.date | date_to_string }}" When I run jekyll Then the _site directory should exist - And I should see "Post date: Fri Mar 27" in "_site/2009/03/27/star-wars.html" + And I should see "Post date: 27 Mar 2009" in "_site/2009/03/27/star-wars.html" Scenario: Use post.id variable Given I have a _posts directory diff --git a/features/support/env.rb b/features/support/env.rb index 0f513754..3166ce9c 100644 --- a/features/support/env.rb +++ b/features/support/env.rb @@ -14,3 +14,6 @@ def run_jekyll(opts = {}) command << " >> /dev/null 2>&1" if opts[:debug].nil? system command end + +# work around "invalid option: --format" cucumber bug (see #296) +Test::Unit.run = true if RUBY_VERSION < '1.9' diff --git a/jekyll.gemspec b/jekyll.gemspec index 444605ad..2b2efdba 100644 --- a/jekyll.gemspec +++ b/jekyll.gemspec @@ -18,22 +18,23 @@ Gem::Specification.new do |s| s.require_paths = %w[lib] s.executables = ["jekyll"] - s.default_executable = 'jekyll' s.rdoc_options = ["--charset=UTF-8"] s.extra_rdoc_files = %w[README.textile LICENSE] - s.add_runtime_dependency('liquid', [">= 1.9.0"]) - s.add_runtime_dependency('classifier', [">= 1.3.1"]) - s.add_runtime_dependency('directory_watcher', [">= 1.1.1"]) - s.add_runtime_dependency('maruku', [">= 0.5.9"]) + s.add_runtime_dependency('liquid', ">= 1.9.0") + s.add_runtime_dependency('classifier', ">= 1.3.1") + s.add_runtime_dependency('directory_watcher', ">= 1.1.1") + s.add_runtime_dependency('maruku', ">= 0.5.9") + s.add_runtime_dependency('kramdown', ">= 0.13.2") + s.add_runtime_dependency('albino', ">= 1.3.2") - s.add_development_dependency('redgreen', [">= 4.2.1"]) - s.add_development_dependency('shoulda', [">= 4.2.1"]) - s.add_development_dependency('rr', [">= 4.2.1"]) - s.add_development_dependency('cucumber', [">= 4.2.1"]) - s.add_development_dependency('RedCloth', [">= 4.2.1"]) - 
s.add_development_dependency('kramdown', [">= 0.12.0"]) + s.add_development_dependency('redgreen', ">= 1.2.2") + s.add_development_dependency('shoulda', ">= 2.11.3") + s.add_development_dependency('rr', ">= 1.0.2") + s.add_development_dependency('cucumber', ">= 0.10.0") + s.add_development_dependency('RedCloth', ">= 4.2.1") + s.add_development_dependency('rdiscount', ">= 1.6.5") # = MANIFEST = s.files = %w[ diff --git a/lib/jekyll.rb b/lib/jekyll.rb index 59cd1dec..3ffc36b4 100644 --- a/lib/jekyll.rb +++ b/lib/jekyll.rb @@ -19,10 +19,12 @@ require 'rubygems' require 'fileutils' require 'time' require 'yaml' +require 'English' # 3rd party require 'liquid' require 'maruku' +require 'albino' # internal requires require 'jekyll/core_ext' @@ -32,7 +34,6 @@ require 'jekyll/layout' require 'jekyll/page' require 'jekyll/post' require 'jekyll/filters' -require 'jekyll/albino' require 'jekyll/static_file' require 'jekyll/errors' @@ -96,8 +97,8 @@ module Jekyll } } - # Generate a Jekyll configuration Hash by merging the default options - # with anything in _config.yml, and adding the given options on top. + # Public: Generate a Jekyll configuration Hash by merging the default + # options with anything in _config.yml, and adding the given options on top. # # override - A Hash of config directives that override any options in both # the defaults and the config file. See Jekyll::DEFAULTS for a diff --git a/lib/jekyll/albino.rb b/lib/jekyll/albino.rb deleted file mode 100644 index f71e6382..00000000 --- a/lib/jekyll/albino.rb +++ /dev/null @@ -1,120 +0,0 @@ -## -# Wrapper for the Pygments command line tool, pygmentize. -# -# Pygments: http://pygments.org/ -# -# Assumes pygmentize is in the path. If not, set its location -# with Albino.bin = '/path/to/pygmentize' -# -# Use like so: -# -# @syntaxer = Albino.new('/some/file.rb', :ruby) -# puts @syntaxer.colorize -# -# This'll print out an HTMLized, Ruby-highlighted version -# of '/some/file.rb'. 
-# -# To use another formatter, pass it as the third argument: -# -# @syntaxer = Albino.new('/some/file.rb', :ruby, :bbcode) -# puts @syntaxer.colorize -# -# You can also use the #colorize class method: -# -# puts Albino.colorize('/some/file.rb', :ruby) -# -# Another also: you get a #to_s, for somewhat nicer use in Rails views. -# -# ... helper file ... -# def highlight(text) -# Albino.new(text, :ruby) -# end -# -# ... view file ... -# <%= highlight text %> -# -# The default lexer is 'text'. You need to specify a lexer yourself; -# because we are using STDIN there is no auto-detect. -# -# To see all lexers and formatters available, run `pygmentize -L`. -# -# Chris Wanstrath // chris@ozmm.org -# GitHub // http://github.com -# - -class Albino - @@bin = Rails.development? ? 'pygmentize' : '/usr/bin/pygmentize' rescue 'pygmentize' - - def self.bin=(path) - @@bin = path - end - - def self.colorize(*args) - new(*args).colorize - end - - def initialize(target, lexer = :text, format = :html) - @target = target - @options = { :l => lexer, :f => format, :O => 'encoding=utf-8' } - end - - def execute(command) - output = '' - IO.popen(command, mode='r+') do |p| - p.write @target - p.close_write - output = p.read.strip - end - output - end - - def colorize(options = {}) - html = execute(@@bin + convert_options(options)) - # Work around an RDiscount bug: http://gist.github.com/97682 - html.to_s.sub(%r{\Z}, "\n") - end - alias_method :to_s, :colorize - - def convert_options(options = {}) - @options.merge(options).inject('') do |string, (flag, value)| - string + " -#{flag} #{value}" - end - end -end - -if $0 == __FILE__ - require 'rubygems' - require 'test/spec' - require 'mocha' - begin require 'redgreen'; rescue LoadError; end - - context "Albino" do - setup do - @syntaxer = Albino.new(__FILE__, :ruby) - end - - specify "defaults to text" do - syntaxer = Albino.new(__FILE__) - syntaxer.expects(:execute).with('pygmentize -f html -l text').returns(true) - syntaxer.colorize - end - 
- specify "accepts options" do - @syntaxer.expects(:execute).with('pygmentize -f html -l ruby').returns(true) - @syntaxer.colorize - end - - specify "works with strings" do - syntaxer = Albino.new('class New; end', :ruby) - assert_match %r(highlight), syntaxer.colorize - end - - specify "aliases to_s" do - assert_equal @syntaxer.colorize, @syntaxer.to_s - end - - specify "class method colorize" do - assert_equal @syntaxer.colorize, Albino.colorize(__FILE__, :ruby) - end - end -end diff --git a/lib/jekyll/convertible.rb b/lib/jekyll/convertible.rb index 3c2b93e1..1723fa0d 100644 --- a/lib/jekyll/convertible.rb +++ b/lib/jekyll/convertible.rb @@ -10,21 +10,22 @@ # self.output= module Jekyll module Convertible - # Return the contents as a string + # Returns the contents as a String. def to_s self.content || '' end - # Read the YAML frontmatter - # +base+ is the String path to the dir containing the file - # +name+ is the String filename of the file + # Read the YAML frontmatter. # - # Returns nothing + # base - The String path to the dir containing the file. + # name - The String filename of the file. + # + # Returns nothing. def read_yaml(base, name) self.content = File.read(File.join(base, name)) if self.content =~ /^(---\s*\n.*?\n?)^(---\s*$\n?)/m - self.content = self.content[($1.size + $2.size)..-1] + self.content = $POSTMATCH begin self.data = YAML.load($1) @@ -38,42 +39,46 @@ module Jekyll # Transform the contents based on the content type. # - # Returns nothing + # Returns nothing. def transform self.content = converter.convert(self.content) end - # Determine the extension depending on content_type + # Determine the extension depending on content_type. # - # Returns the extensions for the output file + # Returns the String extension for the output file. + # e.g. ".html" for an HTML output file. def output_ext converter.output_ext(self.ext) end # Determine which converter to use based on this convertible's - # extension + # extension. 
+ # + # Returns the Converter instance. def converter @converter ||= self.site.converters.find { |c| c.matches(self.ext) } end - # Add any necessary layouts to this convertible document - # +layouts+ is a Hash of {"name" => "layout"} - # +site_payload+ is the site payload hash + # Add any necessary layouts to this convertible document. # - # Returns nothing + # payload - The site payload Hash. + # layouts - A Hash of {"name" => "layout"}. + # + # Returns nothing. def do_layout(payload, layouts) info = { :filters => [Jekyll::Filters], :registers => { :site => self.site } } # render and transform content (this becomes the final content of the object) payload["pygments_prefix"] = converter.pygments_prefix payload["pygments_suffix"] = converter.pygments_suffix - + begin self.content = Liquid::Template.parse(self.content).render(payload, info) rescue => e puts "Liquid Exception: #{e.message} in #{self.data["layout"]}" end - + self.transform # output keeps track of what will finally be written diff --git a/lib/jekyll/filters.rb b/lib/jekyll/filters.rb index 3356eadf..dbd9f511 100644 --- a/lib/jekyll/filters.rb +++ b/lib/jekyll/filters.rb @@ -3,18 +3,43 @@ require 'uri' module Jekyll module Filters + # Convert a Textile string into HTML output. + # + # input - The Textile String to convert. + # + # Returns the HTML formatted String. def textilize(input) TextileConverter.new.convert(input) end + # Format a date in short format e.g. "27 Jan 2011". + # + # date - The Time to format. + # + # Returns the formatted String. def date_to_string(date) date.strftime("%d %b %Y") end + # Format a date in long format e.g. "27 January 2011". + # + # date - The Time to format. + # + # Returns the formatted String. def date_to_long_string(date) date.strftime("%d %B %Y") end + # Format a date for use in XML. + # + # date - The Time to format. + # + # Examples + # + # date_to_xmlschema(Time.now) + # # => "2011-04-24T20:34:46+08:00" + # + # Returns the formatted String. 
def date_to_xmlschema(date) date.xmlschema end @@ -23,6 +48,17 @@ module Jekyll CGI.escapeHTML(input) end + # CGI escape a string for use in a URL. Replaces any special characters + # with appropriate %XX replacements. + # + # input - The String to escape. + # + # Examples + # + # cgi_escape('foo,bar;baz?') + # # => "foo%2Cbar%3Bbaz%3F" + # + # Returns the escaped String. def cgi_escape(input) CGI::escape(input) end @@ -31,10 +67,26 @@ module Jekyll URI.escape(input) end + # Count the number of words in the input string. + # + # input - The String on which to operate. + # + # Returns the Integer word count. def number_of_words(input) input.split.length end + # Join an array of things into a string by separating with commas and the + # word "and" for the last one. + # + # array - The Array of Strings to join. + # + # Examples + # + # array_to_sentence_string(["apples", "oranges", "grapes"]) + # # => "apples, oranges, and grapes" + # + # Returns the formatted String. def array_to_sentence_string(array) connector = "and" case array.length diff --git a/lib/jekyll/generators/pagination.rb b/lib/jekyll/generators/pagination.rb index 59d71093..847de4f8 100644 --- a/lib/jekyll/generators/pagination.rb +++ b/lib/jekyll/generators/pagination.rb @@ -1,8 +1,14 @@ module Jekyll class Pagination < Generator + # This generator is safe from arbitrary code execution. safe true + # Generate paginated pages if necessary. + # + # site - The Site. + # + # Returns nothing. def generate(site) site.pages.dup.each do |page| paginate(site, page) if Pager.pagination_enabled?(site.config, page.name) @@ -10,9 +16,11 @@ module Jekyll end # Paginates the blog's posts. Renders the index.html file into paginated - # directories, ie: page2/index.html, page3/index.html, etc and adds more + # directories, e.g.: page2/index.html, page3/index.html, etc and adds more # site-wide data. - # +page+ is the index.html Page that requires pagination + # + # site - The Site. 
+ # page - The index.html Page that requires pagination. # # {"paginator" => { "page" => , # "per_page" => , @@ -36,22 +44,38 @@ module Jekyll end end end - end class Pager attr_reader :page, :per_page, :posts, :total_posts, :total_pages, :previous_page, :next_page + # Calculate the number of pages. + # + # all_posts - The Array of all Posts. + # per_page - The Integer of entries per page. + # + # Returns the Integer number of pages. def self.calculate_pages(all_posts, per_page) - num_pages = all_posts.size / per_page.to_i - num_pages = num_pages + 1 if all_posts.size % per_page.to_i != 0 - num_pages + (all_posts.size.to_f / per_page.to_i).ceil end + # Determine if pagination is enabled for a given file. + # + # config - The configuration Hash. + # file - The String filename of the file. + # + # Returns true if pagination is enabled, false otherwise. def self.pagination_enabled?(config, file) file == 'index.html' && !config['paginate'].nil? end + # Initialize a new Pager. + # + # config - The Hash configuration of the site. + # page - The Integer page number. + # all_posts - The Array of all the site's Posts. + # num_pages - The Integer number of pages or nil if you'd like the number + # of pages calculated. def initialize(config, page, all_posts, num_pages = nil) @page = page @per_page = config['paginate'].to_i @@ -70,6 +94,9 @@ module Jekyll @next_page = @page != @total_pages ? @page + 1 : nil end + # Convert this Pager's data to a Hash suitable for use by Liquid. + # + # Returns the Hash representation of this Pager. def to_liquid { 'page' => page, @@ -83,5 +110,4 @@ module Jekyll end end - end diff --git a/lib/jekyll/layout.rb b/lib/jekyll/layout.rb index bc048a5d..15845730 100644 --- a/lib/jekyll/layout.rb +++ b/lib/jekyll/layout.rb @@ -3,16 +3,23 @@ module Jekyll class Layout include Convertible - attr_accessor :site + # Gets the Site object. + attr_reader :site + + # Gets/Sets the extension of this layout. 
attr_accessor :ext - attr_accessor :data, :content + + # Gets/Sets the Hash that holds the metadata for this layout. + attr_accessor :data + + # Gets/Sets the content of this layout. + attr_accessor :content # Initialize a new Layout. - # +site+ is the Site - # +base+ is the String path to the - # +name+ is the String filename of the post file # - # Returns + # site - The Site. + # base - The String path to the source. + # name - The String filename of the post file. def initialize(site, base, name) @site = site @base = base @@ -24,13 +31,14 @@ module Jekyll self.read_yaml(base, name) end - # Extract information from the layout filename - # +name+ is the String filename of the layout file + # Extract information from the layout filename. # - # Returns nothing + # name - The String filename of the layout file. + # + # Returns nothing. def process(name) self.ext = File.extname(name) end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/csv.rb b/lib/jekyll/migrators/csv.rb index 657b35b7..ce5203b7 100644 --- a/lib/jekyll/migrators/csv.rb +++ b/lib/jekyll/migrators/csv.rb @@ -1,7 +1,7 @@ module Jekyll module CSV - #Reads a csv with title, permalink, body, published_at, and filter. - #It creates a post file for each row in the csv + # Reads a csv with title, permalink, body, published_at, and filter. + # It creates a post file for each row in the csv def self.process(file = "posts.csv") FileUtils.mkdir_p "_posts" posts = 0 @@ -23,4 +23,4 @@ title: #{row[0]} "Created #{posts} posts!" end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/drupal.rb b/lib/jekyll/migrators/drupal.rb index be198197..32e335cc 100644 --- a/lib/jekyll/migrators/drupal.rb +++ b/lib/jekyll/migrators/drupal.rb @@ -11,12 +11,18 @@ require 'yaml' module Jekyll module Drupal - - # Reads a MySQL database via Sequel and creates a post file for each - # post in wp_posts that has post_status = 'publish'. 
- # This restriction is made because 'draft' posts are not guaranteed to - # have valid dates. - QUERY = "SELECT node.nid, node.title, node_revisions.body, node.created, node.status FROM node, node_revisions WHERE (node.type = 'blog' OR node.type = 'story') AND node.vid = node_revisions.vid" + # Reads a MySQL database via Sequel and creates a post file for each post + # in wp_posts that has post_status = 'publish'. This restriction is made + # because 'draft' posts are not guaranteed to have valid dates. + QUERY = "SELECT node.nid, \ + node.title, \ + node_revisions.body, \ + node.created, \ + node.status \ + FROM node, \ + node_revisions \ + WHERE (node.type = 'blog' OR node.type = 'story') \ + AND node.vid = node_revisions.vid" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') diff --git a/lib/jekyll/migrators/enki.rb b/lib/jekyll/migrators/enki.rb new file mode 100644 index 00000000..61cb2562 --- /dev/null +++ b/lib/jekyll/migrators/enki.rb @@ -0,0 +1,49 @@ +# Adapted by Rodrigo Pinto +# Based on typo.rb by Toby DiPasquale + +require 'fileutils' +require 'rubygems' +require 'sequel' + +module Jekyll + module Enki + SQL = <<-EOS + SELECT p.id, + p.title, + p.slug, + p.body, + p.published_at as date, + p.cached_tag_list as tags + FROM posts p + EOS + + # Just working with postgres, but can be easily adapted + # to work with both mysql and postgres. 
+ def self.process(dbname, user, pass, host = 'localhost') + FileUtils.mkdir_p('_posts') + db = Sequel.postgres(:database => dbname, + :user => user, + :password => pass, + :host => host, + :encoding => 'utf8') + + db[SQL].each do |post| + name = [ sprintf("%.04d", post[:date].year), + sprintf("%.02d", post[:date].month), + sprintf("%.02d", post[:date].day), + post[:slug].strip ].join('-') + name += '.textile' + + File.open("_posts/#{name}", 'w') do |f| + f.puts({ 'layout' => 'post', + 'title' => post[:title].to_s, + 'enki_id' => post[:id], + 'categories' => post[:tags] + }.delete_if { |k, v| v.nil? || v == '' }.to_yaml) + f.puts '---' + f.puts post[:body].delete("\r") + end + end + end + end +end diff --git a/lib/jekyll/migrators/marley.rb b/lib/jekyll/migrators/marley.rb index 390a5a28..21bcead5 100644 --- a/lib/jekyll/migrators/marley.rb +++ b/lib/jekyll/migrators/marley.rb @@ -3,7 +3,6 @@ require 'fileutils' module Jekyll module Marley - def self.regexp { :id => /^\d{0,4}-{0,1}(.*)$/, :title => /^#\s*(.*)\s+$/, diff --git a/lib/jekyll/migrators/mephisto.rb b/lib/jekyll/migrators/mephisto.rb index 9a3e33ae..7622c722 100644 --- a/lib/jekyll/migrators/mephisto.rb +++ b/lib/jekyll/migrators/mephisto.rb @@ -36,11 +36,22 @@ module Jekyll # This query will pull blog posts from all entries across all blogs. If # you've got unpublished, deleted or otherwise hidden posts please sift # through the created posts to make sure nothing is accidently published. 
- - QUERY = "SELECT id, permalink, body, published_at, title FROM contents WHERE user_id = 1 AND type = 'Article' AND published_at IS NOT NULL ORDER BY published_at" + QUERY = "SELECT id, \ + permalink, \ + body, \ + published_at, \ + title \ + FROM contents \ + WHERE user_id = 1 AND \ + type = 'Article' AND \ + published_at IS NOT NULL \ + ORDER BY published_at" def self.process(dbname, user, pass, host = 'localhost') - db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') + db = Sequel.mysql(dbname, :user => user, + :password => pass, + :host => host, + :encoding => 'utf8') FileUtils.mkdir_p "_posts" @@ -49,16 +60,10 @@ module Jekyll slug = post[:permalink] date = post[:published_at] content = post[:body] -# more_content = '' - # Be sure to include the body and extended body. -# if more_content != nil -# content = content + " \n" + more_content -# end - - # Ideally, this script would determine the post format (markdown, html - # , etc) and create files with proper extensions. At this point it - # just assumes that markdown will be acceptable. + # Ideally, this script would determine the post format (markdown, + # html, etc) and create files with proper extensions. At this point + # it just assumes that markdown will be acceptable. name = [date.year, date.month, date.day, slug].join('-') + ".markdown" data = { diff --git a/lib/jekyll/migrators/mt.rb b/lib/jekyll/migrators/mt.rb index c9d79b7d..048c84db 100644 --- a/lib/jekyll/migrators/mt.rb +++ b/lib/jekyll/migrators/mt.rb @@ -18,7 +18,14 @@ module Jekyll # This query will pull blog posts from all entries across all blogs. If # you've got unpublished, deleted or otherwise hidden posts please sift # through the created posts to make sure nothing is accidently published. 
- QUERY = "SELECT entry_id, entry_basename, entry_text, entry_text_more, entry_authored_on, entry_title, entry_convert_breaks FROM mt_entry" + QUERY = "SELECT entry_id, \ + entry_basename, \ + entry_text, \ + entry_text_more, \ + entry_authored_on, \ + entry_title, \ + entry_convert_breaks \ + FROM mt_entry" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') @@ -38,17 +45,18 @@ module Jekyll content = content + " \n" + more_content end - # Ideally, this script would determine the post format (markdown, html - # , etc) and create files with proper extensions. At this point it - # just assumes that markdown will be acceptable. - name = [date.year, date.month, date.day, slug].join('-') + '.' + self.suffix(entry_convert_breaks) + # Ideally, this script would determine the post format (markdown, + # html, etc) and create files with proper extensions. At this point + # it just assumes that markdown will be acceptable. + name = [date.year, date.month, date.day, slug].join('-') + '.' + + self.suffix(entry_convert_breaks) data = { 'layout' => 'post', 'title' => title.to_s, 'mt_id' => post[:entry_id], 'date' => date - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml + }.delete_if { |k,v| v.nil? || v == '' }.to_yaml File.open("_posts/#{name}", "w") do |f| f.puts data @@ -60,17 +68,18 @@ module Jekyll def self.suffix(entry_type) if entry_type.nil? || entry_type.include?("markdown") - # The markdown plugin I have saves this as "markdown_with_smarty_pants", so I just look for "markdown". - "markdown" - elsif entry_type.include?("textile") - # This is saved as "textile_2" on my installation of MT 5.1. - "textile" - elsif entry_type == "0" || entry_type.include?("richtext") - # richtext looks to me like it's saved as HTML, so I include it here. - "html" - else - # Other values might need custom work. 
- entry_type + # The markdown plugin I have saves this as + # "markdown_with_smarty_pants", so I just look for "markdown". + "markdown" + elsif entry_type.include?("textile") + # This is saved as "textile_2" on my installation of MT 5.1. + "textile" + elsif entry_type == "0" || entry_type.include?("richtext") + # Richtext looks to me like it's saved as HTML, so I include it here. + "html" + else + # Other values might need custom work. + entry_type end end end diff --git a/lib/jekyll/migrators/posterous.rb b/lib/jekyll/migrators/posterous.rb new file mode 100644 index 00000000..6cfc2430 --- /dev/null +++ b/lib/jekyll/migrators/posterous.rb @@ -0,0 +1,68 @@ +require 'rubygems' +require 'jekyll' +require 'fileutils' +require 'net/http' +require 'uri' +require "json" + +# ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, blog)' + +module Jekyll + module Posterous + def self.fetch(uri_str, limit = 10) + # You should choose better exception. + raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0 + + response = nil + Net::HTTP.start('posterous.com') do |http| + req = Net::HTTP::Get.new(uri_str) + req.basic_auth @email, @pass + response = http.request(req) + end + + case response + when Net::HTTPSuccess then response + when Net::HTTPRedirection then fetch(response['location'], limit - 1) + else response.error! + end + end + + def self.process(email, pass, blog = 'primary') + @email, @pass = email, pass + @api_token = JSON.parse(self.fetch("/api/2/auth/token").body)['api_token'] + FileUtils.mkdir_p "_posts" + + posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}").body) + page = 1 + + while posts.any? 
+ posts.each do |post| + title = post["title"] + slug = title.gsub(/[^[:alnum:]]+/, '-').downcase + date = Date.parse(post["display_date"]) + content = post["body_html"] + published = !post["is_private"] + name = "%02d-%02d-%02d-%s.html" % [date.year, date.month, date.day, slug] + + # Get the relevant fields as a hash, delete empty fields and convert + # to YAML for the header + data = { + 'layout' => 'post', + 'title' => title.to_s, + 'published' => published + }.delete_if { |k,v| v.nil? || v == ''}.to_yaml + + # Write out the data and content to file + File.open("_posts/#{name}", "w") do |f| + f.puts data + f.puts "---" + f.puts content + end + end + + page += 1 + posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body) + end + end + end +end diff --git a/lib/jekyll/migrators/textpattern.rb b/lib/jekyll/migrators/textpattern.rb index 4b371197..58634c48 100644 --- a/lib/jekyll/migrators/textpattern.rb +++ b/lib/jekyll/migrators/textpattern.rb @@ -11,10 +11,17 @@ require 'fileutils' module Jekyll module TextPattern # Reads a MySQL database via Sequel and creates a post file for each post. - # The only posts selected are those with a status of 4 or 5, which means "live" - # and "sticky" respectively. - # Other statuses is 1 => draft, 2 => hidden and 3 => pending - QUERY = "select Title, url_title, Posted, Body, Keywords from textpattern where Status = '4' or Status = '5'" + # The only posts selected are those with a status of 4 or 5, which means + # "live" and "sticky" respectively. + # Other statuses are 1 => draft, 2 => hidden and 3 => pending. 
+ QUERY = "SELECT Title, \ + url_title, \ + Posted, \ + Body, \ + Keywords \ + FROM textpattern \ + WHERE Status = '4' OR \ + Status = '5'" def self.process(dbname, user, pass, host = 'localhost') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') @@ -22,7 +29,7 @@ module Jekyll FileUtils.mkdir_p "_posts" db[QUERY].each do |post| - # Get required fields and construct Jekyll compatible name + # Get required fields and construct Jekyll compatible name. title = post[:Title] slug = post[:url_title] date = post[:Posted] @@ -31,14 +38,14 @@ module Jekyll name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile" # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header + # to YAML for the header. data = { 'layout' => 'post', 'title' => title.to_s, 'tags' => post[:Keywords].split(',') }.delete_if { |k,v| v.nil? || v == ''}.to_yaml - # Write out the data and content to file + # Write out the data and content to file. 
File.open("_posts/#{name}", "w") do |f| f.puts data f.puts "---" @@ -47,4 +54,4 @@ module Jekyll end end end -end \ No newline at end of file +end diff --git a/lib/jekyll/migrators/tumblr.rb b/lib/jekyll/migrators/tumblr.rb new file mode 100644 index 00000000..0345b4a9 --- /dev/null +++ b/lib/jekyll/migrators/tumblr.rb @@ -0,0 +1,119 @@ +require 'rubygems' +require 'nokogiri' +require 'open-uri' +require 'fileutils' +require 'CGI' +require 'iconv' +require 'date' + +module Jekyll + module Tumblr + def self.process(url, grab_images = false) + current_page = 0 + + while true + f = open(url + "/api/read?num=50&start=#{current_page * 50}") + doc = Nokogiri::HTML(Iconv.conv("utf-8", f.charset, f.readlines.join("\n"))) + + puts "Page: #{current_page + 1} - Posts: #{(doc/:tumblr/:posts/:post).size}" + + FileUtils.mkdir_p "_posts/tumblr" + + (doc/:tumblr/:posts/:post).each do |post| + title = "" + content = nil + name = nil + + if post['type'] == "regular" + title_element = post.at("regular-title") + title = title_element.inner_text unless title_element == nil + content = CGI::unescapeHTML post.at("regular-body").inner_html unless post.at("regular-body") == nil + elsif post['type'] == "link" + title = post.at("link-text").inner_html unless post.at("link-text") == nil + + if post.at("link-text") != nil + content = "#{post.at("link-text").inner_html}" + else + content = "#{post.at("link-url").inner_html}" + end + + content << "
" + CGI::unescapeHTML(post.at("link-description").inner_html) unless post.at("link-description") == nil + elsif post['type'] == "photo" + content = "" + + if post.at("photo-link-url") != nil + content = "" + else + content = "" + end + + if post.at("photo-caption") != nil + content << "
" unless content == nil + content << CGI::unescapeHTML(post.at("photo-caption").inner_html) + end + elsif post['type'] == "audio" + content = CGI::unescapeHTML(post.at("audio-player").inner_html) + content << CGI::unescapeHTML(post.at("audio-caption").inner_html) unless post.at("audio-caption") == nil + elsif post['type'] == "quote" + content = "
" + CGI::unescapeHTML(post.at("quote-text").inner_html) + "
" + content << "—" + CGI::unescapeHTML(post.at("quote-source").inner_html) unless post.at("quote-source") == nil + elsif post['type'] == "conversation" + title = post.at("conversation-title").inner_html unless post.at("conversation-title") == nil + content = "
" + + (post/:conversation/:line).each do |line| + content << "
" + line['label'] + "
" + line.inner_html + "
" unless line['label'] == nil || line == nil + end + + content << "
" + elsif post['type'] == "video" + title = post.at("video-title").inner_html unless post.at("video-title") == nil + content = CGI::unescapeHTML(post.at("video-player").inner_html) + content << CGI::unescapeHTML(post.at("video-caption").inner_html) unless post.at("video-caption") == nil + end # End post types + + name = "#{Date.parse(post['date']).to_s}-#{post['id'].downcase.gsub(/[^a-z0-9]/, '-')}.html" + + if title != nil || content != nil && name != nil + File.open("_posts/tumblr/#{name}", "w") do |f| + + f.puts <<-HEADER +--- +layout: post +title: #{title} +--- + +HEADER + + f.puts content + end # End file + end + + end # End post XML + + if (doc/:tumblr/:posts/:post).size < 50 + break + else + current_page = current_page + 1 + end + + end # End while loop + end # End method + + private + + def self.save_file(url, grab_image = false) + unless grab_image == false + FileUtils.mkdir_p "tumblr_files" + + File.open("tumblr_files/#{url.split('/').last}", "w") do |f| + f.write(open(url).read) + end + + return "/tumblr_files/#{url.split('/').last}" + else + return url + end + end + end +end diff --git a/lib/jekyll/migrators/typo.rb b/lib/jekyll/migrators/typo.rb index 1795a6bb..3de5130a 100644 --- a/lib/jekyll/migrators/typo.rb +++ b/lib/jekyll/migrators/typo.rb @@ -5,8 +5,8 @@ require 'sequel' module Jekyll module Typo - # this SQL *should* work for both MySQL and PostgreSQL, but I haven't - # tested PostgreSQL yet (as of 2008-12-16) + # This SQL *should* work for both MySQL and PostgreSQL, but I haven't + # tested PostgreSQL yet (as of 2008-12-16). SQL = <<-EOS SELECT c.id id, c.title title, @@ -30,8 +30,9 @@ module Jekyll sprintf("%.02d", post[:date].month), sprintf("%.02d", post[:date].day), post[:slug].strip ].join('-') + # Can have more than one text filter in this field, but we just want - # the first one for this + # the first one for this. name += '.' 
+ post[:filter].split(' ')[0] File.open("_posts/#{name}", 'w') do |f| @@ -45,5 +46,5 @@ module Jekyll end end - end # module Typo -end # module Jekyll + end +end diff --git a/lib/jekyll/migrators/wordpress.com.rb b/lib/jekyll/migrators/wordpress.com.rb deleted file mode 100644 index 5be1b42d..00000000 --- a/lib/jekyll/migrators/wordpress.com.rb +++ /dev/null @@ -1,38 +0,0 @@ -require 'rubygems' -require 'hpricot' -require 'fileutils' - -# This importer takes a wordpress.xml file, -# which can be exported from your -# wordpress.com blog (/wp-admin/export.php) - -module Jekyll - module WordpressDotCom - def self.process(filename = "wordpress.xml") - FileUtils.mkdir_p "_posts" - posts = 0 - - doc = Hpricot::XML(File.read(filename)) - - (doc/:channel/:item).each do |item| - title = item.at(:title).inner_text - name = "#{Date.parse((doc/:channel/:item).first.at(:pubDate).inner_text).to_s("%Y-%m-%d")}-#{title.downcase.gsub('[^a-z0-9]', '-')}.html" - - File.open("_posts/#{name}", "w") do |f| - f.puts <<-HEADER ---- -layout: post -title: #{title} ---- - -HEADER - f.puts item.at('content:encoded').inner_text - end - - posts += 1 - end - - "Imported #{posts} posts" - end - end -end \ No newline at end of file diff --git a/lib/jekyll/migrators/wordpress.rb b/lib/jekyll/migrators/wordpress.rb index 4b01726c..535859c9 100644 --- a/lib/jekyll/migrators/wordpress.rb +++ b/lib/jekyll/migrators/wordpress.rb @@ -11,20 +11,27 @@ require 'yaml' module Jekyll module WordPress - - # Reads a MySQL database via Sequel and creates a post file for each - # post in wp_posts that has post_status = 'publish'. - # This restriction is made because 'draft' posts are not guaranteed to - # have valid dates. 
- QUERY = "select post_title, post_name, post_date, post_content, post_excerpt, ID, guid from wp_posts where post_status = 'publish' and post_type = 'post'" - - def self.process(dbname, user, pass, host = 'localhost') + def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_') db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8') - FileUtils.mkdir_p "_posts" + FileUtils.mkdir_p("_posts") - db[QUERY].each do |post| - # Get required fields and construct Jekyll compatible name + # Reads a MySQL database via Sequel and creates a post file for each + # post in wp_posts that has post_status = 'publish'. This restriction is + # made because 'draft' posts are not guaranteed to have valid dates. + query = "SELECT post_title, \ + post_name, \ + post_date, \ + post_content, \ + post_excerpt, \ + ID, \ + guid \ + FROM #{table_prefix}posts \ + WHERE post_status = 'publish' AND \ + post_type = 'post'" + + db[query].each do |post| + # Get required fields and construct Jekyll compatible name. title = post[:post_title] slug = post[:post_name] date = post[:post_date] @@ -33,7 +40,7 @@ module Jekyll slug] # Get the relevant fields as a hash, delete empty fields and convert - # to YAML for the header + # to YAML for the header. data = { 'layout' => 'post', 'title' => title.to_s, @@ -41,7 +48,7 @@ module Jekyll 'wordpress_id' => post[:ID], 'wordpress_url' => post[:guid], 'date' => date - }.delete_if { |k,v| v.nil? || v == ''}.to_yaml + }.delete_if { |k,v| v.nil? 
|| v == '' }.to_yaml # Write out the data and content to file File.open("_posts/#{name}", "w") do |f| @@ -50,7 +57,6 @@ module Jekyll f.puts content end end - end end end diff --git a/lib/jekyll/migrators/wordpressdotcom.rb b/lib/jekyll/migrators/wordpressdotcom.rb new file mode 100644 index 00000000..53218e50 --- /dev/null +++ b/lib/jekyll/migrators/wordpressdotcom.rb @@ -0,0 +1,42 @@ +# coding: utf-8 + +require 'rubygems' +require 'hpricot' +require 'fileutils' +require 'yaml' + +module Jekyll + # This importer takes a wordpress.xml file, which can be exported from your + # wordpress.com blog (/wp-admin/export.php). + module WordpressDotCom + def self.process(filename = "wordpress.xml") + FileUtils.mkdir_p "_posts" + posts = 0 + + doc = Hpricot::XML(File.read(filename)) + + (doc/:channel/:item).each do |item| + title = item.at(:title).inner_text.strip + permalink_title = item.at('wp:post_name').inner_text + date = Time.parse(item.at(:pubDate).inner_text) + tags = (item/:category).map{|c| c.inner_text}.reject{|c| c == 'Uncategorized'}.uniq + name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html" + header = { + 'layout' => 'post', + 'title' => title, + 'tags' => tags + } + + File.open("_posts/#{name}", "w") do |f| + f.puts header.to_yaml + f.puts '---' + f.puts item.at('content:encoded').inner_text + end + + posts += 1 + end + + puts "Imported #{posts} posts" + end + end +end diff --git a/lib/jekyll/page.rb b/lib/jekyll/page.rb index 2e996308..a4a0f9f3 100644 --- a/lib/jekyll/page.rb +++ b/lib/jekyll/page.rb @@ -8,12 +8,11 @@ module Jekyll attr_accessor :data, :content, :output # Initialize a new Page. - # +site+ is the Site - # +base+ is the String path to the - # +dir+ is the String path between and the file - # +name+ is the String filename of the file # - # Returns + # site - The Site object. + # base - The String path to the source. + # dir - The String path between the source and the file. + # name - The String filename of the file. 
 def initialize(site, base, dir, name) @site = site @base = base @@ -26,22 +25,24 @@ module Jekyll # The generated directory into which the page will be placed # upon generation. This is derived from the permalink or, if - # permalink is absent, set to '/' + # permalink is absent, will be '/' # - # Returns + # Returns the String destination directory. def dir url[-1, 1] == '/' ? url : File.dirname(url) end - # The full path and filename of the post. - # Defined in the YAML of the post body - # (Optional) + # The full path and filename of the post. Defined in the YAML of the post + # body. # - # Returns + # Returns the String permalink or nil if none has been set. def permalink self.data && self.data['permalink'] end + # The template of the permalink. + # + # Returns the template String. def template if self.site.permalink_style == :pretty && !index? && html? "/:basename/" @@ -50,35 +51,45 @@ module Jekyll end end - # The generated relative url of this page - # e.g. /about.html + # The generated relative url of this page. e.g. /about.html. # - # Returns + # Returns the String url. def url - return permalink if permalink + return @url if @url - @url ||= { - "basename" => self.basename, - "output_ext" => self.output_ext, - }.inject(template) { |result, token| - result.gsub(/:#{token.first}/, token.last) - }.gsub(/\/\//, "/") + url = if permalink + permalink + else + { + "basename" => self.basename, + "output_ext" => self.output_ext, + }.inject(template) { |result, token| + result.gsub(/:#{token.first}/, token.last) + }.gsub(/\/\//, "/") + end + + # sanitize url + @url = url.split('/').reject{ |part| part =~ /^\.+$/ }.join('/') + @url += "/" if url =~ /\/$/ + @url end - # Extract information from the page filename - # +name+ is the String filename of the page file + # Extract information from the page filename. # - # Returns nothing + # name - The String filename of the page file. + # + # Returns nothing. 
def process(name) self.ext = File.extname(name) self.basename = name[0 .. -self.ext.length-1] end # Add any necessary layouts to this post - # +layouts+ is a Hash of {"name" => "layout"} - # +site_payload+ is the site payload hash # - # Returns nothing + # layouts - The Hash of {"name" => "layout"}. + # site_payload - The site payload Hash. + # + # Returns nothing. def render(layouts, site_payload) payload = { "page" => self.to_liquid, @@ -88,27 +99,33 @@ module Jekyll do_layout(payload, layouts) end + # Convert this Page's data to a Hash suitable for use by Liquid. + # + # Returns the Hash representation of this Page. def to_liquid self.data.deep_merge({ "url" => File.join(@dir, self.url), "content" => self.content }) end - + # Obtain destination path. - # +dest+ is the String path to the destination dir # - # Returns destination file path. + # dest - The String path to the destination dir. + # + # Returns the destination file path String. def destination(dest) - # The url needs to be unescaped in order to preserve the correct filename + # The url needs to be unescaped in order to preserve the correct + # filename. path = File.join(dest, @dir, CGI.unescape(self.url)) path = File.join(path, "index.html") if self.url =~ /\/$/ path end # Write the generated page file to the destination directory. - # +dest+ is the String path to the destination dir # - # Returns nothing + # dest - The String path to the destination dir. + # + # Returns nothing. def write(dest) path = destination(dest) FileUtils.mkdir_p(File.dirname(path)) @@ -117,14 +134,17 @@ module Jekyll end end + # Returns the object as a debug String. def inspect "#" end + # Returns the Boolean of whether this Page is HTML or not. def html? output_ext == '.html' end + # Returns the Boolean of whether this Page is an index file or not. def index? 
basename == 'index' end diff --git a/lib/jekyll/post.rb b/lib/jekyll/post.rb index 7747eff4..5502994c 100644 --- a/lib/jekyll/post.rb +++ b/lib/jekyll/post.rb @@ -78,6 +78,8 @@ module Jekyll self.date = Time.parse(date) self.slug = slug self.ext = ext + rescue ArgumentError + raise FatalException.new("Post #{name} does not have a valid date.") end # The generated directory into which the post will be placed @@ -117,20 +119,29 @@ module Jekyll # # Returns def url - return permalink if permalink + return @url if @url - @url ||= { - "year" => date.strftime("%Y"), - "month" => date.strftime("%m"), - "day" => date.strftime("%d"), - "title" => CGI.escape(slug), - "i_day" => date.strftime("%d").to_i.to_s, - "i_month" => date.strftime("%m").to_i.to_s, - "categories" => categories.join('/'), - "output_ext" => self.output_ext - }.inject(template) { |result, token| - result.gsub(/:#{Regexp.escape token.first}/, token.last) - }.gsub(/\/\//, "/") + url = if permalink + permalink + else + { + "year" => date.strftime("%Y"), + "month" => date.strftime("%m"), + "day" => date.strftime("%d"), + "title" => CGI.escape(slug), + "i_day" => date.strftime("%d").to_i.to_s, + "i_month" => date.strftime("%m").to_i.to_s, + "categories" => categories.join('/'), + "output_ext" => self.output_ext + }.inject(template) { |result, token| + result.gsub(/:#{Regexp.escape token.first}/, token.last) + }.gsub(/\/\//, "/") + end + + # sanitize url + @url = url.split('/').reject{ |part| part =~ /^\.+$/ }.join('/') + @url += "/" if url =~ /\/$/ + @url end # The UID for this post (useful in feeds) diff --git a/lib/jekyll/site.rb b/lib/jekyll/site.rb index 55b5c9d4..41905d65 100644 --- a/lib/jekyll/site.rb +++ b/lib/jekyll/site.rb @@ -1,3 +1,5 @@ +require 'set' + module Jekyll class Site @@ -7,10 +9,9 @@ module Jekyll attr_accessor :converters, :generators - # Initialize the site - # +config+ is a Hash containing site configurations details + # Public: Initialize a new Site. 
# - # Returns + # config - A Hash containing site configuration details. def initialize(config) self.config = config.clone @@ -29,6 +30,21 @@ module Jekyll self.setup end + # Public: Read, process, and write this Site to output. + # + # Returns nothing. + def process + self.reset + self.read + self.generate + self.render + self.cleanup + self.write + end + + # Reset Site details. + # + # Returns nothing def reset self.time = if self.config['time'] Time.parse(self.config['time'].to_s) @@ -42,13 +58,18 @@ module Jekyll self.categories = Hash.new { |hash, key| hash[key] = [] } self.tags = Hash.new { |hash, key| hash[key] = [] } - raise ArgumentError, "Limit posts must be nil or >= 1" if !self.limit_posts.nil? && self.limit_posts < 1 + if !self.limit_posts.nil? && self.limit_posts < 1 + raise ArgumentError, "Limit posts must be nil or >= 1" + end end + # Load necessary libraries, plugins, converters, and generators. + # + # Returns nothing. def setup require 'classifier' if self.lsi - # If safe mode is off, load in any ruby files under the plugins + # If safe mode is off, load in any Ruby files under the plugins # directory. unless self.safe Dir[File.join(self.plugins, "**/*.rb")].each do |f| @@ -69,29 +90,18 @@ module Jekyll end end - # Do the actual work of processing the site and generating the - # real deal. 5 phases; reset, read, generate, render, write. This allows - # rendering to have full site payload available. + # Read Site data from disk and load it into internal data structures. # - # Returns nothing - def process - self.reset - self.read - self.generate - self.render - self.cleanup - self.write - end - + # Returns nothing. def read - self.read_layouts # existing implementation did this at top level only so preserved that + self.read_layouts self.read_directories end # Read all the files in //_layouts and create a new Layout # object with each one. # - # Returns nothing + # Returns nothing. 
def read_layouts(dir = '') base = File.join(self.source, dir, "_layouts") return unless File.exists?(base) @@ -104,10 +114,44 @@ module Jekyll end end + # Recursively traverse directories to find posts, pages and static files + # that will become part of the site according to the rules in + # filter_entries. + # + # dir - The String relative path of the directory to read. + # + # Returns nothing. + def read_directories(dir = '') + base = File.join(self.source, dir) + entries = Dir.chdir(base) { filter_entries(Dir['*']) } + + self.read_posts(dir) + + entries.each do |f| + f_abs = File.join(base, f) + f_rel = File.join(dir, f) + if File.directory?(f_abs) + next if self.dest.sub(/\/$/, '') == f_abs + read_directories(f_rel) + elsif !File.symlink?(f_abs) + first3 = File.open(f_abs) { |fd| fd.read(3) } + if first3 == "---" + # file appears to have a YAML header so process it as a page + pages << Page.new(self, self.source, dir, f) + else + # otherwise treat it as a static file + static_files << StaticFile.new(self, self.source, dir, f) + end + end + end + end + # Read all the files in //_posts and create a new Post # object with each one. # - # Returns nothing + # dir - The String relative path of the directory to read. + # + # Returns nothing. def read_posts(dir) base = File.join(self.source, dir, '_posts') return unless File.exists?(base) @@ -132,12 +176,18 @@ module Jekyll self.posts = self.posts[-limit_posts, limit_posts] if limit_posts end + # Run each of the Generators. + # + # Returns nothing. def generate self.generators.each do |generator| generator.generate(self) end end + # Render the site to the destination. + # + # Returns nothing. def render self.posts.each do |post| post.render(self.layouts, site_payload) @@ -147,24 +197,24 @@ module Jekyll page.render(self.layouts, site_payload) end - self.categories.values.map { |ps| ps.sort! { |a, b| b <=> a} } - self.tags.values.map { |ps| ps.sort! { |a, b| b <=> a} } + self.categories.values.map { |ps| ps.sort! 
{ |a, b| b <=> a } } + self.tags.values.map { |ps| ps.sort! { |a, b| b <=> a } } rescue Errno::ENOENT => e # ignore missing layout dir end - - # Remove orphaned files and empty directories in destination + + # Remove orphaned files and empty directories in destination. # - # Returns nothing + # Returns nothing. def cleanup # all files and directories in destination, including hidden ones - dest_files = [] + dest_files = Set.new Dir.glob(File.join(self.dest, "**", "*"), File::FNM_DOTMATCH) do |file| dest_files << file unless file =~ /\/\.{1,2}$/ end # files to be written - files = [] + files = Set.new self.posts.each do |post| files << post.destination(self.dest) end @@ -174,18 +224,20 @@ module Jekyll self.static_files.each do |sf| files << sf.destination(self.dest) end - + # adding files' parent directories - files.each { |file| files << File.dirname(file) unless files.include? File.dirname(file) } - + dirs = Set.new + files.each { |file| dirs << File.dirname(file) } + files.merge(dirs) + obsolete_files = dest_files - files - - FileUtils.rm_rf(obsolete_files) + + FileUtils.rm_rf(obsolete_files.to_a) end - # Write static files, pages and posts + # Write static files, pages, and posts. # - # Returns nothing + # Returns nothing. def write self.posts.each do |post| post.write(self.dest) @@ -198,59 +250,45 @@ module Jekyll end end - # Reads the directories and finds posts, pages and static files that will - # become part of the valid site according to the rules in +filter_entries+. - # The +dir+ String is a relative path used to call this method - # recursively as it descends through directories + # Constructs a Hash of Posts indexed by the specified Post attribute. 
# - # Returns nothing - def read_directories(dir = '') - base = File.join(self.source, dir) - entries = filter_entries(Dir.entries(base)) - - self.read_posts(dir) - - entries.each do |f| - f_abs = File.join(base, f) - f_rel = File.join(dir, f) - if File.directory?(f_abs) - next if self.dest.sub(/\/$/, '') == f_abs - read_directories(f_rel) - elsif !File.symlink?(f_abs) - first3 = File.open(f_abs) { |fd| fd.read(3) } - if first3 == "---" - # file appears to have a YAML header so process it as a page - pages << Page.new(self, self.source, dir, f) - else - # otherwise treat it as a static file - static_files << StaticFile.new(self, self.source, dir, f) - end - end - end - end - - # Constructs a hash map of Posts indexed by the specified Post attribute + # post_attr - The String name of the Post attribute. # - # Returns {post_attr => []} + # Examples + # + # post_attr_hash('categories') + # # => { 'tech' => [, ], + # # 'ruby' => [] } + # + # Returns the Hash: { attr => posts } where + # attr - One of the values for the requested attribute. + # posts - The Array of Posts with the given attr value. def post_attr_hash(post_attr) - # Build a hash map based on the specified post attribute ( post attr => array of posts ) - # then sort each array in reverse order + # Build a hash map based on the specified post attribute ( post attr => + # array of posts ) then sort each array in reverse order. hash = Hash.new { |hash, key| hash[key] = Array.new } self.posts.each { |p| p.send(post_attr.to_sym).each { |t| hash[t] << p } } - hash.values.map { |sortme| sortme.sort! { |a, b| b <=> a} } - return hash + hash.values.map { |sortme| sortme.sort! { |a, b| b <=> a } } + hash end - # The Hash payload containing site-wide data + # The Hash payload containing site-wide data. # - # Returns {"site" => {"time" =>