Merge pull request #3545 from delftswa2014/site_extract_readers

Merge pull request 3545
This commit is contained in:
Parker Moore 2015-03-23 19:53:07 -07:00
commit e7d0b6c5a5
13 changed files with 410 additions and 216 deletions

View File

@ -49,12 +49,19 @@ module Jekyll
autoload :Filters, 'jekyll/filters'
autoload :FrontmatterDefaults, 'jekyll/frontmatter_defaults'
autoload :Layout, 'jekyll/layout'
autoload :LayoutReader, 'jekyll/layout_reader'
autoload :CollectionReader, 'jekyll/readers/collection_reader'
autoload :DataReader, 'jekyll/readers/data_reader'
autoload :LayoutReader, 'jekyll/readers/layout_reader'
autoload :DraftReader, 'jekyll/readers/draft_reader'
autoload :PostReader, 'jekyll/readers/post_reader'
autoload :PageReader, 'jekyll/readers/page_reader'
autoload :StaticFileReader, 'jekyll/readers/static_file_reader'
autoload :LogAdapter, 'jekyll/log_adapter'
autoload :Page, 'jekyll/page'
autoload :PluginManager, 'jekyll/plugin_manager'
autoload :Post, 'jekyll/post'
autoload :Publisher, 'jekyll/publisher'
autoload :Reader, 'jekyll/reader'
autoload :Regenerator, 'jekyll/regenerator'
autoload :RelatedPosts, 'jekyll/related_posts'
autoload :Renderer, 'jekyll/renderer'

121
lib/jekyll/reader.rb Normal file
View File

@ -0,0 +1,121 @@
# encoding: UTF-8
require 'csv'
module Jekyll
  # Loads a site's content from disk into the owning Site object by
  # delegating to the specialised readers (layouts, posts, drafts, pages,
  # static files, data and collections).
  class Reader
    attr_reader :site

    # site - The Site whose content this reader populates.
    def initialize(site)
      @site = site
    end

    # Read Site data from disk and load it into internal data structures.
    #
    # Layouts are assigned first, then the source tree is traversed, then
    # the data directory named by config['data_source'] is read, and
    # finally the collections are read.
    #
    # Returns nothing.
    def read
      site.layouts = LayoutReader.new(site).read
      read_directories
      site.data = DataReader.new(site).read(site.config['data_source'])
      CollectionReader.new(site).read
    end

    # Recursively traverse directories to find posts, pages and static files
    # that will become part of the site according to the rules in
    # filter_entries.
    #
    # dir - The String relative path of the directory to read. Default: ''.
    #
    # Returns nothing.
    def read_directories(dir = '')
      base    = site.in_source_dir(dir)
      entries = Dir.chdir(base) { filter_entries(Dir.entries('.'), base) }

      # Split the filtered entries into directories and files, then split the
      # files into pages (YAML header present) and static files.
      dot_dirs, dot_files = entries.partition do |entry|
        File.directory?(site.in_source_dir(base, entry))
      end
      dot_pages, dot_static_files = dot_files.partition do |entry|
        Utils.has_yaml_header?(site.in_source_dir(base, entry))
      end

      retrieve_posts(dir)
      retrieve_dirs(base, dir, dot_dirs)
      retrieve_pages(dir, dot_pages)
      retrieve_static_files(dir, dot_static_files)
    end

    # Retrieve all the posts (and drafts, when site.show_drafts is set) from
    # the given directory, add them to the site and sort them.
    #
    # dir - The String representing the directory to retrieve the posts from.
    #
    # Returns nothing.
    def retrieve_posts(dir)
      site.posts.concat(PostReader.new(site).read(dir))
      site.posts.concat(DraftReader.new(site).read(dir)) if site.show_drafts
      site.posts.sort!
    end

    # Recursively traverse directories with the read_directories function.
    # The site's destination directory is never traversed.
    #
    # base     - The String representing the site's base directory
    #            (currently unused; kept for interface compatibility).
    # dir      - The String representing the directory to traverse down.
    # dot_dirs - The Array of subdirectories in the dir.
    #
    # Returns nothing.
    def retrieve_dirs(base, dir, dot_dirs)
      # Strip one trailing slash so the comparison with the sanitized
      # directory path below is not defeated by it.
      dest_dir = site.dest.sub(/\/$/, '')

      dot_dirs.each do |file|
        next if site.in_source_dir(dir, file) == dest_dir

        site.reader.read_directories(File.join(dir, file))
      end
    end

    # Retrieve all the pages from the current directory,
    # add them to the site and sort them by name.
    #
    # dir       - The String representing the directory to retrieve the
    #             pages from.
    # dot_pages - The Array of pages in the dir.
    #
    # Returns nothing.
    def retrieve_pages(dir, dot_pages)
      site.pages.concat(PageReader.new(site, dir).read(dot_pages))
      site.pages.sort_by!(&:name)
    end

    # Retrieve all the static files from the current directory,
    # add them to the site and sort them by relative path.
    #
    # dir              - The directory to retrieve the static files from.
    # dot_static_files - The static files in the dir.
    #
    # Returns nothing.
    def retrieve_static_files(dir, dot_static_files)
      site.static_files.concat(StaticFileReader.new(site, dir).read(dot_static_files))
      site.static_files.sort_by!(&:relative_path)
    end

    # Filter out any files/directories that are hidden or backup files (start
    # with "." or "#" or end with "~"), or contain site content (start with "_"),
    # or are excluded in the site configuration, unless they are web server
    # files such as '.htaccess'. The filtering itself is done by EntryFilter.
    #
    # entries        - The Array of String file/directory entries to filter.
    # base_directory - The String representing the optional base directory.
    #
    # Returns the Array of filtered entries.
    def filter_entries(entries, base_directory = nil)
      EntryFilter.new(site, base_directory).filter(entries)
    end

    # Read the entries from a particular directory for processing.
    #
    # dir       - The String representing the relative path of the directory
    #             to read.
    # subfolder - The String representing the directory to read.
    #
    # Returns the list of (non-directory) entries to process, or an empty
    # Array when the directory does not exist.
    def get_entries(dir, subfolder)
      base = site.in_source_dir(dir, subfolder)
      return [] unless File.exist?(base)

      entries = Dir.chdir(base) { filter_entries(Dir['**/*'], base) }
      entries.delete_if { |entry| File.directory?(site.in_source_dir(base, entry)) }
    end
  end
end

View File

@ -0,0 +1,19 @@
module Jekyll
  # Triggers reading of every collection registered on the site, except the
  # one labelled 'data'.
  class CollectionReader
    attr_reader :site, :content

    # site - The Site whose collections will be read.
    def initialize(site)
      @site    = site
      @content = {}
    end

    # Read in all collections specified in the configuration, skipping the
    # collection whose label is 'data'.
    #
    # Returns nothing.
    def read
      site.collections.each_value do |collection|
        next if collection.label.eql?('data')

        collection.read
      end
    end
  end
end

View File

@ -0,0 +1,69 @@
module Jekyll
  # Reads the data files (YAML, JSON and CSV) found under a directory of the
  # site source into a nested Hash keyed by sanitized file/directory name.
  class DataReader
    attr_reader :site, :content

    # site - The Site whose data directory will be read.
    def initialize(site)
      @site = site
      @content = {}
    end

    # Read all the data files in <source>/<dir> (recursively) and collect
    # their parsed contents.
    #
    # dir - The String relative path of the directory to read.
    #
    # Returns the Hash of data read so far (the same object as #content).
    def read(dir)
      base = site.in_source_dir(dir)
      read_data_to(base, @content)
      @content
    end

    # Read and parse all yaml, json and csv files under <dir> and add them
    # to the <data> variable. Subdirectories become nested Hashes.
    #
    # dir  - The String absolute path of the directory to read.
    # data - The Hash to which data will be added.
    #
    # Returns nothing
    def read_data_to(dir, data)
      return unless File.directory?(dir)
      # In safe mode a symlinked data directory is not followed.
      return if site.safe && File.symlink?(dir)

      entries = Dir.chdir(dir) do
        Dir['*.{yaml,yml,json,csv}'] + Dir['*'].select { |fn| File.directory?(fn) }
      end

      entries.each do |entry|
        path = site.in_source_dir(dir, entry)
        # In safe mode symlinked files and subdirectories are skipped too.
        next if File.symlink?(path) && site.safe

        key = sanitize_filename(File.basename(entry, '.*'))
        if File.directory?(path)
          read_data_to(path, data[key] = {})
        else
          data[key] = read_data_file(path)
        end
      end
    end

    # Determines how to read a data file based on its extension.
    #
    # path - The String path of the data file.
    #
    # Returns the contents of the data file: an Array of row Hashes for
    # '.csv' files, otherwise whatever SafeYAML.load_file returns.
    def read_data_file(path)
      case File.extname(path).downcase
      when '.csv'
        # Options are passed as keyword arguments (no surrounding braces):
        # a positional Hash is rejected by CSV.read on Ruby 3 and later.
        CSV.read(path,
                 :headers => true,
                 :encoding => site.config['encoding']).map(&:to_hash)
      else
        SafeYAML.load_file(path)
      end
    end

    # Turn a file or directory base name into a data key: drop every
    # character that is not a word character, whitespace or '-', collapse
    # whitespace runs, and replace the remaining whitespace with '_'.
    #
    # The argument is left untouched, so frozen strings are accepted.
    #
    # name - The String name to sanitize.
    #
    # Returns the sanitized String.
    def sanitize_filename(name)
      name.gsub(/[^\w\s-]+/, '')
          .gsub(/(^|\b\s)\s+($|\s?\b)/, '\\1\\2')
          .gsub(/\s+/, '_')
    end
  end
end

View File

@ -0,0 +1,37 @@
module Jekyll
  # Builds Draft objects from the entries of a '_drafts' directory.
  class DraftReader
    attr_reader :site, :unfiltered_content

    # site - The Site the drafts belong to.
    def initialize(site)
      @site = site
      @unfiltered_content = []
    end

    # Read all the files in <source>/<dir>/_drafts and create a new Draft
    # object with each one. Every Draft created is kept in
    # #unfiltered_content.
    #
    # dir - The String relative path of the directory to read.
    #
    # Returns the Array of Drafts accepted by site.publisher.publish?.
    def read(dir)
      @unfiltered_content = read_content(dir, '_drafts')
      @unfiltered_content.select { |draft| site.publisher.publish?(draft) }
    end

    # Read all the content files from <source>/<dir>/magic_dir
    # and wrap each valid entry in a Draft.
    #
    # dir       - The String relative path of the directory to read.
    # magic_dir - The String relative directory to <dir>,
    #             looks for content here.
    #
    # Returns the Array of Drafts built from entries for which
    # Draft.valid? holds.
    def read_content(dir, magic_dir)
      site.reader.get_entries(dir, magic_dir).each_with_object([]) do |entry, drafts|
        drafts << Draft.new(site, site.source, dir, entry) if Draft.valid?(entry)
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module Jekyll
  # Builds Page objects for a list of file names living in one directory.
  class PageReader
    attr_reader :site, :dir, :unfiltered_content

    # site - The Site the pages belong to.
    # dir  - The String relative path of the directory the files are in.
    def initialize(site, dir)
      @site = site
      @dir = dir
      @unfiltered_content = []
    end

    # Create a new Page object for each of the given files in
    # <source>/<dir>/ and append it to #unfiltered_content.
    #
    # files - The Array of String file names to turn into Pages.
    #
    # Returns an Array of the Pages accumulated by this reader that are
    # accepted by site.publisher.publish?.
    def read(files)
      files.each do |file|
        unfiltered_content << Page.new(site, site.source, dir, file)
      end
      unfiltered_content.select { |page| site.publisher.publish?(page) }
    end
  end
end

View File

@ -0,0 +1,37 @@
module Jekyll
  # Builds Post objects from the entries of a '_posts' directory.
  class PostReader
    attr_reader :site, :unfiltered_content

    # site - The Site the posts belong to.
    def initialize(site)
      @site = site
      @unfiltered_content = []
    end

    # Read all the files in <source>/<dir>/_posts and create a new Post
    # object with each one. Every Post created is kept in
    # #unfiltered_content.
    #
    # dir - The String relative path of the directory to read.
    #
    # Returns the Array of Posts accepted by site.publisher.publish?.
    def read(dir)
      @unfiltered_content = read_content(dir, '_posts')
      @unfiltered_content.select { |post| site.publisher.publish?(post) }
    end

    # Read all the content files from <source>/<dir>/magic_dir
    # and wrap each valid entry in a Post.
    #
    # dir       - The String relative path of the directory to read.
    # magic_dir - The String relative directory to <dir>,
    #             looks for content here.
    #
    # Returns the Array of Posts built from entries for which
    # Post.valid? holds.
    def read_content(dir, magic_dir)
      site.reader.get_entries(dir, magic_dir).each_with_object([]) do |entry, posts|
        posts << Post.new(site, site.source, dir, entry) if Post.valid?(entry)
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module Jekyll
  # Builds StaticFile objects for a list of file names living in one
  # directory.
  class StaticFileReader
    attr_reader :site, :dir, :unfiltered_content

    # site - The Site the static files belong to.
    # dir  - The String relative path of the directory the files are in.
    def initialize(site, dir)
      @site = site
      @dir = dir
      @unfiltered_content = []
    end

    # Create a new StaticFile object for each of the given files in
    # <source>/<dir>/ and append it to #unfiltered_content. No publish
    # filtering is applied.
    #
    # files - The Array of String file names to turn into StaticFiles.
    #
    # Returns an Array of all static files accumulated by this reader.
    def read(files)
      files.each do |file|
        unfiltered_content << StaticFile.new(site, site.source, dir, file)
      end
      unfiltered_content
    end
  end
end

View File

@ -4,13 +4,13 @@ require 'csv'
module Jekyll
class Site
attr_reader :source, :dest, :config
attr_accessor :layouts, :posts, :pages, :static_files,
attr_accessor :layouts, :posts, :pages, :static_files, :drafts,
:exclude, :include, :lsi, :highlighter, :permalink_style,
:time, :future, :unpublished, :safe, :plugins, :limit_posts,
:show_drafts, :keep_files, :baseurl, :data, :file_read_opts,
:gems, :plugin_manager
attr_accessor :converters, :generators
attr_accessor :converters, :generators, :reader
attr_reader :regenerator
# Public: Initialize a new Site.
@ -28,6 +28,8 @@ module Jekyll
@source = File.expand_path(config['source']).freeze
@dest = File.expand_path(config['destination']).freeze
@reader = Jekyll::Reader.new(self)
# Initialize incremental regenerator
@regenerator = Regenerator.new(self)
@ -98,30 +100,6 @@ module Jekyll
end
end
# Public: Prefix a given path with the source directory.
#
# paths - (optional) path elements to a file or directory within the
# source directory
#
# Returns a path which is prefixed with the source directory.
def in_source_dir(*paths)
paths.reduce(source) do |base, path|
Jekyll.sanitized_path(base, path)
end
end
# Public: Prefix a given path with the destination directory.
#
# paths - (optional) path elements to a file or directory within the
# destination directory
#
# Returns a path which is prefixed with the destination directory.
def in_dest_dir(*paths)
paths.reduce(dest) do |base, path|
Jekyll.sanitized_path(base, path)
end
end
# The list of collections and their corresponding Jekyll::Collection instances.
# If config['collections'] is set, a new instance is created for each item in the collection.
# If config['collections'] is not set, a new hash is returned.
@ -152,140 +130,8 @@ module Jekyll
#
# Returns nothing.
def read
self.layouts = LayoutReader.new(self).read
read_directories
read_data(config['data_source'])
read_collections
end
# Recursively traverse directories to find posts, pages and static files
# that will become part of the site according to the rules in
# filter_entries.
#
# dir - The String relative path of the directory to read. Default: ''.
#
# Returns nothing.
def read_directories(dir = '')
base = in_source_dir(dir)
entries = Dir.chdir(base) { filter_entries(Dir.entries('.'), base) }
read_posts(dir)
read_drafts(dir) if show_drafts
posts.sort!
limit_posts! if limit_posts > 0 # limit the posts if :limit_posts option is set
entries.each do |f|
f_abs = in_source_dir(base, f)
if File.directory?(f_abs)
f_rel = File.join(dir, f)
read_directories(f_rel) unless dest.sub(/\/$/, '') == f_abs
elsif Utils.has_yaml_header?(f_abs)
page = Page.new(self, source, dir, f)
pages << page if publisher.publish?(page)
else
static_files << StaticFile.new(self, source, dir, f)
end
end
pages.sort_by!(&:name)
static_files.sort_by!(&:relative_path)
end
# Read all the files in <source>/<dir>/_posts and create a new Post
# object with each one.
#
# dir - The String relative path of the directory to read.
#
# Returns nothing.
def read_posts(dir)
posts = read_content(dir, '_posts', Post)
posts.each do |post|
aggregate_post_info(post) if publisher.publish?(post)
end
end
# Read all the files in <source>/<dir>/_drafts and create a new Post
# object with each one.
#
# dir - The String relative path of the directory to read.
#
# Returns nothing.
def read_drafts(dir)
drafts = read_content(dir, '_drafts', Draft)
drafts.each do |draft|
if draft.published?
aggregate_post_info(draft)
end
end
end
def read_content(dir, magic_dir, klass)
get_entries(dir, magic_dir).map do |entry|
klass.new(self, source, dir, entry) if klass.valid?(entry)
end.reject do |entry|
entry.nil?
end
end
# Read and parse all yaml files under <source>/<dir>
#
# Returns nothing
def read_data(dir)
base = in_source_dir(dir)
read_data_to(base, self.data)
end
# Read and parse all yaml files under <dir> and add them to the
# <data> variable.
#
# dir - The string absolute path of the directory to read.
# data - The variable to which data will be added.
#
# Returns nothing
def read_data_to(dir, data)
return unless File.directory?(dir) && (!safe || !File.symlink?(dir))
entries = Dir.chdir(dir) do
Dir['*.{yaml,yml,json,csv}'] + Dir['*'].select { |fn| File.directory?(fn) }
end
entries.each do |entry|
path = in_source_dir(dir, entry)
next if File.symlink?(path) && safe
key = sanitize_filename(File.basename(entry, '.*'))
if File.directory?(path)
read_data_to(path, data[key] = {})
else
data[key] = read_data_file(path)
end
end
end
# Determines how to read a data file.
#
# Returns the contents of the data file.
def read_data_file(path)
case File.extname(path).downcase
when '.csv'
CSV.read(path, {
:headers => true,
:encoding => config['encoding']
}).map(&:to_hash)
else
SafeYAML.load_file(path)
end
end
# Read in all collections specified in the configuration
#
# Returns nothing.
def read_collections
collections.each do |_, collection|
collection.read unless collection.label.eql?("data")
end
reader.read
limit_posts!
end
# Run each of the Generators.
@ -301,7 +147,7 @@ module Jekyll
#
# Returns nothing.
def render
relative_permalinks_deprecation_method
relative_permalinks_are_deprecated
payload = site_payload
collections.each do |label, collection|
@ -412,18 +258,6 @@ module Jekyll
}
end
# Filter out any files/directories that are hidden or backup files (start
# with "." or "#" or end with "~"), or contain site content (start with "_"),
# or are excluded in the site configuration, unless they are web server
# files such as '.htaccess'.
#
# entries - The Array of String file/directory entries to filter.
#
# Returns the Array of filtered entries.
def filter_entries(entries, base_directory = nil)
EntryFilter.new(self, base_directory).filter(entries)
end
# Get the implementation class for the given Converter.
#
# klass - The Class of the Converter to fetch.
@ -448,30 +282,12 @@ module Jekyll
end
end
# Read the entries from a particular directory for processing
# Warns the user if permanent links are relative to the parent
# directory, as this is a deprecated feature of Jekyll.
#
# dir - The String relative path of the directory to read
# subfolder - The String directory to read
#
# Returns the list of entries to process
def get_entries(dir, subfolder)
base = in_source_dir(dir, subfolder)
return [] unless File.exist?(base)
entries = Dir.chdir(base) { filter_entries(Dir['**/*'], base) }
entries.delete_if { |e| File.directory?(in_source_dir(base, e)) }
end
# Aggregate post information
#
# post - The Post object to aggregate information for
#
# Returns nothing
def aggregate_post_info(post)
posts << post
end
def relative_permalinks_deprecation_method
if config['relative_permalinks'] && has_relative_page?
# Returns
def relative_permalinks_are_deprecated
if config['relative_permalinks'] && has_relative_page?
Jekyll::Deprecator.deprecation_message "Since v2.0, permalinks for pages" +
" in subfolders must be relative to the" +
" site source directory, not the parent" +
@ -480,16 +296,23 @@ module Jekyll
end
end
# Get the to be written documents
#
# Returns an Array of Documents which should be written
def docs_to_write
documents.select(&:write?)
end
# Get all the documents
#
# Returns an Array of all Documents
def documents
collections.reduce(Set.new) do |docs, (_, collection)|
docs + collection.docs + collection.files
end.to_a
end
def each_site_file
%w(posts pages static_files docs_to_write).each do |type|
send(type).each do |item|
@ -498,6 +321,10 @@ module Jekyll
end
end
# Returns the FrontmatterDefaults or creates a new FrontmatterDefaults
# if it doesn't already exist.
#
# Returns The FrontmatterDefaults
def frontmatter_defaults
@frontmatter_defaults ||= FrontmatterDefaults.new(self)
end
@ -509,29 +336,64 @@ module Jekyll
override['full_rebuild'] || config['full_rebuild']
end
# Returns the publisher or creates a new publisher if it doesn't
# already exist.
#
# Returns The Publisher
def publisher
@publisher ||= Publisher.new(self)
end
# Public: Prefix a given path with the source directory.
#
# paths - (optional) path elements to a file or directory within the
# source directory
#
# Returns a path which is prefixed with the source directory.
def in_source_dir(*paths)
paths.reduce(source) do |base, path|
Jekyll.sanitized_path(base, path)
end
end
# Public: Prefix a given path with the destination directory.
#
# paths - (optional) path elements to a file or directory within the
# destination directory
#
# Returns a path which is prefixed with the destination directory.
def in_dest_dir(*paths)
paths.reduce(dest) do |base, path|
Jekyll.sanitized_path(base, path)
end
end
private
# Checks if the site has any pages containing relative links
#
# Returns a Boolean: true if any page uses relative permalinks, false
# otherwise.
def has_relative_page?
pages.any? { |page| page.uses_relative_permalinks }
end
# Limits the current posts; removes the posts which exceed the limit_posts
#
# Returns nothing
def limit_posts!
limit = posts.length < limit_posts ? posts.length : limit_posts
self.posts = posts[-limit, limit]
if limit_posts > 0
limit = posts.length < limit_posts ? posts.length : limit_posts
self.posts = posts[-limit, limit]
end
end
# Returns the Cleaner or creates a new Cleaner if it doesn't
# already exist.
#
# Returns The Cleaner
def site_cleaner
@site_cleaner ||= Cleaner.new(self)
end
def sanitize_filename(name)
name.gsub!(/[^\w\s-]+/, '')
name.gsub!(/(^|\b\s)\s+($|\s?\b)/, '\\1\\2')
name.gsub(/\s+/, '_')
end
end
end

View File

@ -19,7 +19,7 @@ class TestEntryFilter < JekyllUnitTest
files = %w[index.html site.css .htaccess vendor]
@site.exclude = excludes + ["exclude*"]
assert_equal files, @site.filter_entries(excludes + files + ["excludeA"])
assert_equal files, @site.reader.filter_entries(excludes + files + ["excludeA"])
end
should "filter entries with exclude relative to site source" do
@ -27,7 +27,7 @@ class TestEntryFilter < JekyllUnitTest
files = %w[index.html vendor/css .htaccess]
@site.exclude = excludes
assert_equal files, @site.filter_entries(excludes + files + ["css"])
assert_equal files, @site.reader.filter_entries(excludes + files + ["css"])
end
should "filter excluded directory and contained files" do
@ -35,7 +35,7 @@ class TestEntryFilter < JekyllUnitTest
files = %w[index.html .htaccess]
@site.exclude = excludes
assert_equal files, @site.filter_entries(excludes + files + ["css", "css/main.css", "css/vendor.css"])
assert_equal files, @site.reader.filter_entries(excludes + files + ["css", "css/main.css", "css/vendor.css"])
end
should "not filter entries within include" do
@ -43,26 +43,26 @@ class TestEntryFilter < JekyllUnitTest
files = %w[index.html _index.html .htaccess includeA]
@site.include = includes
assert_equal files, @site.filter_entries(files)
assert_equal files, @site.reader.filter_entries(files)
end
should "filter symlink entries when safe mode enabled" do
site = Site.new(site_configuration('safe' => true))
allow(File).to receive(:symlink?).with('symlink.js').and_return(true)
files = %w[symlink.js]
assert_equal [], site.filter_entries(files)
assert_equal [], site.reader.filter_entries(files)
end
should "not filter symlink entries when safe mode disabled" do
allow(File).to receive(:symlink?).with('symlink.js').and_return(true)
files = %w[symlink.js]
assert_equal files, @site.filter_entries(files)
assert_equal files, @site.reader.filter_entries(files)
end
should "not include symlinks in safe mode" do
site = Site.new(site_configuration('safe' => true))
site.read_directories("symlink-test")
site.reader.read_directories("symlink-test")
assert_equal [], site.pages
assert_equal [], site.static_files
end
@ -70,7 +70,7 @@ class TestEntryFilter < JekyllUnitTest
should "include symlinks in unsafe mode" do
site = Site.new(site_configuration)
site.read_directories("symlink-test")
site.reader.read_directories("symlink-test")
refute_equal [], site.pages
refute_equal [], site.static_files
end

View File

@ -190,7 +190,7 @@ class TestSite < JekyllUnitTest
end
should "read posts" do
@site.read_posts('')
@site.posts.concat(PostReader.new(@site).read(''))
posts = Dir[source_dir('_posts', '**', '*')]
posts.delete_if { |post| File.directory?(post) && !Post.valid?(post) }
assert_equal posts.size - @num_invalid_posts, @site.posts.size
@ -370,7 +370,7 @@ class TestSite < JekyllUnitTest
site = Site.new(site_configuration)
site.process
file_content = site.read_data_file(source_dir('_data', 'members.yaml'))
file_content = DataReader.new(site).read_data_file(source_dir('_data', 'members.yaml'))
assert_equal site.data['members'], file_content
assert_equal site.site_payload['site']['data']['members'], file_content

View File

@ -12,7 +12,7 @@ class TestTags < JekyllUnitTest
site = fixture_site({"highlighter" => "rouge"}.merge(override))
if override['read_posts']
site.read_posts('')
site.posts.concat(PostReader.new(site).read(''))
end
info = { :filters => [Jekyll::Filters], :registers => { :site => site } }