Cleanup EntryFilter and make it far more robust.

* Allow users to filter directories by ending their path with "/"
* Allow users to filter with a Regexp; some scenarios really require it.
* Use Pathutil#in_path? for symlink verification; it handles the realpath/expand itself.

This also requires some downstream work in "jekyll-watch", which at this time is
not very robust; it doesn't recognize the difference either and should probably
start doing so (that is, detecting a trailing "/" and using the full path).
This commit is contained in:
Jordon Bedwell 2016-04-30 01:16:43 -05:00
parent 53d6b91fc8
commit 3751b47c50
No known key found for this signature in database
GPG Key ID: E051B220DFADB075
4 changed files with 72 additions and 17 deletions

View File

@ -36,4 +36,5 @@ Gem::Specification.new do |s|
s.add_runtime_dependency('rouge', '~> 1.7')
s.add_runtime_dependency('jekyll-sass-converter', '~> 1.0')
s.add_runtime_dependency('jekyll-watch', '~> 1.1')
s.add_runtime_dependency("pathutil", "~> 0.9")
end

View File

@ -16,6 +16,7 @@ end
require 'rubygems'
# stdlib
require "pathutil"
require 'forwardable'
require 'fileutils'
require 'time'

View File

@ -1,12 +1,15 @@
module Jekyll
class EntryFilter
SPECIAL_LEADING_CHARACTERS = ['.', '_', '#', '~'].freeze
attr_reader :site
SPECIAL_LEADING_CHARACTERS = [
'.', '_', '#', '~'
].freeze
def initialize(site, base_directory = nil)
@site = site
@base_directory = derive_base_directory(@site, base_directory.to_s.dup)
@base_directory = derive_base_directory(
@site, base_directory.to_s.dup
)
end
def base_directory
@ -14,14 +17,14 @@ module Jekyll
end
def derive_base_directory(site, base_dir)
if base_dir.start_with?(site.source)
base_dir[site.source] = ""
end
base_dir[site.source] = "" if base_dir.start_with?(site.source)
base_dir
end
def relative_to_source(entry)
File.join(base_directory, entry)
File.join(
base_directory, entry
)
end
def filter(entries)
@ -33,7 +36,9 @@ module Jekyll
end
def included?(entry)
glob_include?(site.include, entry)
glob_include?(site.include,
entry
)
end
def special?(entry)
@ -51,25 +56,62 @@ module Jekyll
excluded
end
# --
# Check if a file is a symlink.
# NOTE: This can be converted to allowing even in safe,
# since we use Pathutil#in_path? now.
# --
def symlink?(entry)
site.safe && File.symlink?(entry) && symlink_outside_site_source?(entry)
end
# --
# NOTE: Pathutil#in_path? gets the realpath.
# @param [<Anything>] entry the entry you want to validate.
# Check if a path is outside of our given root.
# --
def symlink_outside_site_source?(entry)
! File.realpath(entry).start_with?(File.realpath(@site.source))
end
def ensure_leading_slash(path)
path[0..0] == "/" ? path : "/#{path}"
!Pathutil.new(entry).in_path?(
site.in_source_dir
)
end
# --
# Check if an entry matches a specific pattern and return true or false.
# Returns true if path matches against any glob pattern.
# Look for more detail about glob pattern in method File::fnmatch.
# --
def glob_include?(enum, e)
entry = ensure_leading_slash(e)
entry = Pathutil.new(site.in_source_dir).join(e)
enum.any? do |exp|
item = ensure_leading_slash(exp)
File.fnmatch?(item, entry) || entry.start_with?(item)
# Users who send a Regexp know what they want to
# exclude, so let them send a Regexp to exclude files,
# we will not bother caring if it works or not, it's
# on them at this point.
if exp.is_a?(Regexp)
entry =~ exp
else
item = Pathutil.new(site.in_source_dir).join(exp)
# If it's a directory they want to exclude, AKA
# ends with a "/" then we will go on to check and
# see if the entry falls within that path and
# exclude it if that's the case.
if e.end_with?("/")
entry.in_path?(
item
)
else
File.fnmatch?(item, entry) ||
entry.to_path.start_with?(
item
)
end
end
end
end
end

View File

@ -14,6 +14,17 @@ class TestEntryFilter < JekyllUnitTest
assert_equal %w[foo.markdown bar.markdown baz.markdown .htaccess], entries
end
should "allow regexp filtering" do
files = %w(README.md)
@site.exclude = excludes = [
/README/
]
assert_empty @site.reader.filter_entries(
files
)
end
should "filter entries with exclude" do
excludes = %w[README TODO vendor/bundle]
files = %w[index.html site.css .htaccess vendor]