From 3751b47c504fe80fff5acd51a58904b5975eff4c Mon Sep 17 00:00:00 2001 From: Jordon Bedwell Date: Sat, 30 Apr 2016 01:16:43 -0500 Subject: [PATCH] Cleanup EntryFilter and make it far more robust. * Allow users to filter directories by ending their path with "/" * Allow users to filter with a Regexp, some scenarios can really require it. * Use Pathutil#in_path? for Symlink verification, it resolves the real/expanded path. This also requires some downstream work in "jekyll-watch" which at this time is not very robust, it doesn't recognize the difference either, and should probably start doing so (what I mean is detecting "/" and using the full path.) --- jekyll.gemspec | 1 + lib/jekyll.rb | 1 + lib/jekyll/entry_filter.rb | 76 +++++++++++++++++++++++++++++--------- test/test_entry_filter.rb | 11 ++++++ 4 files changed, 72 insertions(+), 17 deletions(-) diff --git a/jekyll.gemspec b/jekyll.gemspec index 62c94414..3f32f0a9 100644 --- a/jekyll.gemspec +++ b/jekyll.gemspec @@ -36,4 +36,5 @@ Gem::Specification.new do |s| s.add_runtime_dependency('rouge', '~> 1.7') s.add_runtime_dependency('jekyll-sass-converter', '~> 1.0') s.add_runtime_dependency('jekyll-watch', '~> 1.1') + s.add_runtime_dependency("pathutil", "~> 0.9") end diff --git a/lib/jekyll.rb b/lib/jekyll.rb index 8afdf06a..ffc1539f 100644 --- a/lib/jekyll.rb +++ b/lib/jekyll.rb @@ -16,6 +16,7 @@ end require 'rubygems' # stdlib +require "pathutil" require 'forwardable' require 'fileutils' require 'time' diff --git a/lib/jekyll/entry_filter.rb b/lib/jekyll/entry_filter.rb index cccd05f9..496bf56f 100644 --- a/lib/jekyll/entry_filter.rb +++ b/lib/jekyll/entry_filter.rb @@ -1,12 +1,15 @@ module Jekyll class EntryFilter - SPECIAL_LEADING_CHARACTERS = ['.', '_', '#', '~'].freeze - attr_reader :site + SPECIAL_LEADING_CHARACTERS = [ + '.', '_', '#', '~' + ].freeze def initialize(site, base_directory = nil) @site = site - @base_directory = derive_base_directory(@site, base_directory.to_s.dup) + @base_directory = derive_base_directory( + 
@site, base_directory.to_s.dup + ) end def base_directory @@ -14,14 +17,14 @@ module Jekyll end def derive_base_directory(site, base_dir) - if base_dir.start_with?(site.source) - base_dir[site.source] = "" - end + base_dir[site.source] = "" if base_dir.start_with?(site.source) base_dir end def relative_to_source(entry) - File.join(base_directory, entry) + File.join( + base_directory, entry + ) end def filter(entries) @@ -33,7 +36,9 @@ module Jekyll end def included?(entry) - glob_include?(site.include, entry) + glob_include?(site.include, + entry + ) end def special?(entry) @@ -51,25 +56,62 @@ module Jekyll excluded end + # -- + # Check if a file is a symlink. + # NOTE: This can be converted to allowing even in safe, + # since we use Pathutil#in_path? now. + # -- def symlink?(entry) site.safe && File.symlink?(entry) && symlink_outside_site_source?(entry) end + # -- + # NOTE: Pathutil#in_path? gets the realpath. + # @param [] entry the entry you want to validate. + # Check if a path is outside of our given root. + # -- def symlink_outside_site_source?(entry) - ! File.realpath(entry).start_with?(File.realpath(@site.source)) - end - - def ensure_leading_slash(path) - path[0..0] == "/" ? path : "/#{path}" + !Pathutil.new(entry).in_path?( + site.in_source_dir + ) end + # -- + # Check if an entry matches a specific pattern and return true,false. # Returns true if path matches against any glob pattern. - # Look for more detail about glob pattern in method File::fnmatch. + # -- def glob_include?(enum, e) - entry = ensure_leading_slash(e) + entry = Pathutil.new(site.in_source_dir).join(e) enum.any? do |exp| - item = ensure_leading_slash(exp) - File.fnmatch?(item, entry) || entry.start_with?(item) + + # Users who send a Regexp knows what they want to + # exclude, so let them send a Regexp to exclude files, + # we will not bother caring if it works or not, it's + # on them at this point. 
+ + if exp.is_a?(Regexp) + entry =~ exp + + else + item = Pathutil.new(site.in_source_dir).join(exp) + + # If it's a directory they want to exclude, AKA + # ends with a "/" then we will go on to check and + # see if the entry falls within that path and + # exclude it if that's the case. + + if e.end_with?("/") + entry.in_path?( + item + ) + + else + File.fnmatch?(item, entry) || + entry.to_path.start_with?( + item + ) + end + end end end end diff --git a/test/test_entry_filter.rb b/test/test_entry_filter.rb index d20c9be2..d9e38300 100644 --- a/test/test_entry_filter.rb +++ b/test/test_entry_filter.rb @@ -14,6 +14,17 @@ class TestEntryFilter < JekyllUnitTest assert_equal %w[foo.markdown bar.markdown baz.markdown .htaccess], entries end + should "allow regexp filtering" do + files = %w(README.md) + @site.exclude = excludes = [ + /README/ + ] + + assert_empty @site.reader.filter_entries( + files + ) + end + should "filter entries with exclude" do excludes = %w[README TODO vendor/bundle] files = %w[index.html site.css .htaccess vendor]