From 160a6816af841a13479432a6bc94407996d4c239 Mon Sep 17 00:00:00 2001 From: Ashwin Maroli Date: Thu, 29 Sep 2022 18:16:24 +0530 Subject: [PATCH] Incrementally rebuild when a data file is changed (#8771) Merge pull request 8771 --- features/incremental_rebuild.feature | 53 ++++++++++++++++++ lib/jekyll.rb | 2 + lib/jekyll/data_entry.rb | 83 ++++++++++++++++++++++++++++ lib/jekyll/data_hash.rb | 61 ++++++++++++++++++++ lib/jekyll/drops/site_drop.rb | 7 ++- lib/jekyll/readers/data_reader.rb | 9 ++- lib/jekyll/utils.rb | 9 ++- test/source/_data/boolean.yml | 1 + test/source/_data/languages_plus.yml | 4 ++ test/test_data_entry.rb | 34 ++++++++++++ test/test_data_hash.rb | 57 +++++++++++++++++++ 11 files changed, 315 insertions(+), 5 deletions(-) create mode 100644 lib/jekyll/data_entry.rb create mode 100644 lib/jekyll/data_hash.rb create mode 100644 test/source/_data/boolean.yml create mode 100644 test/source/_data/languages_plus.yml create mode 100644 test/test_data_entry.rb create mode 100644 test/test_data_hash.rb diff --git a/features/incremental_rebuild.feature b/features/incremental_rebuild.feature index 2d22d2e1..8eacaf23 100644 --- a/features/incremental_rebuild.feature +++ b/features/incremental_rebuild.feature @@ -67,6 +67,59 @@ Feature: Incremental rebuild And the _site directory should exist And I should see "Basic Site with include tag: Regenerated by Jekyll" in "_site/index.html" + Scenario: Rebuild when a data file is changed + Given I have a _data directory + And I have a "_data/colors.yml" file that contains "[red, green, blue]" + And I have a _data/members/core directory + And I have a "_data/members/core/emeritus.yml" file with content: + """ + - name: John Doe + role: Admin + """ + And I have an _includes directory + And I have an "_includes/about.html" file with content: + """ + + """ + And I have a _layouts directory + And I have a page layout that contains "{{ content }}\n\n{% include about.html %}" + And I have a home layout that contains "{{ 
content }}\n\nGenerated by Jekyll" + And I have a "_layouts/post.html" page with layout "page" that contains "{{ content }}" + And I have a "_layouts/static.html" page with layout "home" that contains "{{ content }}" + And I have an "index.html" page with layout "home" that contains "{{ site.data.colors | join: '_' }}" + And I have an "about.html" page with layout "page" that contains "About Us" + And I have a configuration file with "collections_dir" set to "collections" + And I have a collections/_posts directory + And I have the following post within the "collections" directory: + | title | date | layout | content | + | Table | 2009-03-26 | post | Post with data dependency | + | Wargames | 2009-03-27 | static | Post without data dependency | + When I run jekyll build -IV + Then I should get a zero exit status + And the _site directory should exist + And I should see "red_green_blue" in "_site/index.html" + And I should see "John Doe -- Admin" in "_site/about.html" + And I should see "Rendering: index.html" in the build output + And I should see "Rendering: _posts/2009-03-27-wargames.markdown" in the build output + When I wait 1 second + Then I have a "_data/members/core/emeritus.yml" file with content: + """ + - name: Jane Doe + role: Admin + """ + When I run jekyll build -IV + Then I should get a zero exit status + And the _site directory should exist + And I should see "red_green_blue" in "_site/index.html" + And I should see "Jane Doe -- Admin" in "_site/about.html" + And I should see "Rendering: _posts/2009-03-26-table.markdown" in the build output + But I should not see "Rendering: index.html" in the build output + And I should not see "Rendering: _posts/2009-03-27-wargames.markdown" in the build output + Scenario: Rebuild when a dependency of document in custom collection_dir is changed Given I have a _includes directory And I have a configuration file with "collections_dir" set to "collections" diff --git a/lib/jekyll.rb b/lib/jekyll.rb index 
231f8fd8..49b71e70 100644 --- a/lib/jekyll.rb +++ b/lib/jekyll.rb @@ -45,6 +45,8 @@ module Jekyll autoload :Collection, "jekyll/collection" autoload :Configuration, "jekyll/configuration" autoload :Convertible, "jekyll/convertible" + autoload :DataEntry, "jekyll/data_entry" + autoload :DataHash, "jekyll/data_hash" autoload :Deprecator, "jekyll/deprecator" autoload :Document, "jekyll/document" autoload :EntryFilter, "jekyll/entry_filter" diff --git a/lib/jekyll/data_entry.rb b/lib/jekyll/data_entry.rb new file mode 100644 index 00000000..fa267221 --- /dev/null +++ b/lib/jekyll/data_entry.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module Jekyll + class DataEntry + attr_accessor :context + attr_reader :data + + # Create a Jekyll wrapper for given parsed data object. + # + # site - The current Jekyll::Site instance. + # abs_path - Absolute path to the data source file. + # parsed_data - Parsed representation of data source file contents. + # + # Returns nothing. + def initialize(site, abs_path, parsed_data) + @site = site + @path = abs_path + @data = parsed_data + end + + # Liquid representation of current instance is the parsed data object. + # + # Mark as a dependency for regeneration here since every renderable object primarily uses the + # parsed data object while the parent resource is being rendered by Liquid. Accessing the data + # object directly via Ruby interface `#[]()` is outside the scope of regeneration. + # + # FIXME: Marking as dependency on every call is non-ideal. Optimize at a later date. + # + # Returns the parsed data object. + def to_liquid + add_regenerator_dependencies if incremental_build? + @data + end + + # -- Overrides to maintain backwards compatibility -- + + # Any missing method will be forwarded to the underlying data object stored in the instance + # variable `@data`. + def method_missing(method, *args, &block) + @data.respond_to?(method) ?
@data.send(method, *args, &block) : super + end + + def respond_to_missing?(method, *) + @data.respond_to?(method) || super + end + + def <=>(other) + data <=> (other.is_a?(self.class) ? other.data : other) + end + + def ==(other) + data == (other.is_a?(self.class) ? other.data : other) + end + + # Explicitly defined to bypass re-routing from `method_missing` hook for greater performance. + # + # Returns string representation of parsed data object. + def inspect + @data.inspect + end + + private + + def incremental_build? + @incremental = @site.config["incremental"] if @incremental.nil? + @incremental + end + + def add_regenerator_dependencies + page = context.registers[:page] + return unless page&.key?("path") + + absolute_path = \ + if page["collection"] + @site.in_source_dir(@site.config["collections_dir"], page["path"]) + else + @site.in_source_dir(page["path"]) + end + + @site.regenerator.add_dependency(absolute_path, @path) + end + end +end diff --git a/lib/jekyll/data_hash.rb b/lib/jekyll/data_hash.rb new file mode 100644 index 00000000..37ef510b --- /dev/null +++ b/lib/jekyll/data_hash.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Jekyll + # A class that behaves very similarly to Ruby's `Hash` class yet differs in how it is handled by + # Liquid. This class emulates Hash by delegation instead of inheritance to minimize overridden + # methods especially since some Hash methods return another Hash instance instead of the + # subclass instance. + class DataHash + # + # Delegate given (zero-arity) method(s) to the Hash object stored in instance variable + # `@registry`. + # NOTE: Avoiding the use of `Forwardable` module's `def_delegators` for preventing unnecessary + # creation of interim objects on multiple calls.
+ def self.delegate_to_registry(*symbols) + symbols.each { |sym| define_method(sym) { @registry.send(sym) } } + end + private_class_method :delegate_to_registry + + # -- core instance methods -- + + attr_accessor :context + + def initialize + @registry = {} + end + + def [](key) + @registry[key].tap do |value| + value.context = context if value.respond_to?(:context=) + end + end + + # `Hash#to_liquid` returns the Hash instance itself. + # Mimic that behavior by returning `self` instead of returning the `@registry` variable value. + def to_liquid + self + end + + # -- supplementary instance methods to emulate Hash -- + + delegate_to_registry :freeze, :inspect + + def merge(other, &block) + merged_registry = @registry.merge(other, &block) + dup.tap { |d| d.instance_variable_set(:@registry, merged_registry) } + end + + def merge!(other, &block) + @registry.merge!(other, &block) + self + end + + def method_missing(method, *args, &block) + @registry.send(method, *args, &block) + end + + def respond_to_missing?(method, *) + @registry.respond_to?(method) + end + end +end diff --git a/lib/jekyll/drops/site_drop.rb b/lib/jekyll/drops/site_drop.rb index cc3c60fb..13c5757b 100644 --- a/lib/jekyll/drops/site_drop.rb +++ b/lib/jekyll/drops/site_drop.rb @@ -7,7 +7,6 @@ module Jekyll mutable false - delegate_method_as :site_data, :data delegate_methods :time, :pages, :static_files, :tags, :categories private delegate_method_as :config, :fallback_data @@ -24,6 +23,12 @@ module Jekyll (key != "posts" && @obj.collections.key?(key)) || super end + def data + @obj.site_data.tap do |value| + value.context = @context if value.respond_to?(:context=) + end + end + def posts @site_posts ||= @obj.posts.docs.sort { |a, b| b <=> a } end diff --git a/lib/jekyll/readers/data_reader.rb b/lib/jekyll/readers/data_reader.rb index 80b57bd6..f95556da 100644 --- a/lib/jekyll/readers/data_reader.rb +++ b/lib/jekyll/readers/data_reader.rb @@ -6,7 +6,7 @@ module Jekyll def initialize(site, in_source_dir: 
nil) @site = site - @content = {} + @content = DataHash.new @entry_filter = EntryFilter.new(site) @in_source_dir = in_source_dir || @site.method(:in_source_dir) @source_dir = @in_source_dir.call("/") @@ -24,6 +24,8 @@ module Jekyll @content end + # rubocop:disable Metrics/AbcSize + # Read and parse all .yaml, .yml, .json, .csv and .tsv # files under <dir> and add them to the <data> variable. # @@ -43,13 +45,14 @@ module Jekyll next if @entry_filter.symlink?(path) if File.directory?(path) - read_data_to(path, data[sanitize_filename(entry)] = {}) + read_data_to(path, data[sanitize_filename(entry)] = DataHash.new) else key = sanitize_filename(File.basename(entry, ".*")) - data[key] = read_data_file(path) + data[key] = DataEntry.new(site, path, read_data_file(path)) end end end + # rubocop:enable Metrics/AbcSize # Determines how to read a data file. # diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 2a965270..8eb807d7 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -47,7 +47,14 @@ module Jekyll end def mergable?(value) - value.is_a?(Hash) || value.is_a?(Drops::Drop) + case value + when Hash, Drops::Drop, DataHash + true + when DataEntry + mergable?(value.data) + else + false + end end def duplicable?(obj) diff --git a/test/source/_data/boolean.yml b/test/source/_data/boolean.yml new file mode 100644 index 00000000..fe75c80b --- /dev/null +++ b/test/source/_data/boolean.yml @@ -0,0 +1 @@ +true diff --git a/test/source/_data/languages_plus.yml b/test/source/_data/languages_plus.yml new file mode 100644 index 00000000..fb98401b --- /dev/null +++ b/test/source/_data/languages_plus.yml @@ -0,0 +1,4 @@ +- java +- ruby +- rust +- golang diff --git a/test/test_data_entry.rb b/test/test_data_entry.rb new file mode 100644 index 00000000..dc6e8e93 --- /dev/null +++ b/test/test_data_entry.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require "helper" + +class TestDataEntry < JekyllUnitTest + context "Data Entry" do + setup do + site = fixture_site + site.read
+ @data_hash = site.site_data + end + + should "expose underlying data object as Liquid representation" do + subject = @data_hash["languages"] + assert_equal Jekyll::DataEntry, subject.class + assert_equal subject.data, subject.to_liquid + end + + should "respond to `#[](key)` when expected to but raise Exception otherwise" do + greeting = @data_hash["greetings"] + assert greeting["foo"] + + boolean = @data_hash["boolean"] # the value is a Boolean. + assert_raises(NoMethodError) { boolean["foo"] } + end + + should "compare with another instance of same class using underlying data" do + assert_equal( + [%w(java ruby), %w(java ruby rust golang)], + [@data_hash["languages_plus"], @data_hash["languages"]].sort + ) + end + end +end diff --git a/test/test_data_hash.rb b/test/test_data_hash.rb new file mode 100644 index 00000000..022c2543 --- /dev/null +++ b/test/test_data_hash.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require "helper" + +class TestDataHash < JekyllUnitTest + context "Data Hash" do + setup do + @site = fixture_site + @site.read + end + + should "only mimic a ::Hash instance" do + subject = @site.site_data + assert_equal Jekyll::DataHash, subject.class + refute subject.is_a?(Hash) + + copy = subject.dup + assert copy["greetings"]["foo"] + assert_includes copy.dig("greetings", "foo"), "Hello!" + + copy["greetings"] = "Hola!"
+ assert_equal "Hola!", copy["greetings"] + refute copy["greetings"]["foo"] + + frozen_data_hash = Jekyll::DataHash.new.freeze + assert_raises(FrozenError) { frozen_data_hash["lorem"] = "ipsum" } + end + + should "be mergable" do + alpha = Jekyll::DataHash.new + beta = Jekyll::DataHash.new + + assert_equal "{}", alpha.inspect + sample_data = { "foo" => "bar" } + + assert_equal sample_data["foo"], alpha.merge(sample_data)["foo"] + assert_equal alpha.class, alpha.merge(sample_data).class + assert_empty alpha + + beta.merge!(sample_data) + assert_equal sample_data["foo"], alpha.merge(beta)["foo"] + assert_equal alpha.class, alpha.merge(beta).class + assert_empty alpha + + beta.merge!(@site.site_data) + assert_equal alpha.class, beta.class + assert_includes beta.dig("greetings", "foo"), "Hello!" + + assert_empty alpha + assert_equal sample_data["foo"], Jekyll::Utils.deep_merge_hashes(alpha, sample_data)["foo"] + assert_includes( + Jekyll::Utils.deep_merge_hashes(alpha, beta).dig("greetings", "foo"), + "Hello!" + ) + end + end +end