Add Cache class (#7169)

Merge pull request 7169
This commit is contained in:
Pat Hawks 2018-08-21 02:38:53 -05:00 committed by jekyllbot
parent 30568ca7b8
commit 24dd9f1457
7 changed files with 268 additions and 0 deletions

View File

@ -0,0 +1,87 @@
---
layout: tutorials
permalink: /tutorials/cache-api/
title: Cache API
---
Jekyll includes a caching API that is used internally and is also exposed to
plugins. It can be used to cache the output of deterministic functions in order
to speed up site generation. The cache is persistent across builds, but is
cleared whenever Jekyll detects a change to `_config.yml`.
## Jekyll::Cache.new(name) → new_cache
If there has already been a cache created with `name`, this will return a
reference to that existing Cache. Otherwise, create a new Cache called `name`.
If this Cache will be used by a Gem-packaged plugin, `name` should either be the
name of the Gem, or prefixed with the name of the Gem followed by `::` (if a
plugin expects to use multiple Caches). If this Cache will be used internally by
Jekyll, `name` should be the name of the class that is using the Cache (e.g.
`"Jekyll::Converters::Markdown"`).
Cached objects are shared between all Caches created with the same `name`, but
are _not_ shared between Caches with different names. There can be an object
stored with key `1` in `Jekyll::Cache.new("a")` and an object stored with key
`1` in `Jekyll::Cache.new("b")` and these will not point to the same cached
object. This way, you do not need to ensure that keys are globally unique.
## getset(key) {block}
This is the most common way to utilize the Cache.
`block` is a bit of code that takes a lot of time to compute, but always
generates the same output given a particular input (like converting Markdown to
HTML). `key` is a `String` (or an object with `to_s`) that uniquely identifies
the input to the function.
If `key` already exists in the Cache, it will be returned and `block` will never
be executed. If `key` does not exist in the Cache, `block` will be executed and
the result will be added to the Cache and returned.
```ruby
def cache
@@cache ||= Jekyll::Cache.new("ConvertMarkdown")
end
def convert_markdown_to_html(markdown)
cache.getset(markdown) do
expensive_conversion_method(markdown)
end
end
```
In the above example, `expensive_conversion_method` will only be called once for
any given `markdown` input. If `convert_markdown_to_html` is called a second
time with the same input, the cached output will be returned.
Because posts will frequently remain unchanged from one build to the next, this
is an effective way to avoid performing the same computations each time the site
is built.
## clear
This will clear all cached objects from a particular Cache. The Cache will be
empty, both in memory and on disk.
### The following methods will probably only be used in special circumstances
## cache[key] → value
Fetches `key` from Cache and returns its `value`. Raises if `key` does not exist
in Cache.
## cache[key] = value
Adds `value` to Cache under `key`.
Returns nothing.
## key?(key) → true or false
Returns `true` if `key` already exists in Cache, `false` otherwise.
## delete(key)
Removes `key` from Cache.
Returns nothing.

View File

@ -54,6 +54,7 @@ module Jekyll
autoload :FrontmatterDefaults, "jekyll/frontmatter_defaults" autoload :FrontmatterDefaults, "jekyll/frontmatter_defaults"
autoload :Hooks, "jekyll/hooks" autoload :Hooks, "jekyll/hooks"
autoload :Layout, "jekyll/layout" autoload :Layout, "jekyll/layout"
autoload :Cache, "jekyll/cache"
autoload :CollectionReader, "jekyll/readers/collection_reader" autoload :CollectionReader, "jekyll/readers/collection_reader"
autoload :DataReader, "jekyll/readers/data_reader" autoload :DataReader, "jekyll/readers/data_reader"
autoload :LayoutReader, "jekyll/readers/layout_reader" autoload :LayoutReader, "jekyll/readers/layout_reader"

168
lib/jekyll/cache.rb Normal file
View File

@ -0,0 +1,168 @@
# frozen_string_literal: true
require "digest"
module Jekyll
  # A named key/value cache with two levels: an in-memory Hash shared by every
  # Cache created with the same name, and (unless disabled) one Marshaled file
  # per key underneath `.jekyll-cache/Jekyll/Cache/<name>/`.
  class Cache
    # rubocop:disable Style/ClassVars
    @@caches = {}
    @@disk_cache_enabled = true

    # Get an existing named cache, or create a new one if none exists
    #
    # name - name of the cache (String). Characters other than word
    #        characters, whitespace and `-` are replaced by `-` to build the
    #        directory name used on disk.
    #
    # Returns nothing.
    def initialize(name)
      # Resolve the on-disk root as soon as the first cache is created, so it
      # is anchored to the working directory at that moment.
      Cache.base_dir
      @cache = @@caches[name] ||= {}
      @name = name.gsub(%r![^\w\s-]!, "-")
    end

    # Absolute path of the directory that holds every cache's files.
    # Resolved against the current working directory on first use, then
    # memoized, so that class-level operations such as `Cache.clear` work even
    # when no Cache instance has been created yet.
    #
    # Returns the path as a String.
    def self.base_dir
      @@base_dir ||= File.expand_path(".jekyll-cache/Jekyll/Cache")
    end

    # Disable Marshaling cached items to disk
    def self.disable_disk_cache!
      @@disk_cache_enabled = false
    end
    # rubocop:enable Style/ClassVars

    # Clear all caches, both in memory and (if enabled) on disk
    def self.clear
      delete_cache_files
      @@caches.each_value(&:clear)
    end

    # Clear this particular cache, both in memory and (if enabled) on disk
    def clear
      delete_cache_files
      @cache.clear
    end

    # Retrieve a cached item
    # Looks in memory first, then on disk (if the disk cache is enabled); an
    # item found on disk is kept in memory for subsequent lookups.
    #
    # Raises RuntimeError if key does not exist in cache
    #
    # Returns cached value
    def [](key)
      return @cache[key] if @cache.key?(key)

      path = path_to(digest_for(key))
      unless @@disk_cache_enabled && File.file?(path) && File.readable?(path)
        # An explicit message keeps this from re-raising an unrelated `$!`.
        raise "key not found in cache"
      end

      @cache[key] = load(path)
    end

    # Add an item to cache
    #
    # Returns nothing.
    def []=(key, value)
      @cache[key] = value
      return unless @@disk_cache_enabled

      dump(path_to(digest_for(key)), value)
    end

    # If an item already exists in the cache, retrieve it
    # Else execute code block, and add the result to the cache, and return that
    # result
    #
    # Any StandardError raised by the lookup (including a cached file that
    # cannot be loaded) is treated as a cache miss.
    def getset(key)
      self[key]
    rescue StandardError
      value = yield
      self[key] = value
      value
    end

    # Remove one particular item from the cache
    # Removing a key that was never cached is a no-op.
    #
    # Returns nothing.
    def delete(key)
      @cache.delete(key)
      return unless @@disk_cache_enabled

      path = path_to(digest_for(key))
      # The item may never have been written to disk; nothing to remove then.
      File.delete(path) if File.file?(path)
      nil
    end

    # Check if `key` already exists in this cache
    #
    # Returns true if key exists in the cache, false otherwise
    def key?(key)
      # First, check if item is already cached in memory
      return true if @cache.key?(key)
      # Otherwise, it might be cached on disk
      # but we should not consider the disk cache if it is disabled
      return false unless @@disk_cache_enabled

      path = path_to(digest_for(key))
      File.file?(path) && File.readable?(path)
    end

    # Compare the current config to the cached config
    # If they are different, clear all caches
    #
    # config - the configuration object; its `inspect` output is what gets
    #          stored and compared.
    #
    # Returns nothing.
    def self.clear_if_config_changed(config)
      config = config.inspect
      cache = Jekyll::Cache.new "Jekyll::Cache"
      return if cache.key?("config") && cache["config"] == config

      # `clear` empties the shared in-memory Hash in place, so `cache` stays
      # usable afterwards and does not need to be re-created.
      clear
      cache["config"] = config
      nil
    end

    private

    # Given a hashed key, return the path to where this item would be saved on
    # disk. Without an argument, return this cache's own directory.
    def path_to(digest = nil)
      @base_dir ||= File.join(Cache.base_dir, @name)
      return @base_dir if digest.nil?

      # Shard by the first two hex characters to keep directories small.
      File.join(@base_dir, digest[0..1], digest[2..-1]).freeze
    end

    # Given a key, return a SHA2 hex digest that can be used for caching this
    # item to disk. The key is converted with `to_s`, so any object with a
    # meaningful `to_s` is accepted. (Deliberately not named `hash`, which
    # would shadow Object#hash and break using a Cache as a Hash key.)
    def digest_for(key)
      Digest::SHA2.hexdigest(key.to_s).freeze
    end

    # Remove all of this cache's items from disk
    #
    # Returns nothing.
    def delete_cache_files
      FileUtils.rm_rf(path_to) if @@disk_cache_enabled
    end

    # Delete all cached items from all caches
    #
    # Returns nothing.
    def self.delete_cache_files
      FileUtils.rm_rf(base_dir) if @@disk_cache_enabled
    end
    private_class_method :delete_cache_files

    # Load `path` from disk and return the result
    # This MUST NEVER be called in Safe Mode
    #
    # Raises RuntimeError if the disk cache is disabled.
    # rubocop:disable Security/MarshalLoad
    def load(path)
      raise "disk cache is disabled" unless @@disk_cache_enabled

      # Block form closes the file even when Marshal.load raises.
      File.open(path, "rb") { |cached_file| Marshal.load(cached_file) }
    end
    # rubocop:enable Security/MarshalLoad

    # Given a path and a value, save value to disk at path
    # This should NEVER be called in Safe Mode
    #
    # Returns nothing.
    def dump(path, value)
      return unless @@disk_cache_enabled

      FileUtils.mkdir_p(File.dirname(path))
      File.open(path, "wb") do |cached_file|
        Marshal.dump(value, cached_file)
      end
    end
  end
end

View File

@ -51,6 +51,7 @@ module Jekyll
# keep using `gems` to avoid breaking change # keep using `gems` to avoid breaking change
self.gems = config["plugins"] self.gems = config["plugins"]
configure_cache
configure_plugins configure_plugins
configure_theme configure_theme
configure_include_paths configure_include_paths
@ -100,6 +101,7 @@ module Jekyll
raise ArgumentError, "limit_posts must be a non-negative number" if limit_posts.negative? raise ArgumentError, "limit_posts must be a non-negative number" if limit_posts.negative?
Jekyll::Cache.clear_if_config_changed config
Jekyll::Hooks.trigger :site, :after_reset, self Jekyll::Hooks.trigger :site, :after_reset, self
end end
@ -421,6 +423,11 @@ module Jekyll
@site_cleaner ||= Cleaner.new(self) @site_cleaner ||= Cleaner.new(self)
end end
# Safe Mode: turn off Marshaling of cached objects to disk.
# Outside Safe Mode the cache configuration is left untouched.
def configure_cache
  return unless safe

  Jekyll::Cache.disable_disk_cache!
end
def configure_plugins def configure_plugins
self.plugin_manager = Jekyll::PluginManager.new(self) self.plugin_manager = Jekyll::PluginManager.new(self)
self.plugins = plugin_manager.plugins_path self.plugins = plugin_manager.plugins_path

View File

@ -1,4 +1,5 @@
_site _site
.sass-cache .sass-cache
.jekyll-cache
.jekyll-metadata .jekyll-metadata
vendor vendor

View File

@ -1,5 +1,6 @@
*.gem *.gem
.bundle .bundle
.jekyll-cache
.sass-cache .sass-cache
_site _site
Gemfile.lock Gemfile.lock

View File

@ -76,6 +76,9 @@ class TestSite < JekyllUnitTest
allow(File).to receive(:directory?).with(theme_dir("_sass")).and_return(true) allow(File).to receive(:directory?).with(theme_dir("_sass")).and_return(true)
allow(File).to receive(:directory?).with(theme_dir("_layouts")).and_return(true) allow(File).to receive(:directory?).with(theme_dir("_layouts")).and_return(true)
allow(File).to receive(:directory?).with(theme_dir("_includes")).and_return(false) allow(File).to receive(:directory?).with(theme_dir("_includes")).and_return(false)
allow(File).to receive(:directory?).with(
File.expand_path(".jekyll-cache/Jekyll/Cache/Jekyll--Cache")
).and_return(true)
site = fixture_site("theme" => "test-theme") site = fixture_site("theme" => "test-theme")
assert_equal [source_dir("_includes")], site.includes_load_paths assert_equal [source_dir("_includes")], site.includes_load_paths
end end