Use a Schwartzian transform with custom sorting (#6342)
Merge pull request 6342
This commit is contained in:
parent
1a4f53de14
commit
6ce912e957
|
@ -0,0 +1,115 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
# frozen_string_literal: true
|
||||||
|
#
|
||||||
|
# The Ruby documentation for #sort_by describes what's called a Schwartzian transform:
|
||||||
|
#
|
||||||
|
# > A more efficient technique is to cache the sort keys (modification times in this case)
|
||||||
|
# > before the sort. Perl users often call this approach a Schwartzian transform, after
|
||||||
|
# > Randal Schwartz. We construct a temporary array, where each element is an array
|
||||||
|
# > containing our sort key along with the filename. We sort this array, and then extract
|
||||||
|
# > the filename from the result.
|
||||||
|
# > This is exactly what sort_by does internally.
|
||||||
|
#
|
||||||
|
# The well-documented efficiency of sort_by is a good reason to use it. However, when a property
|
||||||
|
# does not exist on an item being sorted, it can cause issues (no nil's allowed!)
|
||||||
|
# In Jekyll::Filters#sort_input, we extract the property in each iteration of #sort,
|
||||||
|
# which is quite inefficient! How inefficient? This benchmark will tell you just how, and how much
|
||||||
|
# it can be improved by using the Schwartzian transform. Thanks, Randall!
|
||||||
|
|
||||||
|
require 'benchmark/ips'
|
||||||
|
require 'minitest'
|
||||||
|
require File.expand_path("../lib/jekyll", __dir__)
|
||||||
|
|
||||||
|
def site
|
||||||
|
@site ||= Jekyll::Site.new(
|
||||||
|
Jekyll.configuration("source" => File.expand_path("../docs", __dir__))
|
||||||
|
).tap(&:reset).tap(&:read)
|
||||||
|
end
|
||||||
|
|
||||||
|
def site_docs
|
||||||
|
site.collections["docs"].docs.dup
|
||||||
|
end
|
||||||
|
|
||||||
|
def sort_by_property_directly(docs, meta_key)
|
||||||
|
docs.sort! do |apple, orange|
|
||||||
|
apple_property = apple[meta_key]
|
||||||
|
orange_property = orange[meta_key]
|
||||||
|
|
||||||
|
if !apple_property.nil? && !orange_property.nil?
|
||||||
|
apple_property <=> orange_property
|
||||||
|
elsif !apple_property.nil? && orange_property.nil?
|
||||||
|
-1
|
||||||
|
elsif apple_property.nil? && !orange_property.nil?
|
||||||
|
1
|
||||||
|
else
|
||||||
|
apple <=> orange
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def schwartzian_transform(docs, meta_key)
|
||||||
|
docs.collect! { |d|
|
||||||
|
[d[meta_key], d]
|
||||||
|
}.sort! { |apple, orange|
|
||||||
|
if !apple[0].nil? && !orange[0].nil?
|
||||||
|
apple.first <=> orange.first
|
||||||
|
elsif !apple[0].nil? && orange[0].nil?
|
||||||
|
-1
|
||||||
|
elsif apple[0].nil? && !orange[0].nil?
|
||||||
|
1
|
||||||
|
else
|
||||||
|
apple[-1] <=> orange[-1]
|
||||||
|
end
|
||||||
|
}.collect! { |d| d[-1] }
|
||||||
|
end
|
||||||
|
|
||||||
|
# Before we test efficiency, do they produce the same output?
|
||||||
|
class Correctness
|
||||||
|
include Minitest::Assertions
|
||||||
|
|
||||||
|
require "pp"
|
||||||
|
define_method :mu_pp, &:pretty_inspect
|
||||||
|
|
||||||
|
attr_accessor :assertions
|
||||||
|
|
||||||
|
def initialize(docs, property)
|
||||||
|
@assertions = 0
|
||||||
|
@docs = docs
|
||||||
|
@property = property
|
||||||
|
end
|
||||||
|
|
||||||
|
def assert!
|
||||||
|
assert sort_by_property_directly(@docs, @property).is_a?(Array), "sort_by_property_directly must return an array"
|
||||||
|
assert schwartzian_transform(@docs, @property).is_a?(Array), "schwartzian_transform must return an array"
|
||||||
|
assert_equal sort_by_property_directly(@docs, @property),
|
||||||
|
schwartzian_transform(@docs, @property)
|
||||||
|
puts "Yeah, ok, correctness all checks out for property #{@property.inspect}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
Correctness.new(site_docs, "redirect_from".freeze).assert!
|
||||||
|
Correctness.new(site_docs, "title".freeze).assert!
|
||||||
|
|
||||||
|
# First, test with a property only a handful of documents have.
|
||||||
|
Benchmark.ips do |x|
|
||||||
|
x.config(time: 10, warmup: 5)
|
||||||
|
x.report('sort_by_property_directly with sparse property') do
|
||||||
|
sort_by_property_directly(site_docs, "redirect_from".freeze)
|
||||||
|
end
|
||||||
|
x.report('schwartzian_transform with sparse property') do
|
||||||
|
schwartzian_transform(site_docs, "redirect_from".freeze)
|
||||||
|
end
|
||||||
|
x.compare!
|
||||||
|
end
|
||||||
|
|
||||||
|
# Next, test with a property they all have.
|
||||||
|
Benchmark.ips do |x|
|
||||||
|
x.config(time: 10, warmup: 5)
|
||||||
|
x.report('sort_by_property_directly with non-sparse property') do
|
||||||
|
sort_by_property_directly(site_docs, "title".freeze)
|
||||||
|
end
|
||||||
|
x.report('schwartzian_transform with non-sparse property') do
|
||||||
|
schwartzian_transform(site_docs, "title".freeze)
|
||||||
|
end
|
||||||
|
x.compare!
|
||||||
|
end
|
|
@ -335,19 +335,26 @@ module Jekyll
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
def sort_input(input, property, order)
|
|
||||||
input.sort do |apple, orange|
|
|
||||||
apple_property = item_property(apple, property)
|
|
||||||
orange_property = item_property(orange, property)
|
|
||||||
|
|
||||||
if !apple_property.nil? && orange_property.nil?
|
# Sort the input Enumerable by the given property.
|
||||||
- order
|
# If the property doesn't exist, return the sort order respective of
|
||||||
elsif apple_property.nil? && !orange_property.nil?
|
# which item doesn't have the property.
|
||||||
+ order
|
# We also utilize the Schwartzian transform to make this more efficient.
|
||||||
else
|
def sort_input(input, property, order)
|
||||||
apple_property <=> orange_property
|
input.map { |item| [item_property(item, property), item] }
|
||||||
|
.sort! do |apple_info, orange_info|
|
||||||
|
apple_property = apple_info.first
|
||||||
|
orange_property = orange_info.first
|
||||||
|
|
||||||
|
if !apple_property.nil? && orange_property.nil?
|
||||||
|
- order
|
||||||
|
elsif apple_property.nil? && !orange_property.nil?
|
||||||
|
+ order
|
||||||
|
else
|
||||||
|
apple_property <=> orange_property
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
.map!(&:last)
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
Loading…
Reference in New Issue