Add latin mode to slugify (#6509)

Merge pull request 6509
This commit is contained in:
Alex Tsui 2017-11-02 20:07:25 -07:00 committed by jekyllbot
parent 53d48d52e7
commit 93e3eb06d2
5 changed files with 67 additions and 22 deletions

View File

@ -293,6 +293,18 @@ you come up with your own tags via plugins.
<p>
<code class="output">the-_config.yml-file</code>
</p>
<p>
<code class="filter">{% raw %}{{ "The _cönfig.yml file" | slugify: 'ascii' }}{% endraw %}</code>
</p>
<p>
<code class="output">the-c-nfig-yml-file</code>
</p>
<p>
<code class="filter">{% raw %}{{ "The cönfig.yml file" | slugify: 'latin' }}{% endraw %}</code>
</p>
<p>
<code class="output">the-config-yml-file</code>
</p>
</td>
</tr>
<tr>
@ -416,6 +428,8 @@ The default is `default`. They are as follows (with what they filter):
- `raw`: spaces
- `default`: spaces and non-alphanumeric characters
- `pretty`: spaces and non-alphanumeric characters except for `._~!$&'()+,;=@`
- `ascii`: spaces, non-alphanumeric, and non-ASCII characters
- `latin`: like `default`, except Latin characters are first transliterated (e.g. `àèïòü` to `aeiou`)
## Tags

View File

@ -32,6 +32,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency("addressable", "~> 2.4")
s.add_runtime_dependency("colorator", "~> 1.0")
s.add_runtime_dependency("i18n", "~> 0.7")
s.add_runtime_dependency("jekyll-sass-converter", "~> 1.0")
s.add_runtime_dependency("jekyll-watch", "~> 1.1")
s.add_runtime_dependency("kramdown", "~> 1.14")

View File

@ -32,8 +32,10 @@ require "safe_yaml/load"
require "liquid"
require "kramdown"
require "colorator"
require "i18n"
SafeYAML::OPTIONS[:suppress_warnings] = true
I18n.config.available_locales = :en
module Jekyll
# internal requires

View File

@ -1,4 +1,3 @@
# frozen_string_literal: true
module Jekyll
@ -12,7 +11,7 @@ module Jekyll
autoload :WinTZ, "jekyll/utils/win_tz"
# Constants for use in #slugify
SLUGIFY_MODES = %w(raw default pretty ascii).freeze
SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze
SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze
SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze
SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze
@ -170,6 +169,10 @@ module Jekyll
# When mode is "ascii", everything except the ASCII characters
# a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with a hyphen.
#
# When mode is "latin", the input string is first preprocessed so that
# any letters with accents are replaced with the plain letter. Afterwards,
# it follows the "default" mode of operation.
#
# If cased is true, all uppercase letters in the result string are
# replaced with their lowercase counterparts.
#
@ -184,7 +187,10 @@ module Jekyll
# # => "The-_config.yml file"
#
# slugify("The _config.yml file", "ascii")
# # => "the-config.yml-file"
# # => "the-config-yml-file"
#
# slugify("The _config.yml file", "latin")
# # => "the-config-yml-file"
#
# Returns the slugified string.
def slugify(string, mode: nil, cased: false)
@ -195,26 +201,10 @@ module Jekyll
return cased ? string : string.downcase
end
# Replace each character sequence with a hyphen
re =
case mode
when "raw"
SLUGIFY_RAW_REGEXP
when "default"
SLUGIFY_DEFAULT_REGEXP
when "pretty"
# "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
# and is allowed in both extN and NTFS.
SLUGIFY_PRETTY_REGEXP
when "ascii"
# For web servers not being able to handle Unicode, the safe
# method is to ditch anything else but latin letters and numeric
# digits.
SLUGIFY_ASCII_REGEXP
end
# Drop accent marks from latin characters. Everything else turns to ?
string = ::I18n.transliterate(string) if mode == "latin"
# Strip according to the mode
slug = string.gsub(re, "-")
slug = replace_character_sequence_with_hyphen(string, :mode => mode)
# Remove leading/trailing hyphen
slug.gsub!(%r!^\-|\-$!i, "")
@ -337,5 +327,32 @@ module Jekyll
target[key] = val.dup if val.frozen? && duplicable?(val)
end
end
# Replace each character sequence with a hyphen.
#
# See Utils#slugify for a description of the character sequence specified
# by each mode.
private
def replace_character_sequence_with_hyphen(string, mode: "default")
  # Pick the pattern describing what this mode replaces; any mode without
  # a dedicated pattern falls back to the default one.
  pattern =
    case mode
    when "raw" then SLUGIFY_RAW_REGEXP
    # "._~!$&'()+,;=@" stay human readable (not URI-escaped) in a URL
    # and are allowed in both extN and NTFS, so "pretty" keeps them.
    when "pretty" then SLUGIFY_PRETTY_REGEXP
    # Some web servers cannot handle Unicode; the safe approach is to
    # drop everything except latin letters and numeric digits.
    when "ascii" then SLUGIFY_ASCII_REGEXP
    else SLUGIFY_DEFAULT_REGEXP
    end

  # Every match of the chosen pattern is replaced with a hyphen.
  string.gsub(pattern, "-")
end
end
end

View File

@ -207,6 +207,17 @@ class TestUtils < JekyllUnitTest
Utils.slugify("fürtive glance!!!!", :mode => "ascii")
end
should "map accented latin characters to ASCII characters" do
  # Each pair is [input, expected slug] for the "latin" mode; the pairs are
  # checked in the order listed.
  [
    ["The _config.yml file?", "the-config-yml-file"],
    ["fürtive glance!!!!", "furtive-glance"],
    ["àáçèéíïòóúü", "aaceeiioouu"],
    ["Aあわれ鬱господинZ", "a-z"],
  ].each do |input, expected|
    assert_equal expected, Utils.slugify(input, :mode => "latin")
  end
end
should "only replace whitespace if mode is raw" do
assert_equal(
"the-_config.yml-file?",