diff --git a/docs/_docs/templates.md b/docs/_docs/templates.md
index 38e55717..81eb5b2a 100644
--- a/docs/_docs/templates.md
+++ b/docs/_docs/templates.md
@@ -293,6 +293,18 @@ you come up with your own tags via plugins.
the-_config.yml-file
+
+ {% raw %}{{ "The _cönfig.yml file" | slugify: 'ascii' }}{% endraw %}
+
+
+ the-c-nfig-yml-file
+
+
+ {% raw %}{{ "The _cönfig.yml file" | slugify: 'latin' }}{% endraw %}
+
+
+ the-config-yml-file
+
@@ -416,6 +428,8 @@ The default is `default`. They are as follows (with what they filter):
- `raw`: spaces
- `default`: spaces and non-alphanumeric characters
- `pretty`: spaces and non-alphanumeric characters except for `._~!$&'()+,;=@`
+- `ascii`: spaces, non-alphanumeric, and non-ASCII characters
+- `latin`: like `default`, except Latin characters are first transliterated (e.g. `àèïòü` to `aeiou`)
## Tags
diff --git a/jekyll.gemspec b/jekyll.gemspec
index 74bcd8e3..98fd5382 100644
--- a/jekyll.gemspec
+++ b/jekyll.gemspec
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency("addressable", "~> 2.4")
s.add_runtime_dependency("colorator", "~> 1.0")
+ s.add_runtime_dependency("i18n", "~> 0.7")
s.add_runtime_dependency("jekyll-sass-converter", "~> 1.0")
s.add_runtime_dependency("jekyll-watch", "~> 1.1")
s.add_runtime_dependency("kramdown", "~> 1.14")
diff --git a/lib/jekyll.rb b/lib/jekyll.rb
index cdcab8bd..733796cc 100644
--- a/lib/jekyll.rb
+++ b/lib/jekyll.rb
@@ -32,8 +32,10 @@ require "safe_yaml/load"
require "liquid"
require "kramdown"
require "colorator"
+require "i18n"
SafeYAML::OPTIONS[:suppress_warnings] = true
+I18n.config.available_locales = :en
module Jekyll
# internal requires
diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb
index 558a7586..55cf9be1 100644
--- a/lib/jekyll/utils.rb
+++ b/lib/jekyll/utils.rb
@@ -1,4 +1,3 @@
-
# frozen_string_literal: true
module Jekyll
@@ -12,7 +11,7 @@ module Jekyll
autoload :WinTZ, "jekyll/utils/win_tz"
# Constants for use in #slugify
- SLUGIFY_MODES = %w(raw default pretty ascii).freeze
+ SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze
SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze
SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze
SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze
@@ -170,6 +169,10 @@ module Jekyll
# When mode is "ascii", some everything else except ASCII characters
# a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) are not replaced with hyphen.
#
+ # When mode is "latin", the input string is first preprocessed so that
+ # any letters with accents are replaced with the plain letter. Afterwards,
+ # it follows the "default" mode of operation.
+ #
# If cased is true, all uppercase letters in the result string are
# replaced with their lowercase counterparts.
#
@@ -184,7 +187,10 @@ module Jekyll
# # => "The-_config.yml file"
#
# slugify("The _config.yml file", "ascii")
- # # => "the-config.yml-file"
+ # # => "the-config-yml-file"
+ #
+ # slugify("The _config.yml file", "latin")
+ # # => "the-config-yml-file"
#
# Returns the slugified string.
def slugify(string, mode: nil, cased: false)
@@ -195,26 +201,10 @@ module Jekyll
return cased ? string : string.downcase
end
- # Replace each character sequence with a hyphen
- re =
- case mode
- when "raw"
- SLUGIFY_RAW_REGEXP
- when "default"
- SLUGIFY_DEFAULT_REGEXP
- when "pretty"
- # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
- # and is allowed in both extN and NTFS.
- SLUGIFY_PRETTY_REGEXP
- when "ascii"
- # For web servers not being able to handle Unicode, the safe
- # method is to ditch anything else but latin letters and numeric
- # digits.
- SLUGIFY_ASCII_REGEXP
- end
+ # Drop accent marks from Latin characters; anything untransliterable becomes "?".
+ string = ::I18n.transliterate(string) if mode == "latin"
- # Strip according to the mode
- slug = string.gsub(re, "-")
+ slug = replace_character_sequence_with_hyphen(string, :mode => mode)
# Remove leading/trailing hyphen
slug.gsub!(%r!^\-|\-$!i, "")
@@ -337,5 +327,32 @@ module Jekyll
target[key] = val.dup if val.frozen? && duplicable?(val)
end
end
+
+ # Replace every match of the regexp selected by mode with a hyphen and
+ # return the resulting new String. "raw", "pretty" and "ascii" each pick
+ # their own regexp; any other mode uses the default one. See Utils#slugify
+ # for a description of the character sequence each mode targets.
+ private
+ def replace_character_sequence_with_hyphen(string, mode: "default")
+ replaceable_char =
+ case mode
+ when "raw"
+ SLUGIFY_RAW_REGEXP
+ when "pretty"
+ # The characters "._~!$&'()+,;=@" stay human readable (not URI-escaped)
+ # in a URL and are allowed in both extN and NTFS, so they are kept.
+ SLUGIFY_PRETTY_REGEXP
+ when "ascii"
+ # For web servers that cannot handle Unicode, the safe approach is
+ # to replace everything except ASCII letters (a-z, A-Z) and the
+ # digits 0-9.
+ SLUGIFY_ASCII_REGEXP
+ else
+ SLUGIFY_DEFAULT_REGEXP # also used by "latin", which slugify transliterates first
+ end
+
+ # Substitute a hyphen for each match of the chosen regexp
+ string.gsub(replaceable_char, "-")
+ end
end
end
diff --git a/test/test_utils.rb b/test/test_utils.rb
index 771f2ade..844ef825 100644
--- a/test/test_utils.rb
+++ b/test/test_utils.rb
@@ -207,6 +207,17 @@ class TestUtils < JekyllUnitTest
Utils.slugify("fürtive glance!!!!", :mode => "ascii")
end
+ should "map accented latin characters to ASCII characters" do
+ assert_equal "the-config-yml-file",
+ Utils.slugify("The _config.yml file?", :mode => "latin")
+ assert_equal "furtive-glance",
+ Utils.slugify("fürtive glance!!!!", :mode => "latin")
+ assert_equal "aaceeiioouu", # each accented letter maps to its base letter
+ Utils.slugify("àáçèéíïòóúü", :mode => "latin")
+ assert_equal "a-z", # the untransliterable run in the middle yields one hyphen
+ Utils.slugify("Aあわれ鬱господинZ", :mode => "latin")
+ end
+
should "only replace whitespace if mode is raw" do
assert_equal(
"the-_config.yml-file?",