diff --git a/docs/_docs/templates.md b/docs/_docs/templates.md index 38e55717..81eb5b2a 100644 --- a/docs/_docs/templates.md +++ b/docs/_docs/templates.md @@ -293,6 +293,18 @@ you come up with your own tags via plugins.

the-_config.yml-file

+

+ {% raw %}{{ "The _cönfig.yml file" | slugify: 'ascii' }}{% endraw %} +

+

+ the-c-nfig-yml-file +

+

+ {% raw %}{{ "The _cönfig.yml file" | slugify: 'latin' }}{% endraw %} +

+

+ the-config-yml-file +

@@ -416,6 +428,8 @@ The default is `default`. They are as follows (with what they filter): - `raw`: spaces - `default`: spaces and non-alphanumeric characters - `pretty`: spaces and non-alphanumeric characters except for `._~!$&'()+,;=@` +- `ascii`: spaces, non-alphanumeric, and non-ASCII characters +- `latin`: like `default`, except Latin characters are first transliterated (e.g. `àèïòü` to `aeiou`) ## Tags diff --git a/jekyll.gemspec b/jekyll.gemspec index 74bcd8e3..98fd5382 100644 --- a/jekyll.gemspec +++ b/jekyll.gemspec @@ -32,6 +32,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency("addressable", "~> 2.4") s.add_runtime_dependency("colorator", "~> 1.0") + s.add_runtime_dependency("i18n", "~> 0.7") s.add_runtime_dependency("jekyll-sass-converter", "~> 1.0") s.add_runtime_dependency("jekyll-watch", "~> 1.1") s.add_runtime_dependency("kramdown", "~> 1.14") diff --git a/lib/jekyll.rb b/lib/jekyll.rb index cdcab8bd..733796cc 100644 --- a/lib/jekyll.rb +++ b/lib/jekyll.rb @@ -32,8 +32,10 @@ require "safe_yaml/load" require "liquid" require "kramdown" require "colorator" +require "i18n" SafeYAML::OPTIONS[:suppress_warnings] = true +I18n.config.available_locales = :en module Jekyll # internal requires diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 558a7586..55cf9be1 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -1,4 +1,3 @@ - # frozen_string_literal: true module Jekyll @@ -12,7 +11,7 @@ module Jekyll autoload :WinTZ, "jekyll/utils/win_tz" # Constants for use in #slugify - SLUGIFY_MODES = %w(raw default pretty ascii).freeze + SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze @@ -170,6 +169,10 @@ module Jekyll # When mode is "ascii", some everything else except ASCII characters # a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) are not replaced 
with hyphen. # + # When mode is "latin", the input string is first preprocessed so that + # any letters with accents are replaced with the plain letter. Afterwards, + # it follows the "default" mode of operation. + # # If cased is true, all uppercase letters in the result string are # replaced with their lowercase counterparts. # @@ -184,7 +187,10 @@ module Jekyll # # => "The-_config.yml file" # # slugify("The _config.yml file", "ascii") - # # => "the-config.yml-file" + # # => "the-config-yml-file" + # + # slugify("The _config.yml file", "latin") + # # => "the-config-yml-file" # # Returns the slugified string. def slugify(string, mode: nil, cased: false) @@ -195,26 +201,10 @@ module Jekyll return cased ? string : string.downcase end - # Replace each character sequence with a hyphen - re = - case mode - when "raw" - SLUGIFY_RAW_REGEXP - when "default" - SLUGIFY_DEFAULT_REGEXP - when "pretty" - # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL - # and is allowed in both extN and NTFS. - SLUGIFY_PRETTY_REGEXP - when "ascii" - # For web servers not being able to handle Unicode, the safe - # method is to ditch anything else but latin letters and numeric - # digits. - SLUGIFY_ASCII_REGEXP - end + # Drop accent marks from latin characters. Everything else turns to ? + string = ::I18n.transliterate(string) if mode == "latin" - # Strip according to the mode - slug = string.gsub(re, "-") + slug = replace_character_sequence_with_hyphen(string, :mode => mode) # Remove leading/trailing hyphen slug.gsub!(%r!^\-|\-$!i, "") @@ -337,5 +327,32 @@ module Jekyll target[key] = val.dup if val.frozen? && duplicable?(val) end end + + # Replace each character sequence with a hyphen. + # + # See Utils#slugify for a description of the character sequence specified + # by each mode. 
+ private + def replace_character_sequence_with_hyphen(string, mode: "default") + replaceable_char = + case mode + when "raw" + SLUGIFY_RAW_REGEXP + when "pretty" + # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL + # and is allowed in both extN and NTFS. + SLUGIFY_PRETTY_REGEXP + when "ascii" + # For web servers not being able to handle Unicode, the safe + # method is to ditch anything else but latin letters and numeric + # digits. + SLUGIFY_ASCII_REGEXP + else + SLUGIFY_DEFAULT_REGEXP + end + + # Strip according to the mode + string.gsub(replaceable_char, "-") + end end end diff --git a/test/test_utils.rb b/test/test_utils.rb index 771f2ade..844ef825 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -207,6 +207,17 @@ class TestUtils < JekyllUnitTest Utils.slugify("fürtive glance!!!!", :mode => "ascii") end + should "map accented latin characters to ASCII characters" do + assert_equal "the-config-yml-file", + Utils.slugify("The _config.yml file?", :mode => "latin") + assert_equal "furtive-glance", + Utils.slugify("fürtive glance!!!!", :mode => "latin") + assert_equal "aaceeiioouu", + Utils.slugify("àáçèéíïòóúü", :mode => "latin") + assert_equal "a-z", + Utils.slugify("Aあわれ鬱господинZ", :mode => "latin") + end + should "only replace whitespace if mode is raw" do assert_equal( "the-_config.yml-file?",