diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index ce9800df..24c5b832 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -6,10 +6,11 @@ module Jekyll autoload :Ansi, "jekyll/utils/ansi" # Constants for use in #slugify - SLUGIFY_MODES = %w(raw default pretty).freeze + SLUGIFY_MODES = %w(raw default pretty ascii).freeze SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze + SLUGIFY_ASCII_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze # Takes an indented string and removes the preceding spaces on each line @@ -160,6 +161,9 @@ module Jekyll # When mode is "pretty", some non-alphabetic characters (._~!$&'()+,;=@) # are not replaced with hyphen. # + # When mode is "ascii", everything except the ASCII characters + # a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with a hyphen. + # # If cased is true, all uppercase letters in the result string are # replaced with their lowercase counterparts. # @@ -173,6 +177,9 @@ module Jekyll # slugify("The _config.yml file", "pretty", true) # # => "The-_config.yml file" # + # slugify("The _config.yml file", "ascii") + # # => "the-config-yml-file" + # # Returns the slugified string. def slugify(string, mode: nil, cased: false) mode ||= "default" @@ -193,6 +200,11 @@ module Jekyll # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL # and is allowed in both extN and NTFS. SLUGIFY_PRETTY_REGEXP + when 'ascii' + # For web servers that cannot handle Unicode, the safe + # approach is to drop everything except Latin letters and numeric + # digits. 
+ SLUGIFY_ASCII_REGEXP end # Strip according to the mode diff --git a/test/test_utils.rb b/test/test_utils.rb index 7fab0d55..d2cc60ff 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -198,6 +198,10 @@ class TestUtils < JekyllUnitTest ) end + should "replace everything else but ASCII characters" do + assert_equal "the-config-yml-file", Utils.slugify("The _config.yml file?", mode: "ascii") + end + should "only replace whitespace if mode is raw" do assert_equal( "the-_config.yml-file?",