From 36dc8c21698b853a08cb4e0c3927b04c55f67b1d Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Fri, 18 Mar 2016 11:13:06 +0200 Subject: [PATCH 1/7] Add urlsafe method for slugify --- lib/jekyll/utils.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 179981ec..5140dc52 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -10,6 +10,7 @@ module Jekyll SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new('[^[:alnum:]]+').freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze + SLUGIFY_URLSAFE_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze # Takes an indented string and removes the preceding spaces on each line @@ -190,6 +191,11 @@ module Jekyll # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL # and is allowed in both extN and NTFS. SLUGIFY_PRETTY_REGEXP + when 'urlsafe' + # For web servers not being able to handle Unicode, the safe + # method is to ditch anything else but latin letters and numeric + # digits. 
+ SLUGIFY_URLSAFE_REGEXP end # Strip according to the mode From c0dec091a2e8575f91362f5d479897bea8e48321 Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Fri, 18 Mar 2016 17:14:54 +0200 Subject: [PATCH 2/7] Add urlsafe to accepted slugify modes --- lib/jekyll/utils.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 5140dc52..50890243 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -6,7 +6,7 @@ module Jekyll autoload :Ansi, "jekyll/utils/ansi" # Constants for use in #slugify - SLUGIFY_MODES = %w(raw default pretty) + SLUGIFY_MODES = %w(raw default pretty urlsafe) SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new('[^[:alnum:]]+').freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze From e823ac518064c200b334c5f1a2bb0b85f5fe67d6 Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Tue, 29 Mar 2016 09:07:12 +0300 Subject: [PATCH 3/7] Rename urlsafe to ascii, and document it (on utils.rb) --- lib/jekyll/utils.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 50890243..c3efa7bb 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -10,7 +10,7 @@ module Jekyll SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new('[^[:alnum:]]+').freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze - SLUGIFY_URLSAFE_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze + SLUGIFY_ASCII_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze # Takes an indented string and removes the preceding spaces on each line @@ -158,6 +158,9 @@ module Jekyll # When mode is "pretty", some non-alphabetic characters (._~!$&'()+,;=@) # are not replaced with hyphen. # + # When mode is "ascii", everything except the ASCII characters + # a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with a hyphen. 
+ # # If cased is true, all uppercase letters in the result string are # replaced with their lowercase counterparts. # @@ -171,6 +174,9 @@ module Jekyll # slugify("The _config.yml file", "pretty", true) # # => "The-_config.yml file" # + # slugify("The _config.yml file", "ascii") + # # => "the-config-yml-file" + # # Returns the slugified string. def slugify(string, mode: nil, cased: false) mode ||= 'default' From 487631e935ce83d4a636d0083d6202697ed7fb7c Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Tue, 29 Mar 2016 09:07:30 +0300 Subject: [PATCH 4/7] Add tests for ascii slugify mode --- test/test_utils.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_utils.rb b/test/test_utils.rb index f102b7a6..8d5ba7a8 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -182,6 +182,10 @@ class TestUtils < JekyllUnitTest assert_equal "the-_config.yml-file", Utils.slugify("The _config.yml file?", mode: "pretty") end + should "replace everything else but ASCII characters" do + assert_equal "the-config.yml-file", Utils.slugify("The _config.yml file?", mode: "ascii") + end + should "only replace whitespace if mode is raw" do assert_equal "the-_config.yml-file?", Utils.slugify("The _config.yml file?", mode: "raw") end From 764a2c1b39b9b63bc72d9954e8a0b6e462ffd05e Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Tue, 29 Mar 2016 09:46:42 +0300 Subject: [PATCH 5/7] Change urlsafe to ascii also when actually slugifying --- lib/jekyll/utils.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index c3efa7bb..d4ece1a3 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -197,11 +197,11 @@ module Jekyll # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL # and is allowed in both extN and NTFS. SLUGIFY_PRETTY_REGEXP - when 'urlsafe' + when 'ascii' # For web servers not being able to handle Unicode, the safe # method is to ditch anything else but latin letters and numeric # digits. 
- SLUGIFY_URLSAFE_REGEXP + SLUGIFY_ASCII_REGEXP end # Strip according to the mode From 4c65772c4455404af65d988cd5e2873961598a59 Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Tue, 29 Mar 2016 10:01:08 +0300 Subject: [PATCH 6/7] One final "urlsafe" replaced with "ascii" --- lib/jekyll/utils.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index d4ece1a3..876126ea 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -6,7 +6,7 @@ module Jekyll autoload :Ansi, "jekyll/utils/ansi" # Constants for use in #slugify - SLUGIFY_MODES = %w(raw default pretty urlsafe) + SLUGIFY_MODES = %w(raw default pretty ascii) SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new('[^[:alnum:]]+').freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze From 4effe25cc113f08d9b23eaaff6fea1a7065c72f3 Mon Sep 17 00:00:00 2001 From: Jussi Kinnula Date: Tue, 29 Mar 2016 10:11:47 +0300 Subject: [PATCH 7/7] Fix slugify test --- test/test_utils.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.rb b/test/test_utils.rb index 8d5ba7a8..debae0f4 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -183,7 +183,7 @@ class TestUtils < JekyllUnitTest end should "replace everything else but ASCII characters" do - assert_equal "the-config.yml-file", Utils.slugify("The _config.yml file?", mode: "ascii") + assert_equal "the-config-yml-file", Utils.slugify("The _config.yml file?", mode: "ascii") end should "only replace whitespace if mode is raw" do