From 9932eb667bf9ceb543ff274ddf0d5ced6ee6b4e4 Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Sat, 17 May 2014 17:53:40 -0430 Subject: [PATCH 1/6] Encode URLs in utf-8 when escaping and unescaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a problem while returning a path that has some special and possibly non-ASCII characters that may lead jekyll to break while doing the unescaping process. This can be addressed by “forcing” ASCII to UTF-8. --- lib/jekyll/url.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/jekyll/url.rb b/lib/jekyll/url.rb index 8cd47242..f4ea4d33 100644 --- a/lib/jekyll/url.rb +++ b/lib/jekyll/url.rb @@ -89,7 +89,7 @@ module Jekyll # pct-encoded = "%" HEXDIG HEXDIG # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" - URI.escape(path, /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/) + URI.escape(path, /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/).encode('utf-8') end # Unescapes a URL path segment @@ -103,7 +103,7 @@ module Jekyll # # Returns the unescaped path. def self.unescape_path(path) - URI.unescape(path) + URI.unescape(path.encode('utf-8')) end end end From 2f3390750d50fc9cfecbd11f48e5226b4cc3077e Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Mon, 19 May 2014 18:45:47 -0430 Subject: [PATCH 2/6] Add tests to validate encoding of URLs Added tests to validate the encoding of returned URL strings after being escaped or unescaped. 
--- test/test_post.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_post.rb b/test/test_post.rb index e3fa9535..daa0a038 100644 --- a/test/test_post.rb +++ b/test/test_post.rb @@ -100,6 +100,14 @@ class TestPost < Test::Unit::TestCase assert_equal "/2014/03/22/escape-+ %20[]", @post.id end + should "return a UTF-8 escaped string" do + assert_equal Encoding::UTF_8, URL.escape_path("/rails笔记/2014/04/20/escaped/").encoding + end + + should "return a UTF-8 unescaped string" do + assert_equal Encoding::UTF_8, URL.unescape_path("/rails%E7%AC%94%E8%AE%B0/2014/04/20/escaped/").encoding + end + should "respect permalink in yaml front matter" do file = "2008-12-03-permalinked-post.textile" @post.process(file) From 77cef764d6d77e94b3e2dddaa542f5286c44eb70 Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Mon, 19 May 2014 18:50:25 -0430 Subject: [PATCH 3/6] Force encoding before escaping or unescaping This should prevent errors when escaping or unescaping. --- lib/jekyll/url.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/jekyll/url.rb b/lib/jekyll/url.rb index f4ea4d33..31186d45 100644 --- a/lib/jekyll/url.rb +++ b/lib/jekyll/url.rb @@ -89,7 +89,7 @@ module Jekyll # pct-encoded = "%" HEXDIG HEXDIG # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" - URI.escape(path, /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/).encode('utf-8') + URI.escape(path.force_encoding('utf-8'), /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/).encode('utf-8') end # Unescapes a URL path segment @@ -103,7 +103,7 @@ module Jekyll # # Returns the unescaped path. 
def self.unescape_path(path) - URI.unescape(path.encode('utf-8')) + URI.unescape(path.force_encoding('utf-8')) end end end From 2a1054b1a8f6eadcaf84cae1caf821a324c008a2 Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Mon, 19 May 2014 22:08:14 -0430 Subject: [PATCH 4/6] Change test to make it pass in Ruby 1.9.3 --- test/test_post.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_post.rb b/test/test_post.rb index daa0a038..c52232cd 100644 --- a/test/test_post.rb +++ b/test/test_post.rb @@ -101,11 +101,11 @@ class TestPost < Test::Unit::TestCase end should "return a UTF-8 escaped string" do - assert_equal Encoding::UTF_8, URL.escape_path("/rails笔记/2014/04/20/escaped/").encoding + assert_equal Encoding::UTF_8, URL.escape_path("/2014/04/20/escaped/").encoding end should "return a UTF-8 unescaped string" do - assert_equal Encoding::UTF_8, URL.unescape_path("/rails%E7%AC%94%E8%AE%B0/2014/04/20/escaped/").encoding + assert_equal Encoding::UTF_8, URL.unescape_path("/2014/04/20/escaped/").encoding end should "respect permalink in yaml front matter" do From 7ce849a2b6fca299627849560ff36902321c51a5 Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Tue, 20 May 2014 00:03:40 -0430 Subject: [PATCH 5/6] Revert changes and add encoding to the test file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add encoding to the test file as Ruby 1.9.3 doesn’t default to utf-8. * Remove the forced encoding as encode seems too aggressive. --- lib/jekyll/url.rb | 4 ++-- test/test_post.rb | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/jekyll/url.rb b/lib/jekyll/url.rb index 31186d45..f4ea4d33 100644 --- a/lib/jekyll/url.rb +++ b/lib/jekyll/url.rb @@ -89,7 +89,7 @@ module Jekyll # pct-encoded = "%" HEXDIG HEXDIG # sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" - URI.escape(path.force_encoding('utf-8'), /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/).encode('utf-8') + URI.escape(path, /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/).encode('utf-8') end # Unescapes a URL path segment @@ -103,7 +103,7 @@ module Jekyll # # Returns the unescaped path. def self.unescape_path(path) - URI.unescape(path.force_encoding('utf-8')) + URI.unescape(path.encode('utf-8')) end end end diff --git a/test/test_post.rb b/test/test_post.rb index c52232cd..46d1fa7e 100644 --- a/test/test_post.rb +++ b/test/test_post.rb @@ -1,3 +1,5 @@ +# encoding: utf-8 + require 'helper' class TestPost < Test::Unit::TestCase @@ -101,11 +103,11 @@ class TestPost < Test::Unit::TestCase end should "return a UTF-8 escaped string" do - assert_equal Encoding::UTF_8, URL.escape_path("/2014/04/20/escaped/").encoding + assert_equal Encoding::UTF_8, URL.escape_path("/rails笔记/2014/04/20/escaped/").encoding end should "return a UTF-8 unescaped string" do - assert_equal Encoding::UTF_8, URL.unescape_path("/2014/04/20/escaped/").encoding + assert_equal Encoding::UTF_8, URL.unescape_path("/rails%E7%AC%94%E8%AE%B0/2014/04/20/escaped/").encoding end should "respect permalink in yaml front matter" do From 879184fe37f2bb631bafe632530ab05e3760dba2 Mon Sep 17 00:00:00 2001 From: Alberto Grespan Date: Tue, 20 May 2014 17:04:00 -0430 Subject: [PATCH 6/6] Update unescaped string test Nothing was being tested without explicitly making the string encoding ASCII. 
--- test/test_post.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_post.rb b/test/test_post.rb index 46d1fa7e..17f937f1 100644 --- a/test/test_post.rb +++ b/test/test_post.rb @@ -107,7 +107,7 @@ class TestPost < Test::Unit::TestCase end should "return a UTF-8 unescaped string" do - assert_equal Encoding::UTF_8, URL.unescape_path("/rails%E7%AC%94%E8%AE%B0/2014/04/20/escaped/").encoding + assert_equal Encoding::UTF_8, URL.unescape_path("/rails%E7%AC%94%E8%AE%B0/2014/04/20/escaped/".encode(Encoding::ASCII)).encoding end should "respect permalink in yaml front matter" do