From eebb6414bf9dbcb3e02ed11b22cde3e6f96b7f4f Mon Sep 17 00:00:00 2001 From: nitoyon Date: Wed, 6 Nov 2013 00:29:27 +0900 Subject: [PATCH 1/3] Fix Post#url escape Post#url was escaped using CGI.escape. When file name contains a space character, its url points to non-existing URL. For example, when we have a post named '2014-01-02-foo bar.md', we expect its url to be '/2014/01/02/foo%20bar.html', but it was actually '/2014/01/02/foo+bar.html'. We now define Jekyll::URL.escape_path and Jekyll::URL.unescape_path, and use them to escape and unescape Post#url --- lib/jekyll/post.rb | 6 +-- lib/jekyll/url.rb | 40 +++++++++++++++++++ .../_posts/2014-03-22-escape-+ %20[].markdown | 6 +++ test/test_generated_site.rb | 2 +- test/test_post.rb | 29 +++++++++++++- 5 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 test/source/_posts/2014-03-22-escape-+ %20[].markdown diff --git a/lib/jekyll/post.rb b/lib/jekyll/post.rb index 180faa27..05a5cb8f 100644 --- a/lib/jekyll/post.rb +++ b/lib/jekyll/post.rb @@ -208,10 +208,10 @@ module Jekyll :year => date.strftime("%Y"), :month => date.strftime("%m"), :day => date.strftime("%d"), - :title => CGI.escape(slug), + :title => URL.escape_path(slug), :i_day => date.strftime("%d").to_i.to_s, :i_month => date.strftime("%m").to_i.to_s, - :categories => (categories || []).map { |c| URI.escape(c.to_s) }.join('/'), + :categories => (categories || []).map { |c| URL.escape_path(c.to_s) }.join('/'), :short_month => date.strftime("%b"), :y_day => date.strftime("%j"), :output_ext => output_ext @@ -260,7 +260,7 @@ module Jekyll # Returns destination file path String. def destination(dest) # The url needs to be unescaped in order to preserve the correct filename - path = Jekyll.sanitized_path(dest, CGI.unescape(url)) + path = Jekyll.sanitized_path(dest, URL.unescape_path(url)) path = File.join(path, "index.html") if path[/\.html$/].nil? path end diff --git a/lib/jekyll/url.rb b/lib/jekyll/url.rb index 813b9c87..509c94cb 100644 --- a/lib/jekyll/url.rb +++ b/lib/jekyll/url.rb @@ -1,3 +1,5 @@ +require 'uri' + # Public: Methods that generate a URL for a resource such as a Post or a Page. # # Examples @@ -65,5 +67,43 @@ module Jekyll url end + + # Escapes a path to be a valid URL path segment + # + # path - The path to be escaped. + # + # Examples: + # + # URL.escape_path("/a b") + # # => "/a%20b" + # + # Returns the escaped path. + def self.escape_path(path) + # Because URI.escape doesn't escape '?', '[' and ']' by defaut, + # specify unsafe string (except unreserved, sub-delims, ":", "@" and "/"). + # + # URI path segment is defined in RFC 3986 as follows: + # segment = *pchar + # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + # pct-encoded = "%" HEXDIG HEXDIG + # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + # / "*" / "+" / "," / ";" / "=" + URI.escape(path, /[^a-zA-Z\d\-._~!$&\'()*+,;=:@\/]/) + end + + # Unescapes a URL path segment + # + # path - The path to be unescaped. + # + # Examples: + # + # URL.unescape_path("/a%20b") + # # => "/a b" + # + # Returns the unescaped path. + def self.unescape_path(path) + URI.unescape(path) + end end end diff --git a/test/source/_posts/2014-03-22-escape-+ %20[].markdown b/test/source/_posts/2014-03-22-escape-+ %20[].markdown new file mode 100644 index 00000000..abe120b1 --- /dev/null +++ b/test/source/_posts/2014-03-22-escape-+ %20[].markdown @@ -0,0 +1,6 @@ +--- +layout: default +title: Plus space percent +--- + +Signs are nice diff --git a/test/test_generated_site.rb b/test/test_generated_site.rb index 5059dd12..d960e743 100644 --- a/test/test_generated_site.rb +++ b/test/test_generated_site.rb @@ -14,7 +14,7 @@ class TestGeneratedSite < Test::Unit::TestCase end should "ensure post count is as expected" do - assert_equal 39, @site.posts.size + assert_equal 40, @site.posts.size end should "insert site.posts into the index" do diff --git a/test/test_post.rb b/test/test_post.rb index 7d4ab599..2d3900ca 100644 --- a/test/test_post.rb +++ b/test/test_post.rb @@ -86,13 +86,20 @@ class TestPost < Test::Unit::TestCase end end - should "CGI escape urls" do + should "escape urls" do @post.categories = [] @post.process("2009-03-12-hash-#1.markdown") assert_equal "/2009/03/12/hash-%231.html", @post.url assert_equal "/2009/03/12/hash-#1", @post.id end + should "escape urls with non-alphabetic characters" do + @post.categories = [] + @post.process("2014-03-22-escape-+ %20[].markdown") + assert_equal "/2014/03/22/escape-+%20%2520%5B%5D.html", @post.url + assert_equal "/2014/03/22/escape-+ %20[]", @post.id + end + should "respect permalink in yaml front matter" do file = "2008-12-03-permalinked-post.textile" @post.process(file) @@ -521,6 +528,26 @@ class TestPost < Test::Unit::TestCase assert File.exists?(File.join(dest_dir, '2008', '10', '18', 'foo-bar.html')) end + should "write properly when url has hash" do + post = setup_post("2009-03-12-hash-#1.markdown") + do_render(post) + post.write(dest_dir) + + assert File.directory?(dest_dir) + assert File.exists?(File.join(dest_dir, '2009', '03', '12', + 'hash-#1.html')) + end + + should "write properly when url has space" do + post = setup_post("2014-03-22-escape-+ %20[].markdown") + do_render(post) + post.write(dest_dir) + + assert File.directory?(dest_dir) + assert File.exists?(File.join(dest_dir, '2014', '03', '22', + 'escape-+ %20[].html')) + end + should "write properly without html extension" do post = setup_post("2008-10-18-foo-bar.textile") post.site.permalink_style = ":title" From e3e1c11509e20a8fdeed14d3a4d8ba78bc202c34 Mon Sep 17 00:00:00 2001 From: nitoyon Date: Wed, 6 Nov 2013 00:29:50 +0900 Subject: [PATCH 2/3] Fix Page#url escape Post#url wasn't escaped at all. For example, when we have a page named 'a#b.html', we expect its url to be 'a%23b.html', but it was actually 'a#b.html'. We now use Jekyll::URL.escape_path and Jekyll::URL.unescape_path. --- lib/jekyll/page.rb | 6 +++--- test/source/+/%# +.md | 6 ++++++ test/test_filters.rb | 2 +- test/test_page.rb | 14 ++++++++++++++ test/test_site.rb | 1 + 5 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 test/source/+/%# +.md diff --git a/lib/jekyll/page.rb b/lib/jekyll/page.rb index 0c71c7d1..fa85a250 100644 --- a/lib/jekyll/page.rb +++ b/lib/jekyll/page.rb @@ -86,8 +86,8 @@ module Jekyll # desired placeholder replacements. For details see "url.rb" def url_placeholders { - :path => @dir, - :basename => basename, + :path => URL.escape_path(@dir), + :basename => URL.escape_path(basename), :output_ext => output_ext } end @@ -135,7 +135,7 @@ module Jekyll # # Returns the destination file path String. def destination(dest) - path = Jekyll.sanitized_path(dest, url) + path = Jekyll.sanitized_path(dest, URL.unescape_path(url)) path = File.join(path, "index.html") if url =~ /\/$/ path end diff --git a/test/source/+/%# +.md b/test/source/+/%# +.md new file mode 100644 index 00000000..a850f4dc --- /dev/null +++ b/test/source/+/%# +.md @@ -0,0 +1,6 @@ +--- +layout: default +title : Page name with non-alphabetic character +--- +Line 1 +{{ page.title }} diff --git a/test/test_filters.rb b/test/test_filters.rb index 9a514229..8c6e0259 100644 --- a/test/test_filters.rb +++ b/test/test_filters.rb @@ -125,7 +125,7 @@ class TestFilters < Test::Unit::TestCase case g["name"] when "default" assert g["items"].is_a?(Array), "The list of grouped items for 'default' is not an Array." - assert_equal 4, g["items"].size + assert_equal 5, g["items"].size when "nil" assert g["items"].is_a?(Array), "The list of grouped items for 'nil' is not an Array." assert_equal 2, g["items"].size diff --git a/test/test_page.rb b/test/test_page.rb index 8e60922f..90f68c96 100644 --- a/test/test_page.rb +++ b/test/test_page.rb @@ -30,6 +30,11 @@ class TestPage < Test::Unit::TestCase assert_equal false, @page.published? end + should "create url with non-alphabetic characters" do + @page = setup_page('+', '%# +.md') + assert_equal "/+/%25%23%20+.html", @page.url + end + context "in a directory hierarchy" do should "create url based on filename" do @page = setup_page('/contacts', 'bar.html') @@ -174,6 +179,15 @@ class TestPage < Test::Unit::TestCase assert File.exists?(File.join(dest_dir, '+', 'plus+in+url')) end + should "write even when permalink has '%# +'" do + page = setup_page('+', '%# +.md') + do_render(page) + page.write(dest_dir) + + assert File.directory?(dest_dir) + assert File.exists?(File.join(dest_dir, '+', '%# +.html')) + end + should "write properly without html extension" do page = setup_page('contacts.html') page.site.permalink_style = :pretty diff --git a/test/test_site.rb b/test/test_site.rb index c7ba23f9..f166c5a5 100644 --- a/test/test_site.rb +++ b/test/test_site.rb @@ -159,6 +159,7 @@ class TestSite < Test::Unit::TestCase @site.process # files in symlinked directories may appear twice sorted_pages = %w( + %#\ +.md .htaccess about.html bar.html From c56ce248c982e256c91bcc0abbf0509e5c7d2857 Mon Sep 17 00:00:00 2001 From: nitoyon Date: Fri, 21 Mar 2014 16:21:41 +0000 Subject: [PATCH 3/3] Move URL escape to Jekyll::URL --- lib/jekyll/page.rb | 4 ++-- lib/jekyll/post.rb | 4 ++-- lib/jekyll/url.rb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/jekyll/page.rb b/lib/jekyll/page.rb index fa85a250..5f768cc6 100644 --- a/lib/jekyll/page.rb +++ b/lib/jekyll/page.rb @@ -86,8 +86,8 @@ module Jekyll # desired placeholder replacements. For details see "url.rb" def url_placeholders { - :path => URL.escape_path(@dir), - :basename => URL.escape_path(basename), + :path => @dir, + :basename => basename, :output_ext => output_ext } end diff --git a/lib/jekyll/post.rb b/lib/jekyll/post.rb index 05a5cb8f..2308a33d 100644 --- a/lib/jekyll/post.rb +++ b/lib/jekyll/post.rb @@ -208,10 +208,10 @@ module Jekyll :year => date.strftime("%Y"), :month => date.strftime("%m"), :day => date.strftime("%d"), - :title => URL.escape_path(slug), + :title => slug, :i_day => date.strftime("%d").to_i.to_s, :i_month => date.strftime("%m").to_i.to_s, - :categories => (categories || []).map { |c| URL.escape_path(c.to_s) }.join('/'), + :categories => (categories || []).map { |c| c.to_s }.join('/'), :short_month => date.strftime("%b"), :y_day => date.strftime("%j"), :output_ext => output_ext diff --git a/lib/jekyll/url.rb b/lib/jekyll/url.rb index 509c94cb..66b4412d 100644 --- a/lib/jekyll/url.rb +++ b/lib/jekyll/url.rb @@ -46,7 +46,7 @@ module Jekyll # Returns the _unsanitizied_ String URL def generate_url @placeholders.inject(@template) do |result, token| - result.gsub(/:#{token.first}/, token.last) + result.gsub(/:#{token.first}/, self.class.escape_path(token.last)) end end