Problematic UTF+bom files (#6322)

Merge pull request 6322
This commit is contained in:
Frank Taillandier 2017-10-18 05:15:26 +02:00 committed by jekyllbot
parent e0a97b5f12
commit 363bd6c7eb
6 changed files with 53 additions and 4 deletions

View File

@ -444,6 +444,7 @@ module Jekyll
# Resets the site's file-reading options. The configured "encoding" value,
# when truthy, is stored under :encoding; the hash is then replaced by the
# result of Jekyll::Utils.merged_file_read_opts called with this site and
# an empty overrides hash.
def configure_file_read_opts
  encoding = config["encoding"]
  self.file_read_opts = encoding ? { :encoding => encoding } : {}
  # The helper reads file_read_opts back from the site, so it has to be
  # assigned before this call.
  self.file_read_opts = Jekyll::Utils.merged_file_read_opts(self, {})
end
private

View File

@ -301,6 +301,9 @@ module Jekyll
# and a given param
def merged_file_read_opts(site, opts)
merged = (site ? site.file_read_opts : {}).merge(opts)
if merged[:encoding] && !merged[:encoding].start_with?("bom|")
merged[:encoding] = "bom|#{merged[:encoding]}"
end
if merged["encoding"] && !merged["encoding"].start_with?("bom|")
merged["encoding"] = "bom|#{merged["encoding"]}"
end

View File

@ -0,0 +1,11 @@
---
layout: post
title: "UTF8CRLFandBOM"
date: 2017-04-05 16:16:01 -0800
categories: bom
---
This file was created with CR/LFs, and encoded as UTF8 with a BOM
You'll find this post in your `_posts` directory. Go ahead and edit it and re-build the site to see your changes. You can rebuild the site in many different ways, but the most common way is to run `bundle exec jekyll serve`, which launches a web server and auto-regenerates your site when a file is updated.
To add new posts, simply add a file in the `_posts` directory that follows the convention `YYYY-MM-DD-name-of-post.ext` and includes the necessary front matter. Take a look at the source for this post to get an idea about how it works.

Binary file not shown.

View File

@ -7,6 +7,15 @@ class TestDocument < JekyllUnitTest
assert_equal(one[key], other[key])
end
# Builds a fixture site with an "encodings" collection, processes it, and
# returns the Document for +filename+ inside "_encodings" after calling
# #read on it.
def setup_encoded_document(filename)
  site = fixture_site("collections" => ["encodings"])
  site.process

  path = site.in_source_dir(File.join("_encodings", filename))
  relations = {
    :site       => site,
    :collection => site.collections["encodings"],
  }

  document = Document.new(path, relations)
  document.read
  document
end
context "a document in a collection" do
setup do
@site = fixture_site({
@ -529,4 +538,24 @@ class TestDocument < JekyllUnitTest
assert_equal true, File.file?(@dest_file)
end
end
# Exercises the "UTF8CRLFandBOM.md" fixture (named for UTF-8, CRLF line
# endings and a BOM) through the renderer.
context "a document with UTF-8 CRLF" do
  setup do
    @document = setup_encoded_document "UTF8CRLFandBOM.md"
  end

  # No explicit assertion: the example passes as long as rendering does not
  # raise.
  should "not throw an error" do
    Jekyll::Renderer.new(@document.site, @document).render_document
  end
end
# Exercises the "Unicode16LECRLFandBOM.md" fixture (named for UTF-16LE, CRLF
# line endings and a BOM) through the renderer.
context "a document with UTF-16LE CRLF" do
  setup do
    @document = setup_encoded_document "Unicode16LECRLFandBOM.md"
  end

  # No explicit assertion: the example passes as long as rendering does not
  # raise.
  should "not throw an error" do
    Jekyll::Renderer.new(@document.site, @document).render_document
  end
end
end

View File

@ -387,16 +387,21 @@ class TestUtils < JekyllUnitTest
should "ignore encoding if it's not there" do
  # With a nil site and empty options, neither key style may be present.
  merged = Utils.merged_file_read_opts(nil, {})
  assert_nil merged["encoding"]
  assert_nil merged[:encoding]
end
should "add bom to encoding" do
  # A string-keyed encoding on its own gains the "bom|" prefix.
  string_only = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8" })
  assert_equal "bom|utf-8", string_only["encoding"]

  # With both key styles present, each entry is prefixed.
  input = { "encoding" => "utf-8", :encoding => "utf-8" }
  result = Utils.merged_file_read_opts(nil, input)
  assert_equal "bom|utf-8", result["encoding"]
  assert_equal "bom|utf-8", result[:encoding]
end
should "preserve bom in encoding" do
  # An already-prefixed string-keyed value comes back unchanged.
  string_only = Utils.merged_file_read_opts(nil, { "encoding" => "bom|utf-8" })
  assert_equal "bom|utf-8", string_only["encoding"]

  # The same holds when both key styles carry a prefixed value.
  input = { "encoding" => "bom|another", :encoding => "bom|another" }
  result = Utils.merged_file_read_opts(nil, input)
  assert_equal "bom|another", result["encoding"]
  assert_equal "bom|another", result[:encoding]
end
end
end