Allow Configurable Converters on CSV (#8858)

Merge pull request 8858
This commit is contained in:
MichaelCordingley 2022-04-01 09:42:01 -04:00 committed by GitHub
parent d4e10d5954
commit 66e337984e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 7 deletions

View File

@ -48,6 +48,9 @@ themes\.googleusercontent\.com/static/fonts/[^/]+/v\d+/[^.]+.
# google_site_verification:
google_site_verification: [-a-zA-Z=;:/0-9+]*
# Ruby-doc.org
https://ruby-doc\.org/.*
# Contributors
alphabetical order.*:.*
twitter_handle: .*

View File

@ -148,3 +148,30 @@ author: dave
{% endraw %}
For information on how to build robust navigation for your site (especially if you have a documentation website or another type of Jekyll site with a lot of pages to organize), see [Navigation]({{ '/tutorials/navigation/' | relative_url }}).
## CSV/TSV Parse Options
The way Ruby parses CSV and TSV files can be customized with the `csv_reader` and `tsv_reader`
configuration options. Each configuration key exposes the same options:
`csv_converters`: Which [CSV converters](https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters) should be
used when parsing the file. Available options are `integer`, `float`, `numeric`, `date`, `date_time` and
`all`. By default, this list is empty.
`encoding`: What encoding the files are in. Defaults to the site `encoding` configuration option.
`headers`: Boolean field for whether to parse the first line of the file as headers. When `false`, it treats the
first row as data. Defaults to `true`.
Examples:
```yaml
csv_reader:
  csv_converters:
    - numeric
    - date_time
  headers: true
  encoding: utf-8
tsv_reader:
  csv_converters:
    - all
  headers: false
```

View File

@ -59,14 +59,9 @@ module Jekyll
case File.extname(path).downcase
when ".csv"
CSV.read(path,
:headers => true,
:encoding => site.config["encoding"]).map(&:to_hash)
CSV.read(path, **csv_config).map { |row| convert_row(row) }
when ".tsv"
CSV.read(path,
:col_sep => "\t",
:headers => true,
:encoding => site.config["encoding"]).map(&:to_hash)
CSV.read(path, **tsv_config).map { |row| convert_row(row) }
else
SafeYAML.load_file(path)
end
@ -76,5 +71,43 @@ module Jekyll
name.gsub(%r![^\w\s-]+|(?<=^|\b\s)\s+(?=$|\s?\b)!, "")
.gsub(%r!\s+!, "_")
end
private
# Parse options used for ".csv" data files, computed once per reader and
# cached in @csv_config.
#
# @return [Hash] the result of `read_config("csv_reader")`
def csv_config
  return @csv_config if @csv_config

  @csv_config = read_config("csv_reader")
end
# Parse options used for ".tsv" data files, computed once per reader and
# cached in @tsv_config. The column separator is forced to a tab character.
#
# @return [Hash] the result of `read_config("tsv_reader", ...)`
def tsv_config
  return @tsv_config if @tsv_config

  tab_separated = { :col_sep => "\t" }
  @tsv_config = read_config("tsv_reader", tab_separated)
end
# Builds the option hash passed to `CSV.read` from one reader section of the
# site configuration.
#
# The converter list is read from the "csv_converters" key; "converters" is
# accepted as an alias and is used only when "csv_converters" is absent or
# empty (nil). Each entry is turned into a Symbol.
#
# @param config_key [String] configuration key to read, e.g. "csv_reader"
# @param overrides [Hash] options merged last, replacing configured values
# @return [Hash] :converters, :headers and :encoding, plus any overrides
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters
def read_config(config_key, overrides = {})
  reader_config = config[config_key] || {}
  configured_converters = reader_config["csv_converters"] || reader_config["converters"]

  defaults = {
    # Array() tolerates a missing or empty key (nil) and a single scalar
    # such as `csv_converters: numeric`, both of which used to raise.
    :converters => Array(configured_converters).map(&:to_sym),
    :headers    => reader_config.fetch("headers", true),
    # Falls back to the site-wide "encoding" setting.
    :encoding   => reader_config.fetch("encoding", config["encoding"]),
  }

  defaults.merge(overrides)
end
# Shortcut to `site.config`, looked up once and cached in @config.
#
# @return [Object] whatever `site.config` returns (read_config indexes it
#   with String keys)
def config
  return @config if @config

  @config = site.config
end
# Normalizes one parsed row: a CSV::Row is turned into a Hash via `to_hash`;
# anything else is returned untouched.
#
# @param row [Array, CSV::Row]
# @return [Array, Hash]
def convert_row(row)
  return row unless row.instance_of?(CSV::Row)

  row.to_hash
end
end
end

3
test/fixtures/sample.csv vendored Normal file
View File

@ -0,0 +1,3 @@
id,field_a
1,"foo"
2,"bar"
1 id field_a
2 1 foo
3 2 bar

3
test/fixtures/sample.tsv vendored Normal file
View File

@ -0,0 +1,3 @@
id field_a
1 "foo"
2 "bar"
1 id field_a
2 1 foo
3 2 bar

View File

@ -14,4 +14,47 @@ class TestDataReader < JekyllUnitTest
)
end
end
# Default behaviour: no csv_reader/tsv_reader configuration is supplied, so
# every row is expected as a Hash of String values keyed by header name.
context "with no csv options set" do
  setup do
    @reader = DataReader.new(fixture_site)
    @parsed = [
      { "id" => "1", "field_a" => "foo" },
      { "id" => "2", "field_a" => "bar" },
    ]
  end

  should "parse CSV normally" do
    csv_path = File.expand_path("fixtures/sample.csv", __dir__)
    assert_equal @parsed, @reader.read_data_file(csv_path)
  end

  should "parse TSV normally" do
    tsv_path = File.expand_path("fixtures/sample.tsv", __dir__)
    assert_equal @parsed, @reader.read_data_file(tsv_path)
  end
end
# Custom behaviour: the same options are applied to both readers. With
# headers disabled the header line is expected as the first data row, and the
# numeric converter is expected to turn the "id" column into Integers.
context "with csv options set" do
  setup do
    shared_options = { "csv_converters" => [:numeric], "headers" => false }
    site_overrides = { "csv_reader" => shared_options, "tsv_reader" => shared_options }

    @reader = DataReader.new(fixture_site(site_overrides))
    @parsed = [["id", "field_a"], [1, "foo"], [2, "bar"]]
  end

  should "parse CSV with options" do
    csv_path = File.expand_path("fixtures/sample.csv", __dir__)
    assert_equal @parsed, @reader.read_data_file(csv_path)
  end

  should "parse TSV with options" do
    tsv_path = File.expand_path("fixtures/sample.tsv", __dir__)
    assert_equal @parsed, @reader.read_data_file(tsv_path)
  end
end
end