diff --git a/.github/actions/spelling/patterns.txt b/.github/actions/spelling/patterns.txt
index ae5c5e2a..341825df 100644
--- a/.github/actions/spelling/patterns.txt
+++ b/.github/actions/spelling/patterns.txt
@@ -48,6 +48,9 @@ themes\.googleusercontent\.com/static/fonts/[^/]+/v\d+/[^.]+.
 # google_site_verification:
 google_site_verification: [-a-zA-Z=;:/0-9+]*
 
+# Ruby-doc.org
+https://ruby-doc\.org/.*
+
 # Contributors
 alphabetical order.*:.*
 twitter_handle: .*
diff --git a/docs/_docs/datafiles.md b/docs/_docs/datafiles.md
index 2140a4c3..c78b376a 100644
--- a/docs/_docs/datafiles.md
+++ b/docs/_docs/datafiles.md
@@ -148,3 +148,30 @@ author: dave
 {% endraw %}
 
 For information on how to build robust navigation for your site (especially if you have a documentation website or another type of Jekyll site with a lot of pages to organize), see [Navigation]({{ '/tutorials/navigation/' | relative_url }}).
+
+## CSV/TSV Parse Options
+
+The way Ruby parses CSV and TSV files can be customized with the `csv_reader` and `tsv_reader`
+configuration options. Each configuration key exposes the same options:
+
+- `csv_converters`: What [CSV converters](https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters) should be
+  used when parsing the file. Available options are `integer`, `float`, `numeric`, `date`, `date_time` and
+  `all`. By default, this list is empty.
+- `encoding`: What encoding the files are in. Defaults to the site `encoding` configuration option.
+- `headers`: Boolean field for whether to parse the first line of the file as headers. When `false`, it treats the
+  first row as data. Defaults to `true`.
+
+Examples:
+
+```yaml
+csv_reader:
+  csv_converters:
+    - numeric
+    - date_time
+  headers: true
+  encoding: utf-8
+tsv_reader:
+  csv_converters:
+    - all
+  headers: false
+```
diff --git a/lib/jekyll/readers/data_reader.rb b/lib/jekyll/readers/data_reader.rb
index d34076d0..80b57bd6 100644
--- a/lib/jekyll/readers/data_reader.rb
+++ b/lib/jekyll/readers/data_reader.rb
@@ -59,14 +59,9 @@ module Jekyll
 
       case File.extname(path).downcase
       when ".csv"
-        CSV.read(path,
-                 :headers  => true,
-                 :encoding => site.config["encoding"]).map(&:to_hash)
+        CSV.read(path, **csv_config).map { |row| convert_row(row) }
       when ".tsv"
-        CSV.read(path,
-                 :col_sep  => "\t",
-                 :headers  => true,
-                 :encoding => site.config["encoding"]).map(&:to_hash)
+        CSV.read(path, **tsv_config).map { |row| convert_row(row) }
       else
         SafeYAML.load_file(path)
       end
@@ -76,5 +71,58 @@ module Jekyll
       name.gsub(%r![^\w\s-]+|(?<=^|\b\s)\s+(?=$|\s?\b)!, "")
         .gsub(%r!\s+!, "_")
     end
+
+    private
+
+    # Options handed to `CSV.read` for `.csv` data files, built once from the
+    # site's `csv_reader` configuration.
+    #
+    # @return [Hash]
+    def csv_config
+      @csv_config ||= read_config("csv_reader")
+    end
+
+    # Options handed to `CSV.read` for `.tsv` data files, built once from the
+    # site's `tsv_reader` configuration, with the tab column separator
+    # merged in.
+    #
+    # @return [Hash]
+    def tsv_config
+      @tsv_config ||= read_config("tsv_reader", { :col_sep => "\t" })
+    end
+
+    # Build the `CSV.read` options for one reader from the site configuration.
+    #
+    # @param config_key [String] top-level configuration key to read
+    # @param overrides [Hash] options merged last, so they win on conflict
+    # @return [Hash] `:converters`, `:headers`, `:encoding` plus the overrides
+    # @see https://ruby-doc.org/stdlib-2.5.0/libdoc/csv/rdoc/CSV.html#Converters
+    def read_config(config_key, overrides = {})
+      reader_config = config[config_key] || {}
+
+      options = {
+        # Array() tolerates an empty (nil) key and a single converter name.
+        :converters => Array(reader_config["csv_converters"]).map(&:to_sym),
+        :headers    => reader_config.fetch("headers", true),
+        :encoding   => reader_config.fetch("encoding", config["encoding"]),
+      }
+
+      options.merge(overrides)
+    end
+
+    # @return [Hash] the site configuration, memoized
+    def config
+      @config ||= site.config
+    end
+
+    # Normalize one parsed row: a `CSV::Row` (what the parser is expected to
+    # yield when headers are enabled) becomes a plain Hash; anything else,
+    # such as the Array rows of a header-less file, is returned untouched.
+    #
+    # @param row [Array, CSV::Row]
+    # @return [Array, Hash]
+    def convert_row(row)
+      row.instance_of?(CSV::Row) ? row.to_hash : row
+    end
   end
 end
diff --git a/test/fixtures/sample.csv b/test/fixtures/sample.csv
new file mode 100644
index 00000000..b4b36e6d
--- /dev/null
+++ b/test/fixtures/sample.csv
@@ -0,0 +1,3 @@
+id,field_a
+1,"foo"
+2,"bar"
diff --git a/test/fixtures/sample.tsv b/test/fixtures/sample.tsv
new file mode 100644
index 00000000..c467a88a
--- /dev/null
+++ b/test/fixtures/sample.tsv
@@ -0,0 +1,3 @@
+id	field_a
+1	"foo"
+2	"bar"
diff --git a/test/test_data_reader.rb b/test/test_data_reader.rb
index 038ec679..0c0fd247 100644
--- a/test/test_data_reader.rb
+++ b/test/test_data_reader.rb
@@ -14,4 +14,58 @@ class TestDataReader < JekyllUnitTest
       )
     end
   end
+
+  context "with no csv options set" do
+    setup do
+      @reader = DataReader.new(fixture_site)
+      @parsed = [{ "id" => "1", "field_a" => "foo" }, { "id" => "2", "field_a" => "bar" }]
+    end
+
+    should "parse CSV normally" do
+      assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.csv", __dir__))
+    end
+
+    should "parse TSV normally" do
+      assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.tsv", __dir__))
+    end
+  end
+
+  context "with csv options set" do
+    setup do
+      reader_config = {
+        "csv_converters" => [:numeric],
+        "headers"        => false,
+      }
+
+      @reader = DataReader.new(
+        fixture_site(
+          {
+            "csv_reader" => reader_config,
+            "tsv_reader" => reader_config,
+          }
+        )
+      )
+
+      @parsed = [%w(id field_a), [1, "foo"], [2, "bar"]]
+    end
+
+    should "parse CSV with options" do
+      assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.csv", __dir__))
+    end
+
+    should "parse TSV with options" do
+      assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.tsv", __dir__))
+    end
+  end
+
+  context "with a single csv converter given as a string" do
+    setup do
+      @reader = DataReader.new(fixture_site("csv_reader" => { "csv_converters" => "numeric" }))
+      @parsed = [{ "id" => 1, "field_a" => "foo" }, { "id" => 2, "field_a" => "bar" }]
+    end
+
+    should "coerce it into a list of converters" do
+      assert_equal @parsed, @reader.read_data_file(File.expand_path("fixtures/sample.csv", __dir__))
+    end
+  end
 end