diff --git a/lib/jekyll/commands/serve.rb b/lib/jekyll/commands/serve.rb
index 7357b939..7467977d 100644
--- a/lib/jekyll/commands/serve.rb
+++ b/lib/jekyll/commands/serve.rb
@@ -194,6 +194,7 @@ module Jekyll
             :JekyllOptions => opts,
             :DoNotReverseLookup => true,
             :MimeTypes => mime_types,
+            :MimeTypesCharset => mime_types_charset,
             :DocumentRoot => opts["destination"],
             :StartCallback => start_callback(opts["detach"]),
             :StopCallback => stop_callback(opts["detach"]),
@@ -353,6 +354,12 @@ module Jekyll
           WEBrick::HTTPUtils.load_mime_types(file)
         end
 
+        # Load the mime-type => charset mapping from `serve/mime_types_charset.json`,
+        # the JSON file written by `script/vendor-mimes`.
+        def mime_types_charset
+          SafeYAML.load_file(File.expand_path("serve/mime_types_charset.json", __dir__))
+        end
+
         def read_file(source_dir, file_path)
           File.read(Jekyll.sanitized_path(source_dir, file_path))
         end
diff --git a/lib/jekyll/commands/serve/mime_types_charset.json b/lib/jekyll/commands/serve/mime_types_charset.json
new file mode 100644
index 00000000..017469b9
--- /dev/null
+++ b/lib/jekyll/commands/serve/mime_types_charset.json
@@ -0,0 +1,71 @@
+{
+  "application/javascript": "UTF-8",
+  "application/json": "UTF-8",
+  "application/manifest+json": "UTF-8",
+  "application/vnd.syncml+xml": "UTF-8",
+  "application/vnd.syncml.dm+wbxml": "UTF-8",
+  "application/vnd.syncml.dm+xml": "UTF-8",
+  "application/vnd.syncml.dmddf+xml": "UTF-8",
+  "application/vnd.wap.wbxml": "UTF-8",
+  "text/cache-manifest": "UTF-8",
+  "text/calendar": "UTF-8",
+  "text/coffeescript": "UTF-8",
+  "text/css": "UTF-8",
+  "text/csv": "UTF-8",
+  "text/html": "UTF-8",
+  "text/jade": "UTF-8",
+  "text/jsx": "UTF-8",
+  "text/less": "UTF-8",
+  "text/markdown": "UTF-8",
+  "text/mathml": "UTF-8",
+  "text/mdx": "UTF-8",
+  "text/n3": "UTF-8",
+  "text/plain": "UTF-8",
+  "text/prs.lines.tag": "UTF-8",
+  "text/richtext": "UTF-8",
+  "text/sgml": "UTF-8",
+  "text/shex": "UTF-8",
+  "text/slim": "UTF-8",
+  "text/spdx": "UTF-8",
+  "text/stylus": "UTF-8",
+  "text/tab-separated-values": "UTF-8",
+  "text/troff": "UTF-8",
+  "text/turtle": "UTF-8",
+  "text/uri-list": "UTF-8",
+  "text/vcard": "UTF-8",
+  "text/vnd.curl": "UTF-8",
+  "text/vnd.curl.dcurl": "UTF-8",
+  "text/vnd.curl.mcurl": "UTF-8",
+  "text/vnd.curl.scurl": "UTF-8",
+  "text/vnd.familysearch.gedcom": "UTF-8",
+  "text/vnd.fly": "UTF-8",
+  "text/vnd.fmi.flexstor": "UTF-8",
+  "text/vnd.graphviz": "UTF-8",
+  "text/vnd.in3d.3dml": "UTF-8",
+  "text/vnd.in3d.spot": "UTF-8",
+  "text/vnd.sun.j2me.app-descriptor": "UTF-8",
+  "text/vnd.wap.wml": "UTF-8",
+  "text/vnd.wap.wmlscript": "UTF-8",
+  "text/vtt": "UTF-8",
+  "text/x-asm": "UTF-8",
+  "text/x-c": "UTF-8",
+  "text/x-component": "UTF-8",
+  "text/x-fortran": "UTF-8",
+  "text/x-handlebars-template": "UTF-8",
+  "text/x-java-source": "UTF-8",
+  "text/x-lua": "UTF-8",
+  "text/x-markdown": "UTF-8",
+  "text/x-nfo": "UTF-8",
+  "text/x-opml": "UTF-8",
+  "text/x-pascal": "UTF-8",
+  "text/x-processing": "UTF-8",
+  "text/x-sass": "UTF-8",
+  "text/x-scss": "UTF-8",
+  "text/x-setext": "UTF-8",
+  "text/x-sfv": "UTF-8",
+  "text/x-suse-ymp": "UTF-8",
+  "text/x-uuencode": "UTF-8",
+  "text/x-vcalendar": "UTF-8",
+  "text/x-vcard": "UTF-8",
+  "text/yaml": "UTF-8"
+}
diff --git a/lib/jekyll/commands/serve/servlet.rb b/lib/jekyll/commands/serve/servlet.rb
index cee9c663..d91f871f 100644
--- a/lib/jekyll/commands/serve/servlet.rb
+++ b/lib/jekyll/commands/serve/servlet.rb
@@ -134,6 +134,7 @@ module Jekyll
         def initialize(server, root, callbacks)
           # So we can access them easily.
           @jekyll_opts = server.config[:JekyllOptions]
+          @mime_types_charset = server.config[:MimeTypesCharset] || {}
           set_defaults
           super
         end
@@ -173,7 +174,7 @@ module Jekyll
             end
           end
 
-          validate_and_ensure_charset(req, res)
+          conditionally_inject_charset(res)
           res.header.merge!(@headers)
           rtn
         end
@@ -181,13 +182,16 @@ module Jekyll
 
         private
 
-        def validate_and_ensure_charset(_req, res)
-          key = res.header.keys.grep(%r!content-type!i).first
-          typ = res.header[key]
+        # Append `; charset=<encoding from Jekyll config>` to the Content-Type header, but
+        # only when our mime-types database contains charset metadata for that type.
+        #
+        # Refer to `script/vendor-mimes` in the repository for further details.
+        def conditionally_inject_charset(res)
+          typ = res.header["content-type"]
+          return unless @mime_types_charset.key?(typ)
+          return if %r!;\s*charset=!.match?(typ)
 
-          unless %r!;\s*charset=!.match?(typ)
-            res.header[key] = "#{typ}; charset=#{@jekyll_opts["encoding"]}"
-          end
+          res.header["content-type"] = "#{typ}; charset=#{@jekyll_opts["encoding"]}"
         end
 
         def set_defaults
diff --git a/lib/jekyll/mime.types b/lib/jekyll/mime.types
index 0a03f010..10f713c6 100644
--- a/lib/jekyll/mime.types
+++ b/lib/jekyll/mime.types
@@ -19,6 +19,7 @@ application/cdmi-container cdmic
 application/cdmi-domain cdmid
 application/cdmi-object cdmio
 application/cdmi-queue cdmiq
+application/cpl+xml cpl
 application/cu-seeme cu
 application/dash+xml mpd
 application/dash-patch+xml mpp
@@ -63,6 +64,7 @@ application/marcxml+xml mrcx
 application/mathematica ma mb nb
 application/mathml+xml mathml
 application/mbox mbox
+application/media-policy-dataset+xml mpf
 application/mediaservercontrol+xml mscml
 application/metalink+xml metalink
 application/metalink4+xml meta4
@@ -88,7 +90,8 @@ application/p2p-overlay+xml relo
 application/patch-ops-error+xml xer
 application/pdf pdf
 application/pgp-encrypted pgp
-application/pgp-signature asc sig
+application/pgp-keys asc
+application/pgp-signature sig
 application/pics-rules prf
 application/pkcs10 p10
 application/pkcs7-mime p7c p7m
@@ -529,6 +532,7 @@ application/vnd.zul zir zi
 application/vnd.zzazz.deck+xml zaz
 application/voicexml+xml vxml
 application/wasm wasm
+application/watcherinfo+xml wif
 application/widget wgt
 application/winhlp hlp
 application/wsdl+xml wsdl
@@ -719,6 +723,8 @@ font/woff woff
 font/woff2 woff2
 image/aces exr
 image/apng apng
+image/avci avci
+image/avcs avcs
 image/avif avif
 image/bmp bmp
 image/cgm cgm
diff --git a/script/vendor-mimes b/script/vendor-mimes
index 4a240f70..0242d539 100755
--- a/script/vendor-mimes
+++ b/script/vendor-mimes
@@ -2,16 +2,41 @@
 # Vendors the MIME type config from the mime-db list
 # usage: script/vendor-mimes
 
+require 'colorator'
 require 'json'
 require 'open-uri'
 
-config = File.expand_path "../lib/jekyll/mime.types", __dir__
+# ---- Helpers ----
 
-# Create an array of vendored mimetype => [extensions]
-mimes = {}
-json = URI.open('https://raw.githubusercontent.com/jshttp/mime-db/master/db.json').read
+{
+  :info => :cyan,
+  :success => :green,
+  :error => :red,
+}.each do |type, color|
+  define_method("log_#{type}") do |msg|
+    puts " #{msg}".send(color)
+  end
+end
+
+# ----
+
+json = begin
+  log_info "Reading remote data.."
+  URI.open("https://raw.githubusercontent.com/jshttp/mime-db/master/db.json").read
+rescue StandardError => e
+  log_error "Error reading remote data!"
+  log_error e.message
+  log_error "Aborting."
+  exit 1
+end
+
+log_info "Parsing remote data.."
 data = JSON.parse(json)
 data.reject! { |mime, meta| meta["extensions"].nil? || meta["extensions"].empty? }
+
+log_info "Generating interim mime data-hashes.."
+mimes = {}
+charset_data = {}
 data.each do |mime, meta|
   # Normalize extensions and mime-types
   mime = mime.downcase.strip
@@ -23,8 +48,15 @@ data.each do |mime, meta|
   next if extensions.empty?
   mimes[mime] = [] if mimes[mime].nil?
   mimes[mime].concat extensions
+
+  # Extract mime-types with "charset" metadata
+  charset_data[mime] = meta["charset"] if meta.key?("charset")
+
+  # Assign `UTF-8` charset for mime-types under the `text` domain if not already assigned upstream
+  charset_data[mime] ||= "UTF-8" if mime.start_with?("text/")
 end
 
+log_info "Formatting primary hash and writing to file.."
 strlen = mimes.keys.max_by(&:length).length
 output = ""
 output << "# Woah there. Do not edit this file directly.\n"
@@ -32,4 +64,14 @@ output << "# This file is generated automatically by script/vendor-mimes.\n\n"
 mimes = mimes.sort_by { |k,v| k }
 output << mimes.map { |mime,extensions| "#{mime.ljust(strlen)} #{extensions.sort.join(" ")}" }.join("\n")
 
+config = File.expand_path "../lib/jekyll/mime.types", __dir__
 File.write(config, output)
+log_info "Done! See: #{config.inspect.white}"
+
+# --- Generate JSON file from charset_data ----
+puts
+
+log_info "Dumping mimetype-charset mapping as JSON.."
+json_file = File.expand_path "../lib/jekyll/commands/serve/mime_types_charset.json", __dir__
+File.write(json_file, JSON.pretty_generate(charset_data) + "\n")
+log_success "and done! See: #{json_file.inspect.white}"
diff --git a/test/helper.rb b/test/helper.rb
index 90adacc1..c40adcba 100644
--- a/test/helper.rb
+++ b/test/helper.rb
@@ -245,6 +245,7 @@ module TestWEBrick
       :ServerType => Thread,
       :Logger => WEBrick::Log.new(logger),
       :AccessLog => [[logger, ""]],
+      :MimeTypesCharset => Jekyll::Commands::Serve.send(:mime_types_charset),
       :JekyllOptions => {},
     }
   end