From 2b8ae866c21c309805460e90973bf6ce6820a72b Mon Sep 17 00:00:00 2001 From: Austin Ziegler Date: Thu, 6 Jan 2022 23:06:12 -0500 Subject: [PATCH] Ensure priority sort over alpha sort - Added extension priority map. This is an imperfect solution, and is not used by default with default configuration (column-based data). - We may want to consider a revised columnar format for a future version that has a bit more information than is present in the base file. - Adding the sort priority and extension priority helped, but because the alphanumeric sort was first in `MIME::Type#priority_compare`, the results weren't as good as they should have been. We now sort by the sort priority values _first_ and the alphanumeric values _second_. - Stored sort priority was not respected because it depends on flags not kept in the base file. Added support for a binary file with this to ensure it is loaded. --- lib/mime/type.rb | 31 ++++++++++++++++++++++--------- lib/mime/type/columnar.rb | 11 +++++++++++ lib/mime/types.rb | 4 ++++ lib/mime/types/_columnar.rb | 27 +++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/lib/mime/type.rb b/lib/mime/type.rb index 54df240..b977602 100644 --- a/lib/mime/type.rb +++ b/lib/mime/type.rb @@ -188,8 +188,8 @@ def <=>(other) # consumers of mime-types. For the next major version of MIME::Types, this # method will become #<=> and #priority_compare will be removed. def priority_compare(other) - if (cmp = simplified <=> other.simplified).zero? - __sort_priority <=> other.__sort_priority + if (cmp = __sort_priority <=> other.__sort_priority).zero? + simplified <=> other.simplified else cmp end @@ -229,7 +229,7 @@ def hash # The computed sort priority value. This is _not_ intended to be used by most # callers. 
- def __sort_priority + def __sort_priority # :nodoc: @__sort_priority || update_sort_priority end @@ -324,17 +324,24 @@ def preferred_extension=(value) # :nodoc: end ## - # Optional extension priorities for this MIME type. This is a relative value - # similar to nice(1). An explicitly set `preferred_extension` is automatically - # given a relative priority of `-10`. + # Optional extension priorities for this MIME type. This is a map of + # extensions to relative priority values (+-20..20+) similar to +nice(1)+. + # Unless otherwise specified in the data, an explicitly set + # +preferred_extension+ is automatically given a relative priority of +-10+. # # :attr_reader: extension_priorities attr_accessor :extension_priorities ## # Returns the priority for the provided extension or extensions. If a priority - # is not set, the default priority is 0. The range for priorities is -20..20, - # inclusive. + # is not set, the default priority is +0+. The range for priorities is + # +-20..20+, inclusive. + # + # Obsolete MIME types have a +3 penalty applied to their + # extension priority and unregistered MIME types have a +2 + # penalty to their extension priority, meaning that the highest priority an + # obsolete, unregistered MIME type can have is +-15+. The lowest priority is + # always +20. def extension_priority(*exts) exts.map { |ext| get_extension_priority(ext) }.min end @@ -650,7 +657,7 @@ def clear_extension_priority(ext) end def get_extension_priority(ext) - [[-20, __extension_priorities[ext] || 0].max, 20].min + [[-20, (__extension_priorities[ext] || 0) + __priority_penalty].max, 20].min end def set_preferred_extension_priority(ext) @@ -686,6 +693,12 @@ def update_sort_priority extension_count = [0, 16 - extension_count].max @__sort_priority = obsolete | registered | provisional | complete | extension_count + @__priority_penalty = (@obsolete ? 3 : 0) + (@registered ? 0 : 2) + end + + def __priority_penalty + update_sort_priority if @__priority_penalty.nil? 
+ @__priority_penalty end def content_type=(type_string) diff --git a/lib/mime/type/columnar.rb b/lib/mime/type/columnar.rb index ec4dec2..a51f9d9 100644 --- a/lib/mime/type/columnar.rb +++ b/lib/mime/type/columnar.rb @@ -53,6 +53,17 @@ def encode_with(coder) # :nodoc: super end + def update_sort_priority + if @container.__fully_loaded? + super + else + obsolete = (@__sort_priority & (1 << 7)) != 0 + registered = (@__sort_priority & (1 << 5)) == 0 + + @__priority_penalty = (obsolete ? 3 : 0) + (registered ? 0 : 2) + end + end + class << self undef column end diff --git a/lib/mime/types.rb b/lib/mime/types.rb index aec2951..7b16719 100644 --- a/lib/mime/types.rb +++ b/lib/mime/types.rb @@ -204,6 +204,10 @@ def add_type(type, quiet = false) index_extensions!(type) end + def __fully_loaded? # :nodoc: + true + end + private def add_type_variant!(mime_type) diff --git a/lib/mime/types/_columnar.rb b/lib/mime/types/_columnar.rb index 253920c..c191e39 100644 --- a/lib/mime/types/_columnar.rb +++ b/lib/mime/types/_columnar.rb @@ -18,6 +18,10 @@ def self.extended(obj) # :nodoc: obj.instance_variable_set(:@__files__, Set.new) end + def __fully_loaded? # :nodoc: + @__files__.size == 10 + end + # Load the first column data file (type and extensions). def load_base_data(path) # :nodoc: @__root__ = path @@ -33,6 +37,10 @@ def load_base_data(path) # :nodoc: add(type) end + each_file_byte("spri") do |type, byte| + type.instance_variable_set(:@__sort_priority, byte) + end + self end @@ -60,6 +68,25 @@ def each_file_line(name, lookup = true) end end + def each_file_byte(name) + LOAD_MUTEX.synchronize do + next if @__files__.include?(name) + + i = -1 + + filename = File.join(@__root__, "mime.#{name}.column") + + next unless File.exist?(filename) + + IO.binread(filename).unpack("C*").each do |byte| + (type = @__mime_data__[i += 1]) || next + yield type, byte + end + + @__files__ << name + end + end + def load_encoding each_file_line("encoding") do |type, line| pool ||= {}