Skip to content

Commit b99a1a5

Browse files
authored
Correctly parse HTTP accept headers. (#75)
* Add support for `accept-charset` header. * Add support for `accept-encoding` header. * Add support for `accept-language` header. * Add support for `accept` header.
1 parent 99294c5 commit b99a1a5

13 files changed

+740
-2
lines changed

lib/protocol/http/header/accept.rb

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2025, by Samuel Williams.
5+
6+
require_relative "split"
7+
require_relative "quoted_string"
8+
require_relative "../error"
9+
10+
module Protocol
11+
module HTTP
12+
module Header
13+
# The `accept` header represents a list of media ranges (content types) that the client can accept.
class Accept < Array
  # Regular expression used to scan out comma-separated entries, keeping commas that appear inside quoted strings as part of the entry. Surrounding whitespace is stripped by the callers after scanning.
  SEPARATOR = /
    (?:           # Start non-capturing group
      "[^"\\]*"   # Match quoted strings (no escaping of quotes within)
      |           # OR
      [^,"]+      # Match non-quoted strings until a comma or quote
    )+
    (?=,|\z)      # Match until a comma or end of string
  /x

  # Raised when an entry cannot be parsed as a media range.
  ParseError = Class.new(Error)

  # Matches `type/subtype` followed by an arbitrary (possibly empty) parameter string.
  MEDIA_RANGE = /\A(?<type>#{TOKEN})\/(?<subtype>#{TOKEN})(?<parameters>.*)\z/

  # Matches a single `;key=value` parameter, where the value is either a token or a quoted string.
  PARAMETER = /\s*;\s*(?<key>#{TOKEN})=((?<value>#{TOKEN})|(?<quoted_value>#{QUOTED_STRING}))/

  # A single entry in the Accept: header, which includes a mime type and associated parameters. A media range can include wild cards, but a media type is a specific type and subtype.
  MediaRange = Struct.new(:type, :subtype, :parameters) do
    # @parameter type [String] the type, e.g. `text` or `*`.
    # @parameter subtype [String] the subtype, e.g. `html` or `*`.
    # @parameter parameters [Hash] the parameters, keyed by name.
    def initialize(type, subtype = "*", parameters = {})
      super(type, subtype, parameters)
    end

    # Order media ranges by descending quality factor, so that sorting puts the most preferred first.
    def <=>(other)
      other.quality_factor <=> quality_factor
    end

    # @returns [String] the parameters serialized as `;key=value` pairs, quoting values where required.
    def parameters_string
      return "" if parameters.nil? || parameters.empty?

      parameters.map do |key, value|
        ";#{key}=#{QuotedString.quote(value.to_s)}"
      end.join
    end

    # Compare against another media range using the struct comparison, or against any other object using the `type/subtype` string.
    def ===(other)
      if other.is_a?(self.class)
        super
      else
        range_string === other
      end
    end

    # @returns [String] the `type/subtype` string without parameters.
    def range_string
      "#{type}/#{subtype}"
    end

    # @returns [String] the `type/subtype` string including any parameters.
    def to_s
      "#{type}/#{subtype}#{parameters_string}"
    end

    # Allow a media range to be implicitly converted to a string.
    alias to_str to_s

    # @returns [Float] the value of the `q` parameter, or 1.0 if it was not specified.
    def quality_factor
      # Tolerate missing parameters in the same way as {parameters_string}:
      return 1.0 if parameters.nil?

      parameters.fetch("q", 1.0).to_f
    end

    # @returns [Array(String, String)] the type and subtype. Any arguments are ignored.
    def split(*args)
      [type, subtype]
    end
  end

  # Parse the `accept` header value into a list of content types.
  #
  # @parameter value [String] the value of the header.
  def initialize(value = nil)
    super(value.scan(SEPARATOR).map(&:strip)) if value
  end

  # Adds one or more comma-separated values to the header.
  #
  # The input string is split into distinct entries and appended to the array.
  #
  # @parameter value [String] the value or values to add, separated by commas.
  def <<(value)
    concat(value.scan(SEPARATOR).map(&:strip))
  end

  # Serializes the stored values into a comma-separated string.
  #
  # @returns [String] the serialized representation of the header values.
  def to_s
    join(",")
  end

  # Parse the `accept` header.
  #
  # @returns [Array(MediaRange)] the list of media ranges and their associated parameters.
  # @raises [ParseError] if an entry is not a valid media range.
  def media_ranges
    # Called without an explicit receiver because the method is private.
    map { |entry| parse_media_range(entry) }
  end

  private

  # Parse a single entry into a {MediaRange}.
  #
  # @parameter value [String] a single entry, e.g. `text/html;q=0.8`.
  # @raises [ParseError] if the entry does not match {MEDIA_RANGE}.
  def parse_media_range(value)
    match = value.match(MEDIA_RANGE)
    raise ParseError, "Invalid media type: #{value.inspect}" unless match

    parameters = {}

    # Only the named groups are captured, so each match yields the key, the token value and the quoted value.
    match[:parameters].scan(PARAMETER) do |key, token, quoted|
      parameters[key] = quoted ? QuotedString.unquote(quoted) : token
    end

    MediaRange.new(match[:type], match[:subtype], parameters)
  end
end
132+
end
133+
end
134+
end
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2025, by Samuel Williams.
5+
6+
require_relative "split"
7+
require_relative "quoted_string"
8+
require_relative "../error"
9+
10+
module Protocol
11+
module HTTP
12+
module Header
13+
# The `accept-charset` header represents a list of character sets that the client can accept.
class AcceptCharset < Split
  # Raised when an entry cannot be parsed as a character set.
  ParseError = Class.new(Error)

  # https://tools.ietf.org/html/rfc7231#section-5.3.3
  #
  # The weight may be surrounded by optional whitespace (`weight = OWS ";" OWS "q=" qvalue`), so both `utf-8;q=0.5` and `utf-8; q=0.5` are accepted.
  CHARSET = /\A(?<name>#{TOKEN})(\s*;\s*q=(?<q>#{QVALUE}))?\z/

  # A single character set and its optional quality value, as it appeared in the header.
  Charset = Struct.new(:name, :q) do
    # @returns [Float] the quality factor, or 1.0 if it was not specified.
    def quality_factor
      (q || 1.0).to_f
    end

    # Order character sets by descending quality factor, so that sorting puts the most preferred first.
    def <=>(other)
      other.quality_factor <=> quality_factor
    end
  end

  # Parse the `accept-charset` header value into a list of character sets.
  #
  # @returns [Array(Charset)] the list of character sets and their associated quality factors.
  # @raises [ParseError] if an entry is not a valid character set.
  def charsets
    map do |value|
      match = value.match(CHARSET)
      raise ParseError, "Could not parse character set: #{value.inspect}" unless match

      Charset.new(match[:name], match[:q])
    end
  end
end
43+
end
44+
end
45+
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2025, by Samuel Williams.
5+
6+
require_relative "split"
7+
require_relative "quoted_string"
8+
require_relative "../error"
9+
10+
module Protocol
11+
module HTTP
12+
module Header
13+
# The `accept-encoding` header represents a list of encodings that the client can accept.
class AcceptEncoding < Split
  # Raised when an entry cannot be parsed as an encoding.
  ParseError = Class.new(Error)

  # https://tools.ietf.org/html/rfc7231#section-5.3.1
  QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/

  # https://tools.ietf.org/html/rfc7231#section-5.3.4
  #
  # The weight may be surrounded by optional whitespace (`weight = OWS ";" OWS "q=" qvalue`), so both `gzip;q=0.5` and `gzip; q=0.5` are accepted.
  ENCODING = /\A(?<name>#{TOKEN})(\s*;\s*q=(?<q>#{QVALUE}))?\z/

  # A single encoding and its optional quality value, as it appeared in the header.
  Encoding = Struct.new(:name, :q) do
    # @returns [Float] the quality factor, or 1.0 if it was not specified.
    def quality_factor
      (q || 1.0).to_f
    end

    # Order encodings by descending quality factor, so that sorting puts the most preferred first.
    def <=>(other)
      other.quality_factor <=> quality_factor
    end
  end

  # Parse the `accept-encoding` header value into a list of encodings.
  #
  # @returns [Array(Encoding)] the list of encodings and their associated quality factors.
  # @raises [ParseError] if an entry is not a valid encoding.
  def encodings
    map do |value|
      match = value.match(ENCODING)
      raise ParseError, "Could not parse encoding: #{value.inspect}" unless match

      Encoding.new(match[:name], match[:q])
    end
  end
end
46+
end
47+
end
48+
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2025, by Samuel Williams.
5+
6+
require_relative "split"
7+
require_relative "quoted_string"
8+
require_relative "../error"
9+
10+
module Protocol
11+
module HTTP
12+
module Header
13+
# The `accept-language` header represents a list of languages that the client can accept.
class AcceptLanguage < Split
  # Raised when an entry cannot be parsed as a language.
  ParseError = Class.new(Error)

  # https://tools.ietf.org/html/rfc3066#section-2.1
  NAME = /\*|[A-Z]{1,8}(-[A-Z0-9]{1,8})*/i

  # https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.9
  QVALUE = /0(\.[0-9]{0,6})?|1(\.[0]{0,6})?/

  # https://greenbytes.de/tech/webdav/rfc7231.html#quality.values
  LANGUAGE = /\A(?<name>#{NAME})(\s*;\s*q=(?<q>#{QVALUE}))?\z/

  # A single language range and its optional quality value, as it appeared in the header.
  Language = Struct.new(:name, :q) do
    # @returns [Float] the quality factor, or 1.0 if it was not specified.
    def quality_factor
      (q || 1.0).to_f
    end

    # Order languages by descending quality factor, so that sorting puts the most preferred first.
    def <=>(other)
      other.quality_factor <=> quality_factor
    end
  end

  # Parse the `accept-language` header value into a list of languages.
  #
  # @returns [Array(Language)] the list of languages and their associated quality factors.
  # @raises [ParseError] if an entry is not a valid language.
  def languages
    map do |value|
      match = value.match(LANGUAGE)
      raise ParseError, "Could not parse language: #{value.inspect}" unless match

      Language.new(match[:name], match[:q])
    end
  end
end
49+
end
50+
end
51+
end
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2025, by Samuel Williams.
5+
6+
module Protocol
7+
module HTTP
8+
module Header
9+
# According to https://tools.ietf.org/html/rfc7231#appendix-C
TOKEN = /[!#$%&'*+\-.^_`|~0-9A-Z]+/i

# A quoted string according to https://tools.ietf.org/html/rfc7230#section-3.2.6: a pair of double quotes containing any mix of ordinary characters and backslash escaped pairs.
#
# Escapes are consumed as pairs, so an escaped backslash immediately before the closing quote (`"x\\"`) terminates the string correctly instead of being mistaken for an escaped quote.
QUOTED_STRING = /"(?:[^"\\]|\\.)*"/

# https://tools.ietf.org/html/rfc7231#section-5.3.1
QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/
16+
17+
# Handling of HTTP quoted strings.
module QuotedString
  # Unquote a "quoted-string" value according to <https://tools.ietf.org/html/rfc7230#section-3.2.6>. It should already match the QUOTED_STRING pattern above by the parser.
  #
  # @parameter value [String] the quoted string, including the surrounding quotes.
  # @parameter normalize_whitespace [Boolean] whether to collapse folded line breaks into a single space.
  # @returns [String] the unquoted value.
  def self.unquote(value, normalize_whitespace = true)
    # Drop the surrounding quotes, then resolve backslash escapes:
    unquoted = value[1...-1].gsub(/\\(.)/, '\1')

    if normalize_whitespace
      # LWS = [CRLF] 1*( SP | HT )
      unquoted = unquoted.gsub(/[\r\n]+\s+/, " ")
    end

    unquoted
  end

  # Characters which force a value to be quoted.
  QUOTES_REQUIRED = /[()<>@,;:\\"\/\[\]?={} \t]/

  # Control characters, other than horizontal tab, which can not be represented in a header value, quoted or not. This includes carriage return and line feed.
  INVALID_CHARACTERS = /[\x00-\x08\x0A-\x1F\x7F]/

  # Quote a string for HTTP header values if required.
  #
  # @parameter value [String] the value to quote.
  # @parameter force [Boolean] whether to quote the value even if it is not required.
  # @returns [String] the value, quoted and escaped if required or forced.
  # @raises [ArgumentError] if the value contains invalid characters like control characters or newlines.
  def self.quote(value, force = false)
    # Reject values that could otherwise be used to inject additional header lines:
    if value.match?(INVALID_CHARACTERS)
      raise ArgumentError, "Value contains invalid characters: #{value.inspect}"
    end

    # Check if quoting is required:
    if force || value.match?(QUOTES_REQUIRED)
      # Escape embedded quotes and backslashes with a leading backslash:
      escaped = value.gsub(/["\\]/) { |character| "\\#{character}" }

      "\"#{escaped}\""
    else
      value
    end
  end
end
47+
end
48+
end
49+
end

lib/protocol/http/header/split.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def initialize(value = nil)
3030
#
3131
# @parameter value [String] the value or values to add, separated by commas.
3232
def << value
33-
self.push(*value.split(COMMA))
33+
self.concat(value.split(COMMA))
3434
end
3535

3636
# Serializes the stored values into a comma-separated string.

lib/protocol/http/headers.rb

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# frozen_string_literal: true
22

33
# Released under the MIT License.
4-
# Copyright, 2018-2024, by Samuel Williams.
4+
# Copyright, 2018-2025, by Samuel Williams.
55

66
require_relative "header/split"
77
require_relative "header/multiple"
8+
89
require_relative "header/cookie"
910
require_relative "header/connection"
1011
require_relative "header/cache_control"
@@ -15,6 +16,11 @@
1516
require_relative "header/date"
1617
require_relative "header/priority"
1718

19+
require_relative "header/accept"
20+
require_relative "header/accept_charset"
21+
require_relative "header/accept_encoding"
22+
require_relative "header/accept_language"
23+
1824
module Protocol
1925
module HTTP
2026
# @namespace
@@ -277,6 +283,12 @@ def []= key, value
277283
"last-modified" => Header::Date,
278284
"if-modified-since" => Header::Date,
279285
"if-unmodified-since" => Header::Date,
286+
287+
# Accept headers:
288+
"accept" => Header::Accept,
289+
"accept-charset" => Header::AcceptCharset,
290+
"accept-encoding" => Header::AcceptEncoding,
291+
"accept-language" => Header::AcceptLanguage,
280292
}.tap{|hash| hash.default = Split}
281293

282294
# Delete all header values for the given key, and return the merged value.

releases.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Releases
22

3+
## Unreleased
4+
5+
- Add support for parsing `accept`, `accept-charset`, `accept-encoding` and `accept-language` headers into structured values.
6+
37
## v0.46.0
48

59
- Add support for `priority:` header.

0 commit comments

Comments
 (0)