Skip to content

Commit 68cf963

Browse files
Relax header parsing and allow high bytes. (#45)
1 parent 3303e52 commit 68cf963

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

lib/protocol/http1/connection.rb

+3-4
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,9 @@ module HTTP1
3737

3838
# HTTP/1.x header parser:
3939
FIELD_NAME = TOKEN
40-
WS = /[ \t]/ # Whitespace.
41-
OWS = /#{WS}*/ # Optional whitespace.
42-
VCHAR = /[!-~]/ # Match visible characters from ASCII 33 to 126.
43-
FIELD_VALUE = /#{VCHAR}+(?:#{WS}+#{VCHAR}+)*/.freeze
40+
OWS = /[ \t]*/
41+
# A field value is any string of characters that does not contain a null byte, CR, or LF. After reflecting on the RFCs and surveying real implementations, I came to the conclusion that the RFCs are too restrictive. Most servers only check for the presence of null bytes, and obviously CR/LF characters have semantic meaning in the parser. So, I decided to follow this de facto standard, even if I'm not entirely happy with it.
42+
FIELD_VALUE = /[^\0\r\n]+/.freeze
4443
HEADER = /\A(#{FIELD_NAME}):#{OWS}(?:(#{FIELD_VALUE})#{OWS})?\z/.freeze
4544

4645
VALID_FIELD_NAME = /\A#{FIELD_NAME}\z/.freeze

test/protocol/http1/connection/headers.rb

+14
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@
5959
"user-agent: Mozilla\x7FHacker Browser"
6060
]}
6161

62+
it "allows the request" do
63+
authority, method, target, version, headers, body = server.read_request
64+
65+
expect(headers).to have_keys(
66+
"user-agent" => be == "Mozilla\x7FHacker Browser"
67+
)
68+
end
69+
end
70+
71+
with "header that contains null character" do
72+
let(:headers) {[
73+
"user-agent: Mozilla\x00Hacker Browser"
74+
]}
75+
6276
it "rejects the request" do
6377
expect do
6478
server.read_request

0 commit comments

Comments
 (0)