Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BE-4191 CVE-2022-24761 Validate that incoming HTTP requests match the RFC7230 standard #1

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
1.4.4.1 (2024-07-25)
-----

Security Bugfix
~~~~~~~~~~~~~~~

- Waitress now validates that chunked encoding extensions are valid, and don't
contain invalid characters that are not allowed. They are still skipped/not
processed, but if they contain invalid data we no longer continue in and
return a 400 Bad Request. This stops potential HTTP desync/HTTP request
smuggling. Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36

- Waitress now validates that the chunk length is only valid hex digits when
parsing chunked encoding, and values such as ``0x01`` and ``+01`` are no
longer supported. This stops potential HTTP desync/HTTP request smuggling.
Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36

- Waitress now validates that the Content-Length sent by a remote contains only
digits in accordance with RFC7230 and will return a 400 Bad Request when the
Content-Length header contains invalid data, such as ``+10`` which would
previously get parsed as ``10`` and accepted. This stops potential HTTP
desync/HTTP request smuggling Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36


1.4.4 (2020-06-01)
------------------

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = waitress
version = 1.4.4
version = 1.4.4.1
description = Waitress WSGI server
long_description = file: README.rst, CHANGES.txt
long_description_content_type = text/x-rst
Expand Down
12 changes: 6 additions & 6 deletions src/waitress/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,14 @@
from waitress.buffers import OverflowableBuffer
from waitress.compat import tostr, unquote_bytes_to_wsgi, urlparse
from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
from waitress.rfc7230 import HEADER_FIELD_RE, ONLY_DIGIT_RE
from waitress.utilities import (
BadRequest,
RequestEntityTooLarge,
RequestHeaderFieldsTooLarge,
ServerNotImplemented,
find_double_newline,
)
from .rfc7230 import HEADER_FIELD


class ParsingError(Exception):
pass
Expand Down Expand Up @@ -209,7 +208,7 @@ def parse_header(self, header_plus):

headers = self.headers
for line in lines:
header = HEADER_FIELD.match(line)
header = HEADER_FIELD_RE.match(line)

if not header:
raise ParsingError("Invalid header")
Expand Down Expand Up @@ -299,11 +298,12 @@ def parse_header(self, header_plus):
self.connection_close = True

if not self.chunked:
try:
cl = int(headers.get("CONTENT_LENGTH", 0))
except ValueError:
cl = headers.get("CONTENT_LENGTH", "0")

if not ONLY_DIGIT_RE.match(cl.encode("latin-1")):
raise ParsingError("Content-Length is invalid")

cl = int(cl)
self.content_length = cl
if cl > 0:
buf = OverflowableBuffer(self.adj.inbuf_overflow)
Expand Down
28 changes: 21 additions & 7 deletions src/waitress/receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Data Chunk Receiver
"""

from waitress.rfc7230 import CHUNK_EXT_RE, ONLY_HEXDIG_RE
from waitress.utilities import BadRequest, find_double_newline


Expand Down Expand Up @@ -110,6 +111,7 @@ def received(self, s):
s = b""
else:
self.chunk_end = b""

if pos == 0:
# Chop off the terminating CR LF from the chunk
s = s[2:]
Expand All @@ -133,20 +135,32 @@ def received(self, s):
line = s[:pos]
s = s[pos + 2 :]
self.control_line = b""
line = line.strip()

if line:
# Begin a new chunk.
semi = line.find(b";")

if semi >= 0:
# discard extension info.
extinfo = line[semi:]
valid_ext_info = CHUNK_EXT_RE.match(extinfo)

if not valid_ext_info:
self.error = BadRequest("Invalid chunk extension")
self.all_chunks_received = True

break

line = line[:semi]
try:
sz = int(line.strip(), 16) # hexadecimal
except ValueError: # garbage in input
self.error = BadRequest("garbage in chunked encoding input")
sz = 0

if not ONLY_HEXDIG_RE.match(line):
self.error = BadRequest("Invalid chunk size")
self.all_chunks_received = True

break

# Can not fail due to matching against the regular
# expression above
sz = int(line, 16) # hexadecimal

if sz > 0:
# Start a new chunk.
Expand Down
28 changes: 27 additions & 1 deletion src/waitress/rfc7230.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

from .compat import tobytes

HEXDIG = "[0-9a-fA-F]"
DIGIT = "[0-9]"

WS = "[ \t]"
OWS = WS + "{0,}?"
RWS = WS + "{1,}?"
Expand All @@ -27,6 +30,12 @@
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"

QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'

# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
Expand All @@ -45,8 +54,25 @@
# Which allows the field value here to just see if there is even a value in the first place
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"

HEADER_FIELD = re.compile(
# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
# chunk-ext-name = token
# chunk-ext-val = token / quoted-string

CHUNK_EXT_NAME = TOKEN
CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
CHUNK_EXT = (
"(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
)

# Pre-compiled regular expressions for use elsewhere
ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1"))
ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1"))
HEADER_FIELD_RE = re.compile(
tobytes(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
)
)

QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
QUOTED_STRING_RE = re.compile(QUOTED_STRING)
CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1"))
28 changes: 3 additions & 25 deletions src/waitress/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import stat
import time

from .rfc7230 import OBS_TEXT, VCHAR
from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE

logger = logging.getLogger("waitress")
queue_logger = logging.getLogger("waitress.queue")
Expand Down Expand Up @@ -216,40 +216,18 @@ def parse_http_date(d):
return retval


# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
vchar_re = VCHAR

# RFC 7230 Section 3.2.6 "Field Value Components":
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
# obs-text = %x80-FF
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
obs_text_re = OBS_TEXT

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"

quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'

quoted_string = re.compile(quoted_string_re)
quoted_pair = re.compile(quoted_pair_re)


def undquote(value):
if value.startswith('"') and value.endswith('"'):
# So it claims to be DQUOTE'ed, let's validate that
matches = quoted_string.match(value)
matches = QUOTED_STRING_RE.match(value)

if matches and matches.end() == len(value):
# Remove the DQUOTE's from the value
value = value[1:-1]

# Remove all backslashes that are followed by a valid vchar or
# obs-text
value = quoted_pair.sub(r"\1", value)
value = QUOTED_PAIR_RE.sub(r"\1", value)

return value
elif not value.startswith('"') and not value.endswith('"'):
Expand Down
50 changes: 47 additions & 3 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def test_chunking_request_without_content(self):
self.assertFalse("transfer-encoding" in headers)

def test_chunking_request_with_content(self):
control_line = b"20;\r\n" # 20 hex = 32 dec
control_line = b"20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
expected = s * 12
header = tobytes("GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n")
Expand All @@ -321,7 +321,7 @@ def test_chunking_request_with_content(self):
self.assertFalse("transfer-encoding" in headers)

def test_broken_chunked_encoding(self):
control_line = "20;\r\n" # 20 hex = 32 dec
control_line = "20\r\n" # 20 hex = 32 dec
s = "This string has 32 characters.\r\n"
to_send = "GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + "\r\n"
Expand All @@ -345,8 +345,52 @@ def test_broken_chunked_encoding(self):
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_invalid_hex(self):
control_line = b"0x20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + b"\r\n"
self.connect()
self.sock.send(to_send)
with self.sock.makefile("rb", 0) as fp:
line, headers, response_body = read_http(fp)
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
cl = int(headers["content-length"])
self.assertEqual(cl, len(response_body))
self.assertIn(b"Invalid chunk size", response_body)
self.assertEqual(
sorted(headers.keys()),
["connection", "content-length", "content-type", "date", "server"],
)
self.assertEqual(headers["content-type"], "text/plain")
# connection has been closed
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_invalid_extension(self):
control_line = b"20;invalid=\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + b"\r\n"
self.connect()
self.sock.send(to_send)
with self.sock.makefile("rb", 0) as fp:
line, headers, response_body = read_http(fp)
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
cl = int(headers["content-length"])
self.assertEqual(cl, len(response_body))
self.assertIn(b"Invalid chunk extension", response_body)
self.assertEqual(
sorted(headers.keys()),
["connection", "content-length", "content-type", "date", "server"],
)
self.assertEqual(headers["content-type"], "text/plain")
# connection has been closed
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_missing_chunk_end(self):
control_line = "20;\r\n" # 20 hex = 32 dec
control_line = "20\r\n" # 20 hex = 32 dec
s = "This string has 32 characters.\r\n"
to_send = "GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s
Expand Down
22 changes: 21 additions & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def test_received_chunked_completed_sets_content_length(self):
b"Transfer-Encoding: chunked\r\n"
b"X-Foo: 1\r\n"
b"\r\n"
b"1d;\r\n"
b"1d\r\n"
b"This string has 29 characters\r\n"
b"0\r\n\r\n"
)
Expand Down Expand Up @@ -194,6 +194,26 @@ def test_parse_header_bad_content_length(self):
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_bad_content_length_plus(self):
data = b"GET /foobar HTTP/8.4\r\ncontent-length: +10\r\n"

try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Content-Length is invalid", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_bad_content_length_minus(self):
data = b"GET /foobar HTTP/8.4\r\ncontent-length: -10\r\n"

try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Content-Length is invalid", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_multiple_content_length(self):
from waitress.parser import ParsingError

Expand Down
Loading
Loading