Merge ~andrey-fedoseev/twisted:10323-http-request-smuggling into ~launchpad/twisted:lp-backport

Proposed by Andrey Fedoseev
Status: Merged
Approved by: Andrey Fedoseev
Approved revision: 0c77389557e45a8fae6b9e467a5f37f53452464c
Merge reported by: Andrey Fedoseev
Merged at revision: 0c77389557e45a8fae6b9e467a5f37f53452464c
Proposed branch: ~andrey-fedoseev/twisted:10323-http-request-smuggling
Merge into: ~launchpad/twisted:lp-backport
Diff against target: 710 lines (+485/-61)
4 files modified
src/twisted/_version.py (+1/-1)
src/twisted/web/http.py (+210/-56)
src/twisted/web/newsfragments/10323.bugfix (+1/-0)
src/twisted/web/test/test_http.py (+273/-4)
Reviewer: Colin Watson (community)
Review status: Approve
Review via email: mp+426621@code.launchpad.net

Commit message

10323: Inconsistent Interpretation of HTTP Requests ('HTTP Request Smuggling') in twisted.web
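
Editorial note, not part of the proposal: the defects fixed here mostly come down to Python's int() accepting forms (signs, surrounding whitespace, a "0x" prefix) that other HTTP implementations may read differently, letting a front-end proxy and Twisted disagree about where a message body ends. The sketch below is illustrative only; strict_hexint is a hypothetical helper modeled on the _hexint function added in the diff further down, not the actual implementation.

    # Illustrative only. strict_hexint mirrors the idea of the _hexint
    # helper added by this branch; it is not the real twisted.web.http code.
    def strict_hexint(b: bytes) -> int:
        # Reject sign, prefix, and whitespace forms that int(b, 16) accepts.
        if not b or any(c not in b"0123456789abcdefABCDEF" for c in b):
            raise ValueError(b)
        return int(b, 16)

    # Plain int() is too lenient for protocol parsing: two parsers that
    # "successfully" read these values can disagree on message framing.
    assert int(b"+100") == 100        # signed Content-Length accepted
    assert int(b" 100 ") == 100       # surrounding whitespace accepted
    assert int(b"0x1A", 16) == 26     # "0x" prefix accepted for chunk sizes

    # The patched code refuses all of them.
    assert not b"+100".isdigit()      # Content-Length must be digits only
    for bad in (b"0x1A", b"+1a", b"-1a", b""):
        try:
            strict_hexint(bad)
        except ValueError:
            pass
        else:
            raise AssertionError(bad)

The assertions run as-is under Python 3.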

Revision history for this message
Jürgen Gmach (jugmac00) wrote (last edit):

@Colin: Do we have a common procedure for backports, e.g. marking them with a comment?

Any opinion on the cherry-pick? It took me a minute to deduce that 10323 is the GitHub issue number in the Twisted project.

Revision history for this message
Colin Watson (cjwatson) wrote :

The substance of the changes here looks OK to me, thanks. We don't need to mark backports with comments - that's what git history is for.

However, please amend the git commit message with a reference to https://github.com/twisted/twisted/issues/10323 (and perhaps a description of anything not entirely straightforward about the backport, since it doesn't quite look like a straight cherry-pick of the commit referenced in the upstream issue?) before landing this, so that people don't have to try to deduce it themselves if necessary.

review: Approve
Revision history for this message
Jürgen Gmach (jugmac00) wrote :

@Colin: how would you find all backports quickly if you do not use a marker, only git commits? This might be handy once we try to use upstream again, but maybe this information is not even necessary. Let's hope we can just switch to upstream at some point and rely on the test suite to guide us.

Revision history for this message
Colin Watson (cjwatson) wrote :

"Find all backports quickly" isn't a use case I ever need. When rebasing on a new upstream, git commits are exactly the right tool.

Revision history for this message
Colin Watson (cjwatson) wrote :

Also, adding markers in the code makes things actively *worse* when you're trying to rebase on a new upstream, because now git can't do its job properly and help you: instead of being able to automatically drop patches which are already upstream, everything will conflict. So I really strongly recommend against that.

Revision history for this message
Andrey Fedoseev (andrey-fedoseev) wrote :

@Colin

I've updated the commit message.

Preview Diff

1diff --git a/src/twisted/_version.py b/src/twisted/_version.py
2index 05d17b9..68213c3 100644
3--- a/src/twisted/_version.py
4+++ b/src/twisted/_version.py
5@@ -24,5 +24,5 @@ class LocalVersion(Version):
6 local = public
7
8
9-__version__ = LocalVersion('Twisted', 20, 3, 0, local='+lp8')
10+__version__ = LocalVersion('Twisted', 20, 3, 0, local='+lp9')
11 __all__ = ["__version__"]
12diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py
13index b7afa8b..ffe970b 100644
14--- a/src/twisted/web/http.py
15+++ b/src/twisted/web/http.py
16@@ -340,10 +340,39 @@ def toChunk(data):
17
18
19
20+def _ishexdigits(b):
21+ """
22+ Is the string case-insensitively hexadecimal?
23+
24+ It must be composed of one or more characters in the ranges a-f, A-F
25+ and 0-9.
26+ """
27+ for c in b:
28+ if c not in b"0123456789abcdefABCDEF":
29+ return False
30+ return b != b""
31+
32+
33+def _hexint(b):
34+ """
35+ Decode a hexadecimal integer.
36+
37+ Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
38+ a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
39+ parsing network protocols.
40+ """
41+ if not _ishexdigits(b):
42+ raise ValueError(b)
43+ return int(b, 16)
44+
45+
46 def fromChunk(data):
47 """
48 Convert chunk to string.
49
50+ Note that this function is not specification compliant: it doesn't handle
51+ chunk extensions.
52+
53 @type data: C{bytes}
54
55 @return: tuple of (result, remaining) - both C{bytes}.
56@@ -352,7 +381,7 @@ def fromChunk(data):
57 byte string.
58 """
59 prefix, rest = data.split(b'\r\n', 1)
60- length = int(prefix, 16)
61+ length = _hexint(prefix)
62 if length < 0:
63 raise ValueError("Chunk length must be >= 0, not %d" % (length,))
64 if rest[length:length + 2] != b'\r\n':
65@@ -1775,11 +1804,54 @@ class _IdentityTransferDecoder(object):
66 raise _DataLoss()
67
68
69+maxChunkSizeLineLength = 1024
70+
71+
72+_chunkExtChars = (
73+ b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
74+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
75+ b"abcdefghijklmnopqrstuvwxyz{|}~"
76+ b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
77+ b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
78+ b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
79+ b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
80+ b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
81+ b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
82+ b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
83+ b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
84+)
85+"""
86+Characters that are valid in a chunk extension.
87+
88+See RFC 7230 section 4.1.1::
89+
90+ chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
91+
92+ chunk-ext-name = token
93+ chunk-ext-val = token / quoted-string
94+
95+And section 3.2.6::
96+
97+ token = 1*tchar
98+
99+ tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
100+ / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
101+ / DIGIT / ALPHA
102+ ; any VCHAR, except delimiters
103+
104+ quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
105+ qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
106+ obs-text = %x80-FF
107+
108+We don't check if chunk extensions are well-formed beyond validating that they
109+don't contain characters outside this range.
110+"""
111+
112
113 class _ChunkedTransferDecoder(object):
114 """
115- Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
116- section 3.6.1. This protocol can interpret the contents of a request or
117+ Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
118+ section 4.1. This protocol can interpret the contents of a request or
119 response body which uses the I{chunked} Transfer-Encoding. It cannot
120 interpret any of the rest of the HTTP protocol.
121
122@@ -1795,7 +1867,7 @@ class _ChunkedTransferDecoder(object):
123 noticed. -exarkun
124
125 @ivar dataCallback: A one-argument callable which will be invoked each
126- time application data is received.
127+ time application data is received. This callback is not reentrant.
128
129 @ivar finishCallback: A one-argument callable which will be invoked when
130 the terminal chunk is received. It will be invoked with all bytes
131@@ -1813,81 +1885,162 @@ class _ChunkedTransferDecoder(object):
132 read. For C{'BODY'}, the contents of a chunk are being read. For
133 C{'FINISHED'}, the last chunk has been completely read and no more
134 input is valid.
135+
136+ @ivar _buffer: Accumulated received data for the current state. At each
137+ state transition this is truncated at the front so that index 0 is
138+ where the next state shall begin.
139+
140+ @ivar _start: While in the C{'CHUNK_LENGTH'} state, tracks the index into
141+ the buffer at which search for CRLF should resume. Resuming the search
142+ at this position avoids doing quadratic work if the chunk length line
143+ arrives over many calls to C{dataReceived}.
144+
145+ Not used in any other state.
146 """
147+
148 state = 'CHUNK_LENGTH'
149
150 def __init__(self, dataCallback, finishCallback):
151 self.dataCallback = dataCallback
152 self.finishCallback = finishCallback
153- self._buffer = b''
154+ self._buffer = bytearray()
155+ self._start = 0
156+
157+ def _dataReceived_CHUNK_LENGTH(self):
158+ """
159+ Read the chunk size line, ignoring any extensions.
160
161+ @returns: C{True} once the line has been read and removed from
162+ C{self._buffer}. C{False} when more data is required.
163
164- def _dataReceived_CHUNK_LENGTH(self, data):
165- if b'\r\n' in data:
166- line, rest = data.split(b'\r\n', 1)
167- parts = line.split(b';')
168- try:
169- self.length = int(parts[0], 16)
170- except ValueError:
171- raise _MalformedChunkedDataError(
172- "Chunk-size must be an integer.")
173- if self.length == 0:
174- self.state = 'TRAILER'
175- else:
176- self.state = 'BODY'
177- return rest
178- else:
179- self._buffer = data
180- return b''
181+ @raises _MalformedChunkedDataError: when the chunk size cannot be
182+ decoded or the length of the line exceeds L{maxChunkSizeLineLength}.
183+ """
184+ eolIndex = self._buffer.find(b'\r\n', self._start)
185
186+ if eolIndex >= maxChunkSizeLineLength or (
187+ eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength
188+ ):
189+ raise _MalformedChunkedDataError(
190+ "Chunk size line exceeds maximum of {} bytes.".format(
191+ maxChunkSizeLineLength
192+ )
193+ )
194
195- def _dataReceived_CRLF(self, data):
196- if data.startswith(b'\r\n'):
197- self.state = 'CHUNK_LENGTH'
198- return data[2:]
199- else:
200- self._buffer = data
201- return b''
202+ if eolIndex == -1:
203+ # Restart the search upon receipt of more data at the start of the
204+ # new data, minus one in case the last character of the buffer is
205+ # CR.
206+ self._start = len(self._buffer) - 1
207+ return False
208+
209+ endOfLengthIndex = self._buffer.find(b';', 0, eolIndex)
210+ if endOfLengthIndex == -1:
211+ endOfLengthIndex = eolIndex
212+ rawLength = self._buffer[0:endOfLengthIndex]
213+ try:
214+ length = _hexint(rawLength)
215+ except ValueError:
216+ raise _MalformedChunkedDataError("Chunk-size must be an integer.")
217
218+ ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
219+ if ext and ext.translate(None, _chunkExtChars) != b'':
220+ raise _MalformedChunkedDataError(
221+ "Invalid characters in chunk extensions: {}.".format(ext)
222+ )
223
224- def _dataReceived_TRAILER(self, data):
225- if data.startswith(b'\r\n'):
226- data = data[2:]
227- self.state = 'FINISHED'
228- self.finishCallback(data)
229+ if length == 0:
230+ self.state = 'TRAILER'
231 else:
232- self._buffer = data
233- return b''
234+ self.state = 'BODY'
235
236+ self.length = length
237+ del self._buffer[0 : eolIndex + 2]
238+ self._start = 0
239+ return True
240
241- def _dataReceived_BODY(self, data):
242- if len(data) >= self.length:
243- chunk, data = data[:self.length], data[self.length:]
244- self.dataCallback(chunk)
245- self.state = 'CRLF'
246- return data
247- elif len(data) < self.length:
248- self.length -= len(data)
249- self.dataCallback(data)
250- return b''
251+ def _dataReceived_CRLF(self):
252+ """
253+ Await the carriage return and line feed characters that mark the end
254+ of the chunk, following the chunk data.
255
256+ @returns: C{True} when the CRLF have been read, otherwise C{False}.
257
258- def _dataReceived_FINISHED(self, data):
259+ @raises _MalformedChunkedDataError: when anything other than CRLF are
260+ received.
261+ """
262+ if len(self._buffer) < 2:
263+ return False
264+
265+ if not self._buffer.startswith(b'\r\n'):
266+ raise _MalformedChunkedDataError("Chunk did not end with CRLF")
267+
268+ self.state = 'CHUNK_LENGTH'
269+ del self._buffer[0:2]
270+ return True
271+
272+ def _dataReceived_TRAILER(self):
273+ """
274+ Await the carriage return and line feed characters that follow the
275+ terminal zero-length chunk. Then invoke C{finishCallback} and switch to
276+ state C{'FINISHED'}.
277+
278+ @returns: C{False}, as there is either insufficient data to continue,
279+ or no data remains.
280+
281+ @raises _MalformedChunkedDataError: when anything other than CRLF is
282+ received.
283+ """
284+ if len(self._buffer) < 2:
285+ return False
286+
287+ if not self._buffer.startswith(b'\r\n'):
288+ raise _MalformedChunkedDataError("Chunk did not end with CRLF")
289+
290+ data = memoryview(self._buffer)[2:].tobytes()
291+ del self._buffer[:]
292+ self.state = 'FINISHED'
293+ self.finishCallback(data)
294+ return False
295+
296+ def _dataReceived_BODY(self):
297+ """
298+ Deliver any available chunk data to the C{dataCallback}. When all the
299+ remaining data for the chunk arrives, switch to state C{'CRLF'}.
300+
301+ @returns: C{True} to continue processing of any buffered data.
302+ """
303+ if len(self._buffer) >= self.length:
304+ chunk = memoryview(self._buffer)[: self.length].tobytes()
305+ del self._buffer[: self.length]
306+ self.state = 'CRLF'
307+ self.dataCallback(chunk)
308+ else:
309+ chunk = bytes(self._buffer)
310+ self.length -= len(chunk)
311+ del self._buffer[:]
312+ self.dataCallback(chunk)
313+ return True
314+
315+ def _dataReceived_FINISHED(self):
316+ """
317+ Once C{finishCallback} has been invoked, receipt of additional data
318+ raises L{RuntimeError} because it represents a programming error in
319+ the caller.
320+ """
321 raise RuntimeError(
322 "_ChunkedTransferDecoder.dataReceived called after last "
323 "chunk was processed")
324
325-
326 def dataReceived(self, data):
327 """
328 Interpret data from a request or response body which uses the
329 I{chunked} Transfer-Encoding.
330 """
331- data = self._buffer + data
332- self._buffer = b''
333- while data:
334- data = getattr(self, '_dataReceived_%s' % (self.state,))(data)
335-
336+ self._buffer += data
337+ goOn = True
338+ while goOn and self._buffer:
339+ goOn = getattr(self, '_dataReceived_' + self.state)()
340
341 def noMoreData(self):
342 """
343@@ -1900,7 +2053,6 @@ class _ChunkedTransferDecoder(object):
344 "get to 'FINISHED' state." % (self.state,))
345
346
347-
348 @implementer(interfaces.IPushProducer)
349 class _NoPushProducer(object):
350 """
351@@ -2157,7 +2309,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
352 self.setRawMode()
353 elif line[0] in b' \t':
354 # Continuation of a multi line header.
355- self.__header = self.__header + b'\n' + line
356+ self.__header += b' ' + line.lstrip(b' \t')
357 # Regular header line.
358 # Processing of header line is delayed to allow accumulating multi
359 # line headers.
360@@ -2186,6 +2338,8 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
361
362 # Can this header determine the length?
363 if header == b'content-length':
364+ if not data.isdigit():
365+ return fail()
366 try:
367 length = int(data)
368 except ValueError:
369@@ -2240,7 +2394,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
370 return False
371
372 header = header.lower()
373- data = data.strip()
374+ data = data.strip(b' \t')
375
376 if not self._maybeChooseTransferDecoder(header, data):
377 return False
378diff --git a/src/twisted/web/newsfragments/10323.bugfix b/src/twisted/web/newsfragments/10323.bugfix
379new file mode 100644
380index 0000000..1890b1b
381--- /dev/null
382+++ b/src/twisted/web/newsfragments/10323.bugfix
383@@ -0,0 +1 @@
384+twisted.web.http had several defects in HTTP request parsing that could permit HTTP request smuggling. It now disallows signed Content-Length headers, forbids illegal characters in chunk extensions, forbids the 0x prefix for chunk lengths, and strips only space and horizontal tab characters from header values. These changes address CVE-2022-24801 and GHSA-c2jg-hw38-jrqq.
385diff --git a/src/twisted/web/test/test_http.py b/src/twisted/web/test/test_http.py
386index a3067f7..f50fabe 100644
387--- a/src/twisted/web/test/test_http.py
388+++ b/src/twisted/web/test/test_http.py
389@@ -1300,6 +1300,126 @@ class ChunkedTransferEncodingTests(unittest.TestCase):
390 p.dataReceived(b'3; x-foo=bar\r\nabc\r\n')
391 self.assertEqual(L, [b'abc'])
392
393+ def test_extensionsMalformed(self):
394+ """
395+ L{_ChunkedTransferDecoder.dataReceived} raises
396+ L{_MalformedChunkedDataError} when the chunk extension fields contain
397+ invalid characters.
398+
399+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
400+ """
401+ invalidControl = (
402+ b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f'
403+ b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
404+ )
405+ invalidDelimiter = b'\\'
406+ invalidDel = b'\x7f'
407+ for b in invalidControl + invalidDelimiter + invalidDel:
408+ data = b'3; ' + bytes((b,)) + b'\r\nabc\r\n'
409+ p = http._ChunkedTransferDecoder(
410+ lambda b: None, # pragma: nocov
411+ lambda b: None, # pragma: nocov
412+ )
413+ self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data)
414+
415+ def test_oversizedChunkSizeLine(self):
416+ """
417+ L{_ChunkedTransferDecoder.dataReceived} raises
418+ L{_MalformedChunkedDataError} when the chunk size line exceeds L{maxChunkSizeLineLength} (1 KiB).
419+ This applies even when the data has already been received and buffered
420+ so that behavior is consistent regardless of how bytes are framed.
421+ """
422+ p = http._ChunkedTransferDecoder(None, None)
423+ self.assertRaises(
424+ http._MalformedChunkedDataError,
425+ p.dataReceived,
426+ b"3;" + b"." * http.maxChunkSizeLineLength + b'\r\nabc\r\n',
427+ )
428+
429+ def test_oversizedChunkSizeLinePartial(self):
430+ """
431+ L{_ChunkedTransferDecoder.dataReceived} raises
432+ L{_MalformedChunkedDataError} when the amount of data buffered while
433+ looking for the end of the chunk size line exceeds L{maxChunkSizeLineLength} (1 KiB), so
434+ that buffering does not continue without bound.
435+ """
436+ p = http._ChunkedTransferDecoder(None, None)
437+ self.assertRaises(
438+ http._MalformedChunkedDataError,
439+ p.dataReceived,
440+ b"." * (http.maxChunkSizeLineLength + 1),
441+ )
442+
443+ def test_malformedChunkSize(self):
444+ """
445+ L{_ChunkedTransferDecoder.dataReceived} raises
446+ L{_MalformedChunkedDataError} when the chunk size can't be decoded as
447+ a base-16 integer.
448+ """
449+ p = http._ChunkedTransferDecoder(
450+ lambda b: None, # pragma: nocov
451+ lambda b: None, # pragma: nocov
452+ )
453+ self.assertRaises(
454+ http._MalformedChunkedDataError, p.dataReceived, b'bloop\r\nabc\r\n'
455+ )
456+
457+ def test_malformedChunkSizeNegative(self):
458+ """
459+ L{_ChunkedTransferDecoder.dataReceived} raises
460+ L{_MalformedChunkedDataError} when the chunk size is negative.
461+ """
462+ p = http._ChunkedTransferDecoder(
463+ lambda b: None, # pragma: nocov
464+ lambda b: None, # pragma: nocov
465+ )
466+ self.assertRaises(
467+ http._MalformedChunkedDataError, p.dataReceived, b'-3\r\nabc\r\n'
468+ )
469+
470+ def test_malformedChunkSizeHex(self):
471+ """
472+ L{_ChunkedTransferDecoder.dataReceived} raises
473+ L{_MalformedChunkedDataError} when the chunk size is prefixed with
474+ "0x", as if it were a Python integer literal.
475+
476+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
477+ """
478+ p = http._ChunkedTransferDecoder(
479+ lambda b: None, # pragma: nocov
480+ lambda b: None, # pragma: nocov
481+ )
482+ self.assertRaises(
483+ http._MalformedChunkedDataError, p.dataReceived, b'0x3\r\nabc\r\n'
484+ )
485+
486+ def test_malformedChunkEnd(self):
487+ r"""
488+ L{_ChunkedTransferDecoder.dataReceived} raises
489+ L{_MalformedChunkedDataError} when the chunk is followed by characters
490+ other than C{\r\n}.
491+ """
492+ p = http._ChunkedTransferDecoder(
493+ lambda b: None,
494+ lambda b: None, # pragma: nocov
495+ )
496+ self.assertRaises(
497+ http._MalformedChunkedDataError, p.dataReceived, b'3\r\nabc!!!!'
498+ )
499+
500+ def test_malformedChunkEndFinal(self):
501+ r"""
502+ L{_ChunkedTransferDecoder.dataReceived} raises
503+ L{_MalformedChunkedDataError} when the terminal zero-length chunk is
504+ followed by characters other than C{\r\n}.
505+ """
506+ p = http._ChunkedTransferDecoder(
507+ lambda b: None,
508+ lambda b: None, # pragma: nocov
509+ )
510+ self.assertRaises(
511+ http._MalformedChunkedDataError, p.dataReceived, b'3\r\nabc\r\n0\r\n!!'
512+ )
513
514 def test_finish(self):
515 """
516@@ -1389,6 +1509,8 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin):
517 chunked = b''.join(http.toChunk(s))
518 self.assertEqual((s, b''), http.fromChunk(chunked))
519 self.assertRaises(ValueError, http.fromChunk, b'-5\r\nmalformed!\r\n')
520+ self.assertRaises(ValueError, http.fromChunk, b'0xa\r\nmalformed!\r\n')
521+ self.assertRaises(ValueError, http.fromChunk, b'0XA\r\nmalformed!\r\n')
522
523 def testConcatenatedChunks(self):
524 chunked = b''.join([b''.join(http.toChunk(t)) for t in self.strings])
525@@ -1646,7 +1768,12 @@ class ParsingTests(unittest.TestCase):
526 Line folded headers are handled by L{HTTPChannel} by replacing each
527 fold with a single space by the time they are made available to the
528 L{Request}. Any leading whitespace in the folded lines of the header
529- value is preserved.
530+ value is replaced with a single space, per:
531+
532+ A server that receives an obs-fold in a request message ... MUST
533+ ... replace each received obs-fold with one or more SP octets prior
534+ to interpreting the field value or forwarding the message
535+ downstream.
536
537 See RFC 7230 section 3.2.4.
538 """
539@@ -1683,15 +1810,65 @@ class ParsingTests(unittest.TestCase):
540 )
541 self.assertEqual(
542 request.requestHeaders.getRawHeaders(b"space"),
543- [b"space space"],
544+ [b"space space"],
545 )
546 self.assertEqual(
547 request.requestHeaders.getRawHeaders(b"spaces"),
548- [b"spaces spaces spaces"],
549+ [b"spaces spaces spaces"],
550 )
551 self.assertEqual(
552 request.requestHeaders.getRawHeaders(b"tab"),
553- [b"t \ta \tb"],
554+ [b"t a b"],
555+ )
556+
557+ def test_headerStripWhitespace(self):
558+ """
559+ Leading and trailing space and tab characters are stripped from
560+ headers. Other forms of whitespace are preserved.
561+
562+ See RFC 7230 section 3.2.3 and 3.2.4.
563+ """
564+ processed = []
565+
566+ class MyRequest(http.Request):
567+ def process(self):
568+ processed.append(self)
569+ self.finish()
570+
571+ requestLines = [
572+ b"GET / HTTP/1.0",
573+ b"spaces: spaces were stripped ",
574+ b"tabs: \t\ttabs were stripped\t\t",
575+ b"spaces-and-tabs: \t \t spaces and tabs were stripped\t \t",
576+ b"line-tab: \v vertical tab was preserved\v\t",
577+ b"form-feed: \f form feed was preserved \f ",
578+ b"",
579+ b"",
580+ ]
581+
582+ self.runRequest(b"\n".join(requestLines), MyRequest, 0)
583+ [request] = processed
584+ # All leading and trailing whitespace is stripped from the
585+ # header-value.
586+ self.assertEqual(
587+ request.requestHeaders.getRawHeaders(b"spaces"),
588+ [b"spaces were stripped"],
589+ )
590+ self.assertEqual(
591+ request.requestHeaders.getRawHeaders(b"tabs"),
592+ [b"tabs were stripped"],
593+ )
594+ self.assertEqual(
595+ request.requestHeaders.getRawHeaders(b"spaces-and-tabs"),
596+ [b"spaces and tabs were stripped"],
597+ )
598+ self.assertEqual(
599+ request.requestHeaders.getRawHeaders(b"line-tab"),
600+ [b"\v vertical tab was preserved\v"],
601+ )
602+ self.assertEqual(
603+ request.requestHeaders.getRawHeaders(b"form-feed"),
604+ [b"\f form feed was preserved \f"],
605 )
606
607
608@@ -2267,6 +2444,58 @@ Hello,
609 ])
610
611
612+ def test_contentLengthMalformed(self):
613+ """
614+ A request with a non-integer C{Content-Length} header fails with a 400
615+ response without calling L{Request.process}.
616+ """
617+ self.assertRequestRejected(
618+ [
619+ b"GET /a HTTP/1.1",
620+ b"Content-Length: MORE THAN NINE THOUSAND!",
621+ b"Host: host.invalid",
622+ b"",
623+ b"",
624+ b"x" * 9001,
625+ ]
626+ )
627+
628+ def test_contentLengthTooPositive(self):
629+ """
630+ A request with a C{Content-Length} header that begins with a L{+} fails
631+ with a 400 response without calling L{Request.process}.
632+
633+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
634+ """
635+ self.assertRequestRejected(
636+ [
637+ b"GET /a HTTP/1.1",
638+ b"Content-Length: +100",
639+ b"Host: host.invalid",
640+ b"",
641+ b"",
642+ b"x" * 100,
643+ ]
644+ )
645+
646+ def test_contentLengthNegative(self):
647+ """
648+ A request with a C{Content-Length} header that is negative fails with
649+ a 400 response without calling L{Request.process}.
650+
651+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
652+ """
653+ self.assertRequestRejected(
654+ [
655+ b"GET /a HTTP/1.1",
656+ b"Content-Length: -100",
657+ b"Host: host.invalid",
658+ b"",
659+ b"",
660+ b"x" * 200,
661+ ]
662+ )
663+
664 def test_duplicateContentLengthsWithPipelinedRequests(self):
665 """
666 Two pipelined requests, the first of which includes multiple
667@@ -4226,3 +4455,43 @@ class HTTPClientSanitizationTests(unittest.SynchronousTestCase):
668 transport.value().splitlines(),
669 [b": ".join([sanitizedBytes, sanitizedBytes])]
670 )
671+
672+
673+class HexHelperTests(unittest.SynchronousTestCase):
674+ """
675+ Test the L{http._hexint} and L{http._ishexdigits} helper functions.
676+ """
677+
678+ badStrings = (b"", b"0x1234", b"feds", b"-123", b"+123")
679+
680+ def test_isHex(self):
681+ """
682+ L{_ishexdigits()} returns L{True} for nonempty bytestrings containing
683+ hexadecimal digits.
684+ """
685+ for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"):
686+ self.assertIs(True, http._ishexdigits(s))
687+
688+ def test_decodes(self):
689+ """
690+ L{_hexint()} returns the integer equivalent of the input.
691+ """
692+ self.assertEqual(10, http._hexint(b"a"))
693+ self.assertEqual(0x10, http._hexint(b"10"))
694+ self.assertEqual(0xABCD123, http._hexint(b"abCD123"))
695+
696+ def test_isNotHex(self):
697+ """
698+ L{_ishexdigits()} returns L{False} for bytestrings that don't contain
699+ hexadecimal digits, including the empty string.
700+ """
701+ for s in self.badStrings:
702+ self.assertIs(False, http._ishexdigits(s))
703+
704+ def test_decodeNotHex(self):
705+ """
706+ L{_hexint()} raises L{ValueError} for bytestrings that can't
707+ be decoded.
708+ """
709+ for s in self.badStrings:
710+ self.assertRaises(ValueError, http._hexint, s)
