Merge ~andrey-fedoseev/twisted:10323-http-request-smuggling into ~launchpad/twisted:lp-backport
- Git
- lp:~andrey-fedoseev/twisted
- 10323-http-request-smuggling
- Merge into lp-backport
Status: | Merged |
---|---|
Approved by: | Andrey Fedoseev |
Approved revision: | 0c77389557e45a8fae6b9e467a5f37f53452464c |
Merge reported by: | Andrey Fedoseev |
Merged at revision: | 0c77389557e45a8fae6b9e467a5f37f53452464c |
Proposed branch: | ~andrey-fedoseev/twisted:10323-http-request-smuggling |
Merge into: | ~launchpad/twisted:lp-backport |
Diff against target: |
710 lines (+485/-61) 4 files modified
src/twisted/_version.py (+1/-1) src/twisted/web/http.py (+210/-56) src/twisted/web/newsfragments/10323.bugfix (+1/-0) src/twisted/web/test/test_http.py (+273/-4) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Colin Watson (community) | Approve | ||
Review via email: mp+426621@code.launchpad.net |
Commit message
10323: Inconsistent Interpretation of HTTP Requests ('HTTP Request Smuggling') in twisted.web
Description of the change
Jürgen Gmach (jugmac00) wrote (last edit ): | # |
Colin Watson (cjwatson) wrote : | # |
The substance of the changes here looks OK to me, thanks. We don't need to mark backports with comments - that's what git history is for.
However, please amend the git commit message with a reference to https://github.com/twisted/twisted/security/advisories/GHSA-c2jg-hw38-jrqq
Jürgen Gmach (jugmac00) wrote : | # |
@Colin - how would you find all backports quickly if you do not use a marker - only with git commits? This might be handy once we try to use upstream again - but maybe this info is not even necessary. Let's hope we can just switch to upstream at some point and rely on the test suite to guide us.
Colin Watson (cjwatson) wrote : | # |
"Find all backports quickly" isn't a use case I ever need. When rebasing on a new upstream, git commits are exactly the right tool.
Colin Watson (cjwatson) wrote : | # |
Also, adding markers in the code makes things actively *worse* when you're trying to rebase on a new upstream, because now git can't do its job properly and help you: instead of being able to automatically drop patches which are already upstream, everything will conflict. So I really strongly recommend against that.
Andrey Fedoseev (andrey-fedoseev) wrote : | # |
@Colin
I've updated the commit message.
Preview Diff
1 | diff --git a/src/twisted/_version.py b/src/twisted/_version.py |
2 | index 05d17b9..68213c3 100644 |
3 | --- a/src/twisted/_version.py |
4 | +++ b/src/twisted/_version.py |
5 | @@ -24,5 +24,5 @@ class LocalVersion(Version): |
6 | local = public |
7 | |
8 | |
9 | -__version__ = LocalVersion('Twisted', 20, 3, 0, local='+lp8') |
10 | +__version__ = LocalVersion('Twisted', 20, 3, 0, local='+lp9') |
11 | __all__ = ["__version__"] |
12 | diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py |
13 | index b7afa8b..ffe970b 100644 |
14 | --- a/src/twisted/web/http.py |
15 | +++ b/src/twisted/web/http.py |
16 | @@ -340,10 +340,39 @@ def toChunk(data): |
17 | |
18 | |
19 | |
20 | +def _ishexdigits(b): |
21 | + """ |
22 | + Is the string case-insensitively hexidecimal? |
23 | + |
24 | + It must be composed of one or more characters in the ranges a-f, A-F |
25 | + and 0-9. |
26 | + """ |
27 | + for c in b: |
28 | + if c not in b"0123456789abcdefABCDEF": |
29 | + return False |
30 | + return b != b"" |
31 | + |
32 | + |
33 | +def _hexint(b): |
34 | + """ |
35 | + Decode a hexadecimal integer. |
36 | + |
37 | + Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has |
38 | + a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when |
39 | + parsing network protocols. |
40 | + """ |
41 | + if not _ishexdigits(b): |
42 | + raise ValueError(b) |
43 | + return int(b, 16) |
44 | + |
45 | + |
46 | def fromChunk(data): |
47 | """ |
48 | Convert chunk to string. |
49 | |
50 | + Note that this function is not specification compliant: it doesn't handle |
51 | + chunk extensions. |
52 | + |
53 | @type data: C{bytes} |
54 | |
55 | @return: tuple of (result, remaining) - both C{bytes}. |
56 | @@ -352,7 +381,7 @@ def fromChunk(data): |
57 | byte string. |
58 | """ |
59 | prefix, rest = data.split(b'\r\n', 1) |
60 | - length = int(prefix, 16) |
61 | + length = _hexint(prefix) |
62 | if length < 0: |
63 | raise ValueError("Chunk length must be >= 0, not %d" % (length,)) |
64 | if rest[length:length + 2] != b'\r\n': |
65 | @@ -1775,11 +1804,54 @@ class _IdentityTransferDecoder(object): |
66 | raise _DataLoss() |
67 | |
68 | |
69 | +maxChunkSizeLineLength = 1024 |
70 | + |
71 | + |
72 | +_chunkExtChars = ( |
73 | + b"\t !\"#$%&'()*+,-./0123456789:;<=>?@" |
74 | + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`" |
75 | + b"abcdefghijklmnopqrstuvwxyz{|}~" |
76 | + b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" |
77 | + b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" |
78 | + b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" |
79 | + b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" |
80 | + b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" |
81 | + b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" |
82 | + b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" |
83 | + b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" |
84 | +) |
85 | +""" |
86 | +Characters that are valid in a chunk extension. |
87 | + |
88 | +See RFC 7230 section 4.1.1:: |
89 | + |
90 | + chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) |
91 | + |
92 | + chunk-ext-name = token |
93 | + chunk-ext-val = token / quoted-string |
94 | + |
95 | +And section 3.2.6:: |
96 | + |
97 | + token = 1*tchar |
98 | + |
99 | + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" |
100 | + / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" |
101 | + / DIGIT / ALPHA |
102 | + ; any VCHAR, except delimiters |
103 | + |
104 | + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE |
105 | + qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text |
106 | + obs-text = %x80-FF |
107 | + |
108 | +We don't check if chunk extensions are well-formed beyond validating that they |
109 | +don't contain characters outside this range. |
110 | +""" |
111 | + |
112 | |
113 | class _ChunkedTransferDecoder(object): |
114 | """ |
115 | - Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, |
116 | - section 3.6.1. This protocol can interpret the contents of a request or |
117 | + Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230, |
118 | + section 4.1. This protocol can interpret the contents of a request or |
119 | response body which uses the I{chunked} Transfer-Encoding. It cannot |
120 | interpret any of the rest of the HTTP protocol. |
121 | |
122 | @@ -1795,7 +1867,7 @@ class _ChunkedTransferDecoder(object): |
123 | noticed. -exarkun |
124 | |
125 | @ivar dataCallback: A one-argument callable which will be invoked each |
126 | - time application data is received. |
127 | + time application data is received. This callback is not reentrant. |
128 | |
129 | @ivar finishCallback: A one-argument callable which will be invoked when |
130 | the terminal chunk is received. It will be invoked with all bytes |
131 | @@ -1813,81 +1885,162 @@ class _ChunkedTransferDecoder(object): |
132 | read. For C{'BODY'}, the contents of a chunk are being read. For |
133 | C{'FINISHED'}, the last chunk has been completely read and no more |
134 | input is valid. |
135 | + |
136 | + @ivar _buffer: Accumulated received data for the current state. At each |
137 | + state transition this is truncated at the front so that index 0 is |
138 | + where the next state shall begin. |
139 | + |
140 | + @ivar _start: While in the C{'CHUNK_LENGTH'} state, tracks the index into |
141 | + the buffer at which search for CRLF should resume. Resuming the search |
142 | + at this position avoids doing quadratic work if the chunk length line |
143 | + arrives over many calls to C{dataReceived}. |
144 | + |
145 | + Not used in any other state. |
146 | """ |
147 | + |
148 | state = 'CHUNK_LENGTH' |
149 | |
150 | def __init__(self, dataCallback, finishCallback): |
151 | self.dataCallback = dataCallback |
152 | self.finishCallback = finishCallback |
153 | - self._buffer = b'' |
154 | + self._buffer = bytearray() |
155 | + self._start = 0 |
156 | + |
157 | + def _dataReceived_CHUNK_LENGTH(self): |
158 | + """ |
159 | + Read the chunk size line, ignoring any extensions. |
160 | |
161 | + @returns: C{True} once the line has been read and removed from |
162 | + C{self._buffer}. C{False} when more data is required. |
163 | |
164 | - def _dataReceived_CHUNK_LENGTH(self, data): |
165 | - if b'\r\n' in data: |
166 | - line, rest = data.split(b'\r\n', 1) |
167 | - parts = line.split(b';') |
168 | - try: |
169 | - self.length = int(parts[0], 16) |
170 | - except ValueError: |
171 | - raise _MalformedChunkedDataError( |
172 | - "Chunk-size must be an integer.") |
173 | - if self.length == 0: |
174 | - self.state = 'TRAILER' |
175 | - else: |
176 | - self.state = 'BODY' |
177 | - return rest |
178 | - else: |
179 | - self._buffer = data |
180 | - return b'' |
181 | + @raises _MalformedChunkedDataError: when the chunk size cannot be |
182 | + decoded or the length of the line exceeds L{maxChunkSizeLineLength}. |
183 | + """ |
184 | + eolIndex = self._buffer.find(b'\r\n', self._start) |
185 | |
186 | + if eolIndex >= maxChunkSizeLineLength or ( |
187 | + eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength |
188 | + ): |
189 | + raise _MalformedChunkedDataError( |
190 | + "Chunk size line exceeds maximum of {} bytes.".format( |
191 | + maxChunkSizeLineLength |
192 | + ) |
193 | + ) |
194 | |
195 | - def _dataReceived_CRLF(self, data): |
196 | - if data.startswith(b'\r\n'): |
197 | - self.state = 'CHUNK_LENGTH' |
198 | - return data[2:] |
199 | - else: |
200 | - self._buffer = data |
201 | - return b'' |
202 | + if eolIndex == -1: |
203 | + # Restart the search upon receipt of more data at the start of the |
204 | + # new data, minus one in case the last character of the buffer is |
205 | + # CR. |
206 | + self._start = len(self._buffer) - 1 |
207 | + return False |
208 | + |
209 | + endOfLengthIndex = self._buffer.find(b';', 0, eolIndex) |
210 | + if endOfLengthIndex == -1: |
211 | + endOfLengthIndex = eolIndex |
212 | + rawLength = self._buffer[0:endOfLengthIndex] |
213 | + try: |
214 | + length = _hexint(rawLength) |
215 | + except ValueError: |
216 | + raise _MalformedChunkedDataError("Chunk-size must be an integer.") |
217 | |
218 | + ext = self._buffer[endOfLengthIndex + 1 : eolIndex] |
219 | + if ext and ext.translate(None, _chunkExtChars) != b'': |
220 | + raise _MalformedChunkedDataError( |
221 | + "Invalid characters in chunk extensions: {}.".format(ext) |
222 | + ) |
223 | |
224 | - def _dataReceived_TRAILER(self, data): |
225 | - if data.startswith(b'\r\n'): |
226 | - data = data[2:] |
227 | - self.state = 'FINISHED' |
228 | - self.finishCallback(data) |
229 | + if length == 0: |
230 | + self.state = 'TRAILER' |
231 | else: |
232 | - self._buffer = data |
233 | - return b'' |
234 | + self.state = 'BODY' |
235 | |
236 | + self.length = length |
237 | + del self._buffer[0 : eolIndex + 2] |
238 | + self._start = 0 |
239 | + return True |
240 | |
241 | - def _dataReceived_BODY(self, data): |
242 | - if len(data) >= self.length: |
243 | - chunk, data = data[:self.length], data[self.length:] |
244 | - self.dataCallback(chunk) |
245 | - self.state = 'CRLF' |
246 | - return data |
247 | - elif len(data) < self.length: |
248 | - self.length -= len(data) |
249 | - self.dataCallback(data) |
250 | - return b'' |
251 | + def _dataReceived_CRLF(self): |
252 | + """ |
253 | + Await the carriage return and line feed characters that are the end of |
254 | + chunk marker that follow the chunk data. |
255 | |
256 | + @returns: C{True} when the CRLF have been read, otherwise C{False}. |
257 | |
258 | - def _dataReceived_FINISHED(self, data): |
259 | + @raises _MalformedChunkedDataError: when anything other than CRLF are |
260 | + received. |
261 | + """ |
262 | + if len(self._buffer) < 2: |
263 | + return False |
264 | + |
265 | + if not self._buffer.startswith(b'\r\n'): |
266 | + raise _MalformedChunkedDataError("Chunk did not end with CRLF") |
267 | + |
268 | + self.state = 'CHUNK_LENGTH' |
269 | + del self._buffer[0:2] |
270 | + return True |
271 | + |
272 | + def _dataReceived_TRAILER(self): |
273 | + """ |
274 | + Await the carriage return and line feed characters that follow the |
275 | + terminal zero-length chunk. Then invoke C{finishCallback} and switch to |
276 | + state C{'FINISHED'}. |
277 | + |
278 | + @returns: C{False}, as there is either insufficient data to continue, |
279 | + or no data remains. |
280 | + |
281 | + @raises _MalformedChunkedDataError: when anything other than CRLF is |
282 | + received. |
283 | + """ |
284 | + if len(self._buffer) < 2: |
285 | + return False |
286 | + |
287 | + if not self._buffer.startswith(b'\r\n'): |
288 | + raise _MalformedChunkedDataError("Chunk did not end with CRLF") |
289 | + |
290 | + data = memoryview(self._buffer)[2:].tobytes() |
291 | + del self._buffer[:] |
292 | + self.state = 'FINISHED' |
293 | + self.finishCallback(data) |
294 | + return False |
295 | + |
296 | + def _dataReceived_BODY(self): |
297 | + """ |
298 | + Deliver any available chunk data to the C{dataCallback}. When all the |
299 | + remaining data for the chunk arrives, switch to state C{'CRLF'}. |
300 | + |
301 | + @returns: C{True} to continue processing of any buffered data. |
302 | + """ |
303 | + if len(self._buffer) >= self.length: |
304 | + chunk = memoryview(self._buffer)[: self.length].tobytes() |
305 | + del self._buffer[: self.length] |
306 | + self.state = 'CRLF' |
307 | + self.dataCallback(chunk) |
308 | + else: |
309 | + chunk = bytes(self._buffer) |
310 | + self.length -= len(chunk) |
311 | + del self._buffer[:] |
312 | + self.dataCallback(chunk) |
313 | + return True |
314 | + |
315 | + def _dataReceived_FINISHED(self): |
316 | + """ |
317 | + Once C{finishCallback} has been invoked receipt of additional data |
318 | + raises L{RuntimeError} because it represents a programming error in |
319 | + the caller. |
320 | + """ |
321 | raise RuntimeError( |
322 | "_ChunkedTransferDecoder.dataReceived called after last " |
323 | "chunk was processed") |
324 | |
325 | - |
326 | def dataReceived(self, data): |
327 | """ |
328 | Interpret data from a request or response body which uses the |
329 | I{chunked} Transfer-Encoding. |
330 | """ |
331 | - data = self._buffer + data |
332 | - self._buffer = b'' |
333 | - while data: |
334 | - data = getattr(self, '_dataReceived_%s' % (self.state,))(data) |
335 | - |
336 | + self._buffer += data |
337 | + goOn = True |
338 | + while goOn and self._buffer: |
339 | + goOn = getattr(self, '_dataReceived_' + self.state)() |
340 | |
341 | def noMoreData(self): |
342 | """ |
343 | @@ -1900,7 +2053,6 @@ class _ChunkedTransferDecoder(object): |
344 | "get to 'FINISHED' state." % (self.state,)) |
345 | |
346 | |
347 | - |
348 | @implementer(interfaces.IPushProducer) |
349 | class _NoPushProducer(object): |
350 | """ |
351 | @@ -2157,7 +2309,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): |
352 | self.setRawMode() |
353 | elif line[0] in b' \t': |
354 | # Continuation of a multi line header. |
355 | - self.__header = self.__header + b'\n' + line |
356 | + self.__header += b' ' + line.lstrip(b' \t') |
357 | # Regular header line. |
358 | # Processing of header line is delayed to allow accumulating multi |
359 | # line headers. |
360 | @@ -2186,6 +2338,8 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): |
361 | |
362 | # Can this header determine the length? |
363 | if header == b'content-length': |
364 | + if not data.isdigit(): |
365 | + return fail() |
366 | try: |
367 | length = int(data) |
368 | except ValueError: |
369 | @@ -2240,7 +2394,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): |
370 | return False |
371 | |
372 | header = header.lower() |
373 | - data = data.strip() |
374 | + data = data.strip(b' \t') |
375 | |
376 | if not self._maybeChooseTransferDecoder(header, data): |
377 | return False |
378 | diff --git a/src/twisted/web/newsfragments/10323.bugfix b/src/twisted/web/newsfragments/10323.bugfix |
379 | new file mode 100644 |
380 | index 0000000..1890b1b |
381 | --- /dev/null |
382 | +++ b/src/twisted/web/newsfragments/10323.bugfix |
383 | @@ -0,0 +1 @@ |
384 | +twisted.web.http had several several defects in HTTP request parsing that could permit HTTP request smuggling. It now disallows signed Content-Length headers, forbids illegal characters in chunked extensions, forbids 0x prefix to chunk lengths, and only strips spaces and horizontal tab characters from header values. These changes address CVE-2022-24801 and GHSA-c2jg-hw38-jrqq. |
385 | diff --git a/src/twisted/web/test/test_http.py b/src/twisted/web/test/test_http.py |
386 | index a3067f7..f50fabe 100644 |
387 | --- a/src/twisted/web/test/test_http.py |
388 | +++ b/src/twisted/web/test/test_http.py |
389 | @@ -1300,6 +1300,126 @@ class ChunkedTransferEncodingTests(unittest.TestCase): |
390 | p.dataReceived(b'3; x-foo=bar\r\nabc\r\n') |
391 | self.assertEqual(L, [b'abc']) |
392 | |
393 | + def test_extensionsMalformed(self): |
394 | + """ |
395 | + L{_ChunkedTransferDecoder.dataReceived} raises |
396 | + L{_MalformedChunkedDataError} when the chunk extension fields contain |
397 | + invalid characters. |
398 | + |
399 | + This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq. |
400 | + """ |
401 | + invalidControl = ( |
402 | + b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f' |
403 | + b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' |
404 | + ) |
405 | + invalidDelimiter = b'\\' |
406 | + invalidDel = b'\x7f' |
407 | + for b in invalidControl + invalidDelimiter + invalidDel: |
408 | + data = b'3; ' + bytes((b,)) + b'\r\nabc\r\n' |
409 | + p = http._ChunkedTransferDecoder( |
410 | + lambda b: None, # pragma: nocov |
411 | + lambda b: None, # pragma: nocov |
412 | + ) |
413 | + self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data) |
414 | + |
415 | + def test_oversizedChunkSizeLine(self): |
416 | + """ |
417 | + L{_ChunkedTransferDecoder.dataReceived} raises |
418 | + L{_MalformedChunkedDataError} when the chunk size line exceeds 4 KiB. |
419 | + This applies even when the data has already been received and buffered |
420 | + so that behavior is consistent regardless of how bytes are framed. |
421 | + """ |
422 | + p = http._ChunkedTransferDecoder(None, None) |
423 | + self.assertRaises( |
424 | + http._MalformedChunkedDataError, |
425 | + p.dataReceived, |
426 | + b"3;" + b"." * http.maxChunkSizeLineLength + b'\r\nabc\r\n', |
427 | + ) |
428 | + |
429 | + def test_oversizedChunkSizeLinePartial(self): |
430 | + """ |
431 | + L{_ChunkedTransferDecoder.dataReceived} raises |
432 | + L{_MalformedChunkedDataError} when the amount of data buffered while |
433 | + looking for the end of the chunk size line exceeds 4 KiB so |
434 | + that buffering does not continue without bound. |
435 | + """ |
436 | + p = http._ChunkedTransferDecoder(None, None) |
437 | + self.assertRaises( |
438 | + http._MalformedChunkedDataError, |
439 | + p.dataReceived, |
440 | + b"." * (http.maxChunkSizeLineLength + 1), |
441 | + ) |
442 | + |
443 | + def test_malformedChunkSize(self): |
444 | + """ |
445 | + L{_ChunkedTransferDecoder.dataReceived} raises |
446 | + L{_MalformedChunkedDataError} when the chunk size can't be decoded as |
447 | + a base-16 integer. |
448 | + """ |
449 | + p = http._ChunkedTransferDecoder( |
450 | + lambda b: None, # pragma: nocov |
451 | + lambda b: None, # pragma: nocov |
452 | + ) |
453 | + self.assertRaises( |
454 | + http._MalformedChunkedDataError, p.dataReceived, b'bloop\r\nabc\r\n' |
455 | + ) |
456 | + |
457 | + def test_malformedChunkSizeNegative(self): |
458 | + """ |
459 | + L{_ChunkedTransferDecoder.dataReceived} raises |
460 | + L{_MalformedChunkedDataError} when the chunk size is negative. |
461 | + """ |
462 | + p = http._ChunkedTransferDecoder( |
463 | + lambda b: None, # pragma: nocov |
464 | + lambda b: None, # pragma: nocov |
465 | + ) |
466 | + self.assertRaises( |
467 | + http._MalformedChunkedDataError, p.dataReceived, b'-3\r\nabc\r\n' |
468 | + ) |
469 | + |
470 | + def test_malformedChunkSizeHex(self): |
471 | + """ |
472 | + L{_ChunkedTransferDecoder.dataReceived} raises |
473 | + L{_MalformedChunkedDataError} when the chunk size is prefixed with |
474 | + "0x", as if it were a Python integer literal. |
475 | + |
476 | + This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq. |
477 | + """ |
478 | + p = http._ChunkedTransferDecoder( |
479 | + lambda b: None, # pragma: nocov |
480 | + lambda b: None, # pragma: nocov |
481 | + ) |
482 | + self.assertRaises( |
483 | + http._MalformedChunkedDataError, p.dataReceived, b'0x3\r\nabc\r\n' |
484 | + ) |
485 | + |
486 | + def test_malformedChunkEnd(self): |
487 | + r""" |
488 | + L{_ChunkedTransferDecoder.dataReceived} raises |
489 | + L{_MalformedChunkedDataError} when the chunk is followed by characters |
490 | + other than C{\r\n}. |
491 | + """ |
492 | + p = http._ChunkedTransferDecoder( |
493 | + lambda b: None, |
494 | + lambda b: None, # pragma: nocov |
495 | + ) |
496 | + self.assertRaises( |
497 | + http._MalformedChunkedDataError, p.dataReceived, b'3\r\nabc!!!!' |
498 | + ) |
499 | + |
500 | + def test_malformedChunkEndFinal(self): |
501 | + r""" |
502 | + L{_ChunkedTransferDecoder.dataReceived} raises |
503 | + L{_MalformedChunkedDataError} when the terminal zero-length chunk is |
504 | + followed by characters other than C{\r\n}. |
505 | + """ |
506 | + p = http._ChunkedTransferDecoder( |
507 | + lambda b: None, |
508 | + lambda b: None, # pragma: nocov |
509 | + ) |
510 | + self.assertRaises( |
511 | + http._MalformedChunkedDataError, p.dataReceived, b'3\r\nabc\r\n0\r\n!!' |
512 | + ) |
513 | |
514 | def test_finish(self): |
515 | """ |
516 | @@ -1389,6 +1509,8 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin): |
517 | chunked = b''.join(http.toChunk(s)) |
518 | self.assertEqual((s, b''), http.fromChunk(chunked)) |
519 | self.assertRaises(ValueError, http.fromChunk, b'-5\r\nmalformed!\r\n') |
520 | + self.assertRaises(ValueError, http.fromChunk, b'0xa\r\nmalformed!\r\n') |
521 | + self.assertRaises(ValueError, http.fromChunk, b'0XA\r\nmalformed!\r\n') |
522 | |
523 | def testConcatenatedChunks(self): |
524 | chunked = b''.join([b''.join(http.toChunk(t)) for t in self.strings]) |
525 | @@ -1646,7 +1768,12 @@ class ParsingTests(unittest.TestCase): |
526 | Line folded headers are handled by L{HTTPChannel} by replacing each |
527 | fold with a single space by the time they are made available to the |
528 | L{Request}. Any leading whitespace in the folded lines of the header |
529 | - value is preserved. |
530 | + value is replaced with a single space, per: |
531 | + |
532 | + A server that receives an obs-fold in a request message ... MUST |
533 | + ... replace each received obs-fold with one or more SP octets prior |
534 | + to interpreting the field value or forwarding the message |
535 | + downstream. |
536 | |
537 | See RFC 7230 section 3.2.4. |
538 | """ |
539 | @@ -1683,15 +1810,65 @@ class ParsingTests(unittest.TestCase): |
540 | ) |
541 | self.assertEqual( |
542 | request.requestHeaders.getRawHeaders(b"space"), |
543 | - [b"space space"], |
544 | + [b"space space"], |
545 | ) |
546 | self.assertEqual( |
547 | request.requestHeaders.getRawHeaders(b"spaces"), |
548 | - [b"spaces spaces spaces"], |
549 | + [b"spaces spaces spaces"], |
550 | ) |
551 | self.assertEqual( |
552 | request.requestHeaders.getRawHeaders(b"tab"), |
553 | - [b"t \ta \tb"], |
554 | + [b"t a b"], |
555 | + ) |
556 | + |
557 | + def test_headerStripWhitespace(self): |
558 | + """ |
559 | + Leading and trailing space and tab characters are stripped from |
560 | + headers. Other forms of whitespace are preserved. |
561 | + |
562 | + See RFC 7230 section 3.2.3 and 3.2.4. |
563 | + """ |
564 | + processed = [] |
565 | + |
566 | + class MyRequest(http.Request): |
567 | + def process(self): |
568 | + processed.append(self) |
569 | + self.finish() |
570 | + |
571 | + requestLines = [ |
572 | + b"GET / HTTP/1.0", |
573 | + b"spaces: spaces were stripped ", |
574 | + b"tabs: \t\ttabs were stripped\t\t", |
575 | + b"spaces-and-tabs: \t \t spaces and tabs were stripped\t \t", |
576 | + b"line-tab: \v vertical tab was preserved\v\t", |
577 | + b"form-feed: \f form feed was preserved \f ", |
578 | + b"", |
579 | + b"", |
580 | + ] |
581 | + |
582 | + self.runRequest(b"\n".join(requestLines), MyRequest, 0) |
583 | + [request] = processed |
584 | + # All leading and trailing whitespace is stripped from the |
585 | + # header-value. |
586 | + self.assertEqual( |
587 | + request.requestHeaders.getRawHeaders(b"spaces"), |
588 | + [b"spaces were stripped"], |
589 | + ) |
590 | + self.assertEqual( |
591 | + request.requestHeaders.getRawHeaders(b"tabs"), |
592 | + [b"tabs were stripped"], |
593 | + ) |
594 | + self.assertEqual( |
595 | + request.requestHeaders.getRawHeaders(b"spaces-and-tabs"), |
596 | + [b"spaces and tabs were stripped"], |
597 | + ) |
598 | + self.assertEqual( |
599 | + request.requestHeaders.getRawHeaders(b"line-tab"), |
600 | + [b"\v vertical tab was preserved\v"], |
601 | + ) |
602 | + self.assertEqual( |
603 | + request.requestHeaders.getRawHeaders(b"form-feed"), |
604 | + [b"\f form feed was preserved \f"], |
605 | ) |
606 | |
607 | |
608 | @@ -2267,6 +2444,58 @@ Hello, |
609 | ]) |
610 | |
611 | |
612 | + def test_contentLengthMalformed(self): |
613 | + """ |
614 | + A request with a non-integer C{Content-Length} header fails with a 400 |
615 | + response without calling L{Request.process}. |
616 | + """ |
617 | + self.assertRequestRejected( |
618 | + [ |
619 | + b"GET /a HTTP/1.1", |
620 | + b"Content-Length: MORE THAN NINE THOUSAND!", |
621 | + b"Host: host.invalid", |
622 | + b"", |
623 | + b"", |
624 | + b"x" * 9001, |
625 | + ] |
626 | + ) |
627 | + |
628 | + def test_contentLengthTooPositive(self): |
629 | + """ |
630 | + A request with a C{Content-Length} header that begins with a L{+} fails |
631 | + with a 400 response without calling L{Request.process}. |
632 | + |
633 | + This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq. |
634 | + """ |
635 | + self.assertRequestRejected( |
636 | + [ |
637 | + b"GET /a HTTP/1.1", |
638 | + b"Content-Length: +100", |
639 | + b"Host: host.invalid", |
640 | + b"", |
641 | + b"", |
642 | + b"x" * 100, |
643 | + ] |
644 | + ) |
645 | + |
646 | + def test_contentLengthNegative(self): |
647 | + """ |
648 | + A request with a C{Content-Length} header that is negative fails with |
649 | + a 400 response without calling L{Request.process}. |
650 | + |
651 | + This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq. |
652 | + """ |
653 | + self.assertRequestRejected( |
654 | + [ |
655 | + b"GET /a HTTP/1.1", |
656 | + b"Content-Length: -100", |
657 | + b"Host: host.invalid", |
658 | + b"", |
659 | + b"", |
660 | + b"x" * 200, |
661 | + ] |
662 | + ) |
663 | + |
664 | def test_duplicateContentLengthsWithPipelinedRequests(self): |
665 | """ |
666 | Two pipelined requests, the first of which includes multiple |
667 | @@ -4226,3 +4455,43 @@ class HTTPClientSanitizationTests(unittest.SynchronousTestCase): |
668 | transport.value().splitlines(), |
669 | [b": ".join([sanitizedBytes, sanitizedBytes])] |
670 | ) |
671 | + |
672 | + |
673 | +class HexHelperTests(unittest.SynchronousTestCase): |
674 | + """ |
675 | + Test the L{http._hexint} and L{http._ishexdigits} helper functions. |
676 | + """ |
677 | + |
678 | + badStrings = (b"", b"0x1234", b"feds", b"-123" b"+123") |
679 | + |
680 | + def test_isHex(self): |
681 | + """ |
682 | + L{_ishexdigits()} returns L{True} for nonempy bytestrings containing |
683 | + hexadecimal digits. |
684 | + """ |
685 | + for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"): |
686 | + self.assertIs(True, http._ishexdigits(s)) |
687 | + |
688 | + def test_decodes(self): |
689 | + """ |
690 | + L{_hexint()} returns the integer equivalent of the input. |
691 | + """ |
692 | + self.assertEqual(10, http._hexint(b"a")) |
693 | + self.assertEqual(0x10, http._hexint(b"10")) |
694 | + self.assertEqual(0xABCD123, http._hexint(b"abCD123")) |
695 | + |
696 | + def test_isNotHex(self): |
697 | + """ |
698 | + L{_ishexdigits()} returns L{False} for bytestrings that don't contain |
699 | + hexadecimal digits, including the empty string. |
700 | + """ |
701 | + for s in self.badStrings: |
702 | + self.assertIs(False, http._ishexdigits(s)) |
703 | + |
704 | + def test_decodeNotHex(self): |
705 | + """ |
706 | + L{_hexint()} raises L{ValueError} for bytestrings that can't |
707 | + be decoded. |
708 | + """ |
709 | + for s in self.badStrings: |
710 | + self.assertRaises(ValueError, http._hexint, s) |
@Colin: Do we have a common procedure for backports? e.g. mark them with a comment?
Any opinion on the cherry-pick? Took me a minute to deduce that 10323 is GitHub's issue number in the Twisted project.