Merge lp:~cjwatson/wadllib/fix-mime-encoding into lp:wadllib
- fix-mime-encoding
- Merge into trunk
Proposed by
Colin Watson
Status: | Merged |
---|---|
Merged at revision: | 34 |
Proposed branch: | lp:~cjwatson/wadllib/fix-mime-encoding |
Merge into: | lp:wadllib |
Diff against target: |
442 lines (+208/-82) 8 files modified
README.txt (+1/-1) setup.py (+0/-8) src/wadllib/NEWS.txt (+10/-0) src/wadllib/README.txt (+58/-11) src/wadllib/application.py (+114/-47) src/wadllib/tests/test_docs.py (+15/-7) tox.ini (+10/-0) versions.cfg (+0/-8) |
To merge this branch: | bzr merge lp:~cjwatson/wadllib/fix-mime-encoding |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Simon Davy (community) | Approve | ||
LAZR Developers | Pending | ||
Review via email: mp+348955@code.launchpad.net |
Commit message
Reimplement MIME multipart/form-data encoding without binary corruption bugs.
Description of the change
I tried to persuade the standard library to do what I wanted, but getting it to work right across all Python versions was too hard.
To post a comment you must log in.
Revision history for this message
Pierre Equoy (pieq) wrote : | # |
- 37. By Colin Watson
-
Test a few more variations of binary data.
Revision history for this message
Colin Watson (cjwatson) wrote : | # |
You just need to check out this branch and install it in a virtualenv using pip.
Anyway, I've tested this myself against a development instance of Launchpad, explicitly testing the example @pieq gave in the linked bug as well as things like files ending with an \r byte, and everything now seems to round-trip successfully.
Revision history for this message
Simon Davy (bloodearnest) wrote : | # |
LGTM, I agree that it's small enough in inline, so to speak.
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'README.txt' |
2 | --- README.txt 2012-01-18 20:55:11 +0000 |
3 | +++ README.txt 2018-07-13 10:07:36 +0000 |
4 | @@ -1,6 +1,6 @@ |
5 | Navigate HTTP resources using WADL files as guides. |
6 | |
7 | -wadllib should work with Python >= 2.4. |
8 | +wadllib should work with Python >= 2.6. |
9 | |
10 | .. |
11 | Copyright (C) 2008-2009 Canonical Ltd. |
12 | |
13 | === modified file 'setup.py' |
14 | --- setup.py 2014-06-24 17:54:15 +0000 |
15 | +++ setup.py 2018-07-13 10:07:36 +0000 |
16 | @@ -44,14 +44,6 @@ |
17 | 'setuptools', |
18 | 'lazr.uri', |
19 | ] |
20 | -if sys.version < '2.5': |
21 | - # This module is in the standard library as of Python 2.5. In Python 3, |
22 | - # it doesn't work when fetched separately. |
23 | - install_requires.append('elementtree') |
24 | -if sys.version < '2.6': |
25 | - # This module is in the standard library as of Python 2.6. In Python 3, |
26 | - # it doesn't work when fetched separately. |
27 | - install_requires.extend('simplejson') |
28 | |
29 | setup( |
30 | name='wadllib', |
31 | |
32 | === modified file 'src/wadllib/NEWS.txt' |
33 | --- src/wadllib/NEWS.txt 2013-02-25 18:53:09 +0000 |
34 | +++ src/wadllib/NEWS.txt 2018-07-13 10:07:36 +0000 |
35 | @@ -2,6 +2,16 @@ |
36 | NEWS for wadllib |
37 | ================ |
38 | |
39 | +1.3.3 |
40 | +===== |
41 | + |
42 | +- Drop support for Python < 2.6. |
43 | +- Add tox testing support. |
44 | +- Implement a subset of MIME multipart/form-data encoding locally rather |
45 | + than using the standard library's email module, which doesn't have good |
46 | + handling of binary parts and corrupts bytes in them that look like line |
47 | + endings in various ways depending on the Python version. [bug=1729754] |
48 | + |
49 | 1.3.2 (2013-02-25) |
50 | ================== |
51 | |
52 | |
53 | === modified file 'src/wadllib/README.txt' |
54 | --- src/wadllib/README.txt 2015-02-11 04:19:08 +0000 |
55 | +++ src/wadllib/README.txt 2018-07-13 10:07:36 +0000 |
56 | @@ -295,10 +295,7 @@ |
57 | to another object. The 'link' property of a parameter lets you examine |
58 | links before following them. |
59 | |
60 | - >>> try: |
61 | - ... import simplejson as json |
62 | - ... except ImportError: |
63 | - ... import json |
64 | + >>> import json |
65 | >>> links_wadl = application_for('links-wadl.xml') |
66 | >>> service_root = links_wadl.get_resource_by_path('') |
67 | >>> representation = json.dumps( |
68 | @@ -568,13 +565,63 @@ |
69 | ... "http://www.example.com/", binary_stream) |
70 | >>> service_root = binary_wadl.get_resource_by_path('') |
71 | |
72 | - >>> method = service_root.get_method('post', 'multipart/form-data') |
73 | - >>> media_type, doc = method.build_representation( |
74 | - ... text_field="text", binary_field="\x01\x02") |
75 | - >>> print(media_type) |
76 | - multipart/form-data; boundary=... |
77 | - >>> b'\x01\x02' in doc |
78 | - True |
79 | +Define a helper that processes the representation the same way |
80 | +zope.publisher would. |
81 | + |
82 | + >>> import cgi |
83 | + >>> import io |
84 | + >>> def assert_message_parts(media_type, doc, expected): |
85 | + ... if sys.version_info[0] == 3 and sys.version_info[1] < 3: |
86 | + ... # We can't do much due to https://bugs.python.org/issue18013. |
87 | + ... for value in expected: |
88 | + ... if not isinstance(value, bytes): |
89 | + ... value = value.encode('UTF-8') |
90 | + ... assert value in doc |
91 | + ... return |
92 | + ... environ = { |
93 | + ... 'REQUEST_METHOD': 'POST', |
94 | + ... 'CONTENT_TYPE': media_type, |
95 | + ... 'CONTENT_LENGTH': str(len(doc)), |
96 | + ... } |
97 | + ... kwargs = ( |
98 | + ... {'encoding': 'UTF-8'} if sys.version_info[0] >= 3 else {}) |
99 | + ... fs = cgi.FieldStorage( |
100 | + ... fp=io.BytesIO(doc), environ=environ, keep_blank_values=1, |
101 | + ... **kwargs) |
102 | + ... values = [] |
103 | + ... def append_values(fields): |
104 | + ... for field in fields: |
105 | + ... if field.list: |
106 | + ... append_values(field.list) |
107 | + ... else: |
108 | + ... values.append(field.value) |
109 | + ... append_values(fs.list) |
110 | + ... assert values == expected, ( |
111 | + ... 'Expected %s, got %s' % (expected, values)) |
112 | + |
113 | + >>> method = service_root.get_method('post', 'multipart/form-data') |
114 | + >>> media_type, doc = method.build_representation( |
115 | + ... text_field="text", binary_field=b"\x01\x02\r\x81\r") |
116 | + >>> print(media_type) |
117 | + multipart/form-data; boundary=... |
118 | + >>> assert_message_parts(media_type, doc, ['text', b'\x01\x02\r\x81\r']) |
119 | + |
120 | + >>> method = service_root.get_method('post', 'multipart/form-data') |
121 | + >>> media_type, doc = method.build_representation( |
122 | + ... text_field="text\n", binary_field=b"\x01\x02\r\x81\n\r") |
123 | + >>> print(media_type) |
124 | + multipart/form-data; boundary=... |
125 | + >>> assert_message_parts( |
126 | + ... media_type, doc, ['text\r\n', b'\x01\x02\r\x81\n\r']) |
127 | + |
128 | + >>> method = service_root.get_method('post', 'multipart/form-data') |
129 | + >>> media_type, doc = method.build_representation( |
130 | + ... text_field="text\r\nmore\r\n", |
131 | + ... binary_field=b"\x01\x02\r\n\x81\r\x82\n") |
132 | + >>> print(media_type) |
133 | + multipart/form-data; boundary=... |
134 | + >>> assert_message_parts( |
135 | + ... media_type, doc, ['text\r\nmore\r\n', b'\x01\x02\r\n\x81\r\x82\n']) |
136 | |
137 | >>> method = service_root.get_method('post', 'text/unknown') |
138 | >>> method.build_representation(field="value") |
139 | |
140 | === modified file 'src/wadllib/application.py' |
141 | --- src/wadllib/application.py 2015-02-11 03:56:31 +0000 |
142 | +++ src/wadllib/application.py 2018-07-13 10:07:36 +0000 |
143 | @@ -1,4 +1,4 @@ |
144 | -# Copyright 2008-2013 Canonical Ltd. All rights reserved. |
145 | +# Copyright 2008-2018 Canonical Ltd. All rights reserved. |
146 | |
147 | # This file is part of wadllib. |
148 | # |
149 | @@ -41,38 +41,20 @@ |
150 | 'WADLError', |
151 | ] |
152 | |
153 | -try: |
154 | - # Make sure to try the Python 2 form first; we don't want to use |
155 | - # io.StringIO in Python 2, because it is strict about only accepting |
156 | - # unicode input. |
157 | - from cStringIO import StringIO as BytesIO |
158 | -except ImportError: |
159 | - from io import BytesIO |
160 | import datetime |
161 | -try: |
162 | - from email.mime.multipart import MIMEMultipart |
163 | - from email.mime.nonmultipart import MIMENonMultipart |
164 | -except ImportError: |
165 | - # We must be on 2.4. |
166 | - from email.MIMEMultipart import MIMEMultipart |
167 | - from email.MIMENonMultipart import MIMENonMultipart |
168 | - |
169 | +from email.utils import quote |
170 | +import io |
171 | +import json |
172 | +import random |
173 | +import re |
174 | +import sys |
175 | import time |
176 | try: |
177 | from urllib.parse import urlencode |
178 | except ImportError: |
179 | from urllib import urlencode |
180 | -try: |
181 | - import simplejson as json |
182 | -except ImportError: |
183 | - import json |
184 | -try: |
185 | - import xml.etree.cElementTree as ET |
186 | -except ImportError: |
187 | - try: |
188 | - import cElementTree as ET |
189 | - except ImportError: |
190 | - import elementtree.ElementTree as ET |
191 | +import xml.etree.cElementTree as ET |
192 | + |
193 | from lazr.uri import URI, merge |
194 | |
195 | from wadllib import ( |
196 | @@ -823,6 +805,105 @@ |
197 | """The media type of the representation described here.""" |
198 | return self.resolve_definition().tag.attrib['mediaType'] |
199 | |
200 | + def _make_boundary(self, all_parts): |
201 | + """Make a random boundary that does not appear in `all_parts`.""" |
202 | + _width = len(repr(sys.maxsize - 1)) |
203 | + _fmt = '%%0%dd' % _width |
204 | + token = random.randrange(sys.maxsize) |
205 | + boundary = ('=' * 15) + (_fmt % token) + '==' |
206 | + if all_parts is None: |
207 | + return boundary |
208 | + b = boundary |
209 | + counter = 0 |
210 | + while True: |
211 | + pattern = ('^--' + re.escape(b) + '(--)?$').encode('ascii') |
212 | + if not re.search(pattern, all_parts, flags=re.MULTILINE): |
213 | + break |
214 | + b = boundary + '.' + str(counter) |
215 | + counter += 1 |
216 | + return b |
217 | + |
218 | + def _write_headers(self, buf, headers): |
219 | + """Write MIME headers to a file object.""" |
220 | + for key, value in headers: |
221 | + buf.write(key.encode('UTF-8')) |
222 | + buf.write(b': ') |
223 | + buf.write(value.encode('UTF-8')) |
224 | + buf.write(b'\r\n') |
225 | + buf.write(b'\r\n') |
226 | + |
227 | + def _write_boundary(self, buf, boundary, closing=False): |
228 | + """Write a multipart boundary to a file object.""" |
229 | + buf.write(b'--') |
230 | + buf.write(boundary.encode('UTF-8')) |
231 | + if closing: |
232 | + buf.write(b'--') |
233 | + buf.write(b'\r\n') |
234 | + |
235 | + def _generate_multipart_form(self, parts): |
236 | + """Generate a multipart/form-data message. |
237 | + |
238 | + This is very loosely based on the email module in the Python standard |
239 | + library. However, that module doesn't really support directly embedding |
240 | + binary data in a form: various versions of Python have mangled line |
241 | + separators in different ways, and none of them get it quite right. |
242 | + Since we only need a tiny subset of MIME here, it's easier to implement |
243 | + it ourselves. |
244 | + |
245 | + :return: a tuple of two elements: the Content-Type of the message, and |
246 | + the entire encoded message as a byte string. |
247 | + """ |
248 | + # Generate the subparts first so that we can calculate a safe boundary. |
249 | + encoded_parts = [] |
250 | + for is_binary, name, value in parts: |
251 | + buf = io.BytesIO() |
252 | + if is_binary: |
253 | + ctype = 'application/octet-stream' |
254 | + # RFC 7578 says that the filename parameter isn't mandatory |
255 | + # in our case, but without it cgi.FieldStorage tries to |
256 | + # decode as text on Python 3. |
257 | + cdisp = 'form-data; name="%s"; filename="%s"' % ( |
258 | + quote(name), quote(name)) |
259 | + else: |
260 | + ctype = 'text/plain; charset="utf-8"' |
261 | + cdisp = 'form-data; name="%s"' % quote(name) |
262 | + self._write_headers(buf, [ |
263 | + ('MIME-Version', '1.0'), |
264 | + ('Content-Type', ctype), |
265 | + ('Content-Disposition', cdisp), |
266 | + ]) |
267 | + if is_binary: |
268 | + if not isinstance(value, bytes): |
269 | + raise TypeError('bytes payload expected: %s' % type(value)) |
270 | + buf.write(value) |
271 | + else: |
272 | + if not isinstance(value, str): |
273 | + raise TypeError('str payload expected: %s' % type(value)) |
274 | + lines = re.split(r'\r\n|\r|\n', value) |
275 | + for line in lines[:-1]: |
276 | + buf.write(line.encode('UTF-8')) |
277 | + buf.write(b'\r\n') |
278 | + buf.write(lines[-1].encode('UTF-8')) |
279 | + encoded_parts.append(buf.getvalue()) |
280 | + |
281 | + # Create a suitable boundary. |
282 | + boundary = self._make_boundary(b'\r\n'.join(encoded_parts)) |
283 | + |
284 | + # Now we can write the multipart headers, followed by all the parts. |
285 | + buf = io.BytesIO() |
286 | + ctype = 'multipart/form-data; boundary="%s"' % quote(boundary) |
287 | + self._write_headers(buf, [ |
288 | + ('MIME-Version', '1.0'), |
289 | + ('Content-Type', ctype), |
290 | + ]) |
291 | + for encoded_part in encoded_parts: |
292 | + self._write_boundary(buf, boundary) |
293 | + buf.write(encoded_part) |
294 | + buf.write(b'\r\n') |
295 | + self._write_boundary(buf, boundary, closing=True) |
296 | + |
297 | + return ctype, buf.getvalue() |
298 | + |
299 | def bind(self, param_values, **kw_param_values): |
300 | """Bind the definition to parameter values, creating a document. |
301 | |
302 | @@ -836,29 +917,13 @@ |
303 | if media_type == 'application/x-www-form-urlencoded': |
304 | doc = urlencode(sorted(validated_values.items())) |
305 | elif media_type == 'multipart/form-data': |
306 | - outer = MIMEMultipart() |
307 | - outer.set_type('multipart/form-data') |
308 | + parts = [] |
309 | missing = object() |
310 | for param in params: |
311 | value = validated_values.get(param.name, missing) |
312 | if value is not missing: |
313 | - if param.type == 'binary': |
314 | - maintype, subtype = 'application', 'octet-stream' |
315 | - params = {} |
316 | - else: |
317 | - maintype, subtype = 'text', 'plain' |
318 | - params = {'charset' : 'utf-8'} |
319 | - inner = MIMENonMultipart(maintype, subtype, **params) |
320 | - inner.set_payload(value) |
321 | - inner['Content-Disposition'] = ( |
322 | - 'form-data; name="%s"' % param.name) |
323 | - outer.attach(inner) |
324 | - if hasattr(outer, "as_bytes"): |
325 | - doc = outer.as_bytes() |
326 | - else: |
327 | - doc = outer.as_string(unixfrom=False) |
328 | - media_type = (outer.get_content_type() + |
329 | - '; boundary="%s"' % outer.get_boundary()) |
330 | + parts.append((param.type == 'binary', param.name, value)) |
331 | + media_type, doc = self._generate_multipart_form(parts) |
332 | elif media_type == 'application/json': |
333 | doc = json.dumps(validated_values) |
334 | else: |
335 | @@ -1104,7 +1169,9 @@ |
336 | |
337 | def _from_string(self, markup): |
338 | """Turns markup into a document.""" |
339 | - return self._from_stream(BytesIO(markup)) |
340 | + if not isinstance(markup, bytes): |
341 | + markup = markup.encode("UTF-8") |
342 | + return self._from_stream(io.BytesIO(markup)) |
343 | |
344 | def get_resource_type(self, resource_type_url): |
345 | """Retrieve a resource type by the URL of its description.""" |
346 | |
347 | === modified file 'src/wadllib/tests/test_docs.py' |
348 | --- src/wadllib/tests/test_docs.py 2009-03-20 20:30:07 +0000 |
349 | +++ src/wadllib/tests/test_docs.py 2018-07-13 10:07:36 +0000 |
350 | @@ -1,4 +1,4 @@ |
351 | -# Copyright 2009 Canonical Ltd. All rights reserved. |
352 | +# Copyright 2009-2018 Canonical Ltd. All rights reserved. |
353 | # |
354 | # This file is part of wadllib |
355 | # |
356 | @@ -16,16 +16,20 @@ |
357 | |
358 | """Test harness.""" |
359 | |
360 | +from __future__ import absolute_import, print_function |
361 | + |
362 | __metaclass__ = type |
363 | __all__ = [ |
364 | - 'additional_tests', |
365 | + 'load_tests', |
366 | ] |
367 | |
368 | +import __future__ |
369 | import atexit |
370 | import doctest |
371 | import os |
372 | + |
373 | import pkg_resources |
374 | -import unittest |
375 | + |
376 | |
377 | DOCTEST_FLAGS = ( |
378 | doctest.ELLIPSIS | |
379 | @@ -33,7 +37,7 @@ |
380 | doctest.REPORT_NDIFF) |
381 | |
382 | |
383 | -def additional_tests(): |
384 | +def load_tests(loader, tests, pattern): |
385 | doctest_files = [ |
386 | os.path.abspath( |
387 | pkg_resources.resource_filename('wadllib', 'README.txt'))] |
388 | @@ -44,7 +48,11 @@ |
389 | os.path.abspath( |
390 | pkg_resources.resource_filename( |
391 | 'wadllib', 'docs/%s' % name))) |
392 | - kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) |
393 | + globs = { |
394 | + future_item: getattr(__future__, future_item) |
395 | + for future_item in ('absolute_import', 'print_function')} |
396 | + kwargs = dict( |
397 | + module_relative=False, globs=globs, optionflags=DOCTEST_FLAGS) |
398 | atexit.register(pkg_resources.cleanup_resources) |
399 | - return unittest.TestSuite(( |
400 | - doctest.DocFileSuite(*doctest_files, **kwargs))) |
401 | + tests.addTest(doctest.DocFileSuite(*doctest_files, **kwargs)) |
402 | + return tests |
403 | |
404 | === added file 'tox.ini' |
405 | --- tox.ini 1970-01-01 00:00:00 +0000 |
406 | +++ tox.ini 2018-07-13 10:07:36 +0000 |
407 | @@ -0,0 +1,10 @@ |
408 | +[tox] |
409 | +envlist = |
410 | + py27,py32,py33,py34,py35,py36,py37 |
411 | + |
412 | +[testenv] |
413 | +commands = |
414 | + zope-testrunner --test-path src --tests-pattern ^tests {posargs} |
415 | +deps = |
416 | + .[test] |
417 | + zope-testrunner |
418 | |
419 | === modified file 'versions.cfg' |
420 | --- versions.cfg 2011-11-25 19:27:47 +0000 |
421 | +++ versions.cfg 2018-07-13 10:07:36 +0000 |
422 | @@ -26,10 +26,6 @@ |
423 | |
424 | # Required by: |
425 | # wadllib==1.2.0 |
426 | -elementtree = 1.2.6-20050316 |
427 | - |
428 | -# Required by: |
429 | -# wadllib==1.2.0 |
430 | lazr.uri = 1.0.2 |
431 | |
432 | # Required by: |
433 | @@ -46,10 +42,6 @@ |
434 | setuptools = 0.6c11 |
435 | |
436 | # Required by: |
437 | -# wadllib==1.2.0 |
438 | -simplejson = 2.2.1 |
439 | - |
440 | -# Required by: |
441 | # z3c.recipe.sphinxdoc==0.0.8 |
442 | zc.buildout = 1.5.2 |
443 |
Hi,
is there an easy way to test this change to upload a binary attachment to the Staging instance of Launchpad?