Merge lp:~allenap/maas/utf8-before-urlencode into lp:~maas-committers/maas/trunk

Proposed by Gavin Panella
Status: Merged
Approved by: Gavin Panella
Approved revision: no longer in the source branch.
Merged at revision: 1087
Proposed branch: lp:~allenap/maas/utf8-before-urlencode
Merge into: lp:~maas-committers/maas/trunk
Prerequisite: lp:~allenap/maas/urlencode-is-duckist
Diff against target: 103 lines (+47/-1)
3 files modified
src/maascli/api.py (+1/-1)
src/maascli/tests/test_utils.py (+28/-0)
src/maascli/utils.py (+18/-0)
To merge this branch: bzr merge lp:~allenap/maas/utf8-before-urlencode
Reviewer Review Type Date Requested Status
Julian Edwards (community) Approve
Review via email: mp+126587@code.launchpad.net

Commit message

Always UTF-8 encode Unicode strings before URL encoding them.

The standard library's urllib.urlencode is not Unicode safe. This new version is, and is round-trip tested with Django's stated query string handling policy.

To post a comment you must log in.
Revision history for this message
Julian Edwards (julian-edwards) :
review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

No proposals found for merge of lp:~allenap/maas/urlencode-is-duckist into lp:maas.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/maascli/api.py'
2--- src/maascli/api.py 2012-09-27 03:05:24 +0000
3+++ src/maascli/api.py 2012-09-27 03:05:24 +0000
4@@ -18,7 +18,6 @@
5 import httplib
6 import json
7 import sys
8-from urllib import urlencode
9 from urlparse import (
10 urljoin,
11 urlparse,
12@@ -42,6 +41,7 @@
13 handler_command_name,
14 parse_docstring,
15 safe_name,
16+ urlencode,
17 )
18
19
20
21=== modified file 'src/maascli/tests/test_utils.py'
22--- src/maascli/tests/test_utils.py 2012-09-16 21:32:49 +0000
23+++ src/maascli/tests/test_utils.py 2012-09-27 03:05:24 +0000
24@@ -12,8 +12,16 @@
25 __metaclass__ = type
26 __all__ = []
27
28+from urllib import unquote
29+
30+from django.utils.encoding import smart_unicode
31 from maascli import utils
32 from maastesting.testcase import TestCase
33+from testtools.matchers import (
34+ Equals,
35+ IsInstance,
36+ MatchesAll,
37+ )
38
39
40 class TestDocstringParsing(TestCase):
41@@ -187,3 +195,23 @@
42 # string.
43 self.assertIsInstance(utils.ensure_trailing_slash(u"fred"), unicode)
44 self.assertIsInstance(utils.ensure_trailing_slash(b"fred"), bytes)
45+
46+ def test_urlencode_encodes_utf8_and_quotes(self):
47+ # urlencode UTF-8 encodes unicode strings and applies standard query
48+ # string quoting rules, and always returns a byte string.
49+ data = [("=\u1234", "&\u4321")]
50+ query = utils.urlencode(data)
51+ self.assertThat(
52+ query, MatchesAll(
53+ Equals(b"%3D%E1%88%B4=%26%E4%8C%A1"),
54+ IsInstance(bytes)))
55+
56+ def test_urlencode_roundtrip_through_django(self):
57+ # Check that urlencode's approach works with Django, as described on
58+ # https://docs.djangoproject.com/en/dev/ref/unicode/.
59+ data = [("=\u1234", "&\u4321")]
60+ query = utils.urlencode(data)
61+ name, value = query.split(b"=")
62+ name, value = unquote(name), unquote(value)
63+ name, value = smart_unicode(name), smart_unicode(value)
64+ self.assertEqual(data, [(name, value)])
65
66=== modified file 'src/maascli/utils.py'
67--- src/maascli/utils.py 2012-09-16 21:32:49 +0000
68+++ src/maascli/utils.py 2012-09-27 03:05:24 +0000
69@@ -15,12 +15,14 @@
70 "handler_command_name",
71 "parse_docstring",
72 "safe_name",
73+ "urlencode",
74 ]
75
76 from functools import partial
77 from inspect import getdoc
78 import re
79 from textwrap import dedent
80+from urllib import quote_plus
81
82
83 re_paragraph_splitter = re.compile(
84@@ -86,3 +88,19 @@
85 """Ensure that `string` has a trailing forward-slash."""
86 slash = b"/" if isinstance(string, bytes) else u"/"
87 return (string + slash) if not string.endswith(slash) else string
88+
89+
90+def urlencode(data):
91+ """A version of `urllib.urlencode` that isn't insane.
92+
93+ This only cares that `data` is an iterable of iterables. Each sub-iterable
94+ must be of overall length 2, i.e. a name/value pair.
95+
96+ Unicode strings will be encoded to UTF-8. This is what Django expects; see
97+ `smart_text` in the Django documentation.
98+ """
99+ enc = lambda string: quote_plus(
100+ string.encode("utf-8") if isinstance(string, unicode) else string)
101+ return b"&".join(
102+ b"%s=%s" % (enc(name), enc(value))
103+ for name, value in data)