Merge lp:~allenap/maas/utf8-before-urlencode into lp:~maas-committers/maas/trunk

Proposed by Gavin Panella
Status: Merged
Approved by: Gavin Panella
Approved revision: no longer in the source branch.
Merged at revision: 1087
Proposed branch: lp:~allenap/maas/utf8-before-urlencode
Merge into: lp:~maas-committers/maas/trunk
Prerequisite: lp:~allenap/maas/urlencode-is-duckist
Diff against target: 103 lines (+47/-1)
3 files modified
src/maascli/api.py (+1/-1)
src/maascli/tests/test_utils.py (+28/-0)
src/maascli/utils.py (+18/-0)
To merge this branch: bzr merge lp:~allenap/maas/utf8-before-urlencode
Reviewer Review Type Date Requested Status
Julian Edwards (community) Approve
Review via email: mp+126587@code.launchpad.net

Commit message

Always UTF-8 encode Unicode strings before URL encoding them.

The standard library's urllib.urlencode is not Unicode safe. This new version is, and is round-trip tested with Django's stated query string handling policy.

To post a comment you must log in.
Revision history for this message
Julian Edwards (julian-edwards) :
review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

No proposals found for merge of lp:~allenap/maas/urlencode-is-duckist into lp:maas.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/maascli/api.py'
--- src/maascli/api.py 2012-09-27 03:05:24 +0000
+++ src/maascli/api.py 2012-09-27 03:05:24 +0000
@@ -18,7 +18,6 @@
18import httplib18import httplib
19import json19import json
20import sys20import sys
21from urllib import urlencode
22from urlparse import (21from urlparse import (
23 urljoin,22 urljoin,
24 urlparse,23 urlparse,
@@ -42,6 +41,7 @@
42 handler_command_name,41 handler_command_name,
43 parse_docstring,42 parse_docstring,
44 safe_name,43 safe_name,
44 urlencode,
45 )45 )
4646
4747
4848
=== modified file 'src/maascli/tests/test_utils.py'
--- src/maascli/tests/test_utils.py 2012-09-16 21:32:49 +0000
+++ src/maascli/tests/test_utils.py 2012-09-27 03:05:24 +0000
@@ -12,8 +12,16 @@
12__metaclass__ = type12__metaclass__ = type
13__all__ = []13__all__ = []
1414
15from urllib import unquote
16
17from django.utils.encoding import smart_unicode
15from maascli import utils18from maascli import utils
16from maastesting.testcase import TestCase19from maastesting.testcase import TestCase
20from testtools.matchers import (
21 Equals,
22 IsInstance,
23 MatchesAll,
24 )
1725
1826
19class TestDocstringParsing(TestCase):27class TestDocstringParsing(TestCase):
@@ -187,3 +195,23 @@
187 # string.195 # string.
188 self.assertIsInstance(utils.ensure_trailing_slash(u"fred"), unicode)196 self.assertIsInstance(utils.ensure_trailing_slash(u"fred"), unicode)
189 self.assertIsInstance(utils.ensure_trailing_slash(b"fred"), bytes)197 self.assertIsInstance(utils.ensure_trailing_slash(b"fred"), bytes)
198
199 def test_urlencode_encodes_utf8_and_quotes(self):
200 # urlencode UTF-8 encodes unicode strings and applies standard query
201 # string quoting rules, and always returns a byte string.
202 data = [("=\u1234", "&\u4321")]
203 query = utils.urlencode(data)
204 self.assertThat(
205 query, MatchesAll(
206 Equals(b"%3D%E1%88%B4=%26%E4%8C%A1"),
207 IsInstance(bytes)))
208
209 def test_urlencode_roundtrip_through_django(self):
210 # Check that urlencode's approach works with Django, as described on
211 # https://docs.djangoproject.com/en/dev/ref/unicode/.
212 data = [("=\u1234", "&\u4321")]
213 query = utils.urlencode(data)
214 name, value = query.split(b"=")
215 name, value = unquote(name), unquote(value)
216 name, value = smart_unicode(name), smart_unicode(value)
217 self.assertEqual(data, [(name, value)])
190218
=== modified file 'src/maascli/utils.py'
--- src/maascli/utils.py 2012-09-16 21:32:49 +0000
+++ src/maascli/utils.py 2012-09-27 03:05:24 +0000
@@ -15,12 +15,14 @@
15 "handler_command_name",15 "handler_command_name",
16 "parse_docstring",16 "parse_docstring",
17 "safe_name",17 "safe_name",
18 "urlencode",
18 ]19 ]
1920
20from functools import partial21from functools import partial
21from inspect import getdoc22from inspect import getdoc
22import re23import re
23from textwrap import dedent24from textwrap import dedent
25from urllib import quote_plus
2426
2527
26re_paragraph_splitter = re.compile(28re_paragraph_splitter = re.compile(
@@ -86,3 +88,19 @@
86 """Ensure that `string` has a trailing forward-slash."""88 """Ensure that `string` has a trailing forward-slash."""
87 slash = b"/" if isinstance(string, bytes) else u"/"89 slash = b"/" if isinstance(string, bytes) else u"/"
88 return (string + slash) if not string.endswith(slash) else string90 return (string + slash) if not string.endswith(slash) else string
91
92
93def urlencode(data):
94 """A version of `urllib.urlencode` that isn't insane.
95
96 This only cares that `data` is an iterable of iterables. Each sub-iterable
97 must be of overall length 2, i.e. a name/value pair.
98
99 Unicode strings will be encoded to UTF-8. This is what Django expects; see
100 `smart_text` in the Django documentation.
101 """
102 enc = lambda string: quote_plus(
103 string.encode("utf-8") if isinstance(string, unicode) else string)
104 return b"&".join(
105 b"%s=%s" % (enc(name), enc(value))
106 for name, value in data)