Merge lp:~jelmer/bzr/urlutils-quote into lp:bzr

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merged at revision: 6383
Proposed branch: lp:~jelmer/bzr/urlutils-quote
Merge into: lp:bzr
Diff against target: 538 lines (+136/-47)
15 files modified
bzrlib/mail_client.py (+5/-6)
bzrlib/plugins/launchpad/lp_registration.py (+2/-2)
bzrlib/plugins/weave_fmt/repository.py (+2/-3)
bzrlib/smart/medium.py (+1/-2)
bzrlib/tests/http_server.py (+2/-2)
bzrlib/tests/test_ftp_transport.py (+3/-3)
bzrlib/tests/test_import_tariff.py (+3/-0)
bzrlib/tests/test_mail_client.py (+1/-3)
bzrlib/tests/test_permissions.py (+3/-4)
bzrlib/tests/test_urlutils.py (+17/-0)
bzrlib/transport/__init__.py (+2/-3)
bzrlib/transport/http/__init__.py (+3/-4)
bzrlib/urlutils.py (+82/-9)
bzrlib/versionedfile.py (+5/-6)
doc/en/release-notes/bzr-2.5.txt (+5/-0)
To merge this branch: bzr merge lp:~jelmer/bzr/urlutils-quote
Reviewer Review Type Date Requested Status
Martin Pool Approve
Review via email: mp+86177@code.launchpad.net

Commit message

Include quote() and unquote() directly in bzrlib.urlutils.

Description of the change

Include quote() and unquote() directly in bzrlib.urlutils.

This avoids having to import urllib for non-http operations. urllib imports
socket, which in turn imports ssl. This adds overhead to the bzr startup time
for local operations.

To post a comment you must log in.
Revision history for this message
Martin Pool (mbp) wrote :

very nice. thanks for adding an import tariff.

  vote approve

review: Approve
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

sent to pqm by email

Revision history for this message
Jelmer Vernooij (jelmer) wrote :

sent to pqm by email

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bzrlib/mail_client.py'
2--- bzrlib/mail_client.py 2010-04-23 08:51:52 +0000
3+++ bzrlib/mail_client.py 2011-12-19 01:33:35 +0000
4@@ -19,7 +19,6 @@
5 import subprocess
6 import sys
7 import tempfile
8-import urllib
9
10 import bzrlib
11 from bzrlib import (
12@@ -310,7 +309,7 @@
13 message_options['attachment'] = urlutils.local_path_to_url(
14 attach_path)
15 if body is not None:
16- options_list = ['body=%s' % urllib.quote(self._encode_safe(body))]
17+ options_list = ['body=%s' % urlutils.quote(self._encode_safe(body))]
18 else:
19 options_list = []
20 options_list.extend(["%s='%s'" % (k, v) for k, v in
21@@ -352,15 +351,15 @@
22 """See ExternalMailClient._get_compose_commandline"""
23 compose_url = []
24 if from_ is not None:
25- compose_url.append('from=' + urllib.quote(from_))
26+ compose_url.append('from=' + urlutils.quote(from_))
27 if subject is not None:
28- # Don't use urllib.quote_plus because Claws doesn't seem
29+ # Don't use urlutils.quote_plus because Claws doesn't seem
30 # to recognise spaces encoded as "+".
31 compose_url.append(
32- 'subject=' + urllib.quote(self._encode_safe(subject)))
33+ 'subject=' + urlutils.quote(self._encode_safe(subject)))
34 if body is not None:
35 compose_url.append(
36- 'body=' + urllib.quote(self._encode_safe(body)))
37+ 'body=' + urlutils.quote(self._encode_safe(body)))
38 # to must be supplied for the claws-mail --compose syntax to work.
39 if to is None:
40 raise errors.NoMailAddressSpecified()
41
42=== modified file 'bzrlib/plugins/launchpad/lp_registration.py'
43--- bzrlib/plugins/launchpad/lp_registration.py 2011-03-24 11:41:42 +0000
44+++ bzrlib/plugins/launchpad/lp_registration.py 2011-12-19 01:33:35 +0000
45@@ -166,8 +166,8 @@
46 # the url? perhaps a bit more secure against accidentally
47 # revealing it. std66 s3.2.1 discourages putting the
48 # password in the url.
49- hostinfo = '%s:%s@%s' % (urllib.quote(self.registrant_email),
50- urllib.quote(self.registrant_password),
51+ hostinfo = '%s:%s@%s' % (urlutils.quote(self.registrant_email),
52+ urlutils.quote(self.registrant_password),
53 hostinfo)
54 url = urlunsplit((scheme, hostinfo, path, '', ''))
55 else:
56
57=== modified file 'bzrlib/plugins/weave_fmt/repository.py'
58--- bzrlib/plugins/weave_fmt/repository.py 2011-12-07 14:03:01 +0000
59+++ bzrlib/plugins/weave_fmt/repository.py 2011-12-19 01:33:35 +0000
60@@ -23,7 +23,6 @@
61 import gzip
62 import os
63 from cStringIO import StringIO
64-import urllib
65
66 from bzrlib.lazy_import import lazy_import
67 lazy_import(globals(), """
68@@ -708,7 +707,7 @@
69 raise errors.ObjectNotLocked(self)
70 relpaths = set()
71 for quoted_relpath in self._transport.iter_files_recursive():
72- relpath = urllib.unquote(quoted_relpath)
73+ relpath = urlutils.unquote(quoted_relpath)
74 path, ext = os.path.splitext(relpath)
75 if ext == '.gz':
76 relpath = path
77@@ -748,7 +747,7 @@
78 raise errors.ObjectNotLocked(self)
79 relpaths = set()
80 for quoted_relpath in self._transport.iter_files_recursive():
81- relpath = urllib.unquote(quoted_relpath)
82+ relpath = urlutils.unquote(quoted_relpath)
83 path, ext = os.path.splitext(relpath)
84 if ext == '.gz':
85 relpath = path
86
87=== modified file 'bzrlib/smart/medium.py'
88--- bzrlib/smart/medium.py 2011-11-25 17:54:52 +0000
89+++ bzrlib/smart/medium.py 2011-12-19 01:33:35 +0000
90@@ -28,7 +28,6 @@
91 import os
92 import sys
93 import time
94-import urllib
95
96 import bzrlib
97 from bzrlib.lazy_import import lazy_import
98@@ -840,7 +839,7 @@
99 """
100 medium_base = urlutils.join(self.base, '/')
101 rel_url = urlutils.relative_url(medium_base, transport.base)
102- return urllib.unquote(rel_url)
103+ return urlutils.unquote(rel_url)
104
105
106 class SmartClientStreamMedium(SmartClientMedium):
107
108=== modified file 'bzrlib/tests/http_server.py'
109--- bzrlib/tests/http_server.py 2011-03-08 16:00:55 +0000
110+++ bzrlib/tests/http_server.py 2011-12-19 01:33:35 +0000
111@@ -22,9 +22,9 @@
112 import re
113 import SimpleHTTPServer
114 import socket
115-import urllib
116 import urlparse
117
118+from bzrlib import urlutils
119 from bzrlib.tests import test_server
120
121
122@@ -336,7 +336,7 @@
123 """
124 # abandon query parameters
125 path = urlparse.urlparse(path)[2]
126- path = posixpath.normpath(urllib.unquote(path))
127+ path = posixpath.normpath(urlutils.unquote(path))
128 path = path.decode('utf-8')
129 words = path.split('/')
130 words = filter(None, words)
131
132=== modified file 'bzrlib/tests/test_ftp_transport.py'
133--- bzrlib/tests/test_ftp_transport.py 2011-08-19 22:34:02 +0000
134+++ bzrlib/tests/test_ftp_transport.py 2011-12-19 01:33:35 +0000
135@@ -16,7 +16,6 @@
136
137 import ftplib
138 import getpass
139-import urllib
140
141 from bzrlib import (
142 config,
143@@ -24,6 +23,7 @@
144 tests,
145 transport,
146 ui,
147+ urlutils,
148 )
149
150 from bzrlib.transport import ftp
151@@ -85,9 +85,9 @@
152 parsed_url = transport.ConnectedTransport._split_url(base)
153 new_url = parsed_url.clone()
154 new_url.user = self.user
155- new_url.quoted_user = urllib.quote(self.user)
156+ new_url.quoted_user = urlutils.quote(self.user)
157 new_url.password = self.password
158- new_url.quoted_password = urllib.quote(self.password)
159+ new_url.quoted_password = urlutils.quote(self.password)
160 return str(new_url)
161
162 def test_no_prompt_for_username(self):
163
164=== modified file 'bzrlib/tests/test_import_tariff.py'
165--- bzrlib/tests/test_import_tariff.py 2011-12-18 15:49:48 +0000
166+++ bzrlib/tests/test_import_tariff.py 2011-12-19 01:33:35 +0000
167@@ -190,11 +190,14 @@
168 'bzrlib.xml8',
169 'getpass',
170 'kerberos',
171+ 'ssl',
172+ 'socket',
173 'smtplib',
174 'tarfile',
175 'tempfile',
176 'termios',
177 'tty',
178+ 'urllib',
179 ] + old_format_modules)
180 # TODO: similar test for repository-only operations, checking we avoid
181 # loading wt-specific stuff
182
183=== modified file 'bzrlib/tests/test_mail_client.py'
184--- bzrlib/tests/test_mail_client.py 2009-09-02 08:26:27 +0000
185+++ bzrlib/tests/test_mail_client.py 2011-12-19 01:33:35 +0000
186@@ -14,8 +14,6 @@
187 # along with this program; if not, write to the Free Software
188 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
189
190-import urllib
191-
192 from bzrlib import (
193 errors,
194 mail_client,
195@@ -212,7 +210,7 @@
196 claws = mail_client.Claws(None)
197 cmdline = claws._get_compose_commandline(
198 u'jrandom@example.org', u'\xb5cosm of fun!', u'file%')
199- subject_string = urllib.quote(
200+ subject_string = urlutils.quote(
201 u'\xb5cosm of fun!'.encode(osutils.get_user_encoding(), 'replace'))
202 self.assertEqual(
203 ['--compose',
204
205=== modified file 'bzrlib/tests/test_permissions.py'
206--- bzrlib/tests/test_permissions.py 2011-02-25 15:24:22 +0000
207+++ bzrlib/tests/test_permissions.py 2011-12-19 01:33:35 +0000
208@@ -32,9 +32,8 @@
209
210 import os
211 import sys
212-import urllib
213
214-from bzrlib import transport
215+from bzrlib import urlutils
216 from bzrlib.branch import Branch
217 from bzrlib.bzrdir import BzrDir
218 from bzrlib.tests import TestCaseWithTransport, TestSkipped
219@@ -68,11 +67,11 @@
220 test.assertTransportMode(t, base, dir_mode)
221 for root, dirs, files in os.walk(base):
222 for d in dirs:
223- p = '/'.join([urllib.quote(x) for x in root.split('/\\') + [d]])
224+ p = '/'.join([urlutils.quote(x) for x in root.split('/\\') + [d]])
225 test.assertTransportMode(t, p, dir_mode)
226 for f in files:
227 p = os.path.join(root, f)
228- p = '/'.join([urllib.quote(x) for x in root.split('/\\') + [f]])
229+ p = '/'.join([urlutils.quote(x) for x in root.split('/\\') + [f]])
230 test.assertTransportMode(t, p, file_mode)
231
232
233
234=== modified file 'bzrlib/tests/test_urlutils.py'
235--- bzrlib/tests/test_urlutils.py 2011-11-24 13:15:51 +0000
236+++ bzrlib/tests/test_urlutils.py 2011-12-19 01:33:35 +0000
237@@ -1026,3 +1026,20 @@
238 urlutils.file_relpath, "file:///A:/b", "file:///A:/")
239 self.assertRaises(PathNotChild,
240 urlutils.file_relpath, "file:///A:/b/c", "file:///A:/b")
241+
242+
243+class QuoteTests(TestCase):
244+
245+ def test_quote(self):
246+ self.assertEqual('abc%20def', urlutils.quote('abc def'))
247+ self.assertEqual('abc%2Fdef', urlutils.quote('abc/def', safe=''))
248+ self.assertEqual('abc/def', urlutils.quote('abc/def', safe='/'))
249+
250+ def test_quote_tildes(self):
251+ self.assertEqual('%7Efoo', urlutils.quote('~foo'))
252+ self.assertEqual('~foo', urlutils.quote('~foo', safe='/~'))
253+
254+ def test_unquote(self):
255+ self.assertEqual('%', urlutils.unquote('%25'))
256+ self.assertEqual('\xc3\xa5', urlutils.unquote('%C3%A5'))
257+ self.assertEqual(u"\xe5", urlutils.unquote(u'\xe5'))
258
259=== modified file 'bzrlib/transport/__init__.py'
260--- bzrlib/transport/__init__.py 2011-11-25 17:49:44 +0000
261+++ bzrlib/transport/__init__.py 2011-12-19 01:33:35 +0000
262@@ -33,7 +33,6 @@
263 lazy_import(globals(), """
264 import errno
265 from stat import S_ISDIR
266-import urllib
267 import urlparse
268
269 from bzrlib import (
270@@ -1415,12 +1414,12 @@
271
272 :return: The corresponding URL.
273 """
274- netloc = urllib.quote(host)
275+ netloc = urlutils.quote(host)
276 if user is not None:
277 # Note that we don't put the password back even if we
278 # have one so that it doesn't get accidentally
279 # exposed.
280- netloc = '%s@%s' % (urllib.quote(user), netloc)
281+ netloc = '%s@%s' % (urlutils.quote(user), netloc)
282 if port is not None:
283 netloc = '%s:%d' % (netloc, port)
284 path = urlutils.escape(path)
285
286=== modified file 'bzrlib/transport/http/__init__.py'
287--- bzrlib/transport/http/__init__.py 2011-09-19 13:02:42 +0000
288+++ bzrlib/transport/http/__init__.py 2011-12-19 01:33:35 +0000
289@@ -22,7 +22,6 @@
290 from cStringIO import StringIO
291 import re
292 import urlparse
293-import urllib
294 import sys
295 import weakref
296
297@@ -63,9 +62,9 @@
298 host = netloc.split(':', 1)[0]
299 else:
300 host = netloc
301- username = urllib.unquote(username)
302+ username = urlutils.unquote(username)
303 if password is not None:
304- password = urllib.unquote(password)
305+ password = urlutils.unquote(password)
306 else:
307 password = ui.ui_factory.get_password(
308 prompt=u'HTTP %(user)s@%(host)s password',
309@@ -589,7 +588,7 @@
310 if transport_base.startswith('bzr+'):
311 transport_base = transport_base[4:]
312 rel_url = urlutils.relative_url(self.base, transport_base)
313- return urllib.unquote(rel_url)
314+ return urlutils.unquote(rel_url)
315
316 def send_http_smart_request(self, bytes):
317 try:
318
319=== modified file 'bzrlib/urlutils.py'
320--- bzrlib/urlutils.py 2011-11-24 13:15:51 +0000
321+++ bzrlib/urlutils.py 2011-12-19 01:33:35 +0000
322@@ -23,7 +23,6 @@
323 from bzrlib.lazy_import import lazy_import
324 lazy_import(globals(), """
325 from posixpath import split as _posix_split
326-import urllib
327 import urlparse
328
329 from bzrlib import (
330@@ -60,13 +59,87 @@
331 return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
332
333
334+# Private copies of quote and unquote, copied from Python's
335+# urllib module because urllib unconditionally imports socket, which imports
336+# ssl.
337+
338+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
339+ 'abcdefghijklmnopqrstuvwxyz'
340+ '0123456789' '_.-')
341+_safe_map = {}
342+for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
343+ _safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
344+_safe_quoters = {}
345+
346+
347+def quote(s, safe='/'):
348+ """quote('abc def') -> 'abc%20def'
349+
350+ Each part of a URL, e.g. the path info, the query, etc., has a
351+ different set of reserved characters that must be quoted.
352+
353+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
354+ the following reserved characters.
355+
356+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
357+ "$" | ","
358+
359+ Each of these characters is reserved in some component of a URL,
360+ but not necessarily in all of them.
361+
362+ By default, the quote function is intended for quoting the path
363+ section of a URL. Thus, it will not encode '/'. This character
364+ is reserved, but in typical usage the quote function is being
365+ called on a path where the existing slash characters are used as
366+ reserved characters.
367+ """
368+ # fastpath
369+ if not s:
370+ if s is None:
371+ raise TypeError('None object cannot be quoted')
372+ return s
373+ cachekey = (safe, always_safe)
374+ try:
375+ (quoter, safe) = _safe_quoters[cachekey]
376+ except KeyError:
377+ safe_map = _safe_map.copy()
378+ safe_map.update([(c, c) for c in safe])
379+ quoter = safe_map.__getitem__
380+ safe = always_safe + safe
381+ _safe_quoters[cachekey] = (quoter, safe)
382+ if not s.rstrip(safe):
383+ return s
384+ return ''.join(map(quoter, s))
385+
386+
387+_hexdig = '0123456789ABCDEFabcdef'
388+_hextochr = dict((a + b, chr(int(a + b, 16)))
389+ for a in _hexdig for b in _hexdig)
390+
391+def unquote(s):
392+ """unquote('abc%20def') -> 'abc def'."""
393+ res = s.split('%')
394+ # fastpath
395+ if len(res) == 1:
396+ return s
397+ s = res[0]
398+ for item in res[1:]:
399+ try:
400+ s += _hextochr[item[:2]] + item[2:]
401+ except KeyError:
402+ s += '%' + item
403+ except UnicodeDecodeError:
404+ s += unichr(int(item[:2], 16)) + item[2:]
405+ return s
406+
407+
408 def escape(relpath):
409 """Escape relpath to be a valid url."""
410 if isinstance(relpath, unicode):
411 relpath = relpath.encode('utf-8')
412 # After quoting and encoding, the path should be perfectly
413 # safe as a plain ASCII string, str() just enforces this
414- return str(urllib.quote(relpath, safe='/~'))
415+ return str(quote(relpath, safe='/~'))
416
417
418 def file_relpath(base, path):
419@@ -566,7 +639,7 @@
420 This returns a Unicode path from a URL
421 """
422 # jam 20060427 URLs are supposed to be ASCII only strings
423- # If they are passed in as unicode, urllib.unquote
424+ # If they are passed in as unicode, unquote
425 # will return a UNICODE string, which actually contains
426 # utf-8 bytes. So we have to ensure that they are
427 # plain ASCII strings, or the final .decode will
428@@ -577,7 +650,7 @@
429 except UnicodeError, e:
430 raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
431
432- unquoted = urllib.unquote(url)
433+ unquoted = unquote(url)
434 try:
435 unicode_path = unquoted.decode('utf-8')
436 except UnicodeError, e:
437@@ -742,20 +815,20 @@
438 port, quoted_path):
439 self.scheme = scheme
440 self.quoted_host = quoted_host
441- self.host = urllib.unquote(self.quoted_host)
442+ self.host = unquote(self.quoted_host)
443 self.quoted_user = quoted_user
444 if self.quoted_user is not None:
445- self.user = urllib.unquote(self.quoted_user)
446+ self.user = unquote(self.quoted_user)
447 else:
448 self.user = None
449 self.quoted_password = quoted_password
450 if self.quoted_password is not None:
451- self.password = urllib.unquote(self.quoted_password)
452+ self.password = unquote(self.quoted_password)
453 else:
454 self.password = None
455 self.port = port
456 self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
457- self.path = urllib.unquote(self.quoted_path)
458+ self.path = unquote(self.quoted_path)
459
460 def __eq__(self, other):
461 return (isinstance(other, self.__class__) and
462@@ -871,7 +944,7 @@
463 if offset is not None:
464 relative = unescape(offset).encode('utf-8')
465 path = self._combine_paths(self.path, relative)
466- path = urllib.quote(path, safe="/~")
467+ path = quote(path, safe="/~")
468 else:
469 path = self.quoted_path
470 return self.__class__(self.scheme, self.quoted_user,
471
472=== modified file 'bzrlib/versionedfile.py'
473--- bzrlib/versionedfile.py 2011-05-16 10:08:01 +0000
474+++ bzrlib/versionedfile.py 2011-12-19 01:33:35 +0000
475@@ -24,8 +24,6 @@
476
477 from bzrlib.lazy_import import lazy_import
478 lazy_import(globals(), """
479-import urllib
480-
481 from bzrlib import (
482 annotate,
483 bencode,
484@@ -38,6 +36,7 @@
485 multiparent,
486 tsort,
487 revision,
488+ urlutils,
489 )
490 """)
491 from bzrlib.registry import Registry
492@@ -821,11 +820,11 @@
493
494 def map(self, key):
495 """See KeyMapper.map()."""
496- return urllib.quote(self._map(key))
497+ return urlutils.quote(self._map(key))
498
499 def unmap(self, partition_id):
500 """See KeyMapper.unmap()."""
501- return self._unmap(urllib.unquote(partition_id))
502+ return self._unmap(urlutils.unquote(partition_id))
503
504
505 class PrefixMapper(URLEscapeMapper):
506@@ -878,7 +877,7 @@
507 def _escape(self, prefix):
508 """Turn a key element into a filesystem safe string.
509
510- This is similar to a plain urllib.quote, except
511+ This is similar to a plain urlutils.quote, except
512 it uses specific safe characters, so that it doesn't
513 have to translate a lot of valid file ids.
514 """
515@@ -891,7 +890,7 @@
516
517 def _unescape(self, basename):
518 """Escaped names are easily unescaped by urlutils."""
519- return urllib.unquote(basename)
520+ return urlutils.unquote(basename)
521
522
523 def make_versioned_files_factory(versioned_file_factory, mapper):
524
525=== modified file 'doc/en/release-notes/bzr-2.5.txt'
526--- doc/en/release-notes/bzr-2.5.txt 2011-12-15 14:47:22 +0000
527+++ doc/en/release-notes/bzr-2.5.txt 2011-12-19 01:33:35 +0000
528@@ -116,6 +116,11 @@
529 speeding up various commands including ``bzr export``,
530 ``bzr checkout`` and ``bzr cat``. (Jelmer Vernooij, #608640)
531
532+* ``bzrlib.urlutils`` now includes ``quote`` and ``unquote`` functions,
533+ rather than importing them from ``urllib``. This prevents loading
534+ of the ``socket``, ``ssl`` and ``urllib`` modules for
535+ local bzr operations. (Jelmer Vernooij)
536+
537 Testing
538 *******
539