Merge lp:~thumper/launchpad/fix-unicode-error-in-urlparse into lp:launchpad

Proposed by Tim Penhey
Status: Merged
Approved by: Robert Collins
Approved revision: no longer in the source branch.
Merged at revision: 12795
Proposed branch: lp:~thumper/launchpad/fix-unicode-error-in-urlparse
Merge into: lp:launchpad
Diff against target: 430 lines (+125/-136)
6 files modified
lib/canonical/launchpad/interfaces/validation.py (+0/-105)
lib/lp/app/validators/tests/test_validators.py (+10/-3)
lib/lp/app/validators/url.py (+112/-25)
lib/lp/blueprints/interfaces/specification.py (+1/-1)
lib/lp/registry/browser/announcement.py (+1/-1)
lib/lp/registry/interfaces/productseries.py (+1/-1)
To merge this branch: bzr merge lp:~thumper/launchpad/fix-unicode-error-in-urlparse
Reviewer Review Type Date Requested Status
Robert Collins (community) Approve
Review via email: mp+57103@code.launchpad.net

Commit message

[r=lifeless][bug=35077,325268] Non ascii urls are not considered valid.

Description of the change

I moved the validate_url and valid_webref functions from
canonical.launchpad.interfaces.validation into
lp.app.validators.url, thus satisfying bug 35077.

valid_branch_url wasn't being used anywhere, so I deleted it.

A few lint cleanup removals in that file too.

To keep with the existing validator tests, I put them in the
docstrings too.

To post a comment you must log in.
Revision history for this message
Robert Collins (lifeless) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/canonical/launchpad/interfaces/validation.py'
2--- lib/canonical/launchpad/interfaces/validation.py 2011-03-23 16:28:51 +0000
3+++ lib/canonical/launchpad/interfaces/validation.py 2011-04-11 09:39:32 +0000
4@@ -7,9 +7,6 @@
5
6 __all__ = [
7 'can_be_nominated_for_series',
8- 'validate_url',
9- 'valid_webref',
10- 'valid_branch_url',
11 'non_duplicate_branch',
12 'valid_bug_number',
13 'valid_cve_sequence',
14@@ -24,7 +21,6 @@
15
16 from cgi import escape
17 from textwrap import dedent
18-import urllib
19
20 from zope.app.form.interfaces import WidgetsError
21 from zope.component import getUtility
22@@ -32,9 +28,7 @@
23 from lazr.restful.error import expose
24
25 from canonical.launchpad import _
26-from canonical.launchpad.interfaces.account import IAccount
27 from canonical.launchpad.interfaces.emailaddress import (
28- IEmailAddress,
29 IEmailAddressSet,
30 )
31 from canonical.launchpad.interfaces.launchpad import ILaunchBag
32@@ -44,7 +38,6 @@
33 from lp.app.validators import LaunchpadValidationError
34 from lp.app.validators.cve import valid_cve
35 from lp.app.validators.email import valid_email
36-from lp.app.validators.url import valid_absolute_url
37
38
39 def can_be_nominated_for_series(series):
40@@ -64,101 +57,6 @@
41 return True
42
43
44-# XXX matsubara 2006-03-15 bug=35077:
45-# The validations functions that deals with URLs should be in
46-# validators/ and we should have them as separete constraints in trusted.sql.
47-def validate_url(url, valid_schemes):
48- """Returns a boolean stating whether 'url' is a valid URL.
49-
50- A URL is valid if:
51- - its URL scheme is in the provided 'valid_schemes' list, and
52- - it has a non-empty host name.
53-
54- None and an empty string are not valid URLs::
55-
56- >>> validate_url(None, [])
57- False
58- >>> validate_url('', [])
59- False
60-
61- The valid_schemes list is checked::
62-
63- >>> validate_url('http://example.com', ['http'])
64- True
65- >>> validate_url('http://example.com', ['https', 'ftp'])
66- False
67-
68- A URL without a host name is not valid:
69-
70- >>> validate_url('http://', ['http'])
71- False
72-
73- """
74- if not url:
75- return False
76- scheme, host = urllib.splittype(url)
77- if not scheme in valid_schemes:
78- return False
79- if not valid_absolute_url(url):
80- return False
81- return True
82-
83-
84-def valid_webref(web_ref):
85- """Returns True if web_ref is a valid download URL, or raises a
86- LaunchpadValidationError.
87-
88- >>> valid_webref('http://example.com')
89- True
90- >>> valid_webref('https://example.com/foo/bar')
91- True
92- >>> valid_webref('ftp://example.com/~ming')
93- True
94- >>> valid_webref('sftp://example.com//absolute/path/maybe')
95- True
96- >>> valid_webref('other://example.com/moo')
97- Traceback (most recent call last):
98- ...
99- LaunchpadValidationError: ...
100- """
101- if validate_url(web_ref, ['http', 'https', 'ftp', 'sftp']):
102- # Allow ftp so valid_webref can be used for download_url, and so
103- # it doesn't lock out weird projects where the site or
104- # screenshots are kept on ftp.
105- return True
106- else:
107- raise LaunchpadValidationError(_(dedent("""
108- Not a valid URL. Please enter the full URL, including the
109- scheme (for instance, http:// for a web URL), and ensure the
110- URL uses either http, https or ftp.""")))
111-
112-
113-def valid_branch_url(branch_url):
114- """Returns True if web_ref is a valid download URL, or raises a
115- LaunchpadValidationError.
116-
117- >>> valid_branch_url('http://example.com')
118- True
119- >>> valid_branch_url('https://example.com/foo/bar')
120- True
121- >>> valid_branch_url('ftp://example.com/~ming')
122- True
123- >>> valid_branch_url('sftp://example.com//absolute/path/maybe')
124- True
125- >>> valid_branch_url('other://example.com/moo')
126- Traceback (most recent call last):
127- ...
128- LaunchpadValidationError: ...
129- """
130- if validate_url(branch_url, ['http', 'https', 'ftp', 'sftp', 'bzr+ssh']):
131- return True
132- else:
133- raise LaunchpadValidationError(_(dedent("""
134- Not a valid URL. Please enter the full URL, including the
135- scheme (for instance, http:// for a web URL), and ensure the
136- URL uses http, https, ftp, sftp, or bzr+ssh.""")))
137-
138-
139 def non_duplicate_branch(value):
140 """Ensure that this branch hasn't already been linked to this bug."""
141 current_bug = getUtility(ILaunchBag).bug
142@@ -224,9 +122,6 @@
143 """Check that the given email is valid and not registered to
144 another launchpad account.
145 """
146- from canonical.launchpad.webapp.publisher import canonical_url
147- from canonical.launchpad.interfaces.emailaddress import IEmailAddressSet
148-
149 _validate_email(email)
150 _check_email_availability(email)
151 return True
152
153=== modified file 'lib/lp/app/validators/tests/test_validators.py'
154--- lib/lp/app/validators/tests/test_validators.py 2011-02-23 22:27:41 +0000
155+++ lib/lp/app/validators/tests/test_validators.py 2011-04-11 09:39:32 +0000
156@@ -5,7 +5,11 @@
157
158 __metaclass__ = type
159
160-from doctest import DocTestSuite
161+from doctest import (
162+ DocTestSuite,
163+ ELLIPSIS,
164+ NORMALIZE_WHITESPACE,
165+ )
166 from unittest import TestSuite
167
168 from canonical.launchpad.testing.systemdocs import (
169@@ -20,7 +24,8 @@
170
171 # Include the doctests in __init__.py.
172 from lp.app import validators
173- suite.addTest(DocTestSuite(validators))
174+ suite.addTest(
175+ DocTestSuite(validators, optionflags=ELLIPSIS | NORMALIZE_WHITESPACE))
176
177 from lp.app.validators import email, name, url, version
178 suite.addTest(suitefor(email))
179@@ -33,7 +38,9 @@
180
181 def suitefor(module):
182 """Make a doctest suite with common setUp and tearDown functions."""
183- suite = DocTestSuite(module, setUp=setUp, tearDown=tearDown)
184+ suite = DocTestSuite(
185+ module, setUp=setUp, tearDown=tearDown,
186+ optionflags=ELLIPSIS | NORMALIZE_WHITESPACE)
187 # We have to invoke the LaunchpadFunctionalLayer in order to
188 # initialize the ZCA machinery, which is a pre-requisite for using
189 # login().
190
191=== modified file 'lib/lp/app/validators/url.py'
192--- lib/lp/app/validators/url.py 2011-02-23 20:26:53 +0000
193+++ lib/lp/app/validators/url.py 2011-04-11 09:39:32 +0000
194@@ -3,14 +3,24 @@
195
196 __metaclass__ = type
197
198+__all__ = [
199+ 'builder_url_validator',
200+ 'valid_absolute_url',
201+ 'valid_builder_url',
202+ 'valid_webref',
203+ 'validate_url',
204+ ]
205+
206 from textwrap import dedent
207+import urllib
208
209 from canonical.launchpad import _
210+from canonical.launchpad.webapp.url import urlparse
211 from lp.app.validators import LaunchpadValidationError
212
213
214 def valid_absolute_url(name):
215- """validate an absolute URL.
216+ """Validate an absolute URL.
217
218 It looks like this function has been deprecated by
219 canonical.launchpad.interfaces.validation.
220@@ -18,25 +28,27 @@
221 We define this as something that can be parsed into a URL that has both
222 a protocol and a network address.
223
224- >>> valid_absolute_url('sftp://chinstrap.ubuntu.com/foo/bar')
225- True
226- >>> valid_absolute_url('http://www.example.com')
227- True
228- >>> valid_absolute_url('whatever:/uxample.com/blah')
229- False
230-
231- # XXX: 2010-04-26, Salgado, bug=570244: This test only works against
232- # python2.6 but we still need to run on python2.5, so we should uncomment
233- # it only when we no longer need to run on 2.5.
234- >>> #valid_absolute_url('whatever://example.com/blah')
235- True
236+ >>> valid_absolute_url('sftp://chinstrap.ubuntu.com/foo/bar')
237+ True
238+ >>> valid_absolute_url('http://www.example.com')
239+ True
240+ >>> valid_absolute_url('whatever:/uxample.com/blah')
241+ False
242+ >>> valid_absolute_url('whatever://example.com/blah')
243+ True
244+
245+ Unicode urls are ascii encoded, and a failure here means it isn't valid.
246+
247+ >>> valid_absolute_url(u'http://www.example.com/test...')
248+ True
249+ >>> valid_absolute_url(u'http://www.example.com/test\u2026')
250+ False
251+
252 """
253- # Have to import urlparse locally since imports from helpers.py
254- # causes this module to be imported, and we can't import stuff from
255- # webapp at that point, since webapp imports stuff from helpers.py
256- # as well.
257- from canonical.launchpad.webapp.url import urlparse
258- (scheme, netloc, path, params, query, fragment) = urlparse(name)
259+ try:
260+ (scheme, netloc, path, params, query, fragment) = urlparse(name)
261+ except UnicodeEncodeError:
262+ return False
263 # note that URL checking is also done inside the database, in
264 # trusted.sql, the valid_absolute_url function, and that code uses
265 # stdlib urlparse, not our customized version.
266@@ -44,6 +56,7 @@
267 return False
268 return True
269
270+
271 def valid_builder_url(url):
272 """validate a url for a builder.
273
274@@ -56,13 +69,12 @@
275 False
276 >>> valid_builder_url('ftp://foo.com/')
277 False
278+
279 """
280- # Have to import urlparse locally since imports from helpers.py
281- # causes this module to be imported, and we can't import stuff from
282- # webapp at that point, since webapp imports stuff from helpers.py
283- # as well.
284- from canonical.launchpad.webapp.url import urlparse
285- (scheme, netloc, path, params, query, fragment) = urlparse(url)
286+ try:
287+ (scheme, netloc, path, params, query, fragment) = urlparse(url)
288+ except UnicodeEncodeError:
289+ return False
290 if scheme != 'http':
291 return False
292 if params or query or fragment:
293@@ -71,6 +83,7 @@
294 return False
295 return True
296
297+
298 def builder_url_validator(url):
299 """Return True if the url is valid, or raise a LaunchpadValidationError"""
300 if not valid_builder_url(url):
301@@ -79,3 +92,77 @@
302 http://host/ or http://host:port/ only.
303 """), mapping={'url': url}))
304 return True
305+
306+
307+def validate_url(url, valid_schemes):
308+ """Returns a boolean stating whether 'url' is a valid URL.
309+
310+ A URL is valid if:
311+ - its URL scheme is in the provided 'valid_schemes' list, and
312+ - it has a non-empty host name.
313+
314+ None and an empty string are not valid URLs::
315+
316+ >>> validate_url(None, [])
317+ False
318+ >>> validate_url('', [])
319+ False
320+
321+ The valid_schemes list is checked::
322+
323+ >>> validate_url('http://example.com', ['http'])
324+ True
325+ >>> validate_url('http://example.com', ['https', 'ftp'])
326+ False
327+
328+ A URL without a host name is not valid:
329+
330+ >>> validate_url('http://', ['http'])
331+ False
332+
333+ Unicode urls are converted to ascii for checking. Failure to convert
334+ results in failure.
335+
336+ >>> validate_url(u'http://example.com', ['http'])
337+ True
338+ >>> validate_url(u'http://example.com/test\u2026', ['http'])
339+ False
340+
341+ """
342+ if not url:
343+ return False
344+ scheme, host = urllib.splittype(url)
345+ if not scheme in valid_schemes:
346+ return False
347+ if not valid_absolute_url(url):
348+ return False
349+ return True
350+
351+
352+def valid_webref(web_ref):
353+ """Returns True if web_ref is a valid download URL, or raises a
354+ LaunchpadValidationError.
355+
356+ >>> valid_webref('http://example.com')
357+ True
358+ >>> valid_webref('https://example.com/foo/bar')
359+ True
360+ >>> valid_webref('ftp://example.com/~ming')
361+ True
362+ >>> valid_webref('sftp://example.com//absolute/path/maybe')
363+ True
364+ >>> valid_webref('other://example.com/moo')
365+ Traceback (most recent call last):
366+ ...
367+ LaunchpadValidationError: ...
368+ """
369+ if validate_url(web_ref, ['http', 'https', 'ftp', 'sftp']):
370+ # Allow ftp so valid_webref can be used for download_url, and so
371+ # it doesn't lock out weird projects where the site or
372+ # screenshots are kept on ftp.
373+ return True
374+ else:
375+ raise LaunchpadValidationError(_(dedent("""
376+ Not a valid URL. Please enter the full URL, including the
377+ scheme (for instance, http:// for a web URL), and ensure the
378+ URL uses either http, https or ftp.""")))
379
380=== modified file 'lib/lp/blueprints/interfaces/specification.py'
381--- lib/lp/blueprints/interfaces/specification.py 2011-03-28 20:54:50 +0000
382+++ lib/lp/blueprints/interfaces/specification.py 2011-04-11 09:39:32 +0000
383@@ -46,8 +46,8 @@
384 )
385
386 from canonical.launchpad import _
387-from canonical.launchpad.interfaces.validation import valid_webref
388 from lp.app.validators import LaunchpadValidationError
389+from lp.app.validators.url import valid_webref
390 from lp.blueprints.enums import (
391 SpecificationDefinitionStatus,
392 SpecificationGoalStatus,
393
394=== modified file 'lib/lp/registry/browser/announcement.py'
395--- lib/lp/registry/browser/announcement.py 2011-02-02 15:43:31 +0000
396+++ lib/lp/registry/browser/announcement.py 2011-04-11 09:39:32 +0000
397@@ -33,7 +33,6 @@
398 FeedsMixin,
399 RootAnnouncementsFeedLink,
400 )
401-from canonical.launchpad.interfaces.validation import valid_webref
402 from canonical.launchpad.webapp.authorization import check_permission
403 from canonical.launchpad.webapp.batching import BatchNavigator
404 from canonical.launchpad.webapp.menu import (
405@@ -50,6 +49,7 @@
406 custom_widget,
407 LaunchpadFormView,
408 )
409+from lp.app.validators.url import valid_webref
410 from lp.app.widgets.announcementdate import AnnouncementDateWidget
411 from lp.registry.interfaces.announcement import IAnnouncement
412 from lp.services.fields import (
413
414=== modified file 'lib/lp/registry/interfaces/productseries.py'
415--- lib/lp/registry/interfaces/productseries.py 2011-03-28 20:54:50 +0000
416+++ lib/lp/registry/interfaces/productseries.py 2011-04-11 09:39:32 +0000
417@@ -46,12 +46,12 @@
418
419 from canonical.launchpad import _
420 from canonical.launchpad.interfaces.launchpad import IHasAppointedDriver
421-from canonical.launchpad.interfaces.validation import validate_url
422 from canonical.launchpad.webapp.url import urlparse
423 from lp.app.errors import NameLookupFailed
424 from lp.app.interfaces.launchpad import IServiceUsage
425 from lp.app.validators import LaunchpadValidationError
426 from lp.app.validators.name import name_validator
427+from lp.app.validators.url import validate_url
428 from lp.blueprints.interfaces.specificationtarget import ISpecificationGoal
429 from lp.bugs.interfaces.bugtarget import (
430 IBugTarget,