Merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more into lp:launchpad
- explicit-proxy-product-release-finder-more
- Merge into devel
Proposed by
Colin Watson
Status: | Merged |
---|---|
Merged at revision: | 18720 |
Proposed branch: | lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more |
Merge into: | lp:launchpad |
Diff against target: |
480 lines (+89/-164) 2 files modified
lib/lp/registry/scripts/productreleasefinder/walker.py (+23/-61) lib/lp/registry/tests/test_prf_walker.py (+66/-103) |
To merge this branch: | bzr merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
William Grant | code | Approve | |
Review via email: mp+349175@code.launchpad.net |
Commit message
Convert the product release finder's HTTPWalker to urlfetch.
Description of the change
I missed this when converting the rest of the product release finder.
To post a comment you must log in.
Revision history for this message
William Grant (wgrant) : | # |
review:
Approve
(code)
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/registry/scripts/productreleasefinder/walker.py' |
2 | --- lib/lp/registry/scripts/productreleasefinder/walker.py 2018-05-06 08:52:34 +0000 |
3 | +++ lib/lp/registry/scripts/productreleasefinder/walker.py 2018-07-09 11:53:19 +0000 |
4 | @@ -13,10 +13,8 @@ |
5 | ] |
6 | |
7 | import ftplib |
8 | -import os |
9 | import socket |
10 | from urllib import unquote_plus |
11 | -import urllib2 |
12 | from urlparse import ( |
13 | urljoin, |
14 | urlsplit, |
15 | @@ -30,10 +28,16 @@ |
16 | InvalidURIError, |
17 | URI, |
18 | ) |
19 | +import requests |
20 | import scandir |
21 | |
22 | from lp.registry.scripts.productreleasefinder import log |
23 | from lp.services.beautifulsoup import BeautifulSoup |
24 | +from lp.services.config import config |
25 | +from lp.services.timeout import ( |
26 | + TimeoutError, |
27 | + urlfetch, |
28 | + ) |
29 | from lp.services.webapp.url import urlappend |
30 | |
31 | |
32 | @@ -47,19 +51,6 @@ |
33 | pass |
34 | |
35 | |
36 | -class Request(urllib2.Request): |
37 | - """A urllib2 Request object that can override the request method.""" |
38 | - |
39 | - method = None |
40 | - |
41 | - def get_method(self): |
42 | - """See `urllib2.Request`.""" |
43 | - if self.method is not None: |
44 | - return self.method |
45 | - else: |
46 | - return urllib2.Request.get_method(self) |
47 | - |
48 | - |
49 | class WalkerBase: |
50 | """Base class for URL walkers. |
51 | |
52 | @@ -278,21 +269,6 @@ |
53 | # Whether to ignore or parse fragments in the URL |
54 | FRAGMENTS = True |
55 | |
56 | - # All the urls handlers used to support the schemas. Redirects are not |
57 | - # supported. |
58 | - handlers = ( |
59 | - urllib2.ProxyHandler, |
60 | - urllib2.UnknownHandler, |
61 | - urllib2.HTTPHandler, |
62 | - urllib2.HTTPDefaultErrorHandler, |
63 | - urllib2.HTTPSHandler, |
64 | - urllib2.HTTPDefaultErrorHandler, |
65 | - urllib2.FTPHandler, |
66 | - urllib2.FileHandler, |
67 | - urllib2.HTTPErrorProcessor) |
68 | - |
69 | - _opener = None |
70 | - |
71 | def open(self): |
72 | """Open the HTTP connection.""" |
73 | self.log.info('Walking %s://%s', self.scheme, self.host) |
74 | @@ -304,19 +280,12 @@ |
75 | def request(self, method, path): |
76 | """Make an HTTP request. |
77 | |
78 | - Returns the HTTPResponse object. |
79 | + Returns the Response object. |
80 | """ |
81 | - # We build a custom opener, because we don't want redirects to be |
82 | - # followed. |
83 | - if self._opener is None: |
84 | - self._opener = urllib2.OpenerDirector() |
85 | - for handler in self.handlers: |
86 | - self._opener.add_handler(handler()) |
87 | - |
88 | self.log.debug("Requesting %s with method %s", path, method) |
89 | - request = Request(urljoin(self.base, path)) |
90 | - request.method = method |
91 | - return self._opener.open(request) |
92 | + return urlfetch( |
93 | + urljoin(self.base, path), method=method, allow_redirects=False, |
94 | + trust_env=False, use_proxy=True, allow_ftp=True) |
95 | |
96 | def isDirectory(self, path): |
97 | """Return whether the path is a directory. |
98 | @@ -335,19 +304,15 @@ |
99 | |
100 | self.log.debug("Checking if %s is a directory" % path) |
101 | try: |
102 | - self.request("HEAD", path) |
103 | + response = self.request("HEAD", path) |
104 | + except (TimeoutError, requests.RequestException) as exc: |
105 | + raise HTTPWalkerError(str(exc)) |
106 | + |
107 | + if not response.is_redirect or "location" not in response.headers: |
108 | return False |
109 | - except urllib2.HTTPError as exc: |
110 | - if exc.code != 301: |
111 | - return False |
112 | - except (IOError, socket.error) as exc: |
113 | - # Raise HTTPWalkerError for other IO or socket errors. |
114 | - raise HTTPWalkerError(str(exc)) |
115 | - |
116 | - # We have a 301 redirect error from here on. |
117 | - url = exc.hdrs.getheader("location") |
118 | - (scheme, netloc, redirect_path, query, fragment) \ |
119 | - = urlsplit(url, self.scheme, self.FRAGMENTS) |
120 | + url = response.headers["location"] |
121 | + scheme, netloc, redirect_path, _, _ = urlsplit( |
122 | + url, self.scheme, self.FRAGMENTS) |
123 | |
124 | if len(scheme) and scheme != self.scheme: |
125 | return False |
126 | @@ -368,11 +333,8 @@ |
127 | self.log.info("Listing %s" % dirname) |
128 | try: |
129 | response = self.request("GET", dirname) |
130 | - try: |
131 | - soup = BeautifulSoup(response.read()) |
132 | - finally: |
133 | - response.close() |
134 | - except (IOError, socket.error) as exc: |
135 | + soup = BeautifulSoup(response.content) |
136 | + except (TimeoutError, requests.RequestException) as exc: |
137 | raise HTTPWalkerError(str(exc)) |
138 | |
139 | base = URI(self.base).resolve(dirname) |
140 | @@ -413,9 +375,9 @@ |
141 | """Return a walker for the URL given.""" |
142 | (scheme, netloc, path, query, fragment) = urlsplit(url, "file") |
143 | if scheme in ["ftp"]: |
144 | - # If ftp_proxy is set, use the HTTPWalker class since we are |
145 | - # talking to an HTTP proxy. |
146 | - if 'ftp_proxy' in os.environ: |
147 | + # If an HTTP proxy is configured, use the HTTPWalker class so that |
148 | + # FTP requests are made via the proxy. |
149 | + if config.launchpad.http_proxy: |
150 | return HTTPWalker(url, log_parent) |
151 | else: |
152 | return FTPWalker(url, log_parent) |
153 | |
154 | === modified file 'lib/lp/registry/tests/test_prf_walker.py' |
155 | --- lib/lp/registry/tests/test_prf_walker.py 2018-01-02 16:10:26 +0000 |
156 | +++ lib/lp/registry/tests/test_prf_walker.py 2018-07-09 11:53:19 +0000 |
157 | @@ -1,15 +1,24 @@ |
158 | -# Copyright 2009-2011 Canonical Ltd. This software is licensed under the |
159 | +# Copyright 2009-2018 Canonical Ltd. This software is licensed under the |
160 | # GNU Affero General Public License version 3 (see the file LICENSE). |
161 | |
162 | """Tests for lp.registry.scripts.productreleasefinder.walker.""" |
163 | |
164 | import logging |
165 | import StringIO |
166 | -import urlparse |
167 | - |
168 | -from lazr.restful.utils import safe_hasattr |
169 | - |
170 | -from lp.registry.scripts.productreleasefinder.walker import WalkerBase |
171 | + |
172 | +import responses |
173 | + |
174 | +from lp.registry.scripts.productreleasefinder.walker import ( |
175 | + combine_url, |
176 | + FTPWalker, |
177 | + HTTPWalker, |
178 | + WalkerBase, |
179 | + WalkerError, |
180 | + ) |
181 | +from lp.services.timeout import ( |
182 | + get_default_timeout_function, |
183 | + set_default_timeout_function, |
184 | + ) |
185 | from lp.testing import ( |
186 | reset_logging, |
187 | TestCase, |
188 | @@ -20,14 +29,12 @@ |
189 | |
190 | def testCreatesDefaultLogger(self): |
191 | """WalkerBase creates a default logger.""" |
192 | - from logging import Logger |
193 | w = WalkerBase("/") |
194 | - self.assertTrue(isinstance(w.log, Logger)) |
195 | + self.assertTrue(isinstance(w.log, logging.Logger)) |
196 | |
197 | def testCreatesChildLogger(self): |
198 | """WalkerBase creates a child logger if given a parent.""" |
199 | - from logging import getLogger |
200 | - parent = getLogger("foo") |
201 | + parent = logging.getLogger("foo") |
202 | w = WalkerBase("/", log_parent=parent) |
203 | self.assertEqual(w.log.parent, parent) |
204 | |
205 | @@ -56,8 +63,6 @@ |
206 | |
207 | def testWrongScheme(self): |
208 | """WalkerBase raises WalkerError when given an unhandled scheme.""" |
209 | - from lp.registry.scripts.productreleasefinder.walker import ( |
210 | - WalkerBase, WalkerError) |
211 | self.assertRaises(WalkerError, WalkerBase, "foo://localhost/") |
212 | |
213 | def testUnescapesHost(self): |
214 | @@ -201,35 +206,25 @@ |
215 | |
216 | def testFtpScheme(self): |
217 | """FTPWalker works when initialized with an ftp-scheme URL.""" |
218 | - from lp.registry.scripts.productreleasefinder.walker import ( |
219 | - FTPWalker) |
220 | w = FTPWalker("ftp://localhost/") |
221 | self.assertEqual(w.host, "localhost") |
222 | |
223 | def testNoScheme(self): |
224 | """FTPWalker works when given a URL with no scheme.""" |
225 | - from lp.registry.scripts.productreleasefinder.walker import ( |
226 | - FTPWalker) |
227 | w = FTPWalker("/") |
228 | self.assertEqual(w.host, "") |
229 | |
230 | def testWrongScheme(self): |
231 | """FTPWalker raises WalkerError when given an unhandled scheme.""" |
232 | - from lp.registry.scripts.productreleasefinder.walker import ( |
233 | - FTPWalker, WalkerError) |
234 | self.assertRaises(WalkerError, FTPWalker, "http://localhost/") |
235 | |
236 | def testNoUsername(self): |
237 | """FTPWalker stores 'anonymous' when there is no username.""" |
238 | - from lp.registry.scripts.productreleasefinder.walker import ( |
239 | - FTPWalker) |
240 | w = FTPWalker("ftp://localhost/") |
241 | self.assertEqual(w.user, "anonymous") |
242 | |
243 | def testNoPassword(self): |
244 | """FTPWalker stores empty string when there is no password.""" |
245 | - from lp.registry.scripts.productreleasefinder.walker import ( |
246 | - FTPWalker) |
247 | w = FTPWalker("ftp://scott@localhost/") |
248 | self.assertEqual(w.passwd, "") |
249 | |
250 | @@ -238,88 +233,35 @@ |
251 | |
252 | def testHttpScheme(self): |
253 | """HTTPWalker works when initialized with an http-scheme URL.""" |
254 | - from lp.registry.scripts.productreleasefinder.walker import ( |
255 | - HTTPWalker) |
256 | w = HTTPWalker("http://localhost/") |
257 | self.assertEqual(w.host, "localhost") |
258 | |
259 | def testHttpsScheme(self): |
260 | """HTTPWalker works when initialized with an https-scheme URL.""" |
261 | - from lp.registry.scripts.productreleasefinder.walker import ( |
262 | - HTTPWalker) |
263 | w = HTTPWalker("https://localhost/") |
264 | self.assertEqual(w.host, "localhost") |
265 | |
266 | def testNoScheme(self): |
267 | """HTTPWalker works when given a URL with no scheme.""" |
268 | - from lp.registry.scripts.productreleasefinder.walker import ( |
269 | - HTTPWalker) |
270 | w = HTTPWalker("/") |
271 | self.assertEqual(w.host, "") |
272 | |
273 | def testWrongScheme(self): |
274 | """HTTPWalker raises WalkerError when given an unhandled scheme.""" |
275 | - from lp.registry.scripts.productreleasefinder.walker import ( |
276 | - HTTPWalker, WalkerError) |
277 | self.assertRaises(WalkerError, HTTPWalker, "foo://localhost/") |
278 | |
279 | |
280 | -class HTTPWalker_url_schemes_and_handlers(TestCase): |
281 | - """Verify there is a handler for each URL scheme.""" |
282 | +class HTTPWalker_ListDir(TestCase): |
283 | |
284 | def setUp(self): |
285 | - TestCase.setUp(self) |
286 | - from lp.registry.scripts.productreleasefinder.walker import ( |
287 | - HTTPWalker) |
288 | - self.walker = HTTPWalker("http://localhost/") |
289 | - |
290 | - def verify_url_scheme_and_handler(self, scheme, handler): |
291 | - self.assertIn(scheme, self.walker.URL_SCHEMES) |
292 | - self.assertIn(handler, self.walker.handlers) |
293 | - # urllib2 uses a naming convention to select the handler for |
294 | - # a URL scheme. This test is sanity to check to ensure that the |
295 | - # HTTPWalker's configuration of the OpenerDirector is will work. |
296 | - method_name = '%s_open' % scheme |
297 | - self.assertTrue(safe_hasattr(handler, method_name)) |
298 | - |
299 | - def test_http_request(self): |
300 | - import urllib2 |
301 | - self.verify_url_scheme_and_handler('http', urllib2.HTTPHandler) |
302 | - |
303 | - def test_https_request(self): |
304 | - import urllib2 |
305 | - self.verify_url_scheme_and_handler('https', urllib2.HTTPSHandler) |
306 | - |
307 | - def test_ftp_request(self): |
308 | - import urllib2 |
309 | - self.verify_url_scheme_and_handler('ftp', urllib2.FTPHandler) |
310 | - |
311 | - |
312 | -class HTTPWalker_ListDir(TestCase): |
313 | - |
314 | - def tearDown(self): |
315 | - reset_logging() |
316 | - super(HTTPWalker_ListDir, self).tearDown() |
317 | - |
318 | - def setUpWalker(self, listing_url, listing_content): |
319 | - from lp.registry.scripts.productreleasefinder.walker import ( |
320 | - HTTPWalker) |
321 | - test = self |
322 | - |
323 | - class TestHTTPWalker(HTTPWalker): |
324 | - |
325 | - def request(self, method, path): |
326 | - test.assertEqual(method, 'GET') |
327 | - test.assertEqual(urlparse.urljoin(self.base, path), |
328 | - listing_url) |
329 | - return StringIO.StringIO(listing_content) |
330 | - |
331 | - def isDirectory(self, path): |
332 | - return path.endswith('/') |
333 | - |
334 | - logging.basicConfig(level=logging.CRITICAL) |
335 | - return TestHTTPWalker(listing_url, logging.getLogger()) |
336 | - |
337 | + super(HTTPWalker_ListDir, self).setUp() |
338 | + self.addCleanup(reset_logging) |
339 | + original_timeout_function = get_default_timeout_function() |
340 | + set_default_timeout_function(lambda: 60.0) |
341 | + self.addCleanup( |
342 | + set_default_timeout_function, original_timeout_function) |
343 | + |
344 | + @responses.activate |
345 | def testApacheListing(self): |
346 | # Test that list() handles a standard Apache dir listing. |
347 | content = ''' |
348 | @@ -341,15 +283,22 @@ |
349 | <address>Apache/2.2.3 (Unix) Server at <a href="mailto:ftp-adm@acc.umu.se">ftp.acc.umu.se</a> Port 80</address> |
350 | </body></html> |
351 | ''' |
352 | - walker = self.setUpWalker( |
353 | - 'http://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/', content) |
354 | + listing_url = 'http://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/' |
355 | + responses.add('GET', listing_url, body=content) |
356 | + expected_filenames = [ |
357 | + 'LATEST-IS-0.5.0', |
358 | + 'gnome-gpg-0.5.0.md5sum', |
359 | + 'gnome-gpg-0.5.0.tar.bz2', |
360 | + 'gnome-gpg-0.5.0.tar.gz', |
361 | + ] |
362 | + for filename in expected_filenames: |
363 | + responses.add('HEAD', listing_url + filename) |
364 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
365 | dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/') |
366 | self.assertEqual(dirnames, []) |
367 | - self.assertEqual(filenames, ['LATEST-IS-0.5.0', |
368 | - 'gnome-gpg-0.5.0.md5sum', |
369 | - 'gnome-gpg-0.5.0.tar.bz2', |
370 | - 'gnome-gpg-0.5.0.tar.gz']) |
371 | + self.assertEqual(filenames, expected_filenames) |
372 | |
373 | + @responses.activate |
374 | def testSquidFtpListing(self): |
375 | # Test that a Squid FTP listing can be parsed. |
376 | content = ''' |
377 | @@ -375,8 +324,9 @@ |
378 | Generated Wed, 06 Sep 2006 11:04:02 GMT by squid (squid/2.5.STABLE12) |
379 | </ADDRESS></BODY></HTML> |
380 | ''' |
381 | - walker = self.setUpWalker( |
382 | - 'ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/', content) |
383 | + listing_url = 'ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/' |
384 | + responses.add('GET', listing_url, body=content) |
385 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
386 | dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/') |
387 | self.assertEqual(dirnames, []) |
388 | self.assertEqual(filenames, ['LATEST-IS-0.5.0', |
389 | @@ -384,9 +334,10 @@ |
390 | 'gnome-gpg-0.5.0.tar.bz2', |
391 | 'gnome-gpg-0.5.0.tar.gz']) |
392 | |
393 | + @responses.activate |
394 | def testNonAsciiListing(self): |
395 | # Test that list() handles non-ASCII output. |
396 | - content = ''' |
397 | + content = b''' |
398 | <html> |
399 | <head> |
400 | <title>Listing</title> |
401 | @@ -409,11 +360,17 @@ |
402 | </pre> |
403 | </html> |
404 | ''' |
405 | - walker = self.setUpWalker('http://example.com/foo/', content) |
406 | + listing_url = 'http://example.com/foo/' |
407 | + responses.add('GET', listing_url, body=content) |
408 | + expected_filenames = ['file1', 'file2', 'file3', 'file99'] |
409 | + for filename in expected_filenames: |
410 | + responses.add('HEAD', listing_url + filename) |
411 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
412 | dirnames, filenames = walker.list('/foo/') |
413 | self.assertEqual(dirnames, ['subdir1/', 'subdir2/', 'subdir3/']) |
414 | - self.assertEqual(filenames, ['file1', 'file2', 'file3', 'file99']) |
415 | + self.assertEqual(filenames, expected_filenames) |
416 | |
417 | + @responses.activate |
418 | def testDotPaths(self): |
419 | # Test that paths containing dots are handled correctly. |
420 | # |
421 | @@ -436,11 +393,15 @@ |
422 | </pre> |
423 | </html> |
424 | ''' |
425 | - walker = self.setUpWalker('http://example.com/foo/', content) |
426 | + listing_url = 'http://example.com/foo/' |
427 | + responses.add('GET', listing_url, body=content) |
428 | + responses.add('HEAD', listing_url + 'file2') |
429 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
430 | dirnames, filenames = walker.list('/foo/') |
431 | self.assertEqual(dirnames, ['dir/']) |
432 | self.assertEqual(filenames, ['file2']) |
433 | |
434 | + @responses.activate |
435 | def testNamedAnchors(self): |
436 | # Test that the directory listing parser code handles named anchors. |
437 | # These are <a> tags without an href attribute. |
438 | @@ -458,15 +419,21 @@ |
439 | </pre> |
440 | </html> |
441 | ''' |
442 | - walker = self.setUpWalker('http://example.com/foo/', content) |
443 | + listing_url = 'http://example.com/foo/' |
444 | + responses.add('GET', listing_url, body=content) |
445 | + responses.add('HEAD', listing_url + 'file1') |
446 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
447 | dirnames, filenames = walker.list('/foo/') |
448 | self.assertEqual(dirnames, ['dir1/']) |
449 | self.assertEqual(filenames, ['file1']) |
450 | |
451 | + @responses.activate |
452 | def testGarbageListing(self): |
453 | # Make sure that garbage doesn't trip up the dir lister. |
454 | - content = '\x01\x02\x03\x00\xff\xf2\xablkjsdflkjsfkljfds' |
455 | - walker = self.setUpWalker('http://example.com/foo/', content) |
456 | + content = b'\x01\x02\x03\x00\xff\xf2\xablkjsdflkjsfkljfds' |
457 | + listing_url = 'http://example.com/foo/' |
458 | + responses.add('GET', listing_url, body=content) |
459 | + walker = HTTPWalker(listing_url, logging.getLogger()) |
460 | dirnames, filenames = walker.list('/foo/') |
461 | self.assertEqual(dirnames, []) |
462 | self.assertEqual(filenames, []) |
463 | @@ -481,8 +448,6 @@ |
464 | def testFtpIsDirectory(self): |
465 | # Test that no requests are made by isDirectory() when walking |
466 | # FTP sites. |
467 | - from lp.registry.scripts.productreleasefinder.walker import ( |
468 | - HTTPWalker) |
469 | test = self |
470 | |
471 | class TestHTTPWalker(HTTPWalker): |
472 | @@ -501,8 +466,6 @@ |
473 | |
474 | def testConstructsUrl(self): |
475 | """combine_url constructs the URL correctly.""" |
476 | - from lp.registry.scripts.productreleasefinder.walker import ( |
477 | - combine_url) |
478 | self.assertEqual(combine_url("file:///base", "/subdir/", "file"), |
479 | "file:///subdir/file") |
480 | self.assertEqual(combine_url("file:///base", "/subdir", "file"), |