Merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18720
Proposed branch: lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more
Merge into: lp:launchpad
Diff against target: 480 lines (+89/-164)
2 files modified
lib/lp/registry/scripts/productreleasefinder/walker.py (+23/-61)
lib/lp/registry/tests/test_prf_walker.py (+66/-103)
To merge this branch: bzr merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder-more
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+349175@code.launchpad.net

Commit message

Convert the product release finder's HTTPWalker to urlfetch.

Description of the change

I missed this when converting the rest of the product release finder.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/registry/scripts/productreleasefinder/walker.py'
2--- lib/lp/registry/scripts/productreleasefinder/walker.py 2018-05-06 08:52:34 +0000
3+++ lib/lp/registry/scripts/productreleasefinder/walker.py 2018-07-09 11:53:19 +0000
4@@ -13,10 +13,8 @@
5 ]
6
7 import ftplib
8-import os
9 import socket
10 from urllib import unquote_plus
11-import urllib2
12 from urlparse import (
13 urljoin,
14 urlsplit,
15@@ -30,10 +28,16 @@
16 InvalidURIError,
17 URI,
18 )
19+import requests
20 import scandir
21
22 from lp.registry.scripts.productreleasefinder import log
23 from lp.services.beautifulsoup import BeautifulSoup
24+from lp.services.config import config
25+from lp.services.timeout import (
26+ TimeoutError,
27+ urlfetch,
28+ )
29 from lp.services.webapp.url import urlappend
30
31
32@@ -47,19 +51,6 @@
33 pass
34
35
36-class Request(urllib2.Request):
37- """A urllib2 Request object that can override the request method."""
38-
39- method = None
40-
41- def get_method(self):
42- """See `urllib2.Request`."""
43- if self.method is not None:
44- return self.method
45- else:
46- return urllib2.Request.get_method(self)
47-
48-
49 class WalkerBase:
50 """Base class for URL walkers.
51
52@@ -278,21 +269,6 @@
53 # Whether to ignore or parse fragments in the URL
54 FRAGMENTS = True
55
56- # All the urls handlers used to support the schemas. Redirects are not
57- # supported.
58- handlers = (
59- urllib2.ProxyHandler,
60- urllib2.UnknownHandler,
61- urllib2.HTTPHandler,
62- urllib2.HTTPDefaultErrorHandler,
63- urllib2.HTTPSHandler,
64- urllib2.HTTPDefaultErrorHandler,
65- urllib2.FTPHandler,
66- urllib2.FileHandler,
67- urllib2.HTTPErrorProcessor)
68-
69- _opener = None
70-
71 def open(self):
72 """Open the HTTP connection."""
73 self.log.info('Walking %s://%s', self.scheme, self.host)
74@@ -304,19 +280,12 @@
75 def request(self, method, path):
76 """Make an HTTP request.
77
78- Returns the HTTPResponse object.
79+ Returns the Response object.
80 """
81- # We build a custom opener, because we don't want redirects to be
82- # followed.
83- if self._opener is None:
84- self._opener = urllib2.OpenerDirector()
85- for handler in self.handlers:
86- self._opener.add_handler(handler())
87-
88 self.log.debug("Requesting %s with method %s", path, method)
89- request = Request(urljoin(self.base, path))
90- request.method = method
91- return self._opener.open(request)
92+ return urlfetch(
93+ urljoin(self.base, path), method=method, allow_redirects=False,
94+ trust_env=False, use_proxy=True, allow_ftp=True)
95
96 def isDirectory(self, path):
97 """Return whether the path is a directory.
98@@ -335,19 +304,15 @@
99
100 self.log.debug("Checking if %s is a directory" % path)
101 try:
102- self.request("HEAD", path)
103+ response = self.request("HEAD", path)
104+ except (TimeoutError, requests.RequestException) as exc:
105+ raise HTTPWalkerError(str(exc))
106+
107+ if not response.is_redirect or "location" not in response.headers:
108 return False
109- except urllib2.HTTPError as exc:
110- if exc.code != 301:
111- return False
112- except (IOError, socket.error) as exc:
113- # Raise HTTPWalkerError for other IO or socket errors.
114- raise HTTPWalkerError(str(exc))
115-
116- # We have a 301 redirect error from here on.
117- url = exc.hdrs.getheader("location")
118- (scheme, netloc, redirect_path, query, fragment) \
119- = urlsplit(url, self.scheme, self.FRAGMENTS)
120+ url = response.headers["location"]
121+ scheme, netloc, redirect_path, _, _ = urlsplit(
122+ url, self.scheme, self.FRAGMENTS)
123
124 if len(scheme) and scheme != self.scheme:
125 return False
126@@ -368,11 +333,8 @@
127 self.log.info("Listing %s" % dirname)
128 try:
129 response = self.request("GET", dirname)
130- try:
131- soup = BeautifulSoup(response.read())
132- finally:
133- response.close()
134- except (IOError, socket.error) as exc:
135+ soup = BeautifulSoup(response.content)
136+ except (TimeoutError, requests.RequestException) as exc:
137 raise HTTPWalkerError(str(exc))
138
139 base = URI(self.base).resolve(dirname)
140@@ -413,9 +375,9 @@
141 """Return a walker for the URL given."""
142 (scheme, netloc, path, query, fragment) = urlsplit(url, "file")
143 if scheme in ["ftp"]:
144- # If ftp_proxy is set, use the HTTPWalker class since we are
145- # talking to an HTTP proxy.
146- if 'ftp_proxy' in os.environ:
147+ # If an HTTP proxy is configured, use the HTTPWalker class so that
148+ # FTP requests are made via the proxy.
149+ if config.launchpad.http_proxy:
150 return HTTPWalker(url, log_parent)
151 else:
152 return FTPWalker(url, log_parent)
153
154=== modified file 'lib/lp/registry/tests/test_prf_walker.py'
155--- lib/lp/registry/tests/test_prf_walker.py 2018-01-02 16:10:26 +0000
156+++ lib/lp/registry/tests/test_prf_walker.py 2018-07-09 11:53:19 +0000
157@@ -1,15 +1,24 @@
158-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
159+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
160 # GNU Affero General Public License version 3 (see the file LICENSE).
161
162 """Tests for lp.registry.scripts.productreleasefinder.walker."""
163
164 import logging
165 import StringIO
166-import urlparse
167-
168-from lazr.restful.utils import safe_hasattr
169-
170-from lp.registry.scripts.productreleasefinder.walker import WalkerBase
171+
172+import responses
173+
174+from lp.registry.scripts.productreleasefinder.walker import (
175+ combine_url,
176+ FTPWalker,
177+ HTTPWalker,
178+ WalkerBase,
179+ WalkerError,
180+ )
181+from lp.services.timeout import (
182+ get_default_timeout_function,
183+ set_default_timeout_function,
184+ )
185 from lp.testing import (
186 reset_logging,
187 TestCase,
188@@ -20,14 +29,12 @@
189
190 def testCreatesDefaultLogger(self):
191 """WalkerBase creates a default logger."""
192- from logging import Logger
193 w = WalkerBase("/")
194- self.assertTrue(isinstance(w.log, Logger))
195+ self.assertTrue(isinstance(w.log, logging.Logger))
196
197 def testCreatesChildLogger(self):
198 """WalkerBase creates a child logger if given a parent."""
199- from logging import getLogger
200- parent = getLogger("foo")
201+ parent = logging.getLogger("foo")
202 w = WalkerBase("/", log_parent=parent)
203 self.assertEqual(w.log.parent, parent)
204
205@@ -56,8 +63,6 @@
206
207 def testWrongScheme(self):
208 """WalkerBase raises WalkerError when given an unhandled scheme."""
209- from lp.registry.scripts.productreleasefinder.walker import (
210- WalkerBase, WalkerError)
211 self.assertRaises(WalkerError, WalkerBase, "foo://localhost/")
212
213 def testUnescapesHost(self):
214@@ -201,35 +206,25 @@
215
216 def testFtpScheme(self):
217 """FTPWalker works when initialized with an ftp-scheme URL."""
218- from lp.registry.scripts.productreleasefinder.walker import (
219- FTPWalker)
220 w = FTPWalker("ftp://localhost/")
221 self.assertEqual(w.host, "localhost")
222
223 def testNoScheme(self):
224 """FTPWalker works when given a URL with no scheme."""
225- from lp.registry.scripts.productreleasefinder.walker import (
226- FTPWalker)
227 w = FTPWalker("/")
228 self.assertEqual(w.host, "")
229
230 def testWrongScheme(self):
231 """FTPWalker raises WalkerError when given an unhandled scheme."""
232- from lp.registry.scripts.productreleasefinder.walker import (
233- FTPWalker, WalkerError)
234 self.assertRaises(WalkerError, FTPWalker, "http://localhost/")
235
236 def testNoUsername(self):
237 """FTPWalker stores 'anonymous' when there is no username."""
238- from lp.registry.scripts.productreleasefinder.walker import (
239- FTPWalker)
240 w = FTPWalker("ftp://localhost/")
241 self.assertEqual(w.user, "anonymous")
242
243 def testNoPassword(self):
244 """FTPWalker stores empty string when there is no password."""
245- from lp.registry.scripts.productreleasefinder.walker import (
246- FTPWalker)
247 w = FTPWalker("ftp://scott@localhost/")
248 self.assertEqual(w.passwd, "")
249
250@@ -238,88 +233,35 @@
251
252 def testHttpScheme(self):
253 """HTTPWalker works when initialized with an http-scheme URL."""
254- from lp.registry.scripts.productreleasefinder.walker import (
255- HTTPWalker)
256 w = HTTPWalker("http://localhost/")
257 self.assertEqual(w.host, "localhost")
258
259 def testHttpsScheme(self):
260 """HTTPWalker works when initialized with an https-scheme URL."""
261- from lp.registry.scripts.productreleasefinder.walker import (
262- HTTPWalker)
263 w = HTTPWalker("https://localhost/")
264 self.assertEqual(w.host, "localhost")
265
266 def testNoScheme(self):
267 """HTTPWalker works when given a URL with no scheme."""
268- from lp.registry.scripts.productreleasefinder.walker import (
269- HTTPWalker)
270 w = HTTPWalker("/")
271 self.assertEqual(w.host, "")
272
273 def testWrongScheme(self):
274 """HTTPWalker raises WalkerError when given an unhandled scheme."""
275- from lp.registry.scripts.productreleasefinder.walker import (
276- HTTPWalker, WalkerError)
277 self.assertRaises(WalkerError, HTTPWalker, "foo://localhost/")
278
279
280-class HTTPWalker_url_schemes_and_handlers(TestCase):
281- """Verify there is a handler for each URL scheme."""
282+class HTTPWalker_ListDir(TestCase):
283
284 def setUp(self):
285- TestCase.setUp(self)
286- from lp.registry.scripts.productreleasefinder.walker import (
287- HTTPWalker)
288- self.walker = HTTPWalker("http://localhost/")
289-
290- def verify_url_scheme_and_handler(self, scheme, handler):
291- self.assertIn(scheme, self.walker.URL_SCHEMES)
292- self.assertIn(handler, self.walker.handlers)
293- # urllib2 uses a naming convention to select the handler for
294- # a URL scheme. This test is sanity to check to ensure that the
295- # HTTPWalker's configuration of the OpenerDirector is will work.
296- method_name = '%s_open' % scheme
297- self.assertTrue(safe_hasattr(handler, method_name))
298-
299- def test_http_request(self):
300- import urllib2
301- self.verify_url_scheme_and_handler('http', urllib2.HTTPHandler)
302-
303- def test_https_request(self):
304- import urllib2
305- self.verify_url_scheme_and_handler('https', urllib2.HTTPSHandler)
306-
307- def test_ftp_request(self):
308- import urllib2
309- self.verify_url_scheme_and_handler('ftp', urllib2.FTPHandler)
310-
311-
312-class HTTPWalker_ListDir(TestCase):
313-
314- def tearDown(self):
315- reset_logging()
316- super(HTTPWalker_ListDir, self).tearDown()
317-
318- def setUpWalker(self, listing_url, listing_content):
319- from lp.registry.scripts.productreleasefinder.walker import (
320- HTTPWalker)
321- test = self
322-
323- class TestHTTPWalker(HTTPWalker):
324-
325- def request(self, method, path):
326- test.assertEqual(method, 'GET')
327- test.assertEqual(urlparse.urljoin(self.base, path),
328- listing_url)
329- return StringIO.StringIO(listing_content)
330-
331- def isDirectory(self, path):
332- return path.endswith('/')
333-
334- logging.basicConfig(level=logging.CRITICAL)
335- return TestHTTPWalker(listing_url, logging.getLogger())
336-
337+ super(HTTPWalker_ListDir, self).setUp()
338+ self.addCleanup(reset_logging)
339+ original_timeout_function = get_default_timeout_function()
340+ set_default_timeout_function(lambda: 60.0)
341+ self.addCleanup(
342+ set_default_timeout_function, original_timeout_function)
343+
344+ @responses.activate
345 def testApacheListing(self):
346 # Test that list() handles a standard Apache dir listing.
347 content = '''
348@@ -341,15 +283,22 @@
349 <address>Apache/2.2.3 (Unix) Server at <a href="mailto:ftp-adm@acc.umu.se">ftp.acc.umu.se</a> Port 80</address>
350 </body></html>
351 '''
352- walker = self.setUpWalker(
353- 'http://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/', content)
354+ listing_url = 'http://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/'
355+ responses.add('GET', listing_url, body=content)
356+ expected_filenames = [
357+ 'LATEST-IS-0.5.0',
358+ 'gnome-gpg-0.5.0.md5sum',
359+ 'gnome-gpg-0.5.0.tar.bz2',
360+ 'gnome-gpg-0.5.0.tar.gz',
361+ ]
362+ for filename in expected_filenames:
363+ responses.add('HEAD', listing_url + filename)
364+ walker = HTTPWalker(listing_url, logging.getLogger())
365 dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/')
366 self.assertEqual(dirnames, [])
367- self.assertEqual(filenames, ['LATEST-IS-0.5.0',
368- 'gnome-gpg-0.5.0.md5sum',
369- 'gnome-gpg-0.5.0.tar.bz2',
370- 'gnome-gpg-0.5.0.tar.gz'])
371+ self.assertEqual(filenames, expected_filenames)
372
373+ @responses.activate
374 def testSquidFtpListing(self):
375 # Test that a Squid FTP listing can be parsed.
376 content = '''
377@@ -375,8 +324,9 @@
378 Generated Wed, 06 Sep 2006 11:04:02 GMT by squid (squid/2.5.STABLE12)
379 </ADDRESS></BODY></HTML>
380 '''
381- walker = self.setUpWalker(
382- 'ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/', content)
383+ listing_url = 'ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/'
384+ responses.add('GET', listing_url, body=content)
385+ walker = HTTPWalker(listing_url, logging.getLogger())
386 dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/')
387 self.assertEqual(dirnames, [])
388 self.assertEqual(filenames, ['LATEST-IS-0.5.0',
389@@ -384,9 +334,10 @@
390 'gnome-gpg-0.5.0.tar.bz2',
391 'gnome-gpg-0.5.0.tar.gz'])
392
393+ @responses.activate
394 def testNonAsciiListing(self):
395 # Test that list() handles non-ASCII output.
396- content = '''
397+ content = b'''
398 <html>
399 <head>
400 <title>Listing</title>
401@@ -409,11 +360,17 @@
402 </pre>
403 </html>
404 '''
405- walker = self.setUpWalker('http://example.com/foo/', content)
406+ listing_url = 'http://example.com/foo/'
407+ responses.add('GET', listing_url, body=content)
408+ expected_filenames = ['file1', 'file2', 'file3', 'file99']
409+ for filename in expected_filenames:
410+ responses.add('HEAD', listing_url + filename)
411+ walker = HTTPWalker(listing_url, logging.getLogger())
412 dirnames, filenames = walker.list('/foo/')
413 self.assertEqual(dirnames, ['subdir1/', 'subdir2/', 'subdir3/'])
414- self.assertEqual(filenames, ['file1', 'file2', 'file3', 'file99'])
415+ self.assertEqual(filenames, expected_filenames)
416
417+ @responses.activate
418 def testDotPaths(self):
419 # Test that paths containing dots are handled correctly.
420 #
421@@ -436,11 +393,15 @@
422 </pre>
423 </html>
424 '''
425- walker = self.setUpWalker('http://example.com/foo/', content)
426+ listing_url = 'http://example.com/foo/'
427+ responses.add('GET', listing_url, body=content)
428+ responses.add('HEAD', listing_url + 'file2')
429+ walker = HTTPWalker(listing_url, logging.getLogger())
430 dirnames, filenames = walker.list('/foo/')
431 self.assertEqual(dirnames, ['dir/'])
432 self.assertEqual(filenames, ['file2'])
433
434+ @responses.activate
435 def testNamedAnchors(self):
436 # Test that the directory listing parser code handles named anchors.
437 # These are <a> tags without an href attribute.
438@@ -458,15 +419,21 @@
439 </pre>
440 </html>
441 '''
442- walker = self.setUpWalker('http://example.com/foo/', content)
443+ listing_url = 'http://example.com/foo/'
444+ responses.add('GET', listing_url, body=content)
445+ responses.add('HEAD', listing_url + 'file1')
446+ walker = HTTPWalker(listing_url, logging.getLogger())
447 dirnames, filenames = walker.list('/foo/')
448 self.assertEqual(dirnames, ['dir1/'])
449 self.assertEqual(filenames, ['file1'])
450
451+ @responses.activate
452 def testGarbageListing(self):
453 # Make sure that garbage doesn't trip up the dir lister.
454- content = '\x01\x02\x03\x00\xff\xf2\xablkjsdflkjsfkljfds'
455- walker = self.setUpWalker('http://example.com/foo/', content)
456+ content = b'\x01\x02\x03\x00\xff\xf2\xablkjsdflkjsfkljfds'
457+ listing_url = 'http://example.com/foo/'
458+ responses.add('GET', listing_url, body=content)
459+ walker = HTTPWalker(listing_url, logging.getLogger())
460 dirnames, filenames = walker.list('/foo/')
461 self.assertEqual(dirnames, [])
462 self.assertEqual(filenames, [])
463@@ -481,8 +448,6 @@
464 def testFtpIsDirectory(self):
465 # Test that no requests are made by isDirectory() when walking
466 # FTP sites.
467- from lp.registry.scripts.productreleasefinder.walker import (
468- HTTPWalker)
469 test = self
470
471 class TestHTTPWalker(HTTPWalker):
472@@ -501,8 +466,6 @@
473
474 def testConstructsUrl(self):
475 """combine_url constructs the URL correctly."""
476- from lp.registry.scripts.productreleasefinder.walker import (
477- combine_url)
478 self.assertEqual(combine_url("file:///base", "/subdir/", "file"),
479 "file:///subdir/file")
480 self.assertEqual(combine_url("file:///base", "/subdir", "file"),