Merge lp:~cjwatson/launchpad/timeout-with-requests into lp:launchpad

Proposed by Colin Watson on 2015-03-11
Status: Merged
Merged at revision: 18001
Proposed branch: lp:~cjwatson/launchpad/timeout-with-requests
Merge into: lp:launchpad
Diff against target: 490 lines (+174/-75)
7 files modified
lib/lp/app/browser/root.py (+5/-4)
lib/lp/services/googlesearch/__init__.py (+9/-7)
lib/lp/services/googlesearch/doc/google-searchservice.txt (+8/-0)
lib/lp/services/googlesearch/tests/test_google.py (+9/-8)
lib/lp/services/gpg/handler.py (+6/-7)
lib/lp/services/tests/test_timeout.py (+23/-14)
lib/lp/services/timeout.py (+114/-35)
To merge this branch: bzr merge lp:~cjwatson/launchpad/timeout-with-requests
Reviewer: William Grant
Review type: code
Date requested: 2015-03-11
Review status: Approve on 2016-03-23
Review via email: mp+252570@code.launchpad.net

Commit Message

Convert lp.services.timeout to use requests rather than urllib2.

Description of the Change

Convert lp.services.timeout to use requests rather than urllib2.

This is far more difficult than it ought to be: ideally none of this Cleanable* edifice would be needed in the first place, but requests/urllib3 makes us dig through several layers to get at the actual socket. There seems to be no reasonable way to implement this without reimplementing a couple of methods from requests, so those reimplementations need to be re-checked whenever we upgrade to a new version of requests.
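
For callers, the visible change is that urlfetch() now returns a requests.Response object rather than the body as a string, and raises requests exception types rather than urllib2 ones. A minimal before/after sketch of a call site (the feed URL here is a hypothetical example; the pattern mirrors the root.py change in the diff below):

    import requests

    from lp.services.timeout import urlfetch

    url = "http://blog.launchpad.net/feed"  # hypothetical example URL

    try:
        # Before this branch: data = urlfetch(url) returned a str and
        # raised urllib2.HTTPError/URLError (both IOError subclasses).
        response = urlfetch(url)
    except requests.RequestException:
        # urlfetch() now calls raise_for_status(), so HTTP error codes
        # also surface here, as requests.HTTPError.
        body = None
    else:
        # The body is fully consumed inside urlfetch(), so accessing
        # .content does not touch the socket again.
        body = response.content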

William Grant (wgrant) wrote:

You disable trust_env, but AFAIK we rely on squid.internal for accessing Google and blog.launchpad.net.

review: Needs Fixing (code)
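
Background on the trust_env point: requests only honours proxy environment variables such as http_proxy when Session.trust_env is true, so turning it off silently bypasses an internal proxy like squid.internal. A hedged sketch of the behaviour in question, assuming such a proxy exists (the port and target URL are illustrative only):

    from requests import Session

    session = Session()
    session.trust_env = False  # http_proxy/https_proxy env vars are ignored

    # With trust_env disabled, traffic that must go via the internal
    # proxy would need it passed explicitly on each request instead:
    response = session.get(
        "http://blog.launchpad.net/",
        proxies={"http": "http://squid.internal:3128/"},  # hypothetical port
    )
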
William Grant (wgrant):
review: Approve (code)
Colin Watson (cjwatson):

Preview Diff

=== modified file 'lib/lp/app/browser/root.py'
--- lib/lp/app/browser/root.py 2015-04-29 00:36:59 +0000
+++ lib/lp/app/browser/root.py 2016-04-19 14:24:39 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2013 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 """Browser code for the Launchpad root page."""

@@ -14,6 +14,7 @@

 import feedparser
 from lazr.batchnavigator.z3batching import batch
+import requests
 from zope.component import getUtility
 from zope.formlib.interfaces import ConversionError
 from zope.interface import Interface
@@ -172,10 +173,10 @@
             return cached_data
         try:
             # Use urlfetch which supports timeout
-            data = urlfetch(config.launchpad.homepage_recent_posts_feed)
-        except IOError:
+            response = urlfetch(config.launchpad.homepage_recent_posts_feed)
+        except requests.RequestException:
             return []
-        feed = feedparser.parse(data)
+        feed = feedparser.parse(response.content)
         posts = []
         max_count = config.launchpad.homepage_recent_posts_count
         # FeedParser takes care of HTML sanitisation.

=== modified file 'lib/lp/services/googlesearch/__init__.py'
--- lib/lp/services/googlesearch/__init__.py 2015-07-08 16:05:11 +0000
+++ lib/lp/services/googlesearch/__init__.py 2016-04-19 14:24:39 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).

 """Interfaces for searching and working with results."""
@@ -12,7 +12,6 @@
     ]

 import urllib
-import urllib2
 from urlparse import (
     parse_qsl,
     urlunparse,
@@ -21,6 +20,7 @@

 from lazr.restful.utils import get_current_browser_request
 from lazr.uri import URI
+import requests
 from zope.interface import implementer

 from lp.services.config import config
@@ -32,7 +32,10 @@
     ISearchService,
     )
 from lp.services.timeline.requesttimeline import get_request_timeline
-from lp.services.timeout import TimeoutError
+from lp.services.timeout import (
+    TimeoutError,
+    urlfetch,
+    )
 from lp.services.webapp import urlparse


@@ -203,20 +206,19 @@
         :raise: `GoogleWrongGSPVersion` if the xml cannot be parsed.
         """
         search_url = self.create_search_url(terms, start=start)
-        from lp.services.timeout import urlfetch
         request = get_current_browser_request()
         timeline = get_request_timeline(request)
         action = timeline.start("google-search-api", search_url)
         try:
-            gsp_xml = urlfetch(search_url)
-        except (TimeoutError, urllib2.HTTPError, urllib2.URLError) as error:
+            response = urlfetch(search_url)
+        except (TimeoutError, requests.RequestException) as error:
             # Google search service errors are not code errors. Let the
             # call site choose to handle the unavailable service.
             raise GoogleResponseError(
                 "The response errored: %s" % str(error))
         finally:
             action.finish()
-        page_matches = self._parse_google_search_protocol(gsp_xml)
+        page_matches = self._parse_google_search_protocol(response.content)
         return page_matches

     def _checkParameter(self, name, value, is_int=False):

=== modified file 'lib/lp/services/googlesearch/doc/google-searchservice.txt'
--- lib/lp/services/googlesearch/doc/google-searchservice.txt 2011-12-30 08:13:14 +0000
+++ lib/lp/services/googlesearch/doc/google-searchservice.txt 2016-04-19 14:24:39 +0000
@@ -6,6 +6,12 @@
 (cs-be) client. Given one or more terms, it will retrieve an XML
 summary of the matching launchpad.net pages.

+We silence logging of new HTTP connections from requests throughout.
+
+    >>> from fixtures import FakeLogger
+    >>> logger = FakeLogger()
+    >>> logger.setUp()
+

 GoogleSearchService
 ===================
@@ -616,3 +622,5 @@
     # Restore the configuration and the timeout state.
     >>> timeout_data = config.pop('timeout_data')
     >>> set_default_timeout_function(old_timeout_function)
+
+    >>> logger.cleanUp()

=== modified file 'lib/lp/services/googlesearch/tests/test_google.py'
--- lib/lp/services/googlesearch/tests/test_google.py 2011-12-30 08:13:14 +0000
+++ lib/lp/services/googlesearch/tests/test_google.py 2016-04-19 14:24:39 +0000
@@ -1,4 +1,4 @@
-# Copyright 2011 Canonical Ltd. This software is licensed under the
+# Copyright 2011-2016 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).

 """Test the google search service."""
@@ -6,16 +6,17 @@
 __metaclass__ = type

 from contextlib import contextmanager
-from urllib2 import (
+
+from requests.exceptions import (
+    ConnectionError,
     HTTPError,
-    URLError,
     )

 from lp.services.googlesearch import GoogleSearchService
 from lp.services.googlesearch.interfaces import GoogleResponseError
 from lp.services.timeout import TimeoutError
 from lp.testing import TestCase
-from lp.testing.layers import FunctionalLayer
+from lp.testing.layers import LaunchpadFunctionalLayer


 @contextmanager
@@ -41,7 +42,7 @@
 class TestGoogleSearchService(TestCase):
     """Test GoogleSearchService."""

-    layer = FunctionalLayer
+    layer = LaunchpadFunctionalLayer

     def setUp(self):
         super(TestGoogleSearchService, self).setUp()
@@ -54,9 +55,9 @@
             self.assertRaises(
                 GoogleResponseError, self.search_service.search, 'fnord')

-    def test_search_converts_URLError(self):
-        # The method converts URLError to GoogleResponseError.
-        with urlfetch_exception(URLError, 'oops'):
+    def test_search_converts_ConnectionError(self):
+        # The method converts ConnectionError to GoogleResponseError.
+        with urlfetch_exception(ConnectionError, 'oops'):
             self.assertRaises(
                 GoogleResponseError, self.search_service.search, 'fnord')

=== modified file 'lib/lp/services/gpg/handler.py'
--- lib/lp/services/gpg/handler.py 2016-03-30 03:14:02 +0000
+++ lib/lp/services/gpg/handler.py 2016-04-19 14:24:39 +0000
@@ -20,11 +20,11 @@
 import sys
 import tempfile
 import urllib
-import urllib2

 import gpgme
 from gpgservice_client import GPGClient
 from lazr.restful.utils import get_current_browser_request
+import requests
 from zope.interface import implementer
 from zope.security.proxy import removeSecurityProxy

@@ -490,21 +490,20 @@
         # minutes." The details of the error do not matter for users
         # (and for the code in callsites), but we should be able to see
         # if this problem occurs too often.
-        except urllib2.HTTPError as exc:
+        except requests.HTTPError as exc:
             # Old versions of SKS return a 500 error when queried for a
             # non-existent key. Production was upgraded in 2013/01, but
             # let's leave this here for a while.
             #
             # We can extract the fact that the key is unknown by looking
             # into the response's content.
-            if exc.code in (404, 500) and exc.fp is not None:
-                content = exc.fp.read()
+            if exc.response.status_code in (404, 500):
                 no_key_message = 'No results found: No keys found'
-                if content.find(no_key_message) >= 0:
+                if exc.response.content.find(no_key_message) >= 0:
                     raise GPGKeyDoesNotExistOnServer(fingerprint)
             errorlog.globalErrorUtility.raising(sys.exc_info(), request)
             raise GPGKeyTemporarilyNotFoundError(fingerprint)
-        except (TimeoutError, urllib2.URLError) as exc:
+        except (TimeoutError, requests.RequestException) as exc:
             errorlog.globalErrorUtility.raising(sys.exc_info(), request)
             raise GPGKeyTemporarilyNotFoundError(fingerprint)
         finally:
@@ -513,7 +512,7 @@
     def _grabPage(self, action, fingerprint):
         """Wrapper to collect KeyServer Pages."""
         url = self.getURLForKeyInServer(fingerprint, action)
-        return urlfetch(url)
+        return urlfetch(url).content


 @implementer(IPymeSignature)

=== modified file 'lib/lp/services/tests/test_timeout.py'
--- lib/lp/services/tests/test_timeout.py 2015-10-14 15:22:01 +0000
+++ lib/lp/services/tests/test_timeout.py 2016-04-19 14:24:39 +0000
@@ -1,4 +1,4 @@
-# Copyright 2012 Canonical Ltd. This software is licensed under the
+# Copyright 2012-2016 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).

 """timeout.py tests.
@@ -13,9 +13,14 @@
 import socket
 from textwrap import dedent
 import threading
-import urllib2
 import xmlrpclib

+from requests.exceptions import (
+    ConnectionError,
+    InvalidSchema,
+    )
+from testtools.matchers import MatchesStructure
+
 from lp.services.timeout import (
     get_default_timeout_function,
     set_default_timeout_function,
@@ -197,10 +202,10 @@
         self.assertEqual([True], using_default)

     def make_test_socket(self):
-        """One common use case for timing out is when making an HTTP request to
-        an external site to fetch content. To this end, the timeout module has
-        a urlfetch() function that retrieve a URL using custom urllib2 handlers
-        that will timeout using the default timeout function and clean-up the
+        """One common use case for timing out is when making an HTTP request
+        to an external site to fetch content. To this end, the timeout
+        module has a urlfetch() function that retrieves a URL in such a way
+        as to timeout using the default timeout function and clean-up the
         socket properly.
         """
         sock = socket.socket()
@@ -213,11 +218,11 @@
         http_server_url = 'http://%s:%d/' % sock.getsockname()
         return sock, http_server_url

-    def test_urlfetch_raises_urllib2_exceptions(self):
-        """Normal urllib2 exceptions are raised."""
+    def test_urlfetch_raises_requests_exceptions(self):
+        """Normal requests exceptions are raised."""
         sock, http_server_url = self.make_test_socket()

-        e = self.assertRaises(urllib2.URLError, urlfetch, http_server_url)
+        e = self.assertRaises(ConnectionError, urlfetch, http_server_url)
         self.assertIn('Connection refused', str(e))

     def test_urlfetch_timeout_after_listen(self):
@@ -290,15 +295,19 @@

         t = threading.Thread(target=success_result)
         t.start()
-        self.assertEqual('Success.', urlfetch(http_server_url))
+        self.assertThat(
+            urlfetch(http_server_url),
+            MatchesStructure.byEquality(status_code=200, content='Success.'))
         t.join()

-    def test_urlfetch_only_supports_http_urls(self):
-        """urlfetch() only supports http urls:"""
+    def test_urlfetch_does_not_support_ftp_urls(self):
+        """urlfetch() does not support ftp urls."""
         set_default_timeout_function(lambda: 1)
         self.addCleanup(set_default_timeout_function, None)
-        e = self.assertRaises(AssertionError, urlfetch, 'ftp://localhost')
-        self.assertEqual('only http is supported.', str(e))
+        url = 'ftp://localhost/'
+        e = self.assertRaises(InvalidSchema, urlfetch, url)
+        self.assertEqual(
+            "No connection adapters were found for '%s'" % url, str(e))

     def test_xmlrpc_transport(self):
         """ Another use case for timeouts is communicating with external

=== modified file 'lib/lp/services/timeout.py'
--- lib/lp/services/timeout.py 2014-08-29 01:41:14 +0000
+++ lib/lp/services/timeout.py 2016-04-19 14:24:39 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).

 """Helpers to time out external operations."""
@@ -14,16 +14,29 @@
     "with_timeout",
     ]

-import httplib
 import socket
 import sys
-from threading import Thread
-import urllib2
+from threading import (
+    Lock,
+    Thread,
+    )
 from xmlrpclib import (
     SafeTransport,
     Transport,
     )

+from requests import Session
+from requests.adapters import (
+    DEFAULT_POOLBLOCK,
+    HTTPAdapter,
+    )
+from requests.packages.urllib3.connectionpool import (
+    HTTPConnectionPool,
+    HTTPSConnectionPool,
+    )
+from requests.packages.urllib3.exceptions import ClosedPoolError
+from requests.packages.urllib3.poolmanager import PoolManager
+

 default_timeout_function = None

@@ -145,47 +158,113 @@
         return call_with_timeout


-class CleanableHTTPHandler(urllib2.HTTPHandler):
-    """Subclass of `urllib2.HTTPHandler` that can be cleaned-up."""
-
-    def http_open(self, req):
-        """See `urllib2.HTTPHandler`."""
-        def connection_factory(*args, **kwargs):
-            """Save the created connection so that we can clean it up."""
-            self.__conn = httplib.HTTPConnection(*args, **kwargs)
-            return self.__conn
-        return self.do_open(connection_factory, req)
-
-    def reset_connection(self):
-        """Reset the underlying HTTP connection."""
-        try:
-            self.__conn.sock.shutdown(socket.SHUT_RDWR)
-        except AttributeError:
-            # It's possible that the other thread closed the socket
-            # beforehand.
-            pass
-        self.__conn.close()
+class CleanableConnectionPoolMixin:
+    """Enhance urllib3's connection pools to support forced socket cleanup."""
+
+    def __init__(self, *args, **kwargs):
+        super(CleanableConnectionPoolMixin, self).__init__(*args, **kwargs)
+        self._all_connections = []
+        self._all_connections_mutex = Lock()
+
+    def _new_conn(self):
+        with self._all_connections_mutex:
+            if self._all_connections is None:
+                raise ClosedPoolError(self, "Pool is closed.")
+            conn = super(CleanableConnectionPoolMixin, self)._new_conn()
+            self._all_connections.append(conn)
+            return conn
+
+    def close(self):
+        with self._all_connections_mutex:
+            if self._all_connections is None:
+                return
+            for conn in self._all_connections:
+                sock = getattr(conn, "sock", None)
+                if sock is not None:
+                    sock.shutdown(socket.SHUT_RDWR)
+                    sock.close()
+                    conn.sock = None
+            self._all_connections = None
+        super(CleanableConnectionPoolMixin, self).close()
+
+
+class CleanableHTTPConnectionPool(
+        CleanableConnectionPoolMixin, HTTPConnectionPool):
+    pass
+
+
+class CleanableHTTPSConnectionPool(
+        CleanableConnectionPoolMixin, HTTPSConnectionPool):
+    pass
+
+
+cleanable_pool_classes_by_scheme = {
+    "http": CleanableHTTPConnectionPool,
+    "https": CleanableHTTPSConnectionPool,
+    }
+
+
+class CleanablePoolManager(PoolManager):
+    """A version of urllib3's PoolManager supporting forced socket cleanup."""
+
+    # XXX cjwatson 2015-03-11: Reimplements PoolManager._new_pool; check
+    # this when upgrading requests.
+    def _new_pool(self, scheme, host, port):
+        if scheme not in cleanable_pool_classes_by_scheme:
+            raise ValueError("Unhandled scheme: %s" % scheme)
+        pool_cls = cleanable_pool_classes_by_scheme[scheme]
+        kwargs = self.connection_pool_kw
+        if scheme == 'http':
+            kwargs = self.connection_pool_kw.copy()
+            for kw in ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
+                       'ssl_version'):
+                kwargs.pop(kw, None)
+
+        return pool_cls(host, port, **kwargs)
+
+
+class CleanableHTTPAdapter(HTTPAdapter):
+    """Enhance HTTPAdapter to use CleanablePoolManager."""
+
+    # XXX cjwatson 2015-03-11: Reimplements HTTPAdapter.init_poolmanager;
+    # check this when upgrading requests.
+    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK,
+                         **pool_kwargs):
+        # save these values for pickling
+        self._pool_connections = connections
+        self._pool_maxsize = maxsize
+        self._pool_block = block
+
+        self.poolmanager = CleanablePoolManager(
+            num_pools=connections, maxsize=maxsize, block=block, strict=True,
+            **pool_kwargs)


 class URLFetcher:
     """Object fetching remote URLs with a time out."""

     @with_timeout(cleanup='cleanup')
-    def fetch(self, url, data=None):
+    def fetch(self, url, **request_kwargs):
         """Fetch the URL using a custom HTTP handler supporting timeout."""
-        assert url.startswith('http://'), "only http is supported."
-        self.handler = CleanableHTTPHandler()
-        opener = urllib2.build_opener(self.handler)
-        return opener.open(url, data).read()
+        request_kwargs.setdefault("method", "GET")
+        self.session = Session()
+        # Mount our custom adapters.
+        self.session.mount("https://", CleanableHTTPAdapter())
+        self.session.mount("http://", CleanableHTTPAdapter())
+        response = self.session.request(url=url, **request_kwargs)
+        response.raise_for_status()
+        # Make sure the content has been consumed before returning.
+        response.content
+        return response

     def cleanup(self):
         """Reset the connection when the operation timed out."""
-        self.handler.reset_connection()
-
-
-def urlfetch(url, data=None):
-    """Wrapper for `urllib2.urlopen()` that times out."""
-    return URLFetcher().fetch(url, data)
+        self.session.close()
+
+
+def urlfetch(url, **request_kwargs):
+    """Wrapper for `requests.get()` that times out."""
+    return URLFetcher().fetch(url, **request_kwargs)


 class TransportWithTimeout(Transport):