Merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18711
Proposed branch: lp:~cjwatson/launchpad/explicit-proxy-product-release-finder
Merge into: lp:launchpad
Diff against target: 376 lines (+133/-37)
5 files modified
lib/lp/registry/scripts/productreleasefinder/finder.py (+31/-18)
lib/lp/registry/tests/test_prf_finder.py (+61/-16)
lib/lp/services/config/schema-lazr.conf (+5/-0)
lib/lp/services/tests/test_timeout.py (+24/-0)
lib/lp/services/timeout.py (+12/-3)
To merge this branch: bzr merge lp:~cjwatson/launchpad/explicit-proxy-product-release-finder
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+348564@code.launchpad.net

Commit message

Convert the product release finder to urlfetch.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/registry/scripts/productreleasefinder/finder.py'
2--- lib/lp/registry/scripts/productreleasefinder/finder.py 2014-06-03 10:43:24 +0000
3+++ lib/lp/registry/scripts/productreleasefinder/finder.py 2018-06-26 20:55:58 +0000
4@@ -1,4 +1,4 @@
5-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 __metaclass__ = type
10@@ -12,11 +12,12 @@
11 import mimetypes
12 import os
13 import re
14-import urllib
15+import tempfile
16 import urlparse
17
18 from cscvs.dircompare import path
19 import pytz
20+import requests
21 from zope.component import getUtility
22
23 from lp.app.validators.name import invalid_name_pattern
24@@ -32,12 +33,17 @@
25 from lp.registry.model.productseries import ProductSeries
26 from lp.registry.scripts.productreleasefinder.filter import FilterPattern
27 from lp.registry.scripts.productreleasefinder.hose import Hose
28+from lp.services.config import config
29 from lp.services.database import (
30 read_transaction,
31 write_transaction,
32 )
33 from lp.services.database.interfaces import IStore
34 from lp.services.librarian.model import LibraryFileAlias
35+from lp.services.timeout import (
36+ default_timeout,
37+ urlfetch,
38+ )
39
40
41 processors = '|'.join([
42@@ -104,8 +110,9 @@
43
44 def findReleases(self):
45 """Scan for new releases in all products."""
46- for product_name, filters in self.getFilters():
47- self.handleProduct(product_name, filters)
48+ with default_timeout(config.productreleasefinder.timeout):
49+ for product_name, filters in self.getFilters():
50+ self.handleProduct(product_name, filters)
51
52 @read_transaction
53 def getFilters(self):
54@@ -220,21 +227,27 @@
55 mimetype = 'application/octet-stream'
56
57 self.log.info("Downloading %s", url)
58- try:
59- local, headers = urllib.urlretrieve(url)
60- stat = os.stat(local)
61- except IOError:
62- self.log.error("Download of %s failed", url)
63- raise
64- except OSError:
65- self.log.error("Unable to stat downloaded file")
66- raise
67+ with tempfile.TemporaryFile(prefix="product-release-finder") as fp:
68+ try:
69+ response = urlfetch(
70+ url, trust_env=False, use_proxy=True, output_file=fp)
71+ # XXX cjwatson 2018-06-26: This will all change with
72+ # requests 3.x. See:
73+ # https://blog.petrzemek.net/2018/04/22/
74+ expected_length = response.headers.get("Content-Length")
75+ if expected_length is not None:
76+ actual_length = response.raw.tell()
77+ expected_length = int(expected_length)
78+ if actual_length < expected_length:
79+ raise IOError(
80+ "Incomplete read: got %d, expected %d" %
81+ (actual_length, expected_length))
82+ except (IOError, requests.RequestException):
83+ self.log.exception("Download of %s failed", url)
84+ raise
85+ stat = os.fstat(fp.fileno())
86+ fp.seek(0)
87
88- try:
89- fp = open(local, 'r')
90- os.unlink(local)
91 self.addReleaseTarball(product_name, series_name, version,
92 filename, stat.st_size, fp, mimetype)
93 file_names.add(filename)
94- finally:
95- fp.close()
96
97=== modified file 'lib/lp/registry/tests/test_prf_finder.py'
98--- lib/lp/registry/tests/test_prf_finder.py 2018-01-02 10:54:31 +0000
99+++ lib/lp/registry/tests/test_prf_finder.py 2018-06-26 20:55:58 +0000
100@@ -1,4 +1,4 @@
101-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
102+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
103 # GNU Affero General Public License version 3 (see the file LICENSE).
104
105 import logging
106@@ -8,6 +8,7 @@
107 import tempfile
108 import unittest
109
110+import responses
111 import transaction
112 from zope.component import getUtility
113 from zope.interface.verify import verifyObject
114@@ -219,22 +220,29 @@
115 def create_tarball(self, file_name):
116 """create a release tarball for testing"""
117 file_path = os.path.join(self.release_root, file_name)
118- try:
119- fp = open(file_path, 'w')
120+ with open(file_path, 'w') as fp:
121 fp.write('foo')
122- finally:
123- fp.close()
124 return file_path, file_name
125
126+ def add_tarball_response(self, file_path):
127+ def callback(request):
128+ with open(file_path, 'rb') as f:
129+ file_size = os.fstat(f.fileno()).st_size
130+ return 200, {'Content-Length': str(file_size)}, f.read()
131+
132+ url = 'http://example.com/' + file_path.lstrip('/')
133+ responses.add_callback('GET', url, callback)
134+ return url
135+
136 def setUp(self):
137 switch_dbuser(config.productreleasefinder.dbuser)
138 self.release_root = tempfile.mkdtemp()
139- self.release_url = 'file://' + self.release_root
140
141 def tearDown(self):
142 shutil.rmtree(self.release_root, ignore_errors=True)
143 reset_logging()
144
145+ @responses.activate
146 def test_handleRelease(self):
147 ztm = self.layer.txn
148 logging.basicConfig(level=logging.CRITICAL)
149@@ -243,7 +251,8 @@
150 file_path, file_name = self.create_tarball(
151 'evolution-42.0.orig.tar.gz')
152 file_names = set()
153- prf.handleRelease('evolution', 'trunk', file_path, file_names)
154+ url = self.add_tarball_response(file_path)
155+ prf.handleRelease('evolution', 'trunk', url, file_names)
156 self.assertTrue(file_name in file_names)
157 self.assertFalse(alt_file_name in file_names)
158
159@@ -271,6 +280,7 @@
160 bound = field.bind(fileinfo)
161 bound.validate(bound.get(fileinfo))
162
163+ @responses.activate
164 def test_handleReleaseWithExistingRelease(self):
165 # Test that handleRelease() can add a file release to an
166 # existing ProductRelease.
167@@ -289,7 +299,8 @@
168 prf = ProductReleaseFinder(ztm, logging.getLogger())
169 file_path, file_name = self.create_tarball('evolution-2.1.6.tar.gz')
170 file_names = prf.getReleaseFileNames('evolution')
171- prf.handleRelease('evolution', 'trunk', file_path, file_names)
172+ url = self.add_tarball_response(file_path)
173+ prf.handleRelease('evolution', 'trunk', url, file_names)
174
175 # verify that we now have files attached to the release:
176 evo = getUtility(IProductSet).getByName('evolution')
177@@ -297,6 +308,7 @@
178 release = trunk.getRelease('2.1.6')
179 self.assertEqual(release.files.count(), 1)
180
181+ @responses.activate
182 def test_handleReleaseTwice(self):
183 # Test that handleRelease() handles the case where a tarball
184 # has already been attached to the ProductRelease. We do this
185@@ -306,13 +318,15 @@
186 prf = ProductReleaseFinder(ztm, logging.getLogger())
187 file_path, file_name = self.create_tarball('evolution-42.0.tar.gz')
188 file_names = prf.getReleaseFileNames('evolution')
189- prf.handleRelease('evolution', 'trunk', file_path, file_names)
190- prf.handleRelease('evolution', 'trunk', file_path, file_names)
191+ url = self.add_tarball_response(file_path)
192+ prf.handleRelease('evolution', 'trunk', url, file_names)
193+ prf.handleRelease('evolution', 'trunk', url, file_names)
194 evo = getUtility(IProductSet).getByName('evolution')
195 trunk = evo.getSeries('trunk')
196 release = trunk.getRelease('42.0')
197 self.assertEqual(release.files.count(), 1)
198
199+ @responses.activate
200 def test_handleReleaseTwice_multiple_series(self):
201 # Series can have overlaping release file globs, but versions
202 # are unique to a project. A file is uploaded to a release only
203@@ -322,14 +336,17 @@
204 prf = ProductReleaseFinder(ztm, logging.getLogger())
205 file_path, file_name = self.create_tarball('evolution-1.2.3.tar.gz')
206 file_names = prf.getReleaseFileNames('evolution')
207- prf.handleRelease('evolution', 'trunk', file_path, file_names)
208+ url = self.add_tarball_response(file_path)
209+ prf.handleRelease('evolution', 'trunk', url, file_names)
210 file_path, file_name = self.create_tarball('evolution-1.2.3.tar.gz')
211- prf.handleRelease('evolution', '1.0', file_path, file_names)
212+ url = self.add_tarball_response(file_path)
213+ prf.handleRelease('evolution', '1.0', url, file_names)
214 product = getUtility(IProductSet).getByName('evolution')
215 release = product.getMilestone('1.2.3').product_release
216 self.assertEqual(release.files.count(), 1)
217
218- def test_handleRelease_alternate_verstion(self):
219+ @responses.activate
220+ def test_handleRelease_alternate_version(self):
221 """Verify that tar.gz and tar.bz2 versions are both uploaded."""
222 ztm = self.layer.txn
223 logging.basicConfig(level=logging.CRITICAL)
224@@ -338,8 +355,10 @@
225 alt_file_path, alt_file_name = self.create_tarball(
226 'evolution-45.0.tar.bz2')
227 file_names = prf.getReleaseFileNames('evolution')
228- prf.handleRelease('evolution', 'trunk', file_path, file_names)
229- prf.handleRelease('evolution', 'trunk', alt_file_path, file_names)
230+ url = self.add_tarball_response(file_path)
231+ prf.handleRelease('evolution', 'trunk', url, file_names)
232+ alt_url = self.add_tarball_response(alt_file_path)
233+ prf.handleRelease('evolution', 'trunk', alt_url, file_names)
234 evo = getUtility(IProductSet).getByName('evolution')
235 trunk = evo.getSeries('trunk')
236 release = trunk.getRelease('45.0')
237@@ -365,12 +384,38 @@
238 fp.write('foo')
239 fp.close()
240
241- url = self.release_url + '/evolution420.tar.gz'
242+ url = 'file://' + self.release_root + '/evolution420.tar.gz'
243 file_names = prf.getReleaseFileNames('evolution')
244 prf.handleRelease('evolution', 'trunk', url, file_names)
245 self.assertEqual(
246 "Unable to parse version from %s\n" % url, output.getvalue())
247
248+ @responses.activate
249+ def test_handleRelease_short_response(self):
250+ # handleRelease raises an exception on short responses.
251+ ztm = self.layer.txn
252+ logging.basicConfig(level=logging.CRITICAL)
253+ prf = ProductReleaseFinder(ztm, logging.getLogger())
254+ file_path, file_name = self.create_tarball(
255+ 'evolution-42.0.orig.tar.gz')
256+ file_size = os.stat(file_path).st_size
257+ file_names = set()
258+
259+ def callback(request):
260+ with open(file_path, 'rb') as f:
261+ file_size = os.fstat(f.fileno()).st_size
262+ return 200, {'Content-Length': str(file_size + 1)}, f.read()
263+
264+ url = 'http://example.com/' + file_path.lstrip('/')
265+ responses.add_callback('GET', url, callback)
266+ with self.assertRaises(IOError) as ctx:
267+ prf.handleRelease('evolution', 'trunk', url, file_names)
268+ self.assertEqual(
269+ 'Incomplete read: got %d, expected %d' % (
270+ file_size, file_size + 1),
271+ str(ctx.exception))
272+ self.assertNotIn(file_name, file_names)
273+
274
275 class ExtractVersionTestCase(unittest.TestCase):
276 """Verify that release version names are correctly extracted."""
277
278=== modified file 'lib/lp/services/config/schema-lazr.conf'
279--- lib/lp/services/config/schema-lazr.conf 2018-06-26 14:50:04 +0000
280+++ lib/lp/services/config/schema-lazr.conf 2018-06-26 20:55:58 +0000
281@@ -1540,6 +1540,11 @@
282 # datatype: string
283 dbuser: productreleasefinder
284
285+# The time in seconds that the product release finder will allow for
286+# downloading release tarballs.
287+# datatype: integer
288+timeout: 300
289+
290
291 [profiling]
292 # When set to True, the user is allowed to request profiles be run for
293
294=== modified file 'lib/lp/services/tests/test_timeout.py'
295--- lib/lp/services/tests/test_timeout.py 2018-06-05 01:50:30 +0000
296+++ lib/lp/services/tests/test_timeout.py 2018-06-26 20:55:58 +0000
297@@ -430,6 +430,30 @@
298 headers=ContainsDict({'Content-Length': Equals(8)}),
299 content=Equals('Success.')))
300
301+ def test_urlfetch_writes_to_output_file(self):
302+ """If given an output_file, urlfetch writes to it."""
303+ sock, http_server_url = self.make_test_socket()
304+ sock.listen(1)
305+
306+ def success_result():
307+ (client_sock, client_addr) = sock.accept()
308+ client_sock.sendall(dedent("""\
309+ HTTP/1.0 200 Ok
310+ Content-Type: text/plain
311+ Content-Length: 8
312+
313+ Success."""))
314+ client_sock.close()
315+
316+ t = threading.Thread(target=success_result)
317+ t.start()
318+ output_path = self.useFixture(TempDir()).join('out')
319+ with open(output_path, 'wb+') as f:
320+ urlfetch(http_server_url, output_file=f)
321+ f.seek(0)
322+ self.assertEqual(b'Success.', f.read())
323+ t.join()
324+
325 def test_xmlrpc_transport(self):
326 """ Another use case for timeouts is communicating with external
327 systems using XMLRPC. In order to allow timeouts using XMLRPC we
328
329=== modified file 'lib/lp/services/timeout.py'
330--- lib/lp/services/timeout.py 2018-06-22 22:07:42 +0000
331+++ lib/lp/services/timeout.py 2018-06-26 20:55:58 +0000
332@@ -41,6 +41,7 @@
333 from requests.packages.urllib3.exceptions import ClosedPoolError
334 from requests.packages.urllib3.poolmanager import PoolManager
335 from requests_file import FileAdapter
336+from requests_toolbelt.downloadutils import stream
337 from six import reraise
338
339 from lp.services.config import config
340@@ -324,7 +325,7 @@
341
342 @with_timeout(cleanup='cleanup')
343 def fetch(self, url, trust_env=None, use_proxy=False, allow_file=False,
344- **request_kwargs):
345+ output_file=None, **request_kwargs):
346 """Fetch the URL using a custom HTTP handler supporting timeout.
347
348 :param url: The URL to fetch.
349@@ -334,6 +335,8 @@
350 :param use_proxy: If True, use Launchpad's configured proxy.
351 :param allow_file: If True, allow file:// URLs. (Be careful to only
352 pass this if the URL is trusted.)
353+ :param output_file: If not None, download the response content to
354+ this file object or path.
355 :param request_kwargs: Additional keyword arguments passed on to
356 `Session.request`.
357 """
358@@ -351,10 +354,16 @@
359 request_kwargs.setdefault("proxies", {})
360 request_kwargs["proxies"]["http"] = config.launchpad.http_proxy
361 request_kwargs["proxies"]["https"] = config.launchpad.http_proxy
362+ if output_file is not None:
363+ request_kwargs["stream"] = True
364 response = self.session.request(url=url, **request_kwargs)
365 response.raise_for_status()
366- # Make sure the content has been consumed before returning.
367- response.content
368+ if output_file is None:
369+ # Make sure the content has been consumed before returning.
370+ response.content
371+ else:
372+ # Download the content to the given file.
373+ stream.stream_response_to_file(response, path=output_file)
374 # The responses library doesn't persist cookies in the session
375 # (https://github.com/getsentry/responses/issues/80). Work around
376 # this.