Merge lp:~cjwatson/launchpad/cveimport-requests-handles-gzip into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18683
Proposed branch: lp:~cjwatson/launchpad/cveimport-requests-handles-gzip
Merge into: lp:launchpad
Diff against target: 148 lines (+87/-24)
2 files modified
lib/lp/bugs/scripts/cveimport.py (+24/-24)
lib/lp/bugs/scripts/tests/test_cveimport.py (+63/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/cveimport-requests-handles-gzip
Reviewer | Review Type | Date Requested | Status
William Grant | code | | Approve
Review via email: mp+347465@code.launchpad.net

Commit message

Fix handling of CVE database URLs returning data with Content-Encoding: gzip.

Description of the change

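I broke this in https://code.launchpad.net/~cjwatson/launchpad/cveimport-requests/+merge/347201. Since that change the download goes through requests, which normally decompresses a response sent with Content-Encoding: gzip automatically, so unconditionally gunzipping the downloaded body is no longer correct. We have to be a bit cunning to arrange that the file:// URL test in cve-update.txt still works: a gzipped file fetched via a file:// URL is not decompressed automatically, so we now check for the gzip magic bytes and only decompress when they are present.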

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/bugs/scripts/cveimport.py'
--- lib/lp/bugs/scripts/cveimport.py 2018-05-31 13:14:12 +0000
+++ lib/lp/bugs/scripts/cveimport.py 2018-06-05 17:07:09 +0000
@@ -8,7 +8,7 @@
 __metaclass__ = type

 import gzip
-import StringIO
+import io
 import time
 import xml.etree.cElementTree as cElementTree

@@ -198,7 +198,7 @@
         self.parser.add_option(
             "-u", "--cveurl", dest="cveurl",
             default=config.cveupdater.cve_db_url,
-            help="The URL for the gzipped XML CVE database.")
+            help="The URL for the XML CVE database.")

     def main(self):
         self.logger.info('Initializing...')
@@ -209,29 +209,8 @@
                 raise LaunchpadScriptFailure(
                     'Unable to open CVE database in %s'
                     % self.options.cvefile)
-
         elif self.options.cveurl is not None:
-            self.logger.info("Downloading CVE database from %s..." %
-                             self.options.cveurl)
-            proxies = {}
-            if config.launchpad.http_proxy:
-                proxies['http'] = config.launchpad.http_proxy
-                proxies['https'] = config.launchpad.http_proxy
-            try:
-                with override_timeout(config.cveupdater.timeout):
-                    # Command-line options are trusted, so allow file://
-                    # URLs to ease testing.
-                    response = urlfetch(
-                        self.options.cveurl, proxies=proxies, allow_file=True)
-            except requests.RequestException:
-                raise LaunchpadScriptFailure(
-                    'Unable to connect for CVE database %s'
-                    % self.options.cveurl)
-
-            cve_db_gz = response.content
-            self.logger.info("%d bytes downloaded." % len(cve_db_gz))
-            cve_db = gzip.GzipFile(
-                fileobj=StringIO.StringIO(cve_db_gz)).read()
+            cve_db = self.fetchCVEURL(self.options.cveurl)
         else:
             raise LaunchpadScriptFailure('No CVE database file or URL given.')

@@ -243,6 +222,27 @@
         self.logger.info('%d seconds to update database.'
                          % (finish_time - start_time))

+    def fetchCVEURL(self, url):
+        """Fetch CVE data from a URL, decompressing if necessary."""
+        self.logger.info("Downloading CVE database from %s..." % url)
+        try:
+            with override_timeout(config.cveupdater.timeout):
+                # Command-line options are trusted, so allow file://
+                # URLs to ease testing.
+                response = urlfetch(url, use_proxy=True, allow_file=True)
+        except requests.RequestException:
+            raise LaunchpadScriptFailure(
+                'Unable to connect for CVE database %s' % url)
+
+        cve_db = response.content
+        self.logger.info("%d bytes downloaded." % len(cve_db))
+        # requests will normally decompress this automatically, but that
+        # might not be the case if we're given a file:// URL to a gzipped
+        # file.
+        if cve_db[:2] == b'\037\213':  # gzip magic
+            cve_db = gzip.GzipFile(fileobj=io.BytesIO(cve_db)).read()
+        return cve_db
+
     def processCVEXML(self, cve_xml):
         """Process the CVE XML file.

=== added file 'lib/lp/bugs/scripts/tests/test_cveimport.py'
--- lib/lp/bugs/scripts/tests/test_cveimport.py 1970-01-01 00:00:00 +0000
+++ lib/lp/bugs/scripts/tests/test_cveimport.py 2018-06-05 17:07:09 +0000
@@ -0,0 +1,63 @@
+# Copyright 2018 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+__metaclass__ = type
+
+import gzip
+import io
+
+import responses
+
+from lp.bugs.scripts.cveimport import CVEUpdater
+from lp.services.log.logger import DevNullLogger
+from lp.testing import TestCase
+
+
+class TestCVEUpdater(TestCase):
+
+    @responses.activate
+    def test_fetch_uncompressed(self):
+        # Fetching a URL returning uncompressed data works.
+        url = 'http://cve.example.com/allitems.xml'
+        body = b'<?xml version="1.0"?>'
+        responses.add(
+            'GET', url, headers={'Content-Type': 'text/xml'}, body=body)
+        cve_updater = CVEUpdater(
+            'cve-updater', test_args=[], logger=DevNullLogger())
+        self.assertEqual(body, cve_updater.fetchCVEURL(url))
+
+    @responses.activate
+    def test_fetch_content_encoding_gzip(self):
+        # Fetching a URL returning Content-Encoding: gzip works.
+        url = 'http://cve.example.com/allitems.xml.gz'
+        body = b'<?xml version="1.0"?>'
+        gzipped_body_file = io.BytesIO()
+        with gzip.GzipFile(fileobj=gzipped_body_file, mode='wb') as f:
+            f.write(body)
+        responses.add(
+            'GET', url,
+            headers={
+                'Content-Type': 'text/xml',
+                'Content-Encoding': 'gzip',
+                },
+            body=gzipped_body_file.getvalue())
+        cve_updater = CVEUpdater(
+            'cve-updater', test_args=[], logger=DevNullLogger())
+        self.assertEqual(body, cve_updater.fetchCVEURL(url))
+
+    @responses.activate
+    def test_fetch_gzipped(self):
+        # Fetching a URL returning gzipped data without Content-Encoding works.
+        url = 'http://cve.example.com/allitems.xml.gz'
+        body = b'<?xml version="1.0"?>'
+        gzipped_body_file = io.BytesIO()
+        with gzip.GzipFile(fileobj=gzipped_body_file, mode='wb') as f:
+            f.write(body)
+        responses.add(
+            'GET', url, headers={'Content-Type': 'application/x-gzip'},
+            body=gzipped_body_file.getvalue())
+        cve_updater = CVEUpdater(
+            'cve-updater', test_args=[], logger=DevNullLogger())
+        self.assertEqual(body, cve_updater.fetchCVEURL(url))