Merge lp:~cjwatson/launchpad/gina-stronger-checksums into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 17955
Proposed branch: lp:~cjwatson/launchpad/gina-stronger-checksums
Merge into: lp:launchpad
Diff against target: 423 lines (+180/-66)
5 files modified
lib/lp/archiveuploader/dscfile.py (+2/-1)
lib/lp/soyuz/scripts/gina/archive.py (+40/-39)
lib/lp/soyuz/scripts/gina/handlers.py (+3/-10)
lib/lp/soyuz/scripts/gina/packages.py (+33/-14)
lib/lp/soyuz/scripts/tests/test_gina.py (+102/-2)
To merge this branch: bzr merge lp:~cjwatson/launchpad/gina-stronger-checksums
Reviewer | Review Type | Date Requested | Status
William Grant | code | | Approve
Review via email: mp+289505@code.launchpad.net

Commit message

Make gina accept archives using xz/bzip2/no compression and/or using (only) stronger checksums than MD5.

Description of the change

Make gina accept archives using xz/bzip2/no compression and/or using (only) stronger checksums than MD5.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant):
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archiveuploader/dscfile.py'
2--- lib/lp/archiveuploader/dscfile.py 2016-03-01 21:20:44 +0000
3+++ lib/lp/archiveuploader/dscfile.py 2016-03-20 22:48:41 +0000
4@@ -1,4 +1,4 @@
5-# Copyright 2009-2014 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 """ DSCFile and related.
10@@ -251,6 +251,7 @@
11 "Build-Conflicts-Indep",
12 "Checksums-Sha1",
13 "Checksums-Sha256",
14+ "Checksums-Sha512",
15 "Format",
16 "Homepage",
17 "Standards-Version",
18
19=== modified file 'lib/lp/soyuz/scripts/gina/archive.py'
20--- lib/lp/soyuz/scripts/gina/archive.py 2013-05-22 09:51:08 +0000
21+++ lib/lp/soyuz/scripts/gina/archive.py 2016-03-20 22:48:41 +0000
22@@ -1,4 +1,4 @@
23-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
24+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
25 # GNU Affero General Public License version 3 (see the file LICENSE).
26
27 """Archive pool classes.
28@@ -18,6 +18,7 @@
29
30 from collections import defaultdict
31 import os
32+import shutil
33 import tempfile
34
35 import apt_pkg
36@@ -37,7 +38,7 @@
37 class ArchiveFilesystemInfo:
38 """Archive information files holder
39
40- This class gets and holds the Packages.gz and Source.gz files
41+ This class gets and holds the Packages and Sources files
42 from a Package Archive and holds them as internal attributes
43 to be used for other classes.
44 """
45@@ -62,53 +63,53 @@
46 raise MangledArchiveError("No archive directory for %s/%s" %
47 (distroseries, component))
48
49- # Search and get the files with full path
50- sources_zipped = os.path.join(root, "dists", distroseries,
51- component, "source", "Sources.gz")
52- if not os.path.exists(sources_zipped):
53- raise MangledArchiveError("Archive missing Sources.gz at %s"
54- % sources_zipped)
55-
56 # Extract Sources index.
57- srcfd, sources_tagfile = tempfile.mkstemp()
58- call("gzip -dc %s > %s" % (sources_zipped, sources_tagfile))
59- srcfile = os.fdopen(srcfd)
60-
61- # Holds the opened files and its names.
62- self.sources_tagfile = sources_tagfile
63- self.srcfile = srcfile
64+ sources_prefix = os.path.join(
65+ root, "dists", distroseries, component, "source", "Sources")
66+ self.srcfile, self.sources_tagfile = self.openTagFile(sources_prefix)
67
68 # Detect source-only mode and skip binary index parsing.
69 if source_only:
70 return
71
72- # Extract Binaries indexes.
73+ # Extract binary indexes.
74 dist_bin_dir = os.path.join(dist_dir, "binary-%s" % arch)
75 if not os.path.exists(dist_bin_dir):
76 raise NoBinaryArchive
77
78- binaries_zipped = os.path.join(dist_bin_dir, "Packages.gz")
79- if not os.path.exists(binaries_zipped):
80- raise MangledArchiveError("Archive mising Packages.gz at %s"
81- % binaries_zipped)
82- di_zipped = os.path.join(root, "dists", distroseries, component,
83- "debian-installer", "binary-%s" % arch,
84- "Packages.gz")
85- # Extract Binary indexes.
86- binfd, binaries_tagfile = tempfile.mkstemp()
87- call("gzip -dc %s > %s" % (binaries_zipped, binaries_tagfile))
88- binfile = os.fdopen(binfd)
89-
90- difd, di_tagfile = tempfile.mkstemp()
91- if os.path.exists(di_zipped):
92- call("gzip -dc %s > %s" % (di_zipped, di_tagfile))
93- difile = os.fdopen(difd)
94-
95- # Holds the opened files and its names.
96- self.binaries_tagfile = binaries_tagfile
97- self.binfile = binfile
98- self.di_tagfile = di_tagfile
99- self.difile = difile
100+ self.binfile, self.binaries_tagfile = self.openTagFile(
101+ os.path.join(dist_bin_dir, "Packages"))
102+
103+ try:
104+ self.difile, self.di_tagfile = self.openTagFile(
105+ os.path.join(
106+ root, "dists", distroseries, component,
107+ "debian-installer", "binary-%s" % arch, "Packages"))
108+ except MangledArchiveError:
109+ # d-i binary indexes may be missing. Put something empty in
110+ # place so that PackagesMap doesn't need to care.
111+ difd, self.di_tagfile = tempfile.mkstemp()
112+ self.difile = os.fdopen(difd)
113+
114+ def openTagFile(self, prefix):
115+ for suffix in (".xz", ".bz2", ".gz", ""):
116+ if os.path.exists(prefix + suffix):
117+ # Extract index.
118+ fd, tagfile = tempfile.mkstemp()
119+ if suffix == ".xz":
120+ call("xz -dc %s > %s" % (prefix + suffix, tagfile))
121+ elif suffix == ".bz2":
122+ call("bzip2 -dc %s > %s" % (prefix + suffix, tagfile))
123+ elif suffix == ".gz":
124+ call("gzip -dc %s > %s" % (prefix + suffix, tagfile))
125+ elif suffix == "":
126+ shutil.copy(prefix + suffix, tagfile)
127+ else:
128+ raise AssertionError("Unknown suffix '%s'" % suffix)
129+ return os.fdopen(fd), tagfile
130+ else:
131+ raise MangledArchiveError(
132+ "Archive missing any variant of %s" % prefix)
133
134 def cleanup(self):
135 os.unlink(self.sources_tagfile)
136
137=== modified file 'lib/lp/soyuz/scripts/gina/handlers.py'
138--- lib/lp/soyuz/scripts/gina/handlers.py 2016-03-04 14:18:23 +0000
139+++ lib/lp/soyuz/scripts/gina/handlers.py 2016-03-20 22:48:41 +0000
140@@ -1,4 +1,4 @@
141-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
142+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
143 # GNU Affero General Public License version 3 (see the file LICENSE).
144
145 """Gina db handlers.
146@@ -83,14 +83,7 @@
147
148 def check_not_in_librarian(files, archive_root, directory):
149 to_upload = []
150- if not isinstance(files, list):
151- # A little bit of ugliness. The source package's files attribute
152- # returns a three-tuple with md5sum, size and name. The binary
153- # package, on the other hand, only really provides a filename.
154- # This is tested through the two codepaths, so it should be safe.
155- files = [(None, files)]
156- for i in files:
157- fname = i[-1]
158+ for fname in files:
159 path = os.path.join(archive_root, directory)
160 if not os.path.exists(os.path.join(path, fname)):
161 # XXX kiko 2005-10-22: Untested
162@@ -781,7 +774,7 @@
163 def createBinaryPackage(self, bin, srcpkg, distroarchseries, archtag):
164 """Create a new binarypackage."""
165 fdir, fname = os.path.split(bin.filename)
166- to_upload = check_not_in_librarian(fname, bin.archive_root, fdir)
167+ to_upload = check_not_in_librarian([fname], bin.archive_root, fdir)
168 fname, path = to_upload[0]
169
170 componentID = self.distro_handler.getComponentByName(bin.component).id
171
172=== modified file 'lib/lp/soyuz/scripts/gina/packages.py'
173--- lib/lp/soyuz/scripts/gina/packages.py 2015-12-07 21:49:16 +0000
174+++ lib/lp/soyuz/scripts/gina/packages.py 2016-03-20 22:48:41 +0000
175@@ -1,4 +1,4 @@
176-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
177+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
178 # GNU Affero General Public License version 3 (see the file LICENSE).
179
180 """Package information classes.
181@@ -225,8 +225,14 @@
182 (self.package, self.version))
183
184 absent = object()
185- missing = [attr for attr in self._required if
186- getattr(self, attr, absent) is absent]
187+ missing = []
188+ for attr in self._required:
189+ if isinstance(attr, tuple):
190+ if all(getattr(self, oneattr, absent) is absent
191+ for oneattr in attr):
192+ missing.append(attr)
193+ elif getattr(self, attr, absent) is absent:
194+ missing.append(attr)
195 if missing:
196 raise MissingRequiredArguments(missing)
197
198@@ -249,14 +255,26 @@
199 self.date_uploaded = UTC_NOW
200 return True
201
202- def set_field(self, key, value):
203- """Record an arbitrary control field."""
204- lowkey = key.lower()
205+ def is_field_known(self, lowfield):
206+ """Is this field a known one?"""
207 # _known_fields contains the fields that archiveuploader recognises
208 # from a raw .dsc or .*deb; _required contains a few extra fields
209 # that are added to Sources and Packages index files. If a field is
210 # in neither, it counts as user-defined.
211- if lowkey in self._known_fields or lowkey in self._required:
212+ if lowfield in self._known_fields:
213+ return True
214+ for required in self._required:
215+ if isinstance(required, tuple):
216+ if lowfield in required:
217+ return True
218+ elif lowfield == required:
219+ return True
220+ return False
221+
222+ def set_field(self, key, value):
223+ """Record an arbitrary control field."""
224+ lowkey = key.lower()
225+ if self.is_field_known(lowkey):
226 setattr(self, lowkey.replace("-", "_"), value)
227 else:
228 if self._user_defined is None:
229@@ -294,7 +312,7 @@
230 'section',
231 'architecture',
232 'directory',
233- 'files',
234+ ('files', 'checksums-sha1', 'checksums-sha256', 'checksums-sha512'),
235 'component',
236 ]
237
238@@ -326,11 +344,12 @@
239 except UnicodeDecodeError:
240 raise DisplayNameDecodingError(
241 "Could not decode name %s" % displayname)
242- elif k == 'Files':
243- self.files = []
244- files = v.split("\n")
245- for f in files:
246- self.files.append(stripseq(f.split(" ")))
247+ elif k == 'Files' or k.startswith('Checksums-'):
248+ if not hasattr(self, 'files'):
249+ self.files = []
250+ files = v.split("\n")
251+ for f in files:
252+ self.files.append(stripseq(f.split(" "))[-1])
253 else:
254 self.set_field(k, v)
255
256@@ -413,7 +432,7 @@
257 'filename',
258 'component',
259 'size',
260- 'md5sum',
261+ ('md5sum', 'sha1', 'sha256', 'sha512'),
262 'description',
263 'summary',
264 'priority',
265
266=== modified file 'lib/lp/soyuz/scripts/tests/test_gina.py'
267--- lib/lp/soyuz/scripts/tests/test_gina.py 2015-12-07 21:49:16 +0000
268+++ lib/lp/soyuz/scripts/tests/test_gina.py 2016-03-20 22:48:41 +0000
269@@ -1,13 +1,16 @@
270-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
271+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
272 # GNU Affero General Public License version 3 (see the file LICENSE).
273
274 from doctest import DocTestSuite
275 import hashlib
276 import os
277+import shutil
278+import subprocess
279 import tempfile
280 from textwrap import dedent
281 from unittest import TestLoader
282
283+import apt_pkg
284 from fixtures import EnvironmentVariableFixture
285 import transaction
286
287@@ -23,6 +26,7 @@
288 from lp.soyuz.scripts.gina import ExecutionError
289 from lp.soyuz.scripts.gina.archive import (
290 ArchiveComponentItems,
291+ ArchiveFilesystemInfo,
292 PackagesMap,
293 )
294 from lp.soyuz.scripts.gina.dominate import dominate_imported_source_packages
295@@ -36,10 +40,14 @@
296 )
297 from lp.soyuz.scripts.gina.packages import (
298 BinaryPackageData,
299+ MissingRequiredArguments,
300 SourcePackageData,
301 )
302 from lp.soyuz.scripts.gina.runner import import_sourcepackages
303-from lp.testing import TestCaseWithFactory
304+from lp.testing import (
305+ TestCase,
306+ TestCaseWithFactory,
307+ )
308 from lp.testing.faketransaction import FakeTransaction
309 from lp.testing.layers import (
310 LaunchpadZopelessLayer,
311@@ -164,6 +172,49 @@
312 PackagePublishingStatus.DELETED, PackagePublishingStatus.DELETED])
313
314
315+class TestArchiveFilesystemInfo(TestCase):
316+
317+ def assertCompressionTypeWorks(self, compressor_func):
318+ archive_root = self.useTempDir()
319+ sampledata_root = os.path.join(
320+ os.path.dirname(__file__), "gina_test_archive")
321+ sampledata_component_dir = os.path.join(
322+ sampledata_root, "dists", "breezy", "main")
323+ component_dir = os.path.join(archive_root, "dists", "breezy", "main")
324+ os.makedirs(os.path.join(component_dir, "source"))
325+ shutil.copy(
326+ os.path.join(sampledata_component_dir, "source", "Sources"),
327+ os.path.join(component_dir, "source", "Sources"))
328+ compressor_func(os.path.join(component_dir, "source", "Sources"))
329+ os.makedirs(os.path.join(component_dir, "binary-i386"))
330+ shutil.copy(
331+ os.path.join(sampledata_component_dir, "binary-i386", "Packages"),
332+ os.path.join(component_dir, "binary-i386", "Packages"))
333+ compressor_func(os.path.join(component_dir, "binary-i386", "Packages"))
334+
335+ archive_info = ArchiveFilesystemInfo(
336+ archive_root, "breezy", "main", "i386")
337+ sources = apt_pkg.TagFile(archive_info.srcfile)
338+ self.assertEqual("archive-copier", next(sources)["Package"])
339+ binaries = apt_pkg.TagFile(archive_info.binfile)
340+ self.assertEqual("python-pam", next(binaries)["Package"])
341+
342+ def test_uncompressed(self):
343+ self.assertCompressionTypeWorks(lambda path: None)
344+
345+ def test_gzip(self):
346+ self.assertCompressionTypeWorks(
347+ lambda path: subprocess.check_call(["gzip", path]))
348+
349+ def test_bzip2(self):
350+ self.assertCompressionTypeWorks(
351+ lambda path: subprocess.check_call(["bzip2", path]))
352+
353+ def test_xz(self):
354+ self.assertCompressionTypeWorks(
355+ lambda path: subprocess.check_call(["xz", path]))
356+
357+
358 class TestSourcePackageData(TestCaseWithFactory):
359
360 layer = ZopelessDatabaseLayer
361@@ -287,6 +338,28 @@
362 tempfile.tempdir = None
363 self.assertEqual([], os.listdir(unpack_tmpdir))
364
365+ def test_checksum_fields(self):
366+ # We only need one of Files or Checksums-*.
367+ base_dsc_contents = {
368+ "Package": "foo",
369+ "Binary": "foo",
370+ "Version": "1.0-1",
371+ "Maintainer": "Foo Bar <foo@canonical.com>",
372+ "Section": "misc",
373+ "Architecture": "all",
374+ "Directory": "pool/main/f/foo",
375+ "Component": "main",
376+ }
377+ for field in (
378+ "Files", "Checksums-Sha1", "Checksums-Sha256",
379+ "Checksums-Sha512"):
380+ dsc_contents = dict(base_dsc_contents)
381+ dsc_contents[field] = "xxx 000 foo_1.0-1.dsc"
382+ sp_data = SourcePackageData(**dsc_contents)
383+ self.assertEqual(["foo_1.0-1.dsc"], sp_data.files)
384+ self.assertRaises(
385+ MissingRequiredArguments, SourcePackageData, **base_dsc_contents)
386+
387
388 class TestSourcePackageHandler(TestCaseWithFactory):
389
390@@ -352,6 +425,33 @@
391 self.assertEqual(PackagePublishingStatus.PUBLISHED, spph.status)
392
393
394+class TestBinaryPackageData(TestCaseWithFactory):
395+
396+ layer = ZopelessDatabaseLayer
397+
398+ def test_checksum_fields(self):
399+ # We only need one of MD5sum or SHA*.
400+ base_deb_contents = {
401+ "Package": "foo",
402+ "Installed-Size": "0",
403+ "Maintainer": "Foo Bar <foo@canonical.com>",
404+ "Section": "misc",
405+ "Architecture": "all",
406+ "Version": "1.0-1",
407+ "Filename": "pool/main/f/foo/foo_1.0-1_all.deb",
408+ "Component": "main",
409+ "Size": "0",
410+ "Description": "",
411+ "Priority": "extra",
412+ }
413+ for field in ("MD5sum", "SHA1", "SHA256", "SHA512"):
414+ deb_contents = dict(base_deb_contents)
415+ deb_contents[field] = "0"
416+ BinaryPackageData(**deb_contents)
417+ self.assertRaises(
418+ MissingRequiredArguments, BinaryPackageData, **base_deb_contents)
419+
420+
421 class TestBinaryPackageHandler(TestCaseWithFactory):
422
423 layer = LaunchpadZopelessLayer