Merge lp:~cjwatson/launchpad/uncompressed-indexes into lp:launchpad

Proposed by Colin Watson on 2016-02-04
Status: Merged
Merged at revision: 17912
Proposed branch: lp:~cjwatson/launchpad/uncompressed-indexes
Merge into: lp:launchpad
Diff against target: 990 lines (+271/-256)
11 files modified
lib/lp/archivepublisher/model/ftparchive.py (+4/-4)
lib/lp/archivepublisher/publishing.py (+79/-47)
lib/lp/archivepublisher/tests/apt-data/apt.conf (+3/-3)
lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test (+3/-3)
lib/lp/archivepublisher/tests/test_ftparchive.py (+35/-26)
lib/lp/archivepublisher/tests/test_publish_ftpmaster.py (+1/-1)
lib/lp/archivepublisher/tests/test_publishdistro.py (+13/-12)
lib/lp/archivepublisher/tests/test_publisher.py (+84/-134)
lib/lp/archivepublisher/tests/test_repositoryindexfile.py (+20/-12)
lib/lp/archivepublisher/utils.py (+21/-8)
lib/lp/soyuz/doc/soyuz-upload.txt (+8/-6)
To merge this branch: bzr merge lp:~cjwatson/launchpad/uncompressed-indexes
Reviewer Review Type Date Requested Status
William Grant code 2016-02-04 Approve on 2016-02-05
Review via email: mp+285109@code.launchpad.net

Commit Message

Consistently generate Release entries for uncompressed versions of files, even if they don't exist on the filesystem. Don't create uncompressed Packages/Sources files on the filesystem.

Description of the Change

Clean up handling of uncompressed archive index files.

apt requires checksums of the uncompressed versions of index files it downloads, even if it doesn't actually download the uncompressed versions. Our approach to this for Packages, Sources, and Translation-* has historically been to create the uncompressed versions as well. However, for the primary archive this requires a hack in lp:ubuntu-archive-publishing to remove the uncompressed versions before mirroring, and for PPAs it just leaves lots of unnecessary cruft all over the filesystem. Contents handling is now moving into apt, as is DEP-11 metadata handling; in both those cases it's quite inconvenient to take the same approach, so it's time to clean this up.

We now generate Release entries for uncompressed versions of files regardless of whether they exist on the filesystem, by decompressing one of the other versions. This means that we can also stop creating uncompressed Packages/Sources files, and progressively clean these up as archives are published. The hack in ubuntu-archive-publishing can be removed after this is deployed.

Later on, once we have .xz support, we'll be able to use something based on the "old_index_files" approach to remove the .bz2 files as well.

To post a comment you must log in.
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
2--- lib/lp/archivepublisher/model/ftparchive.py 2014-06-27 03:20:30 +0000
3+++ lib/lp/archivepublisher/model/ftparchive.py 2016-02-05 02:12:46 +0000
4@@ -1,4 +1,4 @@
5-# Copyright 2009-2014 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 from collections import defaultdict
10@@ -67,10 +67,10 @@
11
12 Default
13 {
14- Packages::Compress ". gzip bzip2";
15- Sources::Compress ". gzip bzip2";
16+ Packages::Compress "gzip bzip2";
17+ Sources::Compress "gzip bzip2";
18 Contents::Compress "gzip";
19- Translation::Compress ". gzip bzip2";
20+ Translation::Compress "gzip bzip2";
21 DeLinkLimit 0;
22 MaxContentsChange 12000;
23 FileMode 0644;
24
25=== modified file 'lib/lp/archivepublisher/publishing.py'
26--- lib/lp/archivepublisher/publishing.py 2016-01-14 17:00:34 +0000
27+++ lib/lp/archivepublisher/publishing.py 2016-02-05 02:12:46 +0000
28@@ -11,8 +11,10 @@
29
30 __metaclass__ = type
31
32+import bz2
33 from datetime import datetime
34 import errno
35+import gzip
36 import hashlib
37 from itertools import (
38 chain,
39@@ -110,6 +112,16 @@
40 return ordered
41
42
43+def remove_suffix(path):
44+ """Return `path` but with any compression suffix removed."""
45+ if path.endswith('.gz'):
46+ return path[:-len('.gz')]
47+ elif path.endswith('.bz2'):
48+ return path[:-len('.bz2')]
49+ else:
50+ return path
51+
52+
53 def get_suffixed_indices(path):
54 """Return a set of paths to compressed copies of the given index."""
55 return set([path + suffix for suffix in ('', '.gz', '.bz2')])
56@@ -563,20 +575,26 @@
57 self._writeSuite(distroseries, pocket)
58
59 def _allIndexFiles(self, distroseries):
60- """Return all index files on disk for a distroseries."""
61+ """Return all index files on disk for a distroseries.
62+
63+ For each index file, this yields a tuple of (function to open file
64+ in uncompressed form, path to file).
65+ """
66 components = self.archive.getComponentsForSeries(distroseries)
67 for pocket in self.archive.getPockets():
68 suite_name = distroseries.getSuite(pocket)
69 for component in components:
70- yield get_sources_path(self._config, suite_name, component)
71+ yield gzip.open, get_sources_path(
72+ self._config, suite_name, component) + ".gz"
73 for arch in distroseries.architectures:
74 if not arch.enabled:
75 continue
76- yield get_packages_path(
77- self._config, suite_name, component, arch)
78+ yield gzip.open, get_packages_path(
79+ self._config, suite_name, component, arch) + ".gz"
80 for subcomp in self.subcomponents:
81- yield get_packages_path(
82- self._config, suite_name, component, arch, subcomp)
83+ yield gzip.open, get_packages_path(
84+ self._config, suite_name, component, arch,
85+ subcomp) + ".gz"
86
87 def _latestNonEmptySeries(self):
88 """Find the latest non-empty series in an archive.
89@@ -587,11 +605,12 @@
90 through what we published on disk.
91 """
92 for distroseries in self.distro:
93- for index in self._allIndexFiles(distroseries):
94+ for open_func, index in self._allIndexFiles(distroseries):
95 try:
96- if os.path.getsize(index) > 0:
97- return distroseries
98- except OSError:
99+ with open_func(index) as index_file:
100+ if index_file.read(1):
101+ return distroseries
102+ except IOError:
103 pass
104
105 def createSeriesAliases(self):
106@@ -793,6 +812,7 @@
107 """Make sure the timestamps on all files in a suite match."""
108 location = os.path.join(self._config.distsroot, suite)
109 paths = [os.path.join(location, path) for path in all_files]
110+ paths = [path for path in paths if os.path.exists(path)]
111 latest_timestamp = max(os.stat(path).st_mtime for path in paths)
112 for path in paths:
113 os.utime(path, (latest_timestamp, latest_timestamp))
114@@ -831,17 +851,17 @@
115 for dep11_file in os.listdir(dep11_dir):
116 if (dep11_file.startswith("Components-") or
117 dep11_file.startswith("icons-")):
118- all_files.add(
119- os.path.join(component, "dep11", dep11_file))
120+ dep11_path = os.path.join(
121+ component, "dep11", dep11_file)
122+ all_files.add(remove_suffix(dep11_path))
123+ all_files.add(dep11_path)
124 except OSError as e:
125 if e.errno != errno.ENOENT:
126 raise
127 for architecture in all_architectures:
128 for contents_path in get_suffixed_indices(
129 'Contents-' + architecture):
130- if os.path.exists(os.path.join(
131- self._config.distsroot, suite, contents_path)):
132- all_files.add(contents_path)
133+ all_files.add(contents_path)
134
135 drsummary = "%s %s " % (self.distro.displayname,
136 distroseries.displayname)
137@@ -868,21 +888,12 @@
138 release_file["ButAutomaticUpgrades"] = "yes"
139
140 for filename in sorted(all_files, key=os.path.dirname):
141- entry = self._readIndexFileContents(suite, filename)
142- if entry is None:
143+ hashes = self._readIndexFileHashes(suite, filename)
144+ if hashes is None:
145 continue
146- release_file.setdefault("MD5Sum", []).append({
147- "md5sum": hashlib.md5(entry).hexdigest(),
148- "name": filename,
149- "size": len(entry)})
150- release_file.setdefault("SHA1", []).append({
151- "sha1": hashlib.sha1(entry).hexdigest(),
152- "name": filename,
153- "size": len(entry)})
154- release_file.setdefault("SHA256", []).append({
155- "sha256": hashlib.sha256(entry).hexdigest(),
156- "name": filename,
157- "size": len(entry)})
158+ release_file.setdefault("MD5Sum", []).append(hashes["md5sum"])
159+ release_file.setdefault("SHA1", []).append(hashes["sha1"])
160+ release_file.setdefault("SHA256", []).append(hashes["sha256"])
161
162 self._writeReleaseFile(suite, release_file)
163 all_files.add("Release")
164@@ -960,12 +971,13 @@
165
166 i18n_subpath = os.path.join(component, "i18n")
167 i18n_dir = os.path.join(self._config.distsroot, suite, i18n_subpath)
168- i18n_files = []
169+ i18n_files = set()
170 try:
171 for i18n_file in os.listdir(i18n_dir):
172 if not i18n_file.startswith('Translation-'):
173 continue
174- i18n_files.append(i18n_file)
175+ i18n_files.add(remove_suffix(i18n_file))
176+ i18n_files.add(i18n_file)
177 except OSError as e:
178 if e.errno != errno.ENOENT:
179 raise
180@@ -976,14 +988,11 @@
181
182 i18n_index = I18nIndex()
183 for i18n_file in sorted(i18n_files):
184- entry = self._readIndexFileContents(
185+ hashes = self._readIndexFileHashes(
186 suite, os.path.join(i18n_subpath, i18n_file))
187- if entry is None:
188+ if hashes is None:
189 continue
190- i18n_index.setdefault("SHA1", []).append({
191- "sha1": hashlib.sha1(entry).hexdigest(),
192- "name": i18n_file,
193- "size": len(entry)})
194+ i18n_index.setdefault("SHA1", []).append(hashes["sha1"])
195 # Schedule i18n files for inclusion in the Release file.
196 all_series_files.add(os.path.join(i18n_subpath, i18n_file))
197
198@@ -993,24 +1002,47 @@
199 # Schedule this for inclusion in the Release file.
200 all_series_files.add(os.path.join(component, "i18n", "Index"))
201
202- def _readIndexFileContents(self, distroseries_name, file_name):
203- """Read an index files' contents.
204+ def _readIndexFileHashes(self, distroseries_name, file_name):
205+ """Read an index file and return its hashes.
206
207 :param distroseries_name: Distro series name
208 :param file_name: Filename relative to the parent container directory.
209- :return: File contents, or None if the file could not be found.
210+ :return: A dictionary mapping hash field names to dictionaries of
211+ their components as defined by debian.deb822.Release (e.g.
212+ {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
213+ if the file could not be found.
214 """
215+ open_func = open
216 full_name = os.path.join(self._config.distsroot,
217 distroseries_name, file_name)
218 if not os.path.exists(full_name):
219- # The file we were asked to write out doesn't exist.
220- # Most likely we have an incomplete archive (E.g. no sources
221- # for a given distroseries). This is a non-fatal issue
222- self.log.debug("Failed to find " + full_name)
223- return None
224+ if os.path.exists(full_name + '.gz'):
225+ open_func = gzip.open
226+ full_name = full_name + '.gz'
227+ elif os.path.exists(full_name + '.bz2'):
228+ open_func = bz2.BZ2File
229+ full_name = full_name + '.bz2'
230+ else:
231+ # The file we were asked to write out doesn't exist.
232+ # Most likely we have an incomplete archive (e.g. no sources
233+ # for a given distroseries). This is a non-fatal issue.
234+ self.log.debug("Failed to find " + full_name)
235+ return None
236
237- with open(full_name, 'r') as in_file:
238- return in_file.read()
239+ hashes = {
240+ "md5sum": hashlib.md5(),
241+ "sha1": hashlib.sha1(),
242+ "sha256": hashlib.sha256(),
243+ }
244+ size = 0
245+ with open_func(full_name) as in_file:
246+ for chunk in iter(lambda: in_file.read(256 * 1024), ""):
247+ for hashobj in hashes.values():
248+ hashobj.update(chunk)
249+ size += len(chunk)
250+ return {
251+ alg: {alg: hashobj.hexdigest(), "name": file_name, "size": size}
252+ for alg, hashobj in hashes.items()}
253
254 def deleteArchive(self):
255 """Delete the archive.
256
257=== modified file 'lib/lp/archivepublisher/tests/apt-data/apt.conf'
258--- lib/lp/archivepublisher/tests/apt-data/apt.conf 2014-06-11 08:23:35 +0000
259+++ lib/lp/archivepublisher/tests/apt-data/apt.conf 2016-02-05 02:12:46 +0000
260@@ -8,10 +8,10 @@
261
262 Default
263 {
264- Packages::Compress ". gzip bzip2";
265- Sources::Compress ". gzip bzip2";
266+ Packages::Compress "gzip bzip2";
267+ Sources::Compress "gzip bzip2";
268 Contents::Compress "gzip";
269- Translation::Compress ". gzip bzip2";
270+ Translation::Compress "gzip bzip2";
271 DeLinkLimit 0;
272 MaxContentsChange 12000;
273 FileMode 0644;
274
275=== modified file 'lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test'
276--- lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test 2014-06-11 08:23:35 +0000
277+++ lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test 2016-02-05 02:12:46 +0000
278@@ -8,10 +8,10 @@
279
280 Default
281 {
282- Packages::Compress ". gzip bzip2";
283- Sources::Compress ". gzip bzip2";
284+ Packages::Compress "gzip bzip2";
285+ Sources::Compress "gzip bzip2";
286 Contents::Compress "gzip";
287- Translation::Compress ". gzip bzip2";
288+ Translation::Compress "gzip bzip2";
289 DeLinkLimit 0;
290 MaxContentsChange 12000;
291 FileMode 0644;
292
293=== modified file 'lib/lp/archivepublisher/tests/test_ftparchive.py'
294--- lib/lp/archivepublisher/tests/test_ftparchive.py 2015-04-09 05:16:37 +0000
295+++ lib/lp/archivepublisher/tests/test_ftparchive.py 2016-02-05 02:12:46 +0000
296@@ -1,4 +1,4 @@
297-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
298+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
299 # GNU Affero General Public License version 3 (see the file LICENSE).
300
301 """Tests for ftparchive.py"""
302@@ -6,6 +6,7 @@
303 __metaclass__ = type
304
305 import difflib
306+import gzip
307 import os
308 import re
309 import shutil
310@@ -98,15 +99,18 @@
311 super(TestFTPArchive, self).tearDown()
312 shutil.rmtree(self._config.distroroot)
313
314- def _verifyFile(self, filename, directory):
315+ def _verifyFile(self, filename, directory,
316+ result_suffix="", result_open_func=open):
317 """Compare byte-to-byte the given file and the respective sample.
318
319 It's a poor way of testing files generated by apt-ftparchive.
320 """
321- result_path = os.path.join(directory, filename)
322- result_text = open(result_path).read()
323+ result_path = os.path.join(directory, filename) + result_suffix
324+ with result_open_func(result_path) as result_file:
325+ result_text = result_file.read()
326 sample_path = os.path.join(self._sampledir, filename)
327- sample_text = open(sample_path).read()
328+ with open(sample_path) as sample_file:
329+ sample_text = sample_file.read()
330 # When the comparison between the sample text and the generated text
331 # differ, just printing the strings will be less than optimal. Use
332 # difflib to get a line-by-line comparison that makes it much more
333@@ -115,9 +119,9 @@
334 sample_text.splitlines(), result_text.splitlines())
335 self.assertEqual(sample_text, result_text, '\n'.join(diff_lines))
336
337- def _verifyEmpty(self, path):
338+ def _verifyEmpty(self, path, open_func=open):
339 """Assert that the given file is empty."""
340- with open(path) as result_file:
341+ with open_func(path) as result_file:
342 self.assertEqual("", result_file.read())
343
344 def _addRepositoryFile(self, component, sourcename, leafname,
345@@ -436,14 +440,19 @@
346 # check'. Although they should remain active in PQM to avoid possible
347 # regressions.
348 fa.runApt(apt_conf)
349- self._verifyFile("Packages",
350- os.path.join(self._distsdir, "hoary-test", "main", "binary-i386"))
351+ self._verifyFile(
352+ "Packages",
353+ os.path.join(self._distsdir, "hoary-test", "main", "binary-i386"),
354+ result_suffix=".gz", result_open_func=gzip.open)
355 self._verifyEmpty(
356 os.path.join(
357 self._distsdir, "hoary-test", "main", "debian-installer",
358- "binary-i386", "Packages"))
359- self._verifyFile("Sources",
360- os.path.join(self._distsdir, "hoary-test", "main", "source"))
361+ "binary-i386", "Packages.gz"),
362+ open_func=gzip.open)
363+ self._verifyFile(
364+ "Sources",
365+ os.path.join(self._distsdir, "hoary-test", "main", "source"),
366+ result_suffix=".gz", result_open_func=gzip.open)
367
368 # XXX cprov 2007-03-21: see above, byte-to-byte configuration
369 # comparing is weak.
370@@ -509,23 +518,23 @@
371 fa.runApt(apt_conf)
372 self.assertTrue(os.path.exists(
373 os.path.join(self._distsdir, "hoary-test-updates", "main",
374- "binary-i386", "Packages")))
375- self.assertTrue(os.path.exists(
376- os.path.join(self._distsdir, "hoary-test-updates", "main",
377- "debian-installer", "binary-i386", "Packages")))
378- self.assertTrue(os.path.exists(
379- os.path.join(self._distsdir, "hoary-test-updates", "main",
380- "source", "Sources")))
381+ "binary-i386", "Packages.gz")))
382+ self.assertTrue(os.path.exists(
383+ os.path.join(self._distsdir, "hoary-test-updates", "main",
384+ "debian-installer", "binary-i386", "Packages.gz")))
385+ self.assertTrue(os.path.exists(
386+ os.path.join(self._distsdir, "hoary-test-updates", "main",
387+ "source", "Sources.gz")))
388
389 self.assertFalse(os.path.exists(
390 os.path.join(self._distsdir, "hoary-test", "main",
391- "binary-i386", "Packages")))
392- self.assertFalse(os.path.exists(
393- os.path.join(self._distsdir, "hoary-test", "main",
394- "debian-installer", "binary-i386", "Packages")))
395- self.assertFalse(os.path.exists(
396- os.path.join(self._distsdir, "hoary-test", "main",
397- "source", "Sources")))
398+ "binary-i386", "Packages.gz")))
399+ self.assertFalse(os.path.exists(
400+ os.path.join(self._distsdir, "hoary-test", "main",
401+ "debian-installer", "binary-i386", "Packages.gz")))
402+ self.assertFalse(os.path.exists(
403+ os.path.join(self._distsdir, "hoary-test", "main",
404+ "source", "Sources.gz")))
405
406 def test_cleanCaches_noop_if_recent(self):
407 # cleanCaches does nothing if it was run recently.
408
409=== modified file 'lib/lp/archivepublisher/tests/test_publish_ftpmaster.py'
410--- lib/lp/archivepublisher/tests/test_publish_ftpmaster.py 2016-01-13 17:54:05 +0000
411+++ lib/lp/archivepublisher/tests/test_publish_ftpmaster.py 2016-02-05 02:12:46 +0000
412@@ -1309,5 +1309,5 @@
413 self.assertEqual([], script.listSuitesNeedingIndexes(series))
414 sources = os.path.join(
415 getPubConfig(series.main_archive).distsroot,
416- series.name, "main", "source", "Sources")
417+ series.name, "main", "source", "Sources.gz")
418 self.assertTrue(file_exists(sources))
419
420=== modified file 'lib/lp/archivepublisher/tests/test_publishdistro.py'
421--- lib/lp/archivepublisher/tests/test_publishdistro.py 2015-08-28 08:38:54 +0000
422+++ lib/lp/archivepublisher/tests/test_publishdistro.py 2016-02-05 02:12:46 +0000
423@@ -1,4 +1,4 @@
424-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
425+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
426 # GNU Affero General Public License version 3 (see the file LICENSE).
427
428 """Functional tests for publish-distro.py script."""
429@@ -376,17 +376,18 @@
430
431 # Check some index files
432 index_path = (
433- "%s/hoary-test-updates/main/binary-i386/Packages"
434- % self.config.distsroot)
435- self.assertExists(index_path)
436-
437- index_path = (
438- "%s/hoary-test-backports/main/binary-i386/Packages"
439- % self.config.distsroot)
440- self.assertExists(index_path)
441-
442- index_path = (
443- "%s/hoary-test/main/binary-i386/Packages" % self.config.distsroot)
444+ "%s/hoary-test-updates/main/binary-i386/Packages.gz"
445+ % self.config.distsroot)
446+ self.assertExists(index_path)
447+
448+ index_path = (
449+ "%s/hoary-test-backports/main/binary-i386/Packages.gz"
450+ % self.config.distsroot)
451+ self.assertExists(index_path)
452+
453+ index_path = (
454+ "%s/hoary-test/main/binary-i386/Packages.gz" %
455+ self.config.distsroot)
456 self.assertNotExists(index_path)
457
458
459
460=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
461--- lib/lp/archivepublisher/tests/test_publisher.py 2016-01-14 17:00:34 +0000
462+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-02-05 02:12:46 +0000
463@@ -1009,42 +1009,39 @@
464 self.assertEqual(
465 1 + old_num_pending_archives, new_num_pending_archives)
466
467- def _checkCompressedFile(self, archive_publisher, compressed_file_path,
468- uncompressed_file_path):
469- """Assert that a compressed file is equal to its uncompressed version.
470+ def _checkCompressedFiles(self, archive_publisher, base_file_path,
471+ suffixes):
472+ """Assert that the various compressed versions of a file are equal.
473
474- Check that a compressed file, such as Packages.gz and Sources.gz,
475- and bz2 variations, matches its uncompressed partner. The file
476- paths are relative to breezy-autotest/main under the
477- archive_publisher's configured dist root. 'breezy-autotest' is
478- our test distroseries name.
479+ Check that the various versions of a compressed file, such as
480+ Packages.gz/Packages.bz2 and Sources.gz/Sources.bz2, and bz2
481+ variations, all have identical contents. The file paths are
482+ relative to breezy-autotest/main under the archive_publisher's
483+ configured dist root. 'breezy-autotest' is our test distroseries
484+ name.
485
486 The contents of the uncompressed file is returned as a list of lines
487 in the file.
488 """
489- index_compressed_path = os.path.join(
490- archive_publisher._config.distsroot, 'breezy-autotest', 'main',
491- compressed_file_path)
492- index_path = os.path.join(
493- archive_publisher._config.distsroot, 'breezy-autotest', 'main',
494- uncompressed_file_path)
495-
496- if index_compressed_path.endswith('.gz'):
497- index_compressed_contents = gzip.GzipFile(
498- filename=index_compressed_path).read().splitlines()
499- elif index_compressed_path.endswith('.bz2'):
500- index_compressed_contents = bz2.BZ2File(
501- filename=index_compressed_path).read().splitlines()
502- else:
503- raise AssertionError(
504- 'Unsupported compression: %s' % compressed_file_path)
505-
506- with open(index_path, 'r') as index_file:
507- index_contents = index_file.read().splitlines()
508-
509- self.assertEqual(index_contents, index_compressed_contents)
510-
511- return index_contents
512+ index_base_path = os.path.join(
513+ archive_publisher._config.distsroot, 'breezy-autotest', 'main',
514+ base_file_path)
515+
516+ all_contents = []
517+ for suffix in suffixes:
518+ if suffix == '.gz':
519+ open_func = gzip.open
520+ elif suffix == '.bz2':
521+ open_func = bz2.BZ2File
522+ else:
523+ open_func = open
524+ with open_func(index_base_path + suffix) as index_file:
525+ all_contents.append(index_file.read().splitlines())
526+
527+ for contents in all_contents[1:]:
528+ self.assertEqual(all_contents[0], contents)
529+
530+ return all_contents[0]
531
532 def setupPPAArchiveIndexTest(self, long_descriptions=True,
533 feature_flag=False):
534@@ -1100,16 +1097,11 @@
535 """Building Archive Indexes from PPA publications."""
536 archive_publisher = self.setupPPAArchiveIndexTest()
537
538- # A compressed and uncompressed Sources file are written;
539- # ensure that they are the same after uncompressing the former.
540- index_contents = self._checkCompressedFile(
541- archive_publisher, os.path.join('source', 'Sources.bz2'),
542- os.path.join('source', 'Sources'))
543-
544- index_contents = self._checkCompressedFile(
545- archive_publisher, os.path.join('source', 'Sources.gz'),
546- os.path.join('source', 'Sources'))
547-
548+ # Various compressed Sources files are written; ensure that they are
549+ # the same after decompression.
550+ index_contents = self._checkCompressedFiles(
551+ archive_publisher, os.path.join('source', 'Sources'),
552+ ['.gz', '.bz2'])
553 self.assertEqual(
554 ['Package: foo',
555 'Binary: foo-bin',
556@@ -1131,16 +1123,11 @@
557 ''],
558 index_contents)
559
560- # A compressed and an uncompressed Packages file are written;
561- # ensure that they are the same after uncompressing the former.
562- index_contents = self._checkCompressedFile(
563- archive_publisher, os.path.join('binary-i386', 'Packages.bz2'),
564- os.path.join('binary-i386', 'Packages'))
565-
566- index_contents = self._checkCompressedFile(
567- archive_publisher, os.path.join('binary-i386', 'Packages.gz'),
568- os.path.join('binary-i386', 'Packages'))
569-
570+ # Various compressed Packages files are written; ensure that they
571+ # are the same after decompression.
572+ index_contents = self._checkCompressedFiles(
573+ archive_publisher, os.path.join('binary-i386', 'Packages'),
574+ ['.gz', '.bz2'])
575 self.assertEqual(
576 ['Package: foo-bin',
577 'Source: foo',
578@@ -1163,19 +1150,13 @@
579 ''],
580 index_contents)
581
582- # A compressed and an uncompressed Packages file are written for
583- # 'debian-installer' section for each architecture. It will list
584+ # Various compressed Packages files are written for the
585+ # 'debian-installer' section for each architecture. They will list
586 # the 'udeb' files.
587- index_contents = self._checkCompressedFile(
588- archive_publisher,
589- os.path.join('debian-installer', 'binary-i386', 'Packages.bz2'),
590- os.path.join('debian-installer', 'binary-i386', 'Packages'))
591-
592- index_contents = self._checkCompressedFile(
593- archive_publisher,
594- os.path.join('debian-installer', 'binary-i386', 'Packages.gz'),
595- os.path.join('debian-installer', 'binary-i386', 'Packages'))
596-
597+ index_contents = self._checkCompressedFiles(
598+ archive_publisher,
599+ os.path.join('debian-installer', 'binary-i386', 'Packages'),
600+ ['.gz', '.bz2'])
601 self.assertEqual(
602 ['Package: bingo',
603 'Source: foo',
604@@ -1197,16 +1178,10 @@
605 index_contents)
606
607 # 'debug' too, when publish_debug_symbols is enabled.
608- index_contents = self._checkCompressedFile(
609- archive_publisher,
610- os.path.join('debug', 'binary-i386', 'Packages.bz2'),
611- os.path.join('debug', 'binary-i386', 'Packages'))
612-
613- index_contents = self._checkCompressedFile(
614- archive_publisher,
615- os.path.join('debug', 'binary-i386', 'Packages.gz'),
616- os.path.join('debug', 'binary-i386', 'Packages'))
617-
618+ index_contents = self._checkCompressedFiles(
619+ archive_publisher,
620+ os.path.join('debug', 'binary-i386', 'Packages'),
621+ ['.gz', '.bz2'])
622 self.assertEqual(
623 ['Package: foo-bin-dbgsym',
624 'Source: foo',
625@@ -1272,16 +1247,11 @@
626 archive_publisher = self.setupPPAArchiveIndexTest(
627 long_descriptions=False, feature_flag=True)
628
629- # A compressed and uncompressed Sources file are written;
630- # ensure that they are the same after uncompressing the former.
631- index_contents = self._checkCompressedFile(
632- archive_publisher, os.path.join('source', 'Sources.bz2'),
633- os.path.join('source', 'Sources'))
634-
635- index_contents = self._checkCompressedFile(
636- archive_publisher, os.path.join('source', 'Sources.gz'),
637- os.path.join('source', 'Sources'))
638-
639+ # Various compressed Sources files are written; ensure that they are
640+ # the same after decompression.
641+ index_contents = self._checkCompressedFiles(
642+ archive_publisher, os.path.join('source', 'Sources'),
643+ ['.gz', '.bz2'])
644 self.assertEqual(
645 ['Package: foo',
646 'Binary: foo-bin',
647@@ -1303,16 +1273,11 @@
648 ''],
649 index_contents)
650
651- # A compressed and an uncompressed Packages file are written;
652- # ensure that they are the same after uncompressing the former.
653- index_contents = self._checkCompressedFile(
654- archive_publisher, os.path.join('binary-i386', 'Packages.bz2'),
655- os.path.join('binary-i386', 'Packages'))
656-
657- index_contents = self._checkCompressedFile(
658- archive_publisher, os.path.join('binary-i386', 'Packages.gz'),
659- os.path.join('binary-i386', 'Packages'))
660-
661+ # Various compressed Packages files are written; ensure that they
662+ # are the same after decompression.
663+ index_contents = self._checkCompressedFiles(
664+ archive_publisher, os.path.join('binary-i386', 'Packages'),
665+ ['.gz', '.bz2'])
666 self.assertEqual(
667 ['Package: foo-bin',
668 'Source: foo',
669@@ -1333,19 +1298,13 @@
670 ''],
671 index_contents)
672
673- # A compressed and an uncompressed Packages file are written for
674- # 'debian-installer' section for each architecture. It will list
675+ # Various compressed Packages files are written for the
676+ # 'debian-installer' section for each architecture. They will list
677 # the 'udeb' files.
678- index_contents = self._checkCompressedFile(
679- archive_publisher,
680- os.path.join('debian-installer', 'binary-i386', 'Packages.bz2'),
681- os.path.join('debian-installer', 'binary-i386', 'Packages'))
682-
683- index_contents = self._checkCompressedFile(
684- archive_publisher,
685- os.path.join('debian-installer', 'binary-i386', 'Packages.gz'),
686- os.path.join('debian-installer', 'binary-i386', 'Packages'))
687-
688+ index_contents = self._checkCompressedFiles(
689+ archive_publisher,
690+ os.path.join('debian-installer', 'binary-i386', 'Packages'),
691+ ['.gz', '.bz2'])
692 self.assertEqual(
693 ['Package: bingo',
694 'Source: foo',
695@@ -1367,16 +1326,10 @@
696 index_contents)
697
698 # 'debug' too, when publish_debug_symbols is enabled.
699- index_contents = self._checkCompressedFile(
700- archive_publisher,
701- os.path.join('debug', 'binary-i386', 'Packages.bz2'),
702- os.path.join('debug', 'binary-i386', 'Packages'))
703-
704- index_contents = self._checkCompressedFile(
705- archive_publisher,
706- os.path.join('debug', 'binary-i386', 'Packages.gz'),
707- os.path.join('debug', 'binary-i386', 'Packages'))
708-
709+ index_contents = self._checkCompressedFiles(
710+ archive_publisher,
711+ os.path.join('debug', 'binary-i386', 'Packages'),
712+ ['.gz', '.bz2'])
713 self.assertEqual(
714 ['Package: foo-bin-dbgsym',
715 'Source: foo',
716@@ -1402,16 +1355,11 @@
717 ('breezy-autotest', PackagePublishingPocket.RELEASE) in
718 archive_publisher.release_files_needed)
719
720- # A compressed and an uncompressed Translation-en file is written.
721- # ensure that they are the same after uncompressing the former.
722- index_contents = self._checkCompressedFile(
723- archive_publisher, os.path.join('i18n', 'Translation-en.gz'),
724- os.path.join('i18n', 'Translation-en'))
725-
726- index_contents = self._checkCompressedFile(
727- archive_publisher, os.path.join('i18n', 'Translation-en.bz2'),
728- os.path.join('i18n', 'Translation-en'))
729-
730+ # Various compressed Translation-en files are written; ensure that
731+ # they are the same after decompression.
732+ index_contents = self._checkCompressedFiles(
733+ archive_publisher, os.path.join('i18n', 'Translation-en'),
734+ ['.gz', '.bz2'])
735 self.assertEqual(
736 ['Package: bingo',
737 'Description-md5: 6fecedf187298acb6bc5f15cc5807fb7',
738@@ -1656,8 +1604,8 @@
739
740 arch_sources_path = os.path.join(
741 archive_publisher._config.distsroot, 'breezy-autotest',
742- 'main', 'source', 'Sources')
743- with open(arch_sources_path) as arch_sources_file:
744+ 'main', 'source', 'Sources.gz')
745+ with gzip.open(arch_sources_path) as arch_sources_file:
746 self.assertReleaseContentsMatch(
747 release, 'main/source/Sources', arch_sources_file.read())
748
749@@ -1892,7 +1840,7 @@
750
751 suite_path = partial(
752 os.path.join, self.config.distsroot, 'breezy-autotest')
753- sources = suite_path('main', 'source', 'Sources')
754+ sources = suite_path('main', 'source', 'Sources.gz')
755 sources_timestamp = os.stat(sources).st_mtime - 60
756 os.utime(sources, (sources_timestamp, sources_timestamp))
757
758@@ -1900,7 +1848,9 @@
759
760 release = self.parseRelease(suite_path('Release'))
761 paths = ['Release'] + [entry['name'] for entry in release['md5sum']]
762- timestamps = set(os.stat(suite_path(path)).st_mtime for path in paths)
763+ timestamps = set(
764+ os.stat(suite_path(path)).st_mtime
765+ for path in paths if os.path.exists(suite_path(path)))
766 self.assertEqual(1, len(timestamps))
767
768 def testCreateSeriesAliasesNoAlias(self):
769@@ -1990,8 +1940,7 @@
770 i18n_root = os.path.join(
771 self.config.distsroot, 'breezy-autotest', 'main', 'i18n')
772
773- # Write a zero-length Translation-en file and compressed versions of
774- # it.
775+ # Write compressed versions of a zero-length Translation-en file.
776 translation_en_index = RepositoryIndexFile(
777 os.path.join(i18n_root, 'Translation-en'), self.config.temproot)
778 translation_en_index.close()
779@@ -2014,7 +1963,8 @@
780 i18n_index['sha1'][1]['size'])
781
782 # i18n/Index and i18n/Translation-en.bz2 are scheduled for inclusion
783- # in Release.
784+ # in Release. Checksums of the uncompressed version are included
785+ # despite it not actually being written to disk.
786 self.assertEqual(4, len(all_files))
787 self.assertContentEqual(
788 ['main/i18n/Index',
789@@ -2087,8 +2037,8 @@
790 publisher._config.distsroot, series.getSuite(pocket), '%s/%s')
791
792 release_template = os.path.join(arch_template, 'Release')
793- packages_template = os.path.join(arch_template, 'Packages')
794- sources_template = os.path.join(arch_template, 'Sources')
795+ packages_template = os.path.join(arch_template, 'Packages.gz')
796+ sources_template = os.path.join(arch_template, 'Sources.gz')
797 release_path = os.path.join(
798 publisher._config.distsroot, series.getSuite(pocket), 'Release')
799 with open(release_path) as release_file:
800
801=== modified file 'lib/lp/archivepublisher/tests/test_repositoryindexfile.py'
802--- lib/lp/archivepublisher/tests/test_repositoryindexfile.py 2013-08-29 10:29:01 +0000
803+++ lib/lp/archivepublisher/tests/test_repositoryindexfile.py 2016-02-05 02:12:46 +0000
804@@ -1,4 +1,4 @@
805-# Copyright 2009-2010 Canonical Ltd. This software is licensed under the
806+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
807 # GNU Affero General Public License version 3 (see the file LICENSE).
808
809 """Tests for `RepositoryIndexFile`."""
810@@ -44,7 +44,7 @@
811 def testWorkflow(self):
812 """`RepositoryIndexFile` workflow.
813
814- On creation, 3 temporary files are atomically created in the
815+ On creation, 2 temporary files are atomically created in the
816 'temp_root' location (mkstemp). One for storing the plain contents
817 and other for the corresponding compressed contents. At this point,
818 no files were created in the 'root' location yet.
819@@ -58,16 +58,15 @@
820 repo_file = self.getRepoFile('boing')
821
822 self.assertEqual(0, len(os.listdir(self.root)))
823- self.assertEqual(3, len(os.listdir(self.temp_root)))
824+ self.assertEqual(2, len(os.listdir(self.temp_root)))
825
826 repo_file.close()
827
828- self.assertEqual(3, len(os.listdir(self.root)))
829+ self.assertEqual(2, len(os.listdir(self.root)))
830 self.assertEqual(0, len(os.listdir(self.temp_root)))
831
832 resulting_files = sorted(os.listdir(self.root))
833- self.assertEqual(
834- ['boing', 'boing.bz2', 'boing.gz'], resulting_files)
835+ self.assertEqual(['boing.bz2', 'boing.gz'], resulting_files)
836
837 for filename in resulting_files:
838 file_path = os.path.join(self.root, filename)
839@@ -87,14 +86,12 @@
840 repo_file.write('hello')
841 repo_file.close()
842
843- plain_content = open(os.path.join(self.root, 'boing')).read()
844 gzip_content = gzip.open(os.path.join(self.root, 'boing.gz')).read()
845 bz2_content = bz2.decompress(
846 open(os.path.join(self.root, 'boing.bz2')).read())
847
848- self.assertEqual(plain_content, bz2_content)
849- self.assertEqual(plain_content, gzip_content)
850- self.assertEqual('hello', plain_content)
851+ self.assertEqual(gzip_content, bz2_content)
852+ self.assertEqual('hello', gzip_content)
853
854 def testUnreferencing(self):
855 """`RepositoryIndexFile` unreferencing.
856@@ -105,7 +102,7 @@
857 repo_file = self.getRepoFile('boing')
858
859 self.assertEqual(0, len(os.listdir(self.root)))
860- self.assertEqual(3, len(os.listdir(self.temp_root)))
861+ self.assertEqual(2, len(os.listdir(self.temp_root)))
862
863 del repo_file
864
865@@ -123,7 +120,7 @@
866 repo_file.close()
867
868 self.assertEqual(
869- ['boing', 'boing.bz2', 'boing.gz'],
870+ ['boing.bz2', 'boing.gz'],
871 sorted(os.listdir(missing_root)))
872
873 def testMissingTempRoot(self):
874@@ -132,3 +129,14 @@
875 self.assertRaises(
876 AssertionError, RepositoryIndexFile,
877 os.path.join(self.root, 'boing'), missing_temp_root)
878+
879+ def testRemoveOld(self):
880+ """`RepositoryIndexFile` removes old index files."""
881+ old_path = os.path.join(self.root, 'boing')
882+ with open(old_path, 'w'):
883+ pass
884+ self.assertEqual(['boing'], sorted(os.listdir(self.root)))
885+ repo_file = self.getRepoFile('boing')
886+ repo_file.close()
887+ self.assertEqual(
888+ ['boing.bz2', 'boing.gz'], sorted(os.listdir(self.root)))
889
890=== modified file 'lib/lp/archivepublisher/utils.py'
891--- lib/lp/archivepublisher/utils.py 2015-09-28 17:38:45 +0000
892+++ lib/lp/archivepublisher/utils.py 2016-02-05 02:12:46 +0000
893@@ -1,4 +1,4 @@
894-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
895+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
896 # GNU Affero General Public License version 3 (see the file LICENSE).
897
898 """Miscellaneous functions for publisher."""
899@@ -43,17 +43,21 @@
900 # File path built on initialization.
901 path = None
902
903- def __init__(self, temp_root, filename):
904+ def __init__(self, temp_root, filename, auto_open=True):
905+ self.temp_root = temp_root
906 self.filename = filename + self.suffix
907
908+ if auto_open:
909+ self.open()
910+
911+ def _buildFile(self, fd):
912+ return os.fdopen(fd, 'wb')
913+
914+ def open(self):
915 fd, self.path = tempfile.mkstemp(
916- dir=temp_root, prefix='%s_' % filename)
917-
918+ dir=self.temp_root, prefix='%s_' % self.filename)
919 self._fd = self._buildFile(fd)
920
921- def _buildFile(self, fd):
922- return os.fdopen(fd, 'wb')
923-
924 def write(self, content):
925 self._fd.write(content)
926
927@@ -102,10 +106,12 @@
928 assert os.path.exists(temp_root), 'Temporary root does not exist.'
929
930 self.index_files = (
931- PlainTempFile(temp_root, filename),
932 GzipTempFile(temp_root, filename),
933 Bzip2TempFile(temp_root, filename),
934 )
935+ self.old_index_files = (
936+ PlainTempFile(temp_root, filename, auto_open=False),
937+ )
938
939 def write(self, content):
940 """Write contents to all target medias."""
941@@ -138,3 +144,10 @@
942 mode = stat.S_IMODE(os.stat(root_path).st_mode)
943 os.chmod(root_path,
944 mode | stat.S_IWGRP | stat.S_IRGRP | stat.S_IROTH)
945+
946+ # Remove files that may have been created by older versions of this
947+ # code.
948+ for index_file in self.old_index_files:
949+ root_path = os.path.join(self.root, index_file.filename)
950+ if os.path.exists(root_path):
951+ os.remove(root_path)
952
953=== modified file 'lib/lp/soyuz/doc/soyuz-upload.txt'
954--- lib/lp/soyuz/doc/soyuz-upload.txt 2014-11-24 16:03:20 +0000
955+++ lib/lp/soyuz/doc/soyuz-upload.txt 2016-02-05 02:12:46 +0000
956@@ -464,9 +464,11 @@
957 component of ubuntutest/breezy-autotest, containing the only the
958 required entry for 'etherwake':
959
960- >>> sources = open(
961+ >>> import gzip
962+
963+ >>> sources = gzip.open(
964 ... "/var/tmp/archive/ubuntutest/dists/breezy-autotest/universe/source"
965- ... "/Sources").read()
966+ ... "/Sources.gz").read()
967 >>> print sources + '\nEND'
968 Package: etherwake
969 Binary: etherwake
970@@ -548,16 +550,16 @@
971
972 Check if the package was moved properly to the component 'multiverse':
973
974- >>> main_sources = open(
975+ >>> main_sources = gzip.open(
976 ... "/var/tmp/archive/ubuntutest/dists/breezy-autotest"
977- ... "/main/source/Sources").read()
978+ ... "/main/source/Sources.gz").read()
979 >>> print main_sources + '\nEND'
980 <BLANKLINE>
981 END
982
983- >>> multiverse_sources = open(
984+ >>> multiverse_sources = gzip.open(
985 ... "/var/tmp/archive/ubuntutest/dists/breezy-autotest"
986- ... "/multiverse/source/Sources").read()
987+ ... "/multiverse/source/Sources.gz").read()
988 >>> print multiverse_sources + '\nEND'
989 Package: drdsl
990 ...