Merge lp:~cjwatson/launchpad/inrelease-by-hash into lp:launchpad

Proposed by Colin Watson on 2018-01-26
Status: Needs review
Proposed branch: lp:~cjwatson/launchpad/inrelease-by-hash
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/refactor-archive-signing
Diff against target: 723 lines (+258/-115)
3 files modified
lib/lp/archivepublisher/publishing.py (+52/-19)
lib/lp/archivepublisher/tests/test_publisher.py (+185/-83)
lib/lp/soyuz/model/archivefile.py (+21/-13)
To merge this branch: bzr merge lp:~cjwatson/launchpad/inrelease-by-hash
Reviewer: Launchpad code reviewers
Date requested: 2018-01-26
Status: Pending
Review via email: mp+336675@code.launchpad.net

Commit Message

Add Release, Release.gpg, and InRelease to by-hash directories.

Description of the Change

Like most of the by-hash work, this has lots of fiddly details and will want some careful QA on dogfood. But at its core it's reasonably straightforward: now that the signed files (Release, Release.gpg, and InRelease) are generated early enough, we just add them to the set of files considered by _updateByHash. I arranged for these files to be added to by-hash before they're renamed into place, which entailed introducing the concept of the "real file name" in a few places.
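
For reviewers unfamiliar with the staging arrangement, here is a minimal standalone sketch of the "real file name" idea: by-hash entries are recorded under the name that will eventually be published (e.g. "Release"), while the data is read from the staged copy (e.g. "Release.new") that hasn't been renamed into place yet. The helper below is illustrative only, not Launchpad API; the actual logic lives in _updateByHash and _readIndexFileHashes in lib/lp/archivepublisher/publishing.py.

    import hashlib
    import os


    def staged_by_hash_entries(suite_dir, extra_files):
        """Map published names to hashes computed from their staged copies.

        `extra_files` maps the name to record (e.g. "Release") to the file
        currently on disk (e.g. "Release.new"), mirroring the
        extra_by_hash_files dict built in D_writeReleaseFiles.
        """
        entries = {}
        for filename, real_filename in extra_files.items():
            real_path = os.path.join(suite_dir, real_filename)
            if not os.path.exists(real_path):
                # Release.gpg/InRelease may be missing if the archive
                # cannot sign.
                continue
            with open(real_path, "rb") as f:
                data = f.read()
            entries[filename] = {
                "size": len(data),
                "sha256": hashlib.sha256(data).hexdigest(),
                "real_name": real_filename,
                }
        return entries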


Unmerged revisions

18544. By Colin Watson on 2018-01-26

Add Release, Release.gpg, and InRelease to by-hash directories.

18543. By Colin Watson on 2018-01-26

Refactor TestUpdateByHash to simulate the advance of time rather than modifying scheduled_deletion_date directly.
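
(For context, a rough standalone sketch of the simulated-time approach: the test keeps a list of timestamps and advances it explicitly, and everything that asks for "now" is pointed at the latest entry. The class below is illustrative; the branch itself does this with mock.patch on lp.archivepublisher.publishing.datetime and a MonkeyPatch of lp.soyuz.model.archivefile._now, as the preview diff shows.)

    from datetime import datetime, timedelta


    class FakeClock:
        """A controllable clock that tests advance explicitly."""

        def __init__(self):
            self.times = [datetime.utcnow()]

        def now(self):
            return self.times[-1]

        def advance(self, delta=None, absolute=None):
            # Step forward by a delta, or jump to an absolute time.
            if delta is not None:
                self.times.append(self.times[-1] + delta)
            else:
                self.times.append(absolute)


    clock = FakeClock()
    clock.advance(delta=timedelta(hours=1))
    assert clock.now() == clock.times[0] + timedelta(hours=1)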

18542. By Colin Watson on 2018-01-25

Simplify ArchiveFileSet.reap slightly using convert_storm_clause_to_string.

18541. By Colin Watson on 2018-01-20

Use RunPartsMixin in a couple more places.

18540. By Colin Watson on 2018-01-19

Extend Release file signing to use the new ISignableArchive interface.

18539. By Colin Watson on 2018-01-19

Extend custom uploads to use the new ISignableArchive interface.

debian-installer checksums, dist-upgrader tarballs, and UEFI/etc. checksums
are now GPG-signed.

18538. By Colin Watson on 2018-01-19

Extend IArchiveSigningKey to support signing files using run-parts.

Generic signing methods are now on ISignableArchive, with methods specific
to managed signing keys remaining on IArchiveSigningKey. This allows using
the same interface to sign files with either managed keys (the PPA case) or
external run-parts scripts (the case of the Ubuntu primary archive).
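
(As a rough usage sketch of this split, based on the calls visible in the preview diff (can_sign and signRepository); the import path and surrounding plumbing are assumptions for illustration only:)

    # Hedged sketch: can_sign and signRepository() appear in the preview
    # diff; the import path is assumed.
    from lp.archivepublisher.interfaces.archivesigningkey import (
        ISignableArchive,
        )


    def sign_suite(archive, suite, log):
        signable_archive = ISignableArchive(archive)
        if signable_archive.can_sign:
            # Works the same way for managed keys (PPAs) and external
            # run-parts scripts (Ubuntu primary archive); the ".new"
            # suffix signs the staged Release file before it is renamed
            # into place.
            log.debug("Signing Release file for %s" % suite)
            signable_archive.signRepository(suite, suffix=".new", log=log)
        else:
            log.debug("No signing key available, skipping signature.")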

18537. By Colin Watson on 2018-01-19

Move signing logic out of DirectoryHash.

It makes more sense at a higher layer, and this allows for some cleaner
abstractions in the near future.

Preview Diff

1=== modified file 'lib/lp/archivepublisher/publishing.py'
2--- lib/lp/archivepublisher/publishing.py 2018-01-26 13:45:06 +0000
3+++ lib/lp/archivepublisher/publishing.py 2018-01-26 13:45:06 +0000
4@@ -772,8 +772,8 @@
5 for pocket in self.archive.getPockets():
6 ds_pocket = (distroseries.name, pocket)
7 suite = distroseries.getSuite(pocket)
8- release_path = os.path.join(
9- self._config.distsroot, suite, "Release")
10+ suite_path = os.path.join(self._config.distsroot, suite)
11+ release_path = os.path.join(suite_path, "Release")
12
13 if is_careful:
14 if not self.isAllowed(distroseries, pocket):
15@@ -803,7 +803,11 @@
16 # We aren't publishing a new Release file for this
17 # suite, probably because it's immutable, but we still
18 # need to prune by-hash files from it.
19- self._updateByHash(suite, "Release")
20+ extra_by_hash_files = {
21+ filename: filename
22+ for filename in ("Release", "Release.gpg", "InRelease")
23+ if file_exists(os.path.join(suite_path, filename))}
24+ self._updateByHash(suite, "Release", extra_by_hash_files)
25
26 def _allIndexFiles(self, distroseries):
27 """Return all index files on disk for a distroseries.
28@@ -1025,7 +1029,7 @@
29 return self.distro.displayname
30 return "LP-PPA-%s" % get_ppa_reference(self.archive)
31
32- def _updateByHash(self, suite, release_file_name):
33+ def _updateByHash(self, suite, release_file_name, extra_files):
34 """Update by-hash files for a suite.
35
36 This takes Release file data which references a set of on-disk
37@@ -1034,6 +1038,16 @@
38 directories to be in sync with ArchiveFile. Any on-disk by-hash
39 entries that ceased to be current sufficiently long ago are removed.
40 """
41+ extra_data = {}
42+ for filename, real_filename in extra_files.items():
43+ hashes = self._readIndexFileHashes(
44+ suite, filename, real_file_name=real_filename)
45+ if hashes is None:
46+ continue
47+ for archive_hash in archive_hashes:
48+ extra_data.setdefault(archive_hash.apt_name, []).append(
49+ hashes[archive_hash.deb822_name])
50+
51 release_path = os.path.join(
52 self._config.distsroot, suite, release_file_name)
53 with open(release_path) as release_file:
54@@ -1052,12 +1066,13 @@
55 # Gather information on entries in the current Release file, and
56 # make sure nothing there is condemned.
57 current_files = {}
58- current_sha256_checksums = set()
59- for current_entry in release_data["SHA256"]:
60+ for current_entry in (
61+ release_data["SHA256"] + extra_data.get("SHA256", [])):
62 path = os.path.join(suite_dir, current_entry["name"])
63+ real_name = current_entry.get("real_name", current_entry["name"])
64+ real_path = os.path.join(suite_dir, real_name)
65 current_files[path] = (
66- int(current_entry["size"]), current_entry["sha256"])
67- current_sha256_checksums.add(current_entry["sha256"])
68+ int(current_entry["size"]), current_entry["sha256"], real_path)
69 uncondemned_files = set()
70 for db_file in archive_file_set.getByArchive(
71 self.archive, container=container, only_condemned=True,
72@@ -1117,15 +1132,16 @@
73 # XXX cjwatson 2016-03-15: This should possibly use bulk creation,
74 # although we can only avoid about a third of the queries since the
75 # librarian client has no bulk upload methods.
76- for path, (size, sha256) in current_files.items():
77- full_path = os.path.join(self._config.distsroot, path)
78+ for path, (size, sha256, real_path) in current_files.items():
79+ full_path = os.path.join(self._config.distsroot, real_path)
80 if (os.path.exists(full_path) and
81 not by_hashes.known(path, "SHA256", sha256)):
82 with open(full_path, "rb") as fileobj:
83 db_file = archive_file_set.newFromFile(
84 self.archive, container, os.path.join("dists", path),
85 fileobj, size, filenameToContentType(path))
86- by_hashes.add(path, db_file.library_file, copy_from_path=path)
87+ by_hashes.add(
88+ path, db_file.library_file, copy_from_path=real_path)
89
90 # Finally, remove any files from disk that aren't recorded in the
91 # database and aren't active.
92@@ -1173,6 +1189,9 @@
93 # special games with timestamps here, as it will interfere with the
94 # "staging" mechanism used to update these files.
95 extra_files = set()
96+ # Extra by-hash files are not listed in the Release file, but we
97+ # still want to include them in by-hash directories.
98+ extra_by_hash_files = {}
99 for component in all_components:
100 self._writeSuiteSource(
101 distroseries, pocket, component, core_files)
102@@ -1239,9 +1258,7 @@
103
104 self._writeReleaseFile(suite, release_file)
105 core_files.add("Release")
106-
107- if distroseries.publish_by_hash:
108- self._updateByHash(suite, "Release.new")
109+ extra_by_hash_files["Release"] = "Release.new"
110
111 signable_archive = ISignableArchive(self.archive)
112 if signable_archive.can_sign:
113@@ -1249,11 +1266,16 @@
114 self.log.debug("Signing Release file for %s" % suite)
115 signable_archive.signRepository(suite, suffix=".new", log=self.log)
116 core_files.add("Release.gpg")
117+ extra_by_hash_files["Release.gpg"] = "Release.gpg.new"
118 core_files.add("InRelease")
119+ extra_by_hash_files["InRelease"] = "InRelease.new"
120 else:
121 # Skip signature if the archive is not set up for signing.
122 self.log.debug("No signing key available, skipping signature.")
123
124+ if distroseries.publish_by_hash:
125+ self._updateByHash(suite, "Release.new", extra_by_hash_files)
126+
127 for name in ("Release", "Release.gpg", "InRelease"):
128 if name in core_files:
129 os.rename(
130@@ -1365,7 +1387,8 @@
131 # Schedule this for inclusion in the Release file.
132 all_series_files.add(os.path.join(component, "i18n", "Index"))
133
134- def _readIndexFileHashes(self, suite, file_name, subpath=None):
135+ def _readIndexFileHashes(self, suite, file_name, subpath=None,
136+ real_file_name=None):
137 """Read an index file and return its hashes.
138
139 :param suite: Suite name.
140@@ -1373,6 +1396,11 @@
141 :param subpath: Optional subpath within the suite root. Generated
142 indexes will not include this path. If omitted, filenames are
143 assumed to be relative to the suite root.
144+ :param real_file_name: The actual filename to open when reading
145+ data (`file_name` will still be the name used in the returned
146+ dictionary). If this is passed, then the returned hash
147+ component dictionaries will include it in additional "real_name"
148+ items.
149 :return: A dictionary mapping hash field names to dictionaries of
150 their components as defined by debian.deb822.Release (e.g.
151 {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
152@@ -1380,7 +1408,8 @@
153 """
154 open_func = open
155 full_name = os.path.join(
156- self._config.distsroot, suite, subpath or '.', file_name)
157+ self._config.distsroot, suite, subpath or '.',
158+ real_file_name or file_name)
159 if not os.path.exists(full_name):
160 if os.path.exists(full_name + '.gz'):
161 open_func = gzip.open
162@@ -1404,9 +1433,13 @@
163 for hashobj in hashes.values():
164 hashobj.update(chunk)
165 size += len(chunk)
166- return {
167- alg: {alg: hashobj.hexdigest(), "name": file_name, "size": size}
168- for alg, hashobj in hashes.items()}
169+ ret = {}
170+ for alg, hashobj in hashes.items():
171+ digest = hashobj.hexdigest()
172+ ret[alg] = {alg: digest, "name": file_name, "size": size}
173+ if real_file_name:
174+ ret[alg]["real_name"] = real_file_name
175+ return ret
176
177 def deleteArchive(self):
178 """Delete the archive.
179
180=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
181--- lib/lp/archivepublisher/tests/test_publisher.py 2018-01-26 13:45:06 +0000
182+++ lib/lp/archivepublisher/tests/test_publisher.py 2018-01-26 13:45:06 +0000
183@@ -15,9 +15,11 @@
184 datetime,
185 timedelta,
186 )
187+from fnmatch import fnmatch
188 from functools import partial
189 import gzip
190 import hashlib
191+from itertools import product
192 from operator import attrgetter
193 import os
194 import shutil
195@@ -27,10 +29,12 @@
196 import time
197
198 from debian.deb822 import Release
199+from fixtures import MonkeyPatch
200 try:
201 import lzma
202 except ImportError:
203 from backports import lzma
204+import mock
205 import pytz
206 from testtools.deferredruntest import AsynchronousDeferredRunTest
207 from testtools.matchers import (
208@@ -60,6 +64,7 @@
209 IArchiveSigningKey,
210 )
211 from lp.archivepublisher.publishing import (
212+ BY_HASH_STAY_OF_EXECUTION,
213 ByHash,
214 ByHashes,
215 DirectoryHash,
216@@ -2541,6 +2546,22 @@
217 class TestUpdateByHash(TestPublisherBase):
218 """Tests for handling of by-hash files."""
219
220+ def setUpMockTime(self):
221+ """Start simulating the advance of time in the publisher."""
222+ self.times = [datetime.now(pytz.UTC)]
223+ mock_datetime = mock.patch('lp.archivepublisher.publishing.datetime')
224+ mocked_datetime = mock_datetime.start()
225+ self.addCleanup(mock_datetime.stop)
226+ mocked_datetime.utcnow = lambda: self.times[-1].replace(tzinfo=None)
227+ self.useFixture(MonkeyPatch(
228+ 'lp.soyuz.model.archivefile._now', lambda: self.times[-1]))
229+
230+ def advanceTime(self, delta=None, absolute=None):
231+ if delta is not None:
232+ self.times.append(self.times[-1] + delta)
233+ else:
234+ self.times.append(absolute)
235+
236 def runSteps(self, publisher, step_a=False, step_a2=False, step_c=False,
237 step_d=False):
238 """Run publisher steps."""
239@@ -2553,6 +2574,33 @@
240 if step_d:
241 publisher.D_writeReleaseFiles(False)
242
243+ @classmethod
244+ def _makeScheduledDeletionDateMatcher(cls, condemned_at):
245+ if condemned_at is None:
246+ return Is(None)
247+ else:
248+ return Equals(
249+ condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
250+
251+ def assertHasSuiteFiles(self, patterns, *properties):
252+ def is_interesting(path):
253+ return any(
254+ fnmatch(path, 'dists/breezy-autotest/%s' % pattern)
255+ for pattern in patterns)
256+
257+ files = [
258+ archive_file
259+ for archive_file in getUtility(IArchiveFileSet).getByArchive(
260+ self.ubuntutest.main_archive)
261+ if is_interesting(archive_file.path)]
262+ matchers = []
263+ for path, condemned_at in properties:
264+ matchers.append(MatchesStructure(
265+ path=Equals('dists/breezy-autotest/%s' % path),
266+ scheduled_deletion_date=self._makeScheduledDeletionDateMatcher(
267+ condemned_at)))
268+ self.assertThat(files, MatchesSetwise(*matchers))
269+
270 def test_disabled(self):
271 # The publisher does not create by-hash directories if it is
272 # disabled in the series configuration.
273@@ -2605,14 +2653,18 @@
274
275 suite_path = partial(
276 os.path.join, self.config.distsroot, 'breezy-autotest')
277- contents = set()
278+ top_contents = set()
279+ with open(suite_path('Release'), 'rb') as f:
280+ top_contents.add(f.read())
281+ main_contents = set()
282 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
283 with open(suite_path('main', 'source', name), 'rb') as f:
284- contents.add(f.read())
285+ main_contents.add(f.read())
286
287+ self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
288 self.assertThat(
289 suite_path('main', 'source', 'by-hash'),
290- ByHashHasContents(contents))
291+ ByHashHasContents(main_contents))
292
293 archive_files = getUtility(IArchiveFileSet).getByArchive(
294 self.ubuntutest.main_archive)
295@@ -2634,8 +2686,11 @@
296
297 suite_path = partial(
298 os.path.join, self.config.distsroot, 'breezy-autotest')
299+ top_contents = set()
300 main_contents = set()
301 universe_contents = set()
302+ with open(suite_path('Release'), 'rb') as f:
303+ top_contents.add(f.read())
304 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
305 with open(suite_path('main', 'source', name), 'rb') as f:
306 main_contents.add(f.read())
307@@ -2646,10 +2701,13 @@
308 self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
309 flush_database_caches()
310
311+ with open(suite_path('Release'), 'rb') as f:
312+ top_contents.add(f.read())
313 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
314 with open(suite_path('main', 'source', name), 'rb') as f:
315 main_contents.add(f.read())
316
317+ self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
318 self.assertThat(
319 suite_path('main', 'source', 'by-hash'),
320 ByHashHasContents(main_contents))
321@@ -2660,7 +2718,8 @@
322 archive_files = getUtility(IArchiveFileSet).getByArchive(
323 self.ubuntutest.main_archive)
324 self.assertContentEqual(
325- ['dists/breezy-autotest/main/source/Sources.bz2',
326+ ['dists/breezy-autotest/Release',
327+ 'dists/breezy-autotest/main/source/Sources.bz2',
328 'dists/breezy-autotest/main/source/Sources.gz'],
329 [archive_file.path for archive_file in archive_files
330 if archive_file.scheduled_deletion_date is not None])
331@@ -2674,11 +2733,11 @@
332 self.ubuntutest.main_archive)
333 suite_path = partial(
334 os.path.join, self.config.distsroot, 'breezy-autotest')
335- get_contents_files = lambda: [
336- archive_file
337- for archive_file in getUtility(IArchiveFileSet).getByArchive(
338- self.ubuntutest.main_archive)
339- if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
340+ self.setUpMockTime()
341+
342+ def get_release_contents():
343+ with open(suite_path('Release')) as f:
344+ return f.read()
345
346 # Create the first file.
347 with open_for_writing(suite_path('Contents-i386'), 'w') as f:
348@@ -2687,72 +2746,93 @@
349 self.breezy_autotest, PackagePublishingPocket.RELEASE)
350 self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
351 flush_database_caches()
352- matchers = [
353- MatchesStructure(
354- path=Equals('dists/breezy-autotest/Contents-i386'),
355- scheduled_deletion_date=Is(None))]
356- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
357+ self.assertHasSuiteFiles(
358+ ('Contents-*', 'Release'),
359+ ('Contents-i386', None), ('Release', None))
360+ releases = [get_release_contents()]
361 self.assertThat(
362- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
363+ suite_path('by-hash'),
364+ ByHashHasContents(['A Contents file\n'] + releases))
365
366 # Add a second identical file.
367 with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
368 f.write('A Contents file\n')
369+ self.advanceTime(delta=timedelta(hours=1))
370 self.runSteps(publisher, step_d=True)
371 flush_database_caches()
372- matchers.append(
373- MatchesStructure(
374- path=Equals('dists/breezy-autotest/Contents-hppa'),
375- scheduled_deletion_date=Is(None)))
376- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
377+ self.assertHasSuiteFiles(
378+ ('Contents-*', 'Release'),
379+ ('Contents-i386', None), ('Contents-hppa', None),
380+ ('Release', self.times[1]), ('Release', None))
381+ releases.append(get_release_contents())
382 self.assertThat(
383- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
384+ suite_path('by-hash'),
385+ ByHashHasContents(['A Contents file\n'] + releases))
386
387 # Delete the first file, but allow it its stay of execution.
388 os.unlink(suite_path('Contents-i386'))
389+ self.advanceTime(delta=timedelta(hours=1))
390 self.runSteps(publisher, step_d=True)
391 flush_database_caches()
392- matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
393- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
394+ self.assertHasSuiteFiles(
395+ ('Contents-*', 'Release'),
396+ ('Contents-i386', self.times[2]), ('Contents-hppa', None),
397+ ('Release', self.times[1]), ('Release', self.times[2]),
398+ ('Release', None))
399+ releases.append(get_release_contents())
400 self.assertThat(
401- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
402+ suite_path('by-hash'),
403+ ByHashHasContents(['A Contents file\n'] + releases))
404
405 # A no-op run leaves the scheduled deletion date intact.
406+ self.advanceTime(delta=timedelta(hours=1))
407+ self.runSteps(publisher, step_d=True)
408+ flush_database_caches()
409+ self.assertHasSuiteFiles(
410+ ('Contents-*', 'Release'),
411+ ('Contents-i386', self.times[2]), ('Contents-hppa', None),
412+ ('Release', self.times[1]), ('Release', self.times[2]),
413+ ('Release', self.times[3]), ('Release', None))
414+ releases.append(get_release_contents())
415+ self.assertThat(
416+ suite_path('by-hash'),
417+ ByHashHasContents(['A Contents file\n'] + releases))
418+
419+ # Arrange for the first file to be pruned, and delete the second
420+ # file. This also puts us past the stay of execution of the first
421+ # two Release files.
422 i386_file = getUtility(IArchiveFileSet).getByArchive(
423 self.ubuntutest.main_archive,
424 path=u'dists/breezy-autotest/Contents-i386').one()
425- i386_date = i386_file.scheduled_deletion_date
426- self.runSteps(publisher, step_d=True)
427- flush_database_caches()
428- matchers[0] = matchers[0].update(
429- scheduled_deletion_date=Equals(i386_date))
430- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
431- self.assertThat(
432- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
433-
434- # Arrange for the first file to be pruned, and delete the second
435- # file.
436- now = datetime.now(pytz.UTC)
437- removeSecurityProxy(i386_file).scheduled_deletion_date = (
438- now - timedelta(hours=1))
439+ self.advanceTime(
440+ absolute=i386_file.scheduled_deletion_date + timedelta(minutes=5))
441 os.unlink(suite_path('Contents-hppa'))
442 self.runSteps(publisher, step_d=True)
443 flush_database_caches()
444- matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
445- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
446+ self.assertHasSuiteFiles(
447+ ('Contents-*', 'Release'),
448+ ('Contents-hppa', self.times[4]),
449+ ('Release', self.times[3]), ('Release', self.times[4]),
450+ ('Release', None))
451+ releases.append(get_release_contents())
452 self.assertThat(
453- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
454+ suite_path('by-hash'),
455+ ByHashHasContents(['A Contents file\n'] + releases[2:]))
456
457- # Arrange for the second file to be pruned.
458+ # Arrange for the second file to be pruned. This also puts us past
459+ # the stay of execution of the first two remaining Release files.
460 hppa_file = getUtility(IArchiveFileSet).getByArchive(
461 self.ubuntutest.main_archive,
462 path=u'dists/breezy-autotest/Contents-hppa').one()
463- removeSecurityProxy(hppa_file).scheduled_deletion_date = (
464- now - timedelta(hours=1))
465+ self.advanceTime(
466+ absolute=hppa_file.scheduled_deletion_date + timedelta(minutes=5))
467 self.runSteps(publisher, step_d=True)
468 flush_database_caches()
469- self.assertContentEqual([], get_contents_files())
470- self.assertThat(suite_path('by-hash'), Not(PathExists()))
471+ self.assertHasSuiteFiles(
472+ ('Contents-*', 'Release'),
473+ ('Release', self.times[5]), ('Release', None))
474+ releases.append(get_release_contents())
475+ self.assertThat(suite_path('by-hash'), ByHashHasContents(releases[4:]))
476
477 def test_reprieve(self):
478 # If a newly-modified index file is identical to a
479@@ -2765,6 +2845,7 @@
480 publisher = Publisher(
481 self.logger, self.config, self.disk_pool,
482 self.ubuntutest.main_archive)
483+ self.setUpMockTime()
484
485 # Publish empty index files.
486 publisher.markPocketDirty(
487@@ -2789,15 +2870,8 @@
488 ByHashHasContents(main_contents))
489
490 # Make the empty Sources file ready to prune.
491- old_archive_files = []
492- for archive_file in getUtility(IArchiveFileSet).getByArchive(
493- self.ubuntutest.main_archive):
494- if ('main/source' in archive_file.path and
495- archive_file.scheduled_deletion_date is not None):
496- old_archive_files.append(archive_file)
497- self.assertEqual(1, len(old_archive_files))
498- removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
499- datetime.now(pytz.UTC) - timedelta(hours=1))
500+ self.advanceTime(
501+ delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1))
502
503 # Delete the source package so that Sources is empty again. The
504 # empty file is reprieved and the non-empty one is condemned.
505@@ -2818,6 +2892,7 @@
506 ]))
507
508 def setUpPruneableSuite(self):
509+ self.setUpMockTime()
510 self.breezy_autotest.publish_by_hash = True
511 self.breezy_autotest.advertise_by_hash = True
512 publisher = Publisher(
513@@ -2826,47 +2901,50 @@
514
515 suite_path = partial(
516 os.path.join, self.config.distsroot, 'breezy-autotest')
517- main_contents = set()
518- for sourcename in ('foo', 'bar'):
519+ top_contents = []
520+ main_contents = []
521+ for sourcename in ('foo', 'bar', 'baz'):
522 self.getPubSource(
523 sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
524 self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
525+ with open(suite_path('Release'), 'rb') as f:
526+ top_contents.append(f.read())
527 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
528 with open(suite_path('main', 'source', name), 'rb') as f:
529- main_contents.add(f.read())
530+ main_contents.append(f.read())
531+ self.advanceTime(delta=timedelta(hours=6))
532 transaction.commit()
533
534+ # We have two condemned sets of index files and one uncondemned set.
535+ # main/source/Release contains a small enough amount of information
536+ # that it doesn't change.
537+ expected_suite_files = (
538+ list(product(
539+ ('main/source/Sources.gz', 'main/source/Sources.bz2',
540+ 'Release'),
541+ (self.times[1], self.times[2], None))) +
542+ [('main/source/Release', None)])
543+ self.assertHasSuiteFiles(
544+ ('main/source/*', 'Release'), *expected_suite_files)
545+ self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
546 self.assertThat(
547 suite_path('main', 'source', 'by-hash'),
548 ByHashHasContents(main_contents))
549- old_archive_files = []
550- for archive_file in getUtility(IArchiveFileSet).getByArchive(
551- self.ubuntutest.main_archive):
552- if ('main/source' in archive_file.path and
553- archive_file.scheduled_deletion_date is not None):
554- old_archive_files.append(archive_file)
555- self.assertEqual(2, len(old_archive_files))
556-
557- now = datetime.now(pytz.UTC)
558- removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
559- now + timedelta(hours=12))
560- removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
561- now - timedelta(hours=12))
562- old_archive_files[1].library_file.open()
563- try:
564- main_contents.remove(old_archive_files[1].library_file.read())
565- finally:
566- old_archive_files[1].library_file.close()
567- self.assertThat(
568- suite_path('main', 'source', 'by-hash'),
569- Not(ByHashHasContents(main_contents)))
570-
571- return main_contents
572+
573+ # Advance time to the point where the first condemned set of index
574+ # files is scheduled for deletion.
575+ self.advanceTime(
576+ absolute=self.times[1] + timedelta(
577+ days=BY_HASH_STAY_OF_EXECUTION, hours=1))
578+ del top_contents[0]
579+ del main_contents[:3]
580+
581+ return top_contents, main_contents
582
583 def test_prune(self):
584 # The publisher prunes files from by-hash that were condemned more
585 # than a day ago.
586- main_contents = self.setUpPruneableSuite()
587+ top_contents, main_contents = self.setUpPruneableSuite()
588 suite_path = partial(
589 os.path.join, self.config.distsroot, 'breezy-autotest')
590
591@@ -2876,7 +2954,19 @@
592 self.logger, self.config, self.disk_pool,
593 self.ubuntutest.main_archive)
594 self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
595+ transaction.commit()
596 self.assertEqual(set(), publisher.dirty_pockets)
597+ # The condemned index files are removed, and no new Release file is
598+ # generated.
599+ expected_suite_files = (
600+ list(product(
601+ ('main/source/Sources.gz', 'main/source/Sources.bz2'),
602+ (self.times[2], None))) +
603+ [('main/source/Release', None),
604+ ('Release', self.times[2]), ('Release', None)])
605+ self.assertHasSuiteFiles(
606+ ('main/source/*', 'Release'), *expected_suite_files)
607+ self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
608 self.assertThat(
609 suite_path('main', 'source', 'by-hash'),
610 ByHashHasContents(main_contents))
611@@ -2884,7 +2974,7 @@
612 def test_prune_immutable(self):
613 # The publisher prunes by-hash files from immutable suites, but
614 # doesn't regenerate the Release file in that case.
615- main_contents = self.setUpPruneableSuite()
616+ top_contents, main_contents = self.setUpPruneableSuite()
617 suite_path = partial(
618 os.path.join, self.config.distsroot, 'breezy-autotest')
619 release_path = suite_path('Release')
620@@ -2897,8 +2987,20 @@
621 self.logger, self.config, self.disk_pool,
622 self.ubuntutest.main_archive)
623 self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
624+ transaction.commit()
625 self.assertEqual(set(), publisher.dirty_pockets)
626 self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
627+ # The condemned index files are removed, and no new Release file is
628+ # generated.
629+ expected_suite_files = (
630+ list(product(
631+ ('main/source/Sources.gz', 'main/source/Sources.bz2'),
632+ (self.times[2], None))) +
633+ [('main/source/Release', None),
634+ ('Release', self.times[2]), ('Release', None)])
635+ self.assertHasSuiteFiles(
636+ ('main/source/*', 'Release'), *expected_suite_files)
637+ self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
638 self.assertThat(
639 suite_path('main', 'source', 'by-hash'),
640 ByHashHasContents(main_contents))
641
642=== modified file 'lib/lp/soyuz/model/archivefile.py'
643--- lib/lp/soyuz/model/archivefile.py 2016-04-04 10:06:33 +0000
644+++ lib/lp/soyuz/model/archivefile.py 2018-01-26 13:45:06 +0000
645@@ -1,4 +1,4 @@
646-# Copyright 2016 Canonical Ltd. This software is licensed under the
647+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
648 # GNU Affero General Public License version 3 (see the file LICENSE).
649
650 """A file in an archive."""
651@@ -33,6 +33,7 @@
652 IMasterStore,
653 IStore,
654 )
655+from lp.services.database.sqlbase import convert_storm_clause_to_string
656 from lp.services.database.stormexpr import BulkUpdate
657 from lp.services.librarian.interfaces import ILibraryFileAliasSet
658 from lp.services.librarian.model import (
659@@ -76,6 +77,15 @@
660 self.scheduled_deletion_date = None
661
662
663+def _now():
664+ """Get the current transaction timestamp.
665+
666+ Tests can override this with a Storm expression or a `datetime` to
667+ simulate time changes.
668+ """
669+ return UTC_NOW
670+
671+
672 @implementer(IArchiveFileSet)
673 class ArchiveFileSet:
674 """See `IArchiveFileSet`."""
675@@ -128,7 +138,7 @@
676 ArchiveFile.library_file == LibraryFileAlias.id,
677 LibraryFileAlias.content == LibraryFileContent.id,
678 ]
679- new_date = UTC_NOW + stay_of_execution
680+ new_date = _now() + stay_of_execution
681 return_columns = [
682 ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
683 return list(IMasterStore(ArchiveFile).execute(Returning(
684@@ -162,7 +172,7 @@
685 def getContainersToReap(archive, container_prefix=None):
686 clauses = [
687 ArchiveFile.archive == archive,
688- ArchiveFile.scheduled_deletion_date < UTC_NOW,
689+ ArchiveFile.scheduled_deletion_date < _now(),
690 ]
691 if container_prefix is not None:
692 clauses.append(ArchiveFile.container.startswith(container_prefix))
693@@ -175,22 +185,20 @@
694 # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to
695 # lack of support for DELETE FROM ... USING ... in Storm.
696 clauses = [
697- "ArchiveFile.archive = ?",
698- "ArchiveFile.scheduled_deletion_date < "
699- "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'",
700- "ArchiveFile.library_file = LibraryFileAlias.id",
701- "LibraryFileAlias.content = LibraryFileContent.id",
702+ ArchiveFile.archive == archive,
703+ ArchiveFile.scheduled_deletion_date < _now(),
704+ ArchiveFile.library_file_id == LibraryFileAlias.id,
705+ LibraryFileAlias.contentID == LibraryFileContent.id,
706 ]
707- values = [archive.id]
708 if container is not None:
709- clauses.append("ArchiveFile.container = ?")
710- values.append(container)
711+ clauses.append(ArchiveFile.container == container)
712+ where = convert_storm_clause_to_string(And(*clauses))
713 return list(IMasterStore(ArchiveFile).execute("""
714 DELETE FROM ArchiveFile
715 USING LibraryFileAlias, LibraryFileContent
716- WHERE """ + " AND ".join(clauses) + """
717+ WHERE """ + where + """
718 RETURNING
719 ArchiveFile.container,
720 ArchiveFile.path,
721 LibraryFileContent.sha256
722- """, values))
723+ """))