Merge lp:~cjwatson/launchpad/inrelease-by-hash into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18605
Proposed branch: lp:~cjwatson/launchpad/inrelease-by-hash
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/refactor-archive-signing
Diff against target: 723 lines (+258/-115)
3 files modified
lib/lp/archivepublisher/publishing.py (+52/-19)
lib/lp/archivepublisher/tests/test_publisher.py (+185/-83)
lib/lp/soyuz/model/archivefile.py (+21/-13)
To merge this branch: bzr merge lp:~cjwatson/launchpad/inrelease-by-hash
Reviewer: William Grant (review type: code)
Status: Approve
Review via email: mp+336675@code.launchpad.net

Commit message

Add Release, Release.gpg, and InRelease to by-hash directories.

Description of the change

Like most of the by-hash stuff, this has lots of fiddly details and will want some careful QA on dogfood. But at its core it's reasonably straightforward: now that the signed files are generated early enough, we just add them to the set of files being considered by _updateByHash. I arranged to add these files to by-hash before they're renamed into place, which entailed introducing the concept of the "real file name" in a few places.
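
A rough sketch of the mapping this introduces (illustrative only; the real work happens in _updateByHash and _readIndexFileHashes, and the exact call sites are in the diff below):

    # The top-level files are first written to staging names ("*.new").
    # extra_by_hash_files maps each published name to the staged file that
    # actually exists on disk at the time by-hash is updated.
    extra_by_hash_files = {
        "Release": "Release.new",
        "Release.gpg": "Release.gpg.new",
        "InRelease": "InRelease.new",
    }
    # _updateByHash hashes each staged file but records it, in by-hash and in
    # ArchiveFile, under its published name, so clients can fetch
    # dists/<suite>/by-hash/SHA256/<digest> before the rename happens.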

William Grant (wgrant):
review: Approve (code)

Preview Diff

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2018-03-27 23:02:02 +0000
+++ lib/lp/archivepublisher/publishing.py 2018-03-27 23:27:31 +0000
@@ -772,8 +772,8 @@
             for pocket in self.archive.getPockets():
                 ds_pocket = (distroseries.name, pocket)
                 suite = distroseries.getSuite(pocket)
-                release_path = os.path.join(
-                    self._config.distsroot, suite, "Release")
+                suite_path = os.path.join(self._config.distsroot, suite)
+                release_path = os.path.join(suite_path, "Release")
 
                 if is_careful:
                     if not self.isAllowed(distroseries, pocket):
@@ -803,7 +803,11 @@
                     # We aren't publishing a new Release file for this
                     # suite, probably because it's immutable, but we still
                     # need to prune by-hash files from it.
-                    self._updateByHash(suite, "Release")
+                    extra_by_hash_files = {
+                        filename: filename
+                        for filename in ("Release", "Release.gpg", "InRelease")
+                        if file_exists(os.path.join(suite_path, filename))}
+                    self._updateByHash(suite, "Release", extra_by_hash_files)
 
     def _allIndexFiles(self, distroseries):
         """Return all index files on disk for a distroseries.
@@ -1025,7 +1029,7 @@
             return self.distro.displayname
         return "LP-PPA-%s" % get_ppa_reference(self.archive)
 
-    def _updateByHash(self, suite, release_file_name):
+    def _updateByHash(self, suite, release_file_name, extra_files):
         """Update by-hash files for a suite.
 
         This takes Release file data which references a set of on-disk
@@ -1034,6 +1038,16 @@
         directories to be in sync with ArchiveFile. Any on-disk by-hash
         entries that ceased to be current sufficiently long ago are removed.
         """
+        extra_data = {}
+        for filename, real_filename in extra_files.items():
+            hashes = self._readIndexFileHashes(
+                suite, filename, real_file_name=real_filename)
+            if hashes is None:
+                continue
+            for archive_hash in archive_hashes:
+                extra_data.setdefault(archive_hash.apt_name, []).append(
+                    hashes[archive_hash.deb822_name])
+
         release_path = os.path.join(
             self._config.distsroot, suite, release_file_name)
         with open(release_path) as release_file:
@@ -1052,12 +1066,13 @@
         # Gather information on entries in the current Release file, and
         # make sure nothing there is condemned.
         current_files = {}
-        current_sha256_checksums = set()
-        for current_entry in release_data["SHA256"]:
+        for current_entry in (
+                release_data["SHA256"] + extra_data.get("SHA256", [])):
             path = os.path.join(suite_dir, current_entry["name"])
+            real_name = current_entry.get("real_name", current_entry["name"])
+            real_path = os.path.join(suite_dir, real_name)
             current_files[path] = (
-                int(current_entry["size"]), current_entry["sha256"])
-            current_sha256_checksums.add(current_entry["sha256"])
+                int(current_entry["size"]), current_entry["sha256"], real_path)
         uncondemned_files = set()
         for db_file in archive_file_set.getByArchive(
                 self.archive, container=container, only_condemned=True,
@@ -1117,15 +1132,16 @@
         # XXX cjwatson 2016-03-15: This should possibly use bulk creation,
         # although we can only avoid about a third of the queries since the
         # librarian client has no bulk upload methods.
-        for path, (size, sha256) in current_files.items():
-            full_path = os.path.join(self._config.distsroot, path)
+        for path, (size, sha256, real_path) in current_files.items():
+            full_path = os.path.join(self._config.distsroot, real_path)
             if (os.path.exists(full_path) and
                     not by_hashes.known(path, "SHA256", sha256)):
                 with open(full_path, "rb") as fileobj:
                     db_file = archive_file_set.newFromFile(
                         self.archive, container, os.path.join("dists", path),
                         fileobj, size, filenameToContentType(path))
-                by_hashes.add(path, db_file.library_file, copy_from_path=path)
+                by_hashes.add(
+                    path, db_file.library_file, copy_from_path=real_path)
 
         # Finally, remove any files from disk that aren't recorded in the
         # database and aren't active.
@@ -1173,6 +1189,9 @@
         # special games with timestamps here, as it will interfere with the
         # "staging" mechanism used to update these files.
         extra_files = set()
+        # Extra by-hash files are not listed in the Release file, but we
+        # still want to include them in by-hash directories.
+        extra_by_hash_files = {}
         for component in all_components:
             self._writeSuiteSource(
                 distroseries, pocket, component, core_files)
@@ -1239,9 +1258,7 @@
 
         self._writeReleaseFile(suite, release_file)
         core_files.add("Release")
-
-        if distroseries.publish_by_hash:
-            self._updateByHash(suite, "Release.new")
+        extra_by_hash_files["Release"] = "Release.new"
 
         signable_archive = ISignableArchive(self.archive)
         if signable_archive.can_sign:
@@ -1250,11 +1267,16 @@
             signable_archive.signRepository(
                 suite, pubconf=self._config, suffix=".new", log=self.log)
             core_files.add("Release.gpg")
+            extra_by_hash_files["Release.gpg"] = "Release.gpg.new"
             core_files.add("InRelease")
+            extra_by_hash_files["InRelease"] = "InRelease.new"
         else:
             # Skip signature if the archive is not set up for signing.
             self.log.debug("No signing key available, skipping signature.")
 
+        if distroseries.publish_by_hash:
+            self._updateByHash(suite, "Release.new", extra_by_hash_files)
+
         for name in ("Release", "Release.gpg", "InRelease"):
             if name in core_files:
                 os.rename(
@@ -1366,7 +1388,8 @@
         # Schedule this for inclusion in the Release file.
         all_series_files.add(os.path.join(component, "i18n", "Index"))
 
-    def _readIndexFileHashes(self, suite, file_name, subpath=None):
+    def _readIndexFileHashes(self, suite, file_name, subpath=None,
+                             real_file_name=None):
         """Read an index file and return its hashes.
 
         :param suite: Suite name.
@@ -1374,6 +1397,11 @@
         :param subpath: Optional subpath within the suite root. Generated
             indexes will not include this path. If omitted, filenames are
             assumed to be relative to the suite root.
+        :param real_file_name: The actual filename to open when reading
+            data (`file_name` will still be the name used in the returned
+            dictionary). If this is passed, then the returned hash
+            component dictionaries will include it in additional "real_name"
+            items.
         :return: A dictionary mapping hash field names to dictionaries of
             their components as defined by debian.deb822.Release (e.g.
             {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
@@ -1381,7 +1409,8 @@
         """
         open_func = open
         full_name = os.path.join(
-            self._config.distsroot, suite, subpath or '.', file_name)
+            self._config.distsroot, suite, subpath or '.',
+            real_file_name or file_name)
         if not os.path.exists(full_name):
             if os.path.exists(full_name + '.gz'):
                 open_func = gzip.open
@@ -1405,9 +1434,13 @@
                 for hashobj in hashes.values():
                     hashobj.update(chunk)
                 size += len(chunk)
-        return {
-            alg: {alg: hashobj.hexdigest(), "name": file_name, "size": size}
-            for alg, hashobj in hashes.items()}
+        ret = {}
+        for alg, hashobj in hashes.items():
+            digest = hashobj.hexdigest()
+            ret[alg] = {alg: digest, "name": file_name, "size": size}
+            if real_file_name:
+                ret[alg]["real_name"] = real_file_name
+        return ret
 
     def deleteArchive(self):
         """Delete the archive.
 
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2018-03-27 23:02:02 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2018-03-27 23:27:31 +0000
@@ -17,9 +17,11 @@
     datetime,
     timedelta,
     )
+from fnmatch import fnmatch
 from functools import partial
 import gzip
 import hashlib
+from itertools import product
 from operator import attrgetter
 import os
 import shutil
@@ -29,10 +31,12 @@
 import time
 
 from debian.deb822 import Release
+from fixtures import MonkeyPatch
 try:
     import lzma
 except ImportError:
     from backports import lzma
+import mock
 import pytz
 from testscenarios import (
     load_tests_apply_scenarios,
@@ -66,6 +70,7 @@
     IArchiveSigningKey,
     )
 from lp.archivepublisher.publishing import (
+    BY_HASH_STAY_OF_EXECUTION,
     ByHash,
     ByHashes,
     DirectoryHash,
@@ -2547,6 +2552,22 @@
 class TestUpdateByHash(TestPublisherBase):
     """Tests for handling of by-hash files."""
 
+    def setUpMockTime(self):
+        """Start simulating the advance of time in the publisher."""
+        self.times = [datetime.now(pytz.UTC)]
+        mock_datetime = mock.patch('lp.archivepublisher.publishing.datetime')
+        mocked_datetime = mock_datetime.start()
+        self.addCleanup(mock_datetime.stop)
+        mocked_datetime.utcnow = lambda: self.times[-1].replace(tzinfo=None)
+        self.useFixture(MonkeyPatch(
+            'lp.soyuz.model.archivefile._now', lambda: self.times[-1]))
+
+    def advanceTime(self, delta=None, absolute=None):
+        if delta is not None:
+            self.times.append(self.times[-1] + delta)
+        else:
+            self.times.append(absolute)
+
     def runSteps(self, publisher, step_a=False, step_a2=False, step_c=False,
                  step_d=False):
         """Run publisher steps."""
@@ -2559,6 +2580,33 @@
         if step_d:
             publisher.D_writeReleaseFiles(False)
 
+    @classmethod
+    def _makeScheduledDeletionDateMatcher(cls, condemned_at):
+        if condemned_at is None:
+            return Is(None)
+        else:
+            return Equals(
+                condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+
+    def assertHasSuiteFiles(self, patterns, *properties):
+        def is_interesting(path):
+            return any(
+                fnmatch(path, 'dists/breezy-autotest/%s' % pattern)
+                for pattern in patterns)
+
+        files = [
+            archive_file
+            for archive_file in getUtility(IArchiveFileSet).getByArchive(
+                self.ubuntutest.main_archive)
+            if is_interesting(archive_file.path)]
+        matchers = []
+        for path, condemned_at in properties:
+            matchers.append(MatchesStructure(
+                path=Equals('dists/breezy-autotest/%s' % path),
+                scheduled_deletion_date=self._makeScheduledDeletionDateMatcher(
+                    condemned_at)))
+        self.assertThat(files, MatchesSetwise(*matchers))
+
     def test_disabled(self):
         # The publisher does not create by-hash directories if it is
         # disabled in the series configuration.
@@ -2611,14 +2659,18 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        contents = set()
+        top_contents = set()
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
+        main_contents = set()
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
-                contents.add(f.read())
+                main_contents.add(f.read())
 
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
-            ByHashHasContents(contents))
+            ByHashHasContents(main_contents))
 
         archive_files = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive)
@@ -2640,8 +2692,11 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
+        top_contents = set()
         main_contents = set()
         universe_contents = set()
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
                 main_contents.add(f.read())
@@ -2652,10 +2707,13 @@
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         flush_database_caches()
 
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
                 main_contents.add(f.read())
 
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
@@ -2666,7 +2724,8 @@
         archive_files = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive)
         self.assertContentEqual(
-            ['dists/breezy-autotest/main/source/Sources.bz2',
+            ['dists/breezy-autotest/Release',
+             'dists/breezy-autotest/main/source/Sources.bz2',
              'dists/breezy-autotest/main/source/Sources.gz'],
             [archive_file.path for archive_file in archive_files
              if archive_file.scheduled_deletion_date is not None])
@@ -2680,11 +2739,11 @@
             self.ubuntutest.main_archive)
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        get_contents_files = lambda: [
-            archive_file
-            for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive)
-            if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
+        self.setUpMockTime()
+
+        def get_release_contents():
+            with open(suite_path('Release')) as f:
+                return f.read()
 
         # Create the first file.
         with open_for_writing(suite_path('Contents-i386'), 'w') as f:
@@ -2693,72 +2752,93 @@
             self.breezy_autotest, PackagePublishingPocket.RELEASE)
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         flush_database_caches()
-        matchers = [
-            MatchesStructure(
-                path=Equals('dists/breezy-autotest/Contents-i386'),
-                scheduled_deletion_date=Is(None))]
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', None), ('Release', None))
+        releases = [get_release_contents()]
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # Add a second identical file.
         with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
             f.write('A Contents file\n')
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers.append(
-            MatchesStructure(
-                path=Equals('dists/breezy-autotest/Contents-hppa'),
-                scheduled_deletion_date=Is(None)))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', None), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # Delete the first file, but allow it its stay of execution.
         os.unlink(suite_path('Contents-i386'))
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', self.times[2]),
+            ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # A no-op run leaves the scheduled deletion date intact.
+        self.advanceTime(delta=timedelta(hours=1))
+        self.runSteps(publisher, step_d=True)
+        flush_database_caches()
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', self.times[2]),
+            ('Release', self.times[3]), ('Release', None))
+        releases.append(get_release_contents())
+        self.assertThat(
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
+
+        # Arrange for the first file to be pruned, and delete the second
+        # file. This also puts us past the stay of execution of the first
+        # two Release files.
         i386_file = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive,
             path='dists/breezy-autotest/Contents-i386').one()
-        i386_date = i386_file.scheduled_deletion_date
-        self.runSteps(publisher, step_d=True)
-        flush_database_caches()
-        matchers[0] = matchers[0].update(
-            scheduled_deletion_date=Equals(i386_date))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
-        self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
-        # Arrange for the first file to be pruned, and delete the second
-        # file.
-        now = datetime.now(pytz.UTC)
-        removeSecurityProxy(i386_file).scheduled_deletion_date = (
-            now - timedelta(hours=1))
+        self.advanceTime(
+            absolute=i386_file.scheduled_deletion_date + timedelta(minutes=5))
         os.unlink(suite_path('Contents-hppa'))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-hppa', self.times[4]),
+            ('Release', self.times[3]), ('Release', self.times[4]),
+            ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases[2:]))
 
-        # Arrange for the second file to be pruned.
+        # Arrange for the second file to be pruned. This also puts us past
+        # the stay of execution of the first two remaining Release files.
         hppa_file = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive,
             path='dists/breezy-autotest/Contents-hppa').one()
-        removeSecurityProxy(hppa_file).scheduled_deletion_date = (
-            now - timedelta(hours=1))
+        self.advanceTime(
+            absolute=hppa_file.scheduled_deletion_date + timedelta(minutes=5))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        self.assertContentEqual([], get_contents_files())
-        self.assertThat(suite_path('by-hash'), Not(PathExists()))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Release', self.times[5]), ('Release', None))
+        releases.append(get_release_contents())
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(releases[4:]))
 
     def test_reprieve(self):
         # If a newly-modified index file is identical to a
@@ -2771,6 +2851,7 @@
         publisher = Publisher(
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
+        self.setUpMockTime()
 
         # Publish empty index files.
         publisher.markPocketDirty(
@@ -2795,15 +2876,8 @@
             ByHashHasContents(main_contents))
 
         # Make the empty Sources file ready to prune.
-        old_archive_files = []
-        for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive):
-            if ('main/source' in archive_file.path and
-                    archive_file.scheduled_deletion_date is not None):
-                old_archive_files.append(archive_file)
-        self.assertEqual(1, len(old_archive_files))
-        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
-            datetime.now(pytz.UTC) - timedelta(hours=1))
+        self.advanceTime(
+            delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1))
 
         # Delete the source package so that Sources is empty again. The
         # empty file is reprieved and the non-empty one is condemned.
@@ -2824,6 +2898,7 @@
             ]))
 
     def setUpPruneableSuite(self):
+        self.setUpMockTime()
         self.breezy_autotest.publish_by_hash = True
         self.breezy_autotest.advertise_by_hash = True
         publisher = Publisher(
@@ -2832,47 +2907,50 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        main_contents = set()
-        for sourcename in ('foo', 'bar'):
+        top_contents = []
+        main_contents = []
+        for sourcename in ('foo', 'bar', 'baz'):
             self.getPubSource(
                 sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
             self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+            with open(suite_path('Release'), 'rb') as f:
+                top_contents.append(f.read())
             for name in ('Release', 'Sources.gz', 'Sources.bz2'):
                 with open(suite_path('main', 'source', name), 'rb') as f:
-                    main_contents.add(f.read())
+                    main_contents.append(f.read())
+            self.advanceTime(delta=timedelta(hours=6))
         transaction.commit()
 
+        # We have two condemned sets of index files and one uncondemned set.
+        # main/source/Release contains a small enough amount of information
+        # that it doesn't change.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2',
+                 'Release'),
+                (self.times[1], self.times[2], None))) +
+            [('main/source/Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
-        old_archive_files = []
-        for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive):
-            if ('main/source' in archive_file.path and
-                    archive_file.scheduled_deletion_date is not None):
-                old_archive_files.append(archive_file)
-        self.assertEqual(2, len(old_archive_files))
-
-        now = datetime.now(pytz.UTC)
-        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
-            now + timedelta(hours=12))
-        removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
-            now - timedelta(hours=12))
-        old_archive_files[1].library_file.open()
-        try:
-            main_contents.remove(old_archive_files[1].library_file.read())
-        finally:
-            old_archive_files[1].library_file.close()
-        self.assertThat(
-            suite_path('main', 'source', 'by-hash'),
-            Not(ByHashHasContents(main_contents)))
-
-        return main_contents
+
+        # Advance time to the point where the first condemned set of index
+        # files is scheduled for deletion.
+        self.advanceTime(
+            absolute=self.times[1] + timedelta(
+                days=BY_HASH_STAY_OF_EXECUTION, hours=1))
+        del top_contents[0]
+        del main_contents[:3]
+
+        return top_contents, main_contents
 
     def test_prune(self):
         # The publisher prunes files from by-hash that were condemned more
         # than a day ago.
-        main_contents = self.setUpPruneableSuite()
+        top_contents, main_contents = self.setUpPruneableSuite()
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
 
@@ -2882,7 +2960,19 @@
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
         self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
+        transaction.commit()
         self.assertEqual(set(), publisher.dirty_pockets)
+        # The condemned index files are removed, and no new Release file is
+        # generated.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
+                (self.times[2], None))) +
+            [('main/source/Release', None),
+             ('Release', self.times[2]), ('Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
@@ -2890,7 +2980,7 @@
     def test_prune_immutable(self):
         # The publisher prunes by-hash files from immutable suites, but
         # doesn't regenerate the Release file in that case.
-        main_contents = self.setUpPruneableSuite()
+        top_contents, main_contents = self.setUpPruneableSuite()
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
         release_path = suite_path('Release')
@@ -2903,8 +2993,20 @@
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
         self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
+        transaction.commit()
         self.assertEqual(set(), publisher.dirty_pockets)
         self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
+        # The condemned index files are removed, and no new Release file is
+        # generated.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
+                (self.times[2], None))) +
+            [('main/source/Release', None),
+             ('Release', self.times[2]), ('Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
 
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py 2016-04-04 10:06:33 +0000
+++ lib/lp/soyuz/model/archivefile.py 2018-03-27 23:27:31 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """A file in an archive."""
@@ -33,6 +33,7 @@
     IMasterStore,
     IStore,
     )
+from lp.services.database.sqlbase import convert_storm_clause_to_string
 from lp.services.database.stormexpr import BulkUpdate
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.librarian.model import (
@@ -76,6 +77,15 @@
         self.scheduled_deletion_date = None
 
 
+def _now():
+    """Get the current transaction timestamp.
+
+    Tests can override this with a Storm expression or a `datetime` to
+    simulate time changes.
+    """
+    return UTC_NOW
+
+
 @implementer(IArchiveFileSet)
 class ArchiveFileSet:
     """See `IArchiveFileSet`."""
@@ -128,7 +138,7 @@
             ArchiveFile.library_file == LibraryFileAlias.id,
             LibraryFileAlias.content == LibraryFileContent.id,
             ]
-        new_date = UTC_NOW + stay_of_execution
+        new_date = _now() + stay_of_execution
         return_columns = [
             ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
         return list(IMasterStore(ArchiveFile).execute(Returning(
@@ -162,7 +172,7 @@
     def getContainersToReap(archive, container_prefix=None):
         clauses = [
             ArchiveFile.archive == archive,
-            ArchiveFile.scheduled_deletion_date < UTC_NOW,
+            ArchiveFile.scheduled_deletion_date < _now(),
             ]
         if container_prefix is not None:
             clauses.append(ArchiveFile.container.startswith(container_prefix))
@@ -175,22 +185,20 @@
         # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to
         # lack of support for DELETE FROM ... USING ... in Storm.
         clauses = [
-            "ArchiveFile.archive = ?",
-            "ArchiveFile.scheduled_deletion_date < "
-            "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'",
-            "ArchiveFile.library_file = LibraryFileAlias.id",
-            "LibraryFileAlias.content = LibraryFileContent.id",
+            ArchiveFile.archive == archive,
+            ArchiveFile.scheduled_deletion_date < _now(),
+            ArchiveFile.library_file_id == LibraryFileAlias.id,
+            LibraryFileAlias.contentID == LibraryFileContent.id,
             ]
-        values = [archive.id]
         if container is not None:
-            clauses.append("ArchiveFile.container = ?")
-            values.append(container)
+            clauses.append(ArchiveFile.container == container)
+        where = convert_storm_clause_to_string(And(*clauses))
         return list(IMasterStore(ArchiveFile).execute("""
             DELETE FROM ArchiveFile
             USING LibraryFileAlias, LibraryFileContent
-            WHERE """ + " AND ".join(clauses) + """
+            WHERE """ + where + """
             RETURNING
                 ArchiveFile.container,
                 ArchiveFile.path,
                 LibraryFileContent.sha256
-            """, values))
+            """))