Merge lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 17975
Proposed branch: lp:~cjwatson/launchpad/archive-index-by-hash
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/ds-publish-by-hash
Diff against target: 1502 lines (+1079/-53)
10 files modified
lib/lp/archivepublisher/model/ftparchive.py (+6/-2)
lib/lp/archivepublisher/publishing.py (+282/-19)
lib/lp/archivepublisher/tests/test_publisher.py (+599/-1)
lib/lp/registry/model/distribution.py (+14/-2)
lib/lp/services/helpers.py (+31/-12)
lib/lp/services/librarian/interfaces/__init__.py (+1/-1)
lib/lp/services/librarian/model.py (+4/-2)
lib/lp/soyuz/interfaces/archivefile.py (+25/-1)
lib/lp/soyuz/model/archivefile.py (+63/-11)
lib/lp/soyuz/tests/test_archivefile.py (+54/-2)
To merge this branch: bzr merge lp:~cjwatson/launchpad/archive-index-by-hash
Reviewer: William Grant (community)
Review type: code
Status: Approve
Review via email: mp+289379@code.launchpad.net

Commit message

Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day.

Description of the change

Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day.

The DistroSeries.publish_by_hash flag lets us enable this only for series whose version of apt can make use of it, and it also doubles as a circuit breaker in case something goes wrong.
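
As a purely illustrative sketch (not part of this branch): for each published index file, the new ByHash machinery maintains entries under <suite directory>/by-hash/<AlgorithmName>/<hex digest> for MD5Sum, SHA1, and SHA256. The archive root, directory key, and file content below are made-up example values:

import hashlib
import os

# Hypothetical values for illustration; the publisher derives these from the
# archive configuration and from the Release file data.
archive_root = "/srv/example/ubuntu"
directory_key = "dists/breezy-autotest/main/source"
content = b"Package: foo\n"

# Mirrors the (apt_name, hash_factory) pairs in the archive_hashes list
# added to publishing.py.
for apt_name, factory in [("MD5Sum", hashlib.md5),
                          ("SHA1", hashlib.sha1),
                          ("SHA256", hashlib.sha256)]:
    digest = factory(content).hexdigest()
    print(os.path.join(archive_root, directory_key, "by-hash", apt_name, digest))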

Revision history for this message
William Grant (wgrant) :
review: Needs Fixing (code)
Revision history for this message
Colin Watson (cjwatson) :
Revision history for this message
William Grant (wgrant) :
Revision history for this message
William Grant (wgrant) :
review: Needs Fixing (code)
Revision history for this message
William Grant (wgrant) :
Revision history for this message
Colin Watson (cjwatson) wrote :

Should be worth another look now.

Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py 2016-02-09 15:51:19 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py 2016-04-02 00:45:52 +0000
@@ -54,10 +54,14 @@
54 """Ensure that the path exists and is an empty directory."""54 """Ensure that the path exists and is an empty directory."""
55 if os.path.isdir(path):55 if os.path.isdir(path):
56 for name in os.listdir(path):56 for name in os.listdir(path):
57 if name == "by-hash":
58 # Ignore existing by-hash directories; they will be cleaned
59 # up to match the rest of the directory tree later.
60 continue
57 child_path = os.path.join(path, name)61 child_path = os.path.join(path, name)
58 # Directories containing index files should never have62 # Directories containing index files should never have
59 # subdirectories. Guard against expensive mistakes by not63 # subdirectories other than by-hash. Guard against expensive
60 # recursing here.64 # mistakes by not recursing here.
61 os.unlink(child_path)65 os.unlink(child_path)
62 else:66 else:
63 os.makedirs(path, 0o755)67 os.makedirs(path, 0o755)
6468
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2016-03-30 09:17:31 +0000
+++ lib/lp/archivepublisher/publishing.py 2016-04-02 00:45:52 +0000
@@ -12,7 +12,11 @@
12__metaclass__ = type12__metaclass__ = type
1313
14import bz214import bz2
15from datetime import datetime15from collections import defaultdict
16from datetime import (
17 datetime,
18 timedelta,
19 )
16import errno20import errno
17import gzip21import gzip
18import hashlib22import hashlib
@@ -31,6 +35,11 @@
31 )35 )
32from storm.expr import Desc36from storm.expr import Desc
33from zope.component import getUtility37from zope.component import getUtility
38from zope.interface import (
39 Attribute,
40 implementer,
41 Interface,
42 )
3443
35from lp.app.interfaces.launchpad import ILaunchpadCelebrities44from lp.app.interfaces.launchpad import ILaunchpadCelebrities
36from lp.archivepublisher import HARDCODED_COMPONENT_ORDER45from lp.archivepublisher import HARDCODED_COMPONENT_ORDER
@@ -64,8 +73,12 @@
64from lp.services.database.constants import UTC_NOW73from lp.services.database.constants import UTC_NOW
65from lp.services.database.interfaces import IStore74from lp.services.database.interfaces import IStore
66from lp.services.features import getFeatureFlag75from lp.services.features import getFeatureFlag
76from lp.services.helpers import filenameToContentType
67from lp.services.librarian.client import LibrarianClient77from lp.services.librarian.client import LibrarianClient
68from lp.services.osutils import open_for_writing78from lp.services.osutils import (
79 ensure_directory_exists,
80 open_for_writing,
81 )
69from lp.services.utils import file_exists82from lp.services.utils import file_exists
70from lp.soyuz.enums import (83from lp.soyuz.enums import (
71 ArchivePurpose,84 ArchivePurpose,
@@ -74,6 +87,7 @@
74 PackagePublishingStatus,87 PackagePublishingStatus,
75 )88 )
76from lp.soyuz.interfaces.archive import NoSuchPPA89from lp.soyuz.interfaces.archive import NoSuchPPA
90from lp.soyuz.interfaces.archivefile import IArchiveFileSet
77from lp.soyuz.interfaces.publishing import (91from lp.soyuz.interfaces.publishing import (
78 active_publishing_status,92 active_publishing_status,
79 IPublishingSet,93 IPublishingSet,
@@ -96,6 +110,10 @@
96 }110 }
97111
98112
113# Number of days before unreferenced files are removed from by-hash.
114BY_HASH_STAY_OF_EXECUTION = 1
115
116
99def reorder_components(components):117def reorder_components(components):
100 """Return a list of the components provided.118 """Return a list of the components provided.
101119
@@ -232,6 +250,152 @@
232 return max(len(str(item['size'])) for item in self[key])250 return max(len(str(item['size'])) for item in self[key])
233251
234252
253class IArchiveHash(Interface):
254 """Represents a hash algorithm used for index files."""
255
256 hash_factory = Attribute("A hashlib class suitable for this algorithm.")
257 deb822_name = Attribute(
258 "Algorithm name expected by debian.deb822.Release.")
259 apt_name = Attribute(
260 "Algorithm name used by apt in Release files and by-hash "
261 "subdirectories.")
262 lfc_name = Attribute(
263 "LibraryFileContent attribute name corresponding to this algorithm.")
264
265
266@implementer(IArchiveHash)
267class MD5ArchiveHash:
268 hash_factory = hashlib.md5
269 deb822_name = "md5sum"
270 apt_name = "MD5Sum"
271 lfc_name = "md5"
272
273
274@implementer(IArchiveHash)
275class SHA1ArchiveHash:
276 hash_factory = hashlib.sha1
277 deb822_name = "sha1"
278 apt_name = "SHA1"
279 lfc_name = "sha1"
280
281
282@implementer(IArchiveHash)
283class SHA256ArchiveHash:
284 hash_factory = hashlib.sha256
285 deb822_name = "sha256"
286 apt_name = "SHA256"
287 lfc_name = "sha256"
288
289
290archive_hashes = [
291 MD5ArchiveHash(),
292 SHA1ArchiveHash(),
293 SHA256ArchiveHash(),
294 ]
295
296
297class ByHash:
298 """Represents a single by-hash directory tree."""
299
300 def __init__(self, root, key, log):
301 self.root = root
302 self.path = os.path.join(root, key, "by-hash")
303 self.log = log
304 self.known_digests = defaultdict(lambda: defaultdict(set))
305
306 def add(self, name, lfa, copy_from_path=None):
307 """Ensure that by-hash entries for a single file exist.
308
309 :param name: The name of the file under this directory tree.
310 :param lfa: The `ILibraryFileAlias` to add.
311 :param copy_from_path: If not None, copy file content from here
312 rather than fetching it from the librarian. This can be used
313 for newly-added files to avoid needing to commit the transaction
314 before calling this method.
315 """
316 for archive_hash in archive_hashes:
317 digest = getattr(lfa.content, archive_hash.lfc_name)
318 digest_path = os.path.join(
319 self.path, archive_hash.apt_name, digest)
320 self.known_digests[archive_hash.apt_name][digest].add(name)
321 if not os.path.exists(digest_path):
322 self.log.debug(
323 "by-hash: Creating %s for %s" % (digest_path, name))
324 ensure_directory_exists(os.path.dirname(digest_path))
325 if copy_from_path is not None:
326 os.link(
327 os.path.join(self.root, copy_from_path), digest_path)
328 else:
329 with open(digest_path, "wb") as outfile:
330 lfa.open()
331 try:
332 shutil.copyfileobj(lfa, outfile, 4 * 1024 * 1024)
333 finally:
334 lfa.close()
335
336 def known(self, name, hashname, digest):
337 """Do we know about a file with this name and digest?"""
338 names = self.known_digests[hashname].get(digest)
339 return names is not None and name in names
340
341 def prune(self):
342 """Remove all by-hash entries that we have not been told to add.
343
344 This also removes the by-hash directory itself if no entries remain.
345 """
346 prune_directory = True
347 for archive_hash in archive_hashes:
348 hash_path = os.path.join(self.path, archive_hash.apt_name)
349 if os.path.exists(hash_path):
350 prune_hash_directory = True
351 for digest in list(os.listdir(hash_path)):
352 if digest not in self.known_digests[archive_hash.apt_name]:
353 digest_path = os.path.join(hash_path, digest)
354 self.log.debug(
355 "by-hash: Deleting unreferenced %s" % digest_path)
356 os.unlink(digest_path)
357 else:
358 prune_hash_directory = False
359 if prune_hash_directory:
360 os.rmdir(hash_path)
361 else:
362 prune_directory = False
363 if prune_directory and os.path.exists(self.path):
364 os.rmdir(self.path)
365
366
367class ByHashes:
368 """Represents all by-hash directory trees in an archive."""
369
370 def __init__(self, root, log):
371 self.root = root
372 self.log = log
373 self.children = {}
374
375 def registerChild(self, dirpath):
376 """Register a single by-hash directory.
377
378 Only directories that have been registered here will be pruned by
379 the `prune` method.
380 """
381 if dirpath not in self.children:
382 self.children[dirpath] = ByHash(self.root, dirpath, self.log)
383 return self.children[dirpath]
384
385 def add(self, path, lfa, copy_from_path=None):
386 dirpath, name = os.path.split(path)
387 self.registerChild(dirpath).add(
388 name, lfa, copy_from_path=copy_from_path)
389
390 def known(self, path, hashname, digest):
391 dirpath, name = os.path.split(path)
392 return self.registerChild(dirpath).known(name, hashname, digest)
393
394 def prune(self):
395 for child in self.children.values():
396 child.prune()
397
398
235class Publisher(object):399class Publisher(object):
236 """Publisher is the class used to provide the facility to publish400 """Publisher is the class used to provide the facility to publish
237 files in the pool of a Distribution. The publisher objects will be401 files in the pool of a Distribution. The publisher objects will be
@@ -567,10 +731,20 @@
567 Otherwise we include only pockets flagged as true in dirty_pockets.731 Otherwise we include only pockets flagged as true in dirty_pockets.
568 """732 """
569 self.log.debug("* Step D: Generating Release files.")733 self.log.debug("* Step D: Generating Release files.")
734
735 archive_file_suites = set()
736 for container in getUtility(IArchiveFileSet).getContainersToReap(
737 self.archive, container_prefix=u"release:"):
738 distroseries, pocket = self.distro.getDistroSeriesAndPocket(
739 container[len(u"release:"):])
740 archive_file_suites.add((distroseries, pocket))
741 self.release_files_needed.update(archive_file_suites)
742
570 for distroseries in self.distro:743 for distroseries in self.distro:
571 for pocket in self.archive.getPockets():744 for pocket in self.archive.getPockets():
572 if not is_careful:745 if not is_careful:
573 if not self.isDirty(distroseries, pocket):746 if (not self.isDirty(distroseries, pocket) and
747 (distroseries, pocket) not in archive_file_suites):
574 self.log.debug("Skipping release files for %s/%s" %748 self.log.debug("Skipping release files for %s/%s" %
575 (distroseries.name, pocket.name))749 (distroseries.name, pocket.name))
576 continue750 continue
@@ -811,6 +985,95 @@
811 return self.distro.displayname985 return self.distro.displayname
812 return "LP-PPA-%s" % get_ppa_reference(self.archive)986 return "LP-PPA-%s" % get_ppa_reference(self.archive)
813987
988 def _updateByHash(self, suite, release_data):
989 """Update by-hash files for a suite.
990
991 This takes Release file data which references a set of on-disk
992 files, injects any newly-modified files from that set into the
993 librarian and the ArchiveFile table, and updates the on-disk by-hash
994 directories to be in sync with ArchiveFile. Any on-disk by-hash
995 entries that ceased to be current sufficiently long ago are removed.
996 """
997 archive_file_set = getUtility(IArchiveFileSet)
998 by_hashes = ByHashes(self._config.archiveroot, self.log)
999 suite_dir = os.path.relpath(
1000 os.path.join(self._config.distsroot, suite),
1001 self._config.archiveroot)
1002 container = "release:%s" % suite
1003
1004 # Gather information on entries in the current Release file, and
1005 # make sure nothing there is condemned.
1006 current_files = {}
1007 current_sha256_checksums = set()
1008 for current_entry in release_data["SHA256"]:
1009 path = os.path.join(suite_dir, current_entry["name"])
1010 current_files[path] = (
1011 current_entry["size"], current_entry["sha256"])
1012 current_sha256_checksums.add(current_entry["sha256"])
1013 for container, path, sha256 in archive_file_set.unscheduleDeletion(
1014 self.archive, container=container,
1015 sha256_checksums=current_sha256_checksums):
1016 self.log.debug(
1017 "by-hash: Unscheduled %s for %s in %s for deletion" % (
1018 sha256, path, container))
1019
1020 # Remove any condemned files from the database whose stay of
1021 # execution has elapsed. We ensure that we know about all the
1022 # relevant by-hash directory trees before doing any removals so that
1023 # we can prune them properly later.
1024 for db_file in archive_file_set.getByArchive(
1025 self.archive, container=container):
1026 by_hashes.registerChild(os.path.dirname(db_file.path))
1027 for container, path, sha256 in archive_file_set.reap(
1028 self.archive, container=container):
1029 self.log.debug(
1030 "by-hash: Deleted %s for %s in %s" % (sha256, path, container))
1031
1032 # Ensure that all files recorded in the database are in by-hash.
1033 db_files = archive_file_set.getByArchive(
1034 self.archive, container=container, eager_load=True)
1035 for db_file in db_files:
1036 by_hashes.add(db_file.path, db_file.library_file)
1037
1038 # Condemn any database records that do not correspond to current
1039 # index files.
1040 condemned_files = set()
1041 for db_file in db_files:
1042 if db_file.scheduled_deletion_date is None:
1043 path = db_file.path
1044 if path in current_files:
1045 current_sha256 = current_files[path][1]
1046 else:
1047 current_sha256 = None
1048 if db_file.library_file.content.sha256 != current_sha256:
1049 condemned_files.add(db_file)
1050 if condemned_files:
1051 for container, path, sha256 in archive_file_set.scheduleDeletion(
1052 condemned_files,
1053 timedelta(days=BY_HASH_STAY_OF_EXECUTION)):
1054 self.log.debug(
1055 "by-hash: Scheduled %s for %s in %s for deletion" % (
1056 sha256, path, container))
1057
1058 # Ensure that all the current index files are in by-hash and have
1059 # corresponding database entries.
1060 # XXX cjwatson 2016-03-15: This should possibly use bulk creation,
1061 # although we can only avoid about a third of the queries since the
1062 # librarian client has no bulk upload methods.
1063 for path, (size, sha256) in current_files.items():
1064 full_path = os.path.join(self._config.archiveroot, path)
1065 if (os.path.exists(full_path) and
1066 not by_hashes.known(path, "SHA256", sha256)):
1067 with open(full_path, "rb") as fileobj:
1068 db_file = archive_file_set.newFromFile(
1069 self.archive, container, path, fileobj,
1070 size, filenameToContentType(path))
1071 by_hashes.add(path, db_file.library_file, copy_from_path=path)
1072
1073 # Finally, remove any files from disk that aren't recorded in the
1074 # database and aren't active.
1075 by_hashes.prune()
1076
814 def _writeReleaseFile(self, suite, release_data):1077 def _writeReleaseFile(self, suite, release_data):
815 """Write a Release file to the archive.1078 """Write a Release file to the archive.
8161079
@@ -919,9 +1182,14 @@
919 hashes = self._readIndexFileHashes(suite, filename)1182 hashes = self._readIndexFileHashes(suite, filename)
920 if hashes is None:1183 if hashes is None:
921 continue1184 continue
922 release_file.setdefault("MD5Sum", []).append(hashes["md5sum"])1185 for archive_hash in archive_hashes:
923 release_file.setdefault("SHA1", []).append(hashes["sha1"])1186 release_file.setdefault(archive_hash.apt_name, []).append(
924 release_file.setdefault("SHA256", []).append(hashes["sha256"])1187 hashes[archive_hash.deb822_name])
1188
1189 if distroseries.publish_by_hash:
1190 self._updateByHash(suite, release_file)
1191 if distroseries.advertise_by_hash:
1192 release_file["Acquire-By-Hash"] = "yes"
9251193
926 self._writeReleaseFile(suite, release_file)1194 self._writeReleaseFile(suite, release_file)
927 core_files.add("Release")1195 core_files.add("Release")
@@ -1041,16 +1309,14 @@
1041 # Schedule this for inclusion in the Release file.1309 # Schedule this for inclusion in the Release file.
1042 all_series_files.add(os.path.join(component, "i18n", "Index"))1310 all_series_files.add(os.path.join(component, "i18n", "Index"))
10431311
1044 def _readIndexFileHashes(self, distroseries_name, file_name,1312 def _readIndexFileHashes(self, suite, file_name, subpath=None):
1045 subpath=None):
1046 """Read an index file and return its hashes.1313 """Read an index file and return its hashes.
10471314
1048 :param distroseries_name: Distro series name1315 :param suite: Suite name.
1049 :param file_name: Filename relative to the parent container directory.1316 :param file_name: Filename relative to the parent container directory.
1050 :param subpath: Optional subpath within the distroseries root.1317 :param subpath: Optional subpath within the suite root. Generated
1051 Generated indexes will not include this path. If omitted,1318 indexes will not include this path. If omitted, filenames are
1052 filenames are assumed to be relative to the distroseries1319 assumed to be relative to the suite root.
1053 root.
1054 :return: A dictionary mapping hash field names to dictionaries of1320 :return: A dictionary mapping hash field names to dictionaries of
1055 their components as defined by debian.deb822.Release (e.g.1321 their components as defined by debian.deb822.Release (e.g.
1056 {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None1322 {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
@@ -1058,8 +1324,7 @@
1058 """1324 """
1059 open_func = open1325 open_func = open
1060 full_name = os.path.join(1326 full_name = os.path.join(
1061 self._config.distsroot, distroseries_name, subpath or '.',1327 self._config.distsroot, suite, subpath or '.', file_name)
1062 file_name)
1063 if not os.path.exists(full_name):1328 if not os.path.exists(full_name):
1064 if os.path.exists(full_name + '.gz'):1329 if os.path.exists(full_name + '.gz'):
1065 open_func = gzip.open1330 open_func = gzip.open
@@ -1075,10 +1340,8 @@
1075 return None1340 return None
10761341
1077 hashes = {1342 hashes = {
1078 "md5sum": hashlib.md5(),1343 archive_hash.deb822_name: archive_hash.hash_factory()
1079 "sha1": hashlib.sha1(),1344 for archive_hash in archive_hashes}
1080 "sha256": hashlib.sha256(),
1081 }
1082 size = 01345 size = 0
1083 with open_func(full_name) as in_file:1346 with open_func(full_name) as in_file:
1084 for chunk in iter(lambda: in_file.read(256 * 1024), ""):1347 for chunk in iter(lambda: in_file.read(256 * 1024), ""):
10851348
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2016-03-30 09:17:31 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-04-02 00:45:52 +0000
@@ -7,9 +7,14 @@
77
8import bz28import bz2
9import crypt9import crypt
10from datetime import (
11 datetime,
12 timedelta,
13 )
10from functools import partial14from functools import partial
11import gzip15import gzip
12import hashlib16import hashlib
17from operator import attrgetter
13import os18import os
14import shutil19import shutil
15import stat20import stat
@@ -22,9 +27,20 @@
22 import lzma27 import lzma
23except ImportError:28except ImportError:
24 from backports import lzma29 from backports import lzma
30import pytz
25from testtools.matchers import (31from testtools.matchers import (
26 ContainsAll,32 ContainsAll,
33 DirContains,
34 Equals,
35 FileContains,
36 Is,
27 LessThan,37 LessThan,
38 Matcher,
39 MatchesListwise,
40 MatchesSetwise,
41 MatchesStructure,
42 Not,
43 PathExists,
28 )44 )
29import transaction45import transaction
30from zope.component import getUtility46from zope.component import getUtility
@@ -36,6 +52,8 @@
36 IArchiveSigningKey,52 IArchiveSigningKey,
37 )53 )
38from lp.archivepublisher.publishing import (54from lp.archivepublisher.publishing import (
55 ByHash,
56 ByHashes,
39 getPublisher,57 getPublisher,
40 I18nIndex,58 I18nIndex,
41 Publisher,59 Publisher,
@@ -51,6 +69,7 @@
51from lp.registry.interfaces.series import SeriesStatus69from lp.registry.interfaces.series import SeriesStatus
52from lp.services.config import config70from lp.services.config import config
53from lp.services.database.constants import UTC_NOW71from lp.services.database.constants import UTC_NOW
72from lp.services.database.sqlbase import flush_database_caches
54from lp.services.features import getFeatureFlag73from lp.services.features import getFeatureFlag
55from lp.services.features.testing import FeatureFixture74from lp.services.features.testing import FeatureFixture
56from lp.services.gpg.interfaces import IGPGHandler75from lp.services.gpg.interfaces import IGPGHandler
@@ -69,12 +88,16 @@
69 PackageUploadStatus,88 PackageUploadStatus,
70 )89 )
71from lp.soyuz.interfaces.archive import IArchiveSet90from lp.soyuz.interfaces.archive import IArchiveSet
91from lp.soyuz.interfaces.archivefile import IArchiveFileSet
72from lp.soyuz.tests.test_publishing import TestNativePublishingBase92from lp.soyuz.tests.test_publishing import TestNativePublishingBase
73from lp.testing import TestCaseWithFactory93from lp.testing import TestCaseWithFactory
74from lp.testing.fakemethod import FakeMethod94from lp.testing.fakemethod import FakeMethod
75from lp.testing.gpgkeys import gpgkeysdir95from lp.testing.gpgkeys import gpgkeysdir
76from lp.testing.keyserver import KeyServerTac96from lp.testing.keyserver import KeyServerTac
77from lp.testing.layers import ZopelessDatabaseLayer97from lp.testing.layers import (
98 LaunchpadZopelessLayer,
99 ZopelessDatabaseLayer,
100 )
78101
79102
80RELEASE = PackagePublishingPocket.RELEASE103RELEASE = PackagePublishingPocket.RELEASE
@@ -424,6 +447,226 @@
424 'i386', publications[0].distroarchseries.architecturetag)447 'i386', publications[0].distroarchseries.architecturetag)
425448
426449
450class ByHashHasContents(Matcher):
451 """Matches if a by-hash directory has exactly the specified contents."""
452
453 def __init__(self, contents):
454 self.contents = contents
455
456 def match(self, by_hash_path):
457 mismatch = DirContains(["MD5Sum", "SHA1", "SHA256"]).match(
458 by_hash_path)
459 if mismatch is not None:
460 return mismatch
461 for hashname, hashattr in (
462 ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
463 digests = {
464 getattr(hashlib, hashattr)(content).hexdigest(): content
465 for content in self.contents}
466 path = os.path.join(by_hash_path, hashname)
467 mismatch = DirContains(digests.keys()).match(path)
468 if mismatch is not None:
469 return mismatch
470 for digest, content in digests.items():
471 mismatch = FileContains(content).match(
472 os.path.join(path, digest))
473 if mismatch is not None:
474 return mismatch
475
476
477class ByHashesHaveContents(Matcher):
478 """Matches if only these by-hash directories exist with proper contents."""
479
480 def __init__(self, path_contents):
481 self.path_contents = path_contents
482
483 def match(self, root):
484 children = set()
485 for dirpath, dirnames, _ in os.walk(root):
486 if "by-hash" in dirnames:
487 children.add(os.path.relpath(dirpath, root))
488 mismatch = MatchesSetwise(
489 *(Equals(path) for path in self.path_contents)).match(children)
490 if mismatch is not None:
491 return mismatch
492 for path, contents in self.path_contents.items():
493 by_hash_path = os.path.join(root, path, "by-hash")
494 mismatch = ByHashHasContents(contents).match(by_hash_path)
495 if mismatch is not None:
496 return mismatch
497
498
499class TestByHash(TestCaseWithFactory):
500 """Unit tests for details of handling a single by-hash directory tree."""
501
502 layer = LaunchpadZopelessLayer
503
504 def test_add(self):
505 root = self.makeTemporaryDirectory()
506 contents = ["abc\n", "def\n"]
507 lfas = [
508 self.factory.makeLibraryFileAlias(content=content)
509 for content in contents]
510 transaction.commit()
511 by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
512 for lfa in lfas:
513 by_hash.add("Sources", lfa)
514 by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
515 self.assertThat(by_hash_path, ByHashHasContents(contents))
516
517 def test_add_copy_from_path(self):
518 root = self.makeTemporaryDirectory()
519 content = "abc\n"
520 sources_path = "dists/foo/main/source/Sources"
521 with open_for_writing(
522 os.path.join(root, sources_path), "w") as sources:
523 sources.write(content)
524 lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
525 by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
526 by_hash.add("Sources", lfa, copy_from_path=sources_path)
527 by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
528 self.assertThat(by_hash_path, ByHashHasContents([content]))
529
530 def test_add_existing(self):
531 root = self.makeTemporaryDirectory()
532 content = "abc\n"
533 lfa = self.factory.makeLibraryFileAlias(content=content)
534 by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
535 for hashname, hashattr in (
536 ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
537 digest = getattr(hashlib, hashattr)(content).hexdigest()
538 with open_for_writing(
539 os.path.join(by_hash_path, hashname, digest), "w") as f:
540 f.write(content)
541 by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
542 self.assertThat(by_hash_path, ByHashHasContents([content]))
543 by_hash.add("Sources", lfa)
544 self.assertThat(by_hash_path, ByHashHasContents([content]))
545
546 def test_known(self):
547 root = self.makeTemporaryDirectory()
548 content = "abc\n"
549 with open_for_writing(os.path.join(root, "abc"), "w") as f:
550 f.write(content)
551 lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
552 by_hash = ByHash(root, "", DevNullLogger())
553 md5 = hashlib.md5(content).hexdigest()
554 sha1 = hashlib.sha1(content).hexdigest()
555 sha256 = hashlib.sha256(content).hexdigest()
556 self.assertFalse(by_hash.known("abc", "MD5Sum", md5))
557 self.assertFalse(by_hash.known("abc", "SHA1", sha1))
558 self.assertFalse(by_hash.known("abc", "SHA256", sha256))
559 by_hash.add("abc", lfa, copy_from_path="abc")
560 self.assertTrue(by_hash.known("abc", "MD5Sum", md5))
561 self.assertTrue(by_hash.known("abc", "SHA1", sha1))
562 self.assertTrue(by_hash.known("abc", "SHA256", sha256))
563 self.assertFalse(by_hash.known("def", "SHA256", sha256))
564 by_hash.add("def", lfa, copy_from_path="abc")
565 self.assertTrue(by_hash.known("def", "SHA256", sha256))
566
567 def test_prune(self):
568 root = self.makeTemporaryDirectory()
569 content = "abc\n"
570 sources_path = "dists/foo/main/source/Sources"
571 with open_for_writing(os.path.join(root, sources_path), "w") as f:
572 f.write(content)
573 lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
574 by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
575 by_hash.add("Sources", lfa, copy_from_path=sources_path)
576 by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
577 with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"):
578 pass
579 self.assertThat(by_hash_path, Not(ByHashHasContents([content])))
580 by_hash.prune()
581 self.assertThat(by_hash_path, ByHashHasContents([content]))
582
583 def test_prune_empty(self):
584 root = self.makeTemporaryDirectory()
585 by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
586 by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
587 with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"):
588 pass
589 self.assertThat(by_hash_path, PathExists())
590 by_hash.prune()
591 self.assertThat(by_hash_path, Not(PathExists()))
592
593
594class TestByHashes(TestCaseWithFactory):
595 """Unit tests for details of handling a set of by-hash directory trees."""
596
597 layer = LaunchpadZopelessLayer
598
599 def test_add(self):
600 root = self.makeTemporaryDirectory()
601 self.assertThat(root, ByHashesHaveContents({}))
602 path_contents = {
603 "dists/foo/main/source": {"Sources": "abc\n"},
604 "dists/foo/main/binary-amd64": {
605 "Packages.gz": "def\n", "Packages.xz": "ghi\n"},
606 }
607 by_hashes = ByHashes(root, DevNullLogger())
608 for dirpath, contents in path_contents.items():
609 for name, content in contents.items():
610 path = os.path.join(dirpath, name)
611 with open_for_writing(os.path.join(root, path), "w") as f:
612 f.write(content)
613 lfa = self.factory.makeLibraryFileAlias(
614 content=content, db_only=True)
615 by_hashes.add(path, lfa, copy_from_path=path)
616 self.assertThat(root, ByHashesHaveContents({
617 path: contents.values()
618 for path, contents in path_contents.items()}))
619
620 def test_known(self):
621 root = self.makeTemporaryDirectory()
622 content = "abc\n"
623 sources_path = "dists/foo/main/source/Sources"
624 with open_for_writing(os.path.join(root, sources_path), "w") as f:
625 f.write(content)
626 lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
627 by_hashes = ByHashes(root, DevNullLogger())
628 md5 = hashlib.md5(content).hexdigest()
629 sha1 = hashlib.sha1(content).hexdigest()
630 sha256 = hashlib.sha256(content).hexdigest()
631 self.assertFalse(by_hashes.known(sources_path, "MD5Sum", md5))
632 self.assertFalse(by_hashes.known(sources_path, "SHA1", sha1))
633 self.assertFalse(by_hashes.known(sources_path, "SHA256", sha256))
634 by_hashes.add(sources_path, lfa, copy_from_path=sources_path)
635 self.assertTrue(by_hashes.known(sources_path, "MD5Sum", md5))
636 self.assertTrue(by_hashes.known(sources_path, "SHA1", sha1))
637 self.assertTrue(by_hashes.known(sources_path, "SHA256", sha256))
638
639 def test_prune(self):
640 root = self.makeTemporaryDirectory()
641 path_contents = {
642 "dists/foo/main/source": {"Sources": "abc\n"},
643 "dists/foo/main/binary-amd64": {
644 "Packages.gz": "def\n", "Packages.xz": "ghi\n"},
645 }
646 by_hashes = ByHashes(root, DevNullLogger())
647 for dirpath, contents in path_contents.items():
648 for name, content in contents.items():
649 path = os.path.join(dirpath, name)
650 with open_for_writing(os.path.join(root, path), "w") as f:
651 f.write(content)
652 lfa = self.factory.makeLibraryFileAlias(
653 content=content, db_only=True)
654 by_hashes.add(path, lfa, copy_from_path=path)
655 strays = [
656 "dists/foo/main/source/by-hash/MD5Sum/0",
657 "dists/foo/main/binary-amd64/by-hash/MD5Sum/0",
658 ]
659 for stray in strays:
660 with open_for_writing(os.path.join(root, stray), "w"):
661 pass
662 matcher = ByHashesHaveContents({
663 path: contents.values()
664 for path, contents in path_contents.items()})
665 self.assertThat(root, Not(matcher))
666 by_hashes.prune()
667 self.assertThat(root, matcher)
668
669
427class TestPublisher(TestPublisherBase):670class TestPublisher(TestPublisherBase):
428 """Testing `Publisher` behaviour."""671 """Testing `Publisher` behaviour."""
429672
@@ -1018,6 +1261,22 @@
1018 self.assertEqual(1261 self.assertEqual(
1019 1 + old_num_pending_archives, new_num_pending_archives)1262 1 + old_num_pending_archives, new_num_pending_archives)
10201263
1264 def testPendingArchiveWithReapableFiles(self):
1265 # getPendingPublicationPPAs returns archives that have reapable
1266 # ArchiveFiles.
1267 ubuntu = getUtility(IDistributionSet)['ubuntu']
1268 archive = self.factory.makeArchive()
1269 self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs())
1270 archive_file = self.factory.makeArchiveFile(archive=archive)
1271 self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs())
1272 now = datetime.now(pytz.UTC)
1273 removeSecurityProxy(archive_file).scheduled_deletion_date = (
1274 now + timedelta(hours=12))
1275 self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs())
1276 removeSecurityProxy(archive_file).scheduled_deletion_date = (
1277 now - timedelta(hours=12))
1278 self.assertIn(archive, ubuntu.getPendingPublicationPPAs())
1279
1021 def _checkCompressedFiles(self, archive_publisher, base_file_path,1280 def _checkCompressedFiles(self, archive_publisher, base_file_path,
1022 suffixes):1281 suffixes):
1023 """Assert that the various compressed versions of a file are equal.1282 """Assert that the various compressed versions of a file are equal.
@@ -1930,6 +2189,345 @@
1930 'Release')2189 'Release')
1931 self.assertTrue(file_exists(source_release))2190 self.assertTrue(file_exists(source_release))
19322191
2192 def testUpdateByHashDisabled(self):
2193 # The publisher does not create by-hash directories if it is
2194 # disabled in the series configuration.
2195 self.assertFalse(self.breezy_autotest.publish_by_hash)
2196 self.assertFalse(self.breezy_autotest.advertise_by_hash)
2197 publisher = Publisher(
2198 self.logger, self.config, self.disk_pool,
2199 self.ubuntutest.main_archive)
2200
2201 self.getPubSource(filecontent='Source: foo\n')
2202
2203 publisher.A_publish(False)
2204 publisher.C_doFTPArchive(False)
2205 publisher.D_writeReleaseFiles(False)
2206
2207 suite_path = partial(
2208 os.path.join, self.config.distsroot, 'breezy-autotest')
2209 self.assertThat(
2210 suite_path('main', 'source', 'by-hash'), Not(PathExists()))
2211 release = self.parseRelease(suite_path('Release'))
2212 self.assertNotIn('Acquire-By-Hash', release)
2213
2214 def testUpdateByHashUnadvertised(self):
2215 # If the series configuration sets publish_by_hash but not
2216 # advertise_by_hash, then by-hash directories are created but not
2217 # advertised in Release. This is useful for testing.
2218 self.breezy_autotest.publish_by_hash = True
2219 self.assertFalse(self.breezy_autotest.advertise_by_hash)
2220 publisher = Publisher(
2221 self.logger, self.config, self.disk_pool,
2222 self.ubuntutest.main_archive)
2223
2224 self.getPubSource(filecontent='Source: foo\n')
2225
2226 publisher.A_publish(False)
2227 publisher.C_doFTPArchive(False)
2228 publisher.D_writeReleaseFiles(False)
2229
2230 suite_path = partial(
2231 os.path.join, self.config.distsroot, 'breezy-autotest')
2232 self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists())
2233 release = self.parseRelease(suite_path('Release'))
2234 self.assertNotIn('Acquire-By-Hash', release)
2235
2236 def testUpdateByHashInitial(self):
2237 # An initial publisher run populates by-hash directories and leaves
2238 # no archive files scheduled for deletion.
2239 self.breezy_autotest.publish_by_hash = True
2240 self.breezy_autotest.advertise_by_hash = True
2241 publisher = Publisher(
2242 self.logger, self.config, self.disk_pool,
2243 self.ubuntutest.main_archive)
2244
2245 self.getPubSource(filecontent='Source: foo\n')
2246
2247 publisher.A_publish(False)
2248 publisher.C_doFTPArchive(False)
2249 publisher.D_writeReleaseFiles(False)
2250 flush_database_caches()
2251
2252 suite_path = partial(
2253 os.path.join, self.config.distsroot, 'breezy-autotest')
2254 contents = set()
2255 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
2256 with open(suite_path('main', 'source', name), 'rb') as f:
2257 contents.add(f.read())
2258
2259 self.assertThat(
2260 suite_path('main', 'source', 'by-hash'),
2261 ByHashHasContents(contents))
2262
2263 archive_files = getUtility(IArchiveFileSet).getByArchive(
2264 self.ubuntutest.main_archive)
2265 self.assertNotEqual([], archive_files)
2266 self.assertEqual([], [
2267 archive_file for archive_file in archive_files
2268 if archive_file.scheduled_deletion_date is not None])
2269
2270 def testUpdateByHashSubsequent(self):
2271 # A subsequent publisher run updates by-hash directories where
2272 # necessary, and marks inactive index files for later deletion.
2273 self.breezy_autotest.publish_by_hash = True
2274 self.breezy_autotest.advertise_by_hash = True
2275 publisher = Publisher(
2276 self.logger, self.config, self.disk_pool,
2277 self.ubuntutest.main_archive)
2278
2279 self.getPubSource(filecontent='Source: foo\n')
2280
2281 publisher.A_publish(False)
2282 publisher.C_doFTPArchive(False)
2283 publisher.D_writeReleaseFiles(False)
2284
2285 suite_path = partial(
2286 os.path.join, self.config.distsroot, 'breezy-autotest')
2287 main_contents = set()
2288 universe_contents = set()
2289 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
2290 with open(suite_path('main', 'source', name), 'rb') as f:
2291 main_contents.add(f.read())
2292 with open(suite_path('universe', 'source', name), 'rb') as f:
2293 universe_contents.add(f.read())
2294
2295 self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
2296
2297 publisher.A_publish(False)
2298 publisher.C_doFTPArchive(False)
2299 publisher.D_writeReleaseFiles(False)
2300 flush_database_caches()
2301
2302 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
2303 with open(suite_path('main', 'source', name), 'rb') as f:
2304 main_contents.add(f.read())
2305
2306 self.assertThat(
2307 suite_path('main', 'source', 'by-hash'),
2308 ByHashHasContents(main_contents))
2309 self.assertThat(
2310 suite_path('universe', 'source', 'by-hash'),
2311 ByHashHasContents(universe_contents))
2312
2313 archive_files = getUtility(IArchiveFileSet).getByArchive(
2314 self.ubuntutest.main_archive)
2315 self.assertContentEqual(
2316 ['dists/breezy-autotest/main/source/Sources.bz2',
2317 'dists/breezy-autotest/main/source/Sources.gz'],
2318 [archive_file.path for archive_file in archive_files
2319 if archive_file.scheduled_deletion_date is not None])
2320
2321 def testUpdateByHashIdenticalFiles(self):
2322 # Multiple identical files in the same directory receive multiple
2323 # ArchiveFile rows, even though they share a by-hash entry.
2324 self.breezy_autotest.publish_by_hash = True
2325 publisher = Publisher(
2326 self.logger, self.config, self.disk_pool,
2327 self.ubuntutest.main_archive)
2328 suite_path = partial(
2329 os.path.join, self.config.distsroot, 'breezy-autotest')
2330 get_contents_files = lambda: [
2331 archive_file
2332 for archive_file in getUtility(IArchiveFileSet).getByArchive(
2333 self.ubuntutest.main_archive)
2334 if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
2335
2336 # Create the first file.
2337 with open_for_writing(suite_path('Contents-i386'), 'w') as f:
2338 f.write('A Contents file\n')
2339 publisher.markPocketDirty(
2340 self.breezy_autotest, PackagePublishingPocket.RELEASE)
2341 publisher.A_publish(False)
2342 publisher.C_doFTPArchive(False)
2343 publisher.D_writeReleaseFiles(False)
2344 flush_database_caches()
2345 matchers = [
2346 MatchesStructure(
2347 path=Equals('dists/breezy-autotest/Contents-i386'),
2348 scheduled_deletion_date=Is(None))]
2349 self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
2350 self.assertThat(
2351 suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
2352
2353 # Add a second identical file.
2354 with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
2355 f.write('A Contents file\n')
2356 publisher.D_writeReleaseFiles(False)
2357 flush_database_caches()
2358 matchers.append(
2359 MatchesStructure(
2360 path=Equals('dists/breezy-autotest/Contents-hppa'),
2361 scheduled_deletion_date=Is(None)))
2362 self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
2363 self.assertThat(
2364 suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
2365
2366 # Delete the first file, but allow it its stay of execution.
2367 os.unlink(suite_path('Contents-i386'))
2368 publisher.D_writeReleaseFiles(False)
2369 flush_database_caches()
2370 matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
2371 self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
2372 self.assertThat(
2373 suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
2374
2375 # Arrange for the first file to be pruned, and delete the second
2376 # file.
2377 now = datetime.now(pytz.UTC)
2378 i386_file = getUtility(IArchiveFileSet).getByArchive(
2379 self.ubuntutest.main_archive,
2380 path=u'dists/breezy-autotest/Contents-i386').one()
2381 removeSecurityProxy(i386_file).scheduled_deletion_date = (
2382 now - timedelta(hours=1))
2383 os.unlink(suite_path('Contents-hppa'))
2384 publisher.D_writeReleaseFiles(False)
2385 flush_database_caches()
2386 matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
2387 self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
2388 self.assertThat(
2389 suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
2390
2391 # Arrange for the second file to be pruned.
2392 hppa_file = getUtility(IArchiveFileSet).getByArchive(
2393 self.ubuntutest.main_archive,
2394 path=u'dists/breezy-autotest/Contents-hppa').one()
2395 removeSecurityProxy(hppa_file).scheduled_deletion_date = (
2396 now - timedelta(hours=1))
2397 publisher.D_writeReleaseFiles(False)
2398 flush_database_caches()
2399 self.assertContentEqual([], get_contents_files())
2400 self.assertThat(suite_path('by-hash'), Not(PathExists()))
2401
2402 def testUpdateByHashReprieve(self):
2403 # If a newly-modified index file is identical to a
2404 # previously-condemned one, then it is reprieved and not pruned.
2405 self.breezy_autotest.publish_by_hash = True
2406 # Enable uncompressed index files to avoid relying on stable output
2407 # from compressors in this test.
2408 self.breezy_autotest.index_compressors = [
2409 IndexCompressionType.UNCOMPRESSED]
2410 publisher = Publisher(
2411 self.logger, self.config, self.disk_pool,
2412 self.ubuntutest.main_archive)
2413
2414 # Publish empty index files.
2415 publisher.markPocketDirty(
2416 self.breezy_autotest, PackagePublishingPocket.RELEASE)
2417 publisher.A_publish(False)
2418 publisher.C_doFTPArchive(False)
2419 publisher.D_writeReleaseFiles(False)
2420 suite_path = partial(
2421 os.path.join, self.config.distsroot, 'breezy-autotest')
2422 main_contents = set()
2423 for name in ('Release', 'Sources'):
2424 with open(suite_path('main', 'source', name), 'rb') as f:
2425 main_contents.add(f.read())
2426
2427 # Add a source package so that Sources is non-empty.
2428 pub_source = self.getPubSource(filecontent='Source: foo\n')
2429 publisher.A_publish(False)
2430 publisher.C_doFTPArchive(False)
2431 publisher.D_writeReleaseFiles(False)
2432 transaction.commit()
2433 with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
2434 main_contents.add(f.read())
2435 self.assertEqual(3, len(main_contents))
2436 self.assertThat(
2437 suite_path('main', 'source', 'by-hash'),
2438 ByHashHasContents(main_contents))
2439
2440 # Make the empty Sources file ready to prune.
2441 old_archive_files = []
2442 for archive_file in getUtility(IArchiveFileSet).getByArchive(
2443 self.ubuntutest.main_archive):
2444 if ('main/source' in archive_file.path and
2445 archive_file.scheduled_deletion_date is not None):
2446 old_archive_files.append(archive_file)
2447 self.assertEqual(1, len(old_archive_files))
2448 removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
2449 datetime.now(pytz.UTC) - timedelta(hours=1))
2450
2451 # Delete the source package so that Sources is empty again. The
2452 # empty file is reprieved and the non-empty one is condemned.
2453 pub_source.requestDeletion(self.ubuntutest.owner)
2454 publisher.A_publish(False)
2455 publisher.C_doFTPArchive(False)
2456 publisher.D_writeReleaseFiles(False)
2457 transaction.commit()
2458 self.assertThat(
2459 suite_path('main', 'source', 'by-hash'),
2460 ByHashHasContents(main_contents))
2461 archive_files = getUtility(IArchiveFileSet).getByArchive(
2462 self.ubuntutest.main_archive,
2463 path=u'dists/breezy-autotest/main/source/Sources')
2464 self.assertThat(
2465 sorted(archive_files, key=attrgetter('id')),
2466 MatchesListwise([
2467 MatchesStructure(scheduled_deletion_date=Is(None)),
2468 MatchesStructure(scheduled_deletion_date=Not(Is(None))),
2469 ]))
2470
2471 def testUpdateByHashPrune(self):
2472 # The publisher prunes files from by-hash that were condemned more
2473 # than a day ago.
2474 self.breezy_autotest.publish_by_hash = True
2475 self.breezy_autotest.advertise_by_hash = True
2476 publisher = Publisher(
2477 self.logger, self.config, self.disk_pool,
2478 self.ubuntutest.main_archive)
2479
2480 suite_path = partial(
2481 os.path.join, self.config.distsroot, 'breezy-autotest')
2482 main_contents = set()
2483 for sourcename in ('foo', 'bar'):
2484 self.getPubSource(
2485 sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
2486 publisher.A_publish(False)
2487 publisher.C_doFTPArchive(False)
2488 publisher.D_writeReleaseFiles(False)
2489 for name in ('Release', 'Sources.gz', 'Sources.bz2'):
2490 with open(suite_path('main', 'source', name), 'rb') as f:
2491 main_contents.add(f.read())
2492 transaction.commit()
2493 # Undo any previous determination that breezy-autotest is dirty, so
2494 # that we can use that to check that future runs don't force index
2495 # regeneration.
2496 publisher.dirty_pockets = set()
2497
2498 self.assertThat(
2499 suite_path('main', 'source', 'by-hash'),
2500 ByHashHasContents(main_contents))
2501 old_archive_files = []
2502 for archive_file in getUtility(IArchiveFileSet).getByArchive(
2503 self.ubuntutest.main_archive):
2504 if ('main/source' in archive_file.path and
2505 archive_file.scheduled_deletion_date is not None):
2506 old_archive_files.append(archive_file)
2507 self.assertEqual(2, len(old_archive_files))
2508
2509 now = datetime.now(pytz.UTC)
2510 removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
2511 now + timedelta(hours=12))
2512 removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
2513 now - timedelta(hours=12))
2514 old_archive_files[1].library_file.open()
2515 try:
2516 main_contents.remove(old_archive_files[1].library_file.read())
2517 finally:
2518 old_archive_files[1].library_file.close()
2519 self.assertThat(
2520 suite_path('main', 'source', 'by-hash'),
2521 Not(ByHashHasContents(main_contents)))
2522
2523 publisher.A2_markPocketsWithDeletionsDirty()
2524 publisher.C_doFTPArchive(False)
2525 publisher.D_writeReleaseFiles(False)
2526 self.assertEqual(set(), publisher.dirty_pockets)
2527 self.assertThat(
2528 suite_path('main', 'source', 'by-hash'),
2529 ByHashHasContents(main_contents))
2530
1933 def testCreateSeriesAliasesNoAlias(self):2531 def testCreateSeriesAliasesNoAlias(self):
1934 """createSeriesAliases has nothing to do by default."""2532 """createSeriesAliases has nothing to do by default."""
1935 publisher = Publisher(2533 publisher = Publisher(
19362534
=== modified file 'lib/lp/registry/model/distribution.py'
--- lib/lp/registry/model/distribution.py 2015-10-13 13:22:08 +0000
+++ lib/lp/registry/model/distribution.py 2016-04-02 00:45:52 +0000
@@ -1,4 +1,4 @@
1# Copyright 2009-2015 Canonical Ltd. This software is licensed under the1# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Database classes for implementing distribution items."""4"""Database classes for implementing distribution items."""
@@ -1283,10 +1283,22 @@
1283 bin_query, clauseTables=['BinaryPackagePublishingHistory'],1283 bin_query, clauseTables=['BinaryPackagePublishingHistory'],
1284 orderBy=['archive.id'], distinct=True)1284 orderBy=['archive.id'], distinct=True)
12851285
1286 reapable_af_query = """
1287 Archive.purpose = %s AND
1288 Archive.distribution = %s AND
1289 ArchiveFile.archive = archive.id AND
1290 ArchiveFile.scheduled_deletion_date < %s
1291 """ % sqlvalues(ArchivePurpose.PPA, self, UTC_NOW)
1292
1293 reapable_af_archives = Archive.select(
1294 reapable_af_query, clauseTables=['ArchiveFile'],
1295 orderBy=['archive.id'], distinct=True)
1296
1286 deleting_archives = Archive.selectBy(1297 deleting_archives = Archive.selectBy(
1287 status=ArchiveStatus.DELETING).orderBy(['archive.id'])1298 status=ArchiveStatus.DELETING).orderBy(['archive.id'])
12881299
1289 return src_archives.union(bin_archives).union(deleting_archives)1300 return src_archives.union(bin_archives).union(
1301 reapable_af_archives).union(deleting_archives)
12901302
1291 def getArchiveByComponent(self, component_name):1303 def getArchiveByComponent(self, component_name):
1292 """See `IDistribution`."""1304 """See `IDistribution`."""
12931305
=== modified file 'lib/lp/services/helpers.py'
--- lib/lp/services/helpers.py 2014-05-07 15:28:50 +0000
+++ lib/lp/services/helpers.py 2016-04-02 00:45:52 +0000
@@ -10,6 +10,7 @@
1010
11__metaclass__ = type11__metaclass__ = type
1212
13from collections import OrderedDict
13from difflib import unified_diff14from difflib import unified_diff
14import re15import re
15from StringIO import StringIO16from StringIO import StringIO
@@ -224,19 +225,37 @@
224225
225 >>> filenameToContentType('test.tgz')226 >>> filenameToContentType('test.tgz')
226 'application/octet-stream'227 'application/octet-stream'
228
229 Build logs
230 >>> filenameToContentType('buildlog.txt.gz')
231 'text/plain'
232
233 Various compressed files
234
235 >>> filenameToContentType('Packages.gz')
236 'application/x-gzip'
237 >>> filenameToContentType('Packages.bz2')
238 'application/x-bzip2'
239 >>> filenameToContentType('Packages.xz')
240 'application/x-xz'
227 """241 """
228 ftmap = {".dsc": "text/plain",242 ftmap = OrderedDict([
229 ".changes": "text/plain",243 (".dsc", "text/plain"),
230 ".deb": "application/x-debian-package",244 (".changes", "text/plain"),
231 ".udeb": "application/x-debian-package",245 (".deb", "application/x-debian-package"),
232 ".txt": "text/plain",246 (".udeb", "application/x-debian-package"),
233 # For the build master logs247 (".txt", "text/plain"),
234 ".txt.gz": "text/plain",248 # For the build master logs
235 # For live filesystem builds249 (".txt.gz", "text/plain"),
236 ".manifest": "text/plain",250 # For live filesystem builds
237 ".manifest-remove": "text/plain",251 (".manifest", "text/plain"),
238 ".size": "text/plain",252 (".manifest-remove", "text/plain"),
239 }253 (".size", "text/plain"),
254 # Compressed files
255 (".gz", "application/x-gzip"),
256 (".bz2", "application/x-bzip2"),
257 (".xz", "application/x-xz"),
258 ])
240 for ending in ftmap:259 for ending in ftmap:
241 if fname.endswith(ending):260 if fname.endswith(ending):
242 return ftmap[ending]261 return ftmap[ending]
243262
=== modified file 'lib/lp/services/librarian/interfaces/__init__.py'
--- lib/lp/services/librarian/interfaces/__init__.py 2016-03-14 16:28:19 +0000
+++ lib/lp/services/librarian/interfaces/__init__.py 2016-04-02 00:45:52 +0000
@@ -155,7 +155,7 @@
155class ILibraryFileAliasSet(Interface):155class ILibraryFileAliasSet(Interface):
156156
157 def create(name, size, file, contentType, expires=None, debugID=None,157 def create(name, size, file, contentType, expires=None, debugID=None,
158 restricted=False):158 restricted=False, allow_zero_length=False):
159 """Create a file in the Librarian, returning the new alias.159 """Create a file in the Librarian, returning the new alias.
160160
161 An expiry time of None means the file will never expire until it161 An expiry time of None means the file will never expire until it
162162
=== modified file 'lib/lp/services/librarian/model.py'
--- lib/lp/services/librarian/model.py 2016-03-14 16:28:19 +0000
+++ lib/lp/services/librarian/model.py 2016-04-02 00:45:52 +0000
@@ -244,7 +244,7 @@
244 """Create and find LibraryFileAliases."""244 """Create and find LibraryFileAliases."""
245245
246 def create(self, name, size, file, contentType, expires=None,246 def create(self, name, size, file, contentType, expires=None,
247 debugID=None, restricted=False):247 debugID=None, restricted=False, allow_zero_length=False):
248 """See `ILibraryFileAliasSet`"""248 """See `ILibraryFileAliasSet`"""
249 if restricted:249 if restricted:
250 client = getUtility(IRestrictedLibrarianClient)250 client = getUtility(IRestrictedLibrarianClient)
@@ -252,7 +252,9 @@
252 client = getUtility(ILibrarianClient)252 client = getUtility(ILibrarianClient)
253 if '/' in name:253 if '/' in name:
254 raise InvalidFilename("Filename cannot contain slashes.")254 raise InvalidFilename("Filename cannot contain slashes.")
255 fid = client.addFile(name, size, file, contentType, expires, debugID)255 fid = client.addFile(
256 name, size, file, contentType, expires=expires, debugID=debugID,
257 allow_zero_length=allow_zero_length)
256 lfa = IMasterStore(LibraryFileAlias).find(258 lfa = IMasterStore(LibraryFileAlias).find(
257 LibraryFileAlias, LibraryFileAlias.id == fid).one()259 LibraryFileAlias, LibraryFileAlias.id == fid).one()
258 assert lfa is not None, "client.addFile didn't!"260 assert lfa is not None, "client.addFile didn't!"
259261
=== modified file 'lib/lp/soyuz/interfaces/archivefile.py'
--- lib/lp/soyuz/interfaces/archivefile.py 2016-03-18 15:09:37 +0000
+++ lib/lp/soyuz/interfaces/archivefile.py 2016-04-02 00:45:52 +0000
@@ -79,13 +79,15 @@
79 :param content_type: The MIME type of the file.79 :param content_type: The MIME type of the file.
80 """80 """
8181
82 def getByArchive(archive, container=None, eager_load=False):82 def getByArchive(archive, container=None, path=None, eager_load=False):
83 """Get files in an archive.83 """Get files in an archive.
8484
85 :param archive: Return files in this `IArchive`.85 :param archive: Return files in this `IArchive`.
86 :param container: Return only files with this container.86 :param container: Return only files with this container.
87 :param path: Return only files with this path.
87 :param eager_load: If True, preload related `LibraryFileAlias` and88 :param eager_load: If True, preload related `LibraryFileAlias` and
88 `LibraryFileContent` rows.89 `LibraryFileContent` rows.
90 :return: An iterable of matched files.
89 """91 """
9092
91 def scheduleDeletion(archive_files, stay_of_execution):93 def scheduleDeletion(archive_files, stay_of_execution):
@@ -94,6 +96,25 @@
94 :param archive_files: The `IArchiveFile`s to schedule for deletion.96 :param archive_files: The `IArchiveFile`s to schedule for deletion.
95 :param stay_of_execution: A `timedelta`; schedule files for deletion97 :param stay_of_execution: A `timedelta`; schedule files for deletion
96 this amount of time in the future.98 this amount of time in the future.
99 :return: An iterable of (container, path, sha256) for files that
100 were scheduled for deletion.
101 """
102
103 def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
104 """Unschedule these archive files for deletion.
105
106 This is useful in the case when the new content of a file is
107 identical to a version that was previously condemned. This method's
108 signature does not match that of `scheduleDeletion`; this is more
109 convenient because in such cases we normally do not yet have
110 `ArchiveFile` rows in hand.
111
112 :param archive: Operate on files in this `IArchive`.
113 :param container: Operate only on files with this container.
114 :param sha256_checksums: Operate only on files with any of these
115 checksums.
116 :return: An iterable of (container, path, sha256) for files that
117 were unscheduled for deletion.
97 """118 """
98119
99 def getContainersToReap(archive, container_prefix=None):120 def getContainersToReap(archive, container_prefix=None):
@@ -102,6 +123,7 @@
102 :param archive: Return containers in this `IArchive`.123 :param archive: Return containers in this `IArchive`.
103 :param container_prefix: Return only containers that start with this124 :param container_prefix: Return only containers that start with this
104 prefix.125 prefix.
126 :return: An iterable of matched container names.
105 """127 """
106128
107 def reap(archive, container=None):129 def reap(archive, container=None):
@@ -109,4 +131,6 @@
109131
110 :param archive: Delete files from this `IArchive`.132 :param archive: Delete files from this `IArchive`.
111 :param container: Delete only files with this container.133 :param container: Delete only files with this container.
134 :return: An iterable of (container, path, sha256) for files that
135 were deleted.
112 """136 """
113137
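Taken together, these interface changes let a caller condemn superseded index files, resurrect ones whose content has reappeared, and find out exactly which (container, path, sha256) rows were affected in each case. A rough sketch of the calling pattern, assuming an archive, a container name and a list of superseded ArchiveFile rows are already in hand (all of the names below are illustrative, not taken from this branch):

    from datetime import timedelta

    from zope.component import getUtility

    from lp.soyuz.interfaces.archivefile import IArchiveFileSet

    archive_file_set = getUtility(IArchiveFileSet)

    # Look up the live rows for one published path.
    current = archive_file_set.getByArchive(
        archive, container="foo", path="dists/foo/Release")

    # Condemn superseded files, keeping them around for a day.
    condemned = archive_file_set.scheduleDeletion(
        superseded_files, timedelta(days=1))

    # If identical content comes back later, resurrect it by checksum;
    # at that point there are no ArchiveFile rows conveniently in hand.
    archive_file_set.unscheduleDeletion(
        archive, container="foo",
        sha256_checksums={sha256 for _, _, sha256 in condemned})

    # Finally, delete rows whose stay of execution has expired and learn
    # which files were actually removed.
    removed = archive_file_set.reap(archive, container="foo")
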
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py 2016-03-18 15:09:37 +0000
+++ lib/lp/soyuz/model/archivefile.py 2016-04-02 00:45:52 +0000
@@ -14,7 +14,9 @@
14import os.path14import os.path
1515
16import pytz16import pytz
17from storm.databases.postgres import Returning
17from storm.locals import (18from storm.locals import (
19 And,
18 DateTime,20 DateTime,
19 Int,21 Int,
20 Reference,22 Reference,
@@ -31,6 +33,7 @@
31 IMasterStore,33 IMasterStore,
32 IStore,34 IStore,
33 )35 )
36from lp.services.database.stormexpr import BulkUpdate
34from lp.services.librarian.interfaces import ILibraryFileAliasSet37from lp.services.librarian.interfaces import ILibraryFileAliasSet
35from lp.services.librarian.model import (38from lp.services.librarian.model import (
36 LibraryFileAlias,39 LibraryFileAlias,
@@ -89,17 +92,19 @@
89 content_type):92 content_type):
90 library_file = getUtility(ILibraryFileAliasSet).create(93 library_file = getUtility(ILibraryFileAliasSet).create(
91 os.path.basename(path), size, fileobj, content_type,94 os.path.basename(path), size, fileobj, content_type,
92 restricted=archive.private)95 restricted=archive.private, allow_zero_length=True)
93 return cls.new(archive, container, path, library_file)96 return cls.new(archive, container, path, library_file)
9497
95 @staticmethod98 @staticmethod
96 def getByArchive(archive, container=None, eager_load=False):99 def getByArchive(archive, container=None, path=None, eager_load=False):
97 """See `IArchiveFileSet`."""100 """See `IArchiveFileSet`."""
98 clauses = [ArchiveFile.archive == archive]101 clauses = [ArchiveFile.archive == archive]
99 # XXX cjwatson 2016-03-15: We'll need some more sophisticated way to102 # XXX cjwatson 2016-03-15: We'll need some more sophisticated way to
100 # match containers once we're using them for custom uploads.103 # match containers once we're using them for custom uploads.
101 if container is not None:104 if container is not None:
102 clauses.append(ArchiveFile.container == container)105 clauses.append(ArchiveFile.container == container)
106 if path is not None:
107 clauses.append(ArchiveFile.path == path)
103 archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)108 archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)
104109
105 def eager_load(rows):110 def eager_load(rows):
@@ -114,11 +119,43 @@
114 @staticmethod119 @staticmethod
115 def scheduleDeletion(archive_files, stay_of_execution):120 def scheduleDeletion(archive_files, stay_of_execution):
116 """See `IArchiveFileSet`."""121 """See `IArchiveFileSet`."""
117 archive_file_ids = set(122 clauses = [
118 archive_file.id for archive_file in archive_files)123 ArchiveFile.id.is_in(
119 rows = IMasterStore(ArchiveFile).find(124 set(archive_file.id for archive_file in archive_files)),
120 ArchiveFile, ArchiveFile.id.is_in(archive_file_ids))125 ArchiveFile.library_file == LibraryFileAlias.id,
121 rows.set(scheduled_deletion_date=UTC_NOW + stay_of_execution)126 LibraryFileAlias.content == LibraryFileContent.id,
127 ]
128 new_date = UTC_NOW + stay_of_execution
129 return_columns = [
130 ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
131 return list(IMasterStore(ArchiveFile).execute(Returning(
132 BulkUpdate(
133 {ArchiveFile.scheduled_deletion_date: new_date},
134 table=ArchiveFile,
135 values=[LibraryFileAlias, LibraryFileContent],
136 where=And(*clauses)),
137 columns=return_columns)))
138
139 @staticmethod
140 def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
141 """See `IArchiveFileSet`."""
142 clauses = [
143 ArchiveFile.archive == archive,
144 ArchiveFile.library_file == LibraryFileAlias.id,
145 LibraryFileAlias.content == LibraryFileContent.id,
146 LibraryFileContent.sha256.is_in(sha256_checksums),
147 ]
148 if container is not None:
149 clauses.append(ArchiveFile.container == container)
150 return_columns = [
151 ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
152 return list(IMasterStore(ArchiveFile).execute(Returning(
153 BulkUpdate(
154 {ArchiveFile.scheduled_deletion_date: None},
155 table=ArchiveFile,
156 values=[LibraryFileAlias, LibraryFileContent],
157 where=And(*clauses)),
158 columns=return_columns)))
122159
123 @staticmethod160 @staticmethod
124 def getContainersToReap(archive, container_prefix=None):161 def getContainersToReap(archive, container_prefix=None):
@@ -134,10 +171,25 @@
134 @staticmethod171 @staticmethod
135 def reap(archive, container=None):172 def reap(archive, container=None):
136 """See `IArchiveFileSet`."""173 """See `IArchiveFileSet`."""
174 # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to
175 # lack of support for DELETE FROM ... USING ... in Storm.
137 clauses = [176 clauses = [
138 ArchiveFile.archive == archive,177 "ArchiveFile.archive = ?",
139 ArchiveFile.scheduled_deletion_date < UTC_NOW,178 "ArchiveFile.scheduled_deletion_date < "
179 "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'",
180 "ArchiveFile.library_file = LibraryFileAlias.id",
181 "LibraryFileAlias.content = LibraryFileContent.id",
140 ]182 ]
183 values = [archive.id]
141 if container is not None:184 if container is not None:
142 clauses.append(ArchiveFile.container == container)185 clauses.append("ArchiveFile.container = ?")
143 IMasterStore(ArchiveFile).find(ArchiveFile, *clauses).remove()186 values.append(container)
187 return list(IMasterStore(ArchiveFile).execute("""
188 DELETE FROM ArchiveFile
189 USING LibraryFileAlias, LibraryFileContent
190 WHERE """ + " AND ".join(clauses) + """
191 RETURNING
192 ArchiveFile.container,
193 ArchiveFile.path,
194 LibraryFileContent.sha256
195 """, values))
144196
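The reason all three mutating operations now hand back (container, path, sha256) triples is so that the caller can tell which on-disk entries are affected without issuing a second query. As a rough sketch only (the directory layout here is apt's acquire-by-hash convention, not anything defined in this file), the returned checksum maps to a path along these lines:

    import os.path

    # Illustrative helper, not part of this branch: an index at
    # dists/foo/main/binary-i386/Packages.gz would have a by-hash entry at
    # dists/foo/main/binary-i386/by-hash/SHA256/<sha256>.
    def by_hash_path(path, sha256):
        return os.path.join(
            os.path.dirname(path), "by-hash", "SHA256", sha256)
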
=== modified file 'lib/lp/soyuz/tests/test_archivefile.py'
--- lib/lp/soyuz/tests/test_archivefile.py 2016-03-18 15:09:37 +0000
+++ lib/lp/soyuz/tests/test_archivefile.py 2016-04-02 00:45:52 +0000
@@ -19,6 +19,7 @@
19from zope.component import getUtility19from zope.component import getUtility
20from zope.security.proxy import removeSecurityProxy20from zope.security.proxy import removeSecurityProxy
2121
22from lp.services.database.sqlbase import flush_database_caches
22from lp.services.osutils import open_for_writing23from lp.services.osutils import open_for_writing
23from lp.soyuz.interfaces.archivefile import IArchiveFileSet24from lp.soyuz.interfaces.archivefile import IArchiveFileSet
24from lp.testing import TestCaseWithFactory25from lp.testing import TestCaseWithFactory
@@ -75,17 +76,35 @@
75 self.assertContentEqual(76 self.assertContentEqual(
76 [], archive_file_set.getByArchive(archives[0], container="bar"))77 [], archive_file_set.getByArchive(archives[0], container="bar"))
77 self.assertContentEqual(78 self.assertContentEqual(
79 [archive_files[1]],
80 archive_file_set.getByArchive(
81 archives[0], path=archive_files[1].path))
82 self.assertContentEqual(
83 [], archive_file_set.getByArchive(archives[0], path="other"))
84 self.assertContentEqual(
78 archive_files[2:], archive_file_set.getByArchive(archives[1]))85 archive_files[2:], archive_file_set.getByArchive(archives[1]))
79 self.assertContentEqual(86 self.assertContentEqual(
80 [archive_files[3]],87 [archive_files[3]],
81 archive_file_set.getByArchive(archives[1], container="foo"))88 archive_file_set.getByArchive(archives[1], container="foo"))
82 self.assertContentEqual(89 self.assertContentEqual(
83 [], archive_file_set.getByArchive(archives[1], container="bar"))90 [], archive_file_set.getByArchive(archives[1], container="bar"))
91 self.assertContentEqual(
92 [archive_files[3]],
93 archive_file_set.getByArchive(
94 archives[1], path=archive_files[3].path))
95 self.assertContentEqual(
96 [], archive_file_set.getByArchive(archives[1], path="other"))
8497
85 def test_scheduleDeletion(self):98 def test_scheduleDeletion(self):
86 archive_files = [self.factory.makeArchiveFile() for _ in range(3)]99 archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
87 getUtility(IArchiveFileSet).scheduleDeletion(100 expected_rows = [
101 (archive_file.container, archive_file.path,
102 archive_file.library_file.content.sha256)
103 for archive_file in archive_files[:2]]
104 rows = getUtility(IArchiveFileSet).scheduleDeletion(
88 archive_files[:2], timedelta(days=1))105 archive_files[:2], timedelta(days=1))
106 self.assertContentEqual(expected_rows, rows)
107 flush_database_caches()
89 tomorrow = datetime.now(pytz.UTC) + timedelta(days=1)108 tomorrow = datetime.now(pytz.UTC) + timedelta(days=1)
90 # Allow a bit of timing slack for slow tests.109 # Allow a bit of timing slack for slow tests.
91 self.assertThat(110 self.assertThat(
@@ -96,6 +115,34 @@
96 LessThan(timedelta(minutes=5)))115 LessThan(timedelta(minutes=5)))
97 self.assertIsNone(archive_files[2].scheduled_deletion_date)116 self.assertIsNone(archive_files[2].scheduled_deletion_date)
98117
118 def test_unscheduleDeletion(self):
119 archives = [self.factory.makeArchive() for _ in range(2)]
120 lfas = [
121 self.factory.makeLibraryFileAlias(db_only=True) for _ in range(3)]
122 archive_files = []
123 for archive in archives:
124 for container in ("foo", "bar"):
125 archive_files.extend([
126 self.factory.makeArchiveFile(
127 archive=archive, container=container, library_file=lfa)
128 for lfa in lfas])
129 now = datetime.now(pytz.UTC)
130 for archive_file in archive_files:
131 removeSecurityProxy(archive_file).scheduled_deletion_date = now
132 expected_rows = [
133 ("foo", archive_files[0].path, lfas[0].content.sha256),
134 ("foo", archive_files[1].path, lfas[1].content.sha256),
135 ]
136 rows = getUtility(IArchiveFileSet).unscheduleDeletion(
137 archive=archives[0], container="foo",
138 sha256_checksums=[lfas[0].content.sha256, lfas[1].content.sha256])
139 self.assertContentEqual(expected_rows, rows)
140 flush_database_caches()
141 self.assertContentEqual(
142 [archive_files[0], archive_files[1]],
143 [archive_file for archive_file in archive_files
144 if archive_file.scheduled_deletion_date is None])
145
99 def test_getContainersToReap(self):146 def test_getContainersToReap(self):
100 archive = self.factory.makeArchive()147 archive = self.factory.makeArchive()
101 archive_files = []148 archive_files = []
@@ -149,6 +196,11 @@
149 removeSecurityProxy(archive_files[4]).scheduled_deletion_date = (196 removeSecurityProxy(archive_files[4]).scheduled_deletion_date = (
150 now - timedelta(days=1))197 now - timedelta(days=1))
151 archive_file_set = getUtility(IArchiveFileSet)198 archive_file_set = getUtility(IArchiveFileSet)
152 archive_file_set.reap(archive, container="foo")199 expected_rows = [
200 ("foo", archive_files[0].path,
201 archive_files[0].library_file.content.sha256),
202 ]
203 rows = archive_file_set.reap(archive, container="foo")
204 self.assertContentEqual(expected_rows, rows)
153 self.assertContentEqual(205 self.assertContentEqual(
154 archive_files[1:4], archive_file_set.getByArchive(archive))206 archive_files[1:4], archive_file_set.getByArchive(archive))
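
One detail worth calling out in these tests is flush_database_caches(): the new scheduleDeletion/unscheduleDeletion/reap implementations run their UPDATE and DELETE statements directly in SQL, so Storm's in-memory ArchiveFile objects are stale until the caches are invalidated and attribute access reloads from the database. A stripped-down sketch of the pattern, reusing the archive_files list from the tests above:

    from datetime import timedelta

    from zope.component import getUtility

    from lp.services.database.sqlbase import flush_database_caches
    from lp.soyuz.interfaces.archivefile import IArchiveFileSet

    archive_file_set = getUtility(IArchiveFileSet)
    rows = archive_file_set.scheduleDeletion(archive_files, timedelta(days=1))
    # Without this, archive_files[0].scheduled_deletion_date could still
    # reflect the cached pre-update value.
    flush_database_caches()
    assert archive_files[0].scheduled_deletion_date is not None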