Merge lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad

Proposed by: Colin Watson
Status: Merged
Merged at revision: 17975
Proposed branch: lp:~cjwatson/launchpad/archive-index-by-hash
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/ds-publish-by-hash
Diff against target: 1502 lines (+1079/-53), 10 files modified
  lib/lp/archivepublisher/model/ftparchive.py (+6/-2)
  lib/lp/archivepublisher/publishing.py (+282/-19)
  lib/lp/archivepublisher/tests/test_publisher.py (+599/-1)
  lib/lp/registry/model/distribution.py (+14/-2)
  lib/lp/services/helpers.py (+31/-12)
  lib/lp/services/librarian/interfaces/__init__.py (+1/-1)
  lib/lp/services/librarian/model.py (+4/-2)
  lib/lp/soyuz/interfaces/archivefile.py (+25/-1)
  lib/lp/soyuz/model/archivefile.py (+63/-11)
  lib/lp/soyuz/tests/test_archivefile.py (+54/-2)
To merge this branch: bzr merge lp:~cjwatson/launchpad/archive-index-by-hash
Reviewer: William Grant (code), Approve
Review via email: mp+289379@code.launchpad.net
Commit message
Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day.
Description of the change
Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day. Publication of by-hash directories is controlled per-series by the publish_by_hash and advertise_by_hash flags on DistroSeries, introduced by the prerequisite branch.
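
For background, apt's by-hash scheme (advertised by an "Acquire-By-Hash: yes" field in Release) publishes each index file under a name derived from its own checksum, so a client can fetch exactly the files listed in whichever Release file it downloaded, even while the archive is being republished. A minimal sketch of the path derivation, with illustrative names rather than code from this branch:

    import hashlib
    import os

    def by_hash_path(index_dir, index_data, apt_name="SHA256",
                     hash_factory=hashlib.sha256):
        # e.g. dists/foo/main/source/by-hash/SHA256/<hexdigest>
        digest = hash_factory(index_data).hexdigest()
        return os.path.join(index_dir, "by-hash", apt_name, digest)

    print(by_hash_path("dists/foo/main/source", b"Source: foo\n"))

Keeping superseded entries around for a day (BY_HASH_STAY_OF_EXECUTION in the diff below) gives clients that fetched an older Release file time to finish downloading the indexes it references.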
Revision history:

William Grant (wgrant): review Needs Fixing (code)
Colin Watson (cjwatson)
William Grant (wgrant)
William Grant (wgrant): review Needs Fixing (code)
William Grant (wgrant)
Colin Watson (cjwatson)
William Grant (wgrant): review Approve (code)
Preview Diff
1 | === modified file 'lib/lp/archivepublisher/model/ftparchive.py' |
2 | --- lib/lp/archivepublisher/model/ftparchive.py 2016-02-09 15:51:19 +0000 |
3 | +++ lib/lp/archivepublisher/model/ftparchive.py 2016-04-02 00:45:52 +0000 |
4 | @@ -54,10 +54,14 @@ |
5 | """Ensure that the path exists and is an empty directory.""" |
6 | if os.path.isdir(path): |
7 | for name in os.listdir(path): |
8 | + if name == "by-hash": |
9 | + # Ignore existing by-hash directories; they will be cleaned |
10 | + # up to match the rest of the directory tree later. |
11 | + continue |
12 | child_path = os.path.join(path, name) |
13 | # Directories containing index files should never have |
14 | - # subdirectories. Guard against expensive mistakes by not |
15 | - # recursing here. |
16 | + # subdirectories other than by-hash. Guard against expensive |
17 | + # mistakes by not recursing here. |
18 | os.unlink(child_path) |
19 | else: |
20 | os.makedirs(path, 0o755) |
21 | |
22 | === modified file 'lib/lp/archivepublisher/publishing.py' |
23 | --- lib/lp/archivepublisher/publishing.py 2016-03-30 09:17:31 +0000 |
24 | +++ lib/lp/archivepublisher/publishing.py 2016-04-02 00:45:52 +0000 |
25 | @@ -12,7 +12,11 @@ |
26 | __metaclass__ = type |
27 | |
28 | import bz2 |
29 | -from datetime import datetime |
30 | +from collections import defaultdict |
31 | +from datetime import ( |
32 | + datetime, |
33 | + timedelta, |
34 | + ) |
35 | import errno |
36 | import gzip |
37 | import hashlib |
38 | @@ -31,6 +35,11 @@ |
39 | ) |
40 | from storm.expr import Desc |
41 | from zope.component import getUtility |
42 | +from zope.interface import ( |
43 | + Attribute, |
44 | + implementer, |
45 | + Interface, |
46 | + ) |
47 | |
48 | from lp.app.interfaces.launchpad import ILaunchpadCelebrities |
49 | from lp.archivepublisher import HARDCODED_COMPONENT_ORDER |
50 | @@ -64,8 +73,12 @@ |
51 | from lp.services.database.constants import UTC_NOW |
52 | from lp.services.database.interfaces import IStore |
53 | from lp.services.features import getFeatureFlag |
54 | +from lp.services.helpers import filenameToContentType |
55 | from lp.services.librarian.client import LibrarianClient |
56 | -from lp.services.osutils import open_for_writing |
57 | +from lp.services.osutils import ( |
58 | + ensure_directory_exists, |
59 | + open_for_writing, |
60 | + ) |
61 | from lp.services.utils import file_exists |
62 | from lp.soyuz.enums import ( |
63 | ArchivePurpose, |
64 | @@ -74,6 +87,7 @@ |
65 | PackagePublishingStatus, |
66 | ) |
67 | from lp.soyuz.interfaces.archive import NoSuchPPA |
68 | +from lp.soyuz.interfaces.archivefile import IArchiveFileSet |
69 | from lp.soyuz.interfaces.publishing import ( |
70 | active_publishing_status, |
71 | IPublishingSet, |
72 | @@ -96,6 +110,10 @@ |
73 | } |
74 | |
75 | |
76 | +# Number of days before unreferenced files are removed from by-hash. |
77 | +BY_HASH_STAY_OF_EXECUTION = 1 |
78 | + |
79 | + |
80 | def reorder_components(components): |
81 | """Return a list of the components provided. |
82 | |
83 | @@ -232,6 +250,152 @@ |
84 | return max(len(str(item['size'])) for item in self[key]) |
85 | |
86 | |
87 | +class IArchiveHash(Interface): |
88 | + """Represents a hash algorithm used for index files.""" |
89 | + |
90 | + hash_factory = Attribute("A hashlib class suitable for this algorithm.") |
91 | + deb822_name = Attribute( |
92 | + "Algorithm name expected by debian.deb822.Release.") |
93 | + apt_name = Attribute( |
94 | + "Algorithm name used by apt in Release files and by-hash " |
95 | + "subdirectories.") |
96 | + lfc_name = Attribute( |
97 | + "LibraryFileContent attribute name corresponding to this algorithm.") |
98 | + |
99 | + |
100 | +@implementer(IArchiveHash) |
101 | +class MD5ArchiveHash: |
102 | + hash_factory = hashlib.md5 |
103 | + deb822_name = "md5sum" |
104 | + apt_name = "MD5Sum" |
105 | + lfc_name = "md5" |
106 | + |
107 | + |
108 | +@implementer(IArchiveHash) |
109 | +class SHA1ArchiveHash: |
110 | + hash_factory = hashlib.sha1 |
111 | + deb822_name = "sha1" |
112 | + apt_name = "SHA1" |
113 | + lfc_name = "sha1" |
114 | + |
115 | + |
116 | +@implementer(IArchiveHash) |
117 | +class SHA256ArchiveHash: |
118 | + hash_factory = hashlib.sha256 |
119 | + deb822_name = "sha256" |
120 | + apt_name = "SHA256" |
121 | + lfc_name = "sha256" |
122 | + |
123 | + |
124 | +archive_hashes = [ |
125 | + MD5ArchiveHash(), |
126 | + SHA1ArchiveHash(), |
127 | + SHA256ArchiveHash(), |
128 | + ] |
129 | + |
130 | + |
131 | +class ByHash: |
132 | + """Represents a single by-hash directory tree.""" |
133 | + |
134 | + def __init__(self, root, key, log): |
135 | + self.root = root |
136 | + self.path = os.path.join(root, key, "by-hash") |
137 | + self.log = log |
138 | + self.known_digests = defaultdict(lambda: defaultdict(set)) |
139 | + |
140 | + def add(self, name, lfa, copy_from_path=None): |
141 | + """Ensure that by-hash entries for a single file exist. |
142 | + |
143 | + :param name: The name of the file under this directory tree. |
144 | + :param lfa: The `ILibraryFileAlias` to add. |
145 | + :param copy_from_path: If not None, copy file content from here |
146 | + rather than fetching it from the librarian. This can be used |
147 | + for newly-added files to avoid needing to commit the transaction |
148 | + before calling this method. |
149 | + """ |
150 | + for archive_hash in archive_hashes: |
151 | + digest = getattr(lfa.content, archive_hash.lfc_name) |
152 | + digest_path = os.path.join( |
153 | + self.path, archive_hash.apt_name, digest) |
154 | + self.known_digests[archive_hash.apt_name][digest].add(name) |
155 | + if not os.path.exists(digest_path): |
156 | + self.log.debug( |
157 | + "by-hash: Creating %s for %s" % (digest_path, name)) |
158 | + ensure_directory_exists(os.path.dirname(digest_path)) |
159 | + if copy_from_path is not None: |
160 | + os.link( |
161 | + os.path.join(self.root, copy_from_path), digest_path) |
162 | + else: |
163 | + with open(digest_path, "wb") as outfile: |
164 | + lfa.open() |
165 | + try: |
166 | + shutil.copyfileobj(lfa, outfile, 4 * 1024 * 1024) |
167 | + finally: |
168 | + lfa.close() |
169 | + |
170 | + def known(self, name, hashname, digest): |
171 | + """Do we know about a file with this name and digest?""" |
172 | + names = self.known_digests[hashname].get(digest) |
173 | + return names is not None and name in names |
174 | + |
175 | + def prune(self): |
176 | + """Remove all by-hash entries that we have not been told to add. |
177 | + |
178 | + This also removes the by-hash directory itself if no entries remain. |
179 | + """ |
180 | + prune_directory = True |
181 | + for archive_hash in archive_hashes: |
182 | + hash_path = os.path.join(self.path, archive_hash.apt_name) |
183 | + if os.path.exists(hash_path): |
184 | + prune_hash_directory = True |
185 | + for digest in list(os.listdir(hash_path)): |
186 | + if digest not in self.known_digests[archive_hash.apt_name]: |
187 | + digest_path = os.path.join(hash_path, digest) |
188 | + self.log.debug( |
189 | + "by-hash: Deleting unreferenced %s" % digest_path) |
190 | + os.unlink(digest_path) |
191 | + else: |
192 | + prune_hash_directory = False |
193 | + if prune_hash_directory: |
194 | + os.rmdir(hash_path) |
195 | + else: |
196 | + prune_directory = False |
197 | + if prune_directory and os.path.exists(self.path): |
198 | + os.rmdir(self.path) |
199 | + |
200 | + |
201 | +class ByHashes: |
202 | + """Represents all by-hash directory trees in an archive.""" |
203 | + |
204 | + def __init__(self, root, log): |
205 | + self.root = root |
206 | + self.log = log |
207 | + self.children = {} |
208 | + |
209 | + def registerChild(self, dirpath): |
210 | + """Register a single by-hash directory. |
211 | + |
212 | + Only directories that have been registered here will be pruned by |
213 | + the `prune` method. |
214 | + """ |
215 | + if dirpath not in self.children: |
216 | + self.children[dirpath] = ByHash(self.root, dirpath, self.log) |
217 | + return self.children[dirpath] |
218 | + |
219 | + def add(self, path, lfa, copy_from_path=None): |
220 | + dirpath, name = os.path.split(path) |
221 | + self.registerChild(dirpath).add( |
222 | + name, lfa, copy_from_path=copy_from_path) |
223 | + |
224 | + def known(self, path, hashname, digest): |
225 | + dirpath, name = os.path.split(path) |
226 | + return self.registerChild(dirpath).known(name, hashname, digest) |
227 | + |
228 | + def prune(self): |
229 | + for child in self.children.values(): |
230 | + child.prune() |
231 | + |
232 | + |
233 | class Publisher(object): |
234 | """Publisher is the class used to provide the facility to publish |
235 | files in the pool of a Distribution. The publisher objects will be |
236 | @@ -567,10 +731,20 @@ |
237 | Otherwise we include only pockets flagged as true in dirty_pockets. |
238 | """ |
239 | self.log.debug("* Step D: Generating Release files.") |
240 | + |
241 | + archive_file_suites = set() |
242 | + for container in getUtility(IArchiveFileSet).getContainersToReap( |
243 | + self.archive, container_prefix=u"release:"): |
244 | + distroseries, pocket = self.distro.getDistroSeriesAndPocket( |
245 | + container[len(u"release:"):]) |
246 | + archive_file_suites.add((distroseries, pocket)) |
247 | + self.release_files_needed.update(archive_file_suites) |
248 | + |
249 | for distroseries in self.distro: |
250 | for pocket in self.archive.getPockets(): |
251 | if not is_careful: |
252 | - if not self.isDirty(distroseries, pocket): |
253 | + if (not self.isDirty(distroseries, pocket) and |
254 | + (distroseries, pocket) not in archive_file_suites): |
255 | self.log.debug("Skipping release files for %s/%s" % |
256 | (distroseries.name, pocket.name)) |
257 | continue |
258 | @@ -811,6 +985,95 @@ |
259 | return self.distro.displayname |
260 | return "LP-PPA-%s" % get_ppa_reference(self.archive) |
261 | |
262 | + def _updateByHash(self, suite, release_data): |
263 | + """Update by-hash files for a suite. |
264 | + |
265 | + This takes Release file data which references a set of on-disk |
266 | + files, injects any newly-modified files from that set into the |
267 | + librarian and the ArchiveFile table, and updates the on-disk by-hash |
268 | + directories to be in sync with ArchiveFile. Any on-disk by-hash |
269 | + entries that ceased to be current sufficiently long ago are removed. |
270 | + """ |
271 | + archive_file_set = getUtility(IArchiveFileSet) |
272 | + by_hashes = ByHashes(self._config.archiveroot, self.log) |
273 | + suite_dir = os.path.relpath( |
274 | + os.path.join(self._config.distsroot, suite), |
275 | + self._config.archiveroot) |
276 | + container = "release:%s" % suite |
277 | + |
278 | + # Gather information on entries in the current Release file, and |
279 | + # make sure nothing there is condemned. |
280 | + current_files = {} |
281 | + current_sha256_checksums = set() |
282 | + for current_entry in release_data["SHA256"]: |
283 | + path = os.path.join(suite_dir, current_entry["name"]) |
284 | + current_files[path] = ( |
285 | + current_entry["size"], current_entry["sha256"]) |
286 | + current_sha256_checksums.add(current_entry["sha256"]) |
287 | + for container, path, sha256 in archive_file_set.unscheduleDeletion( |
288 | + self.archive, container=container, |
289 | + sha256_checksums=current_sha256_checksums): |
290 | + self.log.debug( |
291 | + "by-hash: Unscheduled %s for %s in %s for deletion" % ( |
292 | + sha256, path, container)) |
293 | + |
294 | + # Remove any condemned files from the database whose stay of |
295 | + # execution has elapsed. We ensure that we know about all the |
296 | + # relevant by-hash directory trees before doing any removals so that |
297 | + # we can prune them properly later. |
298 | + for db_file in archive_file_set.getByArchive( |
299 | + self.archive, container=container): |
300 | + by_hashes.registerChild(os.path.dirname(db_file.path)) |
301 | + for container, path, sha256 in archive_file_set.reap( |
302 | + self.archive, container=container): |
303 | + self.log.debug( |
304 | + "by-hash: Deleted %s for %s in %s" % (sha256, path, container)) |
305 | + |
306 | + # Ensure that all files recorded in the database are in by-hash. |
307 | + db_files = archive_file_set.getByArchive( |
308 | + self.archive, container=container, eager_load=True) |
309 | + for db_file in db_files: |
310 | + by_hashes.add(db_file.path, db_file.library_file) |
311 | + |
312 | + # Condemn any database records that do not correspond to current |
313 | + # index files. |
314 | + condemned_files = set() |
315 | + for db_file in db_files: |
316 | + if db_file.scheduled_deletion_date is None: |
317 | + path = db_file.path |
318 | + if path in current_files: |
319 | + current_sha256 = current_files[path][1] |
320 | + else: |
321 | + current_sha256 = None |
322 | + if db_file.library_file.content.sha256 != current_sha256: |
323 | + condemned_files.add(db_file) |
324 | + if condemned_files: |
325 | + for container, path, sha256 in archive_file_set.scheduleDeletion( |
326 | + condemned_files, |
327 | + timedelta(days=BY_HASH_STAY_OF_EXECUTION)): |
328 | + self.log.debug( |
329 | + "by-hash: Scheduled %s for %s in %s for deletion" % ( |
330 | + sha256, path, container)) |
331 | + |
332 | + # Ensure that all the current index files are in by-hash and have |
333 | + # corresponding database entries. |
334 | + # XXX cjwatson 2016-03-15: This should possibly use bulk creation, |
335 | + # although we can only avoid about a third of the queries since the |
336 | + # librarian client has no bulk upload methods. |
337 | + for path, (size, sha256) in current_files.items(): |
338 | + full_path = os.path.join(self._config.archiveroot, path) |
339 | + if (os.path.exists(full_path) and |
340 | + not by_hashes.known(path, "SHA256", sha256)): |
341 | + with open(full_path, "rb") as fileobj: |
342 | + db_file = archive_file_set.newFromFile( |
343 | + self.archive, container, path, fileobj, |
344 | + size, filenameToContentType(path)) |
345 | + by_hashes.add(path, db_file.library_file, copy_from_path=path) |
346 | + |
347 | + # Finally, remove any files from disk that aren't recorded in the |
348 | + # database and aren't active. |
349 | + by_hashes.prune() |
350 | + |
351 | def _writeReleaseFile(self, suite, release_data): |
352 | """Write a Release file to the archive. |
353 | |
354 | @@ -919,9 +1182,14 @@ |
355 | hashes = self._readIndexFileHashes(suite, filename) |
356 | if hashes is None: |
357 | continue |
358 | - release_file.setdefault("MD5Sum", []).append(hashes["md5sum"]) |
359 | - release_file.setdefault("SHA1", []).append(hashes["sha1"]) |
360 | - release_file.setdefault("SHA256", []).append(hashes["sha256"]) |
361 | + for archive_hash in archive_hashes: |
362 | + release_file.setdefault(archive_hash.apt_name, []).append( |
363 | + hashes[archive_hash.deb822_name]) |
364 | + |
365 | + if distroseries.publish_by_hash: |
366 | + self._updateByHash(suite, release_file) |
367 | + if distroseries.advertise_by_hash: |
368 | + release_file["Acquire-By-Hash"] = "yes" |
369 | |
370 | self._writeReleaseFile(suite, release_file) |
371 | core_files.add("Release") |
372 | @@ -1041,16 +1309,14 @@ |
373 | # Schedule this for inclusion in the Release file. |
374 | all_series_files.add(os.path.join(component, "i18n", "Index")) |
375 | |
376 | - def _readIndexFileHashes(self, distroseries_name, file_name, |
377 | - subpath=None): |
378 | + def _readIndexFileHashes(self, suite, file_name, subpath=None): |
379 | """Read an index file and return its hashes. |
380 | |
381 | - :param distroseries_name: Distro series name |
382 | + :param suite: Suite name. |
383 | :param file_name: Filename relative to the parent container directory. |
384 | - :param subpath: Optional subpath within the distroseries root. |
385 | - Generated indexes will not include this path. If omitted, |
386 | - filenames are assumed to be relative to the distroseries |
387 | - root. |
388 | + :param subpath: Optional subpath within the suite root. Generated |
389 | + indexes will not include this path. If omitted, filenames are |
390 | + assumed to be relative to the suite root. |
391 | :return: A dictionary mapping hash field names to dictionaries of |
392 | their components as defined by debian.deb822.Release (e.g. |
393 | {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None |
394 | @@ -1058,8 +1324,7 @@ |
395 | """ |
396 | open_func = open |
397 | full_name = os.path.join( |
398 | - self._config.distsroot, distroseries_name, subpath or '.', |
399 | - file_name) |
400 | + self._config.distsroot, suite, subpath or '.', file_name) |
401 | if not os.path.exists(full_name): |
402 | if os.path.exists(full_name + '.gz'): |
403 | open_func = gzip.open |
404 | @@ -1075,10 +1340,8 @@ |
405 | return None |
406 | |
407 | hashes = { |
408 | - "md5sum": hashlib.md5(), |
409 | - "sha1": hashlib.sha1(), |
410 | - "sha256": hashlib.sha256(), |
411 | - } |
412 | + archive_hash.deb822_name: archive_hash.hash_factory() |
413 | + for archive_hash in archive_hashes} |
414 | size = 0 |
415 | with open_func(full_name) as in_file: |
416 | for chunk in iter(lambda: in_file.read(256 * 1024), ""): |
417 | |
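
A rough usage sketch of the ByHash/ByHashes machinery added above (Python 2, matching the codebase; it assumes the Launchpad tree is importable, and FakeLFA is a stand-in of mine for the ILibraryFileAlias objects the real publisher passes in):

    import hashlib
    import logging
    import os
    from collections import namedtuple

    from lp.archivepublisher.publishing import ByHashes

    # Only the digest attributes that ByHash.add reads are modelled here.
    FakeContent = namedtuple("FakeContent", ["md5", "sha1", "sha256"])
    FakeLFA = namedtuple("FakeLFA", ["content"])

    root = "/tmp/archiveroot"  # hypothetical archive root
    path = "dists/foo/main/source/Sources"
    content = "Source: foo\n"
    full_path = os.path.join(root, path)
    if not os.path.isdir(os.path.dirname(full_path)):
        os.makedirs(os.path.dirname(full_path))
    with open(full_path, "w") as f:
        f.write(content)

    lfa = FakeLFA(FakeContent(
        md5=hashlib.md5(content).hexdigest(),
        sha1=hashlib.sha1(content).hexdigest(),
        sha256=hashlib.sha256(content).hexdigest()))

    by_hashes = ByHashes(root, logging.getLogger("by-hash"))
    # copy_from_path hard-links the on-disk file into by-hash under each
    # digest name instead of fetching the content from the librarian.
    by_hashes.add(path, lfa, copy_from_path=path)
    assert by_hashes.known(path, "SHA256", lfa.content.sha256)
    by_hashes.prune()  # drops any by-hash entries not added this run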
418 | === modified file 'lib/lp/archivepublisher/tests/test_publisher.py' |
419 | --- lib/lp/archivepublisher/tests/test_publisher.py 2016-03-30 09:17:31 +0000 |
420 | +++ lib/lp/archivepublisher/tests/test_publisher.py 2016-04-02 00:45:52 +0000 |
421 | @@ -7,9 +7,14 @@ |
422 | |
423 | import bz2 |
424 | import crypt |
425 | +from datetime import ( |
426 | + datetime, |
427 | + timedelta, |
428 | + ) |
429 | from functools import partial |
430 | import gzip |
431 | import hashlib |
432 | +from operator import attrgetter |
433 | import os |
434 | import shutil |
435 | import stat |
436 | @@ -22,9 +27,20 @@ |
437 | import lzma |
438 | except ImportError: |
439 | from backports import lzma |
440 | +import pytz |
441 | from testtools.matchers import ( |
442 | ContainsAll, |
443 | + DirContains, |
444 | + Equals, |
445 | + FileContains, |
446 | + Is, |
447 | LessThan, |
448 | + Matcher, |
449 | + MatchesListwise, |
450 | + MatchesSetwise, |
451 | + MatchesStructure, |
452 | + Not, |
453 | + PathExists, |
454 | ) |
455 | import transaction |
456 | from zope.component import getUtility |
457 | @@ -36,6 +52,8 @@ |
458 | IArchiveSigningKey, |
459 | ) |
460 | from lp.archivepublisher.publishing import ( |
461 | + ByHash, |
462 | + ByHashes, |
463 | getPublisher, |
464 | I18nIndex, |
465 | Publisher, |
466 | @@ -51,6 +69,7 @@ |
467 | from lp.registry.interfaces.series import SeriesStatus |
468 | from lp.services.config import config |
469 | from lp.services.database.constants import UTC_NOW |
470 | +from lp.services.database.sqlbase import flush_database_caches |
471 | from lp.services.features import getFeatureFlag |
472 | from lp.services.features.testing import FeatureFixture |
473 | from lp.services.gpg.interfaces import IGPGHandler |
474 | @@ -69,12 +88,16 @@ |
475 | PackageUploadStatus, |
476 | ) |
477 | from lp.soyuz.interfaces.archive import IArchiveSet |
478 | +from lp.soyuz.interfaces.archivefile import IArchiveFileSet |
479 | from lp.soyuz.tests.test_publishing import TestNativePublishingBase |
480 | from lp.testing import TestCaseWithFactory |
481 | from lp.testing.fakemethod import FakeMethod |
482 | from lp.testing.gpgkeys import gpgkeysdir |
483 | from lp.testing.keyserver import KeyServerTac |
484 | -from lp.testing.layers import ZopelessDatabaseLayer |
485 | +from lp.testing.layers import ( |
486 | + LaunchpadZopelessLayer, |
487 | + ZopelessDatabaseLayer, |
488 | + ) |
489 | |
490 | |
491 | RELEASE = PackagePublishingPocket.RELEASE |
492 | @@ -424,6 +447,226 @@ |
493 | 'i386', publications[0].distroarchseries.architecturetag) |
494 | |
495 | |
496 | +class ByHashHasContents(Matcher): |
497 | + """Matches if a by-hash directory has exactly the specified contents.""" |
498 | + |
499 | + def __init__(self, contents): |
500 | + self.contents = contents |
501 | + |
502 | + def match(self, by_hash_path): |
503 | + mismatch = DirContains(["MD5Sum", "SHA1", "SHA256"]).match( |
504 | + by_hash_path) |
505 | + if mismatch is not None: |
506 | + return mismatch |
507 | + for hashname, hashattr in ( |
508 | + ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")): |
509 | + digests = { |
510 | + getattr(hashlib, hashattr)(content).hexdigest(): content |
511 | + for content in self.contents} |
512 | + path = os.path.join(by_hash_path, hashname) |
513 | + mismatch = DirContains(digests.keys()).match(path) |
514 | + if mismatch is not None: |
515 | + return mismatch |
516 | + for digest, content in digests.items(): |
517 | + mismatch = FileContains(content).match( |
518 | + os.path.join(path, digest)) |
519 | + if mismatch is not None: |
520 | + return mismatch |
521 | + |
522 | + |
523 | +class ByHashesHaveContents(Matcher): |
524 | + """Matches if only these by-hash directories exist with proper contents.""" |
525 | + |
526 | + def __init__(self, path_contents): |
527 | + self.path_contents = path_contents |
528 | + |
529 | + def match(self, root): |
530 | + children = set() |
531 | + for dirpath, dirnames, _ in os.walk(root): |
532 | + if "by-hash" in dirnames: |
533 | + children.add(os.path.relpath(dirpath, root)) |
534 | + mismatch = MatchesSetwise( |
535 | + *(Equals(path) for path in self.path_contents)).match(children) |
536 | + if mismatch is not None: |
537 | + return mismatch |
538 | + for path, contents in self.path_contents.items(): |
539 | + by_hash_path = os.path.join(root, path, "by-hash") |
540 | + mismatch = ByHashHasContents(contents).match(by_hash_path) |
541 | + if mismatch is not None: |
542 | + return mismatch |
543 | + |
544 | + |
545 | +class TestByHash(TestCaseWithFactory): |
546 | + """Unit tests for details of handling a single by-hash directory tree.""" |
547 | + |
548 | + layer = LaunchpadZopelessLayer |
549 | + |
550 | + def test_add(self): |
551 | + root = self.makeTemporaryDirectory() |
552 | + contents = ["abc\n", "def\n"] |
553 | + lfas = [ |
554 | + self.factory.makeLibraryFileAlias(content=content) |
555 | + for content in contents] |
556 | + transaction.commit() |
557 | + by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger()) |
558 | + for lfa in lfas: |
559 | + by_hash.add("Sources", lfa) |
560 | + by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash") |
561 | + self.assertThat(by_hash_path, ByHashHasContents(contents)) |
562 | + |
563 | + def test_add_copy_from_path(self): |
564 | + root = self.makeTemporaryDirectory() |
565 | + content = "abc\n" |
566 | + sources_path = "dists/foo/main/source/Sources" |
567 | + with open_for_writing( |
568 | + os.path.join(root, sources_path), "w") as sources: |
569 | + sources.write(content) |
570 | + lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True) |
571 | + by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger()) |
572 | + by_hash.add("Sources", lfa, copy_from_path=sources_path) |
573 | + by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash") |
574 | + self.assertThat(by_hash_path, ByHashHasContents([content])) |
575 | + |
576 | + def test_add_existing(self): |
577 | + root = self.makeTemporaryDirectory() |
578 | + content = "abc\n" |
579 | + lfa = self.factory.makeLibraryFileAlias(content=content) |
580 | + by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash") |
581 | + for hashname, hashattr in ( |
582 | + ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")): |
583 | + digest = getattr(hashlib, hashattr)(content).hexdigest() |
584 | + with open_for_writing( |
585 | + os.path.join(by_hash_path, hashname, digest), "w") as f: |
586 | + f.write(content) |
587 | + by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger()) |
588 | + self.assertThat(by_hash_path, ByHashHasContents([content])) |
589 | + by_hash.add("Sources", lfa) |
590 | + self.assertThat(by_hash_path, ByHashHasContents([content])) |
591 | + |
592 | + def test_known(self): |
593 | + root = self.makeTemporaryDirectory() |
594 | + content = "abc\n" |
595 | + with open_for_writing(os.path.join(root, "abc"), "w") as f: |
596 | + f.write(content) |
597 | + lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True) |
598 | + by_hash = ByHash(root, "", DevNullLogger()) |
599 | + md5 = hashlib.md5(content).hexdigest() |
600 | + sha1 = hashlib.sha1(content).hexdigest() |
601 | + sha256 = hashlib.sha256(content).hexdigest() |
602 | + self.assertFalse(by_hash.known("abc", "MD5Sum", md5)) |
603 | + self.assertFalse(by_hash.known("abc", "SHA1", sha1)) |
604 | + self.assertFalse(by_hash.known("abc", "SHA256", sha256)) |
605 | + by_hash.add("abc", lfa, copy_from_path="abc") |
606 | + self.assertTrue(by_hash.known("abc", "MD5Sum", md5)) |
607 | + self.assertTrue(by_hash.known("abc", "SHA1", sha1)) |
608 | + self.assertTrue(by_hash.known("abc", "SHA256", sha256)) |
609 | + self.assertFalse(by_hash.known("def", "SHA256", sha256)) |
610 | + by_hash.add("def", lfa, copy_from_path="abc") |
611 | + self.assertTrue(by_hash.known("def", "SHA256", sha256)) |
612 | + |
613 | + def test_prune(self): |
614 | + root = self.makeTemporaryDirectory() |
615 | + content = "abc\n" |
616 | + sources_path = "dists/foo/main/source/Sources" |
617 | + with open_for_writing(os.path.join(root, sources_path), "w") as f: |
618 | + f.write(content) |
619 | + lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True) |
620 | + by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger()) |
621 | + by_hash.add("Sources", lfa, copy_from_path=sources_path) |
622 | + by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash") |
623 | + with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"): |
624 | + pass |
625 | + self.assertThat(by_hash_path, Not(ByHashHasContents([content]))) |
626 | + by_hash.prune() |
627 | + self.assertThat(by_hash_path, ByHashHasContents([content])) |
628 | + |
629 | + def test_prune_empty(self): |
630 | + root = self.makeTemporaryDirectory() |
631 | + by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger()) |
632 | + by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash") |
633 | + with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"): |
634 | + pass |
635 | + self.assertThat(by_hash_path, PathExists()) |
636 | + by_hash.prune() |
637 | + self.assertThat(by_hash_path, Not(PathExists())) |
638 | + |
639 | + |
640 | +class TestByHashes(TestCaseWithFactory): |
641 | + """Unit tests for details of handling a set of by-hash directory trees.""" |
642 | + |
643 | + layer = LaunchpadZopelessLayer |
644 | + |
645 | + def test_add(self): |
646 | + root = self.makeTemporaryDirectory() |
647 | + self.assertThat(root, ByHashesHaveContents({})) |
648 | + path_contents = { |
649 | + "dists/foo/main/source": {"Sources": "abc\n"}, |
650 | + "dists/foo/main/binary-amd64": { |
651 | + "Packages.gz": "def\n", "Packages.xz": "ghi\n"}, |
652 | + } |
653 | + by_hashes = ByHashes(root, DevNullLogger()) |
654 | + for dirpath, contents in path_contents.items(): |
655 | + for name, content in contents.items(): |
656 | + path = os.path.join(dirpath, name) |
657 | + with open_for_writing(os.path.join(root, path), "w") as f: |
658 | + f.write(content) |
659 | + lfa = self.factory.makeLibraryFileAlias( |
660 | + content=content, db_only=True) |
661 | + by_hashes.add(path, lfa, copy_from_path=path) |
662 | + self.assertThat(root, ByHashesHaveContents({ |
663 | + path: contents.values() |
664 | + for path, contents in path_contents.items()})) |
665 | + |
666 | + def test_known(self): |
667 | + root = self.makeTemporaryDirectory() |
668 | + content = "abc\n" |
669 | + sources_path = "dists/foo/main/source/Sources" |
670 | + with open_for_writing(os.path.join(root, sources_path), "w") as f: |
671 | + f.write(content) |
672 | + lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True) |
673 | + by_hashes = ByHashes(root, DevNullLogger()) |
674 | + md5 = hashlib.md5(content).hexdigest() |
675 | + sha1 = hashlib.sha1(content).hexdigest() |
676 | + sha256 = hashlib.sha256(content).hexdigest() |
677 | + self.assertFalse(by_hashes.known(sources_path, "MD5Sum", md5)) |
678 | + self.assertFalse(by_hashes.known(sources_path, "SHA1", sha1)) |
679 | + self.assertFalse(by_hashes.known(sources_path, "SHA256", sha256)) |
680 | + by_hashes.add(sources_path, lfa, copy_from_path=sources_path) |
681 | + self.assertTrue(by_hashes.known(sources_path, "MD5Sum", md5)) |
682 | + self.assertTrue(by_hashes.known(sources_path, "SHA1", sha1)) |
683 | + self.assertTrue(by_hashes.known(sources_path, "SHA256", sha256)) |
684 | + |
685 | + def test_prune(self): |
686 | + root = self.makeTemporaryDirectory() |
687 | + path_contents = { |
688 | + "dists/foo/main/source": {"Sources": "abc\n"}, |
689 | + "dists/foo/main/binary-amd64": { |
690 | + "Packages.gz": "def\n", "Packages.xz": "ghi\n"}, |
691 | + } |
692 | + by_hashes = ByHashes(root, DevNullLogger()) |
693 | + for dirpath, contents in path_contents.items(): |
694 | + for name, content in contents.items(): |
695 | + path = os.path.join(dirpath, name) |
696 | + with open_for_writing(os.path.join(root, path), "w") as f: |
697 | + f.write(content) |
698 | + lfa = self.factory.makeLibraryFileAlias( |
699 | + content=content, db_only=True) |
700 | + by_hashes.add(path, lfa, copy_from_path=path) |
701 | + strays = [ |
702 | + "dists/foo/main/source/by-hash/MD5Sum/0", |
703 | + "dists/foo/main/binary-amd64/by-hash/MD5Sum/0", |
704 | + ] |
705 | + for stray in strays: |
706 | + with open_for_writing(os.path.join(root, stray), "w"): |
707 | + pass |
708 | + matcher = ByHashesHaveContents({ |
709 | + path: contents.values() |
710 | + for path, contents in path_contents.items()}) |
711 | + self.assertThat(root, Not(matcher)) |
712 | + by_hashes.prune() |
713 | + self.assertThat(root, matcher) |
714 | + |
715 | + |
716 | class TestPublisher(TestPublisherBase): |
717 | """Testing `Publisher` behaviour.""" |
718 | |
719 | @@ -1018,6 +1261,22 @@ |
720 | self.assertEqual( |
721 | 1 + old_num_pending_archives, new_num_pending_archives) |
722 | |
723 | + def testPendingArchiveWithReapableFiles(self): |
724 | + # getPendingPublicationPPAs returns archives that have reapable |
725 | + # ArchiveFiles. |
726 | + ubuntu = getUtility(IDistributionSet)['ubuntu'] |
727 | + archive = self.factory.makeArchive() |
728 | + self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs()) |
729 | + archive_file = self.factory.makeArchiveFile(archive=archive) |
730 | + self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs()) |
731 | + now = datetime.now(pytz.UTC) |
732 | + removeSecurityProxy(archive_file).scheduled_deletion_date = ( |
733 | + now + timedelta(hours=12)) |
734 | + self.assertNotIn(archive, ubuntu.getPendingPublicationPPAs()) |
735 | + removeSecurityProxy(archive_file).scheduled_deletion_date = ( |
736 | + now - timedelta(hours=12)) |
737 | + self.assertIn(archive, ubuntu.getPendingPublicationPPAs()) |
738 | + |
739 | def _checkCompressedFiles(self, archive_publisher, base_file_path, |
740 | suffixes): |
741 | """Assert that the various compressed versions of a file are equal. |
742 | @@ -1930,6 +2189,345 @@ |
743 | 'Release') |
744 | self.assertTrue(file_exists(source_release)) |
745 | |
746 | + def testUpdateByHashDisabled(self): |
747 | + # The publisher does not create by-hash directories if it is |
748 | + # disabled in the series configuration. |
749 | + self.assertFalse(self.breezy_autotest.publish_by_hash) |
750 | + self.assertFalse(self.breezy_autotest.advertise_by_hash) |
751 | + publisher = Publisher( |
752 | + self.logger, self.config, self.disk_pool, |
753 | + self.ubuntutest.main_archive) |
754 | + |
755 | + self.getPubSource(filecontent='Source: foo\n') |
756 | + |
757 | + publisher.A_publish(False) |
758 | + publisher.C_doFTPArchive(False) |
759 | + publisher.D_writeReleaseFiles(False) |
760 | + |
761 | + suite_path = partial( |
762 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
763 | + self.assertThat( |
764 | + suite_path('main', 'source', 'by-hash'), Not(PathExists())) |
765 | + release = self.parseRelease(suite_path('Release')) |
766 | + self.assertNotIn('Acquire-By-Hash', release) |
767 | + |
768 | + def testUpdateByHashUnadvertised(self): |
769 | + # If the series configuration sets publish_by_hash but not |
770 | + # advertise_by_hash, then by-hash directories are created but not |
771 | + # advertised in Release. This is useful for testing. |
772 | + self.breezy_autotest.publish_by_hash = True |
773 | + self.assertFalse(self.breezy_autotest.advertise_by_hash) |
774 | + publisher = Publisher( |
775 | + self.logger, self.config, self.disk_pool, |
776 | + self.ubuntutest.main_archive) |
777 | + |
778 | + self.getPubSource(filecontent='Source: foo\n') |
779 | + |
780 | + publisher.A_publish(False) |
781 | + publisher.C_doFTPArchive(False) |
782 | + publisher.D_writeReleaseFiles(False) |
783 | + |
784 | + suite_path = partial( |
785 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
786 | + self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists()) |
787 | + release = self.parseRelease(suite_path('Release')) |
788 | + self.assertNotIn('Acquire-By-Hash', release) |
789 | + |
790 | + def testUpdateByHashInitial(self): |
791 | + # An initial publisher run populates by-hash directories and leaves |
792 | + # no archive files scheduled for deletion. |
793 | + self.breezy_autotest.publish_by_hash = True |
794 | + self.breezy_autotest.advertise_by_hash = True |
795 | + publisher = Publisher( |
796 | + self.logger, self.config, self.disk_pool, |
797 | + self.ubuntutest.main_archive) |
798 | + |
799 | + self.getPubSource(filecontent='Source: foo\n') |
800 | + |
801 | + publisher.A_publish(False) |
802 | + publisher.C_doFTPArchive(False) |
803 | + publisher.D_writeReleaseFiles(False) |
804 | + flush_database_caches() |
805 | + |
806 | + suite_path = partial( |
807 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
808 | + contents = set() |
809 | + for name in ('Release', 'Sources.gz', 'Sources.bz2'): |
810 | + with open(suite_path('main', 'source', name), 'rb') as f: |
811 | + contents.add(f.read()) |
812 | + |
813 | + self.assertThat( |
814 | + suite_path('main', 'source', 'by-hash'), |
815 | + ByHashHasContents(contents)) |
816 | + |
817 | + archive_files = getUtility(IArchiveFileSet).getByArchive( |
818 | + self.ubuntutest.main_archive) |
819 | + self.assertNotEqual([], archive_files) |
820 | + self.assertEqual([], [ |
821 | + archive_file for archive_file in archive_files |
822 | + if archive_file.scheduled_deletion_date is not None]) |
823 | + |
824 | + def testUpdateByHashSubsequent(self): |
825 | + # A subsequent publisher run updates by-hash directories where |
826 | + # necessary, and marks inactive index files for later deletion. |
827 | + self.breezy_autotest.publish_by_hash = True |
828 | + self.breezy_autotest.advertise_by_hash = True |
829 | + publisher = Publisher( |
830 | + self.logger, self.config, self.disk_pool, |
831 | + self.ubuntutest.main_archive) |
832 | + |
833 | + self.getPubSource(filecontent='Source: foo\n') |
834 | + |
835 | + publisher.A_publish(False) |
836 | + publisher.C_doFTPArchive(False) |
837 | + publisher.D_writeReleaseFiles(False) |
838 | + |
839 | + suite_path = partial( |
840 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
841 | + main_contents = set() |
842 | + universe_contents = set() |
843 | + for name in ('Release', 'Sources.gz', 'Sources.bz2'): |
844 | + with open(suite_path('main', 'source', name), 'rb') as f: |
845 | + main_contents.add(f.read()) |
846 | + with open(suite_path('universe', 'source', name), 'rb') as f: |
847 | + universe_contents.add(f.read()) |
848 | + |
849 | + self.getPubSource(sourcename='baz', filecontent='Source: baz\n') |
850 | + |
851 | + publisher.A_publish(False) |
852 | + publisher.C_doFTPArchive(False) |
853 | + publisher.D_writeReleaseFiles(False) |
854 | + flush_database_caches() |
855 | + |
856 | + for name in ('Release', 'Sources.gz', 'Sources.bz2'): |
857 | + with open(suite_path('main', 'source', name), 'rb') as f: |
858 | + main_contents.add(f.read()) |
859 | + |
860 | + self.assertThat( |
861 | + suite_path('main', 'source', 'by-hash'), |
862 | + ByHashHasContents(main_contents)) |
863 | + self.assertThat( |
864 | + suite_path('universe', 'source', 'by-hash'), |
865 | + ByHashHasContents(universe_contents)) |
866 | + |
867 | + archive_files = getUtility(IArchiveFileSet).getByArchive( |
868 | + self.ubuntutest.main_archive) |
869 | + self.assertContentEqual( |
870 | + ['dists/breezy-autotest/main/source/Sources.bz2', |
871 | + 'dists/breezy-autotest/main/source/Sources.gz'], |
872 | + [archive_file.path for archive_file in archive_files |
873 | + if archive_file.scheduled_deletion_date is not None]) |
874 | + |
875 | + def testUpdateByHashIdenticalFiles(self): |
876 | + # Multiple identical files in the same directory receive multiple |
877 | + # ArchiveFile rows, even though they share a by-hash entry. |
878 | + self.breezy_autotest.publish_by_hash = True |
879 | + publisher = Publisher( |
880 | + self.logger, self.config, self.disk_pool, |
881 | + self.ubuntutest.main_archive) |
882 | + suite_path = partial( |
883 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
884 | + get_contents_files = lambda: [ |
885 | + archive_file |
886 | + for archive_file in getUtility(IArchiveFileSet).getByArchive( |
887 | + self.ubuntutest.main_archive) |
888 | + if archive_file.path.startswith('dists/breezy-autotest/Contents-')] |
889 | + |
890 | + # Create the first file. |
891 | + with open_for_writing(suite_path('Contents-i386'), 'w') as f: |
892 | + f.write('A Contents file\n') |
893 | + publisher.markPocketDirty( |
894 | + self.breezy_autotest, PackagePublishingPocket.RELEASE) |
895 | + publisher.A_publish(False) |
896 | + publisher.C_doFTPArchive(False) |
897 | + publisher.D_writeReleaseFiles(False) |
898 | + flush_database_caches() |
899 | + matchers = [ |
900 | + MatchesStructure( |
901 | + path=Equals('dists/breezy-autotest/Contents-i386'), |
902 | + scheduled_deletion_date=Is(None))] |
903 | + self.assertThat(get_contents_files(), MatchesSetwise(*matchers)) |
904 | + self.assertThat( |
905 | + suite_path('by-hash'), ByHashHasContents(['A Contents file\n'])) |
906 | + |
907 | + # Add a second identical file. |
908 | + with open_for_writing(suite_path('Contents-hppa'), 'w') as f: |
909 | + f.write('A Contents file\n') |
910 | + publisher.D_writeReleaseFiles(False) |
911 | + flush_database_caches() |
912 | + matchers.append( |
913 | + MatchesStructure( |
914 | + path=Equals('dists/breezy-autotest/Contents-hppa'), |
915 | + scheduled_deletion_date=Is(None))) |
916 | + self.assertThat(get_contents_files(), MatchesSetwise(*matchers)) |
917 | + self.assertThat( |
918 | + suite_path('by-hash'), ByHashHasContents(['A Contents file\n'])) |
919 | + |
920 | + # Delete the first file, but allow it its stay of execution. |
921 | + os.unlink(suite_path('Contents-i386')) |
922 | + publisher.D_writeReleaseFiles(False) |
923 | + flush_database_caches() |
924 | + matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None))) |
925 | + self.assertThat(get_contents_files(), MatchesSetwise(*matchers)) |
926 | + self.assertThat( |
927 | + suite_path('by-hash'), ByHashHasContents(['A Contents file\n'])) |
928 | + |
929 | + # Arrange for the first file to be pruned, and delete the second |
930 | + # file. |
931 | + now = datetime.now(pytz.UTC) |
932 | + i386_file = getUtility(IArchiveFileSet).getByArchive( |
933 | + self.ubuntutest.main_archive, |
934 | + path=u'dists/breezy-autotest/Contents-i386').one() |
935 | + removeSecurityProxy(i386_file).scheduled_deletion_date = ( |
936 | + now - timedelta(hours=1)) |
937 | + os.unlink(suite_path('Contents-hppa')) |
938 | + publisher.D_writeReleaseFiles(False) |
939 | + flush_database_caches() |
940 | + matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))] |
941 | + self.assertThat(get_contents_files(), MatchesSetwise(*matchers)) |
942 | + self.assertThat( |
943 | + suite_path('by-hash'), ByHashHasContents(['A Contents file\n'])) |
944 | + |
945 | + # Arrange for the second file to be pruned. |
946 | + hppa_file = getUtility(IArchiveFileSet).getByArchive( |
947 | + self.ubuntutest.main_archive, |
948 | + path=u'dists/breezy-autotest/Contents-hppa').one() |
949 | + removeSecurityProxy(hppa_file).scheduled_deletion_date = ( |
950 | + now - timedelta(hours=1)) |
951 | + publisher.D_writeReleaseFiles(False) |
952 | + flush_database_caches() |
953 | + self.assertContentEqual([], get_contents_files()) |
954 | + self.assertThat(suite_path('by-hash'), Not(PathExists())) |
955 | + |
956 | + def testUpdateByHashReprieve(self): |
957 | + # If a newly-modified index file is identical to a |
958 | + # previously-condemned one, then it is reprieved and not pruned. |
959 | + self.breezy_autotest.publish_by_hash = True |
960 | + # Enable uncompressed index files to avoid relying on stable output |
961 | + # from compressors in this test. |
962 | + self.breezy_autotest.index_compressors = [ |
963 | + IndexCompressionType.UNCOMPRESSED] |
964 | + publisher = Publisher( |
965 | + self.logger, self.config, self.disk_pool, |
966 | + self.ubuntutest.main_archive) |
967 | + |
968 | + # Publish empty index files. |
969 | + publisher.markPocketDirty( |
970 | + self.breezy_autotest, PackagePublishingPocket.RELEASE) |
971 | + publisher.A_publish(False) |
972 | + publisher.C_doFTPArchive(False) |
973 | + publisher.D_writeReleaseFiles(False) |
974 | + suite_path = partial( |
975 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
976 | + main_contents = set() |
977 | + for name in ('Release', 'Sources'): |
978 | + with open(suite_path('main', 'source', name), 'rb') as f: |
979 | + main_contents.add(f.read()) |
980 | + |
981 | + # Add a source package so that Sources is non-empty. |
982 | + pub_source = self.getPubSource(filecontent='Source: foo\n') |
983 | + publisher.A_publish(False) |
984 | + publisher.C_doFTPArchive(False) |
985 | + publisher.D_writeReleaseFiles(False) |
986 | + transaction.commit() |
987 | + with open(suite_path('main', 'source', 'Sources'), 'rb') as f: |
988 | + main_contents.add(f.read()) |
989 | + self.assertEqual(3, len(main_contents)) |
990 | + self.assertThat( |
991 | + suite_path('main', 'source', 'by-hash'), |
992 | + ByHashHasContents(main_contents)) |
993 | + |
994 | + # Make the empty Sources file ready to prune. |
995 | + old_archive_files = [] |
996 | + for archive_file in getUtility(IArchiveFileSet).getByArchive( |
997 | + self.ubuntutest.main_archive): |
998 | + if ('main/source' in archive_file.path and |
999 | + archive_file.scheduled_deletion_date is not None): |
1000 | + old_archive_files.append(archive_file) |
1001 | + self.assertEqual(1, len(old_archive_files)) |
1002 | + removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = ( |
1003 | + datetime.now(pytz.UTC) - timedelta(hours=1)) |
1004 | + |
1005 | + # Delete the source package so that Sources is empty again. The |
1006 | + # empty file is reprieved and the non-empty one is condemned. |
1007 | + pub_source.requestDeletion(self.ubuntutest.owner) |
1008 | + publisher.A_publish(False) |
1009 | + publisher.C_doFTPArchive(False) |
1010 | + publisher.D_writeReleaseFiles(False) |
1011 | + transaction.commit() |
1012 | + self.assertThat( |
1013 | + suite_path('main', 'source', 'by-hash'), |
1014 | + ByHashHasContents(main_contents)) |
1015 | + archive_files = getUtility(IArchiveFileSet).getByArchive( |
1016 | + self.ubuntutest.main_archive, |
1017 | + path=u'dists/breezy-autotest/main/source/Sources') |
1018 | + self.assertThat( |
1019 | + sorted(archive_files, key=attrgetter('id')), |
1020 | + MatchesListwise([ |
1021 | + MatchesStructure(scheduled_deletion_date=Is(None)), |
1022 | + MatchesStructure(scheduled_deletion_date=Not(Is(None))), |
1023 | + ])) |
1024 | + |
1025 | + def testUpdateByHashPrune(self): |
1026 | + # The publisher prunes files from by-hash that were condemned more |
1027 | + # than a day ago. |
1028 | + self.breezy_autotest.publish_by_hash = True |
1029 | + self.breezy_autotest.advertise_by_hash = True |
1030 | + publisher = Publisher( |
1031 | + self.logger, self.config, self.disk_pool, |
1032 | + self.ubuntutest.main_archive) |
1033 | + |
1034 | + suite_path = partial( |
1035 | + os.path.join, self.config.distsroot, 'breezy-autotest') |
1036 | + main_contents = set() |
1037 | + for sourcename in ('foo', 'bar'): |
1038 | + self.getPubSource( |
1039 | + sourcename=sourcename, filecontent='Source: %s\n' % sourcename) |
1040 | + publisher.A_publish(False) |
1041 | + publisher.C_doFTPArchive(False) |
1042 | + publisher.D_writeReleaseFiles(False) |
1043 | + for name in ('Release', 'Sources.gz', 'Sources.bz2'): |
1044 | + with open(suite_path('main', 'source', name), 'rb') as f: |
1045 | + main_contents.add(f.read()) |
1046 | + transaction.commit() |
1047 | + # Undo any previous determination that breezy-autotest is dirty, so |
1048 | + # that we can use that to check that future runs don't force index |
1049 | + # regeneration. |
1050 | + publisher.dirty_pockets = set() |
1051 | + |
1052 | + self.assertThat( |
1053 | + suite_path('main', 'source', 'by-hash'), |
1054 | + ByHashHasContents(main_contents)) |
1055 | + old_archive_files = [] |
1056 | + for archive_file in getUtility(IArchiveFileSet).getByArchive( |
1057 | + self.ubuntutest.main_archive): |
1058 | + if ('main/source' in archive_file.path and |
1059 | + archive_file.scheduled_deletion_date is not None): |
1060 | + old_archive_files.append(archive_file) |
1061 | + self.assertEqual(2, len(old_archive_files)) |
1062 | + |
1063 | + now = datetime.now(pytz.UTC) |
1064 | + removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = ( |
1065 | + now + timedelta(hours=12)) |
1066 | + removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = ( |
1067 | + now - timedelta(hours=12)) |
1068 | + old_archive_files[1].library_file.open() |
1069 | + try: |
1070 | + main_contents.remove(old_archive_files[1].library_file.read()) |
1071 | + finally: |
1072 | + old_archive_files[1].library_file.close() |
1073 | + self.assertThat( |
1074 | + suite_path('main', 'source', 'by-hash'), |
1075 | + Not(ByHashHasContents(main_contents))) |
1076 | + |
1077 | + publisher.A2_markPocketsWithDeletionsDirty() |
1078 | + publisher.C_doFTPArchive(False) |
1079 | + publisher.D_writeReleaseFiles(False) |
1080 | + self.assertEqual(set(), publisher.dirty_pockets) |
1081 | + self.assertThat( |
1082 | + suite_path('main', 'source', 'by-hash'), |
1083 | + ByHashHasContents(main_contents)) |
1084 | + |
1085 | def testCreateSeriesAliasesNoAlias(self): |
1086 | """createSeriesAliases has nothing to do by default.""" |
1087 | publisher = Publisher( |
1088 | |
1089 | === modified file 'lib/lp/registry/model/distribution.py' |
1090 | --- lib/lp/registry/model/distribution.py 2015-10-13 13:22:08 +0000 |
1091 | +++ lib/lp/registry/model/distribution.py 2016-04-02 00:45:52 +0000 |
1092 | @@ -1,4 +1,4 @@ |
1093 | -# Copyright 2009-2015 Canonical Ltd. This software is licensed under the |
1094 | +# Copyright 2009-2016 Canonical Ltd. This software is licensed under the |
1095 | # GNU Affero General Public License version 3 (see the file LICENSE). |
1096 | |
1097 | """Database classes for implementing distribution items.""" |
1098 | @@ -1283,10 +1283,22 @@ |
1099 | bin_query, clauseTables=['BinaryPackagePublishingHistory'], |
1100 | orderBy=['archive.id'], distinct=True) |
1101 | |
1102 | + reapable_af_query = """ |
1103 | + Archive.purpose = %s AND |
1104 | + Archive.distribution = %s AND |
1105 | + ArchiveFile.archive = archive.id AND |
1106 | + ArchiveFile.scheduled_deletion_date < %s |
1107 | + """ % sqlvalues(ArchivePurpose.PPA, self, UTC_NOW) |
1108 | + |
1109 | + reapable_af_archives = Archive.select( |
1110 | + reapable_af_query, clauseTables=['ArchiveFile'], |
1111 | + orderBy=['archive.id'], distinct=True) |
1112 | + |
1113 | deleting_archives = Archive.selectBy( |
1114 | status=ArchiveStatus.DELETING).orderBy(['archive.id']) |
1115 | |
1116 | - return src_archives.union(bin_archives).union(deleting_archives) |
1117 | + return src_archives.union(bin_archives).union( |
1118 | + reapable_af_archives).union(deleting_archives) |
1119 | |
1120 | def getArchiveByComponent(self, component_name): |
1121 | """See `IDistribution`.""" |
1122 | |
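
A consequence of the distribution.py change above that is easy to miss: a PPA whose only outstanding work is reaping expired by-hash files (ArchiveFile rows whose scheduled_deletion_date has passed) now counts as pending publication, so the publisher still visits it even when there are no new source or binary publications. testPendingArchiveWithReapableFiles in the test changes above covers exactly this case.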
1123 | === modified file 'lib/lp/services/helpers.py' |
1124 | --- lib/lp/services/helpers.py 2014-05-07 15:28:50 +0000 |
1125 | +++ lib/lp/services/helpers.py 2016-04-02 00:45:52 +0000 |
1126 | @@ -10,6 +10,7 @@ |
1127 | |
1128 | __metaclass__ = type |
1129 | |
1130 | +from collections import OrderedDict |
1131 | from difflib import unified_diff |
1132 | import re |
1133 | from StringIO import StringIO |
1134 | @@ -224,19 +225,37 @@ |
1135 | |
1136 | >>> filenameToContentType('test.tgz') |
1137 | 'application/octet-stream' |
1138 | + |
1139 | + Build logs |
1140 | + >>> filenameToContentType('buildlog.txt.gz') |
1141 | + 'text/plain' |
1142 | + |
1143 | + Various compressed files |
1144 | + |
1145 | + >>> filenameToContentType('Packages.gz') |
1146 | + 'application/x-gzip' |
1147 | + >>> filenameToContentType('Packages.bz2') |
1148 | + 'application/x-bzip2' |
1149 | + >>> filenameToContentType('Packages.xz') |
1150 | + 'application/x-xz' |
1151 | """ |
1152 | - ftmap = {".dsc": "text/plain", |
1153 | - ".changes": "text/plain", |
1154 | - ".deb": "application/x-debian-package", |
1155 | - ".udeb": "application/x-debian-package", |
1156 | - ".txt": "text/plain", |
1157 | - # For the build master logs |
1158 | - ".txt.gz": "text/plain", |
1159 | - # For live filesystem builds |
1160 | - ".manifest": "text/plain", |
1161 | - ".manifest-remove": "text/plain", |
1162 | - ".size": "text/plain", |
1163 | - } |
1164 | + ftmap = OrderedDict([ |
1165 | + (".dsc", "text/plain"), |
1166 | + (".changes", "text/plain"), |
1167 | + (".deb", "application/x-debian-package"), |
1168 | + (".udeb", "application/x-debian-package"), |
1169 | + (".txt", "text/plain"), |
1170 | + # For the build master logs |
1171 | + (".txt.gz", "text/plain"), |
1172 | + # For live filesystem builds |
1173 | + (".manifest", "text/plain"), |
1174 | + (".manifest-remove", "text/plain"), |
1175 | + (".size", "text/plain"), |
1176 | + # Compressed files |
1177 | + (".gz", "application/x-gzip"), |
1178 | + (".bz2", "application/x-bzip2"), |
1179 | + (".xz", "application/x-xz"), |
1180 | + ]) |
1181 | for ending in ftmap: |
1182 | if fname.endswith(ending): |
1183 | return ftmap[ending] |
1184 | |
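
Note that the switch to OrderedDict in filenameToContentType above is not cosmetic: the function returns the first suffix match, and with the newly added ".gz" entry an ordinary dict (unordered on Python 2) could try ".gz" before ".txt.gz", misclassifying build logs like buildlog.txt.gz as application/x-gzip rather than text/plain. Insertion order guarantees that the more specific suffix wins.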
1185 | === modified file 'lib/lp/services/librarian/interfaces/__init__.py' |
1186 | --- lib/lp/services/librarian/interfaces/__init__.py 2016-03-14 16:28:19 +0000 |
1187 | +++ lib/lp/services/librarian/interfaces/__init__.py 2016-04-02 00:45:52 +0000 |
1188 | @@ -155,7 +155,7 @@ |
1189 | class ILibraryFileAliasSet(Interface): |
1190 | |
1191 | def create(name, size, file, contentType, expires=None, debugID=None, |
1192 | - restricted=False): |
1193 | + restricted=False, allow_zero_length=False): |
1194 | """Create a file in the Librarian, returning the new alias. |
1195 | |
1196 | An expiry time of None means the file will never expire until it |
1197 | |
1198 | === modified file 'lib/lp/services/librarian/model.py' |
1199 | --- lib/lp/services/librarian/model.py 2016-03-14 16:28:19 +0000 |
1200 | +++ lib/lp/services/librarian/model.py 2016-04-02 00:45:52 +0000 |
1201 | @@ -244,7 +244,7 @@ |
1202 | """Create and find LibraryFileAliases.""" |
1203 | |
1204 | def create(self, name, size, file, contentType, expires=None, |
1205 | - debugID=None, restricted=False): |
1206 | + debugID=None, restricted=False, allow_zero_length=False): |
1207 | """See `ILibraryFileAliasSet`""" |
1208 | if restricted: |
1209 | client = getUtility(IRestrictedLibrarianClient) |
1210 | @@ -252,7 +252,9 @@ |
1211 | client = getUtility(ILibrarianClient) |
1212 | if '/' in name: |
1213 | raise InvalidFilename("Filename cannot contain slashes.") |
1214 | - fid = client.addFile(name, size, file, contentType, expires, debugID) |
1215 | + fid = client.addFile( |
1216 | + name, size, file, contentType, expires=expires, debugID=debugID, |
1217 | + allow_zero_length=allow_zero_length) |
1218 | lfa = IMasterStore(LibraryFileAlias).find( |
1219 | LibraryFileAlias, LibraryFileAlias.id == fid).one() |
1220 | assert lfa is not None, "client.addFile didn't!" |
1221 | |
1222 | === modified file 'lib/lp/soyuz/interfaces/archivefile.py' |
1223 | --- lib/lp/soyuz/interfaces/archivefile.py 2016-03-18 15:09:37 +0000 |
1224 | +++ lib/lp/soyuz/interfaces/archivefile.py 2016-04-02 00:45:52 +0000 |
1225 | @@ -79,13 +79,15 @@ |
1226 | :param content_type: The MIME type of the file. |
1227 | """ |
1228 | |
1229 | - def getByArchive(archive, container=None, eager_load=False): |
1230 | + def getByArchive(archive, container=None, path=None, eager_load=False): |
1231 | """Get files in an archive. |
1232 | |
1233 | :param archive: Return files in this `IArchive`. |
1234 | :param container: Return only files with this container. |
1235 | + :param path: Return only files with this path. |
1236 | :param eager_load: If True, preload related `LibraryFileAlias` and |
1237 | `LibraryFileContent` rows. |
1238 | + :return: An iterable of matched files. |
1239 | """ |
1240 | |
1241 | def scheduleDeletion(archive_files, stay_of_execution): |
1242 | @@ -94,6 +96,25 @@ |
1243 | :param archive_files: The `IArchiveFile`s to schedule for deletion. |
1244 | :param stay_of_execution: A `timedelta`; schedule files for deletion |
1245 | this amount of time in the future. |
1246 | + :return: An iterable of (container, path, sha256) for files that |
1247 | + were scheduled for deletion. |
1248 | + """ |
1249 | + |
1250 | + def unscheduleDeletion(archive, container=None, sha256_checksums=set()): |
1251 | + """Unschedule these archive files for deletion. |
1252 | + |
1253 | + This is useful in the case when the new content of a file is |
1254 | + identical to a version that was previously condemned. This method's |
1255 | + signature does not match that of `scheduleDeletion`; this is more |
1256 | + convenient because in such cases we normally do not yet have |
1257 | + `ArchiveFile` rows in hand. |
1258 | + |
1259 | + :param archive: Operate on files in this `IArchive`. |
1260 | + :param container: Operate only on files with this container. |
1261 | + :param sha256_checksums: Operate only on files with any of these |
1262 | + checksums. |
1263 | + :return: An iterable of (container, path, sha256) for files that |
1264 | + were unscheduled for deletion. |
1265 | """ |
1266 | |
1267 | def getContainersToReap(archive, container_prefix=None): |
1268 | @@ -102,6 +123,7 @@ |
1269 | :param archive: Return containers in this `IArchive`. |
1270 | :param container_prefix: Return only containers that start with this |
1271 | prefix. |
1272 | + :return: An iterable of matched container names. |
1273 | """ |
1274 | |
1275 | def reap(archive, container=None): |
1276 | @@ -109,4 +131,6 @@ |
1277 | |
1278 | :param archive: Delete files from this `IArchive`. |
1279 | :param container: Delete only files with this container. |
1280 | + :return: An iterable of (container, path, sha256) for files that |
1281 | + were deleted. |
1282 | """ |
1283 | |
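(A rough sketch of the condemn/resurrect cycle these two methods support, from a publisher's point of view; superseded_files, the archive, and the "release:xenial" container name are illustrative assumptions rather than code from this branch.)

    from datetime import timedelta

    from zope.component import getUtility

    from lp.soyuz.interfaces.archivefile import IArchiveFileSet

    archive_file_set = getUtility(IArchiveFileSet)

    # Condemn superseded index files, keeping them for a day so that
    # clients part-way through an update can still fetch them by hash.
    condemned = archive_file_set.scheduleDeletion(
        superseded_files, timedelta(days=1))

    # If a later run publishes content identical to a condemned file,
    # resurrect the existing row instead of accumulating duplicates.
    archive_file_set.unscheduleDeletion(
        archive, container="release:xenial",
        sha256_checksums=set(sha256 for _, _, sha256 in condemned))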
1284 | === modified file 'lib/lp/soyuz/model/archivefile.py' |
1285 | --- lib/lp/soyuz/model/archivefile.py 2016-03-18 15:09:37 +0000 |
1286 | +++ lib/lp/soyuz/model/archivefile.py 2016-04-02 00:45:52 +0000 |
1287 | @@ -14,7 +14,9 @@ |
1288 | import os.path |
1289 | |
1290 | import pytz |
1291 | +from storm.databases.postgres import Returning |
1292 | from storm.locals import ( |
1293 | + And, |
1294 | DateTime, |
1295 | Int, |
1296 | Reference, |
1297 | @@ -31,6 +33,7 @@ |
1298 | IMasterStore, |
1299 | IStore, |
1300 | ) |
1301 | +from lp.services.database.stormexpr import BulkUpdate |
1302 | from lp.services.librarian.interfaces import ILibraryFileAliasSet |
1303 | from lp.services.librarian.model import ( |
1304 | LibraryFileAlias, |
1305 | @@ -89,17 +92,19 @@ |
1306 | content_type): |
1307 | library_file = getUtility(ILibraryFileAliasSet).create( |
1308 | os.path.basename(path), size, fileobj, content_type, |
1309 | - restricted=archive.private) |
1310 | + restricted=archive.private, allow_zero_length=True) |
1311 | return cls.new(archive, container, path, library_file) |
1312 | |
1313 | @staticmethod |
1314 | - def getByArchive(archive, container=None, eager_load=False): |
1315 | + def getByArchive(archive, container=None, path=None, eager_load=False): |
1316 | """See `IArchiveFileSet`.""" |
1317 | clauses = [ArchiveFile.archive == archive] |
1318 | # XXX cjwatson 2016-03-15: We'll need some more sophisticated way to |
1319 | # match containers once we're using them for custom uploads. |
1320 | if container is not None: |
1321 | clauses.append(ArchiveFile.container == container) |
1322 | + if path is not None: |
1323 | + clauses.append(ArchiveFile.path == path) |
1324 | archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses) |
1325 | |
1326 | def eager_load(rows): |
1327 | @@ -114,11 +119,43 @@ |
1328 | @staticmethod |
1329 | def scheduleDeletion(archive_files, stay_of_execution): |
1330 | """See `IArchiveFileSet`.""" |
1331 | - archive_file_ids = set( |
1332 | - archive_file.id for archive_file in archive_files) |
1333 | - rows = IMasterStore(ArchiveFile).find( |
1334 | - ArchiveFile, ArchiveFile.id.is_in(archive_file_ids)) |
1335 | - rows.set(scheduled_deletion_date=UTC_NOW + stay_of_execution) |
1336 | + clauses = [ |
1337 | + ArchiveFile.id.is_in( |
1338 | + set(archive_file.id for archive_file in archive_files)), |
1339 | + ArchiveFile.library_file == LibraryFileAlias.id, |
1340 | + LibraryFileAlias.content == LibraryFileContent.id, |
1341 | + ] |
1342 | + new_date = UTC_NOW + stay_of_execution |
1343 | + return_columns = [ |
1344 | + ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256] |
1345 | + return list(IMasterStore(ArchiveFile).execute(Returning( |
1346 | + BulkUpdate( |
1347 | + {ArchiveFile.scheduled_deletion_date: new_date}, |
1348 | + table=ArchiveFile, |
1349 | + values=[LibraryFileAlias, LibraryFileContent], |
1350 | + where=And(*clauses)), |
1351 | + columns=return_columns))) |
1352 | + |
1353 | + @staticmethod |
1354 | + def unscheduleDeletion(archive, container=None, sha256_checksums=set()): |
1355 | + """See `IArchiveFileSet`.""" |
1356 | + clauses = [ |
1357 | + ArchiveFile.archive == archive, |
1358 | + ArchiveFile.library_file == LibraryFileAlias.id, |
1359 | + LibraryFileAlias.content == LibraryFileContent.id, |
1360 | + LibraryFileContent.sha256.is_in(sha256_checksums), |
1361 | + ] |
1362 | + if container is not None: |
1363 | + clauses.append(ArchiveFile.container == container) |
1364 | + return_columns = [ |
1365 | + ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256] |
1366 | + return list(IMasterStore(ArchiveFile).execute(Returning( |
1367 | + BulkUpdate( |
1368 | + {ArchiveFile.scheduled_deletion_date: None}, |
1369 | + table=ArchiveFile, |
1370 | + values=[LibraryFileAlias, LibraryFileContent], |
1371 | + where=And(*clauses)), |
1372 | + columns=return_columns))) |
1373 | |
1374 | @staticmethod |
1375 | def getContainersToReap(archive, container_prefix=None): |
1376 | @@ -134,10 +171,25 @@ |
1377 | @staticmethod |
1378 | def reap(archive, container=None): |
1379 | """See `IArchiveFileSet`.""" |
1380 | + # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to |
1381 | + # lack of support for DELETE FROM ... USING ... in Storm. |
1382 | clauses = [ |
1383 | - ArchiveFile.archive == archive, |
1384 | - ArchiveFile.scheduled_deletion_date < UTC_NOW, |
1385 | + "ArchiveFile.archive = ?", |
1386 | + "ArchiveFile.scheduled_deletion_date < " |
1387 | + "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'", |
1388 | + "ArchiveFile.library_file = LibraryFileAlias.id", |
1389 | + "LibraryFileAlias.content = LibraryFileContent.id", |
1390 | ] |
1391 | + values = [archive.id] |
1392 | if container is not None: |
1393 | - clauses.append(ArchiveFile.container == container) |
1394 | - IMasterStore(ArchiveFile).find(ArchiveFile, *clauses).remove() |
1395 | + clauses.append("ArchiveFile.container = ?") |
1396 | + values.append(container) |
1397 | + return list(IMasterStore(ArchiveFile).execute(""" |
1398 | + DELETE FROM ArchiveFile |
1399 | + USING LibraryFileAlias, LibraryFileContent |
1400 | + WHERE """ + " AND ".join(clauses) + """ |
1401 | + RETURNING |
1402 | + ArchiveFile.container, |
1403 | + ArchiveFile.path, |
1404 | + LibraryFileContent.sha256 |
1405 | + """, values)) |
1406 | |
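(A hypothetical consumer of reap()'s new return value, purely for illustration; in this branch the on-disk by-hash pruning driven by these rows lives in publishing.py, not in a helper like this.)

    import logging

    log = logging.getLogger(__name__)

    def log_reaped(archive_file_set, archive, container=None):
        # Each returned row identifies a file whose stay of execution
        # has expired; the (container, path, sha256) triple is exactly
        # what a caller needs to drop the matching by-hash entry.
        for reaped_container, path, sha256 in archive_file_set.reap(
                archive, container=container):
            log.debug(
                "reaped %s %s sha256=%s", reaped_container, path, sha256)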
1407 | === modified file 'lib/lp/soyuz/tests/test_archivefile.py' |
1408 | --- lib/lp/soyuz/tests/test_archivefile.py 2016-03-18 15:09:37 +0000 |
1409 | +++ lib/lp/soyuz/tests/test_archivefile.py 2016-04-02 00:45:52 +0000 |
1410 | @@ -19,6 +19,7 @@ |
1411 | from zope.component import getUtility |
1412 | from zope.security.proxy import removeSecurityProxy |
1413 | |
1414 | +from lp.services.database.sqlbase import flush_database_caches |
1415 | from lp.services.osutils import open_for_writing |
1416 | from lp.soyuz.interfaces.archivefile import IArchiveFileSet |
1417 | from lp.testing import TestCaseWithFactory |
1418 | @@ -75,17 +76,35 @@ |
1419 | self.assertContentEqual( |
1420 | [], archive_file_set.getByArchive(archives[0], container="bar")) |
1421 | self.assertContentEqual( |
1422 | + [archive_files[1]], |
1423 | + archive_file_set.getByArchive( |
1424 | + archives[0], path=archive_files[1].path)) |
1425 | + self.assertContentEqual( |
1426 | + [], archive_file_set.getByArchive(archives[0], path="other")) |
1427 | + self.assertContentEqual( |
1428 | archive_files[2:], archive_file_set.getByArchive(archives[1])) |
1429 | self.assertContentEqual( |
1430 | [archive_files[3]], |
1431 | archive_file_set.getByArchive(archives[1], container="foo")) |
1432 | self.assertContentEqual( |
1433 | [], archive_file_set.getByArchive(archives[1], container="bar")) |
1434 | + self.assertContentEqual( |
1435 | + [archive_files[3]], |
1436 | + archive_file_set.getByArchive( |
1437 | + archives[1], path=archive_files[3].path)) |
1438 | + self.assertContentEqual( |
1439 | + [], archive_file_set.getByArchive(archives[1], path="other")) |
1440 | |
1441 | def test_scheduleDeletion(self): |
1442 | archive_files = [self.factory.makeArchiveFile() for _ in range(3)] |
1443 | - getUtility(IArchiveFileSet).scheduleDeletion( |
1444 | + expected_rows = [ |
1445 | + (archive_file.container, archive_file.path, |
1446 | + archive_file.library_file.content.sha256) |
1447 | + for archive_file in archive_files[:2]] |
1448 | + rows = getUtility(IArchiveFileSet).scheduleDeletion( |
1449 | archive_files[:2], timedelta(days=1)) |
1450 | + self.assertContentEqual(expected_rows, rows) |
1451 | + flush_database_caches() |
1452 | tomorrow = datetime.now(pytz.UTC) + timedelta(days=1) |
1453 | # Allow a bit of timing slack for slow tests. |
1454 | self.assertThat( |
1455 | @@ -96,6 +115,34 @@ |
1456 | LessThan(timedelta(minutes=5))) |
1457 | self.assertIsNone(archive_files[2].scheduled_deletion_date) |
1458 | |
1459 | + def test_unscheduleDeletion(self): |
1460 | + archives = [self.factory.makeArchive() for _ in range(2)] |
1461 | + lfas = [ |
1462 | + self.factory.makeLibraryFileAlias(db_only=True) for _ in range(3)] |
1463 | + archive_files = [] |
1464 | + for archive in archives: |
1465 | + for container in ("foo", "bar"): |
1466 | + archive_files.extend([ |
1467 | + self.factory.makeArchiveFile( |
1468 | + archive=archive, container=container, library_file=lfa) |
1469 | + for lfa in lfas]) |
1470 | + now = datetime.now(pytz.UTC) |
1471 | + for archive_file in archive_files: |
1472 | + removeSecurityProxy(archive_file).scheduled_deletion_date = now |
1473 | + expected_rows = [ |
1474 | + ("foo", archive_files[0].path, lfas[0].content.sha256), |
1475 | + ("foo", archive_files[1].path, lfas[1].content.sha256), |
1476 | + ] |
1477 | + rows = getUtility(IArchiveFileSet).unscheduleDeletion( |
1478 | + archive=archives[0], container="foo", |
1479 | + sha256_checksums=[lfas[0].content.sha256, lfas[1].content.sha256]) |
1480 | + self.assertContentEqual(expected_rows, rows) |
1481 | + flush_database_caches() |
1482 | + self.assertContentEqual( |
1483 | + [archive_files[0], archive_files[1]], |
1484 | + [archive_file for archive_file in archive_files |
1485 | + if archive_file.scheduled_deletion_date is None]) |
1486 | + |
1487 | def test_getContainersToReap(self): |
1488 | archive = self.factory.makeArchive() |
1489 | archive_files = [] |
1490 | @@ -149,6 +196,11 @@ |
1491 | removeSecurityProxy(archive_files[4]).scheduled_deletion_date = ( |
1492 | now - timedelta(days=1)) |
1493 | archive_file_set = getUtility(IArchiveFileSet) |
1494 | - archive_file_set.reap(archive, container="foo") |
1495 | + expected_rows = [ |
1496 | + ("foo", archive_files[0].path, |
1497 | + archive_files[0].library_file.content.sha256), |
1498 | + ] |
1499 | + rows = archive_file_set.reap(archive, container="foo") |
1500 | + self.assertContentEqual(expected_rows, rows) |
1501 | self.assertContentEqual( |
1502 | archive_files[1:4], archive_file_set.getByArchive(archive)) |
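(One last illustrative helper showing how a (path, sha256) pair like those asserted in the tests above would map onto the on-disk layout. The SHA256-only directory level follows apt's by-hash scheme, and the assumption that ArchiveFile.path is relative to the archive root is mine, not taken from this diff.)

    import os.path

    def by_hash_path(archive_root, path, sha256):
        # e.g. ("/srv/ubuntu", "dists/xenial/main/binary-amd64/Packages.gz",
        #       "abc123...") ->
        # "/srv/ubuntu/dists/xenial/main/binary-amd64/by-hash/SHA256/abc123..."
        return os.path.join(
            archive_root, os.path.dirname(path),
            "by-hash", "SHA256", sha256)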
Should be worth another look now.