Merge ~cjwatson/launchpad:archive-file-deeper-history into launchpad:master
- Git
- lp:~cjwatson/launchpad
- archive-file-deeper-history
- Merge into master
Status: | Merged |
---|---|
Approved by: | Colin Watson |
Approved revision: | 12de17eaf96654d5c8987c60bd29240fdc27fdd8 |
Merge reported by: | Otto Co-Pilot |
Merged at revision: | not available |
Proposed branch: | ~cjwatson/launchpad:archive-file-deeper-history |
Merge into: | launchpad:master |
Diff against target: |
1143 lines (+424/-157) 11 files modified
database/schema/security.cfg (+1/-0) lib/lp/archivepublisher/publishing.py (+16/-12) lib/lp/archivepublisher/tests/test_publisher.py (+101/-58) lib/lp/soyuz/interfaces/archivefile.py (+18/-4) lib/lp/soyuz/model/archivefile.py (+14/-24) lib/lp/soyuz/scripts/expire_archive_files.py (+70/-27) lib/lp/soyuz/scripts/tests/test_expire_archive_files.py (+95/-9) lib/lp/soyuz/tests/test_archivefile.py (+9/-12) lib/lp/soyuz/xmlrpc/archive.py (+10/-11) lib/lp/soyuz/xmlrpc/tests/test_archive.py (+87/-0) lib/lp/testing/factory.py (+3/-0) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
William Grant | code | Approve | |
Review via email: mp+436359@code.launchpad.net |
Commit message
Retain more ArchiveFile history
Description of the change
Source and binary packages are removed from the published archive on disk a day after they are no longer referenced by current index files; package files in the librarian are typically expired a week after all their associated publishing history rows have been marked as removed from disk, although files in some archives (primary archives and certain named PPAs) never expire.
Archive index files, such as `Packages`, `Sources`, and `Release` files, currently work quite differently: old versions of these files are published in `by-hash` directories on disk so that clients can continue to make use of them for a grace period of a day, after which they are both removed from disk and removed from the database.
The latter approach is problematic for the forthcoming snapshot service, because it means we can only serve snapshots of index files for as long as they are still published on disk: not much more useful than what we already have!
This commit implements a much more useful policy. We now treat archive index files similarly to source and binary packages: they're removed from disk after a day as before, to prevent `by-hash` directories getting unreasonably large and causing bloat on mirrors, but now we merely set the `ArchiveFile.date_removed` column rather than deleting the row, so the database retains the full history of index files and their corresponding librarian files.
This will of course cause the librarian to grow more quickly, but some back-of-the-envelope estimation suggests the additional growth should be manageable. [Remainder of this paragraph truncated in source.]
DB MP: https://… [link truncated in source]
Colin Watson (cjwatson) wrote : | # |
I removed `unscheduleDeletion` [remainder of sentence truncated in source].
Expired `ArchiveFile`s aren't a problem, because (a) `getURL` just returns None and (b) `_translatePath` [remainder of sentence truncated in source].
Preview Diff
1 | diff --git a/database/schema/security.cfg b/database/schema/security.cfg |
2 | index d7a4fbb..49f2daf 100644 |
3 | --- a/database/schema/security.cfg |
4 | +++ b/database/schema/security.cfg |
5 | @@ -2032,6 +2032,7 @@ type=user |
6 | [binaryfile-expire] |
7 | groups=script |
8 | public.archive = SELECT |
9 | +public.archivefile = SELECT |
10 | public.binarypackagefile = SELECT |
11 | public.binarypackagepublishinghistory = SELECT |
12 | public.binarypackagerelease = SELECT |
13 | diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py |
14 | index 5778aa9..877ca78 100644 |
15 | --- a/lib/lp/archivepublisher/publishing.py |
16 | +++ b/lib/lp/archivepublisher/publishing.py |
17 | @@ -1274,17 +1274,19 @@ class Publisher: |
18 | by_hashes = ByHashes(self._config.distsroot, self.log) |
19 | existing_live_files = {} |
20 | existing_nonlive_files = {} |
21 | - keep_files = set() |
22 | reapable_files = set() |
23 | |
24 | def strip_dists(path): |
25 | assert path.startswith("dists/") |
26 | return path[len("dists/") :] |
27 | |
28 | - # Record all files from the database. |
29 | + # Record all published files from the database. |
30 | db_now = get_transaction_timestamp(IStore(ArchiveFile)) |
31 | for db_file in archive_file_set.getByArchive( |
32 | - self.archive, container=container, eager_load=True |
33 | + self.archive, |
34 | + container=container, |
35 | + only_published=True, |
36 | + eager_load=True, |
37 | ): |
38 | file_key = ( |
39 | strip_dists(db_file.path), |
40 | @@ -1375,16 +1377,18 @@ class Publisher: |
41 | # Remove any files from disk that aren't recorded in the database. |
42 | by_hashes.prune() |
43 | |
44 | - # And remove expired ArchiveFiles from the DB now that we've pruned |
45 | - # them and their directories from disk. |
46 | - delete_files = reapable_files - keep_files |
47 | - if delete_files: |
48 | - for container, path, sha256 in archive_file_set.delete( |
49 | - delete_files |
50 | - ): |
51 | + # And mark expired ArchiveFiles as deleted in the DB now that we've |
52 | + # pruned them and their directories from disk. |
53 | + if reapable_files: |
54 | + archive_file_set.markDeleted(reapable_files) |
55 | + for db_file in reapable_files: |
56 | self.log.debug( |
57 | - "by-hash: Deleted %s for %s in %s" |
58 | - % (sha256, path, container) |
59 | + "by-hash: Marked %s for %s in %s as deleted" |
60 | + % ( |
61 | + db_file.library_file.content.sha256, |
62 | + db_file.path, |
63 | + db_file.container, |
64 | + ) |
65 | ) |
66 | |
67 | def _writeReleaseFile(self, suite, release_data): |
68 | diff --git a/lib/lp/archivepublisher/tests/test_publisher.py b/lib/lp/archivepublisher/tests/test_publisher.py |
69 | index 33fc909..3aed43f 100644 |
70 | --- a/lib/lp/archivepublisher/tests/test_publisher.py |
71 | +++ b/lib/lp/archivepublisher/tests/test_publisher.py |
72 | @@ -17,6 +17,7 @@ from datetime import datetime, timedelta |
73 | from fnmatch import fnmatch |
74 | from functools import partial |
75 | from textwrap import dedent |
76 | +from typing import Optional, Sequence, Tuple |
77 | from unittest import mock |
78 | |
79 | import pytz |
80 | @@ -3066,7 +3067,24 @@ class TestUpdateByHash(TestPublisherBase): |
81 | superseded_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION) |
82 | ) |
83 | |
84 | - def assertHasSuiteFiles(self, patterns, *properties): |
85 | + def assertHasSuiteFiles( |
86 | + self, |
87 | + patterns: Sequence[str], |
88 | + *properties: Tuple[str, Optional[int], Optional[int], Optional[int]] |
89 | + ) -> None: |
90 | + """Assert that the database records certain archive files. |
91 | + |
92 | + :param patterns: A sequence of `fnmatch` patterns for files under |
93 | + `dists/breezy-autotest/` that we're interested in. |
94 | + :param properties: A sequence of (`path`, `created_at`, |
95 | + `superseded_at`, `removed_at`) tuples. Each must match one of |
96 | + the `ArchiveFile` rows matching `patterns`; `path` is relative |
97 | + to `dists/breezy-autotest/`, while the `*_at` properties are |
98 | + either None (indicating that the corresponding `date_*` column |
99 | + should be None) or an index into `self.times` (indicating that |
100 | + the corresponding `date_*` column should equal that time). |
101 | + """ |
102 | + |
103 | def is_interesting(path): |
104 | return any( |
105 | fnmatch(path, "dists/breezy-autotest/%s" % pattern) |
106 | @@ -3081,7 +3099,12 @@ class TestUpdateByHash(TestPublisherBase): |
107 | if is_interesting(archive_file.path) |
108 | ] |
109 | matchers = [] |
110 | - for path, created_at, superseded_at in properties: |
111 | + for path, created_at, superseded_at, removed_at in properties: |
112 | + created_at = None if created_at is None else self.times[created_at] |
113 | + superseded_at = ( |
114 | + None if superseded_at is None else self.times[superseded_at] |
115 | + ) |
116 | + removed_at = None if removed_at is None else self.times[removed_at] |
117 | scheduled_deletion_date_matcher = ( |
118 | self._makeScheduledDeletionDateMatcher(superseded_at) |
119 | ) |
120 | @@ -3091,6 +3114,7 @@ class TestUpdateByHash(TestPublisherBase): |
121 | date_created=Equals(created_at), |
122 | date_superseded=Equals(superseded_at), |
123 | scheduled_deletion_date=scheduled_deletion_date_matcher, |
124 | + date_removed=Equals(removed_at), |
125 | ) |
126 | ) |
127 | self.assertThat(files, MatchesSetwise(*matchers)) |
128 | @@ -3280,8 +3304,8 @@ class TestUpdateByHash(TestPublisherBase): |
129 | flush_database_caches() |
130 | self.assertHasSuiteFiles( |
131 | ("Contents-*", "Release"), |
132 | - ("Contents-i386", self.times[0], None), |
133 | - ("Release", self.times[0], None), |
134 | + ("Contents-i386", 0, None, None), |
135 | + ("Release", 0, None, None), |
136 | ) |
137 | releases = [get_release_contents()] |
138 | self.assertThat( |
139 | @@ -3297,10 +3321,10 @@ class TestUpdateByHash(TestPublisherBase): |
140 | flush_database_caches() |
141 | self.assertHasSuiteFiles( |
142 | ("Contents-*", "Release"), |
143 | - ("Contents-i386", self.times[0], None), |
144 | - ("Contents-hppa", self.times[1], None), |
145 | - ("Release", self.times[0], self.times[1]), |
146 | - ("Release", self.times[1], None), |
147 | + ("Contents-i386", 0, None, None), |
148 | + ("Contents-hppa", 1, None, None), |
149 | + ("Release", 0, 1, None), |
150 | + ("Release", 1, None, None), |
151 | ) |
152 | releases.append(get_release_contents()) |
153 | self.assertThat( |
154 | @@ -3315,11 +3339,11 @@ class TestUpdateByHash(TestPublisherBase): |
155 | flush_database_caches() |
156 | self.assertHasSuiteFiles( |
157 | ("Contents-*", "Release"), |
158 | - ("Contents-i386", self.times[0], self.times[2]), |
159 | - ("Contents-hppa", self.times[1], None), |
160 | - ("Release", self.times[0], self.times[1]), |
161 | - ("Release", self.times[1], self.times[2]), |
162 | - ("Release", self.times[2], None), |
163 | + ("Contents-i386", 0, 2, None), |
164 | + ("Contents-hppa", 1, None, None), |
165 | + ("Release", 0, 1, None), |
166 | + ("Release", 1, 2, None), |
167 | + ("Release", 2, None, None), |
168 | ) |
169 | releases.append(get_release_contents()) |
170 | self.assertThat( |
171 | @@ -3333,12 +3357,12 @@ class TestUpdateByHash(TestPublisherBase): |
172 | flush_database_caches() |
173 | self.assertHasSuiteFiles( |
174 | ("Contents-*", "Release"), |
175 | - ("Contents-i386", self.times[0], self.times[2]), |
176 | - ("Contents-hppa", self.times[1], None), |
177 | - ("Release", self.times[0], self.times[1]), |
178 | - ("Release", self.times[1], self.times[2]), |
179 | - ("Release", self.times[2], self.times[3]), |
180 | - ("Release", self.times[3], None), |
181 | + ("Contents-i386", 0, 2, None), |
182 | + ("Contents-hppa", 1, None, None), |
183 | + ("Release", 0, 1, None), |
184 | + ("Release", 1, 2, None), |
185 | + ("Release", 2, 3, None), |
186 | + ("Release", 3, None, None), |
187 | ) |
188 | releases.append(get_release_contents()) |
189 | self.assertThat( |
190 | @@ -3365,10 +3389,13 @@ class TestUpdateByHash(TestPublisherBase): |
191 | flush_database_caches() |
192 | self.assertHasSuiteFiles( |
193 | ("Contents-*", "Release"), |
194 | - ("Contents-hppa", self.times[1], self.times[4]), |
195 | - ("Release", self.times[2], self.times[3]), |
196 | - ("Release", self.times[3], self.times[4]), |
197 | - ("Release", self.times[4], None), |
198 | + ("Contents-i386", 0, 2, 4), |
199 | + ("Contents-hppa", 1, 4, None), |
200 | + ("Release", 0, 1, 4), |
201 | + ("Release", 1, 2, 4), |
202 | + ("Release", 2, 3, None), |
203 | + ("Release", 3, 4, None), |
204 | + ("Release", 4, None, None), |
205 | ) |
206 | releases.append(get_release_contents()) |
207 | self.assertThat( |
208 | @@ -3393,8 +3420,14 @@ class TestUpdateByHash(TestPublisherBase): |
209 | flush_database_caches() |
210 | self.assertHasSuiteFiles( |
211 | ("Contents-*", "Release"), |
212 | - ("Release", self.times[4], self.times[5]), |
213 | - ("Release", self.times[5], None), |
214 | + ("Contents-i386", 0, 2, 4), |
215 | + ("Contents-hppa", 1, 4, 5), |
216 | + ("Release", 0, 1, 4), |
217 | + ("Release", 1, 2, 4), |
218 | + ("Release", 2, 3, 5), |
219 | + ("Release", 3, 4, 5), |
220 | + ("Release", 4, 5, None), |
221 | + ("Release", 5, None, None), |
222 | ) |
223 | releases.append(get_release_contents()) |
224 | self.assertThat(suite_path("by-hash"), ByHashHasContents(releases[4:])) |
225 | @@ -3430,7 +3463,7 @@ class TestUpdateByHash(TestPublisherBase): |
226 | main_contents.add(f.read()) |
227 | self.assertHasSuiteFiles( |
228 | ("main/source/Sources",), |
229 | - ("main/source/Sources", self.times[0], None), |
230 | + ("main/source/Sources", 0, None, None), |
231 | ) |
232 | |
233 | # Add a source package so that Sources is non-empty. |
234 | @@ -3447,8 +3480,8 @@ class TestUpdateByHash(TestPublisherBase): |
235 | ) |
236 | self.assertHasSuiteFiles( |
237 | ("main/source/Sources",), |
238 | - ("main/source/Sources", self.times[0], self.times[1]), |
239 | - ("main/source/Sources", self.times[1], None), |
240 | + ("main/source/Sources", 0, 1, None), |
241 | + ("main/source/Sources", 1, None, None), |
242 | ) |
243 | |
244 | # Delete the source package so that Sources is empty again. The |
245 | @@ -3464,9 +3497,9 @@ class TestUpdateByHash(TestPublisherBase): |
246 | ) |
247 | self.assertHasSuiteFiles( |
248 | ("main/source/Sources",), |
249 | - ("main/source/Sources", self.times[0], self.times[1]), |
250 | - ("main/source/Sources", self.times[1], self.times[2]), |
251 | - ("main/source/Sources", self.times[2], None), |
252 | + ("main/source/Sources", 0, 1, None), |
253 | + ("main/source/Sources", 1, 2, None), |
254 | + ("main/source/Sources", 2, None, None), |
255 | ) |
256 | |
257 | # Make the first empty Sources file ready to prune. This doesn't |
258 | @@ -3484,8 +3517,9 @@ class TestUpdateByHash(TestPublisherBase): |
259 | ) |
260 | self.assertHasSuiteFiles( |
261 | ("main/source/Sources",), |
262 | - ("main/source/Sources", self.times[1], self.times[2]), |
263 | - ("main/source/Sources", self.times[2], None), |
264 | + ("main/source/Sources", 0, 1, 3), |
265 | + ("main/source/Sources", 1, 2, None), |
266 | + ("main/source/Sources", 2, None, None), |
267 | ) |
268 | |
269 | def setUpPruneableSuite(self): |
270 | @@ -3515,7 +3549,10 @@ class TestUpdateByHash(TestPublisherBase): |
271 | for name in ("Release", "Sources.gz", "Sources.bz2"): |
272 | with open(suite_path("main", "source", name), "rb") as f: |
273 | main_contents.append(f.read()) |
274 | - self.advanceTime(delta=timedelta(hours=6)) |
275 | + # Advance time between each publisher run. We don't advance |
276 | + # time after the last one, since we'll do that below. |
277 | + if sourcename != "baz": |
278 | + self.advanceTime(delta=timedelta(hours=6)) |
279 | transaction.commit() |
280 | |
281 | # We have two condemned sets of index files and one uncondemned set. |
282 | @@ -3523,16 +3560,16 @@ class TestUpdateByHash(TestPublisherBase): |
283 | # that it doesn't change. |
284 | self.assertHasSuiteFiles( |
285 | ("main/source/*", "Release"), |
286 | - ("main/source/Sources.gz", self.times[0], self.times[1]), |
287 | - ("main/source/Sources.gz", self.times[1], self.times[2]), |
288 | - ("main/source/Sources.gz", self.times[2], None), |
289 | - ("main/source/Sources.bz2", self.times[0], self.times[1]), |
290 | - ("main/source/Sources.bz2", self.times[1], self.times[2]), |
291 | - ("main/source/Sources.bz2", self.times[2], None), |
292 | - ("main/source/Release", self.times[0], None), |
293 | - ("Release", self.times[0], self.times[1]), |
294 | - ("Release", self.times[1], self.times[2]), |
295 | - ("Release", self.times[2], None), |
296 | + ("main/source/Sources.gz", 0, 1, None), |
297 | + ("main/source/Sources.gz", 1, 2, None), |
298 | + ("main/source/Sources.gz", 2, None, None), |
299 | + ("main/source/Sources.bz2", 0, 1, None), |
300 | + ("main/source/Sources.bz2", 1, 2, None), |
301 | + ("main/source/Sources.bz2", 2, None, None), |
302 | + ("main/source/Release", 0, None, None), |
303 | + ("Release", 0, 1, None), |
304 | + ("Release", 1, 2, None), |
305 | + ("Release", 2, None, None), |
306 | ) |
307 | self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents)) |
308 | self.assertThat( |
309 | @@ -3574,13 +3611,16 @@ class TestUpdateByHash(TestPublisherBase): |
310 | # generated. |
311 | self.assertHasSuiteFiles( |
312 | ("main/source/*", "Release"), |
313 | - ("main/source/Sources.gz", self.times[1], self.times[2]), |
314 | - ("main/source/Sources.gz", self.times[2], None), |
315 | - ("main/source/Sources.bz2", self.times[1], self.times[2]), |
316 | - ("main/source/Sources.bz2", self.times[2], None), |
317 | - ("main/source/Release", self.times[0], None), |
318 | - ("Release", self.times[1], self.times[2]), |
319 | - ("Release", self.times[2], None), |
320 | + ("main/source/Sources.gz", 0, 1, 3), |
321 | + ("main/source/Sources.gz", 1, 2, None), |
322 | + ("main/source/Sources.gz", 2, None, None), |
323 | + ("main/source/Sources.bz2", 0, 1, 3), |
324 | + ("main/source/Sources.bz2", 1, 2, None), |
325 | + ("main/source/Sources.bz2", 2, None, None), |
326 | + ("main/source/Release", 0, None, None), |
327 | + ("Release", 0, 1, 3), |
328 | + ("Release", 1, 2, None), |
329 | + ("Release", 2, None, None), |
330 | ) |
331 | self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents)) |
332 | self.assertThat( |
333 | @@ -3615,13 +3655,16 @@ class TestUpdateByHash(TestPublisherBase): |
334 | # generated. |
335 | self.assertHasSuiteFiles( |
336 | ("main/source/*", "Release"), |
337 | - ("main/source/Sources.gz", self.times[1], self.times[2]), |
338 | - ("main/source/Sources.gz", self.times[2], None), |
339 | - ("main/source/Sources.bz2", self.times[1], self.times[2]), |
340 | - ("main/source/Sources.bz2", self.times[2], None), |
341 | - ("main/source/Release", self.times[0], None), |
342 | - ("Release", self.times[1], self.times[2]), |
343 | - ("Release", self.times[2], None), |
344 | + ("main/source/Sources.gz", 0, 1, 3), |
345 | + ("main/source/Sources.gz", 1, 2, None), |
346 | + ("main/source/Sources.gz", 2, None, None), |
347 | + ("main/source/Sources.bz2", 0, 1, 3), |
348 | + ("main/source/Sources.bz2", 1, 2, None), |
349 | + ("main/source/Sources.bz2", 2, None, None), |
350 | + ("main/source/Release", 0, None, None), |
351 | + ("Release", 0, 1, 3), |
352 | + ("Release", 1, 2, None), |
353 | + ("Release", 2, None, None), |
354 | ) |
355 | self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents)) |
356 | self.assertThat( |
357 | diff --git a/lib/lp/soyuz/interfaces/archivefile.py b/lib/lp/soyuz/interfaces/archivefile.py |
358 | index 512b07b..6c530b3 100644 |
359 | --- a/lib/lp/soyuz/interfaces/archivefile.py |
360 | +++ b/lib/lp/soyuz/interfaces/archivefile.py |
361 | @@ -72,6 +72,15 @@ class IArchiveFile(Interface): |
362 | readonly=False, |
363 | ) |
364 | |
365 | + date_removed = Datetime( |
366 | + title=_( |
367 | + "The date when this file was entirely removed from the published " |
368 | + "archive." |
369 | + ), |
370 | + required=False, |
371 | + readonly=False, |
372 | + ) |
373 | + |
374 | |
375 | class IArchiveFileSet(Interface): |
376 | """Bulk operations on files in an archive.""" |
377 | @@ -104,6 +113,7 @@ class IArchiveFileSet(Interface): |
378 | path=None, |
379 | sha256=None, |
380 | condemned=None, |
381 | + only_published=False, |
382 | eager_load=False, |
383 | ): |
384 | """Get files in an archive. |
385 | @@ -119,6 +129,8 @@ class IArchiveFileSet(Interface): |
386 | scheduled_deletion_date set; if False, return only files without |
387 | a scheduled_deletion_date set; if None (the default), return |
388 | both. |
389 | + :param only_published: If True, return only files without a |
390 | + `date_removed` set. |
391 | :param eager_load: If True, preload related `LibraryFileAlias` and |
392 | `LibraryFileContent` rows. |
393 | :return: An iterable of matched files. |
394 | @@ -141,10 +153,12 @@ class IArchiveFileSet(Interface): |
395 | :return: An iterable of matched container names. |
396 | """ |
397 | |
398 | - def delete(archive_files): |
399 | - """Delete these archive files. |
400 | + def markDeleted(archive_files): |
401 | + """Mark these archive files as deleted. |
402 | + |
403 | + This does not actually delete the rows from the database; |
404 | + `lp.soyuz.scripts.expire_archive_files` will expire their |
405 | + corresponding `LibraryFileAlias` rows as needed. |
406 | |
407 | :param archive_files: The `IArchiveFile`s to delete. |
408 | - :return: An iterable of (container, path, sha256) for files that |
409 | - were deleted. |
410 | """ |
411 | diff --git a/lib/lp/soyuz/model/archivefile.py b/lib/lp/soyuz/model/archivefile.py |
412 | index b7170dc..8272dd2 100644 |
413 | --- a/lib/lp/soyuz/model/archivefile.py |
414 | +++ b/lib/lp/soyuz/model/archivefile.py |
415 | @@ -12,7 +12,7 @@ import os.path |
416 | import re |
417 | |
418 | import pytz |
419 | -from storm.locals import And, DateTime, Int, Reference, Unicode |
420 | +from storm.locals import DateTime, Int, Reference, Unicode |
421 | from zope.component import getUtility |
422 | from zope.interface import implementer |
423 | |
424 | @@ -20,7 +20,6 @@ from lp.services.database.bulk import load_related |
425 | from lp.services.database.constants import UTC_NOW |
426 | from lp.services.database.decoratedresultset import DecoratedResultSet |
427 | from lp.services.database.interfaces import IPrimaryStore, IStore |
428 | -from lp.services.database.sqlbase import convert_storm_clause_to_string |
429 | from lp.services.database.stormbase import StormBase |
430 | from lp.services.database.stormexpr import RegexpMatch |
431 | from lp.services.librarian.interfaces import ILibraryFileAliasSet |
432 | @@ -71,6 +70,10 @@ class ArchiveFile(StormBase): |
433 | name="scheduled_deletion_date", tzinfo=pytz.UTC, allow_none=True |
434 | ) |
435 | |
436 | + date_removed = DateTime( |
437 | + name="date_removed", tzinfo=pytz.UTC, allow_none=True |
438 | + ) |
439 | + |
440 | def __init__(self, archive, container, path, library_file): |
441 | """Construct an `ArchiveFile`.""" |
442 | super().__init__() |
443 | @@ -81,6 +84,7 @@ class ArchiveFile(StormBase): |
444 | self.date_created = _now() |
445 | self.date_superseded = None |
446 | self.scheduled_deletion_date = None |
447 | + self.date_removed = None |
448 | |
449 | |
450 | @implementer(IArchiveFileSet) |
451 | @@ -116,6 +120,7 @@ class ArchiveFileSet: |
452 | path_parent=None, |
453 | sha256=None, |
454 | condemned=None, |
455 | + only_published=False, |
456 | eager_load=False, |
457 | ): |
458 | """See `IArchiveFileSet`.""" |
459 | @@ -145,6 +150,8 @@ class ArchiveFileSet: |
460 | clauses.append(ArchiveFile.scheduled_deletion_date != None) |
461 | else: |
462 | clauses.append(ArchiveFile.scheduled_deletion_date == None) |
463 | + if only_published: |
464 | + clauses.append(ArchiveFile.date_removed == None) |
465 | archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses) |
466 | |
467 | def eager_load(rows): |
468 | @@ -185,30 +192,13 @@ class ArchiveFileSet: |
469 | ) |
470 | |
471 | @staticmethod |
472 | - def delete(archive_files): |
473 | + def markDeleted(archive_files): |
474 | """See `IArchiveFileSet`.""" |
475 | - # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to |
476 | - # lack of support for DELETE FROM ... USING ... in Storm. |
477 | - clauses = [ |
478 | + rows = IPrimaryStore(ArchiveFile).find( |
479 | + ArchiveFile, |
480 | ArchiveFile.id.is_in( |
481 | {archive_file.id for archive_file in archive_files} |
482 | ), |
483 | - ArchiveFile.library_file_id == LibraryFileAlias.id, |
484 | - LibraryFileAlias.contentID == LibraryFileContent.id, |
485 | - ] |
486 | - where = convert_storm_clause_to_string(And(*clauses)) |
487 | - return list( |
488 | - IPrimaryStore(ArchiveFile).execute( |
489 | - """ |
490 | - DELETE FROM ArchiveFile |
491 | - USING LibraryFileAlias, LibraryFileContent |
492 | - WHERE """ |
493 | - + where |
494 | - + """ |
495 | - RETURNING |
496 | - ArchiveFile.container, |
497 | - ArchiveFile.path, |
498 | - LibraryFileContent.sha256 |
499 | - """ |
500 | - ) |
501 | + ArchiveFile.date_removed == None, |
502 | ) |
503 | + rows.set(date_removed=_now()) |
504 | diff --git a/lib/lp/soyuz/scripts/expire_archive_files.py b/lib/lp/soyuz/scripts/expire_archive_files.py |
505 | index c1c7e9f..13af377 100755 |
506 | --- a/lib/lp/soyuz/scripts/expire_archive_files.py |
507 | +++ b/lib/lp/soyuz/scripts/expire_archive_files.py |
508 | @@ -13,6 +13,7 @@ from lp.services.librarian.model import LibraryFileAlias |
509 | from lp.services.scripts.base import LaunchpadCronScript |
510 | from lp.soyuz.enums import ArchivePurpose |
511 | from lp.soyuz.model.archive import Archive |
512 | +from lp.soyuz.model.archivefile import ArchiveFile |
513 | from lp.soyuz.model.files import BinaryPackageFile, SourcePackageReleaseFile |
514 | from lp.soyuz.model.publishing import ( |
515 | BinaryPackagePublishingHistory, |
516 | @@ -98,19 +99,67 @@ class ArchiveExpirer(LaunchpadCronScript): |
517 | ), |
518 | ) |
519 | |
520 | - def _determineExpirables(self, num_days, binary): |
521 | + def _determineExpirables(self, num_days, table): |
522 | + """Return expirable LibraryFileAlias IDs based on a given table.""" |
523 | stay_of_execution = Cast("%d days" % num_days, "interval") |
524 | archive_types = (ArchivePurpose.PPA, ArchivePurpose.PARTNER) |
525 | + eligible_clauses = [] |
526 | + denied_clauses = [] |
527 | |
528 | LFA = LibraryFileAlias |
529 | - if binary: |
530 | - xPF = BinaryPackageFile |
531 | - xPPH = BinaryPackagePublishingHistory |
532 | - xPR_join = xPF.binarypackagerelease == xPPH.binarypackagereleaseID |
533 | + if table == BinaryPackageFile: |
534 | + BPF = BinaryPackageFile |
535 | + BPPH = BinaryPackagePublishingHistory |
536 | + lfa_column = BPF.libraryfile_id |
537 | + date_removed_column = BPPH.dateremoved |
538 | + eligible_clauses.extend( |
539 | + [ |
540 | + BPF.libraryfile == LFA.id, |
541 | + BPF.binarypackagerelease == BPPH.binarypackagereleaseID, |
542 | + BPPH.archive == Archive.id, |
543 | + ] |
544 | + ) |
545 | + denied_clauses.extend( |
546 | + [ |
547 | + BPF.binarypackagerelease == BPPH.binarypackagereleaseID, |
548 | + BPPH.archive == Archive.id, |
549 | + ] |
550 | + ) |
551 | + elif table == SourcePackageReleaseFile: |
552 | + SPRF = SourcePackageReleaseFile |
553 | + SPPH = SourcePackagePublishingHistory |
554 | + lfa_column = SPRF.libraryfile_id |
555 | + date_removed_column = SPPH.dateremoved |
556 | + eligible_clauses.extend( |
557 | + [ |
558 | + SPRF.libraryfile == LFA.id, |
559 | + SPRF.sourcepackagerelease == SPPH.sourcepackagereleaseID, |
560 | + SPPH.archive == Archive.id, |
561 | + ] |
562 | + ) |
563 | + denied_clauses.extend( |
564 | + [ |
565 | + SPRF.sourcepackagerelease == SPPH.sourcepackagereleaseID, |
566 | + SPPH.archive == Archive.id, |
567 | + ] |
568 | + ) |
569 | + elif table == ArchiveFile: |
570 | + lfa_column = ArchiveFile.library_file_id |
571 | + date_removed_column = ArchiveFile.date_removed |
572 | + eligible_clauses.extend( |
573 | + [ |
574 | + ArchiveFile.library_file == LFA.id, |
575 | + ArchiveFile.date_removed < UTC_NOW - stay_of_execution, |
576 | + ArchiveFile.archive == Archive.id, |
577 | + ] |
578 | + ) |
579 | + # Unlike BPF and SPRF, ArchiveFile rows don't currently share |
580 | + # LFAs (although they may share LFCs after deduplication). |
581 | + # However, we still use denied_clauses to implement |
582 | + # archive-based expiry exceptions. |
583 | + denied_clauses.append(ArchiveFile.archive == Archive.id) |
584 | else: |
585 | - xPF = SourcePackageReleaseFile |
586 | - xPPH = SourcePackagePublishingHistory |
587 | - xPR_join = xPF.sourcepackagerelease == xPPH.sourcepackagereleaseID |
588 | + raise AssertionError("Unknown table: %r" % table) |
589 | full_archive_name = Concatenate( |
590 | Person.name, Concatenate("/", Archive.name) |
591 | ) |
592 | @@ -121,19 +170,16 @@ class ArchiveExpirer(LaunchpadCronScript): |
593 | eligible = Select( |
594 | LFA.id, |
595 | where=And( |
596 | - xPF.libraryfile == LFA.id, |
597 | - xPR_join, |
598 | - xPPH.dateremoved < UTC_NOW - stay_of_execution, |
599 | - xPPH.archive == Archive.id, |
600 | + *eligible_clauses, |
601 | + date_removed_column < UTC_NOW - stay_of_execution, |
602 | Archive.purpose.is_in(archive_types), |
603 | LFA.expires == None, |
604 | ), |
605 | ) |
606 | denied = Select( |
607 | - xPF.libraryfile_id, |
608 | + lfa_column, |
609 | where=And( |
610 | - xPR_join, |
611 | - xPPH.archive == Archive.id, |
612 | + *denied_clauses, |
613 | Archive.owner == Person.id, |
614 | Or( |
615 | And( |
616 | @@ -148,21 +194,13 @@ class ArchiveExpirer(LaunchpadCronScript): |
617 | Not(full_archive_name.is_in(self.always_expire)), |
618 | ), |
619 | Not(Archive.purpose.is_in(archive_types)), |
620 | - xPPH.dateremoved > UTC_NOW - stay_of_execution, |
621 | - xPPH.dateremoved == None, |
622 | + date_removed_column > UTC_NOW - stay_of_execution, |
623 | + date_removed_column == None, |
624 | ), |
625 | ), |
626 | ) |
627 | return list(self.store.execute(Except(eligible, denied))) |
628 | |
629 | - def determineSourceExpirables(self, num_days): |
630 | - """Return expirable libraryfilealias IDs.""" |
631 | - return self._determineExpirables(num_days=num_days, binary=False) |
632 | - |
633 | - def determineBinaryExpirables(self, num_days): |
634 | - """Return expirable libraryfilealias IDs.""" |
635 | - return self._determineExpirables(num_days=num_days, binary=True) |
636 | - |
637 | def main(self): |
638 | self.logger.info("Starting the PPA binary expiration") |
639 | num_days = self.options.num_days |
640 | @@ -170,8 +208,13 @@ class ArchiveExpirer(LaunchpadCronScript): |
641 | |
642 | self.store = IStore(Archive) |
643 | |
644 | - lfa_ids = self.determineSourceExpirables(num_days) |
645 | - lfa_ids.extend(self.determineBinaryExpirables(num_days)) |
646 | + lfa_ids = [] |
647 | + for table in ( |
648 | + BinaryPackageFile, |
649 | + SourcePackageReleaseFile, |
650 | + ArchiveFile, |
651 | + ): |
652 | + lfa_ids.extend(self._determineExpirables(num_days, table)) |
653 | batch_count = 0 |
654 | batch_limit = 500 |
655 | for id in lfa_ids: |
656 | diff --git a/lib/lp/soyuz/scripts/tests/test_expire_archive_files.py b/lib/lp/soyuz/scripts/tests/test_expire_archive_files.py |
657 | index 12403fd..63c85b4 100644 |
658 | --- a/lib/lp/soyuz/scripts/tests/test_expire_archive_files.py |
659 | +++ b/lib/lp/soyuz/scripts/tests/test_expire_archive_files.py |
660 | @@ -54,7 +54,7 @@ class ArchiveExpiryTestBase(TestCaseWithFactory): |
661 | script.main() |
662 | |
663 | def _setUpExpirablePublications(self, archive=None): |
664 | - """Helper to set up two publications that are both expirable.""" |
665 | + """Helper to set up publications and indexes that are all expirable.""" |
666 | if archive is None: |
667 | archive = self.archive |
668 | pkg5 = self.stp.getPubSource( |
669 | @@ -78,7 +78,21 @@ class ArchiveExpiryTestBase(TestCaseWithFactory): |
670 | self.archive2.distribution.currentseries, pub.pocket, self.archive2 |
671 | ) |
672 | other_binary.dateremoved = self.over_threshold_date |
673 | - return pkg5, pub |
674 | + af = self.factory.makeArchiveFile( |
675 | + archive=archive, |
676 | + container="release:", |
677 | + path="dists/%s/Release" % pkg5.distroseries.getSuite(pkg5.pocket), |
678 | + date_removed=self.over_threshold_date, |
679 | + ) |
680 | + self.factory.makeArchiveFile( |
681 | + archive=self.archive2, |
682 | + container="release:", |
683 | + path="dists/%s/Release" |
684 | + % self.archive2.distribution.currentseries.getSuite(pub.pocket), |
685 | + library_file=af.library_file, |
686 | + date_removed=self.over_threshold_date, |
687 | + ) |
688 | + return pkg5, pub, af |
689 | |
690 | def assertBinaryExpired(self, publication): |
691 | self.assertNotEqual( |
692 | @@ -108,6 +122,18 @@ class ArchiveExpiryTestBase(TestCaseWithFactory): |
693 | "lfa.expires should be None, but it's not.", |
694 | ) |
695 | |
696 | + def assertIndexExpired(self, archive_file): |
697 | + self.assertIsNotNone( |
698 | + archive_file.library_file.expires, |
699 | + "lfa.expires should be set, but it's not.", |
700 | + ) |
701 | + |
702 | + def assertIndexNotExpired(self, archive_file): |
703 | + self.assertIsNone( |
704 | + archive_file.library_file.expires, |
705 | + "lfa.expires should be None, but it's not.", |
706 | + ) |
707 | + |
708 | |
709 | class ArchiveExpiryCommonTests: |
710 | """Common source/binary expiration test cases. |
711 | @@ -126,10 +152,16 @@ class ArchiveExpiryCommonTests: |
712 | [pub] = self.stp.getPubBinaries( |
713 | pub_source=pkg1, dateremoved=None, archive=self.archive |
714 | ) |
715 | + af = self.factory.makeArchiveFile( |
716 | + archive=self.archive, |
717 | + container="release:", |
718 | + path="dists/%s/Release" % pkg1.distroseries.getSuite(pkg1.pocket), |
719 | + ) |
720 | |
721 | self.runScript() |
722 | self.assertSourceNotExpired(pkg1) |
723 | self.assertBinaryNotExpired(pub) |
724 | + self.assertIndexNotExpired(af) |
725 | |
726 | def testNoExpirationWithDateUnderThreshold(self): |
727 | """Test no expiring if dateremoved too recent.""" |
728 | @@ -144,10 +176,17 @@ class ArchiveExpiryCommonTests: |
729 | dateremoved=self.under_threshold_date, |
730 | archive=self.archive, |
731 | ) |
732 | + af = self.factory.makeArchiveFile( |
733 | + archive=self.archive, |
734 | + container="release:", |
735 | + path="dists/%s/Release" % pkg2.distroseries.getSuite(pkg2.pocket), |
736 | + date_removed=self.under_threshold_date, |
737 | + ) |
738 | |
739 | self.runScript() |
740 | self.assertSourceNotExpired(pkg2) |
741 | self.assertBinaryNotExpired(pub) |
742 | + self.assertIndexNotExpired(af) |
743 | |
744 | def testExpirationWithDateOverThreshold(self): |
745 | """Test expiring works if dateremoved old enough.""" |
746 | @@ -162,10 +201,17 @@ class ArchiveExpiryCommonTests: |
747 | dateremoved=self.over_threshold_date, |
748 | archive=self.archive, |
749 | ) |
750 | + af = self.factory.makeArchiveFile( |
751 | + archive=self.archive, |
752 | + container="release:", |
753 | + path="dists/%s/Release" % pkg3.distroseries.getSuite(pkg3.pocket), |
754 | + date_removed=self.over_threshold_date, |
755 | + ) |
756 | |
757 | self.runScript() |
758 | self.assertSourceExpired(pkg3) |
759 | self.assertBinaryExpired(pub) |
760 | + self.assertIndexExpired(af) |
761 | |
762 | def testNoExpirationWithDateOverThresholdAndOtherValidPublication(self): |
763 | """Test no expiry if dateremoved old enough but other publication.""" |
764 | @@ -190,10 +236,23 @@ class ArchiveExpiryCommonTests: |
765 | self.archive2.distribution.currentseries, pub.pocket, self.archive2 |
766 | ) |
767 | other_binary.dateremoved = None |
768 | + af = self.factory.makeArchiveFile( |
769 | + archive=self.archive, |
770 | + container="release:", |
771 | + path="dists/%s/Release" % pkg4.distroseries.getSuite(pkg4.pocket), |
772 | + date_removed=self.over_threshold_date, |
773 | + ) |
774 | + self.factory.makeArchiveFile( |
775 | + archive=self.archive, |
776 | + container="release:", |
777 | + path="dists/%s/Release" % pkg4.distroseries.getSuite(pkg4.pocket), |
778 | + library_file=af.library_file, |
779 | + ) |
780 | |
781 | self.runScript() |
782 | self.assertSourceNotExpired(pkg4) |
783 | self.assertBinaryNotExpired(pub) |
784 | + self.assertIndexNotExpired(af) |
785 | |
786 | def testNoExpirationWithDateOverThresholdAndOtherPubUnderThreshold(self): |
787 | """Test no expiring. |
788 | @@ -222,10 +281,24 @@ class ArchiveExpiryCommonTests: |
789 | self.archive2.distribution.currentseries, pub.pocket, self.archive2 |
790 | ) |
791 | other_binary.dateremoved = self.under_threshold_date |
792 | + af = self.factory.makeArchiveFile( |
793 | + archive=self.archive, |
794 | + container="release:", |
795 | + path="dists/%s/Release" % pkg5.distroseries.getSuite(pkg5.pocket), |
796 | + date_removed=self.over_threshold_date, |
797 | + ) |
798 | + self.factory.makeArchiveFile( |
799 | + archive=self.archive, |
800 | + container="release:", |
801 | + path="dists/%s/Release" % pkg5.distroseries.getSuite(pkg5.pocket), |
802 | + library_file=af.library_file, |
803 | + date_removed=self.under_threshold_date, |
804 | + ) |
805 | |
806 | self.runScript() |
807 | self.assertSourceNotExpired(pkg5) |
808 | self.assertBinaryNotExpired(pub) |
809 | + self.assertIndexNotExpired(af) |
810 | |
811 | def testNoExpirationWithDateOverThresholdAndOtherPubOverThreshold(self): |
812 | """Test expiring works. |
813 | @@ -233,14 +306,15 @@ class ArchiveExpiryCommonTests: |
814 | Test expiring works if dateremoved old enough and other publication |
815 | is over date threshold. |
816 | """ |
817 | - source, binary = self._setUpExpirablePublications() |
818 | + source, binary, index = self._setUpExpirablePublications() |
819 | self.runScript() |
820 | self.assertSourceExpired(source) |
821 | self.assertBinaryExpired(binary) |
822 | + self.assertIndexExpired(index) |
823 | |
824 | def testDryRun(self): |
825 | """Test that when dryrun is specified, nothing is expired.""" |
826 | - source, binary = self._setUpExpirablePublications() |
827 | + source, binary, index = self._setUpExpirablePublications() |
828 | # We have to commit here otherwise when the script aborts it |
829 | # will remove the test publications we just created. |
830 | self.layer.txn.commit() |
831 | @@ -249,16 +323,20 @@ class ArchiveExpiryCommonTests: |
832 | script.main() |
833 | self.assertSourceNotExpired(source) |
834 | self.assertBinaryNotExpired(binary) |
835 | + self.assertIndexNotExpired(index) |
836 | |
837 | def testDoesNotAffectPrimary(self): |
838 | """Test that expiry does not happen for non-PPA publications.""" |
839 | primary_archive = getUtility(IDistributionSet)[ |
840 | "ubuntutest" |
841 | ].main_archive |
842 | - source, binary = self._setUpExpirablePublications(primary_archive) |
843 | + source, binary, index = self._setUpExpirablePublications( |
844 | + primary_archive |
845 | + ) |
846 | self.runScript() |
847 | self.assertSourceNotExpired(source) |
848 | self.assertBinaryNotExpired(binary) |
849 | + self.assertIndexNotExpired(index) |
850 | |
851 | |
852 | class TestPPAExpiry(ArchiveExpiryTestBase, ArchiveExpiryCommonTests): |
853 | @@ -280,7 +358,9 @@ class TestPPAExpiry(ArchiveExpiryTestBase, ArchiveExpiryCommonTests): |
854 | |
855 | def testNeverExpireWorks(self): |
856 | """Test that never-expiring PPA owners are not expired.""" |
857 | - source, binary = self._setUpExpirablePublications(archive=self.archive) |
858 | + source, binary, index = self._setUpExpirablePublications( |
859 | + archive=self.archive |
860 | + ) |
861 | script = self.getScript() |
862 | script.never_expire = [ |
863 | self.archive.owner.name, |
864 | @@ -289,10 +369,13 @@ class TestPPAExpiry(ArchiveExpiryTestBase, ArchiveExpiryCommonTests): |
865 | script.main() |
866 | self.assertSourceNotExpired(source) |
867 | self.assertBinaryNotExpired(binary) |
868 | + self.assertIndexNotExpired(index) |
869 | |
870 | def testNeverExpireArchivesWorks(self): |
871 | """Test that never-expiring individual PPAs are not expired.""" |
872 | - source, binary = self._setUpExpirablePublications(archive=self.archive) |
873 | + source, binary, index = self._setUpExpirablePublications( |
874 | + archive=self.archive |
875 | + ) |
876 | script = self.getScript() |
877 | script.never_expire = [ |
878 | "%s/%s" % (self.archive.owner.name, self.archive.name) |
879 | @@ -301,25 +384,28 @@ class TestPPAExpiry(ArchiveExpiryTestBase, ArchiveExpiryCommonTests): |
880 | script.main() |
881 | self.assertSourceNotExpired(source) |
882 | self.assertBinaryNotExpired(binary) |
883 | + self.assertIndexNotExpired(index) |
884 | |
885 | def testAlwaysExpireWorks(self): |
886 | """Test that always-expiring private PPAs are expired anyway.""" |
887 | p3a = self.factory.makeArchive(private=True) |
888 | - source, binary = self._setUpExpirablePublications(archive=p3a) |
889 | + source, binary, index = self._setUpExpirablePublications(archive=p3a) |
890 | script = self.getScript() |
891 | script.always_expire = ["%s/%s" % (p3a.owner.name, p3a.name)] |
892 | switch_dbuser(self.dbuser) |
893 | script.main() |
894 | self.assertSourceExpired(source) |
895 | self.assertBinaryExpired(binary) |
896 | + self.assertIndexExpired(index) |
897 | |
898 | def testPrivatePPAsNotExpired(self): |
899 | """Test that private PPAs are not expired.""" |
900 | self.archive.private = True |
901 | - source, binary = self._setUpExpirablePublications() |
902 | + source, binary, index = self._setUpExpirablePublications() |
903 | self.runScript() |
904 | self.assertSourceNotExpired(source) |
905 | self.assertBinaryNotExpired(binary) |
906 | + self.assertIndexNotExpired(index) |
907 | |
908 | |
909 | class TestPartnerExpiry(ArchiveExpiryTestBase, ArchiveExpiryCommonTests): |
910 | diff --git a/lib/lp/soyuz/tests/test_archivefile.py b/lib/lp/soyuz/tests/test_archivefile.py |
911 | index 8a7ea20..c4e2a57 100644 |
912 | --- a/lib/lp/soyuz/tests/test_archivefile.py |
913 | +++ b/lib/lp/soyuz/tests/test_archivefile.py |
914 | @@ -251,22 +251,19 @@ class TestArchiveFile(TestCaseWithFactory): |
915 | ), |
916 | ) |
917 | |
918 | - def test_delete(self): |
919 | + def test_markDeleted(self): |
920 | archive = self.factory.makeArchive() |
921 | archive_files = [ |
922 | self.factory.makeArchiveFile(archive=archive) for _ in range(4) |
923 | ] |
924 | - expected_rows = [ |
925 | - ( |
926 | - archive_file.container, |
927 | - archive_file.path, |
928 | - archive_file.library_file.content.sha256, |
929 | - ) |
930 | - for archive_file in archive_files[:2] |
931 | - ] |
932 | archive_file_set = getUtility(IArchiveFileSet) |
933 | - rows = archive_file_set.delete(archive_files[:2]) |
934 | - self.assertContentEqual(expected_rows, rows) |
935 | + archive_file_set.markDeleted(archive_files[:2]) |
936 | + flush_database_caches() |
937 | + self.assertIsNotNone(archive_files[0].date_removed) |
938 | + self.assertIsNotNone(archive_files[1].date_removed) |
939 | + self.assertIsNone(archive_files[2].date_removed) |
940 | + self.assertIsNone(archive_files[3].date_removed) |
941 | self.assertContentEqual( |
942 | - archive_files[2:], archive_file_set.getByArchive(archive) |
943 | + archive_files[2:], |
944 | + archive_file_set.getByArchive(archive, only_published=True), |
945 | ) |
946 | diff --git a/lib/lp/soyuz/xmlrpc/archive.py b/lib/lp/soyuz/xmlrpc/archive.py |
947 | index d5e5f69..f9a35a8 100644 |
948 | --- a/lib/lp/soyuz/xmlrpc/archive.py |
949 | +++ b/lib/lp/soyuz/xmlrpc/archive.py |
950 | @@ -136,6 +136,8 @@ class ArchiveAPI(LaunchpadXMLRPCView): |
951 | if checksum_type != "SHA256": |
952 | return None |
953 | |
954 | + # This implicitly includes a check that the associated LFA isn't |
955 | + # deleted, by way of joining with LFC to check the checksum. |
956 | archive_file = ( |
957 | getUtility(IArchiveFileSet) |
958 | .getByArchive( |
959 | @@ -167,7 +169,7 @@ class ArchiveAPI(LaunchpadXMLRPCView): |
960 | ) |
961 | .one() |
962 | ) |
963 | - if archive_file is None: |
964 | + if archive_file is None or archive_file.library_file.deleted: |
965 | return None |
966 | |
967 | log.info( |
968 | @@ -182,7 +184,7 @@ class ArchiveAPI(LaunchpadXMLRPCView): |
969 | self, archive_reference: str, archive, path: PurePath |
970 | ) -> Optional[str]: |
971 | lfa = archive.getPoolFileByPath(path) |
972 | - if lfa is None: |
973 | + if lfa is None or lfa.deleted: |
974 | return None |
975 | |
976 | log.info( |
977 | @@ -220,20 +222,17 @@ class ArchiveAPI(LaunchpadXMLRPCView): |
978 | return url |
979 | |
980 | # Consider other non-pool files. |
981 | - if path.parts[0] != "pool": |
982 | + elif path.parts[0] != "pool": |
983 | url = self._translatePathNonPool(archive_reference, archive, path) |
984 | if url is not None: |
985 | return url |
986 | - log.info("%s: %s not found", archive_reference, path.as_posix()) |
987 | - raise faults.NotFound( |
988 | - message="'%s' not found in '%s'." |
989 | - % (path.as_posix(), archive_reference) |
990 | - ) |
991 | |
992 | # Consider pool files. |
993 | - url = self._translatePathPool(archive_reference, archive, path) |
994 | - if url is not None: |
995 | - return url |
996 | + else: |
997 | + url = self._translatePathPool(archive_reference, archive, path) |
998 | + if url is not None: |
999 | + return url |
1000 | + |
1001 | log.info("%s: %s not found", archive_reference, path.as_posix()) |
1002 | raise faults.NotFound( |
1003 | message="'%s' not found in '%s'." |
1004 | diff --git a/lib/lp/soyuz/xmlrpc/tests/test_archive.py b/lib/lp/soyuz/xmlrpc/tests/test_archive.py |
1005 | index 1dffd43..5dd81a4 100644 |
1006 | --- a/lib/lp/soyuz/xmlrpc/tests/test_archive.py |
1007 | +++ b/lib/lp/soyuz/xmlrpc/tests/test_archive.py |
1008 | @@ -322,6 +322,26 @@ class TestArchiveAPI(TestCaseWithFactory): |
1009 | path, |
1010 | ) |
1011 | |
1012 | + def test_translatePath_by_hash_checksum_expired(self): |
1013 | + archive = removeSecurityProxy(self.factory.makeArchive()) |
1014 | + archive_file = self.factory.makeArchiveFile( |
1015 | + archive=archive, |
1016 | + container="release:jammy", |
1017 | + path="dists/jammy/InRelease", |
1018 | + ) |
1019 | + path = ( |
1020 | + "dists/jammy/by-hash/SHA256/%s" |
1021 | + % archive_file.library_file.content.sha256 |
1022 | + ) |
1023 | + removeSecurityProxy(archive_file.library_file).content = None |
1024 | + self.assertNotFound( |
1025 | + "translatePath", |
1026 | + "'%s' not found in '%s'." % (path, archive.reference), |
1027 | + "%s: %s not found" % (archive.reference, path), |
1028 | + archive.reference, |
1029 | + path, |
1030 | + ) |
1031 | + |
1032 | def test_translatePath_by_hash_checksum_found(self): |
1033 | archive = removeSecurityProxy(self.factory.makeArchive()) |
1034 | now = get_transaction_timestamp(IStore(archive)) |
1035 | @@ -389,6 +409,19 @@ class TestArchiveAPI(TestCaseWithFactory): |
1036 | "nonexistent/path", |
1037 | ) |
1038 | |
1039 | + def test_translatePath_non_pool_expired(self): |
1040 | + archive = removeSecurityProxy(self.factory.makeArchive()) |
1041 | + path = "dists/focal/InRelease" |
1042 | + archive_file = self.factory.makeArchiveFile(archive=archive, path=path) |
1043 | + removeSecurityProxy(archive_file.library_file).content = None |
1044 | + self.assertNotFound( |
1045 | + "translatePath", |
1046 | + "'%s' not found in '%s'." % (path, archive.reference), |
1047 | + "%s: %s not found" % (archive.reference, path), |
1048 | + archive.reference, |
1049 | + path, |
1050 | + ) |
1051 | + |
1052 | def test_translatePath_non_pool_found(self): |
1053 | archive = removeSecurityProxy(self.factory.makeArchive()) |
1054 | now = get_transaction_timestamp(IStore(archive)) |
1055 | @@ -471,6 +504,36 @@ class TestArchiveAPI(TestCaseWithFactory): |
1056 | path, |
1057 | ) |
1058 | |
1059 | + def test_translatePath_pool_source_expired(self): |
1060 | + archive = removeSecurityProxy(self.factory.makeArchive()) |
1061 | + spph = self.factory.makeSourcePackagePublishingHistory( |
1062 | + archive=archive, |
1063 | + status=PackagePublishingStatus.PUBLISHED, |
1064 | + sourcepackagename="test-package", |
1065 | + component="main", |
1066 | + ) |
1067 | + sprf = self.factory.makeSourcePackageReleaseFile( |
1068 | + sourcepackagerelease=spph.sourcepackagerelease, |
1069 | + library_file=self.factory.makeLibraryFileAlias( |
1070 | + filename="test-package_1.dsc", db_only=True |
1071 | + ), |
1072 | + ) |
1073 | + self.factory.makeSourcePackageReleaseFile( |
1074 | + sourcepackagerelease=spph.sourcepackagerelease, |
1075 | + library_file=self.factory.makeLibraryFileAlias( |
1076 | + filename="test-package_1.tar.xz", db_only=True |
1077 | + ), |
1078 | + ) |
1079 | + removeSecurityProxy(sprf.libraryfile).content = None |
1080 | + path = "pool/main/t/test-package/test-package_1.dsc" |
1081 | + self.assertNotFound( |
1082 | + "translatePath", |
1083 | + "'%s' not found in '%s'." % (path, archive.reference), |
1084 | + "%s: %s not found" % (archive.reference, path), |
1085 | + archive.reference, |
1086 | + path, |
1087 | + ) |
1088 | + |
1089 | def test_translatePath_pool_source_found(self): |
1090 | archive = removeSecurityProxy(self.factory.makeArchive()) |
1091 | spph = self.factory.makeSourcePackagePublishingHistory( |
1092 | @@ -551,6 +614,30 @@ class TestArchiveAPI(TestCaseWithFactory): |
1093 | path, |
1094 | ) |
1095 | |
1096 | + def test_translatePath_pool_binary_expired(self): |
1097 | + archive = removeSecurityProxy(self.factory.makeArchive()) |
1098 | + bpph = self.factory.makeBinaryPackagePublishingHistory( |
1099 | + archive=archive, |
1100 | + status=PackagePublishingStatus.PUBLISHED, |
1101 | + sourcepackagename="test-package", |
1102 | + component="main", |
1103 | + ) |
1104 | + bpf = self.factory.makeBinaryPackageFile( |
1105 | + binarypackagerelease=bpph.binarypackagerelease, |
1106 | + library_file=self.factory.makeLibraryFileAlias( |
1107 | + filename="test-package_1_amd64.deb", db_only=True |
1108 | + ), |
1109 | + ) |
1110 | + removeSecurityProxy(bpf.libraryfile).content = None |
1111 | + path = "pool/main/t/test-package/test-package_1_amd64.deb" |
1112 | + self.assertNotFound( |
1113 | + "translatePath", |
1114 | + "'%s' not found in '%s'." % (path, archive.reference), |
1115 | + "%s: %s not found" % (archive.reference, path), |
1116 | + archive.reference, |
1117 | + path, |
1118 | + ) |
1119 | + |
1120 | def test_translatePath_pool_binary_found(self): |
1121 | archive = removeSecurityProxy(self.factory.makeArchive()) |
1122 | bpph = self.factory.makeBinaryPackagePublishingHistory( |
1123 | diff --git a/lib/lp/testing/factory.py b/lib/lp/testing/factory.py |
1124 | index 62cd634..54ab372 100644 |
1125 | --- a/lib/lp/testing/factory.py |
1126 | +++ b/lib/lp/testing/factory.py |
1127 | @@ -3797,6 +3797,7 @@ class LaunchpadObjectFactory(ObjectFactory): |
1128 | library_file=None, |
1129 | date_superseded=None, |
1130 | scheduled_deletion_date=None, |
1131 | + date_removed=None, |
1132 | ): |
1133 | if archive is None: |
1134 | archive = self.makeArchive() |
1135 | @@ -3818,6 +3819,8 @@ class LaunchpadObjectFactory(ObjectFactory): |
1136 | removeSecurityProxy( |
1137 | archive_file |
1138 | ).scheduled_deletion_date = scheduled_deletion_date |
1139 | + if date_removed is not None: |
1140 | + removeSecurityProxy(archive_file).date_removed = date_removed |
1141 | return archive_file |
1142 | |
1143 | def makeBuilder( |
Looks generally good. But a couple of concerns that I haven't totally thought through yet.
Are there potential races between markDeleted and unscheduleDeletion?
Does something (maybe just ArchiveAPI._translatePathByHash?) need to check whether the ArchiveFile has been expired to avoid crashing?