Merge lp:~wgrant/launchpad/renovate-deathrow into lp:launchpad
Proposed by: William Grant
Status: Work in progress
Proposed branch: lp:~wgrant/launchpad/renovate-deathrow
Merge into: lp:launchpad
Diff against target: 816 lines (+369/-260) (has conflicts), 6 files modified:
  - lib/lp/archivepublisher/deathrow.py (+220/-231)
  - lib/lp/archivepublisher/scripts/processdeathrow.py (+18/-2)
  - lib/lp/archivepublisher/tests/deathrow.txt (+58/-22)
  - lib/lp/archivepublisher/tests/test_deathrow.py (+67/-3)
  - lib/lp/services/database/stormexpr.py (+4/-2)
  - lib/lp/soyuz/interfaces/publishing.py (+2/-0)
  - Text conflict in lib/lp/archivepublisher/scripts/processdeathrow.py
To merge this branch: bzr merge lp:~wgrant/launchpad/renovate-deathrow
Related bugs: (none)
Requested reviews: Launchpad code reviewers (status: Pending)
Review via email: mp+306687@code.launchpad.net
Commit message: (none provided)

Description of the change: (none provided)
Unmerged revisions

- 16519. By William Grant: Rework SourceDeathRow to use a more efficient query (see the second sketch below).
- 16518. By William Grant: Implement a hopefully correct new deathrow algorithm, and fix the tests to basically cope.
- 16517. By William Grant: Split out and start testing reapable file calculation.
- 16516. By William Grant: Use component/SPN IDs where possible.
- 16515. By William Grant: Start rewriting process-death-row as a LoopTuner. Currently it just removes any file referenced by a removable publication, without any of the useful checks (see the first sketch below).
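
Two revisions above lean on machinery the page itself does not explain. First, r16515's LoopTuner rework: in the diff below, DeathRow subclasses TunableLoop, whose contract is that a subclass supplies isDone() and __call__(chunk_size) and inherits a run() that keeps invoking __call__ with an adaptively sized batch until isDone() returns true. Here is a minimal, self-contained sketch of that pattern only; TunableLoopSketch, CountdownLoop, and the chunk-doubling heuristic are invented stand-ins, not Launchpad's actual lp.services.looptuner implementation.

    import logging

    logging.basicConfig(level=logging.DEBUG)


    class TunableLoopSketch:
        """Rough stand-in for lp.services.looptuner.TunableLoop."""

        maximum_chunk_size = 5000

        def __init__(self, log):
            self.log = log

        def run(self):
            # The real LoopTuner times each iteration and steers the
            # chunk size towards a target duration; doubling is a crude
            # stand-in for that feedback loop.
            chunk_size = 1
            while not self.isDone():
                self(min(chunk_size, self.maximum_chunk_size))
                chunk_size *= 2


    class CountdownLoop(TunableLoopSketch):
        """Toy subclass mirroring DeathRow's cursor-based batching:
        findPubs() becomes findPending(), and start_at advances past
        each completed batch so no row is visited twice."""

        def __init__(self, log, ids):
            TunableLoopSketch.__init__(self, log)
            self.ids = sorted(ids)
            self.start_at = self.ids[0] if self.ids else 0

        def findPending(self):
            return [i for i in self.ids if i >= self.start_at]

        def isDone(self):
            return not self.findPending()

        def __call__(self, chunk_size):
            batch = self.findPending()[:chunk_size]
            self.log.debug("processing %d items", len(batch))
            self.start_at = batch[-1] + 1  # resume after this batch


    CountdownLoop(logging.getLogger("sketch"), list(range(10))).run()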
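
Second, the query rework in r16519: calculateReapableFiles in the diff below packs the batch's candidate files into an inline VALUES construct, exposes it as a CTE named "interesting", and anti-joins it against publications that are still alive, so a single round trip decides which files are safe to delete. The following standalone reproduction of that query shape uses sqlite3 with a deliberately simplified schema; the publication table and its columns are stand-ins for Launchpad's (the real key also includes sourcepackagename and the file's md5, and the real query is assembled with Storm rather than literal SQL).

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript("""
        CREATE TABLE publication (
            id INTEGER PRIMARY KEY,
            component TEXT,
            filename TEXT,
            dateremoved TEXT  -- NULL: file not yet removed from disk
        );
        INSERT INTO publication VALUES
            (1, 'main', 'pkg_1.dsc', NULL),
            (2, 'main', 'pkg_1.dsc', NULL),
            (3, 'universe', 'pkg_1.dsc', NULL);
    """)

    # The batch being reaped is publications 1 and 3; their
    # (component, filename) keys form the inline "interesting" table.
    batch_ids = (1, 3)
    candidates = [("main", "pkg_1.dsc"), ("universe", "pkg_1.dsc")]

    rows = conn.execute("""
        WITH interesting(component, filename) AS (VALUES (?, ?), (?, ?))
        SELECT component, filename FROM interesting AS i
        WHERE NOT EXISTS (
            SELECT 1 FROM publication AS p
            WHERE p.component = i.component
                AND p.filename = i.filename
                AND p.dateremoved IS NULL
                AND p.id NOT IN (?, ?))
    """, [v for pair in candidates for v in pair] + list(batch_ids)).fetchall()

    # Publication 2 is alive, outside the batch, and shares the main
    # copy, so only the universe copy is reapable -- the same behaviour
    # the new test_calculateReapableFiles_cross_component test asserts.
    print(rows)  # [('universe', 'pkg_1.dsc')]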
Preview Diff
=== modified file 'lib/lp/archivepublisher/deathrow.py'
--- lib/lp/archivepublisher/deathrow.py 2012-08-17 11:15:35 +0000
+++ lib/lp/archivepublisher/deathrow.py 2016-09-24 06:41:02 +0000
@@ -6,31 +6,65 @@
"""
__metaclass__ = type

-import datetime
import logging
-import os

-import pytz
+from storm.expr import (
+    And,
+    Exists,
+    Not,
+    Select,
+    SQL,
+    )
+from storm.locals import (
+    Int,
+    Unicode,
+    )
+import transaction

from lp.archivepublisher.config import getPubConfig
from lp.archivepublisher.diskpool import DiskPool
+from lp.registry.model.sourcepackagename import SourcePackageName
from lp.services.database.constants import UTC_NOW
-from lp.services.database.sqlbase import sqlvalues
+from lp.services.database.lpstorm import IStore
+from lp.services.database.sqlbase import convert_storm_clause_to_string
+from lp.services.database.stormexpr import Values
+from lp.services.librarian.model import (
+    LibraryFileAlias,
+    LibraryFileContent,
+    )
+from lp.services.looptuner import TunableLoop
from lp.soyuz.enums import ArchivePurpose
from lp.soyuz.interfaces.publishing import (
-    IBinaryPackagePublishingHistory,
-    inactive_publishing_status,
-    ISourcePackagePublishingHistory,
    MissingSymlinkInPool,
    NotInPool,
    )
+from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
+from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
+from lp.soyuz.model.component import Component
+from lp.soyuz.model.files import (
+    BinaryPackageFile,
+    SourcePackageReleaseFile,
+    )
from lp.soyuz.model.publishing import (
    BinaryPackagePublishingHistory,
    SourcePackagePublishingHistory,
    )


-def getDeathRow(archive, log, pool_root_override):
+# Storm representation of calculateReapableFiles' CTE. Not a real table.
+class Interesting:
+    __storm_table__ = 'interesting'
+    # Meaningless, but get_cls_info needs it.
+    __storm_primary__ = (
+        'component_id', 'sourcepackagename_id', 'filename', 'md5')
+
+    component_id = Int(name='component')
+    sourcepackagename_id = Int(name='sourcepackagename')
+    filename = Unicode()
+    md5 = Unicode()
+
+
+def getDeathRows(archive, log, pool_root_override, dryrun):
    """Return a Deathrow object for the archive supplied.

    :param archive: Use the publisher config for this archive to derive the
@@ -58,10 +92,12 @@
    dp = DiskPool(pool_root, pubconf.temproot, diskpool_log)

    log.debug("Preparing death row.")
-    return DeathRow(archive, dp, log)
-
-
-class DeathRow:
+    return (
+        SourceDeathRow(archive, dp, log, dryrun),
+        BinaryDeathRow(archive, dp, log, dryrun))
+
+
+class DeathRow(TunableLoop):
    """A Distribution Archive Removal Processor.

    DeathRow will remove archive files from disk if they are marked for
@@ -69,227 +105,180 @@
    by other packages.
    """

-    def __init__(self, archive, diskpool, logger):
+    maximum_chunk_size = 5000
+
+    def __init__(self, archive, diskpool, logger, dry_run=False):
+        super(DeathRow, self).__init__(logger)
        self.archive = archive
        self.diskpool = diskpool
        self._removeFile = diskpool.removeFile
        self.logger = logger
-
-    def reap(self, dry_run=False):
-        """Reap packages that should be removed from the distribution.
-
-        Looks through all packages that are in condemned states and
-        have scheduleddeletiondate is in the past, try to remove their
-        files from the archive pool (which may be impossible if they are
-        used by other packages which are published), and mark them as
-        removed."""
-        if dry_run:
-            # Don't actually remove the files if we are dry running
-            def _mockRemoveFile(cn, sn, fn):
-                self.logger.debug("(Not really!) removing %s %s/%s" %
-                                  (cn, sn, fn))
-                fullpath = self.diskpool.pathFor(cn, sn, fn)
-                if not os.path.exists(fullpath):
-                    raise NotInPool
-                return os.lstat(fullpath).st_size
-            self._removeFile = _mockRemoveFile
-
-        source_files, binary_files = self._collectCondemned()
-        records = self._tryRemovingFromDisk(source_files, binary_files)
-        self._markPublicationRemoved(records)
-
-    def _collectCondemned(self):
-        """Return the condemned source and binary publications as a tuple.
-
-        Return all the `SourcePackagePublishingHistory` and
-        `BinaryPackagePublishingHistory` records that are eligible for
-        removal ('condemned') where the source/binary package that they
-        refer to is not published somewhere else.
-
-        Both sources and binaries are lists.
-        """
-        sources = SourcePackagePublishingHistory.select("""
-            SourcePackagePublishingHistory.archive = %s AND
-            SourcePackagePublishingHistory.scheduleddeletiondate < %s AND
-            SourcePackagePublishingHistory.dateremoved IS NULL AND
-            NOT EXISTS (
-                SELECT 1 FROM sourcepackagepublishinghistory as spph
-                WHERE
-                    SourcePackagePublishingHistory.sourcepackagerelease =
-                        spph.sourcepackagerelease AND
-                    spph.archive = %s AND
-                    spph.status NOT IN %s)
-            """ % sqlvalues(self.archive, UTC_NOW, self.archive,
-                            inactive_publishing_status), orderBy="id")
-        self.logger.debug("%d Sources" % sources.count())
-
-        binaries = BinaryPackagePublishingHistory.select("""
-            BinaryPackagePublishingHistory.archive = %s AND
-            BinaryPackagePublishingHistory.scheduleddeletiondate < %s AND
-            BinaryPackagePublishingHistory.dateremoved IS NULL AND
-            NOT EXISTS (
-                SELECT 1 FROM binarypackagepublishinghistory as bpph
-                WHERE
-                    BinaryPackagePublishingHistory.binarypackagerelease =
-                        bpph.binarypackagerelease AND
-                    bpph.archive = %s AND
-                    bpph.status NOT IN %s)
-            """ % sqlvalues(self.archive, UTC_NOW, self.archive,
-                            inactive_publishing_status), orderBy="id")
-        self.logger.debug("%d Binaries" % binaries.count())
-
-        return (sources, binaries)
-
-    def canRemove(self, publication_class, filename, file_md5):
-        """Check if given (filename, MD5) can be removed from the pool.
-
-        Check the archive reference-counter implemented in:
-        `SourcePackagePublishingHistory` or
-        `BinaryPackagePublishingHistory`.
-
-        Only allow removal of unnecessary files.
-        """
-        clauses = []
-        clauseTables = []
-
-        if ISourcePackagePublishingHistory.implementedBy(
-                publication_class):
-            clauses.append("""
-                SourcePackagePublishingHistory.archive = %s AND
-                SourcePackagePublishingHistory.dateremoved is NULL AND
-                SourcePackagePublishingHistory.sourcepackagerelease =
-                    SourcePackageReleaseFile.sourcepackagerelease AND
-                SourcePackageReleaseFile.libraryfile = LibraryFileAlias.id
-            """ % sqlvalues(self.archive))
-            clauseTables.append('SourcePackageReleaseFile')
-        elif IBinaryPackagePublishingHistory.implementedBy(
-                publication_class):
-            clauses.append("""
-                BinaryPackagePublishingHistory.archive = %s AND
-                BinaryPackagePublishingHistory.dateremoved is NULL AND
-                BinaryPackagePublishingHistory.binarypackagerelease =
-                    BinaryPackageFile.binarypackagerelease AND
-                BinaryPackageFile.libraryfile = LibraryFileAlias.id
-            """ % sqlvalues(self.archive))
-            clauseTables.append('BinaryPackageFile')
+        self.dry_run = dry_run
+
+        self.bytes = 0
+        self.start_at = 1
+        self.store = IStore(SourcePackagePublishingHistory)
+
+    def isDone(self):
+        return self.findPubs().is_empty()
+
+    def __call__(self, chunk_size):
+        pubs = list(self.findPubs()[:chunk_size])
+        files = self.calculateReapableFiles(pubs)
+
+        # The remaining files aren't referenced by other publications,
+        # so remove them from disk.
+        for comp, spn, filename in files:
+            if not self.dry_run:
+                try:
+                    self.bytes += self.diskpool.removeFile(comp, spn, filename)
+                except NotInPool as info:
+                    # It's safe for us to let this slide because it means that
+                    # the file is already gone.
+                    self.logger.debug(str(info))
+                except MissingSymlinkInPool as info:
+                    # This one is a little more worrying, because an expected
+                    # symlink has vanished from the pool/ (could be a code
+                    # mistake) but there is nothing we can do about it at this
+                    # point.
+                    self.logger.warn(str(info))
+            else:
+                self.logger.debug("Not removing %s (dry run)", filename)
+        for pub in pubs:
+            pub.dateremoved = UTC_NOW
+        self.start_at = pubs[-1].id + 1
+
+        if not self.dry_run:
+            transaction.commit()
        else:
-            raise AssertionError("%r is not supported." % publication_class)
-
-        clauses.append("""
-            LibraryFileAlias.content = LibraryFileContent.id AND
-            LibraryFileAlias.filename = %s AND
-            LibraryFileContent.md5 = %s
-            """ % sqlvalues(filename, file_md5))
-        clauseTables.extend(
-            ['LibraryFileAlias', 'LibraryFileContent'])
-
-        all_publications = publication_class.select(
-            " AND ".join(clauses), clauseTables=clauseTables)
-
-        right_now = datetime.datetime.now(pytz.timezone('UTC'))
-        for pub in all_publications:
-            # Deny removal if any reference is still active.
-            if pub.status not in inactive_publishing_status:
-                return False
-            # Deny removal if any reference wasn't dominated yet.
-            if pub.scheduleddeletiondate is None:
-                return False
-            # Deny removal if any reference is still in 'quarantine'.
-            if pub.scheduleddeletiondate > right_now:
-                return False
-
-        return True
-
-    def _tryRemovingFromDisk(self, condemned_source_files,
-                             condemned_binary_files):
-        """Take the list of publishing records provided and unpublish them.
-
-        You should only pass in entries you want to be unpublished because
-        this will result in the files being removed if they're not otherwise
-        in use.
-        """
-        bytes = 0
-        condemned_files = set()
-        condemned_records = set()
-        considered_files = set()
-        details = {}
-
-        def checkPubRecord(pub_record, publication_class):
-            """Check if the publishing record can be removed.
-
-            It can only be removed if all files in its context are not
-            referred to any other 'published' publishing records.
-
-            See `canRemove` for more information.
-            """
-            for pub_file in pub_record.files:
-                filename = pub_file.libraryfilealiasfilename
-                file_md5 = pub_file.libraryfilealias.content.md5
-
-                self.logger.debug("Checking %s (%s)" % (filename, file_md5))
-
-                # Calculating the file path in pool.
-                pub_file_details = (
-                    pub_file.libraryfilealiasfilename,
-                    pub_file.sourcepackagename,
-                    pub_file.componentname,
-                    )
-                file_path = self.diskpool.pathFor(*pub_file_details)
-
-                # Check if the LibraryFileAlias in question was already
-                # verified. If the verification was already made and the
-                # file is condemned queue the publishing record for removal
-                # otherwise just continue the iteration.
-                if (filename, file_md5) in considered_files:
-                    self.logger.debug("Already verified.")
-                    if file_path in condemned_files:
-                        condemned_records.add(pub_file.publishing_record)
-                    continue
-                considered_files.add((filename, file_md5))
-
-                # Check if the removal is allowed, if not continue.
-                if not self.canRemove(publication_class, filename, file_md5):
-                    self.logger.debug("Cannot remove.")
-                    continue
-
-                # Update local containers, in preparation to file removal.
-                details.setdefault(file_path, pub_file_details)
-                condemned_files.add(file_path)
-                condemned_records.add(pub_file.publishing_record)
-
-        # Check source and binary publishing records.
-        for pub_record in condemned_source_files:
-            checkPubRecord(pub_record, SourcePackagePublishingHistory)
-        for pub_record in condemned_binary_files:
-            checkPubRecord(pub_record, BinaryPackagePublishingHistory)
-
-        self.logger.info(
-            "Removing %s files marked for reaping" % len(condemned_files))
-
-        for condemned_file in sorted(condemned_files, reverse=True):
-            file_name, source_name, component_name = details[condemned_file]
-            try:
-                bytes += self._removeFile(
-                    component_name, source_name, file_name)
-            except NotInPool as info:
-                # It's safe for us to let this slide because it means that
-                # the file is already gone.
-                self.logger.debug(str(info))
-            except MissingSymlinkInPool as info:
-                # This one is a little more worrying, because an expected
-                # symlink has vanished from the pool/ (could be a code
-                # mistake) but there is nothing we can do about it at this
-                # point.
-                self.logger.warn(str(info))
-
-        self.logger.info("Total bytes freed: %s" % bytes)
-
-        return condemned_records
-
-    def _markPublicationRemoved(self, condemned_records):
-        # Now that the os.remove() calls have been made, simply let every
-        # now out-of-date record be marked as removed.
-        self.logger.debug("Marking %s condemned packages as removed." %
-                          len(condemned_records))
-        for record in condemned_records:
-            record.dateremoved = UTC_NOW
+            transaction.abort()
+
+    def calculateReapableFiles(self, pubs):
+        files = {}
+
+        # Find all the files that we might remove because of these
+        # publications.
+        for pub in pubs:
+            spr = self.getSourcePackageRelease(pub)
+            for file in self.getFiles(pub):
+                key = (
+                    pub.componentID, spr.sourcepackagenameID,
+                    file.libraryfile.filename,
+                    file.libraryfile.content.md5)
+                if key not in files:
+                    files[key] = set()
+
+        interesting_cols = [
+            ('component', 'integer'),
+            ('sourcepackagename', 'integer'),
+            ('filename', 'text'),
+            ('md5', 'text'),
+            ]
+        interesting_values = Values(None, interesting_cols, files.keys())
+
+        # Exclude any files that are referenced by other unremoved
+        # publications. They need to stay on disk now and will be
+        # removed with their final publication later.
+        # We find unremoved publications that aren't in the current
+        # batch, with files with the same
+        # (archive, component, sourcepackagename, filename, md5).
+        has_other_pubs = self.store.with_(
+            SQL('interesting(component, sourcepackagename, filename, md5) AS '
+                + convert_storm_clause_to_string(interesting_values))
+            ).find(
+                (Interesting.component_id, Interesting.sourcepackagename_id,
+                 Interesting.filename, Interesting.md5),
+                self.getOtherPubFilter(pubs),
+            ).config(distinct=True)
+        for key in has_other_pubs:
+            del files[key]
+
+        return [
+            (Component.get(c).name, SourcePackageName.get(s).name, f)
+            for (c, s, f, m) in files]
+
+
+class SourceDeathRow(DeathRow):
+
+    def findPubs(self):
+        return self.store.find(
+            SourcePackagePublishingHistory,
+            SourcePackagePublishingHistory.id >= self.start_at,
+            SourcePackagePublishingHistory.archive == self.archive,
+            SourcePackagePublishingHistory.scheduleddeletiondate < UTC_NOW,
+            SourcePackagePublishingHistory.dateremoved == None)
+
+    def getOtherPubFilter(self, pubs):
+        spr_select = Select(
+            SourcePackageReleaseFile.sourcepackagereleaseID,
+            tables=[
+                LibraryFileAlias, LibraryFileContent,
+                SourcePackageReleaseFile],
+            where=And(
+                LibraryFileAlias.filename == Interesting.filename,
+                LibraryFileContent.md5 == Interesting.md5,
+                LibraryFileContent.id == LibraryFileAlias.contentID,
+                LibraryFileAlias.id == SourcePackageReleaseFile.libraryfileID))
+        return Exists(Select(
+            1, tables=[SourcePackagePublishingHistory],
+            where=And(
+                SourcePackagePublishingHistory.sourcepackagereleaseID.is_in(
+                    spr_select),
+                # Filter conditions
+                SourcePackagePublishingHistory.componentID ==
+                    Interesting.component_id,
+                SourcePackagePublishingHistory.sourcepackagenameID ==
+                    Interesting.sourcepackagename_id,
+                SourcePackagePublishingHistory.archive == self.archive,
+                SourcePackagePublishingHistory.dateremoved == None,
+                Not(
+                    SourcePackagePublishingHistory.id.is_in(
+                        [pub.id for pub in pubs])))))
+
+    def getSourcePackageRelease(self, pub):
+        return pub.sourcepackagerelease
+
+    def getFiles(self, pub):
+        return pub.sourcepackagerelease.files
+
+
+class BinaryDeathRow(DeathRow):
+
+    def findPubs(self):
+        return self.store.find(
+            BinaryPackagePublishingHistory,
+            BinaryPackagePublishingHistory.id >= self.start_at,
+            BinaryPackagePublishingHistory.archive == self.archive,
+            BinaryPackagePublishingHistory.scheduleddeletiondate < UTC_NOW,
+            BinaryPackagePublishingHistory.dateremoved == None)
+
+    def getOtherPubQuery(self, pubs):
+        origin = [
+            BinaryPackagePublishingHistory, BinaryPackageRelease,
+            BinaryPackageBuild, BinaryPackageFile]
+        filter = [
+            # Join conditions
+            BinaryPackageRelease.id ==
+                BinaryPackagePublishingHistory.binarypackagereleaseID,
+            BinaryPackageBuild.id == BinaryPackageRelease.buildID,
+            BinaryPackageFile.binarypackagereleaseID ==
+                BinaryPackageRelease.id,
+            LibraryFileAlias.id == BinaryPackageFile.libraryfileID,
+            # Filter conditions
+            BinaryPackagePublishingHistory.componentID ==
+                Interesting.component_id,
+            BinaryPackageBuild.source_package_name_id ==
+                Interesting.sourcepackagename_id,
+            BinaryPackagePublishingHistory.archive == self.archive,
+            BinaryPackagePublishingHistory.dateremoved == None,
+            Not(
+                BinaryPackagePublishingHistory.id.is_in(
+                    [pub.id for pub in pubs])),
+            ]
+        return origin, filter
+
+    def getSourcePackageRelease(self, pub):
+        return pub.binarypackagerelease.build.source_package_release
+
+    def getFiles(self, pub):
+        return pub.binarypackagerelease.files

=== modified file 'lib/lp/archivepublisher/scripts/processdeathrow.py'
--- lib/lp/archivepublisher/scripts/processdeathrow.py 2016-09-19 11:19:16 +0000
+++ lib/lp/archivepublisher/scripts/processdeathrow.py 2016-09-24 06:41:02 +0000
@@ -15,6 +15,7 @@
    ]


+<<<<<<< TREE
from lp.archivepublisher.deathrow import getDeathRow
from lp.archivepublisher.scripts.base import PublisherScript
from lp.services.limitedlist import LimitedList
@@ -25,6 +26,16 @@


class DeathRowProcessor(PublisherScript):
+=======
+from zope.component import getUtility
+
+from lp.archivepublisher.deathrow import getDeathRows
+from lp.registry.interfaces.distribution import IDistributionSet
+from lp.services.scripts.base import LaunchpadCronScript
+
+
+class DeathRowProcessor(LaunchpadCronScript):
+>>>>>>> MERGE-SOURCE

    def add_my_options(self):
        self.parser.add_option(
@@ -59,10 +70,11 @@
        the operation just executed, i.e, commits successful runs and aborts
        runs with errors. It also respects 'dry-run' command-line option.
        """
-        death_row = getDeathRow(
-            archive, self.logger, self.options.pool_root)
+        sdr, bdr = getDeathRows(
+            archive, self.logger, self.options.pool_root, self.options.dry_run)
        self.logger.debug(
            "Unpublishing death row for %s." % archive.displayname)
+<<<<<<< TREE
        set_request_started(
            request_statements=LimitedList(10000),
            txn=self.txn, enable_timeout=False)
@@ -81,3 +93,7 @@
                self.txn.commit()
            finally:
                clear_request_started()
+=======
+        sdr.run()
+        bdr.run()
+>>>>>>> MERGE-SOURCE

=== modified file 'lib/lp/archivepublisher/tests/deathrow.txt'
--- lib/lp/archivepublisher/tests/deathrow.txt 2011-04-05 08:28:30 +0000
+++ lib/lp/archivepublisher/tests/deathrow.txt 2016-09-24 06:41:02 +0000
@@ -16,12 +16,20 @@
The no-operation use case, reflects the sampledata status.

    >>> from lp.services.log.logger import FakeLogger
-    >>> from lp.archivepublisher.deathrow import DeathRow
+    >>> from lp.archivepublisher.deathrow import (
+    ...     BinaryDeathRow,
+    ...     SourceDeathRow,
+    ...     )
    >>> from lp.archivepublisher.diskpool import DiskPool

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap(dry_run=True)
+    >>> logger = FakeLogger()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(
+    ...     ubuntu.main_archive, disk_pool, logger, dry_run=True)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(
+    ...     ubuntu.main_archive, disk_pool, logger, dry_run=True)
+    >>> bdr.run()
    DEBUG 0 Sources
    DEBUG 0 Binaries
    INFO Removing 0 files marked for reaping
@@ -237,9 +245,11 @@

Run DeathRow against the current 'removable' context.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
    DEBUG 4 Sources
    DEBUG 3 Binaries
    ...
@@ -295,15 +305,15 @@

The dependent publications were processed as expected; only the one
with 'scheduleddeletiondate' set to the past was removed, the one with
-future timestamp and the published one were kept. No binary
-publications was removed (see more below).
+future timestamp and the published one were kept. One binary publication
+was removed, but the file remains on disk for the two active publications.

    >>> for pub in dependent_records:
    ...     check_removed(pub)
    stuck 666 in hoary SUPERSEDED True
    stuck 667 in hoary SUPERSEDED False
    stuck 668 in hoary PUBLISHED False
-    stuck-bin 666 in hoary i386 SUPERSEDED False
+    stuck-bin 666 in hoary i386 SUPERSEDED True
    stuck-bin 666 in hoary i386 SUPERSEDED False
    stuck-bin 666 in hoary i386 PUBLISHED False

@@ -333,9 +343,11 @@

Now DeathRow considers 'stuck-bin' publications.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
    DEBUG 0 Sources
    DEBUG 2 Binaries
    DEBUG Checking stuck-bin_666_i386.deb (21c2e59531c8710156d34a3c30ac81d5)
@@ -346,6 +358,34 @@
    INFO Total bytes freed: 0
    DEBUG Marking 0 condemned packages as removed.

+The file is still in the repository, but now two of the three
+publications are removed.
+
+    >>> dependent_binaries = (
+    ...     published_binary,
+    ...     postponed_binary,
+    ...     removed_binary,
+    ...     )
+
+    >>> check_pool_files()
+    deleted-bin_666_i386.deb: REMOVED
+    deleted_666.dsc: REMOVED
+    obsolete-bin_666_i386.deb: REMOVED
+    obsolete_666.dsc: REMOVED
+    stuck-bin_666_i386.deb: OK
+    shared_1.0.tar.gz: OK
+    stuck_666.dsc: REMOVED
+    stuck_667.dsc: OK
+    stuck_668.dsc: OK
+    superseded-bin_666_i386.deb: REMOVED
+    superseded_666.dsc: REMOVED
+
+    >>> for pub in dependent_binaries:
+    ...     check_removed(pub)
+    stuck-bin 666 in hoary i386 SUPERSEDED True
+    stuck-bin 666 in hoary i386 SUPERSEDED False
+    stuck-bin 666 in hoary i386 SUPERSEDED True
+
After being considered for removal, DeathRow realized that this binary
could not be removed because there is still a publishing record
imposing quarantine on it. Once the quarantine is lifted, by setting a
@@ -357,9 +397,11 @@
That done, the publication and its files are free to be removed in a
single pass.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
    DEBUG 0 Sources
    DEBUG 3 Binaries
    DEBUG Checking stuck-bin_666_i386.deb (21c2e59531c8710156d34a3c30ac81d5)
@@ -374,12 +416,6 @@

The file was removed from the repository.

-    >>> dependent_binaries = (
-    ...     published_binary,
-    ...     postponed_binary,
-    ...     removed_binary,
-    ...     )
-
    >>> check_pool_files()
    deleted-bin_666_i386.deb: REMOVED
    deleted_666.dsc: REMOVED

=== modified file 'lib/lp/archivepublisher/tests/test_deathrow.py'
--- lib/lp/archivepublisher/tests/test_deathrow.py 2012-01-01 02:58:52 +0000
+++ lib/lp/archivepublisher/tests/test_deathrow.py 2016-09-24 06:41:02 +0000
@@ -12,17 +12,22 @@

from zope.component import getUtility

-from lp.archivepublisher.deathrow import DeathRow
+from lp.archivepublisher.deathrow import (
+    DeathRow,
+    SourceDeathRow,
+    )
from lp.archivepublisher.diskpool import DiskPool
from lp.registry.interfaces.distribution import IDistributionSet
+from lp.services.database.constants import UTC_NOW
from lp.services.log.logger import BufferLogger
+from lp.soyuz.interfaces.archive import ArchivePurpose
from lp.soyuz.interfaces.component import IComponentSet
from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
-from lp.testing import TestCase
+from lp.testing import TestCaseWithFactory
from lp.testing.layers import LaunchpadZopelessLayer


-class TestDeathRow(TestCase):
+class TestDeathRow(TestCaseWithFactory):

    layer = LaunchpadZopelessLayer

@@ -143,3 +148,62 @@

        self.assertDoesNotExist(main_dsc_path)
        self.assertDoesNotExist(universe_dsc_path)
+
+    def test_calculateReapableFiles_basic(self):
+        # An isolated publication's files are all removable.
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=self.factory.makeArchive(), sourcepackagename=u'something')
+        for fn in ('something_1.orig.tar.gz', 'something_1.dsc'):
+            self.factory.makeSourcePackageReleaseFile(
+                sourcepackagerelease=spph.sourcepackagerelease,
+                library_file=self.factory.makeLibraryFileAlias(
+                    filename=fn))
+
+        dr = SourceDeathRow(spph.archive, DiskPool, BufferLogger())
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz'),
+             (u'main', u'something', u'something_1.dsc')],
+            dr.calculateReapableFiles([spph]))
+
+    def test_calculateReapableFiles_cross_component(self):
+        # Files can be removed from a component once all relevant
+        # publications in that component are gone.
+        a = self.factory.makeArchive(purpose=ArchivePurpose.PRIMARY)
+        shared_file = self.factory.makeLibraryFileAlias(
+            filename=u'something_1.orig.tar.gz')
+        sources = []
+        for comp in [u'main', u'universe', u'main', u'main']:
+            spph = self.factory.makeSourcePackagePublishingHistory(
+                archive=a, component=comp, sourcepackagename=u'something')
+            spph.sourcepackagerelease.addFile(shared_file)
+            sources.append(spph)
+
+        log = BufferLogger()
+        dr = SourceDeathRow(a, DiskPool, log)
+
+        # Processing all four pubs simultaneously permits removal of all
+        # their files.
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz'),
+             (u'universe', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources))
+
+        # When processing just the first two sources, only the universe
+        # file can be removed. The main file is kept alive by the main
+        # publications in the next batch.
+        self.assertContentEqual(
+            [(u'universe', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources[:2]))
+
+        # The second batch (two main publications) can remove nothing,
+        # since all files are kept alive by the main pub in the first
+        # batch.
+        self.assertContentEqual([], dr.calculateReapableFiles(sources[2:]))
+
+        # With the first batch removed, the second batch can eliminate
+        # the main file.
+        for pub in sources[:2]:
+            pub.dateremoved = UTC_NOW
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources[2:]))

=== modified file 'lib/lp/services/database/stormexpr.py'
--- lib/lp/services/database/stormexpr.py 2016-06-10 22:02:37 +0000
+++ lib/lp/services/database/stormexpr.py 2016-09-24 06:41:02 +0000
@@ -104,8 +104,10 @@
        "%s::%s" % (compile(value, state), type)
        for value, type in zip(expr.values[0], col_types))
    rows = [first_row] + [compile(value, state) for value in expr.values[1:]]
-    return "(VALUES (%s)) AS %s(%s)" % (
-        "), (".join(rows), expr.name, ', '.join(col_names))
+    s = "(VALUES (%s))" % "), (".join(rows)
+    if expr.name is not None:
+        s += " AS %s(%s)" % (expr.name, ', '.join(col_names))
+    return s


class ColumnSelect(Expr):

=== modified file 'lib/lp/soyuz/interfaces/publishing.py'
--- lib/lp/soyuz/interfaces/publishing.py 2015-11-08 01:09:46 +0000
+++ lib/lp/soyuz/interfaces/publishing.py 2016-09-24 06:41:02 +0000
@@ -290,6 +290,7 @@
            required=False, readonly=False,
            ),
        exported_as="distro_series")
+    componentID = Attribute("DB ID for the component")
    component = Int(
        title=_('The component being published into'),
        required=False, readonly=False,
@@ -680,6 +681,7 @@
            ),
        exported_as="distro_arch_series")
    distroseries = Attribute("The distroseries being published into")
+    componentID = Attribute("DB ID for the component")
    component = Int(
        title=_('The component being published into'),
        required=False, readonly=False,
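
A note on the lib/lp/services/database/stormexpr.py hunk above: Values previously always emitted an "AS name(columns)" suffix, so a Values with name=None compiled to broken SQL (literally "AS None(...)"). The two-line change makes the alias optional, which is what lets calculateReapableFiles compile a bare VALUES list and attach the alias and column names itself in the WITH clause. Roughly, with made-up rows (Storm's actual quoting and parameter handling elided):

    Values('t', [('a', 'integer'), ('b', 'text')], [(1, 'x'), (2, 'y')])
    still compiles, as before, to

        (VALUES (1::integer, 'x'::text), (2, 'y')) AS t(a, b)

    while Values(None, ...) over the same rows now yields just

        (VALUES (1::integer, 'x'::text), (2, 'y'))

    and deathrow.py supplies the alias via the CTE declaration instead:

        WITH interesting(component, sourcepackagename, filename, md5)
            AS (VALUES ...)

Only the first row carries the ::type casts, matching the compile function shown in the hunk.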