Merge lp:~wgrant/launchpad/renovate-deathrow into lp:launchpad

Proposed by William Grant
Status: Work in progress
Proposed branch: lp:~wgrant/launchpad/renovate-deathrow
Merge into: lp:launchpad
Diff against target: 816 lines (+369/-260) (has conflicts)
6 files modified
lib/lp/archivepublisher/deathrow.py (+220/-231)
lib/lp/archivepublisher/scripts/processdeathrow.py (+18/-2)
lib/lp/archivepublisher/tests/deathrow.txt (+58/-22)
lib/lp/archivepublisher/tests/test_deathrow.py (+67/-3)
lib/lp/services/database/stormexpr.py (+4/-2)
lib/lp/soyuz/interfaces/publishing.py (+2/-0)
Text conflict in lib/lp/archivepublisher/scripts/processdeathrow.py
To merge this branch: bzr merge lp:~wgrant/launchpad/renovate-deathrow
Reviewer: Launchpad code reviewers
Status: Pending
Review via email: mp+306687@code.launchpad.net

Unmerged revisions

16519. By William Grant

Rework SourceDeathRow to use a more efficient query.
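
The rework replaces the old per-file canRemove() round trips with a single batched probe: the candidate (component, sourcepackagename, filename, md5) tuples are packed into a VALUES-backed CTE named "interesting", and one query then finds any of those files that other live publications still reference. A condensed sketch of the shape used in the diff; candidate_keys and other_pub_filter are hypothetical stand-ins for the batch's keys and the per-class Exists() filter, and the name=None form of Values relies on the stormexpr tweak in this branch:

    from storm.expr import SQL

    from lp.services.database.sqlbase import convert_storm_clause_to_string
    from lp.services.database.stormexpr import Values

    # Sketch only: attach the candidate file keys as a CTE and probe them
    # all in one query, instead of one canRemove() query per file.
    interesting_cols = [
        ('component', 'integer'),
        ('sourcepackagename', 'integer'),
        ('filename', 'text'),
        ('md5', 'text'),
        ]
    interesting_values = Values(None, interesting_cols, candidate_keys)
    still_referenced = store.with_(
        SQL('interesting(component, sourcepackagename, filename, md5) AS '
            + convert_storm_clause_to_string(interesting_values))
        ).find(
        (Interesting.component_id, Interesting.sourcepackagename_id,
         Interesting.filename, Interesting.md5),
        other_pub_filter,
        ).config(distinct=True)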

16518. By William Grant

Implement a new, hopefully correct deathrow algorithm, and adjust the tests to cope.
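
In outline, each batch now proceeds as below. This is a condensed restatement of the DeathRow.__call__ and calculateReapableFiles code in the diff that follows, not additional behaviour:

    # One TunableLoop iteration over condemned publications.
    pubs = list(self.findPubs()[:chunk_size])
    # Keep only files that no other unremoved publication still needs;
    # shared files stay on disk until their final publication dies.
    files = self.calculateReapableFiles(pubs)
    for comp, spn, filename in files:
        self.diskpool.removeFile(comp, spn, filename)  # skipped on dry runs
    for pub in pubs:
        pub.dateremoved = UTC_NOW
    # Resume the next batch after the highest id seen.
    self.start_at = pubs[-1].id + 1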

16517. By William Grant

Split out and start testing reapable file calculation.
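
The new unit tests drive this calculation directly. Roughly, per test_calculateReapableFiles_basic in the diff below:

    # calculateReapableFiles maps a batch of condemned publications to
    # (component, source package name, filename) tuples that are safe to
    # delete from the pool. The test passes the DiskPool class itself,
    # since no files are touched during calculation.
    dr = SourceDeathRow(spph.archive, DiskPool, BufferLogger())
    dr.calculateReapableFiles([spph])
    # => [(u'main', u'something', u'something_1.orig.tar.gz'),
    #     (u'main', u'something', u'something_1.dsc')]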

16516. By William Grant

Use component/SPN IDs where possible.

16515. By William Grant

Start rewriting process-death-row as a LoopTuner. Currently it just removes any file referenced by a removable publication, without any of the useful checks.
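
For reference, a TunableLoop subclass only has to supply isDone() and __call__(chunk_size); run() then drives the loop with an adaptively tuned batch size. A minimal sketch of the contract DeathRow fills in; ExampleLoop is hypothetical, with method names following lp.services.looptuner as used in the diff below:

    from lp.services.looptuner import TunableLoop

    class ExampleLoop(TunableLoop):
        maximum_chunk_size = 5000  # cap on the tuned batch size

        def isDone(self):
            # Return True once no work remains; run() then stops.
            return True

        def __call__(self, chunk_size):
            # Process one batch of at most chunk_size items, committing
            # or aborting the transaction before returning.
            pass

    # ExampleLoop(logger).run() repeats __call__ until isDone().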

Preview Diff

=== modified file 'lib/lp/archivepublisher/deathrow.py'
--- lib/lp/archivepublisher/deathrow.py 2012-08-17 11:15:35 +0000
+++ lib/lp/archivepublisher/deathrow.py 2016-09-24 06:41:02 +0000
@@ -6,31 +6,65 @@
 """
 __metaclass__ = type

-import datetime
 import logging
-import os

-import pytz
+from storm.expr import (
+    And,
+    Exists,
+    Not,
+    Select,
+    SQL,
+    )
+from storm.locals import (
+    Int,
+    Unicode,
+    )
+import transaction

 from lp.archivepublisher.config import getPubConfig
 from lp.archivepublisher.diskpool import DiskPool
+from lp.registry.model.sourcepackagename import SourcePackageName
 from lp.services.database.constants import UTC_NOW
-from lp.services.database.sqlbase import sqlvalues
+from lp.services.database.lpstorm import IStore
+from lp.services.database.sqlbase import convert_storm_clause_to_string
+from lp.services.database.stormexpr import Values
+from lp.services.librarian.model import (
+    LibraryFileAlias,
+    LibraryFileContent,
+    )
+from lp.services.looptuner import TunableLoop
 from lp.soyuz.enums import ArchivePurpose
 from lp.soyuz.interfaces.publishing import (
-    IBinaryPackagePublishingHistory,
-    inactive_publishing_status,
-    ISourcePackagePublishingHistory,
     MissingSymlinkInPool,
     NotInPool,
     )
+from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
+from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
+from lp.soyuz.model.component import Component
+from lp.soyuz.model.files import (
+    BinaryPackageFile,
+    SourcePackageReleaseFile,
+    )
 from lp.soyuz.model.publishing import (
     BinaryPackagePublishingHistory,
     SourcePackagePublishingHistory,
     )


-def getDeathRow(archive, log, pool_root_override):
+# Storm representation of calculateReapableFiles' CTE. Not a real table.
+class Interesting:
+    __storm_table__ = 'interesting'
+    # Meaningless, but get_cls_info needs it.
+    __storm_primary__ = (
+        'component_id', 'sourcepackagename_id', 'filename', 'md5')
+
+    component_id = Int(name='component')
+    sourcepackagename_id = Int(name='sourcepackagename')
+    filename = Unicode()
+    md5 = Unicode()
+
+
+def getDeathRows(archive, log, pool_root_override, dryrun):
     """Return a Deathrow object for the archive supplied.

     :param archive: Use the publisher config for this archive to derive the
@@ -58,10 +92,12 @@
     dp = DiskPool(pool_root, pubconf.temproot, diskpool_log)

     log.debug("Preparing death row.")
-    return DeathRow(archive, dp, log)
-
-
-class DeathRow:
+    return (
+        SourceDeathRow(archive, dp, log, dryrun),
+        BinaryDeathRow(archive, dp, log, dryrun))
+
+
+class DeathRow(TunableLoop):
     """A Distribution Archive Removal Processor.

     DeathRow will remove archive files from disk if they are marked for
@@ -69,227 +105,180 @@
     by other packages.
     """

-    def __init__(self, archive, diskpool, logger):
+    maximum_chunk_size = 5000
+
+    def __init__(self, archive, diskpool, logger, dry_run=False):
+        super(DeathRow, self).__init__(logger)
         self.archive = archive
         self.diskpool = diskpool
         self._removeFile = diskpool.removeFile
         self.logger = logger
-
-    def reap(self, dry_run=False):
-        """Reap packages that should be removed from the distribution.
-
-        Looks through all packages that are in condemned states and
-        have scheduleddeletiondate is in the past, try to remove their
-        files from the archive pool (which may be impossible if they are
-        used by other packages which are published), and mark them as
-        removed."""
-        if dry_run:
-            # Don't actually remove the files if we are dry running
-            def _mockRemoveFile(cn, sn, fn):
-                self.logger.debug("(Not really!) removing %s %s/%s" %
-                                  (cn, sn, fn))
-                fullpath = self.diskpool.pathFor(cn, sn, fn)
-                if not os.path.exists(fullpath):
-                    raise NotInPool
-                return os.lstat(fullpath).st_size
-            self._removeFile = _mockRemoveFile
-
-        source_files, binary_files = self._collectCondemned()
-        records = self._tryRemovingFromDisk(source_files, binary_files)
-        self._markPublicationRemoved(records)
-
-    def _collectCondemned(self):
-        """Return the condemned source and binary publications as a tuple.
-
-        Return all the `SourcePackagePublishingHistory` and
-        `BinaryPackagePublishingHistory` records that are eligible for
-        removal ('condemned') where the source/binary package that they
-        refer to is not published somewhere else.
-
-        Both sources and binaries are lists.
-        """
-        sources = SourcePackagePublishingHistory.select("""
-            SourcePackagePublishingHistory.archive = %s AND
-            SourcePackagePublishingHistory.scheduleddeletiondate < %s AND
-            SourcePackagePublishingHistory.dateremoved IS NULL AND
-            NOT EXISTS (
-                SELECT 1 FROM sourcepackagepublishinghistory as spph
-                WHERE
-                    SourcePackagePublishingHistory.sourcepackagerelease =
-                        spph.sourcepackagerelease AND
-                    spph.archive = %s AND
-                    spph.status NOT IN %s)
-            """ % sqlvalues(self.archive, UTC_NOW, self.archive,
-                            inactive_publishing_status), orderBy="id")
-        self.logger.debug("%d Sources" % sources.count())
-
-        binaries = BinaryPackagePublishingHistory.select("""
-            BinaryPackagePublishingHistory.archive = %s AND
-            BinaryPackagePublishingHistory.scheduleddeletiondate < %s AND
-            BinaryPackagePublishingHistory.dateremoved IS NULL AND
-            NOT EXISTS (
-                SELECT 1 FROM binarypackagepublishinghistory as bpph
-                WHERE
-                    BinaryPackagePublishingHistory.binarypackagerelease =
-                        bpph.binarypackagerelease AND
-                    bpph.archive = %s AND
-                    bpph.status NOT IN %s)
-            """ % sqlvalues(self.archive, UTC_NOW, self.archive,
-                            inactive_publishing_status), orderBy="id")
-        self.logger.debug("%d Binaries" % binaries.count())
-
-        return (sources, binaries)
-
-    def canRemove(self, publication_class, filename, file_md5):
-        """Check if given (filename, MD5) can be removed from the pool.
-
-        Check the archive reference-counter implemented in:
-        `SourcePackagePublishingHistory` or
-        `BinaryPackagePublishingHistory`.
-
-        Only allow removal of unnecessary files.
-        """
-        clauses = []
-        clauseTables = []
-
-        if ISourcePackagePublishingHistory.implementedBy(
-                publication_class):
-            clauses.append("""
-                SourcePackagePublishingHistory.archive = %s AND
-                SourcePackagePublishingHistory.dateremoved is NULL AND
-                SourcePackagePublishingHistory.sourcepackagerelease =
-                    SourcePackageReleaseFile.sourcepackagerelease AND
-                SourcePackageReleaseFile.libraryfile = LibraryFileAlias.id
-            """ % sqlvalues(self.archive))
-            clauseTables.append('SourcePackageReleaseFile')
-        elif IBinaryPackagePublishingHistory.implementedBy(
-                publication_class):
-            clauses.append("""
-                BinaryPackagePublishingHistory.archive = %s AND
-                BinaryPackagePublishingHistory.dateremoved is NULL AND
-                BinaryPackagePublishingHistory.binarypackagerelease =
-                    BinaryPackageFile.binarypackagerelease AND
-                BinaryPackageFile.libraryfile = LibraryFileAlias.id
-            """ % sqlvalues(self.archive))
-            clauseTables.append('BinaryPackageFile')
+        self.dry_run = dry_run
+
+        self.bytes = 0
+        self.start_at = 1
+        self.store = IStore(SourcePackagePublishingHistory)
+
+    def isDone(self):
+        return self.findPubs().is_empty()
+
+    def __call__(self, chunk_size):
+        pubs = list(self.findPubs()[:chunk_size])
+        files = self.calculateReapableFiles(pubs)
+
+        # The remaining files aren't referenced by other publications,
+        # so remove them from disk.
+        for comp, spn, filename in files:
+            if not self.dry_run:
+                try:
+                    self.bytes += self.diskpool.removeFile(comp, spn, filename)
+                except NotInPool as info:
+                    # It's safe for us to let this slide because it means that
+                    # the file is already gone.
+                    self.logger.debug(str(info))
+                except MissingSymlinkInPool as info:
+                    # This one is a little more worrying, because an expected
+                    # symlink has vanished from the pool/ (could be a code
+                    # mistake) but there is nothing we can do about it at this
+                    # point.
+                    self.logger.warn(str(info))
+            else:
+                self.logger.debug("Not removing %s (dry run)", filename)
+        for pub in pubs:
+            pub.dateremoved = UTC_NOW
+        self.start_at = pubs[-1].id + 1
+
+        if not self.dry_run:
+            transaction.commit()
         else:
-            raise AssertionError("%r is not supported." % publication_class)
-
-        clauses.append("""
-            LibraryFileAlias.content = LibraryFileContent.id AND
-            LibraryFileAlias.filename = %s AND
-            LibraryFileContent.md5 = %s
-        """ % sqlvalues(filename, file_md5))
-        clauseTables.extend(
-            ['LibraryFileAlias', 'LibraryFileContent'])
-
-        all_publications = publication_class.select(
-            " AND ".join(clauses), clauseTables=clauseTables)
-
-        right_now = datetime.datetime.now(pytz.timezone('UTC'))
-        for pub in all_publications:
-            # Deny removal if any reference is still active.
-            if pub.status not in inactive_publishing_status:
-                return False
-            # Deny removal if any reference wasn't dominated yet.
-            if pub.scheduleddeletiondate is None:
-                return False
-            # Deny removal if any reference is still in 'quarantine'.
-            if pub.scheduleddeletiondate > right_now:
-                return False
-
-        return True
-
-    def _tryRemovingFromDisk(self, condemned_source_files,
-                             condemned_binary_files):
-        """Take the list of publishing records provided and unpublish them.
-
-        You should only pass in entries you want to be unpublished because
-        this will result in the files being removed if they're not otherwise
-        in use.
-        """
-        bytes = 0
-        condemned_files = set()
-        condemned_records = set()
-        considered_files = set()
-        details = {}
-
-        def checkPubRecord(pub_record, publication_class):
-            """Check if the publishing record can be removed.
-
-            It can only be removed if all files in its context are not
-            referred to any other 'published' publishing records.
-
-            See `canRemove` for more information.
-            """
-            for pub_file in pub_record.files:
-                filename = pub_file.libraryfilealiasfilename
-                file_md5 = pub_file.libraryfilealias.content.md5
-
-                self.logger.debug("Checking %s (%s)" % (filename, file_md5))
-
-                # Calculating the file path in pool.
-                pub_file_details = (
-                    pub_file.libraryfilealiasfilename,
-                    pub_file.sourcepackagename,
-                    pub_file.componentname,
-                    )
-                file_path = self.diskpool.pathFor(*pub_file_details)
-
-                # Check if the LibraryFileAlias in question was already
-                # verified. If the verification was already made and the
-                # file is condemned queue the publishing record for removal
-                # otherwise just continue the iteration.
-                if (filename, file_md5) in considered_files:
-                    self.logger.debug("Already verified.")
-                    if file_path in condemned_files:
-                        condemned_records.add(pub_file.publishing_record)
-                    continue
-                considered_files.add((filename, file_md5))
-
-                # Check if the removal is allowed, if not continue.
-                if not self.canRemove(publication_class, filename, file_md5):
-                    self.logger.debug("Cannot remove.")
-                    continue
-
-                # Update local containers, in preparation to file removal.
-                details.setdefault(file_path, pub_file_details)
-                condemned_files.add(file_path)
-                condemned_records.add(pub_file.publishing_record)
-
-        # Check source and binary publishing records.
-        for pub_record in condemned_source_files:
-            checkPubRecord(pub_record, SourcePackagePublishingHistory)
-        for pub_record in condemned_binary_files:
-            checkPubRecord(pub_record, BinaryPackagePublishingHistory)
-
-        self.logger.info(
-            "Removing %s files marked for reaping" % len(condemned_files))
-
-        for condemned_file in sorted(condemned_files, reverse=True):
-            file_name, source_name, component_name = details[condemned_file]
-            try:
-                bytes += self._removeFile(
-                    component_name, source_name, file_name)
-            except NotInPool as info:
-                # It's safe for us to let this slide because it means that
-                # the file is already gone.
-                self.logger.debug(str(info))
-            except MissingSymlinkInPool as info:
-                # This one is a little more worrying, because an expected
-                # symlink has vanished from the pool/ (could be a code
-                # mistake) but there is nothing we can do about it at this
-                # point.
-                self.logger.warn(str(info))
-
-        self.logger.info("Total bytes freed: %s" % bytes)
-
-        return condemned_records
-
-    def _markPublicationRemoved(self, condemned_records):
-        # Now that the os.remove() calls have been made, simply let every
-        # now out-of-date record be marked as removed.
-        self.logger.debug("Marking %s condemned packages as removed." %
                          len(condemned_records))
-        for record in condemned_records:
-            record.dateremoved = UTC_NOW
+            transaction.abort()
+
+    def calculateReapableFiles(self, pubs):
+        files = {}
+
+        # Find all the files that we might remove because of these
+        # publications.
+        for pub in pubs:
+            spr = self.getSourcePackageRelease(pub)
+            for file in self.getFiles(pub):
+                key = (
+                    pub.componentID, spr.sourcepackagenameID,
+                    file.libraryfile.filename,
+                    file.libraryfile.content.md5)
+                if key not in files:
+                    files[key] = set()
+
+        interesting_cols = [
+            ('component', 'integer'),
+            ('sourcepackagename', 'integer'),
+            ('filename', 'text'),
+            ('md5', 'text'),
+            ]
+        interesting_values = Values(None, interesting_cols, files.keys())
+
+        # Exclude any files that are referenced by other unremoved
+        # publications. They need to stay on disk now and will be
+        # removed with their final publication later.
+        # We find unremoved publications that aren't in the current
+        # batch, with files with the same
+        # (archive, component, sourcepackagename, filename, md5).
+        has_other_pubs = self.store.with_(
            SQL('interesting(component, sourcepackagename, filename, md5) AS '
                + convert_storm_clause_to_string(interesting_values))
            ).find(
            (Interesting.component_id, Interesting.sourcepackagename_id,
             Interesting.filename, Interesting.md5),
            self.getOtherPubFilter(pubs),
            ).config(distinct=True)
+        for key in has_other_pubs:
+            del files[key]
+
+        return [
+            (Component.get(c).name, SourcePackageName.get(s).name, f)
+            for (c, s, f, m) in files]
+
+
+class SourceDeathRow(DeathRow):
+
+    def findPubs(self):
+        return self.store.find(
+            SourcePackagePublishingHistory,
+            SourcePackagePublishingHistory.id >= self.start_at,
+            SourcePackagePublishingHistory.archive == self.archive,
+            SourcePackagePublishingHistory.scheduleddeletiondate < UTC_NOW,
+            SourcePackagePublishingHistory.dateremoved == None)
+
+    def getOtherPubFilter(self, pubs):
+        spr_select = Select(
+            SourcePackageReleaseFile.sourcepackagereleaseID,
+            tables=[
+                LibraryFileAlias, LibraryFileContent,
+                SourcePackageReleaseFile],
+            where=And(
+                LibraryFileAlias.filename == Interesting.filename,
+                LibraryFileContent.md5 == Interesting.md5,
+                LibraryFileContent.id == LibraryFileAlias.contentID,
+                LibraryFileAlias.id == SourcePackageReleaseFile.libraryfileID))
+        return Exists(Select(
+            1, tables=[SourcePackagePublishingHistory],
+            where=And(
+                SourcePackagePublishingHistory.sourcepackagereleaseID.is_in(
+                    spr_select),
+                # Filter conditions
+                SourcePackagePublishingHistory.componentID ==
+                    Interesting.component_id,
+                SourcePackagePublishingHistory.sourcepackagenameID ==
+                    Interesting.sourcepackagename_id,
+                SourcePackagePublishingHistory.archive == self.archive,
+                SourcePackagePublishingHistory.dateremoved == None,
+                Not(
+                    SourcePackagePublishingHistory.id.is_in(
                        [pub.id for pub in pubs])))))
+
+    def getSourcePackageRelease(self, pub):
+        return pub.sourcepackagerelease
+
+    def getFiles(self, pub):
+        return pub.sourcepackagerelease.files
+
+
+class BinaryDeathRow(DeathRow):
+
+    def findPubs(self):
+        return self.store.find(
+            BinaryPackagePublishingHistory,
+            BinaryPackagePublishingHistory.id >= self.start_at,
+            BinaryPackagePublishingHistory.archive == self.archive,
+            BinaryPackagePublishingHistory.scheduleddeletiondate < UTC_NOW,
+            BinaryPackagePublishingHistory.dateremoved == None)
+
+    def getOtherPubQuery(self, pubs):
+        origin = [
+            BinaryPackagePublishingHistory, BinaryPackageRelease,
+            BinaryPackageBuild, BinaryPackageFile]
+        filter = [
+            # Join conditions
+            BinaryPackageRelease.id ==
+                BinaryPackagePublishingHistory.binarypackagereleaseID,
+            BinaryPackageBuild.id == BinaryPackageRelease.buildID,
+            BinaryPackageFile.binarypackagereleaseID ==
+                BinaryPackageRelease.id,
+            LibraryFileAlias.id == BinaryPackageFile.libraryfileID,
+            # Filter conditions
+            BinaryPackagePublishingHistory.componentID ==
+                Interesting.component_id,
+            BinaryPackageBuild.source_package_name_id ==
+                Interesting.sourcepackagename_id,
+            BinaryPackagePublishingHistory.archive == self.archive,
+            BinaryPackagePublishingHistory.dateremoved == None,
+            Not(
+                BinaryPackagePublishingHistory.id.is_in(
                    [pub.id for pub in pubs])),
            ]
+        return origin, filter
+
+    def getSourcePackageRelease(self, pub):
+        return pub.binarypackagerelease.build.source_package_release
+
+    def getFiles(self, pub):
+        return pub.binarypackagerelease.files

=== modified file 'lib/lp/archivepublisher/scripts/processdeathrow.py'
--- lib/lp/archivepublisher/scripts/processdeathrow.py 2016-09-19 11:19:16 +0000
+++ lib/lp/archivepublisher/scripts/processdeathrow.py 2016-09-24 06:41:02 +0000
@@ -15,6 +15,7 @@
     ]


+<<<<<<< TREE
 from lp.archivepublisher.deathrow import getDeathRow
 from lp.archivepublisher.scripts.base import PublisherScript
 from lp.services.limitedlist import LimitedList
@@ -25,6 +26,16 @@


 class DeathRowProcessor(PublisherScript):
+=======
+from zope.component import getUtility
+
+from lp.archivepublisher.deathrow import getDeathRows
+from lp.registry.interfaces.distribution import IDistributionSet
+from lp.services.scripts.base import LaunchpadCronScript
+
+
+class DeathRowProcessor(LaunchpadCronScript):
+>>>>>>> MERGE-SOURCE

     def add_my_options(self):
         self.parser.add_option(
@@ -59,10 +70,11 @@
         the operation just executed, i.e, commits successful runs and aborts
         runs with errors. It also respects 'dry-run' command-line option.
         """
-        death_row = getDeathRow(
-            archive, self.logger, self.options.pool_root)
+        sdr, bdr = getDeathRows(
+            archive, self.logger, self.options.pool_root, self.options.dry_run)
         self.logger.debug(
             "Unpublishing death row for %s." % archive.displayname)
+<<<<<<< TREE
         set_request_started(
             request_statements=LimitedList(10000),
             txn=self.txn, enable_timeout=False)
@@ -81,3 +93,7 @@
                 self.txn.commit()
         finally:
             clear_request_started()
+=======
+        sdr.run()
+        bdr.run()
+>>>>>>> MERGE-SOURCE

=== modified file 'lib/lp/archivepublisher/tests/deathrow.txt'
--- lib/lp/archivepublisher/tests/deathrow.txt 2011-04-05 08:28:30 +0000
+++ lib/lp/archivepublisher/tests/deathrow.txt 2016-09-24 06:41:02 +0000
@@ -16,12 +16,20 @@
 The no-operation use case, reflects the sampledata status.

     >>> from lp.services.log.logger import FakeLogger
-    >>> from lp.archivepublisher.deathrow import DeathRow
+    >>> from lp.archivepublisher.deathrow import (
+    ...     BinaryDeathRow,
+    ...     SourceDeathRow,
+    ...     )
     >>> from lp.archivepublisher.diskpool import DiskPool

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap(dry_run=True)
+    >>> logger = FakeLogger()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(
+    ...     ubuntu.main_archive, disk_pool, logger, dry_run=True)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(
+    ...     ubuntu.main_archive, disk_pool, logger, dry_run=True)
+    >>> bdr.run()
     DEBUG 0 Sources
     DEBUG 0 Binaries
     INFO Removing 0 files marked for reaping
@@ -237,9 +245,11 @@

 Run DeathRow against the current 'removable' context.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
     DEBUG 4 Sources
     DEBUG 3 Binaries
     ...
@@ -295,15 +305,15 @@

 The dependent publications were processed as expected; only the one
 with 'scheduleddeletiondate' set to the past was removed, the one with
-future timestamp and the published one were kept. No binary
-publications was removed (see more below).
+future timestamp and the published one were kept. One binary publication
+was removed, but the file remains on disk for the two active publications.

     >>> for pub in dependent_records:
     ...     check_removed(pub)
     stuck 666 in hoary SUPERSEDED True
     stuck 667 in hoary SUPERSEDED False
     stuck 668 in hoary PUBLISHED False
-    stuck-bin 666 in hoary i386 SUPERSEDED False
+    stuck-bin 666 in hoary i386 SUPERSEDED True
     stuck-bin 666 in hoary i386 SUPERSEDED False
     stuck-bin 666 in hoary i386 PUBLISHED False

@@ -333,9 +343,11 @@

 Now DeathRow considers 'stuck-bin' publications.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
     DEBUG 0 Sources
     DEBUG 2 Binaries
     DEBUG Checking stuck-bin_666_i386.deb (21c2e59531c8710156d34a3c30ac81d5)
@@ -346,6 +358,34 @@
     INFO Total bytes freed: 0
     DEBUG Marking 0 condemned packages as removed.

+The file is still in the repository, but now two of the three
+publications are removed.
+
+    >>> dependent_binaries = (
+    ...     published_binary,
+    ...     postponed_binary,
+    ...     removed_binary,
+    ...     )
+
+    >>> check_pool_files()
+    deleted-bin_666_i386.deb: REMOVED
+    deleted_666.dsc: REMOVED
+    obsolete-bin_666_i386.deb: REMOVED
+    obsolete_666.dsc: REMOVED
+    stuck-bin_666_i386.deb: OK
+    shared_1.0.tar.gz: OK
+    stuck_666.dsc: REMOVED
+    stuck_667.dsc: OK
+    stuck_668.dsc: OK
+    superseded-bin_666_i386.deb: REMOVED
+    superseded_666.dsc: REMOVED
+
+    >>> for pub in dependent_binaries:
+    ...     check_removed(pub)
+    stuck-bin 666 in hoary i386 SUPERSEDED True
+    stuck-bin 666 in hoary i386 SUPERSEDED False
+    stuck-bin 666 in hoary i386 SUPERSEDED True
+
 After being considered for removal, DeathRow realized that this binary
 could not be removed because there is still a publishing record
 imposing quarantine on it. Once the quarantine is lifted, by setting a
@@ -357,9 +397,11 @@
 That done, the publication and its files are free to be removed in a
 single pass.

-    >>> disk_pool = DiskPool(pool_path, temp_path, FakeLogger())
-    >>> death_row = DeathRow(ubuntu.main_archive, disk_pool, FakeLogger())
-    >>> death_row.reap()
+    >>> disk_pool = DiskPool(pool_path, temp_path, logger)
+    >>> sdr = SourceDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> sdr.run()
+    >>> bdr = BinaryDeathRow(ubuntu.main_archive, disk_pool, logger)
+    >>> bdr.run()
     DEBUG 0 Sources
     DEBUG 3 Binaries
     DEBUG Checking stuck-bin_666_i386.deb (21c2e59531c8710156d34a3c30ac81d5)
@@ -374,12 +416,6 @@

 The file was removed from the repository.

-    >>> dependent_binaries = (
-    ...     published_binary,
-    ...     postponed_binary,
-    ...     removed_binary,
-    ...     )
-
     >>> check_pool_files()
     deleted-bin_666_i386.deb: REMOVED
     deleted_666.dsc: REMOVED

=== modified file 'lib/lp/archivepublisher/tests/test_deathrow.py'
--- lib/lp/archivepublisher/tests/test_deathrow.py 2012-01-01 02:58:52 +0000
+++ lib/lp/archivepublisher/tests/test_deathrow.py 2016-09-24 06:41:02 +0000
@@ -12,17 +12,22 @@

 from zope.component import getUtility

-from lp.archivepublisher.deathrow import DeathRow
+from lp.archivepublisher.deathrow import (
+    DeathRow,
+    SourceDeathRow,
+    )
 from lp.archivepublisher.diskpool import DiskPool
 from lp.registry.interfaces.distribution import IDistributionSet
+from lp.services.database.constants import UTC_NOW
 from lp.services.log.logger import BufferLogger
+from lp.soyuz.interfaces.archive import ArchivePurpose
 from lp.soyuz.interfaces.component import IComponentSet
 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
-from lp.testing import TestCase
+from lp.testing import TestCaseWithFactory
 from lp.testing.layers import LaunchpadZopelessLayer


-class TestDeathRow(TestCase):
+class TestDeathRow(TestCaseWithFactory):

     layer = LaunchpadZopelessLayer

@@ -143,3 +148,62 @@

         self.assertDoesNotExist(main_dsc_path)
         self.assertDoesNotExist(universe_dsc_path)
+
+    def test_calculateReapableFiles_basic(self):
+        # An isolated publication's files are all removable.
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=self.factory.makeArchive(), sourcepackagename=u'something')
+        for fn in ('something_1.orig.tar.gz', 'something_1.dsc'):
+            self.factory.makeSourcePackageReleaseFile(
+                sourcepackagerelease=spph.sourcepackagerelease,
+                library_file=self.factory.makeLibraryFileAlias(
+                    filename=fn))
+
+        dr = SourceDeathRow(spph.archive, DiskPool, BufferLogger())
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz'),
+             (u'main', u'something', u'something_1.dsc')],
+            dr.calculateReapableFiles([spph]))
+
+    def test_calculateReapableFiles_cross_component(self):
+        # Files can be removed from a component once all relevant
+        # publications in that component are gone.
+        a = self.factory.makeArchive(purpose=ArchivePurpose.PRIMARY)
+        shared_file = self.factory.makeLibraryFileAlias(
+            filename=u'something_1.orig.tar.gz')
+        sources = []
+        for comp in [u'main', u'universe', u'main', u'main']:
+            spph = self.factory.makeSourcePackagePublishingHistory(
+                archive=a, component=comp, sourcepackagename=u'something')
+            spph.sourcepackagerelease.addFile(shared_file)
+            sources.append(spph)
+
+        log = BufferLogger()
+        dr = SourceDeathRow(a, DiskPool, log)
+
+        # Processing all four pubs simultaneously permits removal of all
+        # their files.
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz'),
+             (u'universe', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources))
+
+        # When processing just the first two sources, only the universe
+        # file can be removed. The main file is kept alive by the main
+        # publications in the next batch.
+        self.assertContentEqual(
+            [(u'universe', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources[:2]))
+
+        # The second batch (two main publications) can remove nothing,
+        # since all files are kept alive by the main pub in the first
+        # batch.
+        self.assertContentEqual([], dr.calculateReapableFiles(sources[2:]))
+
+        # With the first batch removed, the second batch can eliminate
+        # the main file.
+        for pub in sources[:2]:
+            pub.dateremoved = UTC_NOW
+        self.assertContentEqual(
+            [(u'main', u'something', u'something_1.orig.tar.gz')],
+            dr.calculateReapableFiles(sources[2:]))

=== modified file 'lib/lp/services/database/stormexpr.py'
--- lib/lp/services/database/stormexpr.py 2016-06-10 22:02:37 +0000
+++ lib/lp/services/database/stormexpr.py 2016-09-24 06:41:02 +0000
@@ -104,8 +104,10 @@
         "%s::%s" % (compile(value, state), type)
         for value, type in zip(expr.values[0], col_types))
     rows = [first_row] + [compile(value, state) for value in expr.values[1:]]
-    return "(VALUES (%s)) AS %s(%s)" % (
-        "), (".join(rows), expr.name, ', '.join(col_names))
+    s = "(VALUES (%s))" % "), (".join(rows)
+    if expr.name is not None:
+        s += " AS %s(%s)" % (expr.name, ', '.join(col_names))
+    return s


 class ColumnSelect(Expr):

=== modified file 'lib/lp/soyuz/interfaces/publishing.py'
--- lib/lp/soyuz/interfaces/publishing.py 2015-11-08 01:09:46 +0000
+++ lib/lp/soyuz/interfaces/publishing.py 2016-09-24 06:41:02 +0000
@@ -290,6 +290,7 @@
             required=False, readonly=False,
             ),
         exported_as="distro_series")
+    componentID = Attribute("DB ID for the component")
     component = Int(
         title=_('The component being published into'),
         required=False, readonly=False,
@@ -680,6 +681,7 @@
             ),
         exported_as="distro_arch_series")
     distroseries = Attribute("The distroseries being published into")
+    componentID = Attribute("DB ID for the component")
     component = Int(
         title=_('The component being published into'),
         required=False, readonly=False,