Merge ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 478e0f498c7fe414c05cb2eec66b3be3639d3d53
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:archive-file-history-backfill
Merge into: launchpad:master
Diff against target: 184 lines (+97/-1)
4 files modified
database/schema/security.cfg (+1/-0)
lib/lp/archivepublisher/publishing.py (+1/-0)
lib/lp/scripts/garbo.py (+36/-0)
lib/lp/scripts/tests/test_garbo.py (+59/-1)
Reviewer | Review Type | Date Requested | Status
Colin Watson (community) | | | Approve
Ioana Lasc | | | Pending
Review via email: mp+435122@code.launchpad.net

This proposal supersedes a proposal from 2020-09-15.

Commit message

Backfill ArchiveFile.date_superseded

Description of the change

We can currently derive this reliably from ArchiveFile.scheduled_deletion_date by subtracting the by-hash stay of execution (BY_HASH_STAY_OF_EXECUTION days), and backfilling it gives historical queries a chance of being mostly accurate.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote : Posted in a previous version of this proposal

Looks good.

review: Approve
478e0f4... by Colin Watson

Backfill ArchiveFile.date_superseded

We can currently derive this reliably from
ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of
historical queries being mostly accurate.

LP: #1765933

Revision history for this message
Colin Watson (cjwatson) wrote :

Self-approving to work around voting restrictions (Ioana was on the Launchpad team at the time of her approval vote).

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/database/schema/security.cfg b/database/schema/security.cfg
2index d5578d7..9021834 100644
3--- a/database/schema/security.cfg
4+++ b/database/schema/security.cfg
5@@ -2491,6 +2491,7 @@ public.account = SELECT, DELETE
6 public.answercontact = SELECT, DELETE
7 public.archive = SELECT, UPDATE
8 public.archiveauthtoken = SELECT, UPDATE
9+public.archivefile = SELECT, UPDATE
10 public.archivesubscriber = SELECT, UPDATE
11 public.branch = SELECT, UPDATE
12 public.branchjob = SELECT, DELETE
13diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
14index 07e32dd..e6133f4 100644
15--- a/lib/lp/archivepublisher/publishing.py
16+++ b/lib/lp/archivepublisher/publishing.py
17@@ -2,6 +2,7 @@
18 # GNU Affero General Public License version 3 (see the file LICENSE).
19
20 __all__ = [
21+ "BY_HASH_STAY_OF_EXECUTION",
22 "cannot_modify_suite",
23 "DirectoryHash",
24 "FORMAT_TO_SUBCOMPONENT",
25diff --git a/lib/lp/scripts/garbo.py b/lib/lp/scripts/garbo.py
26index 4531f82..3ec4d53 100644
27--- a/lib/lp/scripts/garbo.py
28+++ b/lib/lp/scripts/garbo.py
29@@ -48,6 +48,7 @@ from zope.component import getUtility
30 from zope.security.proxy import removeSecurityProxy
31
32 from lp.answers.model.answercontact import AnswerContact
33+from lp.archivepublisher.publishing import BY_HASH_STAY_OF_EXECUTION
34 from lp.bugs.interfaces.bug import IBugSet
35 from lp.bugs.model.bug import Bug
36 from lp.bugs.model.bugattachment import BugAttachment
37@@ -127,6 +128,7 @@ from lp.soyuz.enums import (
38 from lp.soyuz.interfaces.publishing import active_publishing_status
39 from lp.soyuz.model.archive import Archive
40 from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
41+from lp.soyuz.model.archivefile import ArchiveFile
42 from lp.soyuz.model.archivesubscriber import ArchiveSubscriber
43 from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
44 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
45@@ -2243,6 +2245,39 @@ class BinaryPackagePublishingHistorySPNPopulator(BulkPruner):
46 transaction.commit()
47
48
49+class ArchiveFileDatePopulator(TunableLoop):
50+ """Populates ArchiveFile.date_superseded."""
51+
52+ maximum_chunk_size = 5000
53+
54+ def __init__(self, log, abort_time=None):
55+ super().__init__(log, abort_time)
56+ self.start_at = 1
57+ self.store = IPrimaryStore(ArchiveFile)
58+
59+ def findArchiveFiles(self):
60+ archive_files = self.store.find(
61+ ArchiveFile,
62+ ArchiveFile.id >= self.start_at,
63+ ArchiveFile.date_superseded == None,
64+ ArchiveFile.scheduled_deletion_date != None,
65+ )
66+ return archive_files.order_by(ArchiveFile.id)
67+
68+ def isDone(self):
69+ return self.findArchiveFiles().is_empty()
70+
71+ def __call__(self, chunk_size):
72+ archive_files = list(self.findArchiveFiles()[:chunk_size])
73+ for archive_file in archive_files:
74+ archive_file.date_superseded = (
75+ archive_file.scheduled_deletion_date
76+ - timedelta(days=BY_HASH_STAY_OF_EXECUTION)
77+ )
78+ self.start_at = archive_files[-1].id + 1
79+ transaction.commit()
80+
81+
82 class BaseDatabaseGarbageCollector(LaunchpadCronScript):
83 """Abstract base class to run a collection of TunableLoops."""
84
85@@ -2556,6 +2591,7 @@ class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
86 tunable_loops = [
87 AnswerContactPruner,
88 ArchiveArtifactoryColumnsPopulator,
89+ ArchiveFileDatePopulator,
90 BinaryPackagePublishingHistoryFormatPopulator,
91 BinaryPackagePublishingHistorySPNPopulator,
92 BranchJobPruner,
93diff --git a/lib/lp/scripts/tests/test_garbo.py b/lib/lp/scripts/tests/test_garbo.py
94index c93b28e..ade0b76 100644
95--- a/lib/lp/scripts/tests/test_garbo.py
96+++ b/lib/lp/scripts/tests/test_garbo.py
97@@ -72,6 +72,7 @@ from lp.registry.model.commercialsubscription import CommercialSubscription
98 from lp.registry.model.teammembership import TeamMembership
99 from lp.scripts.garbo import (
100 AntiqueSessionPruner,
101+ ArchiveFileDatePopulator,
102 BulkPruner,
103 DailyDatabaseGarbageCollector,
104 DuplicateSessionPruner,
105@@ -127,6 +128,7 @@ from lp.soyuz.enums import (
106 PackagePublishingStatus,
107 )
108 from lp.soyuz.interfaces.archive import NAMED_AUTH_TOKEN_FEATURE_FLAG
109+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
110 from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
111 from lp.soyuz.interfaces.publishing import IPublishingSet
112 from lp.soyuz.model.distributionsourcepackagecache import (
113@@ -434,7 +436,11 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
114 self.log_buffer = io.StringIO()
115 handler = logging.StreamHandler(self.log_buffer)
116 self.log.addHandler(handler)
117- self.addDetail("garbo-log", text_content(self.log_buffer.getvalue()))
118+ self.addCleanup(
119+ lambda: self.addDetail(
120+ "garbo-log", text_content(self.log_buffer.getvalue())
121+ )
122+ )
123
124 def runFrequently(self, maximum_chunk_size=2, test_args=()):
125 switch_dbuser("garbo_daily")
126@@ -2559,6 +2565,58 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
127 # Other publications are left alone.
128 self.assertIsNone(bpphs[2].sourcepackagename)
129
130+ def test_ArchiveFileDatePopulator(self):
131+ switch_dbuser("testadmin")
132+ now = datetime.now(UTC)
133+ archive_files = [self.factory.makeArchiveFile() for _ in range(2)]
134+ removeSecurityProxy(
135+ archive_files[1]
136+ ).scheduled_deletion_date = now + timedelta(hours=6)
137+
138+ self.runDaily()
139+
140+ self.assertThat(
141+ archive_files,
142+ MatchesListwise(
143+ [
144+ MatchesStructure(date_superseded=Is(None)),
145+ MatchesStructure.byEquality(
146+ date_superseded=now - timedelta(hours=18)
147+ ),
148+ ]
149+ ),
150+ )
151+
152+ def test_ArchiveFileDatePopulator_findArchiveFiles_filters_correctly(self):
153+ switch_dbuser("testadmin")
154+
155+ # Create three ArchiveFiles: one with date_superseded set, one with
156+ # date_superseded unset and scheduled_deletion_date set, and one
157+ # with both unset.
158+ archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
159+
160+ Store.of(archive_files[0]).flush()
161+ getUtility(IArchiveFileSet).scheduleDeletion(
162+ [archive_files[0]], timedelta(days=1)
163+ )
164+ self.assertIsNotNone(archive_files[0].date_superseded)
165+
166+ removeSecurityProxy(
167+ archive_files[1]
168+ ).scheduled_deletion_date = datetime.now(UTC) + timedelta(days=1)
169+ self.assertIsNone(archive_files[1].date_superseded)
170+
171+ self.assertIsNone(archive_files[2].date_superseded)
172+ self.assertIsNone(archive_files[2].scheduled_deletion_date)
173+
174+ populator = ArchiveFileDatePopulator(None)
175+ # Consider only ArchiveFiles created by this test.
176+ populator.start_at = archive_files[0].id
177+
178+ rs = populator.findArchiveFiles()
179+ self.assertEqual(1, rs.count())
180+ self.assertEqual(archive_files[1], rs.one())
181+
182
183 class TestGarboTasks(TestCaseWithFactory):
184 layer = LaunchpadZopelessLayer

Subscribers

People subscribed via source and target branches

to status/vote changes: