Merge ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master

Proposed by Colin Watson
Status: Needs review
Proposed branch: ~cjwatson/launchpad:archive-file-history-backfill
Merge into: launchpad:master
Prerequisite: ~cjwatson/launchpad:archive-file-history
Diff against target: 170 lines (+83/-1)
4 files modified
database/schema/security.cfg (+1/-0)
lib/lp/archivepublisher/publishing.py (+1/-0)
lib/lp/scripts/garbo.py (+34/-0)
lib/lp/scripts/tests/test_garbo.py (+47/-1)
Reviewer | Review Type | Date Requested | Status
Ioana Lasc | | | Approve
Review via email: mp+390761@code.launchpad.net

Commit message

Backfill ArchiveFile.date_superseded

Description of the change

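We can currently derive ArchiveFile.date_superseded reliably from ArchiveFile.scheduled_deletion_date (by subtracting BY_HASH_STAY_OF_EXECUTION days from it). Backfilling it for existing rows gives historical queries a good chance of being mostly accurate.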

To post a comment you must log in.
e89d7f1... by Colin Watson on 2020-09-15

Backfill ArchiveFile.date_superseded

We can currently derive this reliably from
ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of
historical queries being mostly accurate.

LP: #1765933

Revision history for this message
Ioana Lasc (ilasc) wrote :

Looks good.

review: Approve

Unmerged commits

e89d7f1... by Colin Watson on 2020-09-15

Backfill ArchiveFile.date_superseded

We can currently derive this reliably from
ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of
historical queries being mostly accurate.

LP: #1765933

b114919... by Colin Watson on 2020-09-14

Show useful garbo-log details on TestGarbo failures

Evaluating the contents of the log buffer in setUp meant that it was
always empty. We need to evaluate it later, once something useful has
been logged.

178ada8... by Colin Watson on 2018-04-21

Turn ArchiveFile into a history table

This adds date_created and date_superseded columns. Adjust the
publisher to match.

LP: #1765933

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/database/schema/security.cfg b/database/schema/security.cfg
2index 8a9a31b..b43a05b 100644
3--- a/database/schema/security.cfg
4+++ b/database/schema/security.cfg
5@@ -2398,6 +2398,7 @@ public.accesspolicy = SELECT, DELETE
6 public.accesspolicygrant = SELECT, DELETE
7 public.account = SELECT, DELETE
8 public.answercontact = SELECT, DELETE
9+public.archivefile = SELECT, UPDATE
10 public.branch = SELECT, UPDATE
11 public.branchjob = SELECT, DELETE
12 public.branchmergeproposal = SELECT, UPDATE, DELETE
13diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
14index 4735b89..3caf46f 100644
15--- a/lib/lp/archivepublisher/publishing.py
16+++ b/lib/lp/archivepublisher/publishing.py
17@@ -2,6 +2,7 @@
18 # GNU Affero General Public License version 3 (see the file LICENSE).
19
20 __all__ = [
21+ 'BY_HASH_STAY_OF_EXECUTION',
22 'cannot_modify_suite',
23 'DirectoryHash',
24 'FORMAT_TO_SUBCOMPONENT',
25diff --git a/lib/lp/scripts/garbo.py b/lib/lp/scripts/garbo.py
26index 09a64ed..104a9e9 100644
27--- a/lib/lp/scripts/garbo.py
28+++ b/lib/lp/scripts/garbo.py
29@@ -50,6 +50,7 @@ from zope.component import getUtility
30 from zope.security.proxy import removeSecurityProxy
31
32 from lp.answers.model.answercontact import AnswerContact
33+from lp.archivepublisher.publishing import BY_HASH_STAY_OF_EXECUTION
34 from lp.bugs.interfaces.bug import IBugSet
35 from lp.bugs.model.bug import Bug
36 from lp.bugs.model.bugattachment import BugAttachment
37@@ -123,6 +124,7 @@ from lp.snappy.model.snapbuild import SnapFile
38 from lp.snappy.model.snapbuildjob import SnapBuildJobType
39 from lp.soyuz.interfaces.publishing import active_publishing_status
40 from lp.soyuz.model.archive import Archive
41+from lp.soyuz.model.archivefile import ArchiveFile
42 from lp.soyuz.model.distributionsourcepackagecache import (
43 DistributionSourcePackageCache,
44 )
45@@ -1552,6 +1554,37 @@ class GitRepositoryPruner(TunableLoop):
46 transaction.commit()
47
48
49+class ArchiveFileDatePopulator(TunableLoop):
50+ """Populates ArchiveFile.date_superseded."""
51+
52+ maximum_chunk_size = 5000
53+
54+ def __init__(self, log, abort_time=None):
55+ super(ArchiveFileDatePopulator, self).__init__(log, abort_time)
56+ self.start_at = 1
57+ self.store = IMasterStore(ArchiveFile)
58+
59+ def findArchiveFiles(self):
60+ archive_files = self.store.find(
61+ ArchiveFile,
62+ ArchiveFile.id >= self.start_at,
63+ ArchiveFile.date_superseded == None,
64+ ArchiveFile.scheduled_deletion_date != None)
65+ return archive_files.order_by(ArchiveFile.id)
66+
67+ def isDone(self):
68+ return self.findArchiveFiles().is_empty()
69+
70+ def __call__(self, chunk_size):
71+ archive_files = list(self.findArchiveFiles()[:chunk_size])
72+ for archive_file in archive_files:
73+ archive_file.date_superseded = (
74+ archive_file.scheduled_deletion_date -
75+ timedelta(days=BY_HASH_STAY_OF_EXECUTION))
76+ self.start_at = archive_files[-1].id + 1
77+ transaction.commit()
78+
79+
80 class BaseDatabaseGarbageCollector(LaunchpadCronScript):
81 """Abstract base class to run a collection of TunableLoops."""
82 script_name = None # Script name for locking and database user. Override.
83@@ -1825,6 +1858,7 @@ class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
84 script_name = 'garbo-daily'
85 tunable_loops = [
86 AnswerContactPruner,
87+ ArchiveFileDatePopulator,
88 BranchJobPruner,
89 BugNotificationPruner,
90 BugWatchActivityPruner,
91diff --git a/lib/lp/scripts/tests/test_garbo.py b/lib/lp/scripts/tests/test_garbo.py
92index 2fbc80d..ac17212 100644
93--- a/lib/lp/scripts/tests/test_garbo.py
94+++ b/lib/lp/scripts/tests/test_garbo.py
95@@ -87,6 +87,7 @@ from lp.registry.model.commercialsubscription import CommercialSubscription
96 from lp.registry.model.teammembership import TeamMembership
97 from lp.scripts.garbo import (
98 AntiqueSessionPruner,
99+ ArchiveFileDatePopulator,
100 BulkPruner,
101 DailyDatabaseGarbageCollector,
102 DuplicateSessionPruner,
103@@ -134,6 +135,7 @@ from lp.snappy.model.snapbuildjob import (
104 SnapStoreUploadJob,
105 )
106 from lp.soyuz.enums import PackagePublishingStatus
107+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
108 from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
109 from lp.soyuz.model.distributionsourcepackagecache import (
110 DistributionSourcePackageCache,
111@@ -433,7 +435,8 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
112 self.log_buffer = six.StringIO()
113 handler = logging.StreamHandler(self.log_buffer)
114 self.log.addHandler(handler)
115- self.addDetail('garbo-log', text_content(self.log_buffer.getvalue()))
116+ self.addCleanup(lambda: self.addDetail(
117+ 'garbo-log', text_content(self.log_buffer.getvalue())))
118
119 def runFrequently(self, maximum_chunk_size=2, test_args=()):
120 switch_dbuser('garbo_daily')
121@@ -1725,6 +1728,49 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
122 # retained.
123 self._test_SnapFilePruner('foo.snap', None, 30, expected_count=1)
124
125+ def test_ArchiveFileDatePopulator(self):
126+ switch_dbuser('testadmin')
127+ now = datetime.now(UTC)
128+ archive_files = [self.factory.makeArchiveFile() for _ in range(2)]
129+ removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
130+ now + timedelta(hours=6))
131+
132+ self.runDaily()
133+
134+ self.assertThat(archive_files, MatchesListwise([
135+ MatchesStructure(date_superseded=Is(None)),
136+ MatchesStructure.byEquality(
137+ date_superseded=now - timedelta(hours=18)),
138+ ]))
139+
140+ def test_ArchiveFileDatePopulator_findArchiveFiles_filters_correctly(self):
141+ switch_dbuser('testadmin')
142+
143+ # Create three ArchiveFiles: one with date_superseded set, one with
144+ # date_superseded unset and scheduled_deletion_date set, and one
145+ # with both unset.
146+ archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
147+
148+ Store.of(archive_files[0]).flush()
149+ getUtility(IArchiveFileSet).scheduleDeletion(
150+ [archive_files[0]], timedelta(days=1))
151+ self.assertIsNotNone(archive_files[0].date_superseded)
152+
153+ removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
154+ datetime.now(UTC) + timedelta(days=1))
155+ self.assertIsNone(archive_files[1].date_superseded)
156+
157+ self.assertIsNone(archive_files[2].date_superseded)
158+ self.assertIsNone(archive_files[2].scheduled_deletion_date)
159+
160+ populator = ArchiveFileDatePopulator(None)
161+ # Consider only ArchiveFiles created by this test.
162+ populator.start_at = archive_files[0].id
163+
164+ rs = populator.findArchiveFiles()
165+ self.assertEqual(1, rs.count())
166+ self.assertEqual(archive_files[1], rs.one())
167+
168
169 class TestGarboTasks(TestCaseWithFactory):
170 layer = LaunchpadZopelessLayer

Subscribers

People subscribed via source and target branches

to status/vote changes: