Merge ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master

Proposed by Colin Watson
Status: Superseded
Proposed branch: ~cjwatson/launchpad:archive-file-history-backfill
Merge into: launchpad:master
Prerequisite: ~cjwatson/launchpad:archive-file-history
Diff against target: 170 lines (+83/-1)
4 files modified
database/schema/security.cfg (+1/-0)
lib/lp/archivepublisher/publishing.py (+1/-0)
lib/lp/scripts/garbo.py (+34/-0)
lib/lp/scripts/tests/test_garbo.py (+47/-1)
Reviewer Review Type Date Requested Status
Ioana Lasc (community) Approve
Review via email: mp+390761@code.launchpad.net

This proposal has been superseded by a proposal from 2023-01-04.

Commit message

Backfill ArchiveFile.date_superseded

Description of the change

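We can currently derive ArchiveFile.date_superseded reliably from ArchiveFile.scheduled_deletion_date (it is the scheduled deletion date minus the by-hash stay of execution), and backfilling it gives historical queries a good chance of being mostly accurate.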

To post a comment you must log in.
e89d7f1... by Colin Watson

Backfill ArchiveFile.date_superseded

We can currently derive this reliably from
ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of
historical queries being mostly accurate.

LP: #1765933

Revision history for this message
Ioana Lasc (ilasc) wrote :

Looks good.

review: Approve

Unmerged commits

e89d7f1... by Colin Watson

Backfill ArchiveFile.date_superseded

We can currently derive this reliably from
ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of
historical queries being mostly accurate.

LP: #1765933

b114919... by Colin Watson

Show useful garbo-log details on TestGarbo failures

Evaluating the contents of the log buffer in setUp meant that it was
always empty. We need to evaluate it later, once something useful has
been logged.

178ada8... by Colin Watson

Turn ArchiveFile into a history table

This adds date_created and date_superseded columns. Adjust the
publisher to match.

LP: #1765933

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/database/schema/security.cfg b/database/schema/security.cfg
index 8a9a31b..b43a05b 100644
--- a/database/schema/security.cfg
+++ b/database/schema/security.cfg
@@ -2398,6 +2398,7 @@ public.accesspolicy = SELECT, DELETE
 public.accesspolicygrant = SELECT, DELETE
 public.account = SELECT, DELETE
 public.answercontact = SELECT, DELETE
+public.archivefile = SELECT, UPDATE
 public.branch = SELECT, UPDATE
 public.branchjob = SELECT, DELETE
 public.branchmergeproposal = SELECT, UPDATE, DELETE
diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
index 4735b89..3caf46f 100644
--- a/lib/lp/archivepublisher/publishing.py
+++ b/lib/lp/archivepublisher/publishing.py
@@ -2,6 +2,7 @@
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4__all__ = [4__all__ = [
5 'BY_HASH_STAY_OF_EXECUTION',
5 'cannot_modify_suite',6 'cannot_modify_suite',
6 'DirectoryHash',7 'DirectoryHash',
7 'FORMAT_TO_SUBCOMPONENT',8 'FORMAT_TO_SUBCOMPONENT',
diff --git a/lib/lp/scripts/garbo.py b/lib/lp/scripts/garbo.py
index 09a64ed..104a9e9 100644
--- a/lib/lp/scripts/garbo.py
+++ b/lib/lp/scripts/garbo.py
@@ -50,6 +50,7 @@ from zope.component import getUtility
50from zope.security.proxy import removeSecurityProxy50from zope.security.proxy import removeSecurityProxy
5151
52from lp.answers.model.answercontact import AnswerContact52from lp.answers.model.answercontact import AnswerContact
53from lp.archivepublisher.publishing import BY_HASH_STAY_OF_EXECUTION
53from lp.bugs.interfaces.bug import IBugSet54from lp.bugs.interfaces.bug import IBugSet
54from lp.bugs.model.bug import Bug55from lp.bugs.model.bug import Bug
55from lp.bugs.model.bugattachment import BugAttachment56from lp.bugs.model.bugattachment import BugAttachment
@@ -123,6 +124,7 @@ from lp.snappy.model.snapbuild import SnapFile
123from lp.snappy.model.snapbuildjob import SnapBuildJobType124from lp.snappy.model.snapbuildjob import SnapBuildJobType
124from lp.soyuz.interfaces.publishing import active_publishing_status125from lp.soyuz.interfaces.publishing import active_publishing_status
125from lp.soyuz.model.archive import Archive126from lp.soyuz.model.archive import Archive
127from lp.soyuz.model.archivefile import ArchiveFile
126from lp.soyuz.model.distributionsourcepackagecache import (128from lp.soyuz.model.distributionsourcepackagecache import (
127 DistributionSourcePackageCache,129 DistributionSourcePackageCache,
128 )130 )
@@ -1552,6 +1554,37 @@ class GitRepositoryPruner(TunableLoop):
1552 transaction.commit()1554 transaction.commit()
15531555
15541556
class ArchiveFileDatePopulator(TunableLoop):
    """Populates ArchiveFile.date_superseded.

    Rows that have a scheduled_deletion_date but no date_superseded are
    backfilled with scheduled_deletion_date minus
    BY_HASH_STAY_OF_EXECUTION days.  Rows are visited in ascending id
    order, one chunk per call, with a commit after each chunk.
    """

    maximum_chunk_size = 5000

    def __init__(self, log, abort_time=None):
        super(ArchiveFileDatePopulator, self).__init__(log, abort_time)
        # Lowest ArchiveFile.id still to be considered; advanced past each
        # processed chunk so later queries skip rows already handled.
        self.start_at = 1
        self.store = IMasterStore(ArchiveFile)

    def findArchiveFiles(self):
        """Return the rows still needing a backfill, ordered by id."""
        archive_files = self.store.find(
            ArchiveFile,
            ArchiveFile.id >= self.start_at,
            # These must stay as "== None" / "!= None": they are query
            # expressions handed to find(), whereas "is None" would be
            # evaluated immediately by Python to a plain bool.
            ArchiveFile.date_superseded == None,
            ArchiveFile.scheduled_deletion_date != None)
        return archive_files.order_by(ArchiveFile.id)

    def isDone(self):
        """Return True once no rows remain to be backfilled."""
        return self.findArchiveFiles().is_empty()

    def __call__(self, chunk_size):
        """Backfill up to chunk_size rows, then commit."""
        archive_files = list(self.findArchiveFiles()[:chunk_size])
        if not archive_files:
            # Nothing matched (e.g. the remaining rows were populated
            # since isDone() was last checked); indexing [-1] below would
            # raise IndexError, so there is simply nothing to do.
            return
        stay_of_execution = timedelta(days=BY_HASH_STAY_OF_EXECUTION)
        for archive_file in archive_files:
            archive_file.date_superseded = (
                archive_file.scheduled_deletion_date - stay_of_execution)
        self.start_at = archive_files[-1].id + 1
        transaction.commit()
1586
1587
1555class BaseDatabaseGarbageCollector(LaunchpadCronScript):1588class BaseDatabaseGarbageCollector(LaunchpadCronScript):
1556 """Abstract base class to run a collection of TunableLoops."""1589 """Abstract base class to run a collection of TunableLoops."""
1557 script_name = None # Script name for locking and database user. Override.1590 script_name = None # Script name for locking and database user. Override.
@@ -1825,6 +1858,7 @@ class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
1825 script_name = 'garbo-daily'1858 script_name = 'garbo-daily'
1826 tunable_loops = [1859 tunable_loops = [
1827 AnswerContactPruner,1860 AnswerContactPruner,
1861 ArchiveFileDatePopulator,
1828 BranchJobPruner,1862 BranchJobPruner,
1829 BugNotificationPruner,1863 BugNotificationPruner,
1830 BugWatchActivityPruner,1864 BugWatchActivityPruner,
diff --git a/lib/lp/scripts/tests/test_garbo.py b/lib/lp/scripts/tests/test_garbo.py
index 2fbc80d..ac17212 100644
--- a/lib/lp/scripts/tests/test_garbo.py
+++ b/lib/lp/scripts/tests/test_garbo.py
@@ -87,6 +87,7 @@ from lp.registry.model.commercialsubscription import CommercialSubscription
87from lp.registry.model.teammembership import TeamMembership87from lp.registry.model.teammembership import TeamMembership
88from lp.scripts.garbo import (88from lp.scripts.garbo import (
89 AntiqueSessionPruner,89 AntiqueSessionPruner,
90 ArchiveFileDatePopulator,
90 BulkPruner,91 BulkPruner,
91 DailyDatabaseGarbageCollector,92 DailyDatabaseGarbageCollector,
92 DuplicateSessionPruner,93 DuplicateSessionPruner,
@@ -134,6 +135,7 @@ from lp.snappy.model.snapbuildjob import (
134 SnapStoreUploadJob,135 SnapStoreUploadJob,
135 )136 )
136from lp.soyuz.enums import PackagePublishingStatus137from lp.soyuz.enums import PackagePublishingStatus
138from lp.soyuz.interfaces.archivefile import IArchiveFileSet
137from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG139from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
138from lp.soyuz.model.distributionsourcepackagecache import (140from lp.soyuz.model.distributionsourcepackagecache import (
139 DistributionSourcePackageCache,141 DistributionSourcePackageCache,
@@ -433,7 +435,8 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
433 self.log_buffer = six.StringIO()435 self.log_buffer = six.StringIO()
434 handler = logging.StreamHandler(self.log_buffer)436 handler = logging.StreamHandler(self.log_buffer)
435 self.log.addHandler(handler)437 self.log.addHandler(handler)
436 self.addDetail('garbo-log', text_content(self.log_buffer.getvalue()))438 self.addCleanup(lambda: self.addDetail(
439 'garbo-log', text_content(self.log_buffer.getvalue())))
437440
438 def runFrequently(self, maximum_chunk_size=2, test_args=()):441 def runFrequently(self, maximum_chunk_size=2, test_args=()):
439 switch_dbuser('garbo_daily')442 switch_dbuser('garbo_daily')
@@ -1725,6 +1728,49 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
1725 # retained.1728 # retained.
1726 self._test_SnapFilePruner('foo.snap', None, 30, expected_count=1)1729 self._test_SnapFilePruner('foo.snap', None, 30, expected_count=1)
17271730
def test_ArchiveFileDatePopulator(self):
    """A daily garbo run backfills date_superseded only where possible.

    The file without a scheduled_deletion_date is left alone; the file
    scheduled for deletion six hours from now ends up superseded 18
    hours ago (this expectation assumes a one-day stay of execution).
    """
    switch_dbuser('testadmin')
    reference_time = datetime.now(UTC)
    unscheduled = self.factory.makeArchiveFile()
    scheduled = self.factory.makeArchiveFile()
    removeSecurityProxy(scheduled).scheduled_deletion_date = (
        reference_time + timedelta(hours=6))

    self.runDaily()

    expected_superseded = reference_time - timedelta(hours=18)
    self.assertThat([unscheduled, scheduled], MatchesListwise([
        MatchesStructure(date_superseded=Is(None)),
        MatchesStructure.byEquality(date_superseded=expected_superseded),
        ]))
1745
def test_ArchiveFileDatePopulator_findArchiveFiles_filters_correctly(self):
    """findArchiveFiles returns only rows that still need a backfill."""
    switch_dbuser('testadmin')

    # Three ArchiveFiles, in creation order: one that will have
    # date_superseded set, one with only scheduled_deletion_date set,
    # and one with neither set.
    already_superseded, needs_backfill, never_scheduled = (
        self.factory.makeArchiveFile() for _ in range(3))

    Store.of(already_superseded).flush()
    getUtility(IArchiveFileSet).scheduleDeletion(
        [already_superseded], timedelta(days=1))
    self.assertIsNotNone(already_superseded.date_superseded)

    tomorrow = datetime.now(UTC) + timedelta(days=1)
    removeSecurityProxy(needs_backfill).scheduled_deletion_date = tomorrow
    self.assertIsNone(needs_backfill.date_superseded)

    self.assertIsNone(never_scheduled.date_superseded)
    self.assertIsNone(never_scheduled.scheduled_deletion_date)

    populator = ArchiveFileDatePopulator(None)
    # Consider only ArchiveFiles created by this test.
    populator.start_at = already_superseded.id

    candidates = populator.findArchiveFiles()
    self.assertEqual(1, candidates.count())
    self.assertEqual(needs_backfill, candidates.one())
1773
17281774
1729class TestGarboTasks(TestCaseWithFactory):1775class TestGarboTasks(TestCaseWithFactory):
1730 layer = LaunchpadZopelessLayer1776 layer = LaunchpadZopelessLayer

Subscribers

People subscribed via source and target branches

to status/vote changes: