Merge ~cjwatson/launchpad:librarian-gc-ram into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 7cda2a51deb8a2ad13f0e211a24696da6ef483ed
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:librarian-gc-ram
Merge into: launchpad:master
Diff against target: 216 lines (+51/-31)
4 files modified
cronscripts/librarian-gc.py (+12/-10)
lib/lp/services/librarianserver/librariangc.py (+31/-13)
lib/lp/services/librarianserver/tests/test_gc.py (+5/-5)
lib/lp/testing/pgsql.py (+3/-3)
Reviewer Review Type Date Requested Status
Ioana Lasc (community) Approve
Review via email: mp+407071@code.launchpad.net

Commit message

librarian-gc: Use a server-side cursor for iterating LFC IDs

Description of the change

If we use a normal client-side cursor, then psycopg2 reads all the rows into memory up-front, defeating our attempt to avoid doing so.

In order to do this, I had to also fix bug 853066, because Storm's `ConnectionWrapper.cursor` doesn't currently support creating server-side cursors (though that could certainly be fixed).

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

LGTM

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/cronscripts/librarian-gc.py b/cronscripts/librarian-gc.py
2index cc18c12..c767e73 100755
3--- a/cronscripts/librarian-gc.py
4+++ b/cronscripts/librarian-gc.py
5@@ -1,6 +1,6 @@
6 #!/usr/bin/python3 -S
7 #
8-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
9+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
10 # GNU Affero General Public License version 3 (see the file LICENSE).
11
12 """Librarian garbage collector.
13@@ -16,9 +16,14 @@ import _pythonpath # noqa: F401
14
15 import logging
16
17-from lp.services.config import config
18-from lp.services.database.interfaces import IStore
19-from lp.services.librarian.model import LibraryFileAlias
20+from lp.services.config import (
21+ config,
22+ dbconfig,
23+ )
24+from lp.services.database.sqlbase import (
25+ connect,
26+ ISOLATION_LEVEL_AUTOCOMMIT,
27+ )
28 from lp.services.librarianserver import librariangc
29 from lp.services.scripts.base import LaunchpadCronScript
30
31@@ -63,15 +68,12 @@ class LibrarianGC(LaunchpadCronScript):
32 if self.options.loglevel <= logging.DEBUG:
33 librariangc.debug = True
34
35- # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
36- # here is wrong. We should either create our own or use
37- # Store.execute or cursor() and the transaction module.
38- store = IStore(LibraryFileAlias)
39- conn = store._connection._raw_connection
40+ conn = connect(
41+ user=dbconfig.dbuser, isolation=ISOLATION_LEVEL_AUTOCOMMIT)
42
43 # Refuse to run if we have significant clock skew between the
44 # librarian and the database.
45- librariangc.confirm_no_clock_skew(store)
46+ librariangc.confirm_no_clock_skew(conn)
47
48 # Note that each of these next steps will issue commit commands
49 # as appropriate to make this script transaction friendly
50diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
51index b924409..71c3355 100644
52--- a/lib/lp/services/librarianserver/librariangc.py
53+++ b/lib/lp/services/librarianserver/librariangc.py
54@@ -1,4 +1,4 @@
55-# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
56+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
57 # GNU Affero General Public License version 3 (see the file LICENSE).
58
59 """Librarian garbage collection routines"""
60@@ -31,7 +31,6 @@ from lp.services.database.postgresql import (
61 listReferences,
62 quoteIdentifier,
63 )
64-from lp.services.database.sqlbase import get_transaction_timestamp
65 from lp.services.features import getFeatureFlag
66 from lp.services.librarianserver import swift
67 from lp.services.librarianserver.storage import (
68@@ -116,14 +115,19 @@ def sha1_file(content_id):
69 return hasher.hexdigest(), length
70
71
72-def confirm_no_clock_skew(store):
73+def confirm_no_clock_skew(con):
74 """Raise an exception if there is significant clock skew between the
75 database and this machine.
76
77 It is theoretically possible to lose data if there is more than several
78 hours of skew.
79 """
80- db_now = get_transaction_timestamp(store)
81+ cur = con.cursor()
82+ try:
83+ cur.execute("SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'")
84+ db_now = cur.fetchone()[0].replace(tzinfo=pytz.UTC)
85+ finally:
86+ cur.close()
87 local_now = _utcnow()
88 five_minutes = timedelta(minutes=5)
89
90@@ -610,10 +614,18 @@ def delete_unreferenced_content(con):
91
92
93 def delete_unwanted_files(con):
94- delete_unwanted_disk_files(con)
95- swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
96- if swift_enabled:
97- delete_unwanted_swift_files(con)
98+ con.rollback()
99+ orig_autocommit = con.autocommit
100+ try:
101+ # Disable autocommit so that we can use named cursors.
102+ con.autocommit = False
103+ delete_unwanted_disk_files(con)
104+ swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
105+ if swift_enabled:
106+ delete_unwanted_swift_files(con)
107+ finally:
108+ con.rollback()
109+ con.autocommit = orig_autocommit
110
111
112 def delete_unwanted_disk_files(con):
113@@ -629,7 +641,7 @@ def delete_unwanted_disk_files(con):
114
115 swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
116
117- cur = con.cursor()
118+ cur = con.cursor(name="librariangc_disk_lfcs")
119
120 # Calculate all stored LibraryFileContent ids that we want to keep.
121 # Results are ordered so we don't have to suck them all in at once.
122@@ -737,6 +749,7 @@ def delete_unwanted_disk_files(con):
123 "was not found on disk." % next_wanted_content_id)
124 next_wanted_content_id = get_next_wanted_content_id()
125
126+ cur.close()
127 log.info(
128 "Deleted %d files from disk that were no longer referenced "
129 "in the db." % removed_count)
130@@ -791,12 +804,16 @@ def delete_unwanted_swift_files(con):
131
132 log.info("Deleting unwanted files from Swift.")
133
134- cur = con.cursor()
135-
136 # Get the largest LibraryFileContent id in the database. This lets
137 # us know when to stop looking in Swift for more files.
138- cur.execute("SELECT max(id) FROM LibraryFileContent")
139- max_lfc_id = cur.fetchone()[0]
140+ cur = con.cursor()
141+ try:
142+ cur.execute("SELECT max(id) FROM LibraryFileContent")
143+ max_lfc_id = cur.fetchone()[0]
144+ finally:
145+ cur.close()
146+
147+ cur = con.cursor(name="librariangc_swift_lfcs")
148
149 # Calculate all stored LibraryFileContent ids that we want to keep.
150 # Results are ordered so we don't have to suck them all in at once.
151@@ -877,6 +894,7 @@ def delete_unwanted_swift_files(con):
152 "but was not found in Swift.".format(next_wanted_content_id))
153 next_wanted_content_id = get_next_wanted_content_id()
154
155+ cur.close()
156 log.info(
157 "Deleted {0} files from Swift that were no longer referenced "
158 "in the db.".format(removed_count))
159diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
160index 9163899..31c3830 100644
161--- a/lib/lp/services/librarianserver/tests/test_gc.py
162+++ b/lib/lp/services/librarianserver/tests/test_gc.py
163@@ -1,4 +1,4 @@
164-# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
165+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
166 # GNU Affero General Public License version 3 (see the file LICENSE).
167
168 """Librarian garbage collection tests"""
169@@ -96,8 +96,8 @@ class TestLibrarianGarbageCollectionBase:
170 # Make sure that every file the database knows about exists on disk.
171 # We manually remove them for tests that need to cope with missing
172 # library items.
173- self.store = IMasterStore(LibraryFileContent)
174- for content in self.store.find(LibraryFileContent):
175+ store = IMasterStore(LibraryFileContent)
176+ for content in store.find(LibraryFileContent):
177 path = librariangc.get_file_path(content.id)
178 if not os.path.exists(path):
179 if not os.path.exists(os.path.dirname(path)):
180@@ -596,13 +596,13 @@ class TestLibrarianGarbageCollectionBase:
181
182 def test_confirm_no_clock_skew(self):
183 # There should not be any clock skew when running the test suite.
184- librariangc.confirm_no_clock_skew(self.store)
185+ librariangc.confirm_no_clock_skew(self.con)
186
187 # To test this function raises an excption when it should,
188 # fool the garbage collector into thinking it is tomorrow.
189 with self.librariangc_thinking_it_is_tomorrow():
190 self.assertRaises(
191- Exception, librariangc.confirm_no_clock_skew, (self.store,)
192+ Exception, librariangc.confirm_no_clock_skew, (self.con,)
193 )
194
195
196diff --git a/lib/lp/testing/pgsql.py b/lib/lp/testing/pgsql.py
197index c4dffc4..373b021 100644
198--- a/lib/lp/testing/pgsql.py
199+++ b/lib/lp/testing/pgsql.py
200@@ -1,4 +1,4 @@
201-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
202+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
203 # GNU Affero General Public License version 3 (see the file LICENSE).
204
205 '''
206@@ -69,8 +69,8 @@ class ConnectionWrapper:
207 finally:
208 ConnectionWrapper.committed = True
209
210- def cursor(self):
211- return CursorWrapper(self.real_connection.cursor())
212+ def cursor(self, *args, **kwargs):
213+ return CursorWrapper(self.real_connection.cursor(*args, **kwargs))
214
215 def __getattr__(self, key):
216 return getattr(self.real_connection, key)

Subscribers

People subscribed via source and target branches

to status/vote changes: