Merge ~cjwatson/launchpad:librarian-gc-ram into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 7cda2a51deb8a2ad13f0e211a24696da6ef483ed
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:librarian-gc-ram
Merge into: launchpad:master
Diff against target: 216 lines (+51/-31)
4 files modified
cronscripts/librarian-gc.py (+12/-10)
lib/lp/services/librarianserver/librariangc.py (+31/-13)
lib/lp/services/librarianserver/tests/test_gc.py (+5/-5)
lib/lp/testing/pgsql.py (+3/-3)
Reviewer Review Type Date Requested Status
Ioana Lasc (community) Approve
Review via email: mp+407071@code.launchpad.net

Commit message

librarian-gc: Use a server-side cursor for iterating LFC IDs

Description of the change

If we use a normal client-side cursor, then psycopg2 reads all the rows into memory up-front, defeating our attempt to avoid doing so.

In order to do this, I had to also fix bug 853066, because Storm's `ConnectionWrapper.cursor` doesn't currently support creating server-side cursors (though that could certainly be fixed).

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

LGTM

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/cronscripts/librarian-gc.py b/cronscripts/librarian-gc.py
index cc18c12..c767e73 100755
--- a/cronscripts/librarian-gc.py
+++ b/cronscripts/librarian-gc.py
@@ -1,6 +1,6 @@
1#!/usr/bin/python3 -S1#!/usr/bin/python3 -S
2#2#
3# Copyright 2009-2011 Canonical Ltd. This software is licensed under the3# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
4# GNU Affero General Public License version 3 (see the file LICENSE).4# GNU Affero General Public License version 3 (see the file LICENSE).
55
6"""Librarian garbage collector.6"""Librarian garbage collector.
@@ -16,9 +16,14 @@ import _pythonpath # noqa: F401
1616
17import logging17import logging
1818
19from lp.services.config import config19from lp.services.config import (
20from lp.services.database.interfaces import IStore20 config,
21from lp.services.librarian.model import LibraryFileAlias21 dbconfig,
22 )
23from lp.services.database.sqlbase import (
24 connect,
25 ISOLATION_LEVEL_AUTOCOMMIT,
26 )
22from lp.services.librarianserver import librariangc27from lp.services.librarianserver import librariangc
23from lp.services.scripts.base import LaunchpadCronScript28from lp.services.scripts.base import LaunchpadCronScript
2429
@@ -63,15 +68,12 @@ class LibrarianGC(LaunchpadCronScript):
63 if self.options.loglevel <= logging.DEBUG:68 if self.options.loglevel <= logging.DEBUG:
64 librariangc.debug = True69 librariangc.debug = True
6570
66 # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection71 conn = connect(
67 # here is wrong. We should either create our own or use72 user=dbconfig.dbuser, isolation=ISOLATION_LEVEL_AUTOCOMMIT)
68 # Store.execute or cursor() and the transaction module.
69 store = IStore(LibraryFileAlias)
70 conn = store._connection._raw_connection
7173
72 # Refuse to run if we have significant clock skew between the74 # Refuse to run if we have significant clock skew between the
73 # librarian and the database.75 # librarian and the database.
74 librariangc.confirm_no_clock_skew(store)76 librariangc.confirm_no_clock_skew(conn)
7577
76 # Note that each of these next steps will issue commit commands78 # Note that each of these next steps will issue commit commands
77 # as appropriate to make this script transaction friendly79 # as appropriate to make this script transaction friendly
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index b924409..71c3355 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -1,4 +1,4 @@
1# Copyright 2009-2020 Canonical Ltd. This software is licensed under the1# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Librarian garbage collection routines"""4"""Librarian garbage collection routines"""
@@ -31,7 +31,6 @@ from lp.services.database.postgresql import (
31 listReferences,31 listReferences,
32 quoteIdentifier,32 quoteIdentifier,
33 )33 )
34from lp.services.database.sqlbase import get_transaction_timestamp
35from lp.services.features import getFeatureFlag34from lp.services.features import getFeatureFlag
36from lp.services.librarianserver import swift35from lp.services.librarianserver import swift
37from lp.services.librarianserver.storage import (36from lp.services.librarianserver.storage import (
@@ -116,14 +115,19 @@ def sha1_file(content_id):
116 return hasher.hexdigest(), length115 return hasher.hexdigest(), length
117116
118117
119def confirm_no_clock_skew(store):118def confirm_no_clock_skew(con):
120 """Raise an exception if there is significant clock skew between the119 """Raise an exception if there is significant clock skew between the
121 database and this machine.120 database and this machine.
122121
123 It is theoretically possible to lose data if there is more than several122 It is theoretically possible to lose data if there is more than several
124 hours of skew.123 hours of skew.
125 """124 """
126 db_now = get_transaction_timestamp(store)125 cur = con.cursor()
126 try:
127 cur.execute("SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'")
128 db_now = cur.fetchone()[0].replace(tzinfo=pytz.UTC)
129 finally:
130 cur.close()
127 local_now = _utcnow()131 local_now = _utcnow()
128 five_minutes = timedelta(minutes=5)132 five_minutes = timedelta(minutes=5)
129133
@@ -610,10 +614,18 @@ def delete_unreferenced_content(con):
610614
611615
612def delete_unwanted_files(con):616def delete_unwanted_files(con):
613 delete_unwanted_disk_files(con)617 con.rollback()
614 swift_enabled = getFeatureFlag('librarian.swift.enabled') or False618 orig_autocommit = con.autocommit
615 if swift_enabled:619 try:
616 delete_unwanted_swift_files(con)620 # Disable autocommit so that we can use named cursors.
621 con.autocommit = False
622 delete_unwanted_disk_files(con)
623 swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
624 if swift_enabled:
625 delete_unwanted_swift_files(con)
626 finally:
627 con.rollback()
628 con.autocommit = orig_autocommit
617629
618630
619def delete_unwanted_disk_files(con):631def delete_unwanted_disk_files(con):
@@ -629,7 +641,7 @@ def delete_unwanted_disk_files(con):
629641
630 swift_enabled = getFeatureFlag('librarian.swift.enabled') or False642 swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
631643
632 cur = con.cursor()644 cur = con.cursor(name="librariangc_disk_lfcs")
633645
634 # Calculate all stored LibraryFileContent ids that we want to keep.646 # Calculate all stored LibraryFileContent ids that we want to keep.
635 # Results are ordered so we don't have to suck them all in at once.647 # Results are ordered so we don't have to suck them all in at once.
@@ -737,6 +749,7 @@ def delete_unwanted_disk_files(con):
737 "was not found on disk." % next_wanted_content_id)749 "was not found on disk." % next_wanted_content_id)
738 next_wanted_content_id = get_next_wanted_content_id()750 next_wanted_content_id = get_next_wanted_content_id()
739751
752 cur.close()
740 log.info(753 log.info(
741 "Deleted %d files from disk that were no longer referenced "754 "Deleted %d files from disk that were no longer referenced "
742 "in the db." % removed_count)755 "in the db." % removed_count)
@@ -791,12 +804,16 @@ def delete_unwanted_swift_files(con):
791804
792 log.info("Deleting unwanted files from Swift.")805 log.info("Deleting unwanted files from Swift.")
793806
794 cur = con.cursor()
795
796 # Get the largest LibraryFileContent id in the database. This lets807 # Get the largest LibraryFileContent id in the database. This lets
797 # us know when to stop looking in Swift for more files.808 # us know when to stop looking in Swift for more files.
798 cur.execute("SELECT max(id) FROM LibraryFileContent")809 cur = con.cursor()
799 max_lfc_id = cur.fetchone()[0]810 try:
811 cur.execute("SELECT max(id) FROM LibraryFileContent")
812 max_lfc_id = cur.fetchone()[0]
813 finally:
814 cur.close()
815
816 cur = con.cursor(name="librariangc_swift_lfcs")
800817
801 # Calculate all stored LibraryFileContent ids that we want to keep.818 # Calculate all stored LibraryFileContent ids that we want to keep.
802 # Results are ordered so we don't have to suck them all in at once.819 # Results are ordered so we don't have to suck them all in at once.
@@ -877,6 +894,7 @@ def delete_unwanted_swift_files(con):
877 "but was not found in Swift.".format(next_wanted_content_id))894 "but was not found in Swift.".format(next_wanted_content_id))
878 next_wanted_content_id = get_next_wanted_content_id()895 next_wanted_content_id = get_next_wanted_content_id()
879896
897 cur.close()
880 log.info(898 log.info(
881 "Deleted {0} files from Swift that were no longer referenced "899 "Deleted {0} files from Swift that were no longer referenced "
882 "in the db.".format(removed_count))900 "in the db.".format(removed_count))
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index 9163899..31c3830 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -1,4 +1,4 @@
1# Copyright 2009-2020 Canonical Ltd. This software is licensed under the1# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Librarian garbage collection tests"""4"""Librarian garbage collection tests"""
@@ -96,8 +96,8 @@ class TestLibrarianGarbageCollectionBase:
96 # Make sure that every file the database knows about exists on disk.96 # Make sure that every file the database knows about exists on disk.
97 # We manually remove them for tests that need to cope with missing97 # We manually remove them for tests that need to cope with missing
98 # library items.98 # library items.
99 self.store = IMasterStore(LibraryFileContent)99 store = IMasterStore(LibraryFileContent)
100 for content in self.store.find(LibraryFileContent):100 for content in store.find(LibraryFileContent):
101 path = librariangc.get_file_path(content.id)101 path = librariangc.get_file_path(content.id)
102 if not os.path.exists(path):102 if not os.path.exists(path):
103 if not os.path.exists(os.path.dirname(path)):103 if not os.path.exists(os.path.dirname(path)):
@@ -596,13 +596,13 @@ class TestLibrarianGarbageCollectionBase:
596596
597 def test_confirm_no_clock_skew(self):597 def test_confirm_no_clock_skew(self):
598 # There should not be any clock skew when running the test suite.598 # There should not be any clock skew when running the test suite.
599 librariangc.confirm_no_clock_skew(self.store)599 librariangc.confirm_no_clock_skew(self.con)
600600
601 # To test this function raises an excption when it should,601 # To test this function raises an excption when it should,
602 # fool the garbage collector into thinking it is tomorrow.602 # fool the garbage collector into thinking it is tomorrow.
603 with self.librariangc_thinking_it_is_tomorrow():603 with self.librariangc_thinking_it_is_tomorrow():
604 self.assertRaises(604 self.assertRaises(
605 Exception, librariangc.confirm_no_clock_skew, (self.store,)605 Exception, librariangc.confirm_no_clock_skew, (self.con,)
606 )606 )
607607
608608
diff --git a/lib/lp/testing/pgsql.py b/lib/lp/testing/pgsql.py
index c4dffc4..373b021 100644
--- a/lib/lp/testing/pgsql.py
+++ b/lib/lp/testing/pgsql.py
@@ -1,4 +1,4 @@
1# Copyright 2009-2011 Canonical Ltd. This software is licensed under the1# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4'''4'''
@@ -69,8 +69,8 @@ class ConnectionWrapper:
69 finally:69 finally:
70 ConnectionWrapper.committed = True70 ConnectionWrapper.committed = True
7171
72 def cursor(self):72 def cursor(self, *args, **kwargs):
73 return CursorWrapper(self.real_connection.cursor())73 return CursorWrapper(self.real_connection.cursor(*args, **kwargs))
7474
75 def __getattr__(self, key):75 def __getattr__(self, key):
76 return getattr(self.real_connection, key)76 return getattr(self.real_connection, key)

Subscribers

People subscribed via source and target branches

to status/vote changes: