Merge ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 67c30a6974378c1e2b7239bdee52fa5544a57d70
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:swift-files-deduplicate
Merge into: launchpad:master
Diff against target: 136 lines (+70/-8)
3 files modified
lib/lp/services/librarianserver/librariangc.py (+7/-4)
lib/lp/services/librarianserver/tests/test_gc.py (+60/-1)
lib/lp/testing/swift/fakeswift.py (+3/-3)
Reviewer Review Type Date Requested Status
Ioana Lasc (community) Approve
Review via email: mp+387835@code.launchpad.net

Commit message

Tolerate duplicate files from Swift get_container

Description of the change

The librarian's GC process has been failing with errors such as this for some time:

  ClientException: Object DELETE failed: ... 404 Not Found [first 60 chars of response] <html><h1>Not Found</h1><p>The resource could not be found.

It's not completely clear why this is happening, but one possibility is that swift_connection.get_container is returning duplicate files when iterating over multiple batches or similar, so deduplicate its response.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index 19900e4..88c9af9 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -1,4 +1,4 @@
1# Copyright 2009-2018 Canonical Ltd. This software is licensed under the1# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Librarian garbage collection routines"""4"""Librarian garbage collection routines"""
@@ -750,15 +750,18 @@ def swift_files(max_lfc_id):
750 while container != final_container:750 while container != final_container:
751 container_num += 1751 container_num += 1
752 container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)752 container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
753 seen_names = set()
753 try:754 try:
754 names = sorted(755 objs = sorted(
755 swift.quiet_swiftclient(756 swift.quiet_swiftclient(
756 swift_connection.get_container,757 swift_connection.get_container,
757 container, full_listing=True)[1],758 container, full_listing=True)[1],
758 key=lambda x: [759 key=lambda x: [
759 int(segment) for segment in x['name'].split('/')])760 int(segment) for segment in x['name'].split('/')])
760 for name in names:761 for obj in objs:
761 yield (container, name)762 if obj['name'] not in seen_names:
763 yield (container, obj)
764 seen_names.add(obj['name'])
762 except swiftclient.ClientException as x:765 except swiftclient.ClientException as x:
763 if x.http_status == 404:766 if x.http_status == 404:
764 continue767 continue
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index 479c51c..d59dde0 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -1,4 +1,4 @@
1# Copyright 2009-2017 Canonical Ltd. This software is licensed under the1# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Librarian garbage collection tests"""4"""Librarian garbage collection tests"""
@@ -23,6 +23,7 @@ from subprocess import (
23import sys23import sys
24import tempfile24import tempfile
2525
26from fixtures import MockPatchObject
26import pytz27import pytz
27from sqlobject import SQLObjectNotFound28from sqlobject import SQLObjectNotFound
28from storm.store import Store29from storm.store import Store
@@ -796,6 +797,64 @@ class TestSwiftLibrarianGarbageCollection(
796 self.assertEqual([False, False, False], segment_existence(big1_id))797 self.assertEqual([False, False, False], segment_existence(big1_id))
797 self.assertEqual([True, True, True], segment_existence(big2_id))798 self.assertEqual([True, True, True], segment_existence(big2_id))
798799
800 def test_delete_unwanted_files_handles_duplicates(self):
801 # GC tolerates swift_connection.get_container returning duplicate
802 # names (perhaps across multiple batches). It's not clear whether
803 # this happens in practice, but some strange deletion failures
804 # suggest that it might happen.
805 switch_dbuser('testadmin')
806 content = b'uploading to swift'
807 f1_lfa = LibraryFileAlias.get(self.client.addFile(
808 'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
809 f1_id = f1_lfa.contentID
810
811 f2_lfa = LibraryFileAlias.get(self.client.addFile(
812 'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
813 f2_id = f2_lfa.contentID
814 transaction.commit()
815
816 for lfc_id in (f1_id, f2_id):
817 # Make the files old so they don't look in-progress.
818 os.utime(swift.filesystem_path(lfc_id), (0, 0))
819
820 switch_dbuser(config.librarian_gc.dbuser)
821
822 # Force swift_connection.get_container to return duplicate results.
823 orig_get_container = swiftclient.get_container
824
825 def duplicating_get_container(*args, **kwargs):
826 rv = orig_get_container(*args, **kwargs)
827 if not kwargs.get('full_listing', False):
828 rv = (rv[0], rv[1] * 2)
829 return rv
830
831 self.useFixture(MockPatchObject(
832 swiftclient, 'get_container', duplicating_get_container))
833
834 swift.to_swift(BufferLogger(), remove_func=os.unlink)
835
836 # Both files exist in Swift.
837 self.assertTrue(self.file_exists(f1_id))
838 self.assertTrue(self.file_exists(f2_id))
839
840 # Both files survive the first purge.
841 with self.librariangc_thinking_it_is_tomorrow():
842 librariangc.delete_unwanted_files(self.con)
843 self.assertTrue(self.file_exists(f1_id))
844 self.assertTrue(self.file_exists(f2_id))
845
846 # Remove the first file from the DB.
847 content = f1_lfa.content
848 Store.of(f1_lfa).remove(f1_lfa)
849 Store.of(content).remove(content)
850 transaction.commit()
851
852 # The first file is removed, but the second is intact.
853 with self.librariangc_thinking_it_is_tomorrow():
854 librariangc.delete_unwanted_files(self.con)
855 self.assertFalse(self.file_exists(f1_id))
856 self.assertTrue(self.file_exists(f2_id))
857
799858
800class TestBlobCollection(TestCase):859class TestBlobCollection(TestCase):
801 layer = LaunchpadZopelessLayer860 layer = LaunchpadZopelessLayer
diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
index eedaa77..86716cd 100644
--- a/lib/lp/testing/swift/fakeswift.py
+++ b/lib/lp/testing/swift/fakeswift.py
@@ -1,4 +1,4 @@
1# Copyright 2013-2017 Canonical Ltd. This software is licensed under the1# Copyright 2013-2020 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""An OpenStack Swift server mock using Twisted.4"""An OpenStack Swift server mock using Twisted.
@@ -359,8 +359,8 @@ class SwiftContainer(resource.Resource):
359 child = self.container_children.get(name, None)359 child = self.container_children.get(name, None)
360 elif request.method == b"DELETE":360 elif request.method == b"DELETE":
361 child = self.container_children.get(name, None)361 child = self.container_children.get(name, None)
362 if child is None: # delete unknown object362 if child is None: # delete unknown object
363 return EmptyPage(http.NO_CONTENT)363 return EmptyPage(http.NOT_FOUND)
364 del self.container_children[name]364 del self.container_children[name]
365 return EmptyPage(http.NO_CONTENT)365 return EmptyPage(http.NO_CONTENT)
366 else:366 else:

Subscribers

People subscribed via source and target branches

to status/vote changes: