Merge ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 67c30a6974378c1e2b7239bdee52fa5544a57d70
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:swift-files-deduplicate
Merge into: launchpad:master
Diff against target: 136 lines (+70/-8)
3 files modified
lib/lp/services/librarianserver/librariangc.py (+7/-4)
lib/lp/services/librarianserver/tests/test_gc.py (+60/-1)
lib/lp/testing/swift/fakeswift.py (+3/-3)
Reviewer Review Type Date Requested Status
Ioana Lasc (community) Approve
Review via email: mp+387835@code.launchpad.net

Commit message

Tolerate duplicate files from Swift get_container

Description of the change

The librarian's GC process has been failing with errors such as this for some time:

  ClientException: Object DELETE failed: ... 404 Not Found [first 60 chars of response] <html><h1>Not Found</h1><p>The resource could not be found.

It's not completely clear why this is happening, but one possibility is that swift_connection.get_container is returning duplicate files when iterating over multiple batches or similar, so deduplicate its response.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
2index 19900e4..88c9af9 100644
3--- a/lib/lp/services/librarianserver/librariangc.py
4+++ b/lib/lp/services/librarianserver/librariangc.py
5@@ -1,4 +1,4 @@
6-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
7+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
8 # GNU Affero General Public License version 3 (see the file LICENSE).
9
10 """Librarian garbage collection routines"""
11@@ -750,15 +750,18 @@ def swift_files(max_lfc_id):
12 while container != final_container:
13 container_num += 1
14 container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
15+ seen_names = set()
16 try:
17- names = sorted(
18+ objs = sorted(
19 swift.quiet_swiftclient(
20 swift_connection.get_container,
21 container, full_listing=True)[1],
22 key=lambda x: [
23 int(segment) for segment in x['name'].split('/')])
24- for name in names:
25- yield (container, name)
26+ for obj in objs:
27+ if obj['name'] not in seen_names:
28+ yield (container, obj)
29+ seen_names.add(obj['name'])
30 except swiftclient.ClientException as x:
31 if x.http_status == 404:
32 continue
33diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
34index 479c51c..d59dde0 100644
35--- a/lib/lp/services/librarianserver/tests/test_gc.py
36+++ b/lib/lp/services/librarianserver/tests/test_gc.py
37@@ -1,4 +1,4 @@
38-# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
39+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
40 # GNU Affero General Public License version 3 (see the file LICENSE).
41
42 """Librarian garbage collection tests"""
43@@ -23,6 +23,7 @@ from subprocess import (
44 import sys
45 import tempfile
46
47+from fixtures import MockPatchObject
48 import pytz
49 from sqlobject import SQLObjectNotFound
50 from storm.store import Store
51@@ -796,6 +797,64 @@ class TestSwiftLibrarianGarbageCollection(
52 self.assertEqual([False, False, False], segment_existence(big1_id))
53 self.assertEqual([True, True, True], segment_existence(big2_id))
54
55+ def test_delete_unwanted_files_handles_duplicates(self):
56+ # GC tolerates swift_connection.get_container returning duplicate
57+ # names (perhaps across multiple batches). It's not clear whether
58+ # this happens in practice, but some strange deletion failures
59+ # suggest that it might happen.
60+ switch_dbuser('testadmin')
61+ content = b'uploading to swift'
62+ f1_lfa = LibraryFileAlias.get(self.client.addFile(
63+ 'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
64+ f1_id = f1_lfa.contentID
65+
66+ f2_lfa = LibraryFileAlias.get(self.client.addFile(
67+ 'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
68+ f2_id = f2_lfa.contentID
69+ transaction.commit()
70+
71+ for lfc_id in (f1_id, f2_id):
72+ # Make the files old so they don't look in-progress.
73+ os.utime(swift.filesystem_path(lfc_id), (0, 0))
74+
75+ switch_dbuser(config.librarian_gc.dbuser)
76+
77+ # Force swift_connection.get_container to return duplicate results.
78+ orig_get_container = swiftclient.get_container
79+
80+ def duplicating_get_container(*args, **kwargs):
81+ rv = orig_get_container(*args, **kwargs)
82+ if not kwargs.get('full_listing', False):
83+ rv = (rv[0], rv[1] * 2)
84+ return rv
85+
86+ self.useFixture(MockPatchObject(
87+ swiftclient, 'get_container', duplicating_get_container))
88+
89+ swift.to_swift(BufferLogger(), remove_func=os.unlink)
90+
91+ # Both files exist in Swift.
92+ self.assertTrue(self.file_exists(f1_id))
93+ self.assertTrue(self.file_exists(f2_id))
94+
95+ # Both files survive the first purge.
96+ with self.librariangc_thinking_it_is_tomorrow():
97+ librariangc.delete_unwanted_files(self.con)
98+ self.assertTrue(self.file_exists(f1_id))
99+ self.assertTrue(self.file_exists(f2_id))
100+
101+ # Remove the first file from the DB.
102+ content = f1_lfa.content
103+ Store.of(f1_lfa).remove(f1_lfa)
104+ Store.of(content).remove(content)
105+ transaction.commit()
106+
107+ # The first file is removed, but the second is intact.
108+ with self.librariangc_thinking_it_is_tomorrow():
109+ librariangc.delete_unwanted_files(self.con)
110+ self.assertFalse(self.file_exists(f1_id))
111+ self.assertTrue(self.file_exists(f2_id))
112+
113
114 class TestBlobCollection(TestCase):
115 layer = LaunchpadZopelessLayer
116diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
117index eedaa77..86716cd 100644
118--- a/lib/lp/testing/swift/fakeswift.py
119+++ b/lib/lp/testing/swift/fakeswift.py
120@@ -1,4 +1,4 @@
121-# Copyright 2013-2017 Canonical Ltd. This software is licensed under the
122+# Copyright 2013-2020 Canonical Ltd. This software is licensed under the
123 # GNU Affero General Public License version 3 (see the file LICENSE).
124
125 """An OpenStack Swift server mock using Twisted.
126@@ -359,8 +359,8 @@ class SwiftContainer(resource.Resource):
127 child = self.container_children.get(name, None)
128 elif request.method == b"DELETE":
129 child = self.container_children.get(name, None)
130- if child is None: # delete unknown object
131- return EmptyPage(http.NO_CONTENT)
132+ if child is None: # delete unknown object
133+ return EmptyPage(http.NOT_FOUND)
134 del self.container_children[name]
135 return EmptyPage(http.NO_CONTENT)
136 else:

Subscribers

People subscribed via source and target branches

to status/vote changes: