Merge ~cjwatson/launchpad:librarian-gc-delete-tolerate-404 into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 6239cad5bf1ee47f8f8ba0cb4cc951a98ad04d1f
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:librarian-gc-delete-tolerate-404
Merge into: launchpad:master
Diff against target: 142 lines (+81/-6)
3 files modified
lib/lp/services/librarianserver/librariangc.py (+5/-1)
lib/lp/services/librarianserver/tests/test_gc.py (+60/-0)
lib/lp/testing/swift/fakeswift.py (+16/-5)
Reviewer Review Type Date Requested Status
Tom Wardill (community) Approve
Review via email: mp+389749@code.launchpad.net

Commit message

Ignore 404 when deleting unwanted Swift files

Description of the change

librarian-gc has been consistently failing for some time with the following pattern:

  2020-08-19 03:22:44 DEBUG http://10.34.0.139:8080 "GET /v1/AUTH_xxx/librarian_6539?format=json&marker=3269994338 HTTP/1.1" 200 552359
  2020-08-19 03:22:44 DEBUG http://10.34.0.139:8080 "GET /v1/AUTH_xxx/librarian_6539?format=json&marker=3269999999 HTTP/1.1" 200 2
  2020-08-19 03:22:45 DEBUG http://10.34.0.139:8080 "DELETE /v1/AUTH_xxx/librarian_6539/3269500785 HTTP/1.1" 401 131
  2020-08-19 03:22:46 DEBUG http://10.34.0.136:5000 "POST /v2.0/tokens HTTP/1.1" 200 3410
  2020-08-19 03:22:46 DEBUG http://10.34.0.139:8080 "DELETE /v1/AUTH_xxx/librarian_6539/3269500785 HTTP/1.1" 404 70

The DELETE may happen in a different swiftclient.client.Connection instance from the GETs, and the sequence of GETs to enumerate all the files in Swift up to that point takes almost two hours on production at the moment which is longer than Keystone's default token expiry time of one hour, so it makes sense that the first DELETE might return 401. However, it's not at all clear why the retried DELETE of the same object gets 404 (nothing else in librarian-gc seems to have deleted that object). Although I'm concerned about this odd pattern, I've been unable to reproduce this with the same version of Swift, so perhaps it's best to just ignore it (since DELETE returning 404 still at least indicates that the object is no longer present) and allow librarian-gc to run to completion.

To post a comment you must log in.
Revision history for this message
Tom Wardill (twom) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
2index 88c9af9..a16e769 100644
3--- a/lib/lp/services/librarianserver/librariangc.py
4+++ b/lib/lp/services/librarianserver/librariangc.py
5@@ -832,7 +832,11 @@ def delete_unwanted_swift_files(con):
6 "File %d not removed - created too recently", content_id)
7 else:
8 with swift.connection() as swift_connection:
9- swift_connection.delete_object(container, name)
10+ try:
11+ swift_connection.delete_object(container, name)
12+ except swiftclient.ClientException as e:
13+ if e.http_status != 404:
14+ raise
15 log.debug3(
16 'Deleted ({0}, {1}) from Swift'.format(container, name))
17 removed_count += 1
18diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
19index d59dde0..71fb0c4 100644
20--- a/lib/lp/services/librarianserver/tests/test_gc.py
21+++ b/lib/lp/services/librarianserver/tests/test_gc.py
22@@ -25,6 +25,8 @@ import tempfile
23
24 from fixtures import MockPatchObject
25 import pytz
26+import requests
27+from six.moves.urllib.parse import urljoin
28 from sqlobject import SQLObjectNotFound
29 from storm.store import Store
30 from swiftclient import client as swiftclient
31@@ -855,6 +857,64 @@ class TestSwiftLibrarianGarbageCollection(
32 self.assertFalse(self.file_exists(f1_id))
33 self.assertTrue(self.file_exists(f2_id))
34
35+ def test_delete_unwanted_files_handles_missing(self):
36+ # GC tolerates a file being already missing from Swift when it tries
37+ # to delete it. It's not clear why this happens in practice.
38+ switch_dbuser('testadmin')
39+ content = b'uploading to swift'
40+ f1_lfa = LibraryFileAlias.get(self.client.addFile(
41+ 'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
42+ f1_id = f1_lfa.contentID
43+
44+ f2_lfa = LibraryFileAlias.get(self.client.addFile(
45+ 'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
46+ f2_id = f2_lfa.contentID
47+ transaction.commit()
48+
49+ for lfc_id in (f1_id, f2_id):
50+ # Make the files old so they don't look in-progress.
51+ os.utime(swift.filesystem_path(lfc_id), (0, 0))
52+
53+ switch_dbuser(config.librarian_gc.dbuser)
54+
55+ swift.to_swift(BufferLogger(), remove_func=os.unlink)
56+
57+ # Both files exist in Swift.
58+ self.assertTrue(self.file_exists(f1_id))
59+ self.assertTrue(self.file_exists(f2_id))
60+
61+ # Remove the first file from the DB.
62+ content = f1_lfa.content
63+ Store.of(f1_lfa).remove(f1_lfa)
64+ Store.of(content).remove(content)
65+ transaction.commit()
66+
67+ # In production, we see a sequence of DELETE 401 / authenticate /
68+ # DELETE 404. Since it isn't clear why this happens, we don't know
69+ # exactly how to simulate it here, but do our best by deleting the
70+ # object and forcibly expiring the client's token just before the
71+ # first expected DELETE.
72+ real_delete_object = swiftclient.Connection.delete_object
73+
74+ def fake_delete_object(connection, *args, **kwargs):
75+ real_delete_object(connection, *args, **kwargs)
76+ self.assertIsNotNone(connection.token)
77+ requests.delete(urljoin(
78+ config.librarian_server.os_auth_url,
79+ 'tokens/%s' % connection.token))
80+ return real_delete_object(connection, *args, **kwargs)
81+
82+ self.patch(
83+ swiftclient.Connection, 'delete_object', fake_delete_object)
84+
85+ # delete_unwanted_files completes successfully, and the second file
86+ # is intact.
87+ with self.librariangc_thinking_it_is_tomorrow():
88+ swift.quiet_swiftclient(
89+ librariangc.delete_unwanted_files, self.con)
90+ self.assertFalse(self.file_exists(f1_id))
91+ self.assertTrue(self.file_exists(f2_id))
92+
93
94 class TestBlobCollection(TestCase):
95 layer = LaunchpadZopelessLayer
96diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
97index 86716cd..3536326 100644
98--- a/lib/lp/testing/swift/fakeswift.py
99+++ b/lib/lp/testing/swift/fakeswift.py
100@@ -74,8 +74,8 @@ class FakeKeystone(resource.Resource):
101 def getValidToken(self, tenant_name, expected_token=None):
102 """Get a valid token for the given tenant name."""
103 if expected_token is not None:
104- token = self.tokens[expected_token]
105- if self._isValidToken(token, tenant_name):
106+ token = self.tokens.get(expected_token)
107+ if token is not None and self._isValidToken(token, tenant_name):
108 return token
109 else:
110 for id, token in six.iteritems(self.tokens):
111@@ -100,10 +100,21 @@ class FakeKeystone(resource.Resource):
112 valid_token = self.getValidToken(tenant_name, token)
113 return valid_token is not None
114
115- def getChild(self, path, request):
116+ def getChild(self, name, request):
117 """See `twisted.web.resource.Resource.getChild`."""
118- if path in (b"v2.0", b"tokens"):
119+ if name in (b"v2.0", b"tokens"):
120 return self
121+
122+ token = self.tokens.get(name, None)
123+
124+ if request.method == b"DELETE":
125+ if not self.validateToken(request, token["tenant"]["name"]):
126+ return EmptyPage(http.UNAUTHORIZED)
127+ if token is None: # delete unknown token
128+ return EmptyPage(http.NOT_FOUND)
129+ del self.tokens[name]
130+ return EmptyPage(http.NO_CONTENT)
131+
132 return resource.NoResource("Not a valid keystone URL.")
133
134 def render_POST(self, request):
135@@ -531,7 +542,7 @@ class FakeSwift(resource.Resource):
136 return resource
137
138 if not self.root.keystone.validateToken(request, tenant_name):
139- return EmptyPage(http.FORBIDDEN)
140+ return EmptyPage(http.UNAUTHORIZED)
141
142 return resource
143

Subscribers

People subscribed via source and target branches

to status/vote changes: