Merge lp:~wgrant/launchpad/parallel-gc into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18837
Proposed branch: lp:~wgrant/launchpad/parallel-gc
Merge into: lp:launchpad
Diff against target: 101 lines (+43/-34)
1 file modified
lib/lp/services/librarianserver/librariangc.py (+43/-34)
To merge this branch: bzr merge lp:~wgrant/launchpad/parallel-gc
| Reviewer | Review Type | Date Requested | Status |
| --- | --- | --- | --- |
| Colin Watson (community) | | | Approve |
Review via email: mp+360461@code.launchpad.net

Commit message

Run librariangc file deletions in parallel to work around Swift latency.

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson):
review: Approve
Revision history for this message
Colin Watson (cjwatson):

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/librarianserver/librariangc.py'
2--- lib/lp/services/librarianserver/librariangc.py 2018-05-06 08:52:34 +0000
3+++ lib/lp/services/librarianserver/librariangc.py 2018-12-10 10:21:48 +0000
4@@ -11,6 +11,7 @@
5 )
6 import errno
7 import hashlib
8+import multiprocessing.pool
9 import os
10 import re
11 import sys
12@@ -523,6 +524,41 @@
13 else:
14 return False
15
16+ def remove_content(self, content_id):
17+ removed = []
18+
19+ # Remove the file from disk, if it hasn't already been.
20+ path = get_file_path(content_id)
21+ try:
22+ os.unlink(path)
23+ removed.append('filesystem')
24+ except OSError as e:
25+ if e.errno != errno.ENOENT:
26+ raise
27+
28+ # Remove the file from Swift, if it hasn't already been.
29+ if self.swift_enabled:
30+ container, name = swift.swift_location(content_id)
31+ with swift.connection() as swift_connection:
32+ try:
33+ swift.quiet_swiftclient(
34+ swift_connection.delete_object, container, name)
35+ removed.append('Swift')
36+ except swiftclient.ClientException as x:
37+ if x.http_status != 404:
38+ raise
39+
40+ if removed:
41+ log.debug3(
42+ "Deleted %s from %s", content_id, ' & '.join(removed))
43+
44+ elif config.librarian_server.upstream_host is None:
45+ # It is normal to have files in the database that
46+ # are not on disk if the Librarian has an upstream
47+ # Librarian, such as on staging. Don't annoy the
48+ # operator with noise in this case.
49+ log.info("%s already deleted", path)
50+
51 def __call__(self, chunksize):
52 chunksize = int(chunksize)
53
54@@ -548,40 +584,13 @@
55 SELECT content FROM UnreferencedLibraryFileContent
56 WHERE id BETWEEN %s AND %s
57 """, (self.index, self.index + chunksize - 1))
58- for content_id in (row[0] for row in cur.fetchall()):
59- removed = []
60-
61- # Remove the file from disk, if it hasn't already been.
62- path = get_file_path(content_id)
63- try:
64- os.unlink(path)
65- removed.append('filesystem')
66- except OSError as e:
67- if e.errno != errno.ENOENT:
68- raise
69-
70- # Remove the file from Swift, if it hasn't already been.
71- if self.swift_enabled:
72- container, name = swift.swift_location(content_id)
73- with swift.connection() as swift_connection:
74- try:
75- swift.quiet_swiftclient(
76- swift_connection.delete_object, container, name)
77- removed.append('Swift')
78- except swiftclient.ClientException as x:
79- if x.http_status != 404:
80- raise
81-
82- if removed:
83- log.debug3(
84- "Deleted %s from %s", content_id, ' & '.join(removed))
85-
86- elif config.librarian_server.upstream_host is None:
87- # It is normal to have files in the database that
88- # are not on disk if the Librarian has an upstream
89- # Librarian, such as on staging. Don't annoy the
90- # operator with noise in this case.
91- log.info("%s already deleted", path)
92+
93+ pool = multiprocessing.pool.ThreadPool(10)
94+ try:
95+ pool.map(self.remove_content, (row[0] for row in cur.fetchall()))
96+ finally:
97+ pool.close()
98+ pool.join()
99 self.con.rollback()
100
101 self.index += chunksize