Merge lp:~cjwatson/launchpad/bzr-svn-worker-store-cache into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18874
Proposed branch: lp:~cjwatson/launchpad/bzr-svn-worker-store-cache
Merge into: lp:launchpad
Diff against target: 184 lines (+93/-9)
3 files modified
lib/lp/codehosting/codeimport/tarball.py (+7/-4)
lib/lp/codehosting/codeimport/tests/test_worker.py (+46/-1)
lib/lp/codehosting/codeimport/worker.py (+40/-4)
To merge this branch: bzr merge lp:~cjwatson/launchpad/bzr-svn-worker-store-cache
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+362757@code.launchpad.net

Commit message

Store bzr-svn's cache in the import data store.

Description of the change

We really shouldn't be storing persistent state (even if only caches) on individual importds. The last time we brought up new importds, they took ages to clear their initial queue because they had to populate ~/.bazaar/svn-cache/ first, unlike bzr-git where we push the cache to the import data store. This is a bit of a trade-off because it'll mean transferring the full cache for a given repository at the start and end of each import, but I think that's better than sometimes hammering the remote repository if a particular importd happens to be lacking the cache.

The last figure I saw for the total size of the cache across all imports was 42G or so, which isn't quite trivial, but maple seems to have plenty of space so it shouldn't be a problem.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/codehosting/codeimport/tarball.py'
2--- lib/lp/codehosting/codeimport/tarball.py 2009-06-25 04:06:00 +0000
3+++ lib/lp/codehosting/codeimport/tarball.py 2019-02-06 00:49:40 +0000
4@@ -1,4 +1,4 @@
5-# Copyright 2009 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 """Create and extract tarballs."""
10@@ -33,19 +33,22 @@
11 raise TarError(retcode)
12
13
14-def create_tarball(directory, tarball_name):
15+def create_tarball(directory, tarball_name, filenames=None):
16 """Create a tarball of `directory` called `tarball_name`.
17
18 This creates a tarball of `directory` from its parent directory. This
19 means that when untarred, it will create a new directory with the same
20- name as `directory`.
21+ name as `directory`. If `filenames` is not None, then the tarball will
22+ be limited to that list of directory entries under `directory`.
23
24 Basically, this is the standard way of making tarballs.
25 """
26 if not os.path.isdir(directory):
27 raise NotADirectory(directory)
28+ if filenames is None:
29+ filenames = ['.']
30 retcode = subprocess.call(
31- ['tar', '-C', directory, '-czf', tarball_name, '.'])
32+ ['tar', '-C', directory, '-czf', tarball_name] + filenames)
33 _check_tar_retcode(retcode)
34
35
36
37=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
38--- lib/lp/codehosting/codeimport/tests/test_worker.py 2018-08-16 12:37:47 +0000
39+++ lib/lp/codehosting/codeimport/tests/test_worker.py 2019-02-06 00:49:40 +0000
40@@ -1,4 +1,4 @@
41-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
42+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
43 # GNU Affero General Public License version 3 (see the file LICENSE).
44
45 """Tests for the code import worker."""
46@@ -11,6 +11,7 @@
47 import subprocess
48 import tempfile
49 import time
50+from uuid import uuid4
51
52 from bzrlib import (
53 trace,
54@@ -1230,6 +1231,50 @@
55 self.bazaar_store, logging.getLogger(),
56 opener_policy=opener_policy)
57
58+ def test_pushBazaarBranch_saves_bzr_svn_cache(self):
59+ # BzrSvnImportWorker.pushBazaarBranch saves a tarball of the bzr-svn
60+ # cache in the worker's ImportDataStore.
61+ from bzrlib.plugins.svn.cache import get_cache
62+ worker = self.makeImportWorker(self.makeSourceDetails(
63+ 'trunk', [('README', 'Original contents')]),
64+ opener_policy=AcceptAnythingPolicy())
65+ uuid = subvertpy.ra.RemoteAccess(worker.source_details.url).get_uuid()
66+ cache_dir = get_cache(uuid).create_cache_dir()
67+ cache_dir_contents = os.listdir(cache_dir)
68+ self.assertNotEqual([], cache_dir_contents)
69+ opener = SafeBranchOpener(worker._opener_policy, worker.probers)
70+ remote_branch = opener.open(worker.source_details.url)
71+ worker.pushBazaarBranch(
72+ self.make_branch('.'), remote_branch=remote_branch)
73+ worker.import_data_store.fetch('svn-cache.tar.gz')
74+ extract_tarball('svn-cache.tar.gz', '.')
75+ self.assertContentEqual(cache_dir_contents, os.listdir(uuid))
76+
77+ def test_getBazaarBranch_fetches_bzr_svn_cache(self):
78+ # BzrSvnImportWorker.getBazaarBranch fetches the tarball of the
79+ # bzr-svn cache from the worker's ImportDataStore and expands it
80+ # into the appropriate cache directory.
81+ from bzrlib.plugins.svn.cache import get_cache
82+ worker = self.makeImportWorker(self.makeSourceDetails(
83+ 'trunk', [('README', 'Original contents')]),
84+ opener_policy=AcceptAnythingPolicy())
85+ # Store a tarred-up cache in the store.
86+ content = self.factory.getUniqueString()
87+ uuid = str(uuid4())
88+ os.makedirs(os.path.join('cache', uuid))
89+ with open(os.path.join('cache', uuid, 'svn-cache'), 'w') as cache_file:
90+ cache_file.write(content)
91+ create_tarball('cache', 'svn-cache.tar.gz', filenames=[uuid])
92+ worker.import_data_store.put('svn-cache.tar.gz')
93+ # Make sure there's a Bazaar branch in the branch store.
94+ branch = self.make_branch('branch')
95+ ToBzrImportWorker.pushBazaarBranch(worker, branch)
96+ # Finally, fetching the tree gets the cache too.
97+ worker.getBazaarBranch()
98+ cache_dir = get_cache(uuid).create_cache_dir()
99+ with open(os.path.join(cache_dir, 'svn-cache')) as cache_file:
100+ self.assertEqual(content, cache_file.read())
101+
102
103 class TestBzrImport(WorkerTest, TestActualImportMixin,
104 PullingImportWorkerTests):
105
106=== modified file 'lib/lp/codehosting/codeimport/worker.py'
107--- lib/lp/codehosting/codeimport/worker.py 2018-08-16 12:37:47 +0000
108+++ lib/lp/codehosting/codeimport/worker.py 2019-02-06 00:49:40 +0000
109@@ -1,4 +1,4 @@
110-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
111+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
112 # GNU Affero General Public License version 3 (see the file LICENSE).
113
114 """The code import worker. This imports code from foreign repositories."""
115@@ -600,7 +600,7 @@
116 self.required_format, self.needs_bzr_tree,
117 stacked_on_url=self.source_details.stacked_on_url)
118
119- def pushBazaarBranch(self, bazaar_branch):
120+ def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
121 """Push the updated Bazaar branch to the server.
122
123 :return: True if revisions were transferred.
124@@ -778,7 +778,7 @@
125 else:
126 raise
127 self._logger.info("Pushing local import branch to central store.")
128- self.pushBazaarBranch(bazaar_branch)
129+ self.pushBazaarBranch(bazaar_branch, remote_branch=remote_branch)
130 self._logger.info("Job complete.")
131 return result
132 finally:
133@@ -844,7 +844,7 @@
134 extract_tarball(local_name, git_db_dir)
135 return branch
136
137- def pushBazaarBranch(self, bazaar_branch):
138+ def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
139 """See `ToBzrImportWorker.pushBazaarBranch`.
140
141 In addition to the superclass' behaviour, we store bzr-git's cache
142@@ -894,6 +894,42 @@
143 from bzrlib.plugins.svn import SvnRemoteProber
144 return [SvnRemoteProber]
145
146+ def getBazaarBranch(self):
147+ """See `ToBzrImportWorker.getBazaarBranch`.
148+
149+ In addition to the superclass' behaviour, we retrieve bzr-svn's
150+ cache from the import data store and put it where bzr-svn will find
151+ it.
152+ """
153+ from bzrlib.plugins.svn.cache import create_cache_dir
154+ branch = super(BzrSvnImportWorker, self).getBazaarBranch()
155+ local_name = 'svn-cache.tar.gz'
156+ if self.import_data_store.fetch(local_name):
157+ extract_tarball(local_name, create_cache_dir())
158+ return branch
159+
160+ def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
161+ """See `ToBzrImportWorker.pushBazaarBranch`.
162+
163+ In addition to the superclass' behaviour, we store bzr-svn's cache
164+ directory in the import data store.
165+ """
166+ from bzrlib.plugins.svn.cache import get_cache
167+ non_trivial = super(BzrSvnImportWorker, self).pushBazaarBranch(
168+ bazaar_branch)
169+ if remote_branch is not None:
170+ cache = get_cache(remote_branch.repository.uuid)
171+ cache_dir = cache.create_cache_dir()
172+ local_name = 'svn-cache.tar.gz'
173+ create_tarball(
174+ os.path.dirname(cache_dir), local_name,
175+ filenames=[os.path.basename(cache_dir)])
176+ self.import_data_store.put(local_name)
177+ # XXX cjwatson 2019-02-06: Once this is behaving well on
178+ # production, consider removing the local cache after pushing a
179+ # copy of it to the import data store.
180+ return non_trivial
181+
182
183 class BzrImportWorker(PullingImportWorker):
184 """An import worker for importing Bazaar branches."""