Merge lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad
- codeimport-git-worker-sync-head
- Merge into devel
Proposed by
Colin Watson
Status: | Merged |
---|---|
Merged at revision: | 18259 |
Proposed branch: | lp:~cjwatson/launchpad/codeimport-git-worker-sync-head |
Merge into: | lp:launchpad |
Prerequisite: | lp:~cjwatson/launchpad/codeimport-git-dulwich-web |
Diff against target: |
454 lines (+233/-45) 4 files modified
lib/lp/codehosting/codeimport/tests/servers.py (+50/-3) lib/lp/codehosting/codeimport/tests/test_workermonitor.py (+78/-33) lib/lp/codehosting/codeimport/worker.py (+103/-9) scripts/code-import-worker.py (+2/-0) |
To merge this branch: | bzr merge lp:~cjwatson/launchpad/codeimport-git-worker-sync-head |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
William Grant | code | Approve | |
Review via email: mp+309988@code.launchpad.net |
Commit message
Fix the Git-to-Git import worker to synchronise HEAD if possible.
Description of the change
We can only synchronise HEAD if the server advertises its target, but git 1.8.4.3 or newer does so, which covers a lot of ground (notably, GitHub has a more than sufficient version).
This needs a couple of other bits:
* https:/
* git 1:1.7.9.
To post a comment you must log in.
Revision history for this message
William Grant (wgrant) : | # |
review:
Approve
(code)
Revision history for this message
Colin Watson (cjwatson) wrote : | # |
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/codehosting/codeimport/tests/servers.py' |
2 | --- lib/lp/codehosting/codeimport/tests/servers.py 2016-11-03 11:40:11 +0000 |
3 | +++ lib/lp/codehosting/codeimport/tests/servers.py 2016-11-08 03:28:29 +0000 |
4 | @@ -15,6 +15,7 @@ |
5 | from cStringIO import StringIO |
6 | import errno |
7 | import os |
8 | +import re |
9 | import shutil |
10 | import signal |
11 | import stat |
12 | @@ -42,9 +43,13 @@ |
13 | import dulwich.index |
14 | from dulwich.objects import Blob |
15 | from dulwich.repo import Repo as GitRepo |
16 | -from dulwich.server import Backend |
17 | +from dulwich.server import ( |
18 | + Backend, |
19 | + Handler, |
20 | + ) |
21 | from dulwich.web import ( |
22 | GunzipFilter, |
23 | + handle_service_request, |
24 | HTTPGitApplication, |
25 | LimitedInputFilter, |
26 | WSGIRequestHandlerLogger, |
27 | @@ -240,13 +245,55 @@ |
28 | return GitRepo(full_path) |
29 | |
30 | |
31 | +class TurnipSetSymbolicRefHandler(Handler): |
32 | + """Dulwich protocol handler for setting a symbolic ref. |
33 | + |
34 | + Transcribed from turnip.pack.git.PackBackendProtocol. |
35 | + """ |
36 | + |
37 | + def __init__(self, backend, args, proto, http_req=None): |
38 | + super(TurnipSetSymbolicRefHandler, self).__init__( |
39 | + backend, proto, http_req=http_req) |
40 | + self.repo = backend.open_repository(args[0]) |
41 | + |
42 | + def handle(self): |
43 | + line = self.proto.read_pkt_line() |
44 | + if line is None: |
45 | + self.proto.write_pkt_line(b"ERR Invalid set-symbolic-ref-line\n") |
46 | + self.proto.write_pkt_line(None) |
47 | + return |
48 | + name, target = line.split(b" ", 1) |
49 | + if name != b"HEAD": |
50 | + self.proto.write_pkt_line( |
51 | + b'ERR Symbolic ref name must be "HEAD"\n') |
52 | + self.proto.write_pkt_line(None) |
53 | + return |
54 | + if target.startswith(b"-"): |
55 | + self.proto.write_pkt_line( |
56 | + b'ERR Symbolic ref target may not start with "-"\n') |
57 | + self.proto.write_pkt_line(None) |
58 | + return |
59 | + try: |
60 | + self.repo.refs.set_symbolic_ref(name, target) |
61 | + except Exception as e: |
62 | + self.proto.write_pkt_line(b'ERR %s\n' % e) |
63 | + else: |
64 | + self.proto.write_pkt_line(b'ACK %s\n' % name) |
65 | + self.proto.write_pkt_line(None) |
66 | + |
67 | + |
68 | class HTTPGitServerThread(threading.Thread): |
69 | """Thread that runs an HTTP Git server.""" |
70 | |
71 | def __init__(self, backend, address, port=None): |
72 | super(HTTPGitServerThread, self).__init__() |
73 | self.setName("HTTP Git server on %s:%s" % (address, port)) |
74 | - app = GunzipFilter(LimitedInputFilter(HTTPGitApplication(backend))) |
75 | + app = HTTPGitApplication( |
76 | + backend, |
77 | + handlers={'turnip-set-symbolic-ref': TurnipSetSymbolicRefHandler}) |
78 | + app.services[('POST', re.compile('/turnip-set-symbolic-ref$'))] = ( |
79 | + handle_service_request) |
80 | + app = GunzipFilter(LimitedInputFilter(app)) |
81 | self.server = make_server( |
82 | address, port, app, handler_class=WSGIRequestHandlerLogger, |
83 | server_class=WSGIServerLogger) |
84 | @@ -297,7 +344,7 @@ |
85 | |
86 | def makeRepo(self, repository_name, tree_contents): |
87 | repository_path = os.path.join(self.repository_store, repository_name) |
88 | - os.mkdir(repository_path) |
89 | + os.makedirs(repository_path) |
90 | self.createRepository(repository_path, bare=self._use_server) |
91 | repo = GitRepo(repository_path) |
92 | blobs = [ |
93 | |
94 | === modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py' |
95 | --- lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-03 11:40:11 +0000 |
96 | +++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-08 03:28:29 +0000 |
97 | @@ -20,6 +20,7 @@ |
98 | TestCase as BzrTestCase, |
99 | TestCaseInTempDir, |
100 | ) |
101 | +from dulwich.repo import Repo as GitRepo |
102 | import oops_twisted |
103 | from testtools.deferredruntest import ( |
104 | assert_fails_with, |
105 | @@ -775,6 +776,27 @@ |
106 | self.addCleanup(clean_up_default_stores_for_import, source_details) |
107 | return job |
108 | |
109 | + def makeTargetGitServer(self): |
110 | + """Set up a target Git server that can receive imports.""" |
111 | + self.target_store = tempfile.mkdtemp() |
112 | + self.addCleanup(shutil.rmtree, self.target_store) |
113 | + self.target_git_server = GitServer(self.target_store, use_server=True) |
114 | + self.target_git_server.start_server() |
115 | + self.addCleanup(self.target_git_server.stop_server) |
116 | + config_name = self.getUniqueString() |
117 | + config_fixture = self.useFixture(ConfigFixture( |
118 | + config_name, self.layer.config_fixture.instance_name)) |
119 | + setting_lines = [ |
120 | + "[codehosting]", |
121 | + "git_browse_root: %s" % self.target_git_server.get_url(""), |
122 | + "", |
123 | + "[codeimport]", |
124 | + "macaroon_secret_key: some-secret", |
125 | + ] |
126 | + config_fixture.add_section("\n" + "\n".join(setting_lines)) |
127 | + self.useFixture(ConfigUseFixture(config_name)) |
128 | + self.useFixture(GitHostingFixture()) |
129 | + |
130 | def assertCodeImportResultCreated(self, code_import): |
131 | """Assert that a `CodeImportResult` was created for `code_import`.""" |
132 | self.assertEqual(len(list(code_import.results)), 1) |
133 | @@ -796,7 +818,7 @@ |
134 | cwd=repo_path, universal_newlines=True)) |
135 | self.assertEqual(self.foreign_commit_count, commit_count) |
136 | |
137 | - def assertImported(self, ignored, code_import_id): |
138 | + def assertImported(self, code_import_id): |
139 | """Assert that the `CodeImport` of the given id was imported.""" |
140 | # In the in-memory tests, check that resetTimeout on the |
141 | # CodeImportWorkerMonitorProtocol was called at least once. |
142 | @@ -832,53 +854,40 @@ |
143 | |
144 | return deferred.addBoth(save_protocol_object) |
145 | |
146 | + @defer.inlineCallbacks |
147 | def test_import_cvs(self): |
148 | # Create a CVS CodeImport and import it. |
149 | job = self.getStartedJobForImport(self.makeCVSCodeImport()) |
150 | code_import_id = job.code_import.id |
151 | job_id = job.id |
152 | self.layer.txn.commit() |
153 | - result = self.performImport(job_id) |
154 | - return result.addCallback(self.assertImported, code_import_id) |
155 | + yield self.performImport(job_id) |
156 | + self.assertImported(code_import_id) |
157 | |
158 | + @defer.inlineCallbacks |
159 | def test_import_subversion(self): |
160 | # Create a Subversion CodeImport and import it. |
161 | job = self.getStartedJobForImport(self.makeSVNCodeImport()) |
162 | code_import_id = job.code_import.id |
163 | job_id = job.id |
164 | self.layer.txn.commit() |
165 | - result = self.performImport(job_id) |
166 | - return result.addCallback(self.assertImported, code_import_id) |
167 | + yield self.performImport(job_id) |
168 | + self.assertImported(code_import_id) |
169 | |
170 | + @defer.inlineCallbacks |
171 | def test_import_git(self): |
172 | # Create a Git CodeImport and import it. |
173 | job = self.getStartedJobForImport(self.makeGitCodeImport()) |
174 | code_import_id = job.code_import.id |
175 | job_id = job.id |
176 | self.layer.txn.commit() |
177 | - result = self.performImport(job_id) |
178 | - return result.addCallback(self.assertImported, code_import_id) |
179 | + yield self.performImport(job_id) |
180 | + self.assertImported(code_import_id) |
181 | |
182 | + @defer.inlineCallbacks |
183 | def test_import_git_to_git(self): |
184 | # Create a Git-to-Git CodeImport and import it. |
185 | - self.target_store = tempfile.mkdtemp() |
186 | - self.addCleanup(shutil.rmtree, self.target_store) |
187 | - target_git_server = GitServer(self.target_store, use_server=True) |
188 | - target_git_server.start_server() |
189 | - self.addCleanup(target_git_server.stop_server) |
190 | - config_name = self.getUniqueString() |
191 | - config_fixture = self.useFixture(ConfigFixture( |
192 | - config_name, self.layer.config_fixture.instance_name)) |
193 | - setting_lines = [ |
194 | - "[codehosting]", |
195 | - "git_browse_root: %s" % target_git_server.get_url(""), |
196 | - "", |
197 | - "[codeimport]", |
198 | - "macaroon_secret_key: some-secret", |
199 | - ] |
200 | - config_fixture.add_section("\n" + "\n".join(setting_lines)) |
201 | - self.useFixture(ConfigUseFixture(config_name)) |
202 | - self.useFixture(GitHostingFixture()) |
203 | + self.makeTargetGitServer() |
204 | job = self.getStartedJobForImport(self.makeGitCodeImport( |
205 | target_rcs_type=TargetRevisionControlSystems.GIT)) |
206 | code_import_id = job.code_import.id |
207 | @@ -887,27 +896,63 @@ |
208 | target_repo_path = os.path.join( |
209 | self.target_store, job.code_import.target.unique_name) |
210 | os.makedirs(target_repo_path) |
211 | - target_git_server.createRepository(target_repo_path, bare=True) |
212 | - result = self.performImport(job_id) |
213 | - return result.addCallback(self.assertImported, code_import_id) |
214 | - |
215 | + self.target_git_server.createRepository(target_repo_path, bare=True) |
216 | + yield self.performImport(job_id) |
217 | + self.assertImported(code_import_id) |
218 | + target_repo = GitRepo(target_repo_path) |
219 | + self.assertContentEqual( |
220 | + ["heads/master"], target_repo.refs.keys(base="refs")) |
221 | + self.assertEqual( |
222 | + "ref: refs/heads/master", target_repo.refs.read_ref("HEAD")) |
223 | + |
224 | + @defer.inlineCallbacks |
225 | + def test_import_git_to_git_refs_changed(self): |
226 | + # Create a Git-to-Git CodeImport and import it incrementally with |
227 | + # ref and HEAD changes. |
228 | + self.makeTargetGitServer() |
229 | + job = self.getStartedJobForImport(self.makeGitCodeImport( |
230 | + target_rcs_type=TargetRevisionControlSystems.GIT)) |
231 | + code_import_id = job.code_import.id |
232 | + job_id = job.id |
233 | + self.layer.txn.commit() |
234 | + source_repo = GitRepo(os.path.join(self.repo_path, "source")) |
235 | + commit = source_repo.refs["refs/heads/master"] |
236 | + source_repo.refs["refs/heads/one"] = commit |
237 | + source_repo.refs["refs/heads/two"] = commit |
238 | + source_repo.refs.set_symbolic_ref("HEAD", "refs/heads/one") |
239 | + del source_repo.refs["refs/heads/master"] |
240 | + target_repo_path = os.path.join( |
241 | + self.target_store, job.code_import.target.unique_name) |
242 | + self.target_git_server.makeRepo( |
243 | + job.code_import.target.unique_name, [("NEWS", "contents")]) |
244 | + yield self.performImport(job_id) |
245 | + self.assertImported(code_import_id) |
246 | + target_repo = GitRepo(target_repo_path) |
247 | + self.assertContentEqual( |
248 | + ["heads/one", "heads/two"], target_repo.refs.keys(base="refs")) |
249 | + self.assertEqual( |
250 | + "ref: refs/heads/one", |
251 | + GitRepo(target_repo_path).refs.read_ref("HEAD")) |
252 | + |
253 | + @defer.inlineCallbacks |
254 | def test_import_bzr(self): |
255 | # Create a Bazaar CodeImport and import it. |
256 | job = self.getStartedJobForImport(self.makeBzrCodeImport()) |
257 | code_import_id = job.code_import.id |
258 | job_id = job.id |
259 | self.layer.txn.commit() |
260 | - result = self.performImport(job_id) |
261 | - return result.addCallback(self.assertImported, code_import_id) |
262 | + yield self.performImport(job_id) |
263 | + self.assertImported(code_import_id) |
264 | |
265 | + @defer.inlineCallbacks |
266 | def test_import_bzrsvn(self): |
267 | # Create a Subversion-via-bzr-svn CodeImport and import it. |
268 | job = self.getStartedJobForImport(self.makeBzrSvnCodeImport()) |
269 | code_import_id = job.code_import.id |
270 | job_id = job.id |
271 | self.layer.txn.commit() |
272 | - result = self.performImport(job_id) |
273 | - return result.addCallback(self.assertImported, code_import_id) |
274 | + yield self.performImport(job_id) |
275 | + self.assertImported(code_import_id) |
276 | |
277 | |
278 | class DeferredOnExit(protocol.ProcessProtocol): |
279 | |
280 | === modified file 'lib/lp/codehosting/codeimport/worker.py' |
281 | --- lib/lp/codehosting/codeimport/worker.py 2016-11-03 11:40:11 +0000 |
282 | +++ lib/lp/codehosting/codeimport/worker.py 2016-11-08 03:28:29 +0000 |
283 | @@ -20,6 +20,7 @@ |
284 | ] |
285 | |
286 | |
287 | +import io |
288 | import os |
289 | import shutil |
290 | import subprocess |
291 | @@ -61,6 +62,11 @@ |
292 | import cscvs |
293 | from cscvs.cmds import totla |
294 | import CVS |
295 | +from dulwich.errors import GitProtocolError |
296 | +from dulwich.protocol import ( |
297 | + pkt_line, |
298 | + Protocol, |
299 | + ) |
300 | from lazr.uri import ( |
301 | InvalidURIError, |
302 | URI, |
303 | @@ -93,6 +99,7 @@ |
304 | from lp.services.config import config |
305 | from lp.services.macaroons.interfaces import IMacaroonIssuer |
306 | from lp.services.propertycache import cachedproperty |
307 | +from lp.services.timeout import urlfetch |
308 | from lp.services.utils import sanitise_urls |
309 | |
310 | |
311 | @@ -1000,6 +1007,78 @@ |
312 | if retcode: |
313 | raise subprocess.CalledProcessError(retcode, cmd) |
314 | |
315 | + def _getHead(self, repository, remote_name): |
316 | + """Get HEAD from a configured remote in a local repository. |
317 | + |
318 | + The returned ref name will be adjusted in such a way that it can be |
319 | + passed to `_setHead` (e.g. refs/remotes/origin/master -> |
320 | + refs/heads/master). |
321 | + """ |
322 | + # This is a bit weird, but set-head will bail out if the target |
323 | + # doesn't exist in the correct remotes namespace. git 2.8.0 has |
324 | + # "git ls-remote --symref <repository> HEAD" which would involve |
325 | + # less juggling. |
326 | + self._runGit( |
327 | + "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name, |
328 | + cwd=repository) |
329 | + self._runGit( |
330 | + "remote", "set-head", remote_name, "--auto", cwd=repository) |
331 | + ref_prefix = "refs/remotes/%s/" % remote_name |
332 | + target_ref = subprocess.check_output( |
333 | + ["git", "symbolic-ref", ref_prefix + "HEAD"], |
334 | + cwd=repository, universal_newlines=True).rstrip("\n") |
335 | + if not target_ref.startswith(ref_prefix): |
336 | + raise GitProtocolError( |
337 | + "'git remote set-head %s --auto' did not leave remote HEAD " |
338 | + "under %s" % (remote_name, ref_prefix)) |
339 | + real_target_ref = "refs/heads/" + target_ref[len(ref_prefix):] |
340 | + # Ensure the result is a valid ref name, just in case. |
341 | + self._runGit("check-ref-format", real_target_ref, cwd="repository") |
342 | + return real_target_ref |
343 | + |
344 | + def _setHead(self, target_url, target_ref): |
345 | + """Set HEAD on a remote repository. |
346 | + |
347 | + This relies on the turnip-set-symbolic-ref extension. |
348 | + """ |
349 | + service = "turnip-set-symbolic-ref" |
350 | + url = urljoin(target_url, service) |
351 | + headers = { |
352 | + "Content-Type": "application/x-%s-request" % service, |
353 | + } |
354 | + body = pkt_line("HEAD %s" % target_ref) + pkt_line(None) |
355 | + try: |
356 | + response = urlfetch(url, method="POST", headers=headers, data=body) |
357 | + response.raise_for_status() |
358 | + except Exception as e: |
359 | + raise GitProtocolError(str(e)) |
360 | + content_type = response.headers.get("Content-Type") |
361 | + if content_type != ("application/x-%s-result" % service): |
362 | + raise GitProtocolError( |
363 | + "Invalid Content-Type from server: %s" % content_type) |
364 | + content = io.BytesIO(response.content) |
365 | + proto = Protocol(content.read, None) |
366 | + pkts = list(proto.read_pkt_seq()) |
367 | + if len(pkts) == 1 and pkts[0].rstrip(b"\n") == b"ACK HEAD": |
368 | + pass |
369 | + elif pkts and pkts[0].startswith(b"ERR "): |
370 | + raise GitProtocolError( |
371 | + pkts[0][len(b"ERR "):].rstrip(b"\n").decode("UTF-8")) |
372 | + else: |
373 | + raise GitProtocolError("Unexpected packets %r from server" % pkts) |
374 | + |
375 | + def _deleteRefs(self, repository, pattern): |
376 | + """Delete all refs in `repository` matching `pattern`.""" |
377 | + # XXX cjwatson 2016-11-08: We might ideally use something like: |
378 | + # "git for-each-ref --format='delete %(refname)%00%(objectname)%00' \ |
379 | + # <pattern> | git update-ref --stdin -z |
380 | + # ... which would be faster, but that requires git 1.8.5. |
381 | + remote_refs = subprocess.check_output( |
382 | + ["git", "for-each-ref", "--format=%(refname)", pattern], |
383 | + cwd="repository").splitlines() |
384 | + for remote_ref in remote_refs: |
385 | + self._runGit("update-ref", "-d", remote_ref, cwd="repository") |
386 | + |
387 | def _doImport(self): |
388 | self._logger.info("Starting job.") |
389 | try: |
390 | @@ -1031,20 +1110,35 @@ |
391 | try: |
392 | self._runGit("config", "gc.auto", "0", cwd="repository") |
393 | self._runGit( |
394 | - "fetch", "--prune", self.source_details.url, "+refs/*:refs/*", |
395 | - cwd="repository") |
396 | - # We should sync the remote HEAD as well. This involves |
397 | - # considerable gymnastics. "git remote set-head --auto" does it |
398 | - # if we can arrange a suitable temporary remote, or git 2.8.0 |
399 | - # has "git ls-remote --symref <repository> HEAD". We then also |
400 | - # need to set Launchpad's idea of HEAD, which is fiddly from an |
401 | - # import worker. For now we leave it at the default and trust |
402 | - # that we'll be able to fix things up later. |
403 | + "remote", "add", "source", self.source_details.url, |
404 | + cwd="repository") |
405 | + self._runGit( |
406 | + "fetch", "--prune", "source", "+refs/*:refs/*", |
407 | + cwd="repository") |
408 | + try: |
409 | + new_head = self._getHead("repository", "source") |
410 | + except (subprocess.CalledProcessError, GitProtocolError) as e2: |
411 | + self._logger.info("Unable to fetch default branch: %s" % e2) |
412 | + new_head = None |
413 | + self._runGit("remote", "rm", "source", cwd="repository") |
414 | + # XXX cjwatson 2016-11-03: For some reason "git remote rm" |
415 | + # doesn't actually remove the refs. |
416 | + self._deleteRefs("repository", "refs/remotes/source/*") |
417 | except subprocess.CalledProcessError as e: |
418 | self._logger.info("Unable to fetch remote repository: %s" % e) |
419 | return CodeImportWorkerExitCode.FAILURE_INVALID |
420 | self._logger.info("Pushing repository to hosting service.") |
421 | try: |
422 | + if new_head is not None: |
423 | + # Push the target of HEAD first to ensure that it is always |
424 | + # available. |
425 | + self._runGit( |
426 | + "push", target_url, "+%s:%s" % (new_head, new_head), |
427 | + cwd="repository") |
428 | + try: |
429 | + self._setHead(target_url, new_head) |
430 | + except GitProtocolError as e: |
431 | + self._logger.info("Unable to set default branch: %s" % e) |
432 | self._runGit("push", "--mirror", target_url, cwd="repository") |
433 | except subprocess.CalledProcessError as e: |
434 | self._logger.info( |
435 | |
436 | === modified file 'scripts/code-import-worker.py' |
437 | --- scripts/code-import-worker.py 2016-10-11 15:28:25 +0000 |
438 | +++ scripts/code-import-worker.py 2016-11-08 03:28:29 +0000 |
439 | @@ -35,6 +35,7 @@ |
440 | from lp.codehosting.safe_open import AcceptAnythingPolicy |
441 | from lp.services import scripts |
442 | from lp.services.config import config |
443 | +from lp.services.timeout import set_default_timeout_function |
444 | |
445 | |
446 | opener_policies = { |
447 | @@ -71,6 +72,7 @@ |
448 | |
449 | def main(self): |
450 | force_bzr_to_use_urllib() |
451 | + set_default_timeout_function(lambda: 60.0) |
452 | source_details = CodeImportSourceDetails.fromArguments(self.args) |
453 | if source_details.rcstype == 'git': |
454 | if source_details.target_rcstype == 'bzr': |
Thanks, I've addressed all the comments and copied the git package to the Launchpad PPA. I'll hold off on actually landing this until the turnip MP is reviewed.