Merge lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18259
Proposed branch: lp:~cjwatson/launchpad/codeimport-git-worker-sync-head
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/codeimport-git-dulwich-web
Diff against target: 454 lines (+233/-45)
4 files modified
lib/lp/codehosting/codeimport/tests/servers.py (+50/-3)
lib/lp/codehosting/codeimport/tests/test_workermonitor.py (+78/-33)
lib/lp/codehosting/codeimport/worker.py (+103/-9)
scripts/code-import-worker.py (+2/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/codeimport-git-worker-sync-head
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+309988@code.launchpad.net

Commit message

Fix the Git-to-Git import worker to synchronise HEAD if possible.

Description of the change

We can only synchronise HEAD if the server advertises its target, but git 1.8.4.3 or newer does so, which covers a lot of ground (notably, GitHub has a more than sufficient version).

This needs a couple of other bits:

 * https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/309702
 * git 1:1.7.9.5-1ubuntu0.3ppa2 from https://launchpad.net/~cjwatson/+archive/ubuntu/launchpad/+packages to make upload-pack advertise the target of HEAD (only needed in tests, so this just needs to be in the Launchpad PPA).

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)
Revision history for this message
Colin Watson (cjwatson) wrote :

Thanks, I've addressed all the comments and copied the git package to the Launchpad PPA. I'll hold off on actually landing this until the turnip MP is reviewed.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/codehosting/codeimport/tests/servers.py'
2--- lib/lp/codehosting/codeimport/tests/servers.py 2016-11-03 11:40:11 +0000
3+++ lib/lp/codehosting/codeimport/tests/servers.py 2016-11-08 03:28:29 +0000
4@@ -15,6 +15,7 @@
5 from cStringIO import StringIO
6 import errno
7 import os
8+import re
9 import shutil
10 import signal
11 import stat
12@@ -42,9 +43,13 @@
13 import dulwich.index
14 from dulwich.objects import Blob
15 from dulwich.repo import Repo as GitRepo
16-from dulwich.server import Backend
17+from dulwich.server import (
18+ Backend,
19+ Handler,
20+ )
21 from dulwich.web import (
22 GunzipFilter,
23+ handle_service_request,
24 HTTPGitApplication,
25 LimitedInputFilter,
26 WSGIRequestHandlerLogger,
27@@ -240,13 +245,55 @@
28 return GitRepo(full_path)
29
30
31+class TurnipSetSymbolicRefHandler(Handler):
32+ """Dulwich protocol handler for setting a symbolic ref.
33+
34+ Transcribed from turnip.pack.git.PackBackendProtocol.
35+ """
36+
37+ def __init__(self, backend, args, proto, http_req=None):
38+ super(TurnipSetSymbolicRefHandler, self).__init__(
39+ backend, proto, http_req=http_req)
40+ self.repo = backend.open_repository(args[0])
41+
42+ def handle(self):
43+ line = self.proto.read_pkt_line()
44+ if line is None:
45+ self.proto.write_pkt_line(b"ERR Invalid set-symbolic-ref-line\n")
46+ self.proto.write_pkt_line(None)
47+ return
48+ name, target = line.split(b" ", 1)
49+ if name != b"HEAD":
50+ self.proto.write_pkt_line(
51+ b'ERR Symbolic ref name must be "HEAD"\n')
52+ self.proto.write_pkt_line(None)
53+ return
54+ if target.startswith(b"-"):
55+ self.proto.write_pkt_line(
56+ b'ERR Symbolic ref target may not start with "-"\n')
57+ self.proto.write_pkt_line(None)
58+ return
59+ try:
60+ self.repo.refs.set_symbolic_ref(name, target)
61+ except Exception as e:
62+ self.proto.write_pkt_line(b'ERR %s\n' % e)
63+ else:
64+ self.proto.write_pkt_line(b'ACK %s\n' % name)
65+ self.proto.write_pkt_line(None)
66+
67+
68 class HTTPGitServerThread(threading.Thread):
69 """Thread that runs an HTTP Git server."""
70
71 def __init__(self, backend, address, port=None):
72 super(HTTPGitServerThread, self).__init__()
73 self.setName("HTTP Git server on %s:%s" % (address, port))
74- app = GunzipFilter(LimitedInputFilter(HTTPGitApplication(backend)))
75+ app = HTTPGitApplication(
76+ backend,
77+ handlers={'turnip-set-symbolic-ref': TurnipSetSymbolicRefHandler})
78+ app.services[('POST', re.compile('/turnip-set-symbolic-ref$'))] = (
79+ handle_service_request)
80+ app = GunzipFilter(LimitedInputFilter(app))
81 self.server = make_server(
82 address, port, app, handler_class=WSGIRequestHandlerLogger,
83 server_class=WSGIServerLogger)
84@@ -297,7 +344,7 @@
85
86 def makeRepo(self, repository_name, tree_contents):
87 repository_path = os.path.join(self.repository_store, repository_name)
88- os.mkdir(repository_path)
89+ os.makedirs(repository_path)
90 self.createRepository(repository_path, bare=self._use_server)
91 repo = GitRepo(repository_path)
92 blobs = [
93
94=== modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py'
95--- lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-03 11:40:11 +0000
96+++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-08 03:28:29 +0000
97@@ -20,6 +20,7 @@
98 TestCase as BzrTestCase,
99 TestCaseInTempDir,
100 )
101+from dulwich.repo import Repo as GitRepo
102 import oops_twisted
103 from testtools.deferredruntest import (
104 assert_fails_with,
105@@ -775,6 +776,27 @@
106 self.addCleanup(clean_up_default_stores_for_import, source_details)
107 return job
108
109+ def makeTargetGitServer(self):
110+ """Set up a target Git server that can receive imports."""
111+ self.target_store = tempfile.mkdtemp()
112+ self.addCleanup(shutil.rmtree, self.target_store)
113+ self.target_git_server = GitServer(self.target_store, use_server=True)
114+ self.target_git_server.start_server()
115+ self.addCleanup(self.target_git_server.stop_server)
116+ config_name = self.getUniqueString()
117+ config_fixture = self.useFixture(ConfigFixture(
118+ config_name, self.layer.config_fixture.instance_name))
119+ setting_lines = [
120+ "[codehosting]",
121+ "git_browse_root: %s" % self.target_git_server.get_url(""),
122+ "",
123+ "[codeimport]",
124+ "macaroon_secret_key: some-secret",
125+ ]
126+ config_fixture.add_section("\n" + "\n".join(setting_lines))
127+ self.useFixture(ConfigUseFixture(config_name))
128+ self.useFixture(GitHostingFixture())
129+
130 def assertCodeImportResultCreated(self, code_import):
131 """Assert that a `CodeImportResult` was created for `code_import`."""
132 self.assertEqual(len(list(code_import.results)), 1)
133@@ -796,7 +818,7 @@
134 cwd=repo_path, universal_newlines=True))
135 self.assertEqual(self.foreign_commit_count, commit_count)
136
137- def assertImported(self, ignored, code_import_id):
138+ def assertImported(self, code_import_id):
139 """Assert that the `CodeImport` of the given id was imported."""
140 # In the in-memory tests, check that resetTimeout on the
141 # CodeImportWorkerMonitorProtocol was called at least once.
142@@ -832,53 +854,40 @@
143
144 return deferred.addBoth(save_protocol_object)
145
146+ @defer.inlineCallbacks
147 def test_import_cvs(self):
148 # Create a CVS CodeImport and import it.
149 job = self.getStartedJobForImport(self.makeCVSCodeImport())
150 code_import_id = job.code_import.id
151 job_id = job.id
152 self.layer.txn.commit()
153- result = self.performImport(job_id)
154- return result.addCallback(self.assertImported, code_import_id)
155+ yield self.performImport(job_id)
156+ self.assertImported(code_import_id)
157
158+ @defer.inlineCallbacks
159 def test_import_subversion(self):
160 # Create a Subversion CodeImport and import it.
161 job = self.getStartedJobForImport(self.makeSVNCodeImport())
162 code_import_id = job.code_import.id
163 job_id = job.id
164 self.layer.txn.commit()
165- result = self.performImport(job_id)
166- return result.addCallback(self.assertImported, code_import_id)
167+ yield self.performImport(job_id)
168+ self.assertImported(code_import_id)
169
170+ @defer.inlineCallbacks
171 def test_import_git(self):
172 # Create a Git CodeImport and import it.
173 job = self.getStartedJobForImport(self.makeGitCodeImport())
174 code_import_id = job.code_import.id
175 job_id = job.id
176 self.layer.txn.commit()
177- result = self.performImport(job_id)
178- return result.addCallback(self.assertImported, code_import_id)
179+ yield self.performImport(job_id)
180+ self.assertImported(code_import_id)
181
182+ @defer.inlineCallbacks
183 def test_import_git_to_git(self):
184 # Create a Git-to-Git CodeImport and import it.
185- self.target_store = tempfile.mkdtemp()
186- self.addCleanup(shutil.rmtree, self.target_store)
187- target_git_server = GitServer(self.target_store, use_server=True)
188- target_git_server.start_server()
189- self.addCleanup(target_git_server.stop_server)
190- config_name = self.getUniqueString()
191- config_fixture = self.useFixture(ConfigFixture(
192- config_name, self.layer.config_fixture.instance_name))
193- setting_lines = [
194- "[codehosting]",
195- "git_browse_root: %s" % target_git_server.get_url(""),
196- "",
197- "[codeimport]",
198- "macaroon_secret_key: some-secret",
199- ]
200- config_fixture.add_section("\n" + "\n".join(setting_lines))
201- self.useFixture(ConfigUseFixture(config_name))
202- self.useFixture(GitHostingFixture())
203+ self.makeTargetGitServer()
204 job = self.getStartedJobForImport(self.makeGitCodeImport(
205 target_rcs_type=TargetRevisionControlSystems.GIT))
206 code_import_id = job.code_import.id
207@@ -887,27 +896,63 @@
208 target_repo_path = os.path.join(
209 self.target_store, job.code_import.target.unique_name)
210 os.makedirs(target_repo_path)
211- target_git_server.createRepository(target_repo_path, bare=True)
212- result = self.performImport(job_id)
213- return result.addCallback(self.assertImported, code_import_id)
214-
215+ self.target_git_server.createRepository(target_repo_path, bare=True)
216+ yield self.performImport(job_id)
217+ self.assertImported(code_import_id)
218+ target_repo = GitRepo(target_repo_path)
219+ self.assertContentEqual(
220+ ["heads/master"], target_repo.refs.keys(base="refs"))
221+ self.assertEqual(
222+ "ref: refs/heads/master", target_repo.refs.read_ref("HEAD"))
223+
224+ @defer.inlineCallbacks
225+ def test_import_git_to_git_refs_changed(self):
226+ # Create a Git-to-Git CodeImport and import it incrementally with
227+ # ref and HEAD changes.
228+ self.makeTargetGitServer()
229+ job = self.getStartedJobForImport(self.makeGitCodeImport(
230+ target_rcs_type=TargetRevisionControlSystems.GIT))
231+ code_import_id = job.code_import.id
232+ job_id = job.id
233+ self.layer.txn.commit()
234+ source_repo = GitRepo(os.path.join(self.repo_path, "source"))
235+ commit = source_repo.refs["refs/heads/master"]
236+ source_repo.refs["refs/heads/one"] = commit
237+ source_repo.refs["refs/heads/two"] = commit
238+ source_repo.refs.set_symbolic_ref("HEAD", "refs/heads/one")
239+ del source_repo.refs["refs/heads/master"]
240+ target_repo_path = os.path.join(
241+ self.target_store, job.code_import.target.unique_name)
242+ self.target_git_server.makeRepo(
243+ job.code_import.target.unique_name, [("NEWS", "contents")])
244+ yield self.performImport(job_id)
245+ self.assertImported(code_import_id)
246+ target_repo = GitRepo(target_repo_path)
247+ self.assertContentEqual(
248+ ["heads/one", "heads/two"], target_repo.refs.keys(base="refs"))
249+ self.assertEqual(
250+ "ref: refs/heads/one",
251+ GitRepo(target_repo_path).refs.read_ref("HEAD"))
252+
253+ @defer.inlineCallbacks
254 def test_import_bzr(self):
255 # Create a Bazaar CodeImport and import it.
256 job = self.getStartedJobForImport(self.makeBzrCodeImport())
257 code_import_id = job.code_import.id
258 job_id = job.id
259 self.layer.txn.commit()
260- result = self.performImport(job_id)
261- return result.addCallback(self.assertImported, code_import_id)
262+ yield self.performImport(job_id)
263+ self.assertImported(code_import_id)
264
265+ @defer.inlineCallbacks
266 def test_import_bzrsvn(self):
267 # Create a Subversion-via-bzr-svn CodeImport and import it.
268 job = self.getStartedJobForImport(self.makeBzrSvnCodeImport())
269 code_import_id = job.code_import.id
270 job_id = job.id
271 self.layer.txn.commit()
272- result = self.performImport(job_id)
273- return result.addCallback(self.assertImported, code_import_id)
274+ yield self.performImport(job_id)
275+ self.assertImported(code_import_id)
276
277
278 class DeferredOnExit(protocol.ProcessProtocol):
279
280=== modified file 'lib/lp/codehosting/codeimport/worker.py'
281--- lib/lp/codehosting/codeimport/worker.py 2016-11-03 11:40:11 +0000
282+++ lib/lp/codehosting/codeimport/worker.py 2016-11-08 03:28:29 +0000
283@@ -20,6 +20,7 @@
284 ]
285
286
287+import io
288 import os
289 import shutil
290 import subprocess
291@@ -61,6 +62,11 @@
292 import cscvs
293 from cscvs.cmds import totla
294 import CVS
295+from dulwich.errors import GitProtocolError
296+from dulwich.protocol import (
297+ pkt_line,
298+ Protocol,
299+ )
300 from lazr.uri import (
301 InvalidURIError,
302 URI,
303@@ -93,6 +99,7 @@
304 from lp.services.config import config
305 from lp.services.macaroons.interfaces import IMacaroonIssuer
306 from lp.services.propertycache import cachedproperty
307+from lp.services.timeout import urlfetch
308 from lp.services.utils import sanitise_urls
309
310
311@@ -1000,6 +1007,78 @@
312 if retcode:
313 raise subprocess.CalledProcessError(retcode, cmd)
314
315+ def _getHead(self, repository, remote_name):
316+ """Get HEAD from a configured remote in a local repository.
317+
318+ The returned ref name will be adjusted in such a way that it can be
319+ passed to `_setHead` (e.g. refs/remotes/origin/master ->
320+ refs/heads/master).
321+ """
322+ # This is a bit weird, but set-head will bail out if the target
323+ # doesn't exist in the correct remotes namespace. git 2.8.0 has
324+ # "git ls-remote --symref <repository> HEAD" which would involve
325+ # less juggling.
326+ self._runGit(
327+ "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
328+ cwd=repository)
329+ self._runGit(
330+ "remote", "set-head", remote_name, "--auto", cwd=repository)
331+ ref_prefix = "refs/remotes/%s/" % remote_name
332+ target_ref = subprocess.check_output(
333+ ["git", "symbolic-ref", ref_prefix + "HEAD"],
334+ cwd=repository, universal_newlines=True).rstrip("\n")
335+ if not target_ref.startswith(ref_prefix):
336+ raise GitProtocolError(
337+ "'git remote set-head %s --auto' did not leave remote HEAD "
338+ "under %s" % (remote_name, ref_prefix))
339+ real_target_ref = "refs/heads/" + target_ref[len(ref_prefix):]
340+ # Ensure the result is a valid ref name, just in case.
341+ self._runGit("check-ref-format", real_target_ref, cwd="repository")
342+ return real_target_ref
343+
344+ def _setHead(self, target_url, target_ref):
345+ """Set HEAD on a remote repository.
346+
347+ This relies on the turnip-set-symbolic-ref extension.
348+ """
349+ service = "turnip-set-symbolic-ref"
350+ url = urljoin(target_url, service)
351+ headers = {
352+ "Content-Type": "application/x-%s-request" % service,
353+ }
354+ body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
355+ try:
356+ response = urlfetch(url, method="POST", headers=headers, data=body)
357+ response.raise_for_status()
358+ except Exception as e:
359+ raise GitProtocolError(str(e))
360+ content_type = response.headers.get("Content-Type")
361+ if content_type != ("application/x-%s-result" % service):
362+ raise GitProtocolError(
363+ "Invalid Content-Type from server: %s" % content_type)
364+ content = io.BytesIO(response.content)
365+ proto = Protocol(content.read, None)
366+ pkts = list(proto.read_pkt_seq())
367+ if len(pkts) == 1 and pkts[0].rstrip(b"\n") == b"ACK HEAD":
368+ pass
369+ elif pkts and pkts[0].startswith(b"ERR "):
370+ raise GitProtocolError(
371+ pkts[0][len(b"ERR "):].rstrip(b"\n").decode("UTF-8"))
372+ else:
373+ raise GitProtocolError("Unexpected packets %r from server" % pkts)
374+
375+ def _deleteRefs(self, repository, pattern):
376+ """Delete all refs in `repository` matching `pattern`."""
377+ # XXX cjwatson 2016-11-08: We might ideally use something like:
378+ # "git for-each-ref --format='delete %(refname)%00%(objectname)%00' \
379+ # <pattern> | git update-ref --stdin -z
380+ # ... which would be faster, but that requires git 1.8.5.
381+ remote_refs = subprocess.check_output(
382+ ["git", "for-each-ref", "--format=%(refname)", pattern],
383+ cwd="repository").splitlines()
384+ for remote_ref in remote_refs:
385+ self._runGit("update-ref", "-d", remote_ref, cwd="repository")
386+
387 def _doImport(self):
388 self._logger.info("Starting job.")
389 try:
390@@ -1031,20 +1110,35 @@
391 try:
392 self._runGit("config", "gc.auto", "0", cwd="repository")
393 self._runGit(
394- "fetch", "--prune", self.source_details.url, "+refs/*:refs/*",
395- cwd="repository")
396- # We should sync the remote HEAD as well. This involves
397- # considerable gymnastics. "git remote set-head --auto" does it
398- # if we can arrange a suitable temporary remote, or git 2.8.0
399- # has "git ls-remote --symref <repository> HEAD". We then also
400- # need to set Launchpad's idea of HEAD, which is fiddly from an
401- # import worker. For now we leave it at the default and trust
402- # that we'll be able to fix things up later.
403+ "remote", "add", "source", self.source_details.url,
404+ cwd="repository")
405+ self._runGit(
406+ "fetch", "--prune", "source", "+refs/*:refs/*",
407+ cwd="repository")
408+ try:
409+ new_head = self._getHead("repository", "source")
410+ except (subprocess.CalledProcessError, GitProtocolError) as e2:
411+ self._logger.info("Unable to fetch default branch: %s" % e2)
412+ new_head = None
413+ self._runGit("remote", "rm", "source", cwd="repository")
414+ # XXX cjwatson 2016-11-03: For some reason "git remote rm"
415+ # doesn't actually remove the refs.
416+ self._deleteRefs("repository", "refs/remotes/source/*")
417 except subprocess.CalledProcessError as e:
418 self._logger.info("Unable to fetch remote repository: %s" % e)
419 return CodeImportWorkerExitCode.FAILURE_INVALID
420 self._logger.info("Pushing repository to hosting service.")
421 try:
422+ if new_head is not None:
423+ # Push the target of HEAD first to ensure that it is always
424+ # available.
425+ self._runGit(
426+ "push", target_url, "+%s:%s" % (new_head, new_head),
427+ cwd="repository")
428+ try:
429+ self._setHead(target_url, new_head)
430+ except GitProtocolError as e:
431+ self._logger.info("Unable to set default branch: %s" % e)
432 self._runGit("push", "--mirror", target_url, cwd="repository")
433 except subprocess.CalledProcessError as e:
434 self._logger.info(
435
436=== modified file 'scripts/code-import-worker.py'
437--- scripts/code-import-worker.py 2016-10-11 15:28:25 +0000
438+++ scripts/code-import-worker.py 2016-11-08 03:28:29 +0000
439@@ -35,6 +35,7 @@
440 from lp.codehosting.safe_open import AcceptAnythingPolicy
441 from lp.services import scripts
442 from lp.services.config import config
443+from lp.services.timeout import set_default_timeout_function
444
445
446 opener_policies = {
447@@ -71,6 +72,7 @@
448
449 def main(self):
450 force_bzr_to_use_urllib()
451+ set_default_timeout_function(lambda: 60.0)
452 source_details = CodeImportSourceDetails.fromArguments(self.args)
453 if source_details.rcstype == 'git':
454 if source_details.target_rcstype == 'bzr':