Merge lp:~cjwatson/launchpad/codeimport-git-progress into lp:launchpad

Proposed by Colin Watson on 2017-05-11
Status: Needs review
Proposed branch: lp:~cjwatson/launchpad/codeimport-git-progress
Merge into: lp:launchpad
Diff against target: 221 lines (+140/-8)
2 files modified
lib/lp/codehosting/codeimport/tests/test_worker.py (+50/-0)
lib/lp/codehosting/codeimport/worker.py (+90/-8)
To merge this branch: bzr merge lp:~cjwatson/launchpad/codeimport-git-progress
Reviewer Review Type Date Requested Status
Launchpad code reviewers 2017-05-11 Pending
Review via email: mp+323902@code.launchpad.net

Commit message

Enable throttled progress output from git-to-git import workers.

Description of the change

This may help with bug 1642699, though I'm not sure yet.

Getting things to work with Python 2's disconnect between file objects and the io module is gratuitously inconvenient.

To post a comment you must log in.

Unmerged revisions

18374. By Colin Watson on 2017-05-11

Enable throttled progress output from git-to-git import workers.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
--- lib/lp/codehosting/codeimport/tests/test_worker.py 2017-01-12 18:01:56 +0000
+++ lib/lp/codehosting/codeimport/tests/test_worker.py 2017-05-11 12:10:44 +0000
@@ -50,7 +50,9 @@
50import subvertpy.client50import subvertpy.client
51import subvertpy.ra51import subvertpy.ra
52from testtools.matchers import (52from testtools.matchers import (
53 ContainsAll,
53 Equals,54 Equals,
55 LessThan,
54 Matcher,56 Matcher,
55 MatchesListwise,57 MatchesListwise,
56 Mismatch,58 Mismatch,
@@ -87,6 +89,7 @@
87 ForeignTreeStore,89 ForeignTreeStore,
88 get_default_bazaar_branch_store,90 get_default_bazaar_branch_store,
89 GitImportWorker,91 GitImportWorker,
92 GitToGitImportWorker,
90 ImportDataStore,93 ImportDataStore,
91 ToBzrImportWorker,94 ToBzrImportWorker,
92 )95 )
@@ -1304,6 +1307,53 @@
1304 branch.repository.get_revision(branch.last_revision()).committer)1307 branch.repository.get_revision(branch.last_revision()).committer)
13051308
13061309
1310class TestGitToGitImportWorker(TestCase):
1311
1312 def test_throttleProgress(self):
1313 source_details = self.factory.makeCodeImportSourceDetails(
1314 rcstype="git", target_rcstype="git")
1315 logger = BufferLogger()
1316 worker = GitToGitImportWorker(
1317 source_details, logger, AcceptAnythingPolicy())
1318 read_fd, write_fd = os.pipe()
1319 pid = os.fork()
1320 if pid == 0: # child
1321 os.close(read_fd)
1322 with os.fdopen(write_fd, "wb") as write:
1323 write.write(b"Starting\n")
1324 for i in range(50):
1325 time.sleep(0.1)
1326 write.write(("%d ...\r" % i).encode("UTF-8"))
1327 if (i % 10) == 9:
1328 write.write(
1329 ("Interval %d\n" % (i // 10)).encode("UTF-8"))
1330 write.write(b"Finishing\n")
1331 os._exit(0)
1332 else: # parent
1333 os.close(write_fd)
1334 with os.fdopen(read_fd, "rb") as read:
1335 lines = list(worker._throttleProgress(read, timeout=0.5))
1336 os.waitpid(pid, 0)
1337 # Matching the exact sequence of lines would be too brittle, but
1338 # we require some things to be true:
1339 # All the non-progress lines must be present, in the right
1340 # order.
1341 self.assertEqual(
1342 [u"Starting\n", u"Interval 0\n", u"Interval 1\n",
1343 u"Interval 2\n", u"Interval 3\n", u"Interval 4\n",
1344 u"Finishing\n"],
1345 [line for line in lines if not line.endswith(u"\r")])
1346 # No more than 15 progress lines may be present (allowing some
1347 # slack for the child process being slow).
1348 progress_lines = [line for line in lines if line.endswith(u"\r")]
1349 self.assertThat(len(progress_lines), LessThan(16))
1350 # All the progress lines immediately before interval markers
1351 # must be present.
1352 self.assertThat(
1353 progress_lines,
1354 ContainsAll([u"%d ...\r" % i for i in (9, 19, 29, 39, 49)]))
1355
1356
1307class CodeImportBranchOpenPolicyTests(TestCase):1357class CodeImportBranchOpenPolicyTests(TestCase):
13081358
1309 def setUp(self):1359 def setUp(self):
13101360
=== modified file 'lib/lp/codehosting/codeimport/worker.py'
--- lib/lp/codehosting/codeimport/worker.py 2016-11-14 18:29:17 +0000
+++ lib/lp/codehosting/codeimport/worker.py 2017-05-11 12:10:44 +0000
@@ -1,4 +1,4 @@
1# Copyright 2009-2016 Canonical Ltd. This software is licensed under the1# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""The code import worker. This imports code from foreign repositories."""4"""The code import worker. This imports code from foreign repositories."""
@@ -19,10 +19,10 @@
19 'get_default_bazaar_branch_store',19 'get_default_bazaar_branch_store',
20 ]20 ]
2121
22
23import io22import io
24import os23import os
25import shutil24import shutil
25import signal
26import subprocess26import subprocess
27from urlparse import (27from urlparse import (
28 urlsplit,28 urlsplit,
@@ -73,6 +73,7 @@
73 )73 )
74from pymacaroons import Macaroon74from pymacaroons import Macaroon
75import SCM75import SCM
76import six
76from zope.component import getUtility77from zope.component import getUtility
77from zope.security.proxy import removeSecurityProxy78from zope.security.proxy import removeSecurityProxy
7879
@@ -987,14 +988,94 @@
987class GitToGitImportWorker(ImportWorker):988class GitToGitImportWorker(ImportWorker):
988 """An import worker for imports from Git to Git."""989 """An import worker for imports from Git to Git."""
989990
991 def _throttleProgress(self, file_obj, timeout=15.0):
992 """Throttle progress messages from a file object.
993
994 git can produce progress output on stderr, but it produces rather a
995 lot of it and we don't want it all to end up in logs. Throttle this
996 so that we only produce output every `timeout` seconds, or when we
997 see a line terminated with a newline rather than a carriage return.
998
999 :param file_obj: a file-like object opened in binary mode.
1000 :param timeout: emit progress output only after this many seconds
1001 have elapsed.
1002 :return: an iterator of interesting text lines read from the file.
1003 """
1004 # newline="" requests universal newlines mode, but without
1005 # translation.
1006 if six.PY2 and isinstance(file_obj, file):
1007 # A Python 2 file object can't be used directly to construct an
1008 # io.TextIOWrapper.
1009 class _ReadableFileWrapper:
1010 def __init__(self, raw):
1011 self._raw = raw
1012
1013 def __enter__(self):
1014 return self
1015
1016 def __exit__(self, exc_type, exc_value, exc_tb):
1017 pass
1018
1019 def readable(self):
1020 return True
1021
1022 def writable(self):
1023 return False
1024
1025 def seekable(self):
1026 return True
1027
1028 def __getattr__(self, name):
1029 return getattr(self._raw, name)
1030
1031 with _ReadableFileWrapper(file_obj) as readable:
1032 with io.BufferedReader(readable) as buffered:
1033 for line in self._throttleProgress(
1034 buffered, timeout=timeout):
1035 yield line
1036 return
1037
1038 class ReceivedAlarm(Exception):
1039 pass
1040
1041 def alarm_handler(signum, frame):
1042 raise ReceivedAlarm()
1043
1044 old_alarm = signal.signal(signal.SIGALRM, alarm_handler)
1045 try:
1046 progress_buffer = None
1047 with io.TextIOWrapper(
1048 file_obj, encoding="UTF-8", errors="replace",
1049 newline="") as wrapped_file:
1050 while True:
1051 try:
1052 signal.setitimer(signal.ITIMER_REAL, timeout)
1053 line = next(wrapped_file)
1054 signal.setitimer(signal.ITIMER_REAL, 0)
1055 if line.endswith(u"\r"):
1056 progress_buffer = line
1057 else:
1058 if progress_buffer is not None:
1059 yield progress_buffer
1060 progress_buffer = None
1061 yield line
1062 except ReceivedAlarm:
1063 if progress_buffer is not None:
1064 yield progress_buffer
1065 progress_buffer = None
1066 except StopIteration:
1067 break
1068 finally:
1069 signal.setitimer(signal.ITIMER_REAL, 0)
1070 signal.signal(signal.SIGALRM, old_alarm)
1071
990 def _runGit(self, *args, **kwargs):1072 def _runGit(self, *args, **kwargs):
991 """Run git with arguments, sending output to the logger."""1073 """Run git with arguments, sending output to the logger."""
992 cmd = ["git"] + list(args)1074 cmd = ["git"] + list(args)
993 git_process = subprocess.Popen(1075 git_process = subprocess.Popen(
994 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)1076 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)
995 for line in git_process.stdout:1077 for line in self._throttleProgress(git_process.stdout):
996 line = line.decode("UTF-8", "replace").rstrip("\n")1078 self._logger.info(sanitise_urls(line.rstrip("\r\n")))
997 self._logger.info(sanitise_urls(line))
998 retcode = git_process.wait()1079 retcode = git_process.wait()
999 if retcode:1080 if retcode:
1000 raise subprocess.CalledProcessError(retcode, cmd)1081 raise subprocess.CalledProcessError(retcode, cmd)
@@ -1127,13 +1208,14 @@
1127 # Push the target of HEAD first to ensure that it is always1208 # Push the target of HEAD first to ensure that it is always
1128 # available.1209 # available.
1129 self._runGit(1210 self._runGit(
1130 "push", target_url, "+%s:%s" % (new_head, new_head),1211 "push", "--progress", target_url,
1131 cwd="repository")1212 "+%s:%s" % (new_head, new_head), cwd="repository")
1132 try:1213 try:
1133 self._setHead(target_url, new_head)1214 self._setHead(target_url, new_head)
1134 except GitProtocolError as e:1215 except GitProtocolError as e:
1135 self._logger.info("Unable to set default branch: %s" % e)1216 self._logger.info("Unable to set default branch: %s" % e)
1136 self._runGit("push", "--mirror", target_url, cwd="repository")1217 self._runGit(
1218 "push", "--progress", "--mirror", target_url, cwd="repository")
1137 except subprocess.CalledProcessError as e:1219 except subprocess.CalledProcessError as e:
1138 self._logger.info(1220 self._logger.info(
1139 "Unable to push to hosting service: git push exited %s" %1221 "Unable to push to hosting service: git push exited %s" %