Merge ~cjwatson/launchpad:codeimport-git-progress into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 946fc55f68991af9952c7f2b25fc82c8234c1c89
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:codeimport-git-progress
Merge into: launchpad:master
Diff against target: 209 lines (+140/-7)
2 files modified
lib/lp/codehosting/codeimport/tests/test_worker.py (+52/-0)
lib/lp/codehosting/codeimport/worker.py (+88/-7)
Reviewer Review Type Date Requested Status
Tom Wardill (community) Approve
Ioana Lasc (community) Approve
Review via email: mp+373732@code.launchpad.net

Commit message

Enable throttled progress output from git-to-git import workers

Description of the change

This may help with bug 1642699, though I'm not sure yet.

Getting things to work with Python 2's disconnect between file objects and the io module is gratuitously inconvenient.

This is essentially the same as https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-progress/+merge/323902, converted to git and rebased on master.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

looks good.

review: Approve
Revision history for this message
Otto Co-Pilot (otto-copilot) wrote :
Revision history for this message
Tom Wardill (twom) :
review: Approve
Revision history for this message
Otto Co-Pilot (otto-copilot) wrote :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/lib/lp/codehosting/codeimport/tests/test_worker.py b/lib/lp/codehosting/codeimport/tests/test_worker.py
index d0a30cf..9daf357 100644
--- a/lib/lp/codehosting/codeimport/tests/test_worker.py
+++ b/lib/lp/codehosting/codeimport/tests/test_worker.py
@@ -58,6 +58,10 @@ import six
58import subvertpy58import subvertpy
59import subvertpy.client59import subvertpy.client
60import subvertpy.ra60import subvertpy.ra
61from testtools.matchers import (
62 ContainsAll,
63 LessThan,
64 )
6165
62import lp.codehosting66import lp.codehosting
63from lp.codehosting.codeimport.tarball import (67from lp.codehosting.codeimport.tarball import (
@@ -81,6 +85,7 @@ from lp.codehosting.codeimport.worker import (
81 ForeignTreeStore,85 ForeignTreeStore,
82 get_default_bazaar_branch_store,86 get_default_bazaar_branch_store,
83 GitImportWorker,87 GitImportWorker,
88 GitToGitImportWorker,
84 ImportDataStore,89 ImportDataStore,
85 ToBzrImportWorker,90 ToBzrImportWorker,
86 )91 )
@@ -1358,6 +1363,53 @@ class TestBzrImport(WorkerTest, TestActualImportMixin,
1358 branch.repository.get_revision(branch.last_revision()).committer)1363 branch.repository.get_revision(branch.last_revision()).committer)
13591364
13601365
1366class TestGitToGitImportWorker(TestCase):
1367
1368 def test_throttleProgress(self):
1369 source_details = self.factory.makeCodeImportSourceDetails(
1370 rcstype="git", target_rcstype="git")
1371 logger = BufferLogger()
1372 worker = GitToGitImportWorker(
1373 source_details, logger, AcceptAnythingPolicy())
1374 read_fd, write_fd = os.pipe()
1375 pid = os.fork()
1376 if pid == 0: # child
1377 os.close(read_fd)
1378 with os.fdopen(write_fd, "wb") as write:
1379 write.write(b"Starting\n")
1380 for i in range(50):
1381 time.sleep(0.1)
1382 write.write(("%d ...\r" % i).encode("UTF-8"))
1383 if (i % 10) == 9:
1384 write.write(
1385 ("Interval %d\n" % (i // 10)).encode("UTF-8"))
1386 write.write(b"Finishing\n")
1387 os._exit(0)
1388 else: # parent
1389 os.close(write_fd)
1390 with os.fdopen(read_fd, "rb") as read:
1391 lines = list(worker._throttleProgress(read, timeout=0.5))
1392 os.waitpid(pid, 0)
1393 # Matching the exact sequence of lines would be too brittle, but
1394 # we require some things to be true:
1395 # All the non-progress lines must be present, in the right
1396 # order.
1397 self.assertEqual(
1398 [u"Starting\n", u"Interval 0\n", u"Interval 1\n",
1399 u"Interval 2\n", u"Interval 3\n", u"Interval 4\n",
1400 u"Finishing\n"],
1401 [line for line in lines if not line.endswith(u"\r")])
1402 # No more than 15 progress lines may be present (allowing some
1403 # slack for the child process being slow).
1404 progress_lines = [line for line in lines if line.endswith(u"\r")]
1405 self.assertThat(len(progress_lines), LessThan(16))
1406 # All the progress lines immediately before interval markers
1407 # must be present.
1408 self.assertThat(
1409 progress_lines,
1410 ContainsAll([u"%d ...\r" % i for i in (9, 19, 29, 39, 49)]))
1411
1412
1361class CodeImportBranchOpenPolicyTests(TestCase):1413class CodeImportBranchOpenPolicyTests(TestCase):
13621414
1363 def setUp(self):1415 def setUp(self):
diff --git a/lib/lp/codehosting/codeimport/worker.py b/lib/lp/codehosting/codeimport/worker.py
index 6259189..5aacf29 100644
--- a/lib/lp/codehosting/codeimport/worker.py
+++ b/lib/lp/codehosting/codeimport/worker.py
@@ -19,10 +19,10 @@ __all__ = [
19 'get_default_bazaar_branch_store',19 'get_default_bazaar_branch_store',
20 ]20 ]
2121
22
23import io22import io
24import os23import os
25import shutil24import shutil
25import signal
26import subprocess26import subprocess
2727
28# FIRST Ensure correct plugins are loaded. Do not delete this comment or the28# FIRST Ensure correct plugins are loaded. Do not delete this comment or the
@@ -962,14 +962,94 @@ class BzrImportWorker(PullingImportWorker):
962class GitToGitImportWorker(ImportWorker):962class GitToGitImportWorker(ImportWorker):
963 """An import worker for imports from Git to Git."""963 """An import worker for imports from Git to Git."""
964964
965 def _throttleProgress(self, file_obj, timeout=15.0):
966 """Throttle progress messages from a file object.
967
968 git can produce progress output on stderr, but it produces rather a
969 lot of it and we don't want it all to end up in logs. Throttle this
970 so that we only produce output every `timeout` seconds, or when we
971 see a line terminated with a newline rather than a carriage return.
972
973 :param file_obj: a file-like object opened in binary mode.
974 :param timeout: emit progress output only after this many seconds
975 have elapsed.
976 :return: an iterator of interesting text lines read from the file.
977 """
978 # newline="" requests universal newlines mode, but without
979 # translation.
980 if six.PY2 and isinstance(file_obj, file):
981 # A Python 2 file object can't be used directly to construct an
982 # io.TextIOWrapper.
983 class _ReadableFileWrapper:
984 def __init__(self, raw):
985 self._raw = raw
986
987 def __enter__(self):
988 return self
989
990 def __exit__(self, exc_type, exc_value, exc_tb):
991 pass
992
993 def readable(self):
994 return True
995
996 def writable(self):
997 return False
998
999 def seekable(self):
1000 return True
1001
1002 def __getattr__(self, name):
1003 return getattr(self._raw, name)
1004
1005 with _ReadableFileWrapper(file_obj) as readable:
1006 with io.BufferedReader(readable) as buffered:
1007 for line in self._throttleProgress(
1008 buffered, timeout=timeout):
1009 yield line
1010 return
1011
1012 class ReceivedAlarm(Exception):
1013 pass
1014
1015 def alarm_handler(signum, frame):
1016 raise ReceivedAlarm()
1017
1018 old_alarm = signal.signal(signal.SIGALRM, alarm_handler)
1019 try:
1020 progress_buffer = None
1021 with io.TextIOWrapper(
1022 file_obj, encoding="UTF-8", errors="replace",
1023 newline="") as wrapped_file:
1024 while True:
1025 try:
1026 signal.setitimer(signal.ITIMER_REAL, timeout)
1027 line = next(wrapped_file)
1028 signal.setitimer(signal.ITIMER_REAL, 0)
1029 if line.endswith(u"\r"):
1030 progress_buffer = line
1031 else:
1032 if progress_buffer is not None:
1033 yield progress_buffer
1034 progress_buffer = None
1035 yield line
1036 except ReceivedAlarm:
1037 if progress_buffer is not None:
1038 yield progress_buffer
1039 progress_buffer = None
1040 except StopIteration:
1041 break
1042 finally:
1043 signal.setitimer(signal.ITIMER_REAL, 0)
1044 signal.signal(signal.SIGALRM, old_alarm)
1045
965 def _runGit(self, *args, **kwargs):1046 def _runGit(self, *args, **kwargs):
966 """Run git with arguments, sending output to the logger."""1047 """Run git with arguments, sending output to the logger."""
967 cmd = ["git"] + list(args)1048 cmd = ["git"] + list(args)
968 git_process = subprocess.Popen(1049 git_process = subprocess.Popen(
969 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)1050 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)
970 for line in git_process.stdout:1051 for line in self._throttleProgress(git_process.stdout):
971 line = line.decode("UTF-8", "replace").rstrip("\n")1052 self._logger.info(sanitise_urls(line.rstrip("\r\n")))
972 self._logger.info(sanitise_urls(line))
973 retcode = git_process.wait()1053 retcode = git_process.wait()
974 if retcode:1054 if retcode:
975 raise subprocess.CalledProcessError(retcode, cmd)1055 raise subprocess.CalledProcessError(retcode, cmd)
@@ -1104,13 +1184,14 @@ class GitToGitImportWorker(ImportWorker):
1104 # Push the target of HEAD first to ensure that it is always1184 # Push the target of HEAD first to ensure that it is always
1105 # available.1185 # available.
1106 self._runGit(1186 self._runGit(
1107 "push", target_url, "+%s:%s" % (new_head, new_head),1187 "push", "--progress", target_url,
1108 cwd="repository")1188 "+%s:%s" % (new_head, new_head), cwd="repository")
1109 try:1189 try:
1110 self._setHead(target_url, new_head)1190 self._setHead(target_url, new_head)
1111 except GitProtocolError as e:1191 except GitProtocolError as e:
1112 self._logger.info("Unable to set default branch: %s" % e)1192 self._logger.info("Unable to set default branch: %s" % e)
1113 self._runGit("push", "--mirror", target_url, cwd="repository")1193 self._runGit(
1194 "push", "--progress", "--mirror", target_url, cwd="repository")
1114 except subprocess.CalledProcessError as e:1195 except subprocess.CalledProcessError as e:
1115 self._logger.info(1196 self._logger.info(
1116 "Unable to push to hosting service: git push exited %s" %1197 "Unable to push to hosting service: git push exited %s" %

Subscribers

People subscribed via source and target branches

to status/vote changes: