Merge ~cjwatson/launchpad:codeimport-git-progress into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 946fc55f68991af9952c7f2b25fc82c8234c1c89
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:codeimport-git-progress
Merge into: launchpad:master
Diff against target: 209 lines (+140/-7)
2 files modified
lib/lp/codehosting/codeimport/tests/test_worker.py (+52/-0)
lib/lp/codehosting/codeimport/worker.py (+88/-7)
Reviewer Review Type Date Requested Status
Tom Wardill (community) Approve
Ioana Lasc (community) Approve
Review via email: mp+373732@code.launchpad.net

Commit message

Enable throttled progress output from git-to-git import workers

Description of the change

This may help with bug 1642699, though I'm not sure yet.

Getting things to work with Python 2's disconnect between file objects and the io module is gratuitously inconvenient.

This is essentially the same as https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-progress/+merge/323902, converted to git and rebased on master.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

looks good.

review: Approve
Revision history for this message
Otto Co-Pilot (otto-copilot) wrote :
Revision history for this message
Tom Wardill (twom) :
review: Approve
Revision history for this message
Otto Co-Pilot (otto-copilot) wrote :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/codehosting/codeimport/tests/test_worker.py b/lib/lp/codehosting/codeimport/tests/test_worker.py
2index d0a30cf..9daf357 100644
3--- a/lib/lp/codehosting/codeimport/tests/test_worker.py
4+++ b/lib/lp/codehosting/codeimport/tests/test_worker.py
5@@ -58,6 +58,10 @@ import six
6 import subvertpy
7 import subvertpy.client
8 import subvertpy.ra
9+from testtools.matchers import (
10+ ContainsAll,
11+ LessThan,
12+ )
13
14 import lp.codehosting
15 from lp.codehosting.codeimport.tarball import (
16@@ -81,6 +85,7 @@ from lp.codehosting.codeimport.worker import (
17 ForeignTreeStore,
18 get_default_bazaar_branch_store,
19 GitImportWorker,
20+ GitToGitImportWorker,
21 ImportDataStore,
22 ToBzrImportWorker,
23 )
24@@ -1358,6 +1363,53 @@ class TestBzrImport(WorkerTest, TestActualImportMixin,
25 branch.repository.get_revision(branch.last_revision()).committer)
26
27
28+class TestGitToGitImportWorker(TestCase):
29+
30+ def test_throttleProgress(self):
31+ source_details = self.factory.makeCodeImportSourceDetails(
32+ rcstype="git", target_rcstype="git")
33+ logger = BufferLogger()
34+ worker = GitToGitImportWorker(
35+ source_details, logger, AcceptAnythingPolicy())
36+ read_fd, write_fd = os.pipe()
37+ pid = os.fork()
38+ if pid == 0: # child
39+ os.close(read_fd)
40+ with os.fdopen(write_fd, "wb") as write:
41+ write.write(b"Starting\n")
42+ for i in range(50):
43+ time.sleep(0.1)
44+ write.write(("%d ...\r" % i).encode("UTF-8"))
45+ if (i % 10) == 9:
46+ write.write(
47+ ("Interval %d\n" % (i // 10)).encode("UTF-8"))
48+ write.write(b"Finishing\n")
49+ os._exit(0)
50+ else: # parent
51+ os.close(write_fd)
52+ with os.fdopen(read_fd, "rb") as read:
53+ lines = list(worker._throttleProgress(read, timeout=0.5))
54+ os.waitpid(pid, 0)
55+ # Matching the exact sequence of lines would be too brittle, but
56+ # we require some things to be true:
57+ # All the non-progress lines must be present, in the right
58+ # order.
59+ self.assertEqual(
60+ [u"Starting\n", u"Interval 0\n", u"Interval 1\n",
61+ u"Interval 2\n", u"Interval 3\n", u"Interval 4\n",
62+ u"Finishing\n"],
63+ [line for line in lines if not line.endswith(u"\r")])
64+ # No more than 15 progress lines may be present (allowing some
65+ # slack for the child process being slow).
66+ progress_lines = [line for line in lines if line.endswith(u"\r")]
67+ self.assertThat(len(progress_lines), LessThan(16))
68+ # All the progress lines immediately before interval markers
69+ # must be present.
70+ self.assertThat(
71+ progress_lines,
72+ ContainsAll([u"%d ...\r" % i for i in (9, 19, 29, 39, 49)]))
73+
74+
75 class CodeImportBranchOpenPolicyTests(TestCase):
76
77 def setUp(self):
78diff --git a/lib/lp/codehosting/codeimport/worker.py b/lib/lp/codehosting/codeimport/worker.py
79index 6259189..5aacf29 100644
80--- a/lib/lp/codehosting/codeimport/worker.py
81+++ b/lib/lp/codehosting/codeimport/worker.py
82@@ -19,10 +19,10 @@ __all__ = [
83 'get_default_bazaar_branch_store',
84 ]
85
86-
87 import io
88 import os
89 import shutil
90+import signal
91 import subprocess
92
93 # FIRST Ensure correct plugins are loaded. Do not delete this comment or the
94@@ -962,14 +962,94 @@ class BzrImportWorker(PullingImportWorker):
95 class GitToGitImportWorker(ImportWorker):
96 """An import worker for imports from Git to Git."""
97
98+ def _throttleProgress(self, file_obj, timeout=15.0):
99+ """Throttle progress messages from a file object.
100+
101+ git can produce progress output on stderr, but it produces rather a
102+ lot of it and we don't want it all to end up in logs. Throttle this
103+ so that we only produce output every `timeout` seconds, or when we
104+ see a line terminated with a newline rather than a carriage return.
105+
106+ :param file_obj: a file-like object opened in binary mode.
107+ :param timeout: emit progress output only after this many seconds
108+ have elapsed.
109+ :return: an iterator of interesting text lines read from the file.
110+ """
111+ # newline="" requests universal newlines mode, but without
112+ # translation.
113+ if six.PY2 and isinstance(file_obj, file):
114+ # A Python 2 file object can't be used directly to construct an
115+ # io.TextIOWrapper.
116+ class _ReadableFileWrapper:
117+ def __init__(self, raw):
118+ self._raw = raw
119+
120+ def __enter__(self):
121+ return self
122+
123+ def __exit__(self, exc_type, exc_value, exc_tb):
124+ pass
125+
126+ def readable(self):
127+ return True
128+
129+ def writable(self):
130+ return False
131+
132+ def seekable(self):
133+ return True
134+
135+ def __getattr__(self, name):
136+ return getattr(self._raw, name)
137+
138+ with _ReadableFileWrapper(file_obj) as readable:
139+ with io.BufferedReader(readable) as buffered:
140+ for line in self._throttleProgress(
141+ buffered, timeout=timeout):
142+ yield line
143+ return
144+
145+ class ReceivedAlarm(Exception):
146+ pass
147+
148+ def alarm_handler(signum, frame):
149+ raise ReceivedAlarm()
150+
151+ old_alarm = signal.signal(signal.SIGALRM, alarm_handler)
152+ try:
153+ progress_buffer = None
154+ with io.TextIOWrapper(
155+ file_obj, encoding="UTF-8", errors="replace",
156+ newline="") as wrapped_file:
157+ while True:
158+ try:
159+ signal.setitimer(signal.ITIMER_REAL, timeout)
160+ line = next(wrapped_file)
161+ signal.setitimer(signal.ITIMER_REAL, 0)
162+ if line.endswith(u"\r"):
163+ progress_buffer = line
164+ else:
165+ if progress_buffer is not None:
166+ yield progress_buffer
167+ progress_buffer = None
168+ yield line
169+ except ReceivedAlarm:
170+ if progress_buffer is not None:
171+ yield progress_buffer
172+ progress_buffer = None
173+ except StopIteration:
174+ break
175+ finally:
176+ signal.setitimer(signal.ITIMER_REAL, 0)
177+ signal.signal(signal.SIGALRM, old_alarm)
178+
179 def _runGit(self, *args, **kwargs):
180 """Run git with arguments, sending output to the logger."""
181 cmd = ["git"] + list(args)
182 git_process = subprocess.Popen(
183 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)
184- for line in git_process.stdout:
185- line = line.decode("UTF-8", "replace").rstrip("\n")
186- self._logger.info(sanitise_urls(line))
187+ for line in self._throttleProgress(git_process.stdout):
188+ self._logger.info(sanitise_urls(line.rstrip("\r\n")))
189 retcode = git_process.wait()
190 if retcode:
191 raise subprocess.CalledProcessError(retcode, cmd)
192@@ -1104,13 +1184,14 @@ class GitToGitImportWorker(ImportWorker):
193 # Push the target of HEAD first to ensure that it is always
194 # available.
195 self._runGit(
196- "push", target_url, "+%s:%s" % (new_head, new_head),
197- cwd="repository")
198+ "push", "--progress", target_url,
199+ "+%s:%s" % (new_head, new_head), cwd="repository")
200 try:
201 self._setHead(target_url, new_head)
202 except GitProtocolError as e:
203 self._logger.info("Unable to set default branch: %s" % e)
204- self._runGit("push", "--mirror", target_url, cwd="repository")
205+ self._runGit(
206+ "push", "--progress", "--mirror", target_url, cwd="repository")
207 except subprocess.CalledProcessError as e:
208 self._logger.info(
209 "Unable to push to hosting service: git push exited %s" %

Subscribers

People subscribed via source and target branches

to status/vote changes: