Merge lp:~javier.collado/utah/bug1103448 into lp:utah

Proposed by Javier Collado
Status: Merged
Approved by: Javier Collado
Approved revision: 815
Merged at revision: 813
Proposed branch: lp:~javier.collado/utah/bug1103448
Merge into: lp:utah
Diff against target: 207 lines (+96/-19)
4 files modified
utah/client/runner.py (+17/-2)
utah/retry.py (+20/-3)
utah/timeout.py (+38/-13)
utah/url.py (+21/-1)
To merge this branch: bzr merge lp:~javier.collado/utah/bug1103448
Reviewer Review Type Date Requested Status
Max Brustkern (community) Approve
Joe Talbott Pending
UTAH Dev Pending
Review via email: mp+146826@code.launchpad.net

Description of the change

This branch implements a workaround for http errors like the ones described in
bug1103448.

The workaround is just retry the failed `bzr export` commands for one minute
every 3 seconds.

The values have been hardcoded, but they could be added to the configuration
file if needed. Anyway, given that the change affects both the client and the
server, I think this could be updated only on the server since the client would
always get the defaults from the configuration file in the package when
installed. Please let me know if there's some good way to share this
configuration data without adding a new command line option at the client.

Aside from this, I've spent some time improving the documentation for timeout
and retry since that has been helpful when implementing the workaround.

To post a comment you must log in.
Revision history for this message
Max Brustkern (nuclearbob) wrote :

Looks good to me. I haven't tested it yet, but I can do that after I finish the other testing I'm doing.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'utah/client/runner.py'
2--- utah/client/runner.py 2012-12-12 11:21:10 +0000
3+++ utah/client/runner.py 2013-02-06 11:14:36 +0000
4@@ -18,12 +18,16 @@
5 from utah.client.testsuite import TestSuite
6 from utah.client.state_agent import StateAgentYAML
7 from utah.client import exceptions
8+from utah.exceptions import UTAHException
9+from utah.timeout import timeout
10+from utah.retry import retry
11
12 import os
13 import shutil
14 import stat
15 import urllib
16 import jsonschema
17+import time
18
19 from utah.client.common import (
20 MASTER_RUNLIST,
21@@ -443,9 +447,20 @@
22 if not os.path.exists(name):
23 os.mkdir(name)
24
25+ def vcs_get_retriable():
26+ result = vcs_handler.get(directory=name)
27+ if (isinstance(vcs_handler, BzrHandler) and
28+ result['returncode'] == 3 and
29+ 'Unable to handle http code 502' in result['stderr']):
30+ # Separate every retry attempt with a timeout of 3 seconds
31+ time.sleep(3)
32+ raise UTAHException('Launchpad temporary error detected',
33+ retry=True)
34+ return result
35+
36 if name not in self.fetched_suites:
37-
38- res = vcs_handler.get(directory=name)
39+ # Retry bzr export on http errors for 60 seconds
40+ res = timeout(60, retry, vcs_get_retriable)
41 self.result.add_result(res)
42
43 # If fetch failed move on to the next testsuite.
44
45=== modified file 'utah/retry.py'
46--- utah/retry.py 2012-12-03 14:02:18 +0000
47+++ utah/retry.py 2013-02-06 11:14:36 +0000
48@@ -23,9 +23,26 @@
49
50
51 def retry(command, *args, **kw):
52- """
53- Retry an operation until it completes or an exception is raised without
54- retry set to True.
55+ """Retry a command as long as a retriable exception is captured.
56+
57+ A retriable exception is considered to be a
58+ ``UTAHException`` that has the attribute ``retry`` set to
59+ ``True``.
60+
61+ :param command: Command to be executed.
62+ :type command: callable
63+ :param args: Positional arguments to be passed to the callable.
64+ :param kwargs:
65+ Keyword arguments to be passed to the callable.
66+
67+ .. note::
68+ If a keyword argument named ``logmethod`` is found, it will be
69+ extracted (i.e. it won't be passed to the callable) and used to log
70+ every retry attempt (using ``sys.stderr`` by default).
71+ :returns: The value returned by the callable.
72+
73+ .. seealso:: :func:`utah.timeout.timeout`
74+
75 """
76 try:
77 logmethod = kw.pop('logmethod')
78
79=== modified file 'utah/timeout.py'
80--- utah/timeout.py 2012-12-08 02:10:12 +0000
81+++ utah/timeout.py 2013-02-06 11:14:36 +0000
82@@ -25,18 +25,29 @@
83
84
85 class UTAHTimeout(UTAHException):
86- """
87- Provide a special exception to indicate an operation timed out.
88- """
89+ """Provide a special exception to indicate an operation timed out."""
90
91
92 # Mostly pulled from utah/client/common.py
93 # Inspired by:
94 # http://stackoverflow.com/a/3326559
95 def timeout(timeout, command, *args, **kw):
96- """
97- Run command with all other arguments for up to timeout seconds, after with
98- a UTAHTimeout exception is returned.
99+ """Run a command for up to ``timeout`` seconds.
100+
101+ :param timeout:
102+ Maximum amount of time to wait for the command to complete in seconds.
103+ :type timeout: int
104+ :param command:
105+ Command whose execution should complete before timeout expires.
106+ :type command: callable
107+ :param args: Positional arguments to be passed to the callable.
108+ :param kwargs: Keyword arguments to be passed to the callable.
109+ :returns: The value returned by the callable.
110+ :raises UTAHTimeout:
111+ If command execution hasn't finished before ``timeout`` seconds.
112+
113+ .. seealso:: :func:`utah.retry.retry`
114+
115 """
116 #TODO: Better support for nested timeouts.
117 if command is None:
118@@ -71,11 +82,20 @@
119
120
121 def subprocesstimeout(timeout, *args, **kw):
122- """
123- Pass all arguments except timeout to subprocess.Popen and run for timeout
124- seconds.
125- After the timeout is reached, kill all subprocesses and return a
126- UTAHTimeout exception.
127+ """Run command through ``subprocess.Popen`` for up to ``timeout`` seconds.
128+
129+ Command termination is checked using ``subprocess.Popen.poll``.
130+
131+ :param timeout:
132+ Maximum amount of time to wait for the command to complete in seconds.
133+ :type timeout: int
134+ :param args: Positional arguments to be passed to ``subprocess.Popen``.
135+ :param kwargs: Keyword arguments to be passed to the ``subprocess.Popen``.
136+ :returns: The subprocess object
137+ :rtype: ``subprocess.Popen``
138+ :raises UTAHTimeout:
139+ If command execution hasn't finished before ``timeout`` seconds.
140+
141 """
142 if args is None:
143 return
144@@ -118,8 +138,13 @@
145
146
147 def get_process_children(pid):
148- """
149- Find process children so they can be killed when the timeout expires.
150+ """Find process children so they can be killed when the timeout expires.
151+
152+ :param pid: Process ID for the parent process
153+ :type pid: int
154+ :returns: The pid for each children process
155+ :rtype: list(int)
156+
157 """
158 p = subprocess.Popen('ps --no-headers -o pid --ppid %d' % pid, shell=True,
159 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
160
161=== modified file 'utah/url.py'
162--- utah/url.py 2012-12-13 13:48:54 +0000
163+++ utah/url.py 2013-02-06 11:14:36 +0000
164@@ -24,6 +24,7 @@
165 import urllib2
166 import tempfile
167 import logging
168+import time
169 from urlparse import urlparse
170 from argparse import ArgumentTypeError
171
172@@ -32,6 +33,8 @@
173 import bzrlib.errors
174
175 from utah.exceptions import UTAHException
176+from utah.timeout import timeout
177+from utah.retry import retry
178
179
180 # Inspired by: http://stackoverflow.com/a/2070916/183066
181@@ -212,8 +215,25 @@
182 cmd = bzrlib.builtins.cmd_export()
183 assert cmd is not None
184 tmp_dir = tempfile.mkdtemp(prefix='utah_')
185+
186+ def bzr_export_retriable():
187+ """bzr export a URL retrying on http errors
188+
189+ This is a workaround to launchpad problems with http URLs that
190+ happen from time to time
191+
192+ """
193+ try:
194+ cmd.run(tmp_dir, url)
195+ except bzrlib.errors.InvalidHttpResponse as exception:
196+ # Separate every retry attempt with a timeout of 3 seconds
197+ time.sleep(3)
198+ raise UTAHException(exception.path, exception.msg, retry=True)
199+
200 try:
201- cmd.run(tmp_dir, url)
202+ # Retry bzr export on http errors for 60 seconds
203+ timeout(60, retry, bzr_export_retriable,
204+ logmethod=bzr_logger.debug)
205 except bzrlib.errors.BzrError as exception:
206 raise ArgumentTypeError('Bazaar export error: {}'
207 .format(exception))

Subscribers

People subscribed via source and target branches