Merge lp:~javier.collado/utah/bug1169510 into lp:utah

Proposed by Javier Collado
Status: Merged
Approved by: Javier Collado
Approved revision: 876
Merged at revision: 866
Proposed branch: lp:~javier.collado/utah/bug1169510
Merge into: lp:utah
Diff against target: 253 lines (+60/-54)
7 files modified
debian/changelog (+3/-0)
examples/run_install_test.py (+0/-7)
examples/run_utah_tests.py (+19/-19)
utah/provisioning/provisioning.py (+1/-1)
utah/provisioning/ssh.py (+24/-8)
utah/run.py (+11/-16)
utah/timeout.py (+2/-3)
To merge this branch: bzr merge lp:~javier.collado/utah/bug1169510
Reviewer Review Type Date Requested Status
Max Brustkern (community) Approve
Andy Doan (community) Approve
Review via email: mp+159646@code.launchpad.net

Description of the change

The changes in this branch are intended to make sure that timeouts are timely
handled when the utah client is running through SSH.

The problem here is that the SIGALRM isn't called until the call to the client
is finished (which might take hours) as explained in the following bug:
http://bugs.python.org/issue9504

To fix the problem, a timeout is set in the ssh socket, so that the call to the
client is interrupted in case of a timeout instead of waiting for the client to
finish before calling the signal handler.

In addition to this, there are a few exception related changes to make sure
that UTAHTimeout isn't caught by accident (making it a subclass of SystemExit)
and also to make sure that error messages are printed after cleanup messages to
make easier to troubleshoot problems.

To post a comment you must log in.
Revision history for this message
Andy Doan (doanac) wrote :

LGTM - didn't know about SystemExit, so I've learned something new.

review: Approve
Revision history for this message
Max Brustkern (nuclearbob) wrote :

This looks great, it'll simplify some things nicely.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'debian/changelog'
2--- debian/changelog 2013-04-17 08:26:37 +0000
3+++ debian/changelog 2013-04-18 14:31:13 +0000
4@@ -4,6 +4,9 @@
5 * Check squashfs on the server images as well during ISO static
6 validation.
7
8+ [ Javier Collado ]
9+ * Use socket timeout in SSH commands instead of SIGALRM (LP: #1169510)
10+
11 -- Max Brustkern <max@canonical.com> Tue, 16 Apr 2013 09:21:19 -0400
12
13 utah (0.10ubuntu1) UNRELEASED; urgency=low
14
15=== modified file 'examples/run_install_test.py'
16--- examples/run_install_test.py 2013-04-10 15:34:49 +0000
17+++ examples/run_install_test.py 2013-04-18 14:31:13 +0000
18@@ -81,13 +81,6 @@
19 preseed=args.preseed, rewrite=args.rewrite,
20 series=args.series, xml=args.xml)
21 exitstatus, locallogs = run_tests(args, machine)
22-
23- except UTAHException as error:
24- sys.stderr.write('Exception: ' + str(error))
25- exitstatus = ReturnCodes.UTAH_EXCEPTION_ERROR
26- except Exception:
27- logging.exception('Unhandled error in UTAH')
28- exitstatus = ReturnCodes.UNHANDLED_ERROR
29 finally:
30 if not args.no_destroy and machine is not None:
31 try:
32
33=== modified file 'examples/run_utah_tests.py'
34--- examples/run_utah_tests.py 2013-04-04 15:39:49 +0000
35+++ examples/run_utah_tests.py 2013-04-18 14:31:13 +0000
36@@ -21,6 +21,7 @@
37 import argparse
38 import logging
39 import sys
40+from traceback import format_exception
41
42 from utah import config
43 from utah.cleanup import cleanup
44@@ -92,23 +93,16 @@
45 def run_provisioned_tests(args):
46 """Run test cases in a provisioned machine."""
47 locallogs = []
48- exitstatus = ReturnCodes.SUCCESS
49 try:
50 # TBD: Inventory should be used to verify machine
51 # is not running other tests
52 machine = ProvisionedMachine(name=args.name)
53 exitstatus, locallogs = run_tests(args, machine)
54- except UTAHException as error:
55- sys.stderr.write('Exception: ' + str(error))
56- exitstatus = ReturnCodes.UTAH_EXCEPTION_ERROR
57- except Exception:
58- logging.exception('Unhandled error in UTAH')
59- exitstatus = ReturnCodes.UNHANDLED_ERROR
60 finally:
61 if len(locallogs) != 0:
62 print('Test logs copied to the following files:')
63 print("\t" + "\n\t".join(locallogs))
64- sys.exit(exitstatus)
65+ sys.exit(ReturnCodes.SUCCESS)
66
67 function = run_provisioned_tests
68 if args.arch is not None and 'arm' in args.arch:
69@@ -124,16 +118,22 @@
70
71 if __name__ == '__main__':
72 try:
73- if isinstance(config.jobtimeout, int):
74- try:
75+ try:
76+ if isinstance(config.jobtimeout, int):
77 timeout(config.jobtimeout, run_utah_tests)
78- except UTAHTimeout:
79- sys.stderr.write('Job failed to complete after '
80- + str(config.jobtimeout) + ' seconds.')
81- sys.exit(ReturnCodes.TIMEOUT_ERROR)
82- else:
83+ else:
84+ run_utah_tests()
85+ except AttributeError:
86 run_utah_tests()
87- except AttributeError:
88- run_utah_tests()
89- finally:
90- cleanup.run()
91+ finally:
92+ cleanup.run()
93+ except UTAHTimeout as exception:
94+ sys.stderr.write('UTAH timeout: {}'.format(exception.message))
95+ sys.exit(ReturnCodes.TIMEOUT_ERROR)
96+ except UTAHException as error:
97+ sys.stderr.write('UTAH exception: '.format(error))
98+ sys.exit(ReturnCodes.UTAH_EXCEPTION_ERROR)
99+ except Exception:
100+ sys.stderr.write('Unhandled error in UTAH:\n{}'
101+ .format(''.join(format_exception(*sys.exc_info()))))
102+ sys.exit(ReturnCodes.UNHANDLED_ERROR)
103
104=== modified file 'utah/provisioning/provisioning.py'
105--- utah/provisioning/provisioning.py 2013-04-08 19:16:05 +0000
106+++ utah/provisioning/provisioning.py 2013-04-18 14:31:13 +0000
107@@ -315,7 +315,7 @@
108 try:
109 utah.timeout.timeout(config.installtimeout, self._provision)
110 except utah.timeout.UTAHTimeout:
111- raise UTAHProvisioningException(
112+ raise utah.timeout.UTAHTimeout(
113 'Machine install timed out. '
114 'Consider adjusting timeout values in '
115 '/etc/utah/conf, ~/.utah/conf, or '
116
117=== modified file 'utah/provisioning/ssh.py'
118--- utah/provisioning/ssh.py 2013-04-04 17:07:22 +0000
119+++ utah/provisioning/ssh.py 2013-04-18 14:31:13 +0000
120@@ -21,6 +21,7 @@
121 import logging
122 import os
123 import paramiko
124+import signal
125 import socket
126
127 from stat import S_ISDIR
128@@ -57,7 +58,7 @@
129 self.ssh_client = ssh_client
130 self.ssh_logger = logging.getLogger('ssh')
131
132- def run(self, command, quiet=False, root=False, timeout=None):
133+ def run(self, command, quiet=False, root=False, command_timeout=None):
134 """Run a command through SSH.
135
136 :param command: Command to execute on the remote machine
137@@ -70,11 +71,11 @@
138 Run command as root user when this flag is set. Otherwise, use the
139 user specified in the configuration file.
140 :type root: bool
141- :param timeout:
142+ :param command_timeout:
143 Amount of time in seconds to wait for the command to complete.
144
145 .. seealso:: :func:`utah.timeout.timeout`
146- :type timeout: int
147+ :type command_timeout: int
148
149 """
150 if isinstance(command, basestring):
151@@ -107,11 +108,26 @@
152 .format(commandstring))
153 stdout = channel.makefile('rb')
154 stderr = channel.makefile_stderr('rb')
155- if timeout is None:
156- channel.exec_command(commandstring)
157- else:
158- utah.timeout.timeout(timeout, channel.exec_command,
159- commandstring)
160+
161+ # Get remaining SIGALRM timeout
162+ alarm_timeout = signal.alarm(0)
163+ signal.alarm(alarm_timeout)
164+
165+ # Discard 0 or None timeout values
166+ timeouts = filter(None, [command_timeout, alarm_timeout])
167+ if timeouts:
168+ timeout = min(timeouts)
169+ channel.settimeout(timeout)
170+
171+ channel.exec_command(commandstring)
172+ try:
173+ # Make sure socket.timeout is raised
174+ channel.recv(0)
175+ except socket.timeout:
176+ raise utah.timeout.UTAHTimeout(
177+ 'SSH command timed out {!r} after {} seconds'
178+ .format(commandstring, timeout))
179+
180 retval = channel.recv_exit_status()
181
182 except paramiko.BadHostKeyException as err:
183
184=== modified file 'utah/run.py'
185--- utah/run.py 2013-04-17 08:26:37 +0000
186+++ utah/run.py 2013-04-18 14:31:13 +0000
187@@ -24,7 +24,6 @@
188 import socket
189 import sys
190 import time
191-import traceback
192 import urllib
193
194 import jsonschema
195@@ -341,21 +340,17 @@
196 .format(report_type, machine.installtype))
197
198 locallogs = []
199- try:
200- machine.installclient()
201- except Exception:
202- traceback.print_exc()
203- exitstatus = 1
204- else:
205- # Server will return success code only if the execution
206- # of every runlist was successful
207- exitstatus = _run(args.runlist)
208-
209- if args.files is not None:
210- try:
211- locallogs += getfiles(args, machine)
212- except UTAHException as err:
213- logging.warning('Failed to download files: %s', str(err))
214+ machine.installclient()
215+
216+ # Server will return success code only if the execution
217+ # of every runlist was successful
218+ exitstatus = _run(args.runlist)
219+
220+ if args.files is not None:
221+ try:
222+ locallogs += getfiles(args, machine)
223+ except UTAHException as err:
224+ logging.warning('Failed to download files: %s', str(err))
225
226 # Provisioned systems have an image already installed
227 # and the preseed file is no longer available
228
229=== modified file 'utah/timeout.py'
230--- utah/timeout.py 2013-04-03 21:17:26 +0000
231+++ utah/timeout.py 2013-04-18 14:31:13 +0000
232@@ -21,11 +21,10 @@
233 import subprocess
234 import sys
235
236-from utah.exceptions import UTAHException
237 from utah.commandstr import commandstr
238
239
240-class UTAHTimeout(UTAHException):
241+class UTAHTimeout(SystemExit):
242
243 """Provide a special exception to indicate an operation timed out."""
244
245@@ -83,7 +82,7 @@
246 return retval
247 except UTAHTimeout:
248 raise UTAHTimeout(
249- '{} timed out after {} seconds'
250+ '{} timed out after {} seconds\n'
251 .format(commandstr(command, *args, **kw), str(timeout)))
252 else:
253 raise UTAHTimeout('0 timeout specified')

Subscribers

People subscribed via source and target branches