Merge lp:~julian-edwards/launchpad/timeouts-feature-bug-681426 into lp:launchpad

Proposed by Julian Edwards
Status: Merged
Approved by: Jonathan Lange
Approved revision: no longer in the source branch.
Merged at revision: 11986
Proposed branch: lp:~julian-edwards/launchpad/timeouts-feature-bug-681426
Merge into: lp:launchpad
Diff against target: 100 lines (+34/-29)
3 files modified
lib/lp/buildmaster/model/builder.py (+16/-28)
lib/lp/buildmaster/tests/test_builder.py (+13/-0)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+5/-1)
To merge this branch: bzr merge lp:~julian-edwards/launchpad/timeouts-feature-bug-681426
Reviewer Review Type Date Requested Status
Jonathan Lange (community) Approve
Review via email: mp+41881@code.launchpad.net

Commit message

Work around a bug in builder Xen guests where it sometimes appears to drop the first network packet. We send a fake ping message before building.

Description of the change

See the bug for gory details.

I've also removed some unnecessary code that was checking for failures that could never happen at that part of the code. The failures are all caught in the same place in the manager code now.

bin/test -cvv test_builder test_virtual_job_dispatch_pings_before_building

To post a comment you must log in.
Revision history for this message
Jonathan Lange (jml) wrote :

Good to land. Thanks for the comment. Oh, you should probably link to the bug report in the comment.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/buildmaster/model/builder.py'
2--- lib/lp/buildmaster/model/builder.py 2010-11-18 17:54:06 +0000
3+++ lib/lp/buildmaster/model/builder.py 2010-11-25 17:40:47 +0000
4@@ -512,39 +512,27 @@
5 else:
6 d = defer.succeed(None)
7
8- def resume_done(ignored):
9+ def ping_done(ignored):
10 return self.current_build_behavior.dispatchBuildToSlave(
11 build_queue_item.id, logger)
12
13- def eb_slave_failure(failure):
14- failure.trap(BuildSlaveFailure)
15- e = failure.value
16- self.failBuilder(
17- "Exception (%s) when setting up to new job" % (e,))
18-
19- def eb_cannot_fetch_file(failure):
20- failure.trap(CannotFetchFile)
21- e = failure.value
22- message = """Slave '%s' (%s) was unable to fetch file.
23- ****** URL ********
24- %s
25- ****** INFO *******
26- %s
27- *******************
28- """ % (self.name, self.url, e.file_url, e.error_information)
29- raise BuildDaemonError(message)
30-
31- def eb_socket_error(failure):
32- failure.trap(socket.error)
33- e = failure.value
34- error_message = "Exception (%s) when setting up new job" % (e,)
35- d = self.handleTimeout(logger, error_message)
36- return d.addBoth(lambda ignored: failure)
37+ def resume_done(ignored):
38+ # Before we try and contact the resumed slave, we're going
39+ # to send it a message. This is to ensure it's accepting
40+ # packets from the outside world, because testing has shown
41+ # that the first packet will randomly fail for no apparent
42+ # reason. This could be a quirk of the Xen guest, we're not
43+ # sure. We also don't care about the result from this message,
44+ # just that it's sent, hence the "addBoth".
45+ # See bug 586359.
46+ if self.virtualized:
47+ d = self.slave.echo("ping")
48+ else:
49+ d = defer.succeed(None)
50+ d.addBoth(ping_done)
51+ return d
52
53 d.addCallback(resume_done)
54- d.addErrback(eb_slave_failure)
55- d.addErrback(eb_cannot_fetch_file)
56- d.addErrback(eb_socket_error)
57 return d
58
59 def failBuilder(self, reason):
60
61=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
62--- lib/lp/buildmaster/tests/test_builder.py 2010-11-22 10:11:44 +0000
63+++ lib/lp/buildmaster/tests/test_builder.py 2010-11-25 17:40:47 +0000
64@@ -247,6 +247,19 @@
65 self.assertEqual(BuildStatus.BUILDING, build.status)
66 return d.addCallback(check_build_started)
67
68+ def test_virtual_job_dispatch_pings_before_building(self):
69+ # We need to send a ping to the builder to work around a bug
70+ # where sometimes the first network packet sent is dropped.
71+ builder, build = self._setupBinaryBuildAndBuilder()
72+ candidate = build.queueBuild()
73+ removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
74+ result=candidate)
75+ d = builder.findAndStartJob()
76+ def check_build_started(candidate):
77+ self.assertIn(
78+ ('echo', 'ping'), removeSecurityProxy(builder.slave).call_log)
79+ return d.addCallback(check_build_started)
80+
81 def test_slave(self):
82 # Builder.slave is a BuilderSlave that points at the actual Builder.
83 # The Builder is only ever used in scripts that run outside of the
84
85=== modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py'
86--- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-10-31 00:05:10 +0000
87+++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-11-25 17:40:47 +0000
88@@ -124,7 +124,11 @@
89 build_log = [
90 ('build', build_id, 'binarypackage', chroot.content.sha1,
91 filemap_names, extra_args)]
92- return upload_logs + build_log
93+ if builder.virtualized:
94+ result = [('echo', 'ping')] + upload_logs + build_log
95+ else:
96+ result = upload_logs + build_log
97+ return result
98
99 def startBuild(self, builder, candidate):
100 builder = removeSecurityProxy(builder)