Merge lp:~julian-edwards/launchpad/timeouts-feature-bug-681426 into lp:launchpad

Proposed by Julian Edwards
Status: Merged
Approved by: Jonathan Lange
Approved revision: no longer in the source branch.
Merged at revision: 11986
Proposed branch: lp:~julian-edwards/launchpad/timeouts-feature-bug-681426
Merge into: lp:launchpad
Diff against target: 100 lines (+34/-29)
3 files modified
lib/lp/buildmaster/model/builder.py (+16/-28)
lib/lp/buildmaster/tests/test_builder.py (+13/-0)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+5/-1)
To merge this branch: bzr merge lp:~julian-edwards/launchpad/timeouts-feature-bug-681426
Reviewer Review Type Date Requested Status
Jonathan Lange (community) Approve
Review via email: mp+41881@code.launchpad.net

Commit message

Work around a bug in builder Xen guests where it sometimes appears to drop the first network packet. We send a fake ping message before building.

Description of the change

See the bug for gory details.

I've also removed some unnecessary code that was checking for failures that could never happen at that part of the code. The failures are all caught in the same place in the manager code now.

bin/test -cvv test_builder test_virtual_job_dispatch_pings_before_building

To post a comment you must log in.
Jonathan Lange (jml) wrote :

Good to land. Thanks for the comment. Oh, you should probably link to the bug report in the comment.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/buildmaster/model/builder.py'
--- lib/lp/buildmaster/model/builder.py 2010-11-18 17:54:06 +0000
+++ lib/lp/buildmaster/model/builder.py 2010-11-25 17:40:47 +0000
@@ -512,39 +512,27 @@
512 else:512 else:
513 d = defer.succeed(None)513 d = defer.succeed(None)
514514
515 def resume_done(ignored):515 def ping_done(ignored):
516 return self.current_build_behavior.dispatchBuildToSlave(516 return self.current_build_behavior.dispatchBuildToSlave(
517 build_queue_item.id, logger)517 build_queue_item.id, logger)
518518
519 def eb_slave_failure(failure):519 def resume_done(ignored):
520 failure.trap(BuildSlaveFailure)520 # Before we try and contact the resumed slave, we're going
521 e = failure.value521 # to send it a message. This is to ensure it's accepting
522 self.failBuilder(522 # packets from the outside world, because testing has shown
523 "Exception (%s) when setting up to new job" % (e,))523 # that the first packet will randomly fail for no apparent
524524 # reason. This could be a quirk of the Xen guest, we're not
525 def eb_cannot_fetch_file(failure):525 # sure. We also don't care about the result from this message,
526 failure.trap(CannotFetchFile)526 # just that it's sent, hence the "addBoth".
527 e = failure.value527 # See bug 586359.
528 message = """Slave '%s' (%s) was unable to fetch file.528 if self.virtualized:
529 ****** URL ********529 d = self.slave.echo("ping")
530 %s530 else:
531 ****** INFO *******531 d = defer.succeed(None)
532 %s532 d.addBoth(ping_done)
533 *******************533 return d
534 """ % (self.name, self.url, e.file_url, e.error_information)
535 raise BuildDaemonError(message)
536
537 def eb_socket_error(failure):
538 failure.trap(socket.error)
539 e = failure.value
540 error_message = "Exception (%s) when setting up new job" % (e,)
541 d = self.handleTimeout(logger, error_message)
542 return d.addBoth(lambda ignored: failure)
543534
544 d.addCallback(resume_done)535 d.addCallback(resume_done)
545 d.addErrback(eb_slave_failure)
546 d.addErrback(eb_cannot_fetch_file)
547 d.addErrback(eb_socket_error)
548 return d536 return d
549537
550 def failBuilder(self, reason):538 def failBuilder(self, reason):
551539
=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
--- lib/lp/buildmaster/tests/test_builder.py 2010-11-22 10:11:44 +0000
+++ lib/lp/buildmaster/tests/test_builder.py 2010-11-25 17:40:47 +0000
@@ -247,6 +247,19 @@
247 self.assertEqual(BuildStatus.BUILDING, build.status)247 self.assertEqual(BuildStatus.BUILDING, build.status)
248 return d.addCallback(check_build_started)248 return d.addCallback(check_build_started)
249249
250 def test_virtual_job_dispatch_pings_before_building(self):
251 # We need to send a ping to the builder to work around a bug
252 # where sometimes the first network packet sent is dropped.
253 builder, build = self._setupBinaryBuildAndBuilder()
254 candidate = build.queueBuild()
255 removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
256 result=candidate)
257 d = builder.findAndStartJob()
258 def check_build_started(candidate):
259 self.assertIn(
260 ('echo', 'ping'), removeSecurityProxy(builder.slave).call_log)
261 return d.addCallback(check_build_started)
262
250 def test_slave(self):263 def test_slave(self):
251 # Builder.slave is a BuilderSlave that points at the actual Builder.264 # Builder.slave is a BuilderSlave that points at the actual Builder.
252 # The Builder is only ever used in scripts that run outside of the265 # The Builder is only ever used in scripts that run outside of the
253266
=== modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py'
--- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-10-31 00:05:10 +0000
+++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-11-25 17:40:47 +0000
@@ -124,7 +124,11 @@
124 build_log = [124 build_log = [
125 ('build', build_id, 'binarypackage', chroot.content.sha1,125 ('build', build_id, 'binarypackage', chroot.content.sha1,
126 filemap_names, extra_args)]126 filemap_names, extra_args)]
127 return upload_logs + build_log127 if builder.virtualized:
128 result = [('echo', 'ping')] + upload_logs + build_log
129 else:
130 result = upload_logs + build_log
131 return result
128132
129 def startBuild(self, builder, candidate):133 def startBuild(self, builder, candidate):
130 builder = removeSecurityProxy(builder)134 builder = removeSecurityProxy(builder)