Merge lp:~wgrant/launchpad/bi-uB-no-bfjb into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: Steve Kowalik
Approved revision: no longer in the source branch.
Merged at revision: 16792
Proposed branch: lp:~wgrant/launchpad/bi-uB-no-bfjb
Merge into: lp:launchpad
Diff against target: 409 lines (+77/-153)
5 files modified
lib/lp/buildmaster/interactor.py (+50/-96)
lib/lp/buildmaster/interfaces/builder.py (+0/-8)
lib/lp/buildmaster/manager.py (+4/-4)
lib/lp/buildmaster/tests/test_interactor.py (+20/-43)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+3/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/bi-uB-no-bfjb
Reviewer Review Type Date Requested Status
Steve Kowalik (community) code Approve
Review via email: mp+188766@code.launchpad.net

Commit message

BuilderInteractor.updateBuild constructs a BuildFarmJobBehavior itself as necessary, while BuilderInteractor.rescueIfLost is given a cookie directly.

Description of the change

More BuilderInteractor refactorment. In this episode, updateBuild retrieves the Builder and constructs a BuildFarmJobBehavior itself only when necessary (on build completion), allowing the general case to avoid hitting the DB except for the BuildQueue.logtail write. Meanwhile, rescueIfLost takes the expected cookie rather than calculating it itself, so SlaveScanner can eventually cache the cookie without hitting the DB each time.

To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/buildmaster/interactor.py'
--- lib/lp/buildmaster/interactor.py 2013-10-01 00:38:38 +0000
+++ lib/lp/buildmaster/interactor.py 2013-10-02 05:56:53 +0000
@@ -25,7 +25,6 @@
25 BuildDaemonError,25 BuildDaemonError,
26 CannotFetchFile,26 CannotFetchFile,
27 CannotResumeHost,27 CannotResumeHost,
28 CorruptBuildCookie,
29 )28 )
30from lp.buildmaster.interfaces.buildfarmjobbehavior import (29from lp.buildmaster.interfaces.buildfarmjobbehavior import (
31 IBuildFarmJobBehavior,30 IBuildFarmJobBehavior,
@@ -267,29 +266,15 @@
267 status['logtail'] = status_sentence[2]266 status['logtail'] = status_sentence[2]
268 defer.returnValue((status_sentence, status))267 defer.returnValue((status_sentence, status))
269268
270 @staticmethod
271 def verifySlaveBuildCookie(behavior, slave_cookie):
272 """See `IBuildFarmJobBehavior`."""
273 if behavior is None:
274 if slave_cookie is not None:
275 raise CorruptBuildCookie('Slave building when should be idle.')
276 else:
277 good_cookie = behavior.getBuildCookie()
278 if slave_cookie != good_cookie:
279 raise CorruptBuildCookie(
280 "Invalid slave build cookie: got %r, expected %r."
281 % (slave_cookie, good_cookie))
282
283 @classmethod269 @classmethod
284 @defer.inlineCallbacks270 @defer.inlineCallbacks
285 def rescueIfLost(cls, vitals, slave, behavior, logger=None):271 def rescueIfLost(cls, vitals, slave, expected_cookie, logger=None):
286 """Reset the slave if its job information doesn't match the DB.272 """Reset the slave if its job information doesn't match the DB.
287273
288 This checks the build ID reported in the slave status against the274 This checks the build ID reported in the slave status against
289 database. If it isn't building what we think it should be, the current275 the given cookie. If it isn't building what we think it should
290 build will be aborted and the slave cleaned in preparation for a new276 be, the current build will be aborted and the slave cleaned in
291 task. The decision about the slave's correctness is left up to277 preparation for a new task.
292 `IBuildFarmJobBehavior.verifySlaveBuildCookie`.
293278
294 :return: A Deferred that fires when the dialog with the slave is279 :return: A Deferred that fires when the dialog with the slave is
295 finished. Its return value is True if the slave is lost,280 finished. Its return value is True if the slave is lost,
@@ -315,14 +300,12 @@
315 else:300 else:
316 slave_cookie = status_sentence[ident_position[status]]301 slave_cookie = status_sentence[ident_position[status]]
317302
318 # verifySlaveBuildCookie will raise CorruptBuildCookie if the303 if slave_cookie == expected_cookie:
319 # slave cookie doesn't match the expected one, including304 # The master and slave agree about the current job. Continue.
320 # verifying that the slave cookie is None iff we expect the
321 # slave to be idle.
322 try:
323 cls.verifySlaveBuildCookie(behavior, slave_cookie)
324 defer.returnValue(False)305 defer.returnValue(False)
325 except CorruptBuildCookie as reason:306 else:
307 # The master and slave disagree. The master is our master,
308 # so try to rescue the slave.
326 # An IDLE slave doesn't need rescuing (SlaveScanner.scan309 # An IDLE slave doesn't need rescuing (SlaveScanner.scan
327 # will rescue the DB side instead), and we just have to wait310 # will rescue the DB side instead), and we just have to wait
328 # out an ABORTING one.311 # out an ABORTING one.
@@ -332,8 +315,8 @@
332 yield slave.abort()315 yield slave.abort()
333 if logger:316 if logger:
334 logger.info(317 logger.info(
335 "Builder '%s' rescued from '%s': '%s'" %318 "Builder slave '%s' rescued from %r (expected %r)." %
336 (vitals.name, slave_cookie, reason))319 (vitals.name, slave_cookie, expected_cookie))
337 defer.returnValue(True)320 defer.returnValue(True)
338321
339 @classmethod322 @classmethod
@@ -474,9 +457,22 @@
474 candidate, vitals, builder, slave, new_behavior, logger)457 candidate, vitals, builder, slave, new_behavior, logger)
475 defer.returnValue(candidate)458 defer.returnValue(candidate)
476459
460 @staticmethod
461 def extractBuildStatus(status_dict):
462 """Read build status name.
463
464 :param status_dict: build status dict from slaveStatus.
465 :return: the unqualified status name, e.g. "OK".
466 """
467 status_string = status_dict['build_status']
468 lead_string = 'BuildStatus.'
469 assert status_string.startswith(lead_string), (
470 "Malformed status string: '%s'" % status_string)
471 return status_string[len(lead_string):]
472
477 @classmethod473 @classmethod
478 @defer.inlineCallbacks474 @defer.inlineCallbacks
479 def updateBuild(cls, queueItem, slave, behavior):475 def updateBuild(cls, vitals, slave, builders_cache, behavior_factory):
480 """Verify the current build job status.476 """Verify the current build job status.
481477
482 Perform the required actions for each state.478 Perform the required actions for each state.
@@ -487,76 +483,34 @@
487 # impossible to get past rescueIfLost unless the slave matches483 # impossible to get past rescueIfLost unless the slave matches
488 # the DB, and this method isn't called unless the DB says484 # the DB, and this method isn't called unless the DB says
489 # there's a job.485 # there's a job.
490 builder_status_handlers = {
491 'BuilderStatus.BUILDING': cls.updateBuild_BUILDING,
492 'BuilderStatus.ABORTING': cls.updateBuild_ABORTING,
493 'BuilderStatus.WAITING': cls.updateBuild_WAITING,
494 }
495 statuses = yield cls.slaveStatus(slave)486 statuses = yield cls.slaveStatus(slave)
496 logger = logging.getLogger('slave-scanner')
497 status_sentence, status_dict = statuses487 status_sentence, status_dict = statuses
498 builder_status = status_dict['builder_status']488 builder_status = status_dict['builder_status']
499 if builder_status not in builder_status_handlers:489 if builder_status == 'BuilderStatus.BUILDING':
490 # Build still building, collect the logtail.
491 if vitals.build_queue.job.status != JobStatus.RUNNING:
492 # XXX: This check should be removed once we confirm it's
493 # not regularly hit.
494 raise AssertionError(
495 "Job not running when assigned and slave building.")
496 vitals.build_queue.logtail = encoding.guess(
497 str(status_dict.get('logtail')))
498 transaction.commit()
499 elif builder_status == 'BuilderStatus.ABORTING':
500 # Build is being aborted.
501 vitals.build_queue.logtail = (
502 "Waiting for slave process to be terminated")
503 transaction.commit()
504 elif builder_status == 'BuilderStatus.WAITING':
505 # Build has finished. Delegate handling to the build itself.
506 builder = builders_cache[vitals.name]
507 behavior = behavior_factory(vitals.build_queue, builder, slave)
508 behavior.updateSlaveStatus(status_sentence, status_dict)
509 yield behavior.handleStatus(
510 vitals.build_queue, cls.extractBuildStatus(status_dict),
511 status_dict)
512 else:
500 raise AssertionError("Unknown status %s" % builder_status)513 raise AssertionError("Unknown status %s" % builder_status)
501 method = builder_status_handlers[builder_status]
502 yield method(
503 queueItem, behavior, status_sentence, status_dict, logger)
504
505 @staticmethod
506 def updateBuild_BUILDING(queueItem, behavior, status_sentence, status_dict,
507 logger):
508 """Build still building, collect the logtail"""
509 if queueItem.job.status != JobStatus.RUNNING:
510 queueItem.job.start()
511 queueItem.logtail = encoding.guess(str(status_dict.get('logtail')))
512 transaction.commit()
513
514 @staticmethod
515 def updateBuild_ABORTING(queueItem, behavior, status_sentence, status_dict,
516 logger):
517 """Build was ABORTED.
518
519 Master-side should wait until the slave finish the process correctly.
520 """
521 queueItem.logtail = "Waiting for slave process to be terminated"
522 transaction.commit()
523
524 @staticmethod
525 def extractBuildStatus(status_dict):
526 """Read build status name.
527
528 :param status_dict: build status dict as passed to the
529 updateBuild_* methods.
530 :return: the unqualified status name, e.g. "OK".
531 """
532 status_string = status_dict['build_status']
533 lead_string = 'BuildStatus.'
534 assert status_string.startswith(lead_string), (
535 "Malformed status string: '%s'" % status_string)
536
537 return status_string[len(lead_string):]
538
539 @classmethod
540 def updateBuild_WAITING(cls, queueItem, behavior, status_sentence,
541 status_dict, logger):
542 """Perform the actions needed for a slave in a WAITING state
543
544 Buildslave can be WAITING in five situations:
545
546 * Build has failed, no filemap is received (PACKAGEFAIL, DEPFAIL,
547 CHROOTFAIL, BUILDERFAIL,
548 ABORTED)
549
550 * Build has been built successfully (BuildStatus.OK), in this case
551 we have a 'filemap', so we can retrieve those files and store in
552 Librarian with getFileFromSlave() and then pass the binaries to
553 the uploader for processing.
554 """
555 behavior.updateSlaveStatus(
556 status_sentence, status_dict)
557 d = behavior.handleStatus(
558 queueItem, cls.extractBuildStatus(status_dict), status_dict)
559 return d
560514
561 @staticmethod515 @staticmethod
562 def _getSlaveScannerLogger():516 def _getSlaveScannerLogger():
563517
=== modified file 'lib/lp/buildmaster/interfaces/builder.py'
--- lib/lp/buildmaster/interfaces/builder.py 2013-09-02 08:11:58 +0000
+++ lib/lp/buildmaster/interfaces/builder.py 2013-10-02 05:56:53 +0000
@@ -7,7 +7,6 @@
77
8__all__ = [8__all__ = [
9 'BuildDaemonError',9 'BuildDaemonError',
10 'CorruptBuildCookie',
11 'BuildSlaveFailure',10 'BuildSlaveFailure',
12 'CannotBuild',11 'CannotBuild',
13 'CannotFetchFile',12 'CannotFetchFile',
@@ -71,13 +70,6 @@
71 """The build slave had a protocol version. This is a serious error."""70 """The build slave had a protocol version. This is a serious error."""
7271
7372
74class CorruptBuildCookie(BuildDaemonError):
75 """The build slave is working with mismatched information.
76
77 It needs to be rescued.
78 """
79
80
81class CannotResumeHost(BuildDaemonError):73class CannotResumeHost(BuildDaemonError):
82 """The build slave virtual machine cannot be resumed."""74 """The build slave virtual machine cannot be resumed."""
8375
8476
=== modified file 'lib/lp/buildmaster/manager.py'
--- lib/lp/buildmaster/manager.py 2013-09-30 08:12:17 +0000
+++ lib/lp/buildmaster/manager.py 2013-10-02 05:56:53 +0000
@@ -296,8 +296,9 @@
296 return296 return
297 behavior = self.behavior_factory(297 behavior = self.behavior_factory(
298 vitals.build_queue, builder, slave)298 vitals.build_queue, builder, slave)
299 db_cookie = behavior.getBuildCookie() if behavior else None
299 lost = yield interactor.rescueIfLost(300 lost = yield interactor.rescueIfLost(
300 vitals, slave, behavior, self.logger)301 vitals, slave, db_cookie, self.logger)
301 if lost:302 if lost:
302 lost_reason = '%s is lost' % vitals.name303 lost_reason = '%s is lost' % vitals.name
303304
@@ -320,9 +321,8 @@
320 if vitals.build_queue is not None:321 if vitals.build_queue is not None:
321 # Scan the slave and get the logtail, or collect the build322 # Scan the slave and get the logtail, or collect the build
322 # if it's ready. Yes, "updateBuild" is a bad name.323 # if it's ready. Yes, "updateBuild" is a bad name.
323 behavior = behavior or self.behavior_factory(324 yield interactor.updateBuild(
324 vitals.build_queue, builder, slave)325 vitals, slave, self.builders_cache, self.behavior_factory)
325 yield interactor.updateBuild(vitals.build_queue, slave, behavior)
326 elif vitals.manual:326 elif vitals.manual:
327 # If the builder is in manual mode, don't dispatch anything.327 # If the builder is in manual mode, don't dispatch anything.
328 self.logger.debug(328 self.logger.debug(
329329
=== modified file 'lib/lp/buildmaster/tests/test_interactor.py'
--- lib/lp/buildmaster/tests/test_interactor.py 2013-10-01 01:07:35 +0000
+++ lib/lp/buildmaster/tests/test_interactor.py 2013-10-02 05:56:53 +0000
@@ -30,7 +30,6 @@
30from lp.buildmaster.interfaces.builder import (30from lp.buildmaster.interfaces.builder import (
31 CannotFetchFile,31 CannotFetchFile,
32 CannotResumeHost,32 CannotResumeHost,
33 CorruptBuildCookie,
34 )33 )
35from lp.buildmaster.tests.mock_slaves import (34from lp.buildmaster.tests.mock_slaves import (
36 AbortingSlave,35 AbortingSlave,
@@ -40,7 +39,6 @@
40 MockBuilder,39 MockBuilder,
41 OkSlave,40 OkSlave,
42 SlaveTestHelpers,41 SlaveTestHelpers,
43 TrivialBehavior,
44 WaitingSlave,42 WaitingSlave,
45 )43 )
46from lp.services.config import config44from lp.services.config import config
@@ -78,38 +76,6 @@
78 self.assertRaises(76 self.assertRaises(
79 AssertionError, BuilderInteractor.extractBuildStatus, slave_status)77 AssertionError, BuilderInteractor.extractBuildStatus, slave_status)
8078
81 def test_verifySlaveBuildCookie_building_match(self):
82 BuilderInteractor.verifySlaveBuildCookie(TrivialBehavior(), 'trivial')
83
84 def test_verifySlaveBuildCookie_building_mismatch(self):
85 self.assertRaises(
86 CorruptBuildCookie,
87 BuilderInteractor.verifySlaveBuildCookie,
88 TrivialBehavior(), 'difficult')
89
90 def test_verifySlaveBuildCookie_idle_match(self):
91 BuilderInteractor.verifySlaveBuildCookie(None, None)
92
93 def test_verifySlaveBuildCookie_idle_mismatch(self):
94 self.assertRaises(
95 CorruptBuildCookie,
96 BuilderInteractor.verifySlaveBuildCookie, None, 'foo')
97
98 def test_rescueIfLost_aborts_lost_and_broken_slave(self):
99 # A slave that's 'lost' should be aborted; when the slave is
100 # broken then abort() should also throw a fault.
101 slave = LostBuildingBrokenSlave()
102 d = BuilderInteractor.rescueIfLost(
103 extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior())
104
105 def check_slave_status(failure):
106 self.assertIn('abort', slave.call_log)
107 # 'Fault' comes from the LostBuildingBrokenSlave, this is
108 # just testing that the value is passed through.
109 self.assertIsInstance(failure.value, xmlrpclib.Fault)
110
111 return d.addBoth(check_slave_status)
112
113 def resumeSlaveHost(self, builder):79 def resumeSlaveHost(self, builder):
114 vitals = extract_vitals_from_db(builder)80 vitals = extract_vitals_from_db(builder)
115 return BuilderInteractor.resumeSlaveHost(81 return BuilderInteractor.resumeSlaveHost(
@@ -183,6 +149,21 @@
183 slave = BuilderInteractor.makeSlaveFromVitals(vitals)149 slave = BuilderInteractor.makeSlaveFromVitals(vitals)
184 self.assertEqual(5, slave.timeout)150 self.assertEqual(5, slave.timeout)
185151
152 def test_rescueIfLost_aborts_lost_and_broken_slave(self):
153 # A slave that's 'lost' should be aborted; when the slave is
154 # broken then abort() should also throw a fault.
155 slave = LostBuildingBrokenSlave()
156 d = BuilderInteractor.rescueIfLost(
157 extract_vitals_from_db(MockBuilder()), slave, 'trivial')
158
159 def check_slave_status(failure):
160 self.assertIn('abort', slave.call_log)
161 # 'Fault' comes from the LostBuildingBrokenSlave, this is
162 # just testing that the value is passed through.
163 self.assertIsInstance(failure.value, xmlrpclib.Fault)
164
165 return d.addBoth(check_slave_status)
166
186 @defer.inlineCallbacks167 @defer.inlineCallbacks
187 def test_recover_idle_slave(self):168 def test_recover_idle_slave(self):
188 # An idle slave is not rescued, even if it's not meant to be169 # An idle slave is not rescued, even if it's not meant to be
@@ -190,7 +171,7 @@
190 # we still report that it's lost.171 # we still report that it's lost.
191 slave = OkSlave()172 slave = OkSlave()
192 lost = yield BuilderInteractor.rescueIfLost(173 lost = yield BuilderInteractor.rescueIfLost(
193 extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior())174 extract_vitals_from_db(MockBuilder()), slave, 'trivial')
194 self.assertTrue(lost)175 self.assertTrue(lost)
195 self.assertEqual([], slave.call_log)176 self.assertEqual([], slave.call_log)
196177
@@ -209,8 +190,7 @@
209 # WAITING.190 # WAITING.
210 waiting_slave = WaitingSlave(build_id='trivial')191 waiting_slave = WaitingSlave(build_id='trivial')
211 lost = yield BuilderInteractor.rescueIfLost(192 lost = yield BuilderInteractor.rescueIfLost(
212 extract_vitals_from_db(MockBuilder()), waiting_slave,193 extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial')
213 TrivialBehavior())
214 self.assertFalse(lost)194 self.assertFalse(lost)
215 self.assertEqual(['status'], waiting_slave.call_log)195 self.assertEqual(['status'], waiting_slave.call_log)
216196
@@ -223,8 +203,7 @@
223 # discarded.203 # discarded.
224 waiting_slave = WaitingSlave(build_id='non-trivial')204 waiting_slave = WaitingSlave(build_id='non-trivial')
225 lost = yield BuilderInteractor.rescueIfLost(205 lost = yield BuilderInteractor.rescueIfLost(
226 extract_vitals_from_db(MockBuilder()), waiting_slave,206 extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial')
227 TrivialBehavior())
228 self.assertTrue(lost)207 self.assertTrue(lost)
229 self.assertEqual(['status', 'clean'], waiting_slave.call_log)208 self.assertEqual(['status', 'clean'], waiting_slave.call_log)
230209
@@ -234,8 +213,7 @@
234 # BUILDING.213 # BUILDING.
235 building_slave = BuildingSlave(build_id='trivial')214 building_slave = BuildingSlave(build_id='trivial')
236 lost = yield BuilderInteractor.rescueIfLost(215 lost = yield BuilderInteractor.rescueIfLost(
237 extract_vitals_from_db(MockBuilder()), building_slave,216 extract_vitals_from_db(MockBuilder()), building_slave, 'trivial')
238 TrivialBehavior())
239 self.assertFalse(lost)217 self.assertFalse(lost)
240 self.assertEqual(['status'], building_slave.call_log)218 self.assertEqual(['status'], building_slave.call_log)
241219
@@ -245,8 +223,7 @@
245 # abort the build, thus stopping it in its tracks.223 # abort the build, thus stopping it in its tracks.
246 building_slave = BuildingSlave(build_id='non-trivial')224 building_slave = BuildingSlave(build_id='non-trivial')
247 lost = yield BuilderInteractor.rescueIfLost(225 lost = yield BuilderInteractor.rescueIfLost(
248 extract_vitals_from_db(MockBuilder()), building_slave,226 extract_vitals_from_db(MockBuilder()), building_slave, 'trivial')
249 TrivialBehavior())
250 self.assertTrue(lost)227 self.assertTrue(lost)
251 self.assertEqual(['status', 'abort'], building_slave.call_log)228 self.assertEqual(['status', 'abort'], building_slave.call_log)
252229
253230
=== modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py'
--- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-09-30 08:12:17 +0000
+++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-10-02 05:56:53 +0000
@@ -36,6 +36,7 @@
36 TestGetUploadMethodsMixin,36 TestGetUploadMethodsMixin,
37 TestHandleStatusMixin,37 TestHandleStatusMixin,
38 )38 )
39from lp.buildmaster.tests.test_manager import MockBuildersCache
39from lp.registry.interfaces.pocket import (40from lp.registry.interfaces.pocket import (
40 PackagePublishingPocket,41 PackagePublishingPocket,
41 pocketsuffix,42 pocketsuffix,
@@ -338,9 +339,9 @@
338 self.addCleanup(self._cleanup)339 self.addCleanup(self._cleanup)
339340
340 def updateBuild(self, candidate, slave):341 def updateBuild(self, candidate, slave):
342 bc = MockBuildersCache(self.builder, candidate)
341 return self.interactor.updateBuild(343 return self.interactor.updateBuild(
342 candidate, slave,344 bc.getVitals('foo'), slave, bc, self.interactor.getBuildBehavior)
343 self.interactor.getBuildBehavior(candidate, self.builder, slave))
344345
345 def assertBuildProperties(self, build):346 def assertBuildProperties(self, build):
346 """Check that a build happened by making sure some of its properties347 """Check that a build happened by making sure some of its properties