Merge lp:~wgrant/launchpad/bi-uB-no-bfjb into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: Steve Kowalik
Approved revision: no longer in the source branch.
Merged at revision: 16792
Proposed branch: lp:~wgrant/launchpad/bi-uB-no-bfjb
Merge into: lp:launchpad
Diff against target: 409 lines (+77/-153)
5 files modified
lib/lp/buildmaster/interactor.py (+50/-96)
lib/lp/buildmaster/interfaces/builder.py (+0/-8)
lib/lp/buildmaster/manager.py (+4/-4)
lib/lp/buildmaster/tests/test_interactor.py (+20/-43)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+3/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/bi-uB-no-bfjb
Reviewer Review Type Date Requested Status
Steve Kowalik (community) code Approve
Review via email: mp+188766@code.launchpad.net

Commit message

BuilderInteractor.updateBuild constructs a BuildFarmJobBehavior itself as necessary, while BuilderInteractor.rescueIfLost is given a cookie directly.

Description of the change

More BuilderInteractor refactorment. In this episode, updateBuild retrieves the Builder and constructs a BuildFarmJobBehavior itself only when necessary (on build completion), allowing the general case to avoid hitting the DB except for the BuildQueue.logtail write. Meanwhile, rescueIfLost takes the expected cookie rather than calculating it itself, so SlaveScanner can eventually cache the cookie without hitting the DB each time.

To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/buildmaster/interactor.py'
2--- lib/lp/buildmaster/interactor.py 2013-10-01 00:38:38 +0000
3+++ lib/lp/buildmaster/interactor.py 2013-10-02 05:56:53 +0000
4@@ -25,7 +25,6 @@
5 BuildDaemonError,
6 CannotFetchFile,
7 CannotResumeHost,
8- CorruptBuildCookie,
9 )
10 from lp.buildmaster.interfaces.buildfarmjobbehavior import (
11 IBuildFarmJobBehavior,
12@@ -267,29 +266,15 @@
13 status['logtail'] = status_sentence[2]
14 defer.returnValue((status_sentence, status))
15
16- @staticmethod
17- def verifySlaveBuildCookie(behavior, slave_cookie):
18- """See `IBuildFarmJobBehavior`."""
19- if behavior is None:
20- if slave_cookie is not None:
21- raise CorruptBuildCookie('Slave building when should be idle.')
22- else:
23- good_cookie = behavior.getBuildCookie()
24- if slave_cookie != good_cookie:
25- raise CorruptBuildCookie(
26- "Invalid slave build cookie: got %r, expected %r."
27- % (slave_cookie, good_cookie))
28-
29 @classmethod
30 @defer.inlineCallbacks
31- def rescueIfLost(cls, vitals, slave, behavior, logger=None):
32+ def rescueIfLost(cls, vitals, slave, expected_cookie, logger=None):
33 """Reset the slave if its job information doesn't match the DB.
34
35- This checks the build ID reported in the slave status against the
36- database. If it isn't building what we think it should be, the current
37- build will be aborted and the slave cleaned in preparation for a new
38- task. The decision about the slave's correctness is left up to
39- `IBuildFarmJobBehavior.verifySlaveBuildCookie`.
40+ This checks the build ID reported in the slave status against
41+ the given cookie. If it isn't building what we think it should
42+ be, the current build will be aborted and the slave cleaned in
43+ preparation for a new task.
44
45 :return: A Deferred that fires when the dialog with the slave is
46 finished. Its return value is True if the slave is lost,
47@@ -315,14 +300,12 @@
48 else:
49 slave_cookie = status_sentence[ident_position[status]]
50
51- # verifySlaveBuildCookie will raise CorruptBuildCookie if the
52- # slave cookie doesn't match the expected one, including
53- # verifying that the slave cookie is None iff we expect the
54- # slave to be idle.
55- try:
56- cls.verifySlaveBuildCookie(behavior, slave_cookie)
57+ if slave_cookie == expected_cookie:
58+ # The master and slave agree about the current job. Continue.
59 defer.returnValue(False)
60- except CorruptBuildCookie as reason:
61+ else:
62+ # The master and slave disagree. The master is our master,
63+ # so try to rescue the slave.
64 # An IDLE slave doesn't need rescuing (SlaveScanner.scan
65 # will rescue the DB side instead), and we just have to wait
66 # out an ABORTING one.
67@@ -332,8 +315,8 @@
68 yield slave.abort()
69 if logger:
70 logger.info(
71- "Builder '%s' rescued from '%s': '%s'" %
72- (vitals.name, slave_cookie, reason))
73+ "Builder slave '%s' rescued from %r (expected %r)." %
74+ (vitals.name, slave_cookie, expected_cookie))
75 defer.returnValue(True)
76
77 @classmethod
78@@ -474,9 +457,22 @@
79 candidate, vitals, builder, slave, new_behavior, logger)
80 defer.returnValue(candidate)
81
82+ @staticmethod
83+ def extractBuildStatus(status_dict):
84+ """Read build status name.
85+
86+ :param status_dict: build status dict from slaveStatus.
87+ :return: the unqualified status name, e.g. "OK".
88+ """
89+ status_string = status_dict['build_status']
90+ lead_string = 'BuildStatus.'
91+ assert status_string.startswith(lead_string), (
92+ "Malformed status string: '%s'" % status_string)
93+ return status_string[len(lead_string):]
94+
95 @classmethod
96 @defer.inlineCallbacks
97- def updateBuild(cls, queueItem, slave, behavior):
98+ def updateBuild(cls, vitals, slave, builders_cache, behavior_factory):
99 """Verify the current build job status.
100
101 Perform the required actions for each state.
102@@ -487,76 +483,34 @@
103 # impossible to get past rescueIfLost unless the slave matches
104 # the DB, and this method isn't called unless the DB says
105 # there's a job.
106- builder_status_handlers = {
107- 'BuilderStatus.BUILDING': cls.updateBuild_BUILDING,
108- 'BuilderStatus.ABORTING': cls.updateBuild_ABORTING,
109- 'BuilderStatus.WAITING': cls.updateBuild_WAITING,
110- }
111 statuses = yield cls.slaveStatus(slave)
112- logger = logging.getLogger('slave-scanner')
113 status_sentence, status_dict = statuses
114 builder_status = status_dict['builder_status']
115- if builder_status not in builder_status_handlers:
116+ if builder_status == 'BuilderStatus.BUILDING':
117+ # Build still building, collect the logtail.
118+ if vitals.build_queue.job.status != JobStatus.RUNNING:
119+ # XXX: This check should be removed once we confirm it's
120+ # not regularly hit.
121+ raise AssertionError(
122+ "Job not running when assigned and slave building.")
123+ vitals.build_queue.logtail = encoding.guess(
124+ str(status_dict.get('logtail')))
125+ transaction.commit()
126+ elif builder_status == 'BuilderStatus.ABORTING':
127+ # Build is being aborted.
128+ vitals.build_queue.logtail = (
129+ "Waiting for slave process to be terminated")
130+ transaction.commit()
131+ elif builder_status == 'BuilderStatus.WAITING':
132+ # Build has finished. Delegate handling to the build itself.
133+ builder = builders_cache[vitals.name]
134+ behavior = behavior_factory(vitals.build_queue, builder, slave)
135+ behavior.updateSlaveStatus(status_sentence, status_dict)
136+ yield behavior.handleStatus(
137+ vitals.build_queue, cls.extractBuildStatus(status_dict),
138+ status_dict)
139+ else:
140 raise AssertionError("Unknown status %s" % builder_status)
141- method = builder_status_handlers[builder_status]
142- yield method(
143- queueItem, behavior, status_sentence, status_dict, logger)
144-
145- @staticmethod
146- def updateBuild_BUILDING(queueItem, behavior, status_sentence, status_dict,
147- logger):
148- """Build still building, collect the logtail"""
149- if queueItem.job.status != JobStatus.RUNNING:
150- queueItem.job.start()
151- queueItem.logtail = encoding.guess(str(status_dict.get('logtail')))
152- transaction.commit()
153-
154- @staticmethod
155- def updateBuild_ABORTING(queueItem, behavior, status_sentence, status_dict,
156- logger):
157- """Build was ABORTED.
158-
159- Master-side should wait until the slave finish the process correctly.
160- """
161- queueItem.logtail = "Waiting for slave process to be terminated"
162- transaction.commit()
163-
164- @staticmethod
165- def extractBuildStatus(status_dict):
166- """Read build status name.
167-
168- :param status_dict: build status dict as passed to the
169- updateBuild_* methods.
170- :return: the unqualified status name, e.g. "OK".
171- """
172- status_string = status_dict['build_status']
173- lead_string = 'BuildStatus.'
174- assert status_string.startswith(lead_string), (
175- "Malformed status string: '%s'" % status_string)
176-
177- return status_string[len(lead_string):]
178-
179- @classmethod
180- def updateBuild_WAITING(cls, queueItem, behavior, status_sentence,
181- status_dict, logger):
182- """Perform the actions needed for a slave in a WAITING state
183-
184- Buildslave can be WAITING in five situations:
185-
186- * Build has failed, no filemap is received (PACKAGEFAIL, DEPFAIL,
187- CHROOTFAIL, BUILDERFAIL,
188- ABORTED)
189-
190- * Build has been built successfully (BuildStatus.OK), in this case
191- we have a 'filemap', so we can retrieve those files and store in
192- Librarian with getFileFromSlave() and then pass the binaries to
193- the uploader for processing.
194- """
195- behavior.updateSlaveStatus(
196- status_sentence, status_dict)
197- d = behavior.handleStatus(
198- queueItem, cls.extractBuildStatus(status_dict), status_dict)
199- return d
200
201 @staticmethod
202 def _getSlaveScannerLogger():
203
204=== modified file 'lib/lp/buildmaster/interfaces/builder.py'
205--- lib/lp/buildmaster/interfaces/builder.py 2013-09-02 08:11:58 +0000
206+++ lib/lp/buildmaster/interfaces/builder.py 2013-10-02 05:56:53 +0000
207@@ -7,7 +7,6 @@
208
209 __all__ = [
210 'BuildDaemonError',
211- 'CorruptBuildCookie',
212 'BuildSlaveFailure',
213 'CannotBuild',
214 'CannotFetchFile',
215@@ -71,13 +70,6 @@
216 """The build slave had a protocol version. This is a serious error."""
217
218
219-class CorruptBuildCookie(BuildDaemonError):
220- """The build slave is working with mismatched information.
221-
222- It needs to be rescued.
223- """
224-
225-
226 class CannotResumeHost(BuildDaemonError):
227 """The build slave virtual machine cannot be resumed."""
228
229
230=== modified file 'lib/lp/buildmaster/manager.py'
231--- lib/lp/buildmaster/manager.py 2013-09-30 08:12:17 +0000
232+++ lib/lp/buildmaster/manager.py 2013-10-02 05:56:53 +0000
233@@ -296,8 +296,9 @@
234 return
235 behavior = self.behavior_factory(
236 vitals.build_queue, builder, slave)
237+ db_cookie = behavior.getBuildCookie() if behavior else None
238 lost = yield interactor.rescueIfLost(
239- vitals, slave, behavior, self.logger)
240+ vitals, slave, db_cookie, self.logger)
241 if lost:
242 lost_reason = '%s is lost' % vitals.name
243
244@@ -320,9 +321,8 @@
245 if vitals.build_queue is not None:
246 # Scan the slave and get the logtail, or collect the build
247 # if it's ready. Yes, "updateBuild" is a bad name.
248- behavior = behavior or self.behavior_factory(
249- vitals.build_queue, builder, slave)
250- yield interactor.updateBuild(vitals.build_queue, slave, behavior)
251+ yield interactor.updateBuild(
252+ vitals, slave, self.builders_cache, self.behavior_factory)
253 elif vitals.manual:
254 # If the builder is in manual mode, don't dispatch anything.
255 self.logger.debug(
256
257=== modified file 'lib/lp/buildmaster/tests/test_interactor.py'
258--- lib/lp/buildmaster/tests/test_interactor.py 2013-10-01 01:07:35 +0000
259+++ lib/lp/buildmaster/tests/test_interactor.py 2013-10-02 05:56:53 +0000
260@@ -30,7 +30,6 @@
261 from lp.buildmaster.interfaces.builder import (
262 CannotFetchFile,
263 CannotResumeHost,
264- CorruptBuildCookie,
265 )
266 from lp.buildmaster.tests.mock_slaves import (
267 AbortingSlave,
268@@ -40,7 +39,6 @@
269 MockBuilder,
270 OkSlave,
271 SlaveTestHelpers,
272- TrivialBehavior,
273 WaitingSlave,
274 )
275 from lp.services.config import config
276@@ -78,38 +76,6 @@
277 self.assertRaises(
278 AssertionError, BuilderInteractor.extractBuildStatus, slave_status)
279
280- def test_verifySlaveBuildCookie_building_match(self):
281- BuilderInteractor.verifySlaveBuildCookie(TrivialBehavior(), 'trivial')
282-
283- def test_verifySlaveBuildCookie_building_mismatch(self):
284- self.assertRaises(
285- CorruptBuildCookie,
286- BuilderInteractor.verifySlaveBuildCookie,
287- TrivialBehavior(), 'difficult')
288-
289- def test_verifySlaveBuildCookie_idle_match(self):
290- BuilderInteractor.verifySlaveBuildCookie(None, None)
291-
292- def test_verifySlaveBuildCookie_idle_mismatch(self):
293- self.assertRaises(
294- CorruptBuildCookie,
295- BuilderInteractor.verifySlaveBuildCookie, None, 'foo')
296-
297- def test_rescueIfLost_aborts_lost_and_broken_slave(self):
298- # A slave that's 'lost' should be aborted; when the slave is
299- # broken then abort() should also throw a fault.
300- slave = LostBuildingBrokenSlave()
301- d = BuilderInteractor.rescueIfLost(
302- extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior())
303-
304- def check_slave_status(failure):
305- self.assertIn('abort', slave.call_log)
306- # 'Fault' comes from the LostBuildingBrokenSlave, this is
307- # just testing that the value is passed through.
308- self.assertIsInstance(failure.value, xmlrpclib.Fault)
309-
310- return d.addBoth(check_slave_status)
311-
312 def resumeSlaveHost(self, builder):
313 vitals = extract_vitals_from_db(builder)
314 return BuilderInteractor.resumeSlaveHost(
315@@ -183,6 +149,21 @@
316 slave = BuilderInteractor.makeSlaveFromVitals(vitals)
317 self.assertEqual(5, slave.timeout)
318
319+ def test_rescueIfLost_aborts_lost_and_broken_slave(self):
320+ # A slave that's 'lost' should be aborted; when the slave is
321+ # broken then abort() should also throw a fault.
322+ slave = LostBuildingBrokenSlave()
323+ d = BuilderInteractor.rescueIfLost(
324+ extract_vitals_from_db(MockBuilder()), slave, 'trivial')
325+
326+ def check_slave_status(failure):
327+ self.assertIn('abort', slave.call_log)
328+ # 'Fault' comes from the LostBuildingBrokenSlave, this is
329+ # just testing that the value is passed through.
330+ self.assertIsInstance(failure.value, xmlrpclib.Fault)
331+
332+ return d.addBoth(check_slave_status)
333+
334 @defer.inlineCallbacks
335 def test_recover_idle_slave(self):
336 # An idle slave is not rescued, even if it's not meant to be
337@@ -190,7 +171,7 @@
338 # we still report that it's lost.
339 slave = OkSlave()
340 lost = yield BuilderInteractor.rescueIfLost(
341- extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior())
342+ extract_vitals_from_db(MockBuilder()), slave, 'trivial')
343 self.assertTrue(lost)
344 self.assertEqual([], slave.call_log)
345
346@@ -209,8 +190,7 @@
347 # WAITING.
348 waiting_slave = WaitingSlave(build_id='trivial')
349 lost = yield BuilderInteractor.rescueIfLost(
350- extract_vitals_from_db(MockBuilder()), waiting_slave,
351- TrivialBehavior())
352+ extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial')
353 self.assertFalse(lost)
354 self.assertEqual(['status'], waiting_slave.call_log)
355
356@@ -223,8 +203,7 @@
357 # discarded.
358 waiting_slave = WaitingSlave(build_id='non-trivial')
359 lost = yield BuilderInteractor.rescueIfLost(
360- extract_vitals_from_db(MockBuilder()), waiting_slave,
361- TrivialBehavior())
362+ extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial')
363 self.assertTrue(lost)
364 self.assertEqual(['status', 'clean'], waiting_slave.call_log)
365
366@@ -234,8 +213,7 @@
367 # BUILDING.
368 building_slave = BuildingSlave(build_id='trivial')
369 lost = yield BuilderInteractor.rescueIfLost(
370- extract_vitals_from_db(MockBuilder()), building_slave,
371- TrivialBehavior())
372+ extract_vitals_from_db(MockBuilder()), building_slave, 'trivial')
373 self.assertFalse(lost)
374 self.assertEqual(['status'], building_slave.call_log)
375
376@@ -245,8 +223,7 @@
377 # abort the build, thus stopping it in its tracks.
378 building_slave = BuildingSlave(build_id='non-trivial')
379 lost = yield BuilderInteractor.rescueIfLost(
380- extract_vitals_from_db(MockBuilder()), building_slave,
381- TrivialBehavior())
382+ extract_vitals_from_db(MockBuilder()), building_slave, 'trivial')
383 self.assertTrue(lost)
384 self.assertEqual(['status', 'abort'], building_slave.call_log)
385
386
387=== modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py'
388--- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-09-30 08:12:17 +0000
389+++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-10-02 05:56:53 +0000
390@@ -36,6 +36,7 @@
391 TestGetUploadMethodsMixin,
392 TestHandleStatusMixin,
393 )
394+from lp.buildmaster.tests.test_manager import MockBuildersCache
395 from lp.registry.interfaces.pocket import (
396 PackagePublishingPocket,
397 pocketsuffix,
398@@ -338,9 +339,9 @@
399 self.addCleanup(self._cleanup)
400
401 def updateBuild(self, candidate, slave):
402+ bc = MockBuildersCache(self.builder, candidate)
403 return self.interactor.updateBuild(
404- candidate, slave,
405- self.interactor.getBuildBehavior(candidate, self.builder, slave))
406+ bc.getVitals('foo'), slave, bc, self.interactor.getBuildBehavior)
407
408 def assertBuildProperties(self, build):
409 """Check that a build happened by making sure some of its properties