Merge lp:~wgrant/launchpad/bi-uB-no-bfjb into lp:launchpad
- bi-uB-no-bfjb
- Merge into devel
Proposed by
William Grant
Status: | Merged |
---|---|
Approved by: | Steve Kowalik |
Approved revision: | no longer in the source branch. |
Merged at revision: | 16792 |
Proposed branch: | lp:~wgrant/launchpad/bi-uB-no-bfjb |
Merge into: | lp:launchpad |
Diff against target: |
409 lines (+77/-153) 5 files modified
lib/lp/buildmaster/interactor.py (+50/-96) lib/lp/buildmaster/interfaces/builder.py (+0/-8) lib/lp/buildmaster/manager.py (+4/-4) lib/lp/buildmaster/tests/test_interactor.py (+20/-43) lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+3/-2) |
To merge this branch: | bzr merge lp:~wgrant/launchpad/bi-uB-no-bfjb |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Steve Kowalik (community) | code | Approve | |
Review via email: mp+188766@code.launchpad.net |
Commit message
BuilderInteract
Description of the change
More BuilderInteractor refactorment. In this episode, updateBuild retrieves the Builder and constructs a BuildFarmJobBeh
To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) : | # |
review:
Approve
(code)
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/buildmaster/interactor.py' |
2 | --- lib/lp/buildmaster/interactor.py 2013-10-01 00:38:38 +0000 |
3 | +++ lib/lp/buildmaster/interactor.py 2013-10-02 05:56:53 +0000 |
4 | @@ -25,7 +25,6 @@ |
5 | BuildDaemonError, |
6 | CannotFetchFile, |
7 | CannotResumeHost, |
8 | - CorruptBuildCookie, |
9 | ) |
10 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( |
11 | IBuildFarmJobBehavior, |
12 | @@ -267,29 +266,15 @@ |
13 | status['logtail'] = status_sentence[2] |
14 | defer.returnValue((status_sentence, status)) |
15 | |
16 | - @staticmethod |
17 | - def verifySlaveBuildCookie(behavior, slave_cookie): |
18 | - """See `IBuildFarmJobBehavior`.""" |
19 | - if behavior is None: |
20 | - if slave_cookie is not None: |
21 | - raise CorruptBuildCookie('Slave building when should be idle.') |
22 | - else: |
23 | - good_cookie = behavior.getBuildCookie() |
24 | - if slave_cookie != good_cookie: |
25 | - raise CorruptBuildCookie( |
26 | - "Invalid slave build cookie: got %r, expected %r." |
27 | - % (slave_cookie, good_cookie)) |
28 | - |
29 | @classmethod |
30 | @defer.inlineCallbacks |
31 | - def rescueIfLost(cls, vitals, slave, behavior, logger=None): |
32 | + def rescueIfLost(cls, vitals, slave, expected_cookie, logger=None): |
33 | """Reset the slave if its job information doesn't match the DB. |
34 | |
35 | - This checks the build ID reported in the slave status against the |
36 | - database. If it isn't building what we think it should be, the current |
37 | - build will be aborted and the slave cleaned in preparation for a new |
38 | - task. The decision about the slave's correctness is left up to |
39 | - `IBuildFarmJobBehavior.verifySlaveBuildCookie`. |
40 | + This checks the build ID reported in the slave status against |
41 | + the given cookie. If it isn't building what we think it should |
42 | + be, the current build will be aborted and the slave cleaned in |
43 | + preparation for a new task. |
44 | |
45 | :return: A Deferred that fires when the dialog with the slave is |
46 | finished. Its return value is True if the slave is lost, |
47 | @@ -315,14 +300,12 @@ |
48 | else: |
49 | slave_cookie = status_sentence[ident_position[status]] |
50 | |
51 | - # verifySlaveBuildCookie will raise CorruptBuildCookie if the |
52 | - # slave cookie doesn't match the expected one, including |
53 | - # verifying that the slave cookie is None iff we expect the |
54 | - # slave to be idle. |
55 | - try: |
56 | - cls.verifySlaveBuildCookie(behavior, slave_cookie) |
57 | + if slave_cookie == expected_cookie: |
58 | + # The master and slave agree about the current job. Continue. |
59 | defer.returnValue(False) |
60 | - except CorruptBuildCookie as reason: |
61 | + else: |
62 | + # The master and slave disagree. The master is our master, |
63 | + # so try to rescue the slave. |
64 | # An IDLE slave doesn't need rescuing (SlaveScanner.scan |
65 | # will rescue the DB side instead), and we just have to wait |
66 | # out an ABORTING one. |
67 | @@ -332,8 +315,8 @@ |
68 | yield slave.abort() |
69 | if logger: |
70 | logger.info( |
71 | - "Builder '%s' rescued from '%s': '%s'" % |
72 | - (vitals.name, slave_cookie, reason)) |
73 | + "Builder slave '%s' rescued from %r (expected %r)." % |
74 | + (vitals.name, slave_cookie, expected_cookie)) |
75 | defer.returnValue(True) |
76 | |
77 | @classmethod |
78 | @@ -474,9 +457,22 @@ |
79 | candidate, vitals, builder, slave, new_behavior, logger) |
80 | defer.returnValue(candidate) |
81 | |
82 | + @staticmethod |
83 | + def extractBuildStatus(status_dict): |
84 | + """Read build status name. |
85 | + |
86 | + :param status_dict: build status dict from slaveStatus. |
87 | + :return: the unqualified status name, e.g. "OK". |
88 | + """ |
89 | + status_string = status_dict['build_status'] |
90 | + lead_string = 'BuildStatus.' |
91 | + assert status_string.startswith(lead_string), ( |
92 | + "Malformed status string: '%s'" % status_string) |
93 | + return status_string[len(lead_string):] |
94 | + |
95 | @classmethod |
96 | @defer.inlineCallbacks |
97 | - def updateBuild(cls, queueItem, slave, behavior): |
98 | + def updateBuild(cls, vitals, slave, builders_cache, behavior_factory): |
99 | """Verify the current build job status. |
100 | |
101 | Perform the required actions for each state. |
102 | @@ -487,76 +483,34 @@ |
103 | # impossible to get past rescueIfLost unless the slave matches |
104 | # the DB, and this method isn't called unless the DB says |
105 | # there's a job. |
106 | - builder_status_handlers = { |
107 | - 'BuilderStatus.BUILDING': cls.updateBuild_BUILDING, |
108 | - 'BuilderStatus.ABORTING': cls.updateBuild_ABORTING, |
109 | - 'BuilderStatus.WAITING': cls.updateBuild_WAITING, |
110 | - } |
111 | statuses = yield cls.slaveStatus(slave) |
112 | - logger = logging.getLogger('slave-scanner') |
113 | status_sentence, status_dict = statuses |
114 | builder_status = status_dict['builder_status'] |
115 | - if builder_status not in builder_status_handlers: |
116 | + if builder_status == 'BuilderStatus.BUILDING': |
117 | + # Build still building, collect the logtail. |
118 | + if vitals.build_queue.job.status != JobStatus.RUNNING: |
119 | + # XXX: This check should be removed once we confirm it's |
120 | + # not regularly hit. |
121 | + raise AssertionError( |
122 | + "Job not running when assigned and slave building.") |
123 | + vitals.build_queue.logtail = encoding.guess( |
124 | + str(status_dict.get('logtail'))) |
125 | + transaction.commit() |
126 | + elif builder_status == 'BuilderStatus.ABORTING': |
127 | + # Build is being aborted. |
128 | + vitals.build_queue.logtail = ( |
129 | + "Waiting for slave process to be terminated") |
130 | + transaction.commit() |
131 | + elif builder_status == 'BuilderStatus.WAITING': |
132 | + # Build has finished. Delegate handling to the build itself. |
133 | + builder = builders_cache[vitals.name] |
134 | + behavior = behavior_factory(vitals.build_queue, builder, slave) |
135 | + behavior.updateSlaveStatus(status_sentence, status_dict) |
136 | + yield behavior.handleStatus( |
137 | + vitals.build_queue, cls.extractBuildStatus(status_dict), |
138 | + status_dict) |
139 | + else: |
140 | raise AssertionError("Unknown status %s" % builder_status) |
141 | - method = builder_status_handlers[builder_status] |
142 | - yield method( |
143 | - queueItem, behavior, status_sentence, status_dict, logger) |
144 | - |
145 | - @staticmethod |
146 | - def updateBuild_BUILDING(queueItem, behavior, status_sentence, status_dict, |
147 | - logger): |
148 | - """Build still building, collect the logtail""" |
149 | - if queueItem.job.status != JobStatus.RUNNING: |
150 | - queueItem.job.start() |
151 | - queueItem.logtail = encoding.guess(str(status_dict.get('logtail'))) |
152 | - transaction.commit() |
153 | - |
154 | - @staticmethod |
155 | - def updateBuild_ABORTING(queueItem, behavior, status_sentence, status_dict, |
156 | - logger): |
157 | - """Build was ABORTED. |
158 | - |
159 | - Master-side should wait until the slave finish the process correctly. |
160 | - """ |
161 | - queueItem.logtail = "Waiting for slave process to be terminated" |
162 | - transaction.commit() |
163 | - |
164 | - @staticmethod |
165 | - def extractBuildStatus(status_dict): |
166 | - """Read build status name. |
167 | - |
168 | - :param status_dict: build status dict as passed to the |
169 | - updateBuild_* methods. |
170 | - :return: the unqualified status name, e.g. "OK". |
171 | - """ |
172 | - status_string = status_dict['build_status'] |
173 | - lead_string = 'BuildStatus.' |
174 | - assert status_string.startswith(lead_string), ( |
175 | - "Malformed status string: '%s'" % status_string) |
176 | - |
177 | - return status_string[len(lead_string):] |
178 | - |
179 | - @classmethod |
180 | - def updateBuild_WAITING(cls, queueItem, behavior, status_sentence, |
181 | - status_dict, logger): |
182 | - """Perform the actions needed for a slave in a WAITING state |
183 | - |
184 | - Buildslave can be WAITING in five situations: |
185 | - |
186 | - * Build has failed, no filemap is received (PACKAGEFAIL, DEPFAIL, |
187 | - CHROOTFAIL, BUILDERFAIL, |
188 | - ABORTED) |
189 | - |
190 | - * Build has been built successfully (BuildStatus.OK), in this case |
191 | - we have a 'filemap', so we can retrieve those files and store in |
192 | - Librarian with getFileFromSlave() and then pass the binaries to |
193 | - the uploader for processing. |
194 | - """ |
195 | - behavior.updateSlaveStatus( |
196 | - status_sentence, status_dict) |
197 | - d = behavior.handleStatus( |
198 | - queueItem, cls.extractBuildStatus(status_dict), status_dict) |
199 | - return d |
200 | |
201 | @staticmethod |
202 | def _getSlaveScannerLogger(): |
203 | |
204 | === modified file 'lib/lp/buildmaster/interfaces/builder.py' |
205 | --- lib/lp/buildmaster/interfaces/builder.py 2013-09-02 08:11:58 +0000 |
206 | +++ lib/lp/buildmaster/interfaces/builder.py 2013-10-02 05:56:53 +0000 |
207 | @@ -7,7 +7,6 @@ |
208 | |
209 | __all__ = [ |
210 | 'BuildDaemonError', |
211 | - 'CorruptBuildCookie', |
212 | 'BuildSlaveFailure', |
213 | 'CannotBuild', |
214 | 'CannotFetchFile', |
215 | @@ -71,13 +70,6 @@ |
216 | """The build slave had a protocol version. This is a serious error.""" |
217 | |
218 | |
219 | -class CorruptBuildCookie(BuildDaemonError): |
220 | - """The build slave is working with mismatched information. |
221 | - |
222 | - It needs to be rescued. |
223 | - """ |
224 | - |
225 | - |
226 | class CannotResumeHost(BuildDaemonError): |
227 | """The build slave virtual machine cannot be resumed.""" |
228 | |
229 | |
230 | === modified file 'lib/lp/buildmaster/manager.py' |
231 | --- lib/lp/buildmaster/manager.py 2013-09-30 08:12:17 +0000 |
232 | +++ lib/lp/buildmaster/manager.py 2013-10-02 05:56:53 +0000 |
233 | @@ -296,8 +296,9 @@ |
234 | return |
235 | behavior = self.behavior_factory( |
236 | vitals.build_queue, builder, slave) |
237 | + db_cookie = behavior.getBuildCookie() if behavior else None |
238 | lost = yield interactor.rescueIfLost( |
239 | - vitals, slave, behavior, self.logger) |
240 | + vitals, slave, db_cookie, self.logger) |
241 | if lost: |
242 | lost_reason = '%s is lost' % vitals.name |
243 | |
244 | @@ -320,9 +321,8 @@ |
245 | if vitals.build_queue is not None: |
246 | # Scan the slave and get the logtail, or collect the build |
247 | # if it's ready. Yes, "updateBuild" is a bad name. |
248 | - behavior = behavior or self.behavior_factory( |
249 | - vitals.build_queue, builder, slave) |
250 | - yield interactor.updateBuild(vitals.build_queue, slave, behavior) |
251 | + yield interactor.updateBuild( |
252 | + vitals, slave, self.builders_cache, self.behavior_factory) |
253 | elif vitals.manual: |
254 | # If the builder is in manual mode, don't dispatch anything. |
255 | self.logger.debug( |
256 | |
257 | === modified file 'lib/lp/buildmaster/tests/test_interactor.py' |
258 | --- lib/lp/buildmaster/tests/test_interactor.py 2013-10-01 01:07:35 +0000 |
259 | +++ lib/lp/buildmaster/tests/test_interactor.py 2013-10-02 05:56:53 +0000 |
260 | @@ -30,7 +30,6 @@ |
261 | from lp.buildmaster.interfaces.builder import ( |
262 | CannotFetchFile, |
263 | CannotResumeHost, |
264 | - CorruptBuildCookie, |
265 | ) |
266 | from lp.buildmaster.tests.mock_slaves import ( |
267 | AbortingSlave, |
268 | @@ -40,7 +39,6 @@ |
269 | MockBuilder, |
270 | OkSlave, |
271 | SlaveTestHelpers, |
272 | - TrivialBehavior, |
273 | WaitingSlave, |
274 | ) |
275 | from lp.services.config import config |
276 | @@ -78,38 +76,6 @@ |
277 | self.assertRaises( |
278 | AssertionError, BuilderInteractor.extractBuildStatus, slave_status) |
279 | |
280 | - def test_verifySlaveBuildCookie_building_match(self): |
281 | - BuilderInteractor.verifySlaveBuildCookie(TrivialBehavior(), 'trivial') |
282 | - |
283 | - def test_verifySlaveBuildCookie_building_mismatch(self): |
284 | - self.assertRaises( |
285 | - CorruptBuildCookie, |
286 | - BuilderInteractor.verifySlaveBuildCookie, |
287 | - TrivialBehavior(), 'difficult') |
288 | - |
289 | - def test_verifySlaveBuildCookie_idle_match(self): |
290 | - BuilderInteractor.verifySlaveBuildCookie(None, None) |
291 | - |
292 | - def test_verifySlaveBuildCookie_idle_mismatch(self): |
293 | - self.assertRaises( |
294 | - CorruptBuildCookie, |
295 | - BuilderInteractor.verifySlaveBuildCookie, None, 'foo') |
296 | - |
297 | - def test_rescueIfLost_aborts_lost_and_broken_slave(self): |
298 | - # A slave that's 'lost' should be aborted; when the slave is |
299 | - # broken then abort() should also throw a fault. |
300 | - slave = LostBuildingBrokenSlave() |
301 | - d = BuilderInteractor.rescueIfLost( |
302 | - extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior()) |
303 | - |
304 | - def check_slave_status(failure): |
305 | - self.assertIn('abort', slave.call_log) |
306 | - # 'Fault' comes from the LostBuildingBrokenSlave, this is |
307 | - # just testing that the value is passed through. |
308 | - self.assertIsInstance(failure.value, xmlrpclib.Fault) |
309 | - |
310 | - return d.addBoth(check_slave_status) |
311 | - |
312 | def resumeSlaveHost(self, builder): |
313 | vitals = extract_vitals_from_db(builder) |
314 | return BuilderInteractor.resumeSlaveHost( |
315 | @@ -183,6 +149,21 @@ |
316 | slave = BuilderInteractor.makeSlaveFromVitals(vitals) |
317 | self.assertEqual(5, slave.timeout) |
318 | |
319 | + def test_rescueIfLost_aborts_lost_and_broken_slave(self): |
320 | + # A slave that's 'lost' should be aborted; when the slave is |
321 | + # broken then abort() should also throw a fault. |
322 | + slave = LostBuildingBrokenSlave() |
323 | + d = BuilderInteractor.rescueIfLost( |
324 | + extract_vitals_from_db(MockBuilder()), slave, 'trivial') |
325 | + |
326 | + def check_slave_status(failure): |
327 | + self.assertIn('abort', slave.call_log) |
328 | + # 'Fault' comes from the LostBuildingBrokenSlave, this is |
329 | + # just testing that the value is passed through. |
330 | + self.assertIsInstance(failure.value, xmlrpclib.Fault) |
331 | + |
332 | + return d.addBoth(check_slave_status) |
333 | + |
334 | @defer.inlineCallbacks |
335 | def test_recover_idle_slave(self): |
336 | # An idle slave is not rescued, even if it's not meant to be |
337 | @@ -190,7 +171,7 @@ |
338 | # we still report that it's lost. |
339 | slave = OkSlave() |
340 | lost = yield BuilderInteractor.rescueIfLost( |
341 | - extract_vitals_from_db(MockBuilder()), slave, TrivialBehavior()) |
342 | + extract_vitals_from_db(MockBuilder()), slave, 'trivial') |
343 | self.assertTrue(lost) |
344 | self.assertEqual([], slave.call_log) |
345 | |
346 | @@ -209,8 +190,7 @@ |
347 | # WAITING. |
348 | waiting_slave = WaitingSlave(build_id='trivial') |
349 | lost = yield BuilderInteractor.rescueIfLost( |
350 | - extract_vitals_from_db(MockBuilder()), waiting_slave, |
351 | - TrivialBehavior()) |
352 | + extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial') |
353 | self.assertFalse(lost) |
354 | self.assertEqual(['status'], waiting_slave.call_log) |
355 | |
356 | @@ -223,8 +203,7 @@ |
357 | # discarded. |
358 | waiting_slave = WaitingSlave(build_id='non-trivial') |
359 | lost = yield BuilderInteractor.rescueIfLost( |
360 | - extract_vitals_from_db(MockBuilder()), waiting_slave, |
361 | - TrivialBehavior()) |
362 | + extract_vitals_from_db(MockBuilder()), waiting_slave, 'trivial') |
363 | self.assertTrue(lost) |
364 | self.assertEqual(['status', 'clean'], waiting_slave.call_log) |
365 | |
366 | @@ -234,8 +213,7 @@ |
367 | # BUILDING. |
368 | building_slave = BuildingSlave(build_id='trivial') |
369 | lost = yield BuilderInteractor.rescueIfLost( |
370 | - extract_vitals_from_db(MockBuilder()), building_slave, |
371 | - TrivialBehavior()) |
372 | + extract_vitals_from_db(MockBuilder()), building_slave, 'trivial') |
373 | self.assertFalse(lost) |
374 | self.assertEqual(['status'], building_slave.call_log) |
375 | |
376 | @@ -245,8 +223,7 @@ |
377 | # abort the build, thus stopping it in its tracks. |
378 | building_slave = BuildingSlave(build_id='non-trivial') |
379 | lost = yield BuilderInteractor.rescueIfLost( |
380 | - extract_vitals_from_db(MockBuilder()), building_slave, |
381 | - TrivialBehavior()) |
382 | + extract_vitals_from_db(MockBuilder()), building_slave, 'trivial') |
383 | self.assertTrue(lost) |
384 | self.assertEqual(['status', 'abort'], building_slave.call_log) |
385 | |
386 | |
387 | === modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py' |
388 | --- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-09-30 08:12:17 +0000 |
389 | +++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2013-10-02 05:56:53 +0000 |
390 | @@ -36,6 +36,7 @@ |
391 | TestGetUploadMethodsMixin, |
392 | TestHandleStatusMixin, |
393 | ) |
394 | +from lp.buildmaster.tests.test_manager import MockBuildersCache |
395 | from lp.registry.interfaces.pocket import ( |
396 | PackagePublishingPocket, |
397 | pocketsuffix, |
398 | @@ -338,9 +339,9 @@ |
399 | self.addCleanup(self._cleanup) |
400 | |
401 | def updateBuild(self, candidate, slave): |
402 | + bc = MockBuildersCache(self.builder, candidate) |
403 | return self.interactor.updateBuild( |
404 | - candidate, slave, |
405 | - self.interactor.getBuildBehavior(candidate, self.builder, slave)) |
406 | + bc.getVitals('foo'), slave, bc, self.interactor.getBuildBehavior) |
407 | |
408 | def assertBuildProperties(self, build): |
409 | """Check that a build happened by making sure some of its properties |