Merge lp:~wgrant/launchpad/bug-1221002 into lp:launchpad
- bug-1221002
- Merge into devel
Proposed by
William Grant
Status: | Merged |
---|---|
Approved by: | Steve Kowalik |
Approved revision: | no longer in the source branch. |
Merged at revision: | 16760 |
Proposed branch: | lp:~wgrant/launchpad/bug-1221002 |
Merge into: | lp:launchpad |
Diff against target: |
614 lines (+211/-191) 4 files modified
lib/lp/buildmaster/interactor.py (+35/-54) lib/lp/buildmaster/manager.py (+46/-64) lib/lp/buildmaster/tests/test_interactor.py (+47/-62) lib/lp/buildmaster/tests/test_manager.py (+83/-11) |
To merge this branch: | bzr merge lp:~wgrant/launchpad/bug-1221002 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Steve Kowalik (community) | code | Approve | |
Review via email: mp+184030@code.launchpad.net |
Commit message
Rework SlaveScanner.scan() to make a bit more sense.
Description of the change
Rework SlaveScanner.scan() to make a bit more sense. It now ensures that the slave and DB states agree at the start, rescuing the slave and job if the early check fails. This means that the builderok == False case is now handled early and actually prevents all slave communication, as you'd think it would.
If the slave and DB states match, we update or dispatch as appropriate. BuilderInteract
SlaveScanner.scan() now takes builder and interactor arguments for overriding things in tests.
To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) : | # |
review:
Approve
(code)
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/buildmaster/interactor.py' | |||
2 | --- lib/lp/buildmaster/interactor.py 2013-09-03 09:35:07 +0000 | |||
3 | +++ lib/lp/buildmaster/interactor.py 2013-09-05 22:24:08 +0000 | |||
4 | @@ -8,9 +8,7 @@ | |||
5 | 8 | ] | 8 | ] |
6 | 9 | 9 | ||
7 | 10 | import logging | 10 | import logging |
8 | 11 | import socket | ||
9 | 12 | from urlparse import urlparse | 11 | from urlparse import urlparse |
10 | 13 | import xmlrpclib | ||
11 | 14 | 12 | ||
12 | 15 | import transaction | 13 | import transaction |
13 | 16 | from twisted.internet import defer | 14 | from twisted.internet import defer |
14 | @@ -288,28 +286,17 @@ | |||
15 | 288 | status['logtail'] = status_sentence[2] | 286 | status['logtail'] = status_sentence[2] |
16 | 289 | defer.returnValue((status_sentence, status)) | 287 | defer.returnValue((status_sentence, status)) |
17 | 290 | 288 | ||
34 | 291 | @defer.inlineCallbacks | 289 | def verifySlaveBuildCookie(self, slave_cookie): |
19 | 292 | def isAvailable(self): | ||
20 | 293 | """Whether or not a builder is available for building new jobs. | ||
21 | 294 | |||
22 | 295 | :return: A Deferred that fires with True or False, depending on | ||
23 | 296 | whether the builder is available or not. | ||
24 | 297 | """ | ||
25 | 298 | if not self.builder.builderok: | ||
26 | 299 | defer.returnValue(False) | ||
27 | 300 | try: | ||
28 | 301 | status = yield self.slave.status() | ||
29 | 302 | except (xmlrpclib.Fault, socket.error): | ||
30 | 303 | defer.returnValue(False) | ||
31 | 304 | defer.returnValue(status[0] == 'BuilderStatus.IDLE') | ||
32 | 305 | |||
33 | 306 | def verifySlaveBuildCookie(self, slave_build_cookie): | ||
35 | 307 | """See `IBuildFarmJobBehavior`.""" | 290 | """See `IBuildFarmJobBehavior`.""" |
36 | 308 | if self._current_build_behavior is None: | 291 | if self._current_build_behavior is None: |
41 | 309 | raise CorruptBuildCookie('No job assigned to builder') | 292 | if slave_cookie is not None: |
42 | 310 | good_cookie = self._current_build_behavior.getBuildCookie() | 293 | raise CorruptBuildCookie('Slave building when should be idle.') |
43 | 311 | if slave_build_cookie != good_cookie: | 294 | else: |
44 | 312 | raise CorruptBuildCookie("Invalid slave build cookie.") | 295 | good_cookie = self._current_build_behavior.getBuildCookie() |
45 | 296 | if slave_cookie != good_cookie: | ||
46 | 297 | raise CorruptBuildCookie( | ||
47 | 298 | "Invalid slave build cookie: got %r, expected %r." | ||
48 | 299 | % (slave_cookie, good_cookie)) | ||
49 | 313 | 300 | ||
50 | 314 | @defer.inlineCallbacks | 301 | @defer.inlineCallbacks |
51 | 315 | def rescueIfLost(self, logger=None): | 302 | def rescueIfLost(self, logger=None): |
52 | @@ -322,7 +309,8 @@ | |||
53 | 322 | `IBuildFarmJobBehavior.verifySlaveBuildCookie`. | 309 | `IBuildFarmJobBehavior.verifySlaveBuildCookie`. |
54 | 323 | 310 | ||
55 | 324 | :return: A Deferred that fires when the dialog with the slave is | 311 | :return: A Deferred that fires when the dialog with the slave is |
57 | 325 | finished. It does not have a return value. | 312 | finished. Its return value is True if the slave is lost, |
58 | 313 | False otherwise. | ||
59 | 326 | """ | 314 | """ |
60 | 327 | # 'ident_position' dict relates the position of the job identifier | 315 | # 'ident_position' dict relates the position of the job identifier |
61 | 328 | # token in the sentence received from status(), according to the | 316 | # token in the sentence received from status(), according to the |
62 | @@ -330,38 +318,40 @@ | |||
63 | 330 | # for further information about sentence format. | 318 | # for further information about sentence format. |
64 | 331 | ident_position = { | 319 | ident_position = { |
65 | 332 | 'BuilderStatus.BUILDING': 1, | 320 | 'BuilderStatus.BUILDING': 1, |
66 | 321 | 'BuilderStatus.ABORTING': 1, | ||
67 | 333 | 'BuilderStatus.WAITING': 2 | 322 | 'BuilderStatus.WAITING': 2 |
68 | 334 | } | 323 | } |
69 | 335 | 324 | ||
72 | 336 | # If slave is not building nor waiting, it's not in need of | 325 | # Determine the slave's current build cookie. For BUILDING, ABORTING |
73 | 337 | # rescuing. | 326 | # and WAITING we extract the string from the slave status |
74 | 327 | # sentence, and for IDLE it is None. | ||
75 | 338 | status_sentence = yield self.slave.status() | 328 | status_sentence = yield self.slave.status() |
76 | 339 | status = status_sentence[0] | 329 | status = status_sentence[0] |
77 | 340 | if status not in ident_position.keys(): | 330 | if status not in ident_position.keys(): |
80 | 341 | return | 331 | slave_cookie = None |
81 | 342 | slave_build_id = status_sentence[ident_position[status]] | 332 | else: |
82 | 333 | slave_cookie = status_sentence[ident_position[status]] | ||
83 | 334 | |||
84 | 335 | # verifySlaveBuildCookie will raise CorruptBuildCookie if the | ||
85 | 336 | # slave cookie doesn't match the expected one, including | ||
86 | 337 | # verifying that the slave cookie is None iff we expect the | ||
87 | 338 | # slave to be idle. | ||
88 | 343 | try: | 339 | try: |
90 | 344 | self.verifySlaveBuildCookie(slave_build_id) | 340 | self.verifySlaveBuildCookie(slave_cookie) |
91 | 341 | defer.returnValue(False) | ||
92 | 345 | except CorruptBuildCookie as reason: | 342 | except CorruptBuildCookie as reason: |
93 | 343 | # An IDLE slave doesn't need rescuing (SlaveScanner.scan | ||
94 | 344 | # will rescue the DB side instead), and we just have to wait | ||
95 | 345 | # out an ABORTING one. | ||
96 | 346 | if status == 'BuilderStatus.WAITING': | 346 | if status == 'BuilderStatus.WAITING': |
97 | 347 | yield self.cleanSlave() | 347 | yield self.cleanSlave() |
99 | 348 | else: | 348 | elif status == 'BuilderStatus.BUILDING': |
100 | 349 | yield self.requestAbort() | 349 | yield self.requestAbort() |
101 | 350 | if logger: | 350 | if logger: |
102 | 351 | logger.info( | 351 | logger.info( |
103 | 352 | "Builder '%s' rescued from '%s': '%s'" % | 352 | "Builder '%s' rescued from '%s': '%s'" % |
116 | 353 | (self.builder.name, slave_build_id, reason)) | 353 | (self.builder.name, slave_cookie, reason)) |
117 | 354 | 354 | defer.returnValue(True) | |
106 | 355 | def updateStatus(self, logger=None): | ||
107 | 356 | """Update the builder's status by probing it. | ||
108 | 357 | |||
109 | 358 | :return: A Deferred that fires when the dialog with the slave is | ||
110 | 359 | finished. It does not have a return value. | ||
111 | 360 | """ | ||
112 | 361 | if logger: | ||
113 | 362 | logger.debug('Checking %s' % self.builder.name) | ||
114 | 363 | |||
115 | 364 | return self.rescueIfLost(logger) | ||
118 | 365 | 355 | ||
119 | 366 | def cleanSlave(self): | 356 | def cleanSlave(self): |
120 | 367 | """Clean any temporary files from the slave. | 357 | """Clean any temporary files from the slave. |
121 | @@ -524,8 +514,11 @@ | |||
122 | 524 | 514 | ||
123 | 525 | :return: A Deferred that fires when the slave dialog is finished. | 515 | :return: A Deferred that fires when the slave dialog is finished. |
124 | 526 | """ | 516 | """ |
125 | 517 | # IDLE is deliberately not handled here, because it should be | ||
126 | 518 | # impossible to get past rescueIfLost unless the slave matches | ||
127 | 519 | # the DB, and this method isn't called unless the DB says | ||
128 | 520 | # there's a job. | ||
129 | 527 | builder_status_handlers = { | 521 | builder_status_handlers = { |
130 | 528 | 'BuilderStatus.IDLE': self.updateBuild_IDLE, | ||
131 | 529 | 'BuilderStatus.BUILDING': self.updateBuild_BUILDING, | 522 | 'BuilderStatus.BUILDING': self.updateBuild_BUILDING, |
132 | 530 | 'BuilderStatus.ABORTING': self.updateBuild_ABORTING, | 523 | 'BuilderStatus.ABORTING': self.updateBuild_ABORTING, |
133 | 531 | 'BuilderStatus.WAITING': self.updateBuild_WAITING, | 524 | 'BuilderStatus.WAITING': self.updateBuild_WAITING, |
134 | @@ -539,18 +532,6 @@ | |||
135 | 539 | method = builder_status_handlers[builder_status] | 532 | method = builder_status_handlers[builder_status] |
136 | 540 | yield method(queueItem, status_sentence, status_dict, logger) | 533 | yield method(queueItem, status_sentence, status_dict, logger) |
137 | 541 | 534 | ||
138 | 542 | def updateBuild_IDLE(self, queueItem, status_sentence, status_dict, | ||
139 | 543 | logger): | ||
140 | 544 | """Somehow the builder forgot about the build job. | ||
141 | 545 | |||
142 | 546 | Log this and reset the record. | ||
143 | 547 | """ | ||
144 | 548 | logger.warn( | ||
145 | 549 | "Builder %s forgot about buildqueue %d -- resetting buildqueue " | ||
146 | 550 | "record" % (queueItem.builder.url, queueItem.id)) | ||
147 | 551 | queueItem.reset() | ||
148 | 552 | transaction.commit() | ||
149 | 553 | |||
150 | 554 | def updateBuild_BUILDING(self, queueItem, status_sentence, status_dict, | 535 | def updateBuild_BUILDING(self, queueItem, status_sentence, status_dict, |
151 | 555 | logger): | 536 | logger): |
152 | 556 | """Build still building, collect the logtail""" | 537 | """Build still building, collect the logtail""" |
153 | 557 | 538 | ||
154 | === modified file 'lib/lp/buildmaster/manager.py' | |||
155 | --- lib/lp/buildmaster/manager.py 2013-09-03 04:06:08 +0000 | |||
156 | +++ lib/lp/buildmaster/manager.py 2013-09-05 22:24:08 +0000 | |||
157 | @@ -250,82 +250,64 @@ | |||
158 | 250 | defer.returnValue(value is not None) | 250 | defer.returnValue(value is not None) |
159 | 251 | 251 | ||
160 | 252 | @defer.inlineCallbacks | 252 | @defer.inlineCallbacks |
162 | 253 | def scan(self): | 253 | def scan(self, builder=None, interactor=None): |
163 | 254 | """Probe the builder and update/dispatch/collect as appropriate. | 254 | """Probe the builder and update/dispatch/collect as appropriate. |
164 | 255 | 255 | ||
184 | 256 | There are several steps to scanning: | 256 | :return: A Deferred that fires when the scan is complete. |
166 | 257 | |||
167 | 258 | 1. If the builder is marked as "ok" then probe it to see what state | ||
168 | 259 | it's in. This is where lost jobs are rescued if we think the | ||
169 | 260 | builder is doing something that it later tells us it's not, | ||
170 | 261 | and also where the multi-phase abort procedure happens. | ||
171 | 262 | See IBuilder.rescueIfLost, which is called by | ||
172 | 263 | IBuilder.updateStatus(). | ||
173 | 264 | 2. If the builder is still happy, we ask it if it has an active build | ||
174 | 265 | and then either update the build in Launchpad or collect the | ||
175 | 266 | completed build. (builder.updateBuild) | ||
176 | 267 | 3. If the builder is not happy or it was marked as unavailable | ||
177 | 268 | mid-build, we need to reset the job that we thought it had, so | ||
178 | 269 | that the job is dispatched elsewhere. | ||
179 | 270 | 4. If the builder is idle and we have another build ready, dispatch | ||
180 | 271 | it. | ||
181 | 272 | |||
182 | 273 | :return: A Deferred that fires when the scan is complete, whose | ||
183 | 274 | value is A `BuilderSlave` if we dispatched a job to it, or None. | ||
185 | 275 | """ | 257 | """ |
193 | 276 | # We need to re-fetch the builder object on each cycle as the | 258 | self.logger.debug("Scanning %s." % self.builder_name) |
194 | 277 | # Storm store is invalidated over transaction boundaries. | 259 | # Commit and refetch the Builder object to ensure we have the |
195 | 278 | 260 | # latest data from the DB. | |
196 | 279 | self.builder = get_builder(self.builder_name) | 261 | transaction.commit() |
197 | 280 | self.interactor = BuilderInteractor(self.builder) | 262 | self.builder = builder or get_builder(self.builder_name) |
198 | 281 | 263 | self.interactor = interactor or BuilderInteractor(self.builder) | |
199 | 282 | if self.builder.builderok: | 264 | |
200 | 265 | # Confirm that the DB and slave sides are in a valid, mutually | ||
201 | 266 | # agreeable state. | ||
202 | 267 | lost_reason = None | ||
203 | 268 | if not self.builder.builderok: | ||
204 | 269 | lost_reason = '%s is disabled' % self.builder.name | ||
205 | 270 | else: | ||
206 | 283 | cancelled = yield self.checkCancellation(self.builder) | 271 | cancelled = yield self.checkCancellation(self.builder) |
207 | 284 | if cancelled: | 272 | if cancelled: |
208 | 285 | return | 273 | return |
217 | 286 | yield self.interactor.updateStatus(self.logger) | 274 | lost = yield self.interactor.rescueIfLost(self.logger) |
218 | 287 | 275 | if lost: | |
219 | 288 | # Commit the changes done while possibly rescuing jobs, to | 276 | lost_reason = '%s is lost' % self.builder.name |
220 | 289 | # avoid holding table locks. | 277 | |
221 | 290 | transaction.commit() | 278 | # The slave is lost or the builder is disabled. We can't |
222 | 291 | 279 | # continue to update the job status or dispatch a new job, so | |
223 | 292 | buildqueue = self.builder.currentjob | 280 | # just rescue the assigned job, if any, so it can be dispatched |
224 | 293 | if buildqueue is not None: | 281 | # to another slave. |
225 | 282 | if lost_reason is not None: | ||
226 | 283 | if self.builder.currentjob is not None: | ||
227 | 284 | self.logger.warn( | ||
228 | 285 | "%s. Resetting BuildQueue %d.", lost_reason, | ||
229 | 286 | self.builder.currentjob.id) | ||
230 | 287 | self.builder.currentjob.reset() | ||
231 | 288 | transaction.commit() | ||
232 | 289 | return | ||
233 | 290 | |||
234 | 291 | # We've confirmed that the slave state matches the DB. Continue | ||
235 | 292 | # with updating the job status, or dispatching a new job if the | ||
236 | 293 | # builder is idle. | ||
237 | 294 | if self.builder.currentjob is not None: | ||
238 | 294 | # Scan the slave and get the logtail, or collect the build | 295 | # Scan the slave and get the logtail, or collect the build |
239 | 295 | # if it's ready. Yes, "updateBuild" is a bad name. | 296 | # if it's ready. Yes, "updateBuild" is a bad name. |
244 | 296 | yield self.interactor.updateBuild(buildqueue) | 297 | yield self.interactor.updateBuild(self.builder.currentjob) |
245 | 297 | 298 | elif self.builder.manual: | |
246 | 298 | # If the builder is in manual mode, don't dispatch anything. | 299 | # If the builder is in manual mode, don't dispatch anything. |
243 | 299 | if self.builder.manual: | ||
247 | 300 | self.logger.debug( | 300 | self.logger.debug( |
248 | 301 | '%s is in manual mode, not dispatching.' % | 301 | '%s is in manual mode, not dispatching.' % |
249 | 302 | self.builder.name) | 302 | self.builder.name) |
266 | 303 | return | 303 | else: |
267 | 304 | 304 | # See if there is a job we can dispatch to the builder slave. | |
268 | 305 | # If the builder is marked unavailable, don't dispatch anything. | 305 | yield self.interactor.findAndStartJob() |
269 | 306 | # Additionaly, because builders can be removed from the pool at | 306 | if self.builder.currentjob is not None: |
270 | 307 | # any time, we need to see if we think there was a build running | 307 | # After a successful dispatch we can reset the |
271 | 308 | # on it before it was marked unavailable. In this case we reset | 308 | # failure_count. |
272 | 309 | # the build thusly forcing it to get re-dispatched to another | 309 | self.builder.resetFailureCount() |
257 | 310 | # builder. | ||
258 | 311 | available = yield self.interactor.isAvailable() | ||
259 | 312 | if not available: | ||
260 | 313 | job = self.builder.currentjob | ||
261 | 314 | if job is not None and not self.builder.builderok: | ||
262 | 315 | self.logger.info( | ||
263 | 316 | "%s was made unavailable, resetting attached " | ||
264 | 317 | "job" % self.builder.name) | ||
265 | 318 | job.reset() | ||
273 | 319 | transaction.commit() | 310 | transaction.commit() |
274 | 320 | return | ||
275 | 321 | |||
276 | 322 | # See if there is a job we can dispatch to the builder slave. | ||
277 | 323 | yield self.interactor.findAndStartJob() | ||
278 | 324 | if self.builder.currentjob is not None: | ||
279 | 325 | # After a successful dispatch we can reset the | ||
280 | 326 | # failure_count. | ||
281 | 327 | self.builder.resetFailureCount() | ||
282 | 328 | transaction.commit() | ||
283 | 329 | 311 | ||
284 | 330 | 312 | ||
285 | 331 | class NewBuildersScanner: | 313 | class NewBuildersScanner: |
286 | 332 | 314 | ||
287 | === modified file 'lib/lp/buildmaster/tests/test_interactor.py' | |||
288 | --- lib/lp/buildmaster/tests/test_interactor.py 2013-09-02 12:45:50 +0000 | |||
289 | +++ lib/lp/buildmaster/tests/test_interactor.py 2013-09-05 22:24:08 +0000 | |||
290 | @@ -33,7 +33,6 @@ | |||
291 | 33 | ) | 33 | ) |
292 | 34 | from lp.buildmaster.tests.mock_slaves import ( | 34 | from lp.buildmaster.tests.mock_slaves import ( |
293 | 35 | AbortingSlave, | 35 | AbortingSlave, |
294 | 36 | BrokenSlave, | ||
295 | 37 | BuildingSlave, | 36 | BuildingSlave, |
296 | 38 | DeadProxy, | 37 | DeadProxy, |
297 | 39 | LostBuildingBrokenSlave, | 38 | LostBuildingBrokenSlave, |
298 | @@ -80,28 +79,33 @@ | |||
299 | 80 | self.assertRaises( | 79 | self.assertRaises( |
300 | 81 | AssertionError, interactor.extractBuildStatus, slave_status) | 80 | AssertionError, interactor.extractBuildStatus, slave_status) |
301 | 82 | 81 | ||
303 | 83 | def test_verifySlaveBuildCookie_good(self): | 82 | def test_verifySlaveBuildCookie_building_match(self): |
304 | 84 | interactor = BuilderInteractor(MockBuilder(), None, TrivialBehavior()) | 83 | interactor = BuilderInteractor(MockBuilder(), None, TrivialBehavior()) |
305 | 85 | interactor.verifySlaveBuildCookie('trivial') | 84 | interactor.verifySlaveBuildCookie('trivial') |
306 | 86 | 85 | ||
308 | 87 | def test_verifySlaveBuildCookie_bad(self): | 86 | def test_verifySlaveBuildCookie_building_mismatch(self): |
309 | 88 | interactor = BuilderInteractor(MockBuilder(), None, TrivialBehavior()) | 87 | interactor = BuilderInteractor(MockBuilder(), None, TrivialBehavior()) |
310 | 89 | self.assertRaises( | 88 | self.assertRaises( |
311 | 90 | CorruptBuildCookie, | 89 | CorruptBuildCookie, |
312 | 91 | interactor.verifySlaveBuildCookie, 'difficult') | 90 | interactor.verifySlaveBuildCookie, 'difficult') |
313 | 92 | 91 | ||
315 | 93 | def test_verifySlaveBuildCookie_idle(self): | 92 | def test_verifySlaveBuildCookie_idle_match(self): |
316 | 93 | interactor = BuilderInteractor(MockBuilder()) | ||
317 | 94 | self.assertIs(None, interactor._current_build_behavior) | ||
318 | 95 | interactor.verifySlaveBuildCookie(None) | ||
319 | 96 | |||
320 | 97 | def test_verifySlaveBuildCookie_idle_mismatch(self): | ||
321 | 94 | interactor = BuilderInteractor(MockBuilder()) | 98 | interactor = BuilderInteractor(MockBuilder()) |
322 | 95 | self.assertIs(None, interactor._current_build_behavior) | 99 | self.assertIs(None, interactor._current_build_behavior) |
323 | 96 | self.assertRaises( | 100 | self.assertRaises( |
324 | 97 | CorruptBuildCookie, interactor.verifySlaveBuildCookie, 'foo') | 101 | CorruptBuildCookie, interactor.verifySlaveBuildCookie, 'foo') |
325 | 98 | 102 | ||
327 | 99 | def test_updateStatus_aborts_lost_and_broken_slave(self): | 103 | def test_rescueIfLost_aborts_lost_and_broken_slave(self): |
328 | 100 | # A slave that's 'lost' should be aborted; when the slave is | 104 | # A slave that's 'lost' should be aborted; when the slave is |
329 | 101 | # broken then abort() should also throw a fault. | 105 | # broken then abort() should also throw a fault. |
330 | 102 | slave = LostBuildingBrokenSlave() | 106 | slave = LostBuildingBrokenSlave() |
331 | 103 | interactor = BuilderInteractor(MockBuilder(), slave, TrivialBehavior()) | 107 | interactor = BuilderInteractor(MockBuilder(), slave, TrivialBehavior()) |
333 | 104 | d = interactor.updateStatus(DevNullLogger()) | 108 | d = interactor.rescueIfLost(DevNullLogger()) |
334 | 105 | 109 | ||
335 | 106 | def check_slave_status(failure): | 110 | def check_slave_status(failure): |
336 | 107 | self.assertIn('abort', slave.call_log) | 111 | self.assertIn('abort', slave.call_log) |
337 | @@ -170,31 +174,37 @@ | |||
338 | 170 | interactor = BuilderInteractor(builder) | 174 | interactor = BuilderInteractor(builder) |
339 | 171 | self.assertEqual(5, interactor.slave.timeout) | 175 | self.assertEqual(5, interactor.slave.timeout) |
340 | 172 | 176 | ||
341 | 177 | @defer.inlineCallbacks | ||
342 | 178 | def test_recover_idle_slave(self): | ||
343 | 179 | # An idle slave is not rescued, even if it's not meant to be | ||
344 | 180 | # idle. SlaveScanner.scan() will clean up the DB side, because | ||
345 | 181 | # we still report that it's lost. | ||
346 | 182 | slave = OkSlave() | ||
347 | 183 | lost = yield BuilderInteractor( | ||
348 | 184 | MockBuilder(), slave, TrivialBehavior()).rescueIfLost() | ||
349 | 185 | self.assertTrue(lost) | ||
350 | 186 | self.assertEqual([], slave.call_log) | ||
351 | 187 | |||
352 | 188 | @defer.inlineCallbacks | ||
353 | 173 | def test_recover_ok_slave(self): | 189 | def test_recover_ok_slave(self): |
355 | 174 | # An idle slave is not rescued. | 190 | # An idle slave that's meant to be idle is not rescued. |
356 | 175 | slave = OkSlave() | 191 | slave = OkSlave() |
366 | 176 | d = BuilderInteractor( | 192 | lost = yield BuilderInteractor( |
367 | 177 | MockBuilder(), slave, TrivialBehavior()).rescueIfLost() | 193 | MockBuilder(), slave, None).rescueIfLost() |
368 | 178 | 194 | self.assertFalse(lost) | |
369 | 179 | def check_slave_calls(ignored): | 195 | self.assertEqual([], slave.call_log) |
370 | 180 | self.assertNotIn('abort', slave.call_log) | 196 | |
371 | 181 | self.assertNotIn('clean', slave.call_log) | 197 | @defer.inlineCallbacks |
363 | 182 | |||
364 | 183 | return d.addCallback(check_slave_calls) | ||
365 | 184 | |||
372 | 185 | def test_recover_waiting_slave_with_good_id(self): | 198 | def test_recover_waiting_slave_with_good_id(self): |
373 | 186 | # rescueIfLost does not attempt to abort or clean a builder that is | 199 | # rescueIfLost does not attempt to abort or clean a builder that is |
374 | 187 | # WAITING. | 200 | # WAITING. |
375 | 188 | waiting_slave = WaitingSlave(build_id='trivial') | 201 | waiting_slave = WaitingSlave(build_id='trivial') |
377 | 189 | d = BuilderInteractor( | 202 | lost = yield BuilderInteractor( |
378 | 190 | MockBuilder(), waiting_slave, TrivialBehavior()).rescueIfLost() | 203 | MockBuilder(), waiting_slave, TrivialBehavior()).rescueIfLost() |
386 | 191 | 204 | self.assertFalse(lost) | |
387 | 192 | def check_slave_calls(ignored): | 205 | self.assertEqual(['status'], waiting_slave.call_log) |
388 | 193 | self.assertNotIn('abort', waiting_slave.call_log) | 206 | |
389 | 194 | self.assertNotIn('clean', waiting_slave.call_log) | 207 | @defer.inlineCallbacks |
383 | 195 | |||
384 | 196 | return d.addCallback(check_slave_calls) | ||
385 | 197 | |||
390 | 198 | def test_recover_waiting_slave_with_bad_id(self): | 208 | def test_recover_waiting_slave_with_bad_id(self): |
391 | 199 | # If a slave is WAITING with a build for us to get, and the build | 209 | # If a slave is WAITING with a build for us to get, and the build |
392 | 200 | # cookie cannot be verified, which means we don't recognize the build, | 210 | # cookie cannot be verified, which means we don't recognize the build, |
393 | @@ -202,40 +212,30 @@ | |||
394 | 202 | # builder is reset for a new build, and the corrupt build is | 212 | # builder is reset for a new build, and the corrupt build is |
395 | 203 | # discarded. | 213 | # discarded. |
396 | 204 | waiting_slave = WaitingSlave(build_id='non-trivial') | 214 | waiting_slave = WaitingSlave(build_id='non-trivial') |
398 | 205 | d = BuilderInteractor( | 215 | lost = yield BuilderInteractor( |
399 | 206 | MockBuilder(), waiting_slave, TrivialBehavior()).rescueIfLost() | 216 | MockBuilder(), waiting_slave, TrivialBehavior()).rescueIfLost() |
407 | 207 | 217 | self.assertTrue(lost) | |
408 | 208 | def check_slave_calls(ignored): | 218 | self.assertEqual(['status', 'clean'], waiting_slave.call_log) |
409 | 209 | self.assertNotIn('abort', waiting_slave.call_log) | 219 | |
410 | 210 | self.assertIn('clean', waiting_slave.call_log) | 220 | @defer.inlineCallbacks |
404 | 211 | |||
405 | 212 | return d.addCallback(check_slave_calls) | ||
406 | 213 | |||
411 | 214 | def test_recover_building_slave_with_good_id(self): | 221 | def test_recover_building_slave_with_good_id(self): |
412 | 215 | # rescueIfLost does not attempt to abort or clean a builder that is | 222 | # rescueIfLost does not attempt to abort or clean a builder that is |
413 | 216 | # BUILDING. | 223 | # BUILDING. |
414 | 217 | building_slave = BuildingSlave(build_id='trivial') | 224 | building_slave = BuildingSlave(build_id='trivial') |
416 | 218 | d = BuilderInteractor( | 225 | lost = yield BuilderInteractor( |
417 | 219 | MockBuilder(), building_slave, TrivialBehavior()).rescueIfLost() | 226 | MockBuilder(), building_slave, TrivialBehavior()).rescueIfLost() |
425 | 220 | 227 | self.assertFalse(lost) | |
426 | 221 | def check_slave_calls(ignored): | 228 | self.assertEqual(['status'], building_slave.call_log) |
427 | 222 | self.assertNotIn('abort', building_slave.call_log) | 229 | |
428 | 223 | self.assertNotIn('clean', building_slave.call_log) | 230 | @defer.inlineCallbacks |
422 | 224 | |||
423 | 225 | return d.addCallback(check_slave_calls) | ||
424 | 226 | |||
429 | 227 | def test_recover_building_slave_with_bad_id(self): | 231 | def test_recover_building_slave_with_bad_id(self): |
430 | 228 | # If a slave is BUILDING with a build id we don't recognize, then we | 232 | # If a slave is BUILDING with a build id we don't recognize, then we |
431 | 229 | # abort the build, thus stopping it in its tracks. | 233 | # abort the build, thus stopping it in its tracks. |
432 | 230 | building_slave = BuildingSlave(build_id='non-trivial') | 234 | building_slave = BuildingSlave(build_id='non-trivial') |
434 | 231 | d = BuilderInteractor( | 235 | lost = yield BuilderInteractor( |
435 | 232 | MockBuilder(), building_slave, TrivialBehavior()).rescueIfLost() | 236 | MockBuilder(), building_slave, TrivialBehavior()).rescueIfLost() |
442 | 233 | 237 | self.assertTrue(lost) | |
443 | 234 | def check_slave_calls(ignored): | 238 | self.assertEqual(['status', 'abort'], building_slave.call_log) |
438 | 235 | self.assertIn('abort', building_slave.call_log) | ||
439 | 236 | self.assertNotIn('clean', building_slave.call_log) | ||
440 | 237 | |||
441 | 238 | return d.addCallback(check_slave_calls) | ||
444 | 239 | 239 | ||
445 | 240 | 240 | ||
446 | 241 | class TestBuilderInteractorSlaveStatus(TestCase): | 241 | class TestBuilderInteractorSlaveStatus(TestCase): |
447 | @@ -285,21 +285,6 @@ | |||
448 | 285 | self.assertStatus( | 285 | self.assertStatus( |
449 | 286 | AbortingSlave(), builder_status='BuilderStatus.ABORTING') | 286 | AbortingSlave(), builder_status='BuilderStatus.ABORTING') |
450 | 287 | 287 | ||
451 | 288 | def test_isAvailable_with_not_builderok(self): | ||
452 | 289 | # isAvailable() is a wrapper around BuilderSlave.status() | ||
453 | 290 | builder = MockBuilder() | ||
454 | 291 | builder.builderok = False | ||
455 | 292 | d = BuilderInteractor(builder).isAvailable() | ||
456 | 293 | return d.addCallback(self.assertFalse) | ||
457 | 294 | |||
458 | 295 | def test_isAvailable_with_slave_fault(self): | ||
459 | 296 | d = BuilderInteractor(MockBuilder(), BrokenSlave()).isAvailable() | ||
460 | 297 | return d.addCallback(self.assertFalse) | ||
461 | 298 | |||
462 | 299 | def test_isAvailable_with_slave_idle(self): | ||
463 | 300 | d = BuilderInteractor(MockBuilder(), OkSlave()).isAvailable() | ||
464 | 301 | return d.addCallback(self.assertTrue) | ||
465 | 302 | |||
466 | 303 | 288 | ||
467 | 304 | class TestBuilderInteractorDB(TestCaseWithFactory): | 289 | class TestBuilderInteractorDB(TestCaseWithFactory): |
468 | 305 | """BuilderInteractor tests that need a DB.""" | 290 | """BuilderInteractor tests that need a DB.""" |
469 | 306 | 291 | ||
470 | === modified file 'lib/lp/buildmaster/tests/test_manager.py' | |||
471 | --- lib/lp/buildmaster/tests/test_manager.py 2013-09-03 03:41:02 +0000 | |||
472 | +++ lib/lp/buildmaster/tests/test_manager.py 2013-09-05 22:24:08 +0000 | |||
473 | @@ -8,7 +8,6 @@ | |||
474 | 8 | import time | 8 | import time |
475 | 9 | import xmlrpclib | 9 | import xmlrpclib |
476 | 10 | 10 | ||
477 | 11 | from lpbuildd.tests import BuilddSlaveTestSetup | ||
478 | 12 | from testtools.deferredruntest import ( | 11 | from testtools.deferredruntest import ( |
479 | 13 | assert_fails_with, | 12 | assert_fails_with, |
480 | 14 | AsynchronousDeferredRunTest, | 13 | AsynchronousDeferredRunTest, |
481 | @@ -47,7 +46,9 @@ | |||
482 | 47 | BuildingSlave, | 46 | BuildingSlave, |
483 | 48 | LostBuildingBrokenSlave, | 47 | LostBuildingBrokenSlave, |
484 | 49 | make_publisher, | 48 | make_publisher, |
485 | 49 | MockBuilder, | ||
486 | 50 | OkSlave, | 50 | OkSlave, |
487 | 51 | TrivialBehavior, | ||
488 | 51 | ) | 52 | ) |
489 | 52 | from lp.registry.interfaces.distribution import IDistributionSet | 53 | from lp.registry.interfaces.distribution import IDistributionSet |
490 | 53 | from lp.services.config import config | 54 | from lp.services.config import config |
491 | @@ -178,28 +179,30 @@ | |||
492 | 178 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job) | 179 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job) |
493 | 179 | self.assertEqual(build.status, BuildStatus.NEEDSBUILD) | 180 | self.assertEqual(build.status, BuildStatus.NEEDSBUILD) |
494 | 180 | 181 | ||
495 | 182 | @defer.inlineCallbacks | ||
496 | 181 | def testScanRescuesJobFromBrokenBuilder(self): | 183 | def testScanRescuesJobFromBrokenBuilder(self): |
497 | 182 | # The job assigned to a broken builder is rescued. | 184 | # The job assigned to a broken builder is rescued. |
498 | 183 | self.useFixture(BuilddSlaveTestSetup()) | ||
499 | 184 | |||
500 | 185 | # Sampledata builder is enabled and is assigned to an active job. | 185 | # Sampledata builder is enabled and is assigned to an active job. |
501 | 186 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | 186 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] |
502 | 187 | self.patch( | ||
503 | 188 | BuilderSlave, 'makeBuilderSlave', | ||
504 | 189 | FakeMethod(BuildingSlave(build_id='PACKAGEBUILD-8'))) | ||
505 | 187 | self.assertTrue(builder.builderok) | 190 | self.assertTrue(builder.builderok) |
506 | 188 | job = builder.currentjob | 191 | job = builder.currentjob |
507 | 189 | self.assertBuildingJob(job, builder) | 192 | self.assertBuildingJob(job, builder) |
508 | 190 | 193 | ||
509 | 194 | scanner = self._getScanner() | ||
510 | 195 | yield scanner.scan() | ||
511 | 196 | self.assertIsNot(None, builder.currentjob) | ||
512 | 197 | |||
513 | 191 | # Disable the sampledata builder | 198 | # Disable the sampledata builder |
514 | 192 | login('foo.bar@canonical.com') | ||
515 | 193 | builder.builderok = False | 199 | builder.builderok = False |
516 | 194 | transaction.commit() | 200 | transaction.commit() |
517 | 195 | login(ANONYMOUS) | ||
518 | 196 | 201 | ||
519 | 197 | # Run 'scan' and check its result. | 202 | # Run 'scan' and check its result. |
525 | 198 | switch_dbuser(config.builddmaster.dbuser) | 203 | slave = yield scanner.scan() |
526 | 199 | scanner = self._getScanner() | 204 | self.assertIs(None, builder.currentjob) |
527 | 200 | d = defer.maybeDeferred(scanner.scan) | 205 | self._checkJobRescued(slave, builder, job) |
523 | 201 | d.addCallback(self._checkJobRescued, builder, job) | ||
524 | 202 | return d | ||
528 | 203 | 206 | ||
529 | 204 | def _checkJobUpdated(self, slave, builder, job): | 207 | def _checkJobUpdated(self, slave, builder, job): |
530 | 205 | """`SlaveScanner.scan` updates legitimate jobs. | 208 | """`SlaveScanner.scan` updates legitimate jobs. |
531 | @@ -223,7 +226,7 @@ | |||
532 | 223 | login('foo.bar@canonical.com') | 226 | login('foo.bar@canonical.com') |
533 | 224 | builder.builderok = True | 227 | builder.builderok = True |
534 | 225 | self.patch(BuilderSlave, 'makeBuilderSlave', | 228 | self.patch(BuilderSlave, 'makeBuilderSlave', |
536 | 226 | FakeMethod(BuildingSlave(build_id='8-1'))) | 229 | FakeMethod(BuildingSlave(build_id='PACKAGEBUILD-8'))) |
537 | 227 | transaction.commit() | 230 | transaction.commit() |
538 | 228 | login(ANONYMOUS) | 231 | login(ANONYMOUS) |
539 | 229 | 232 | ||
540 | @@ -437,6 +440,75 @@ | |||
541 | 437 | self.assertEqual(BuildStatus.CANCELLED, build.status) | 440 | self.assertEqual(BuildStatus.CANCELLED, build.status) |
542 | 438 | 441 | ||
543 | 439 | 442 | ||
544 | 443 | class FakeBuildQueue: | ||
545 | 444 | |||
546 | 445 | def __init__(self): | ||
547 | 446 | self.id = 1 | ||
548 | 447 | self.reset = FakeMethod() | ||
549 | 448 | |||
550 | 449 | |||
551 | 450 | class TestSlaveScannerWithoutDB(TestCase): | ||
552 | 451 | |||
553 | 452 | run_tests_with = AsynchronousDeferredRunTest | ||
554 | 453 | |||
555 | 454 | @defer.inlineCallbacks | ||
556 | 455 | def test_scan_with_job(self): | ||
557 | 456 | # SlaveScanner.scan calls updateBuild() when a job is building. | ||
558 | 457 | interactor = BuilderInteractor( | ||
559 | 458 | MockBuilder(), BuildingSlave('trivial'), TrivialBehavior()) | ||
560 | 459 | scanner = SlaveScanner('mock', BufferLogger()) | ||
561 | 460 | |||
562 | 461 | # Instrument updateBuild and currentjob.reset | ||
563 | 462 | interactor.updateBuild = FakeMethod() | ||
564 | 463 | interactor.builder.currentjob = FakeBuildQueue() | ||
565 | 464 | # XXX: checkCancellation needs more than a FakeBuildQueue. | ||
566 | 465 | scanner.checkCancellation = FakeMethod(defer.succeed(False)) | ||
567 | 466 | |||
568 | 467 | yield scanner.scan(builder=interactor.builder, interactor=interactor) | ||
569 | 468 | self.assertEqual(['status'], interactor.slave.call_log) | ||
570 | 469 | self.assertEqual(1, interactor.updateBuild.call_count) | ||
571 | 470 | self.assertEqual(0, interactor.builder.currentjob.reset.call_count) | ||
572 | 471 | |||
573 | 472 | @defer.inlineCallbacks | ||
574 | 473 | def test_scan_aborts_lost_slave_with_job(self): | ||
575 | 474 | # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort | ||
576 | 475 | # slaves that don't have the expected job. | ||
577 | 476 | interactor = BuilderInteractor( | ||
578 | 477 | MockBuilder(), BuildingSlave('nontrivial'), TrivialBehavior()) | ||
579 | 478 | scanner = SlaveScanner('mock', BufferLogger()) | ||
580 | 479 | |||
581 | 480 | # Instrument updateBuild and currentjob.reset | ||
582 | 481 | interactor.updateBuild = FakeMethod() | ||
583 | 482 | interactor.builder.currentjob = FakeBuildQueue() | ||
584 | 483 | # XXX: checkCancellation needs more than a FakeBuildQueue. | ||
585 | 484 | scanner.checkCancellation = FakeMethod(defer.succeed(False)) | ||
586 | 485 | |||
587 | 486 | # A single scan will call status(), notice that the slave is | ||
588 | 487 | # lost, abort() the slave, then reset() the job without calling | ||
589 | 488 | # updateBuild(). | ||
590 | 489 | yield scanner.scan(builder=interactor.builder, interactor=interactor) | ||
591 | 490 | self.assertEqual(['status', 'abort'], interactor.slave.call_log) | ||
592 | 491 | self.assertEqual(0, interactor.updateBuild.call_count) | ||
593 | 492 | self.assertEqual(1, interactor.builder.currentjob.reset.call_count) | ||
594 | 493 | |||
595 | 494 | @defer.inlineCallbacks | ||
596 | 495 | def test_scan_aborts_lost_slave_when_idle(self): | ||
597 | 496 | # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort | ||
598 | 497 | # slaves that aren't meant to have a job. | ||
599 | 498 | interactor = BuilderInteractor(MockBuilder(), BuildingSlave(), None) | ||
600 | 499 | scanner = SlaveScanner('mock', BufferLogger()) | ||
601 | 500 | |||
602 | 501 | # Instrument updateBuild. | ||
603 | 502 | interactor.updateBuild = FakeMethod() | ||
604 | 503 | |||
605 | 504 | # A single scan will call status(), notice that the slave is | ||
606 | 505 | # lost, abort() the slave, then reset() the job without calling | ||
607 | 506 | # updateBuild(). | ||
608 | 507 | yield scanner.scan(builder=interactor.builder, interactor=interactor) | ||
609 | 508 | self.assertEqual(['status', 'abort'], interactor.slave.call_log) | ||
610 | 509 | self.assertEqual(0, interactor.updateBuild.call_count) | ||
611 | 510 | |||
612 | 511 | |||
613 | 440 | class TestCancellationChecking(TestCaseWithFactory): | 512 | class TestCancellationChecking(TestCaseWithFactory): |
614 | 441 | """Unit tests for the checkCancellation method.""" | 513 | """Unit tests for the checkCancellation method.""" |
615 | 442 | 514 |