Merge lp:~julian-edwards/launchpad/cancel-build-bug-173018-part2 into lp:launchpad

Proposed by Julian Edwards
Status: Merged
Approved by: Brad Crittenden
Approved revision: no longer in the source branch.
Merged at revision: 14220
Proposed branch: lp:~julian-edwards/launchpad/cancel-build-bug-173018-part2
Merge into: lp:launchpad
Prerequisite: lp:~julian-edwards/launchpad/cancel-build-bug-173018
Diff against target: 305 lines (+163/-20)
4 files modified
lib/lp/app/browser/tales.py (+2/-0)
lib/lp/buildmaster/manager.py (+45/-14)
lib/lp/buildmaster/model/buildfarmjob.py (+4/-1)
lib/lp/buildmaster/tests/test_manager.py (+112/-5)
To merge this branch: bzr merge lp:~julian-edwards/launchpad/cancel-build-bug-173018-part2
Reviewer Review Type Date Requested Status
Brad Crittenden (community) code Approve
Review via email: mp+80662@code.launchpad.net

Commit message

[r=bac][bug=173018] Alter the buildd-manager to notice that a build state is CANCELLING, and rip it forcefully off the builder by resetting the virtual machine.

Description of the change

Second branch in a pipeline of changes to add mid-build cancellation on the build farm. This one fixes the buildd-manager to notice that build states are CANCELLING just before it tries to check an active build and rips it forcefully off the builder by resetting the virtual machine. (hence it only works on virtual builders)

Most of the branch is tests, the actual fix is easy.

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :

Hi Julian,

Nice branch and tests.

I notice in your tests there is an inconsistency whether you 'return d' or not. It is unclear to me that it is necessary. Could you take a pass and see if it is required?

Also, in these tests the lambda can be eliminated:

d.addCallback(lambda result: self.assertFalse(result))

can be

d.addCallback(self.assertFalse)

Is there a reason to prefer the wordier pattern?

In general the tests are non-trivial but well structured and easy to follow!

review: Approve (code)
Revision history for this message
Brad Crittenden (bac) wrote :

I forgot to ask that you clean up some lint.

Revision history for this message
Julian Edwards (julian-edwards) wrote :

On Friday 28 October 2011 17:54:24 you wrote:
> Review: Approve code
>
> Hi Julian,
>
> Nice branch and tests.

Thank you, and thanks for the review.

> I notice in your tests there is an inconsistency whether you 'return d' or
> not. It is unclear to me that it is necessary. Could you take a pass and
> see if it is required?

Argh, well spotted. I do indeed need to return d. It's not strictly
necessary but it helps the test runner spot unfired Deferreds.

> Also, in these tests the lambda can be eliminated:
>
> d.addCallback(lambda result: self.assertFalse(result))
>
> can be
>
> d.addCallback(self.assertFalse)
>
> Is there a reason to prefer the wordier pattern?

It's a left-over from some simplifications - I guess I can simplify it
further!

> In general the tests are non-trivial but well structured and easy to follow!

Thank you - I tried. Twisted makes it somewhat harder to follow than normal.
And the buildd-manager is not exactly the easiest part of LP ...

Cheers!

(PS yes I'll fix lint)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/app/browser/tales.py'
2--- lib/lp/app/browser/tales.py 2011-10-20 00:53:01 +0000
3+++ lib/lp/app/browser/tales.py 2011-10-31 10:44:35 +0000
4@@ -1107,6 +1107,8 @@
5 BuildStatus.BUILDING: {'src': "/@@/processing"},
6 BuildStatus.UPLOADING: {'src': "/@@/processing"},
7 BuildStatus.FAILEDTOUPLOAD: {'src': "/@@/build-failedtoupload"},
8+ BuildStatus.CANCELLING: {'src': "/@@/processing"},
9+ BuildStatus.CANCELLED: {'src': "/@@/build-failed"},
10 }
11
12 alt = '[%s]' % self._context.status.name
13
14=== modified file 'lib/lp/buildmaster/manager.py'
15--- lib/lp/buildmaster/manager.py 2011-10-31 10:44:35 +0000
16+++ lib/lp/buildmaster/manager.py 2011-10-31 10:44:35 +0000
17@@ -6,11 +6,8 @@
18 __metaclass__ = type
19
20 __all__ = [
21- 'BaseDispatchResult',
22 'BuilddManager',
23 'BUILDD_MANAGER_LOG_NAME',
24- 'FailDispatchResult',
25- 'ResetDispatchResult',
26 ]
27
28 import logging
29@@ -172,7 +169,7 @@
30 "job '%s' failure count: %s" % (
31 self.builder_name,
32 builder.failure_count,
33- build_farm_job.title,
34+ build_farm_job.title,
35 build_farm_job.failure_count))
36 else:
37 self.logger.info(
38@@ -187,6 +184,33 @@
39 exc_info=True)
40 transaction.abort()
41
42+ def checkCancellation(self, builder):
43+ """See if there is a pending cancellation request.
44+
45+ If the current build is in status CANCELLING then terminate it
46+ immediately.
47+
48+ :return: A deferred whose value is True if we cancelled the build.
49+ """
50+ if not builder.virtualized:
51+ return defer.succeed(False)
52+ buildqueue = self.builder.getBuildQueue()
53+ if not buildqueue:
54+ return defer.succeed(False)
55+ build = buildqueue.specific_job.build
56+ if build.status != BuildStatus.CANCELLING:
57+ return defer.succeed(False)
58+
59+ def resume_done(ignored):
60+ return defer.succeed(True)
61+
62+ self.logger.info("Cancelling build '%s'" % build.title)
63+ buildqueue.cancel()
64+ transaction.commit()
65+ d = builder.resumeSlaveHost()
66+ d.addCallback(resume_done)
67+ return d
68+
69 def scan(self):
70 """Probe the builder and update/dispatch/collect as appropriate.
71
72@@ -215,14 +239,6 @@
73
74 self.builder = get_builder(self.builder_name)
75
76- if self.builder.builderok:
77- # TODO: check build status for CANCELLING and virtual bulder
78- # a) set to CANCELLED,
79- # b) reset builder
80- d = self.builder.updateStatus(self.logger)
81- else:
82- d = defer.succeed(None)
83-
84 def status_updated(ignored):
85 # Commit the changes done while possibly rescuing jobs, to
86 # avoid holding table locks.
87@@ -271,6 +287,7 @@
88 # See if there is a job we can dispatch to the builder slave.
89
90 d = self.builder.findAndStartJob()
91+
92 def job_started(candidate):
93 if self.builder.currentjob is not None:
94 # After a successful dispatch we can reset the
95@@ -282,8 +299,22 @@
96 return None
97 return d.addCallback(job_started)
98
99- d.addCallback(status_updated)
100- d.addCallback(build_updated)
101+ def cancellation_checked(cancelled):
102+ if cancelled:
103+ return defer.succeed(None)
104+ d = self.builder.updateStatus(self.logger)
105+ d.addCallback(status_updated)
106+ d.addCallback(build_updated)
107+ return d
108+
109+ if self.builder.builderok:
110+ d = self.checkCancellation(self.builder)
111+ d.addCallback(cancellation_checked)
112+ else:
113+ d = defer.succeed(None)
114+ d.addCallback(status_updated)
115+ d.addCallback(build_updated)
116+
117 return d
118
119
120
121=== modified file 'lib/lp/buildmaster/model/buildfarmjob.py'
122--- lib/lp/buildmaster/model/buildfarmjob.py 2011-10-31 10:44:35 +0000
123+++ lib/lp/buildmaster/model/buildfarmjob.py 2011-10-31 10:44:35 +0000
124@@ -32,7 +32,6 @@
125 from storm.store import Store
126 from zope.component import (
127 ComponentLookupError,
128- getAdapter,
129 getUtility,
130 )
131 from zope.interface import (
132@@ -113,6 +112,10 @@
133 """See `IBuildFarmJobOld`."""
134 pass
135
136+ def jobCancel(self):
137+ """See `IBuildFarmJobOld`."""
138+ pass
139+
140 @staticmethod
141 def addCandidateSelectionCriteria(processor, virtualized):
142 """See `IBuildFarmJobOld`."""
143
144=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
145--- lib/lp/buildmaster/tests/test_manager.py 2011-10-26 12:31:14 +0000
146+++ lib/lp/buildmaster/tests/test_manager.py 2011-10-31 10:44:35 +0000
147@@ -82,8 +82,6 @@
148 'bob' builder.
149 """
150 super(TestSlaveScannerScan, self).setUp()
151- self.slave = self.useFixture(BuilddSlaveTestSetup())
152-
153 # Creating the required chroots needed for dispatching.
154 test_publisher = make_publisher()
155 ubuntu = getUtility(IDistributionSet).getByName('ubuntu')
156@@ -212,6 +210,7 @@
157
158 def testScanRescuesJobFromBrokenBuilder(self):
159 # The job assigned to a broken builder is rescued.
160+ self.useFixture(BuilddSlaveTestSetup())
161
162 # Sampledata builder is enabled and is assigned to an active job.
163 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
164@@ -420,6 +419,114 @@
165
166 return d.addCallback(check)
167
168+ def test_cancelling_a_build(self):
169+ # When scanning an in-progress build, if its state is CANCELLING
170+ # then the build should be stopped and moved to the CANCELLED state.
171+
172+ # Set up a building slave with a fake resume method so we can see
173+ # if it got called later.
174+ slave = BuildingSlave(build_id="8-1")
175+ call_counter = FakeMethod()
176+
177+ def fake_resume():
178+ call_counter()
179+ return defer.succeed((None, None, 0))
180+ slave.resume = fake_resume
181+
182+ # Set the sample data builder building with the slave from above.
183+ builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
184+ login('foo.bar@canonical.com')
185+ builder.builderok = True
186+ # For now, we can only cancel virtual builds.
187+ builder.virtualized = True
188+ builder.vm_host = "fake_vm_host"
189+ builder.setSlaveForTesting(slave)
190+ transaction.commit()
191+ login(ANONYMOUS)
192+ buildqueue = builder.currentjob
193+ self.assertBuildingJob(buildqueue, builder)
194+
195+ # Now set the build to CANCELLING.
196+ build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(buildqueue)
197+ build.status = BuildStatus.CANCELLING
198+
199+ # Run 'scan' and check its results.
200+ self.layer.switchDbUser(config.builddmaster.dbuser)
201+ scanner = self._getScanner()
202+ d = scanner.scan()
203+
204+ # The build state should be cancelled and we should have also
205+ # called the resume() method on the slave that resets the virtual
206+ # machine.
207+ def check_cancelled(ignore, builder, buildqueue):
208+ self.assertEqual(1, call_counter.call_count)
209+ self.assertEqual(BuildStatus.CANCELLED, build.status)
210+
211+ d.addCallback(check_cancelled, builder, buildqueue)
212+ return d
213+
214+
215+class TestCancellationChecking(TestCaseWithFactory):
216+ """Unit tests for the checkCancellation method."""
217+
218+ layer = ZopelessDatabaseLayer
219+ run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=20)
220+
221+ def setUp(self):
222+ super(TestCancellationChecking, self).setUp()
223+ builder_name = BOB_THE_BUILDER_NAME
224+ self.builder = getUtility(IBuilderSet)[builder_name]
225+ self.builder.virtualized = True
226+ self.scanner = SlaveScanner(builder_name, BufferLogger())
227+ self.scanner.builder = self.builder
228+ self.scanner.logger.name = 'slave-scanner'
229+
230+ def test_ignores_nonvirtual(self):
231+ # If the builder is nonvirtual make sure we return False.
232+ self.builder.virtualized = False
233+ d = self.scanner.checkCancellation(self.builder)
234+ return d.addCallback(self.assertFalse)
235+
236+ def test_ignores_no_buildqueue(self):
237+ # If the builder has no buildqueue associated,
238+ # make sure we return False.
239+ buildqueue = self.builder.currentjob
240+ buildqueue.reset()
241+ d = self.scanner.checkCancellation(self.builder)
242+ return d.addCallback(self.assertFalse)
243+
244+ def test_ignores_build_not_cancelling(self):
245+ # If the active build is not in a CANCELLING state, ignore it.
246+ buildqueue = self.builder.currentjob
247+ build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(buildqueue)
248+ build.status = BuildStatus.BUILDING
249+ d = self.scanner.checkCancellation(self.builder)
250+ return d.addCallback(self.assertFalse)
251+
252+ def test_cancelling_build_is_cancelled(self):
253+ # If a build is CANCELLING, make sure True is returned and the
254+ # slave was resumed.
255+ call_counter = FakeMethod()
256+
257+ def fake_resume():
258+ call_counter()
259+ return defer.succeed((None, None, 0))
260+ slave = OkSlave()
261+ slave.resume = fake_resume
262+ self.builder.vm_host = "fake_vm_host"
263+ self.builder.setSlaveForTesting(slave)
264+ buildqueue = self.builder.currentjob
265+ build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(buildqueue)
266+ build.status = BuildStatus.CANCELLING
267+
268+ def check(result):
269+ self.assertEqual(1, call_counter.call_count)
270+ self.assertTrue(result)
271+ self.assertEqual(BuildStatus.CANCELLED, build.status)
272+
273+ d = self.scanner.checkCancellation(self.builder)
274+ return d.addCallback(check)
275+
276
277 class TestBuilddManager(TestCase):
278
279@@ -602,7 +709,7 @@
280 increase if the process is logging to this file.
281 """
282 last_size = None
283- for poll in range(poll_repeat+1):
284+ for poll in range(poll_repeat + 1):
285 try:
286 statinfo = os.stat(filepath)
287 if last_size is None:
288@@ -646,7 +753,7 @@
289 self.assertTrue(is_file_growing(logfilepath))
290 # After rotating the log, the process keeps using the old file, no
291 # new file is created.
292- rotated_logfilepath = logfilepath+'.1'
293+ rotated_logfilepath = logfilepath + '.1'
294 os.rename(logfilepath, rotated_logfilepath)
295 self.assertTrue(is_file_growing(rotated_logfilepath))
296 self.assertFalse(os.access(logfilepath, os.F_OK))
297@@ -662,7 +769,7 @@
298 # 1000000 bytes but this is deactivated for the buildd manager.
299 test_setup = BuilddManagerTestSetup()
300 logfilepath = test_setup.logfile
301- rotated_logfilepath = logfilepath+'.1'
302+ rotated_logfilepath = logfilepath + '.1'
303 # Prefill the log file to just under 1000000 bytes.
304 test_setup.precreateLogfile(
305 "2010-07-27 12:36:54+0200 [-] Starting scanning cycle.\n", 18518)