Merge lp:~mars/launchpad/test-ghost-update-2 into lp:~launchpad/launchpad/ghost-line

Proposed by Māris Fogels on 2010-12-02
Status: Merged
Approved by: Māris Fogels on 2010-12-07
Approved revision: 11787
Merged at revision: 11784
Proposed branch: lp:~mars/launchpad/test-ghost-update-2
Merge into: lp:~launchpad/launchpad/ghost-line
Diff against target: 7763 lines (+3923/-2238)
36 files modified
lib/canonical/launchpad/icing/style-3-0.css.in (+6/-0)
lib/lp/app/browser/configure.zcml (+6/-0)
lib/lp/app/browser/linkchecker.py (+77/-0)
lib/lp/app/browser/stringformatter.py (+3/-1)
lib/lp/app/browser/tests/test_linkchecker.py (+83/-0)
lib/lp/app/configure.zcml (+0/-14)
lib/lp/app/doc/displaying-paragraphs-of-text.txt (+11/-11)
lib/lp/app/javascript/lp-links.js (+105/-0)
lib/lp/app/templates/base-layout-macros.pt (+9/-0)
lib/lp/bugs/windmill/tests/test_bug_commenting.py (+1/-1)
lib/lp/buildmaster/doc/builder.txt (+118/-2)
lib/lp/buildmaster/interfaces/builder.py (+62/-83)
lib/lp/buildmaster/manager.py (+468/-204)
lib/lp/buildmaster/model/builder.py (+224/-240)
lib/lp/buildmaster/model/buildfarmjobbehavior.py (+52/-60)
lib/lp/buildmaster/model/packagebuild.py (+0/-6)
lib/lp/buildmaster/tests/mock_slaves.py (+32/-157)
lib/lp/buildmaster/tests/test_builder.py (+154/-582)
lib/lp/buildmaster/tests/test_manager.py (+782/-248)
lib/lp/buildmaster/tests/test_packagebuild.py (+0/-12)
lib/lp/code/model/recipebuilder.py (+28/-32)
lib/lp/code/windmill/tests/test_branch_broken_links.py (+113/-0)
lib/lp/code/windmill/tests/test_branchmergeproposal_review.py (+1/-1)
lib/lp/soyuz/browser/tests/test_builder_views.py (+1/-1)
lib/lp/soyuz/doc/buildd-dispatching.txt (+371/-0)
lib/lp/soyuz/doc/buildd-slavescanner.txt (+876/-0)
lib/lp/soyuz/model/binarypackagebuildbehavior.py (+41/-59)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+8/-290)
lib/lp/soyuz/tests/test_doc.py (+6/-0)
lib/lp/testing/factory.py (+2/-8)
lib/lp/translations/doc/translationtemplatesbuildbehavior.txt (+114/-0)
lib/lp/translations/model/translationtemplatesbuildbehavior.py (+14/-20)
lib/lp/translations/stories/buildfarm/xx-build-summary.txt (+1/-1)
lib/lp/translations/tests/test_translationtemplatesbuildbehavior.py (+153/-202)
lib/lp_sitecustomize.py (+0/-3)
utilities/migrater/file-ownership.txt (+1/-0)
To merge this branch: bzr merge lp:~mars/launchpad/test-ghost-update-2
Reviewer Review Type Date Requested Status
Māris Fogels (community) Approve on 2010-12-02
Review via email: mp+42515@code.launchpad.net

Commit message

Testing the bundle-merge tarmac command

Description of the change

Testing the bundle-merge Tarmac command

To post a comment you must log in.
Māris Fogels (mars) :
review: Approve
Launchpad PQM Bot (launchpad-pqm) wrote :

No approved revision specified.

11786. By Māris Fogels on 2010-12-07

Merged r11806

11787. By Māris Fogels on 2010-12-07

Added a file for testing

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/canonical/launchpad/icing/style-3-0.css.in'
2--- lib/canonical/launchpad/icing/style-3-0.css.in 2010-09-23 11:17:45 +0000
3+++ lib/canonical/launchpad/icing/style-3-0.css.in 2010-12-07 16:29:13 +0000
4@@ -284,6 +284,12 @@
5 a.help.icon, a.sprite.maybe.help {
6 border: none;
7 }
8+a.invalid-link {
9+ disabled: True;
10+ color: #909090;
11+ text-decoration: none;
12+ cursor: default;
13+ }
14 img, a img {
15 /* No border on images that are links. */
16 border: none;
17
18=== modified file 'lib/lp/app/browser/configure.zcml'
19--- lib/lp/app/browser/configure.zcml 2010-10-15 01:27:04 +0000
20+++ lib/lp/app/browser/configure.zcml 2010-12-07 16:29:13 +0000
21@@ -98,6 +98,12 @@
22 template="../templates/launchpad-search-form.pt"
23 permission="zope.Public" />
24
25+ <browser:page
26+ for="*"
27+ name="+check-links"
28+ class="lp.app.browser.linkchecker.LinkCheckerAPI"
29+ permission="zope.Public"/>
30+
31 <!-- TALES namespaces. -->
32
33 <!-- TALES lp: namespace (should be deprecated) -->
34
35=== added file 'lib/lp/app/browser/linkchecker.py'
36--- lib/lp/app/browser/linkchecker.py 1970-01-01 00:00:00 +0000
37+++ lib/lp/app/browser/linkchecker.py 2010-12-07 16:29:13 +0000
38@@ -0,0 +1,77 @@
39+# Copyright 2009 Canonical Ltd. This software is licensed under the
40+# GNU Affero General Public License version 3 (see the file LICENSE).
41+
42+# pylint: disable-msg=E0211,E0213
43+
44+__metaclass__ = type
45+__all__ = [
46+ 'LinkCheckerAPI',
47+ ]
48+
49+import simplejson
50+from zope.component import getUtility
51+
52+from lp.app.errors import NotFoundError
53+from lp.code.errors import (
54+ CannotHaveLinkedBranch,
55+ InvalidNamespace,
56+ NoLinkedBranch,
57+ NoSuchBranch,
58+ )
59+from lp.code.interfaces.branchlookup import IBranchLookup
60+from lp.registry.interfaces.product import InvalidProductName
61+
62+
63+class LinkCheckerAPI:
64+ """Validates Launchpad shortcut links.
65+
66+ This class provides the endpoint of an Ajax call to .../+check-links.
67+ When invoked with a collection of links harvested from a page, it will
68+ check the validity of each one and send a response containing those that
69+ are invalid. Javascript on the page will set the style of invalid links to
70+ something appropriate.
71+
72+ This initial implementation supports processing links like the following:
73+ /+branch/foo/bar
74+
75+ The implementation can easily be extended to handle other forms by
76+ providing a method to handle the link type extracted from the json
77+ request.
78+ """
79+
80+ def __init__(self, context, request):
81+ # We currently only use the request.
82+ # self.context = context
83+ self.request = request
84+
85+ # Each link type has its own validation method.
86+ self.link_checkers = dict(
87+ branch_links=self.check_branch_links,
88+ )
89+
90+ def __call__(self):
91+ result = {}
92+ links_to_check_data = self.request.get('link_hrefs')
93+ links_to_check = simplejson.loads(links_to_check_data)
94+
95+ for link_type in links_to_check:
96+ links = links_to_check[link_type]
97+ invalid_links = self.link_checkers[link_type](links)
98+ result['invalid_'+link_type] = invalid_links
99+
100+ self.request.response.setHeader('Content-type', 'application/json')
101+ return simplejson.dumps(result)
102+
103+ def check_branch_links(self, links):
104+ """Check links of the form /+branch/foo/bar"""
105+ invalid_links = []
106+ branch_lookup = getUtility(IBranchLookup)
107+ for link in links:
108+ path = link.strip('/')[len('+branch/'):]
109+ try:
110+ branch_lookup.getByLPPath(path)
111+ except (CannotHaveLinkedBranch, InvalidNamespace,
112+ InvalidProductName, NoLinkedBranch, NoSuchBranch,
113+ NotFoundError):
114+ invalid_links.append(link)
115+ return invalid_links
116
117=== modified file 'lib/lp/app/browser/stringformatter.py'
118--- lib/lp/app/browser/stringformatter.py 2010-09-25 14:29:32 +0000
119+++ lib/lp/app/browser/stringformatter.py 2010-12-07 16:29:13 +0000
120@@ -274,7 +274,9 @@
121 return FormattersAPI._linkify_bug_number(
122 lp_url, path, trailers)
123 url = '/+branch/%s' % path
124- return '<a href="%s">%s</a>%s' % (
125+ # Mark the links with a 'branch-short-link' class so they can be
126+ # harvested and validated when the page is rendered.
127+ return '<a href="%s" class="branch-short-link">%s</a>%s' % (
128 cgi.escape(url, quote=True),
129 cgi.escape(lp_url),
130 cgi.escape(trailers))
131
132=== added file 'lib/lp/app/browser/tests/test_linkchecker.py'
133--- lib/lp/app/browser/tests/test_linkchecker.py 1970-01-01 00:00:00 +0000
134+++ lib/lp/app/browser/tests/test_linkchecker.py 2010-12-07 16:29:13 +0000
135@@ -0,0 +1,83 @@
136+# Copyright 2010 Canonical Ltd. This software is licensed under the
137+# GNU Affero General Public License version 3 (see the file LICENSE).
138+
139+"""Unit tests for the LinkCheckerAPI."""
140+
141+__metaclass__ = type
142+
143+from random import shuffle
144+
145+import simplejson
146+from zope.security.proxy import removeSecurityProxy
147+
148+from canonical.launchpad.webapp.servers import LaunchpadTestRequest
149+from canonical.testing.layers import DatabaseFunctionalLayer
150+from lp.app.browser.linkchecker import LinkCheckerAPI
151+from lp.testing import TestCaseWithFactory
152+
153+
154+class TestLinkCheckerAPI(TestCaseWithFactory):
155+
156+ layer = DatabaseFunctionalLayer
157+
158+ BRANCH_URL_TEMPLATE = '/+branch/%s'
159+
160+ def check_invalid_links(self, result_json, link_type, invalid_links):
161+ link_dict = simplejson.loads(result_json)
162+ links_to_check = link_dict[link_type]
163+ self.assertEqual(len(invalid_links), len(links_to_check))
164+ self.assertEqual(set(invalid_links), set(links_to_check))
165+
166+ def make_valid_links(self):
167+ branch = self.factory.makeProductBranch()
168+ valid_branch_url = self.BRANCH_URL_TEMPLATE % branch.unique_name
169+ product = self.factory.makeProduct()
170+ product_branch = self.factory.makeProductBranch(product=product)
171+ removeSecurityProxy(product).development_focus.branch = product_branch
172+ valid_product_url = self.BRANCH_URL_TEMPLATE % product.name
173+
174+ return [
175+ valid_branch_url,
176+ valid_product_url,
177+ ]
178+
179+ def make_invalid_links(self):
180+ return [
181+ self.BRANCH_URL_TEMPLATE % 'foo',
182+ self.BRANCH_URL_TEMPLATE % 'bar',
183+ ]
184+
185+ def invoke_branch_link_checker(
186+ self, valid_branch_urls=None, invalid_branch_urls=None):
187+ if valid_branch_urls is None:
188+ valid_branch_urls = {}
189+ if invalid_branch_urls is None:
190+ invalid_branch_urls = {}
191+
192+ branch_urls = list(valid_branch_urls)
193+ branch_urls.extend(invalid_branch_urls)
194+ shuffle(branch_urls)
195+
196+ links_to_check = dict(branch_links=branch_urls)
197+ link_json = simplejson.dumps(links_to_check)
198+
199+ request = LaunchpadTestRequest(link_hrefs=link_json)
200+ link_checker = LinkCheckerAPI(object(), request)
201+ result_json = link_checker()
202+ self.check_invalid_links(
203+ result_json, 'invalid_branch_links', invalid_branch_urls)
204+
205+ def test_only_valid_branchlinks(self):
206+ branch_urls = self.make_valid_links()
207+ self.invoke_branch_link_checker(valid_branch_urls=branch_urls)
208+
209+ def test_only_invalid_branchlinks(self):
210+ branch_urls = self.make_invalid_links()
211+ self.invoke_branch_link_checker(invalid_branch_urls=branch_urls)
212+
213+ def test_valid_and_invald_branchlinks(self):
214+ valid_branch_urls = self.make_valid_links()
215+ invalid_branch_urls = self.make_invalid_links()
216+ self.invoke_branch_link_checker(
217+ valid_branch_urls=valid_branch_urls,
218+ invalid_branch_urls=invalid_branch_urls)
219
220=== added file 'lib/lp/app/configure.zcml'
221--- lib/lp/app/configure.zcml 1970-01-01 00:00:00 +0000
222+++ lib/lp/app/configure.zcml 2010-12-07 16:29:13 +0000
223@@ -0,0 +1,14 @@
224+<!-- Copyright 2009 Canonical Ltd. This software is licensed under the
225+ GNU Affero General Public License version 3 (see the file LICENSE).
226+-->
227+
228+<configure
229+ xmlns="http://namespaces.zope.org/zope"
230+ xmlns:browser="http://namespaces.zope.org/browser"
231+ xmlns:i18n="http://namespaces.zope.org/i18n"
232+ xmlns:xmlrpc="http://namespaces.zope.org/xmlrpc"
233+ xmlns:lp="http://namespaces.canonical.com/lp"
234+ i18n_domain="launchpad">
235+ <include
236+ package=".browser"/>
237+</configure>
238
239=== removed file 'lib/lp/app/configure.zcml'
240--- lib/lp/app/configure.zcml 2009-07-17 02:25:09 +0000
241+++ lib/lp/app/configure.zcml 1970-01-01 00:00:00 +0000
242@@ -1,14 +0,0 @@
243-<!-- Copyright 2009 Canonical Ltd. This software is licensed under the
244- GNU Affero General Public License version 3 (see the file LICENSE).
245--->
246-
247-<configure
248- xmlns="http://namespaces.zope.org/zope"
249- xmlns:browser="http://namespaces.zope.org/browser"
250- xmlns:i18n="http://namespaces.zope.org/i18n"
251- xmlns:xmlrpc="http://namespaces.zope.org/xmlrpc"
252- xmlns:lp="http://namespaces.canonical.com/lp"
253- i18n_domain="launchpad">
254- <include
255- package=".browser"/>
256-</configure>
257
258=== modified file 'lib/lp/app/doc/displaying-paragraphs-of-text.txt'
259--- lib/lp/app/doc/displaying-paragraphs-of-text.txt 2010-10-09 16:36:22 +0000
260+++ lib/lp/app/doc/displaying-paragraphs-of-text.txt 2010-12-07 16:29:13 +0000
261@@ -357,17 +357,17 @@
262 ... 'lp:///foo\n'
263 ... 'lp:/foo\n')
264 >>> print test_tales('foo/fmt:text-to-html', foo=text)
265- <p><a href="/+branch/~foo/bar/baz">lp:~foo/bar/baz</a><br />
266- <a href="/+branch/~foo/bar/bug-123">lp:~foo/bar/bug-123</a><br />
267- <a href="/+branch/~foo/+junk/baz">lp:~foo/+junk/baz</a><br />
268- <a href="/+branch/~foo/ubuntu/jaunty/evolution/baz">lp:~foo/ubuntu/jaunty/evolution/baz</a><br />
269- <a href="/+branch/foo/bar">lp:foo/bar</a><br />
270- <a href="/+branch/foo">lp:foo</a><br />
271- <a href="/+branch/foo">lp:foo</a>,<br />
272- <a href="/+branch/foo/bar">lp:foo/bar</a>.<br />
273- <a href="/+branch/foo/bar/baz">lp:foo/bar/baz</a><br />
274- <a href="/+branch/foo">lp:///foo</a><br />
275- <a href="/+branch/foo">lp:/foo</a></p>
276+ <p><a href="/+branch/~foo/bar/baz" class="...">lp:~foo/bar/baz</a><br />
277+ <a href="/+branch/~foo/bar/bug-123" class="...">lp:~foo/bar/bug-123</a><br />
278+ <a href="/+branch/~foo/+junk/baz" class="...">lp:~foo/+junk/baz</a><br />
279+ <a href="/+branch/~foo/ubuntu/jaunty/evolution/baz" class="...">lp:~foo/ubuntu/jaunty/evolution/baz</a><br />
280+ <a href="/+branch/foo/bar" class="...">lp:foo/bar</a><br />
281+ <a href="/+branch/foo" class="...">lp:foo</a><br />
282+ <a href="/+branch/foo" class="...">lp:foo</a>,<br />
283+ <a href="/+branch/foo/bar" class="...">lp:foo/bar</a>.<br />
284+ <a href="/+branch/foo/bar/baz" class="...">lp:foo/bar/baz</a><br />
285+ <a href="/+branch/foo" class="...">lp:///foo</a><br />
286+ <a href="/+branch/foo" class="...">lp:/foo</a></p>
287
288 Text that looks like a branch reference, but is followed only by digits is
289 treated as a link to a bug.
290
291=== added file 'lib/lp/app/javascript/lp-links.js'
292--- lib/lp/app/javascript/lp-links.js 1970-01-01 00:00:00 +0000
293+++ lib/lp/app/javascript/lp-links.js 2010-12-07 16:29:13 +0000
294@@ -0,0 +1,105 @@
295+/**
296+ * Launchpad utilities for manipulating links.
297+ *
298+ * @module app
299+ * @submodule links
300+ */
301+
302+YUI.add('lp.app.links', function(Y) {
303+
304+ function harvest_links(Y, links_holder, link_class, link_type) {
305+ // Get any links of the specified link_class and store them as the
306+ // specified link_type in the specified links_holder
307+ var link_info = new Array();
308+ Y.all('.'+link_class).each(function(link) {
309+ var href = link.getAttribute('href');
310+ if( link_info.indexOf(href)<0 ) {
311+ link_info.push(href);
312+ }
313+ });
314+ if( link_info.length > 0 ) {
315+ links_holder[link_type] = link_info;
316+ }
317+ }
318+
319+ function process_invalid_links(
320+ Y, link_info, link_class, link_type, title) {
321+ // We have a collection of invalid links possibly containing links of
322+ // type link_type, so we need to remove the existing link_class,
323+ // replace it with an invalid-link class, and set the link title.
324+ var invalid_links = Y.Array(link_info['invalid_'+link_type]);
325+
326+ if( invalid_links.length > 0) {
327+ Y.all('.'+link_class).each(function(link) {
328+ var href = link.getAttribute('href');
329+ if( invalid_links.indexOf(href)>=0 ) {
330+ var msg = title + href;
331+ link.removeClass(link_class);
332+ link.addClass('invalid-link');
333+ link.title = msg
334+ link.on('click', function(e) {
335+ e.halt();
336+ alert(msg);
337+ });
338+ }
339+ });
340+ }
341+ }
342+
343+ var links = Y.namespace('lp.app.links');
344+
345+ links.check_valid_lp_links = function() {
346+ // Grabs any lp: style links on the page and checks that they are
347+ // valid. Invalid ones have their class changed to "invalid-link".
348+ // ATM, we only handle +branch links.
349+
350+ var links_to_check = {}
351+
352+ // We get all the links with defined css classes.
353+ // At the moment, we just handle branch links, but in future...
354+ harvest_links(Y, links_to_check, 'branch-short-link', 'branch_links');
355+
356+ // Do we have anything to do?
357+ if( Y.Object.size(links_to_check) == 0 ) {
358+ return;
359+ }
360+
361+ // Get the final json to send
362+ var json_link_info = Y.JSON.stringify(links_to_check);
363+ var qs = '';
364+ qs = LP.client.append_qs(qs, 'link_hrefs', json_link_info);
365+
366+ var config = {
367+ on: {
368+ failure: function(id, response, args) {
369+ // If we have firebug installed, log the error.
370+ if( console != undefined ) {
371+ console.log("Link Check Error: " + args + ': '
372+ + response.status + ' - ' +
373+ response.statusText + ' - '
374+ + response.responseXML);
375+ }
376+ },
377+ success: function(id, response) {
378+ var link_info = Y.JSON.parse(response.responseText)
379+ // ATM, we just handle branch links, but in future...
380+ process_invalid_links(Y, link_info, 'branch-short-link',
381+ 'branch_links', "Invalid branch: ");
382+ }
383+ }
384+ }
385+ var uri = '+check-links';
386+ var on = Y.merge(config.on);
387+ var client = this;
388+ var y_config = { method: "POST",
389+ headers: {'Accept': 'application/json'},
390+ on: on,
391+ 'arguments': [client, uri],
392+ data: qs};
393+ Y.io(uri, y_config);
394+ };
395+
396+}, "0.1", {"requires": [
397+ "base", "node", "io", "dom", "json"
398+ ]});
399+
400
401=== modified file 'lib/lp/app/templates/base-layout-macros.pt'
402--- lib/lp/app/templates/base-layout-macros.pt 2010-10-25 13:16:10 +0000
403+++ lib/lp/app/templates/base-layout-macros.pt 2010-12-07 16:29:13 +0000
404@@ -175,6 +175,8 @@
405 <script type="text/javascript"
406 tal:attributes="src string:${lp_js}/app/lp-mochi.js"></script>
407 <script type="text/javascript"
408+ tal:attributes="src string:${lp_js}/app/lp-links.js"></script>
409+ <script type="text/javascript"
410 tal:attributes="src string:${lp_js}/app/dragscroll.js"></script>
411 <script type="text/javascript"
412 tal:attributes="src string:${lp_js}/app/picker.js"></script>
413@@ -304,6 +306,13 @@
414 // anywhere outside of it.
415 Y.on('click', handleClickOnPage, window);
416 });
417+
418+ LPS.use('lp.app.links',
419+ function(Y) {
420+ Y.on('load', function(e) {
421+ Y.lp.app.links.check_valid_lp_links();
422+ }, window);
423+ });
424 </script>
425 </metal:page-javascript>
426
427
428=== modified file 'lib/lp/bugs/windmill/tests/test_bug_commenting.py'
429--- lib/lp/bugs/windmill/tests/test_bug_commenting.py 2010-08-20 20:31:18 +0000
430+++ lib/lp/bugs/windmill/tests/test_bug_commenting.py 2010-12-07 16:29:13 +0000
431@@ -18,7 +18,7 @@
432 WAIT_ELEMENT_COMPLETE = u'30000'
433 WAIT_CHECK_CHANGE = u'1000'
434 ADD_COMMENT_BUTTON = (
435- u'//input[@id="field.actions.save" and @class="button js-action"]')
436+ u'//input[@id="field.actions.save" and contains(@class, "button")]')
437
438
439 class TestBugCommenting(WindmillTestCase):
440
441=== modified file 'lib/lp/buildmaster/doc/builder.txt'
442--- lib/lp/buildmaster/doc/builder.txt 2010-09-24 12:10:52 +0000
443+++ lib/lp/buildmaster/doc/builder.txt 2010-12-07 16:29:13 +0000
444@@ -19,6 +19,9 @@
445 As expected, it implements IBuilder.
446
447 >>> from canonical.launchpad.webapp.testing import verifyObject
448+ >>> from lp.buildmaster.interfaces.builder import IBuilder
449+ >>> verifyObject(IBuilder, builder)
450+ True
451
452 >>> print builder.name
453 bob
454@@ -83,7 +86,7 @@
455 The 'new' method will create a new builder in the database.
456
457 >>> bnew = builderset.new(1, 'http://dummy.com:8221/', 'dummy',
458- ... 'Dummy Title', 'eh ?', 1)
459+ ... 'Dummy Title', 'eh ?', 1)
460 >>> bnew.name
461 u'dummy'
462
463@@ -167,7 +170,7 @@
464 >>> recipe_bq.processor = i386_family.processors[0]
465 >>> recipe_bq.virtualized = True
466 >>> transaction.commit()
467-
468+
469 >>> queue_sizes = builderset.getBuildQueueSizes()
470 >>> print queue_sizes['virt']['386']
471 (1L, datetime.timedelta(0, 64))
472@@ -185,3 +188,116 @@
473
474 >>> print queue_sizes['virt']['386']
475 (2L, datetime.timedelta(0, 128))
476+
477+
478+Resuming buildd slaves
479+======================
480+
481+Virtual slaves are resumed using a command specified in the
482+configuration profile. The production configuration uses an SSH trigger
483+account accessed via a private key available on the builddmaster
484+machine (which uses the ftpmaster configuration profile), as in:
485+
486+{{{
487+ssh ~/.ssh/ppa-reset-key ppa@%(vm_host)s
488+}}}
489+
490+The test configuration uses a fake command that can be run on a
491+development machine and allows us to test the important features used
492+in production, such as 'vm_host' variable replacement.
493+
494+ >>> from canonical.config import config
495+ >>> config.builddmaster.vm_resume_command
496+ 'echo %(vm_host)s'
497+
498+Before performing the command, resumeSlaveHost checks that the builder
499+is indeed virtual and raises CannotResumeHost if it isn't.
500+
501+ >>> bob = getUtility(IBuilderSet)['bob']
502+ >>> bob.resumeSlaveHost()
503+ Traceback (most recent call last):
504+ ...
505+ CannotResumeHost: Builder is not virtualized.
506+
507+For testing purposes, resumeSlaveHost returns the stdout and stderr
508+buffers resulting from the command.
509+
510+ >>> frog = getUtility(IBuilderSet)['frog']
511+ >>> out, err = frog.resumeSlaveHost()
512+ >>> print out.strip()
513+ localhost-host.ppa
514+
515+If the specified command fails, resumeSlaveHost also raises a
516+CannotResumeHost exception containing the resulting stdout and stderr.
517+
518+ # The command must have a vm_host dict key and when executed,
519+ # have a returncode that is not 0.
520+ >>> vm_resume_command = """
521+ ... [builddmaster]
522+ ... vm_resume_command: test "%(vm_host)s = 'false'"
523+ ... """
524+ >>> config.push('vm_resume_command', vm_resume_command)
525+ >>> frog.resumeSlaveHost()
526+ Traceback (most recent call last):
527+ ...
528+ CannotResumeHost: Resuming failed:
529+ OUT:
530+ <BLANKLINE>
531+ ERR:
532+ <BLANKLINE>
533+
534+Restore default value for resume command.
535+
536+ >>> config_data = config.pop('vm_resume_command')
537+
538+
539+Rescuing lost slaves
540+====================
541+
542+Builder.rescueIfLost() checks the build ID reported in the slave status
543+against the database. If it isn't building what we think it should be,
544+the current build will be aborted and the slave cleaned in preparation
545+for a new task. The decision about the slave's correctness is left up
546+to IBuildFarmJobBehavior.verifySlaveBuildCookie -- for these examples we
547+will use a special behavior that just checks if the cookie reads 'good'.
548+
549+ >>> import logging
550+ >>> from lp.buildmaster.interfaces.builder import CorruptBuildCookie
551+ >>> from lp.buildmaster.tests.mock_slaves import (
552+ ... BuildingSlave, MockBuilder, OkSlave, WaitingSlave)
553+
554+ >>> class TestBuildBehavior:
555+ ... def verifySlaveBuildCookie(self, cookie):
556+ ... if cookie != 'good':
557+ ... raise CorruptBuildCookie('Bad value')
558+
559+ >>> def rescue_slave_if_lost(slave):
560+ ... builder = MockBuilder('mock', slave, TestBuildBehavior())
561+ ... builder.rescueIfLost(logging.getLogger())
562+
563+An idle slave is not rescued.
564+
565+ >>> rescue_slave_if_lost(OkSlave())
566+
567+Slaves building or having built the correct build are not rescued
568+either.
569+
570+ >>> rescue_slave_if_lost(BuildingSlave(build_id='good'))
571+ >>> rescue_slave_if_lost(WaitingSlave(build_id='good'))
572+
573+But if a slave is building the wrong ID, it is declared lost and
574+an abort is attempted. MockSlave prints out a message when it is aborted
575+or cleaned.
576+
577+ >>> rescue_slave_if_lost(BuildingSlave(build_id='bad'))
578+ Aborting slave
579+ INFO:root:Builder 'mock' rescued from 'bad': 'Bad value'
580+
581+Slaves having completed an incorrect build are also declared lost,
582+but there's no need to abort a completed build. Such builders are
583+instead simply cleaned, ready for the next build.
584+
585+ >>> rescue_slave_if_lost(WaitingSlave(build_id='bad'))
586+ Cleaning slave
587+ INFO:root:Builder 'mock' rescued from 'bad': 'Bad value'
588+
589
590=== modified file 'lib/lp/buildmaster/interfaces/builder.py'
591--- lib/lp/buildmaster/interfaces/builder.py 2010-10-18 11:57:09 +0000
592+++ lib/lp/buildmaster/interfaces/builder.py 2010-12-07 16:29:13 +0000
593@@ -154,6 +154,11 @@
594
595 currentjob = Attribute("BuildQueue instance for job being processed.")
596
597+ is_available = Bool(
598+ title=_("Whether or not a builder is available for building "
599+ "new jobs. "),
600+ required=False)
601+
602 failure_count = Int(
603 title=_('Failure Count'), required=False, default=0,
604 description=_("Number of consecutive failures for this builder."))
605@@ -168,74 +173,32 @@
606 def resetFailureCount():
607 """Set the failure_count back to zero."""
608
609- def failBuilder(reason):
610- """Mark builder as failed for a given reason."""
611-
612- def setSlaveForTesting(proxy):
613- """Sets the RPC proxy through which to operate the build slave."""
614-
615- def verifySlaveBuildCookie(slave_build_id):
616- """Verify that a slave's build cookie is consistent.
617-
618- This should delegate to the current `IBuildFarmJobBehavior`.
619- """
620-
621- def transferSlaveFileToLibrarian(file_sha1, filename, private):
622- """Transfer a file from the slave to the librarian.
623-
624- :param file_sha1: The file's sha1, which is how the file is addressed
625- in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog'
626- will cause the build log to be retrieved and gzipped.
627- :param filename: The name of the file to be given to the librarian file
628- alias.
629- :param private: True if the build is for a private archive.
630- :return: A librarian file alias.
631- """
632-
633- def getBuildQueue():
634- """Return a `BuildQueue` if there's an active job on this builder.
635-
636- :return: A BuildQueue, or None.
637- """
638-
639- def getCurrentBuildFarmJob():
640- """Return a `BuildFarmJob` for this builder."""
641-
642- # All methods below here return Deferred.
643-
644- def isAvailable():
645- """Whether or not a builder is available for building new jobs.
646-
647- :return: A Deferred that fires with True or False, depending on
648- whether the builder is available or not.
649+ def checkSlaveAlive():
650+ """Check that the buildd slave is alive.
651+
652+ This pings the slave over the network via the echo method and looks
653+ for the sent message as the reply.
654+
655+ :raises BuildDaemonError: When the slave is down.
656 """
657
658 def rescueIfLost(logger=None):
659 """Reset the slave if its job information doesn't match the DB.
660
661- This checks the build ID reported in the slave status against the
662- database. If it isn't building what we think it should be, the current
663- build will be aborted and the slave cleaned in preparation for a new
664- task. The decision about the slave's correctness is left up to
665- `IBuildFarmJobBehavior.verifySlaveBuildCookie`.
666-
667- :return: A Deferred that fires when the dialog with the slave is
668- finished. It does not have a return value.
669+ If the builder is BUILDING or WAITING but has a build ID string
670+ that doesn't match what is stored in the DB, we have to dismiss
671+ its current actions and clean the slave for another job, assuming
672+ the XMLRPC is working properly at this point.
673 """
674
675 def updateStatus(logger=None):
676- """Update the builder's status by probing it.
677-
678- :return: A Deferred that fires when the dialog with the slave is
679- finished. It does not have a return value.
680- """
681+ """Update the builder's status by probing it."""
682
683 def cleanSlave():
684- """Clean any temporary files from the slave.
685-
686- :return: A Deferred that fires when the dialog with the slave is
687- finished. It does not have a return value.
688- """
689+ """Clean any temporary files from the slave."""
690+
691+ def failBuilder(reason):
692+ """Mark builder as failed for a given reason."""
693
694 def requestAbort():
695 """Ask that a build be aborted.
696@@ -243,9 +206,6 @@
697 This takes place asynchronously: Actually killing everything running
698 can take some time so the slave status should be queried again to
699 detect when the abort has taken effect. (Look for status ABORTED).
700-
701- :return: A Deferred that fires when the dialog with the slave is
702- finished. It does not have a return value.
703 """
704
705 def resumeSlaveHost():
706@@ -257,35 +217,37 @@
707 :raises: CannotResumeHost: if builder is not virtual or if the
708 configuration command has failed.
709
710- :return: A Deferred that fires when the resume operation finishes,
711- whose value is a (stdout, stderr) tuple for success, or a Failure
712- whose value is a CannotResumeHost exception.
713+ :return: command stdout and stderr buffers as a tuple.
714 """
715
716+ def setSlaveForTesting(proxy):
717+ """Sets the RPC proxy through which to operate the build slave."""
718+
719 def slaveStatus():
720 """Get the slave status for this builder.
721
722- :return: A Deferred which fires when the slave dialog is complete.
723- Its value is a dict containing at least builder_status, but
724- potentially other values included by the current build
725- behavior.
726+ :return: a dict containing at least builder_status, but potentially
727+ other values included by the current build behavior.
728 """
729
730 def slaveStatusSentence():
731 """Get the slave status sentence for this builder.
732
733- :return: A Deferred which fires when the slave dialog is complete.
734- Its value is a tuple with the first element containing the
735- slave status, build_id-queue-id and then optionally more
736- elements depending on the status.
737+ :return: A tuple with the first element containing the slave status,
738+ build_id-queue-id and then optionally more elements depending on
739+ the status.
740+ """
741+
742+ def verifySlaveBuildCookie(slave_build_id):
743+ """Verify that a slave's build cookie is consistent.
744+
745+ This should delegate to the current `IBuildFarmJobBehavior`.
746 """
747
748 def updateBuild(queueItem):
749 """Verify the current build job status.
750
751 Perform the required actions for each state.
752-
753- :return: A Deferred that fires when the slave dialog is finished.
754 """
755
756 def startBuild(build_queue_item, logger):
757@@ -293,10 +255,21 @@
758
759 :param build_queue_item: A BuildQueueItem to build.
760 :param logger: A logger to be used to log diagnostic information.
761-
762- :return: A Deferred that fires after the dispatch has completed whose
763- value is None, or a Failure that contains an exception
764- explaining what went wrong.
765+ :raises BuildSlaveFailure: When the build slave fails.
766+ :raises CannotBuild: When a build cannot be started for some reason
767+ other than the build slave failing.
768+ """
769+
770+ def transferSlaveFileToLibrarian(file_sha1, filename, private):
771+ """Transfer a file from the slave to the librarian.
772+
773+ :param file_sha1: The file's sha1, which is how the file is addressed
774+ in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog'
775+ will cause the build log to be retrieved and gzipped.
776+ :param filename: The name of the file to be given to the librarian file
777+ alias.
778+ :param private: True if the build is for a private archive.
779+ :return: A librarian file alias.
780 """
781
782 def handleTimeout(logger, error_message):
783@@ -311,8 +284,6 @@
784
785 :param logger: The logger object to be used for logging.
786 :param error_message: The error message to be used for logging.
787- :return: A Deferred that fires after the virtual slave was resumed
788- or immediately if it's a non-virtual slave.
789 """
790
791 def findAndStartJob(buildd_slave=None):
792@@ -320,9 +291,17 @@
793
794 :param buildd_slave: An optional buildd slave that this builder should
795 talk to.
796- :return: A Deferred whose value is the `IBuildQueue` instance
797- found or None if no job was found.
798- """
799+ :return: the `IBuildQueue` instance found or None if no job was found.
800+ """
801+
802+ def getBuildQueue():
803+ """Return a `BuildQueue` if there's an active job on this builder.
804+
805+ :return: A BuildQueue, or None.
806+ """
807+
808+ def getCurrentBuildFarmJob():
809+ """Return a `BuildFarmJob` for this builder."""
810
811
812 class IBuilderSet(Interface):
813
814=== modified file 'lib/lp/buildmaster/manager.py'
815--- lib/lp/buildmaster/manager.py 2010-10-20 12:28:46 +0000
816+++ lib/lp/buildmaster/manager.py 2010-12-07 16:29:13 +0000
817@@ -10,10 +10,13 @@
818 'BuilddManager',
819 'BUILDD_MANAGER_LOG_NAME',
820 'FailDispatchResult',
821+ 'RecordingSlave',
822 'ResetDispatchResult',
823+ 'buildd_success_result_map',
824 ]
825
826 import logging
827+import os
828
829 import transaction
830 from twisted.application import service
831@@ -21,27 +24,129 @@
832 defer,
833 reactor,
834 )
835-from twisted.internet.task import LoopingCall
836+from twisted.protocols.policies import TimeoutMixin
837 from twisted.python import log
838+from twisted.python.failure import Failure
839+from twisted.web import xmlrpc
840 from zope.component import getUtility
841
842+from canonical.config import config
843+from canonical.launchpad.webapp import urlappend
844+from lp.services.database import write_transaction
845 from lp.buildmaster.enums import BuildStatus
846-from lp.buildmaster.interfaces.buildfarmjobbehavior import (
847- BuildBehaviorMismatch,
848- )
849-from lp.buildmaster.model.builder import Builder
850-from lp.buildmaster.interfaces.builder import (
851- BuildDaemonError,
852- BuildSlaveFailure,
853- CannotBuild,
854- CannotFetchFile,
855- CannotResumeHost,
856- )
857+from lp.services.twistedsupport.processmonitor import ProcessWithTimeout
858
859
860 BUILDD_MANAGER_LOG_NAME = "slave-scanner"
861
862
863+buildd_success_result_map = {
864+ 'ensurepresent': True,
865+ 'build': 'BuilderStatus.BUILDING',
866+ }
867+
868+
869+class QueryWithTimeoutProtocol(xmlrpc.QueryProtocol, TimeoutMixin):
870+ """XMLRPC query protocol with a configurable timeout.
871+
872+ XMLRPC queries using this protocol will be unconditionally closed
873+ when the timeout is elapsed. The timeout is fetched from the context
874+ Launchpad configuration file (`config.builddmaster.socket_timeout`).
875+ """
876+ def connectionMade(self):
877+ xmlrpc.QueryProtocol.connectionMade(self)
878+ self.setTimeout(config.builddmaster.socket_timeout)
879+
880+
881+class QueryFactoryWithTimeout(xmlrpc._QueryFactory):
882+ """XMLRPC client factory with timeout support."""
883+ # Make this factory quiet.
884+ noisy = False
885+ # Use the protocol with timeout support.
886+ protocol = QueryWithTimeoutProtocol
887+
888+
889+class RecordingSlave:
890+ """An RPC proxy for buildd slaves that records instructions to the latter.
891+
892+ The idea here is to merely record the instructions that the slave-scanner
893+ issues to the buildd slaves and "replay" them a bit later in asynchronous
894+ and parallel fashion.
895+
896+ By dealing with a number of buildd slaves in parallel we remove *the*
897+ major slave-scanner throughput issue while avoiding large-scale changes to
898+ its code base.
899+ """
900+
901+ def __init__(self, name, url, vm_host):
902+ self.name = name
903+ self.url = url
904+ self.vm_host = vm_host
905+
906+ self.resume_requested = False
907+ self.calls = []
908+
909+ def __repr__(self):
910+ return '<%s:%s>' % (self.name, self.url)
911+
912+ def cacheFile(self, logger, libraryfilealias):
913+ """Cache the file on the server."""
914+ self.ensurepresent(
915+ libraryfilealias.content.sha1, libraryfilealias.http_url, '', '')
916+
917+ def sendFileToSlave(self, *args):
918+ """Helper to send a file to this builder."""
919+ return self.ensurepresent(*args)
920+
921+ def ensurepresent(self, *args):
922+ """Download files needed for the build."""
923+ self.calls.append(('ensurepresent', args))
924+ result = buildd_success_result_map.get('ensurepresent')
925+ return [result, 'Download']
926+
927+ def build(self, *args):
928+ """Perform the build."""
929+ # XXX: This method does not appear to be used.
930+ self.calls.append(('build', args))
931+ result = buildd_success_result_map.get('build')
932+ return [result, args[0]]
933+
934+ def resume(self):
935+ """Record the request to resume the builder.
936+
937+ Always succeed.
938+
939+ :return: a (stdout, stderr, subprocess exitcode) triple
940+ """
941+ self.resume_requested = True
942+ return ['', '', 0]
943+
944+ def resumeSlave(self, clock=None):
945+ """Resume the builder in an asynchronous fashion.
946+
947+ Uses the configuration command-line in the same way
948+ `BuilddSlave.resume` does.
949+
950+ Also use the builddmaster configuration 'socket_timeout' as
951+ the process timeout.
952+
953+ :param clock: An optional twisted.internet.task.Clock to override
954+ the default clock. For use in tests.
955+
956+ :return: a Deferred
957+ """
958+ resume_command = config.builddmaster.vm_resume_command % {
959+ 'vm_host': self.vm_host}
960+ # Twisted API requires strings and the configuration provides unicode.
961+ resume_argv = [str(term) for term in resume_command.split()]
962+
963+ d = defer.Deferred()
964+ p = ProcessWithTimeout(
965+ d, config.builddmaster.socket_timeout, clock=clock)
966+ p.spawnProcess(resume_argv[0], tuple(resume_argv))
967+ return d
968+
969+
970 def get_builder(name):
971 """Helper to return the builder given the slave for this request."""
972 # Avoiding circular imports.
973@@ -54,12 +159,9 @@
974 # builder.currentjob hides a complicated query, don't run it twice.
975 # See bug 623281.
976 current_job = builder.currentjob
977- if current_job is None:
978- job_failure_count = 0
979- else:
980- job_failure_count = current_job.specific_job.build.failure_count
981+ build_job = current_job.specific_job.build
982
983- if builder.failure_count == job_failure_count and current_job is not None:
984+ if builder.failure_count == build_job.failure_count:
985 # If the failure count for the builder is the same as the
986 # failure count for the job being built, then we cannot
987 # tell whether the job or the builder is at fault. The best
988@@ -68,28 +170,17 @@
989 current_job.reset()
990 return
991
992- if builder.failure_count > job_failure_count:
993+ if builder.failure_count > build_job.failure_count:
994 # The builder has failed more than the jobs it's been
995- # running.
996-
997- # Re-schedule the build if there is one.
998- if current_job is not None:
999- current_job.reset()
1000-
1001- # We are a little more tolerant with failing builders than
1002- # failing jobs because sometimes they get unresponsive due to
1003- # human error, flaky networks etc. We expect the builder to get
1004- # better, whereas jobs are very unlikely to get better.
1005- if builder.failure_count >= Builder.FAILURE_THRESHOLD:
1006- # It's also gone over the threshold so let's disable it.
1007- builder.failBuilder(fail_notes)
1008+ # running, so let's disable it and re-schedule the build.
1009+ builder.failBuilder(fail_notes)
1010+ current_job.reset()
1011 else:
1012 # The job is the culprit! Override its status to 'failed'
1013 # to make sure it won't get automatically dispatched again,
1014 # and remove the buildqueue request. The failure should
1015 # have already caused any relevant slave data to be stored
1016 # on the build record so don't worry about that here.
1017- build_job = current_job.specific_job.build
1018 build_job.status = BuildStatus.FAILEDTOBUILD
1019 builder.currentjob.destroySelf()
1020
1021@@ -99,108 +190,133 @@
1022 # next buildd scan.
1023
1024
1025+class BaseDispatchResult:
1026+ """Base class for *DispatchResult variations.
1027+
1028+ It will be extended to represent dispatching results and allow
1029+ homogeneous processing.
1030+ """
1031+
1032+ def __init__(self, slave, info=None):
1033+ self.slave = slave
1034+ self.info = info
1035+
1036+ def _cleanJob(self, job):
1037+ """Clean up in case of builder reset or dispatch failure."""
1038+ if job is not None:
1039+ job.reset()
1040+
1041+ def assessFailureCounts(self):
1042+ """View builder/job failure_count and work out which needs to die.
1043+
1044+ :return: True if we disabled something, False if we did not.
1045+ """
1046+ builder = get_builder(self.slave.name)
1047+ assessFailureCounts(builder, self.info)
1048+
1049+ def ___call__(self):
1050+ raise NotImplementedError(
1051+ "Call sites must define an evaluation method.")
1052+
1053+
1054+class FailDispatchResult(BaseDispatchResult):
1055+ """Represents a communication failure while dispatching a build job.
1056+
1057+ When evaluated this object marks the corresponding `IBuilder` as
1058+ 'NOK' with the given text as 'failnotes'. It also cleans up the running
1059+ job (`IBuildQueue`).
1060+ """
1061+
1062+ def __repr__(self):
1063+ return '%r failure (%s)' % (self.slave, self.info)
1064+
1065+ @write_transaction
1066+ def __call__(self):
1067+ self.assessFailureCounts()
1068+
1069+
1070+class ResetDispatchResult(BaseDispatchResult):
1071+ """Represents a failure to reset a builder.
1072+
1073+ When evaluated this object simply cleans up the running job
1074+ (`IBuildQueue`) and marks the builder down.
1075+ """
1076+
1077+ def __repr__(self):
1078+ return '%r reset failure' % self.slave
1079+
1080+ @write_transaction
1081+ def __call__(self):
1082+ builder = get_builder(self.slave.name)
1083+ # Builders that fail to reset should be disabled as per bug
1084+ # 563353.
1085+ # XXX Julian bug=586362
1086+ # This is disabled until this code is not also used for dispatch
1087+ # failures where we *don't* want to disable the builder.
1088+ # builder.failBuilder(self.info)
1089+ self._cleanJob(builder.currentjob)
1090+
1091+
1092 class SlaveScanner:
1093 """A manager for a single builder."""
1094
1095- # The interval between each poll cycle, in seconds. We'd ideally
1096- # like this to be lower but 5 seems a reasonable compromise between
1097- # responsivity and load on the database server, since in each cycle
1098- # we can run quite a few queries.
1099 SCAN_INTERVAL = 5
1100
1101+ # These are for the benefit of tests; see `TestingSlaveScanner`.
1102+ # It pokes fake versions in here so that it can verify methods were
1103+ # called. The tests should really be using FakeMethod() though.
1104+ reset_result = ResetDispatchResult
1105+ fail_result = FailDispatchResult
1106+
1107 def __init__(self, builder_name, logger):
1108 self.builder_name = builder_name
1109 self.logger = logger
1110+ self._deferred_list = []
1111+
1112+ def scheduleNextScanCycle(self):
1113+ """Schedule another scan of the builder some time in the future."""
1114+ self._deferred_list = []
1115+ # XXX: Change this to use LoopingCall.
1116+ reactor.callLater(self.SCAN_INTERVAL, self.startCycle)
1117
1118 def startCycle(self):
1119 """Scan the builder and dispatch to it or deal with failures."""
1120- self.loop = LoopingCall(self.singleCycle)
1121- self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
1122- return self.stopping_deferred
1123-
1124- def stopCycle(self):
1125- """Terminate the LoopingCall."""
1126- self.loop.stop()
1127-
1128- def singleCycle(self):
1129 self.logger.debug("Scanning builder: %s" % self.builder_name)
1130- d = self.scan()
1131-
1132- d.addErrback(self._scanFailed)
1133- return d
1134-
1135- def _scanFailed(self, failure):
1136- """Deal with failures encountered during the scan cycle.
1137-
1138- 1. Print the error in the log
1139- 2. Increment and assess failure counts on the builder and job.
1140- """
1141- # Make sure that pending database updates are removed as it
1142- # could leave the database in an inconsistent state (e.g. The
1143- # job says it's running but the buildqueue has no builder set).
1144- transaction.abort()
1145-
1146- # If we don't recognise the exception include a stack trace with
1147- # the error.
1148- error_message = failure.getErrorMessage()
1149- if failure.check(
1150- BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch,
1151- CannotResumeHost, BuildDaemonError, CannotFetchFile):
1152- self.logger.info("Scanning failed with: %s" % error_message)
1153- else:
1154+
1155+ try:
1156+ slave = self.scan()
1157+ if slave is None:
1158+ self.scheduleNextScanCycle()
1159+ else:
1160+ # XXX: Ought to return Deferred.
1161+ self.resumeAndDispatch(slave)
1162+ except:
1163+ error = Failure()
1164 self.logger.info("Scanning failed with: %s\n%s" %
1165- (failure.getErrorMessage(), failure.getTraceback()))
1166+ (error.getErrorMessage(), error.getTraceback()))
1167
1168- # Decide if we need to terminate the job or fail the
1169- # builder.
1170- try:
1171 builder = get_builder(self.builder_name)
1172- builder.gotFailure()
1173- if builder.currentjob is not None:
1174- build_farm_job = builder.getCurrentBuildFarmJob()
1175- build_farm_job.gotFailure()
1176- self.logger.info(
1177- "builder %s failure count: %s, "
1178- "job '%s' failure count: %s" % (
1179- self.builder_name,
1180- builder.failure_count,
1181- build_farm_job.title,
1182- build_farm_job.failure_count))
1183- else:
1184- self.logger.info(
1185- "Builder %s failed a probe, count: %s" % (
1186- self.builder_name, builder.failure_count))
1187- assessFailureCounts(builder, failure.getErrorMessage())
1188+
1189+ # Decide if we need to terminate the job or fail the
1190+ # builder.
1191+ self._incrementFailureCounts(builder)
1192+ self.logger.info(
1193+ "builder failure count: %s, job failure count: %s" % (
1194+ builder.failure_count,
1195+ builder.getCurrentBuildFarmJob().failure_count))
1196+ assessFailureCounts(builder, error.getErrorMessage())
1197 transaction.commit()
1198- except:
1199- # Catastrophic code failure! Not much we can do.
1200- self.logger.error(
1201- "Miserable failure when trying to examine failure counts:\n",
1202- exc_info=True)
1203- transaction.abort()
1204-
1205+
1206+ self.scheduleNextScanCycle()
1207+
1208+ @write_transaction
1209 def scan(self):
1210 """Probe the builder and update/dispatch/collect as appropriate.
1211
1212- There are several steps to scanning:
1213-
1214- 1. If the builder is marked as "ok" then probe it to see what state
1215- it's in. This is where lost jobs are rescued if we think the
1216- builder is doing something that it later tells us it's not,
1217- and also where the multi-phase abort procedure happens.
1218- See IBuilder.rescueIfLost, which is called by
1219- IBuilder.updateStatus().
1220- 2. If the builder is still happy, we ask it if it has an active build
1221- and then either update the build in Launchpad or collect the
1222- completed build. (builder.updateBuild)
1223- 3. If the builder is not happy or it was marked as unavailable
1224- mid-build, we need to reset the job that we thought it had, so
1225- that the job is dispatched elsewhere.
1226- 4. If the builder is idle and we have another build ready, dispatch
1227- it.
1228-
1229- :return: A Deferred that fires when the scan is complete, whose
1230- value is A `BuilderSlave` if we dispatched a job to it, or None.
1231+ The whole method is wrapped in a transaction, but we do partial
1232+ commits to avoid holding locks on tables.
1233+
1234+ :return: A `RecordingSlave` if we dispatched a job to it, or None.
1235 """
1236 # We need to re-fetch the builder object on each cycle as the
1237 # Storm store is invalidated over transaction boundaries.
1238@@ -208,72 +324,240 @@
1239 self.builder = get_builder(self.builder_name)
1240
1241 if self.builder.builderok:
1242- d = self.builder.updateStatus(self.logger)
1243+ self.builder.updateStatus(self.logger)
1244+ transaction.commit()
1245+
1246+ # See if we think there's an active build on the builder.
1247+ buildqueue = self.builder.getBuildQueue()
1248+
1249+ # XXX Julian 2010-07-29 bug=611258
1250+ # We're not using the RecordingSlave until dispatching, which
1251+ # means that this part blocks until we've received a response
1252+ # from the builder. updateBuild() needs to be made
1253+ # asyncronous.
1254+
1255+ # Scan the slave and get the logtail, or collect the build if
1256+ # it's ready. Yes, "updateBuild" is a bad name.
1257+ if buildqueue is not None:
1258+ self.builder.updateBuild(buildqueue)
1259+ transaction.commit()
1260+
1261+ # If the builder is in manual mode, don't dispatch anything.
1262+ if self.builder.manual:
1263+ self.logger.debug(
1264+ '%s is in manual mode, not dispatching.' % self.builder.name)
1265+ return None
1266+
1267+ # If the builder is marked unavailable, don't dispatch anything.
1268+ # Additionally, because builders can be removed from the pool at
1269+ # any time, we need to see if we think there was a build running
1270+ # on it before it was marked unavailable. In this case we reset
1271+ # the build thusly forcing it to get re-dispatched to another
1272+ # builder.
1273+ if not self.builder.is_available:
1274+ job = self.builder.currentjob
1275+ if job is not None and not self.builder.builderok:
1276+ self.logger.info(
1277+ "%s was made unavailable, resetting attached "
1278+ "job" % self.builder.name)
1279+ job.reset()
1280+ transaction.commit()
1281+ return None
1282+
1283+ # See if there is a job we can dispatch to the builder slave.
1284+
1285+ # XXX: Rather than use the slave actually associated with the builder
1286+ # (which, incidentally, shouldn't be a property anyway), we make a new
1287+ # RecordingSlave so we can get access to its asynchronous
1288+ # "resumeSlave" method. Blech.
1289+ slave = RecordingSlave(
1290+ self.builder.name, self.builder.url, self.builder.vm_host)
1291+ # XXX: Passing buildd_slave=slave overwrites the 'slave' property of
1292+ # self.builder. Not sure why this is needed yet.
1293+ self.builder.findAndStartJob(buildd_slave=slave)
1294+ if self.builder.currentjob is not None:
1295+ # After a successful dispatch we can reset the
1296+ # failure_count.
1297+ self.builder.resetFailureCount()
1298+ transaction.commit()
1299+ return slave
1300+
1301+ return None
1302+
1303+ def resumeAndDispatch(self, slave):
1304+ """Chain the resume and dispatching Deferreds."""
1305+ # XXX: resumeAndDispatch makes Deferreds without returning them.
1306+ if slave.resume_requested:
1307+ # The slave needs to be reset before we can dispatch to
1308+ # it (e.g. a virtual slave)
1309+
1310+ # XXX: Two problems here. The first is that 'resumeSlave' only
1311+ # exists on RecordingSlave (BuilderSlave calls it 'resume').
1312+ d = slave.resumeSlave()
1313+ d.addBoth(self.checkResume, slave)
1314 else:
1315+ # No resume required, build dispatching can commence.
1316 d = defer.succeed(None)
1317
1318- def status_updated(ignored):
1319- # Commit the changes done while possibly rescuing jobs, to
1320- # avoid holding table locks.
1321- transaction.commit()
1322-
1323- # See if we think there's an active build on the builder.
1324- buildqueue = self.builder.getBuildQueue()
1325-
1326- # Scan the slave and get the logtail, or collect the build if
1327- # it's ready. Yes, "updateBuild" is a bad name.
1328- if buildqueue is not None:
1329- return self.builder.updateBuild(buildqueue)
1330-
1331- def build_updated(ignored):
1332- # Commit changes done while updating the build, to avoid
1333- # holding table locks.
1334- transaction.commit()
1335-
1336- # If the builder is in manual mode, don't dispatch anything.
1337- if self.builder.manual:
1338- self.logger.debug(
1339- '%s is in manual mode, not dispatching.' %
1340- self.builder.name)
1341- return
1342-
1343- # If the builder is marked unavailable, don't dispatch anything.
1344- # Additionaly, because builders can be removed from the pool at
1345- # any time, we need to see if we think there was a build running
1346- # on it before it was marked unavailable. In this case we reset
1347- # the build thusly forcing it to get re-dispatched to another
1348- # builder.
1349-
1350- return self.builder.isAvailable().addCallback(got_available)
1351-
1352- def got_available(available):
1353- if not available:
1354- job = self.builder.currentjob
1355- if job is not None and not self.builder.builderok:
1356- self.logger.info(
1357- "%s was made unavailable, resetting attached "
1358- "job" % self.builder.name)
1359- job.reset()
1360- transaction.commit()
1361- return
1362-
1363- # See if there is a job we can dispatch to the builder slave.
1364-
1365- d = self.builder.findAndStartJob()
1366- def job_started(candidate):
1367- if self.builder.currentjob is not None:
1368- # After a successful dispatch we can reset the
1369- # failure_count.
1370- self.builder.resetFailureCount()
1371- transaction.commit()
1372- return self.builder.slave
1373- else:
1374+ # Dispatch the build to the slave asynchronously.
1375+ d.addCallback(self.initiateDispatch, slave)
1376+ # Store this deferred so we can wait for it along with all
1377+ # the others that will be generated by RecordingSlave during
1378+ # the dispatch process, and chain a callback after they've
1379+ # all fired.
1380+ self._deferred_list.append(d)
1381+
1382+ def initiateDispatch(self, resume_result, slave):
1383+ """Start dispatching a build to a slave.
1384+
1385+ If the previous task in chain (slave resuming) has failed it will
1386+ receive a `ResetDispatchResult` instance as 'resume_result' and
1387+ will immediately return that so the subsequent callback can collect
1388+ it.
1389+
1390+ If the slave resuming succeeded, it starts the XMLRPC dialogue. The
1391+ dialogue may consist of many calls to the slave before the build
1392+ starts. Each call is done via a Deferred event, where slave calls
1393+ are sent in callSlave(), and checked in checkDispatch() which will
1394+ keep firing events via callSlave() until all the events are done or
1395+ an error occurs.
1396+ """
1397+ if resume_result is not None:
1398+ self.slaveConversationEnded()
1399+ return resume_result
1400+
1401+ self.logger.info('Dispatching: %s' % slave)
1402+ self.callSlave(slave)
1403+
1404+ def _getProxyForSlave(self, slave):
1405+ """Return a twisted.web.xmlrpc.Proxy for the buildd slave.
1406+
1407+ Uses a protocol with timeout support; see QueryFactoryWithTimeout.
1408+ """
1409+ proxy = xmlrpc.Proxy(str(urlappend(slave.url, 'rpc')))
1410+ proxy.queryFactory = QueryFactoryWithTimeout
1411+ return proxy
1412+
1413+ def callSlave(self, slave):
1414+ """Dispatch the next XMLRPC for the given slave."""
1415+ if len(slave.calls) == 0:
1416+ # That's the end of the dialogue with the slave.
1417+ self.slaveConversationEnded()
1418+ return
1419+
1420+ # Get an XMLRPC proxy for the buildd slave.
1421+ proxy = self._getProxyForSlave(slave)
1422+ method, args = slave.calls.pop(0)
1423+ d = proxy.callRemote(method, *args)
1424+ d.addBoth(self.checkDispatch, method, slave)
1425+ self._deferred_list.append(d)
1426+ self.logger.debug('%s -> %s(%s)' % (slave, method, args))
1427+
1428+ def slaveConversationEnded(self):
1429+ """After all the Deferreds are set up, chain a callback on them."""
1430+ dl = defer.DeferredList(self._deferred_list, consumeErrors=True)
1431+ dl.addBoth(self.evaluateDispatchResult)
1432+ return dl
1433+
1434+ def evaluateDispatchResult(self, deferred_list_results):
1435+ """Process the DispatchResult for this dispatch chain.
1436+
1437+ After waiting for the Deferred chain to finish, we'll have a
1438+ DispatchResult to evaluate, which deals with the result of
1439+ dispatching.
1440+ """
1441+ # The `deferred_list_results` is what we get when waiting on a
1442+ # DeferredList. It's a list of tuples of (status, result) where
1443+ # result is what the last callback in that chain returned.
1444+
1445+ # If the result is an instance of BaseDispatchResult we need to
1446+ # evaluate it, as there's further action required at the end of
1447+ # the dispatch chain. None, resulting from successful chains,
1448+ # are discarded.
1449+
1450+ dispatch_results = [
1451+ result for status, result in deferred_list_results
1452+ if isinstance(result, BaseDispatchResult)]
1453+
1454+ for result in dispatch_results:
1455+ self.logger.info("%r" % result)
1456+ result()
1457+
1458+ # At this point, we're done dispatching, so we can schedule the
1459+ # next scan cycle.
1460+ self.scheduleNextScanCycle()
1461+
1462+ # For the test suite so that it can chain callback results.
1463+ return deferred_list_results
1464+
1465+ def checkResume(self, response, slave):
1466+ """Check the result of resuming a slave.
1467+
1468+ If there's a problem resuming, we return a ResetDispatchResult which
1469+ will get evaluated at the end of the scan, or None if the resume
1470+ was OK.
1471+
1472+ :param response: the tuple that's constructed in
1473+ ProcessWithTimeout.processEnded(), or a Failure that
1474+ contains the tuple.
1475+ :param slave: the slave object we're talking to
1476+ """
1477+ if isinstance(response, Failure):
1478+ out, err, code = response.value
1479+ else:
1480+ out, err, code = response
1481+ if code == os.EX_OK:
1482+ return None
1483+
1484+ error_text = '%s\n%s' % (out, err)
1485+ self.logger.error('%s resume failure: %s' % (slave, error_text))
1486+ return self.reset_result(slave, error_text)
1487+
1488+ def _incrementFailureCounts(self, builder):
1489+ builder.gotFailure()
1490+ builder.getCurrentBuildFarmJob().gotFailure()
1491+
1492+ def checkDispatch(self, response, method, slave):
1493+ """Verify the results of a slave xmlrpc call.
1494+
1495+ If it failed and it compromises the slave then return a corresponding
1496+ `FailDispatchResult`, if it was a communication failure, simply
1497+ reset the slave by returning a `ResetDispatchResult`.
1498+ """
1499+ from lp.buildmaster.interfaces.builder import IBuilderSet
1500+ builder = getUtility(IBuilderSet)[slave.name]
1501+
1502+ # XXX these DispatchResult classes are badly named and do the
1503+ # same thing. We need to fix that.
1504+ self.logger.debug(
1505+ '%s response for "%s": %s' % (slave, method, response))
1506+
1507+ if isinstance(response, Failure):
1508+ self.logger.warn(
1509+ '%s communication failed (%s)' %
1510+ (slave, response.getErrorMessage()))
1511+ self.slaveConversationEnded()
1512+ self._incrementFailureCounts(builder)
1513+ return self.fail_result(slave)
1514+
1515+ if isinstance(response, list) and len(response) == 2:
1516+ if method in buildd_success_result_map:
1517+ expected_status = buildd_success_result_map.get(method)
1518+ status, info = response
1519+ if status == expected_status:
1520+ self.callSlave(slave)
1521 return None
1522- return d.addCallback(job_started)
1523-
1524- d.addCallback(status_updated)
1525- d.addCallback(build_updated)
1526- return d
1527+ else:
1528+ info = 'Unknown slave method: %s' % method
1529+ else:
1530+ info = 'Unexpected response: %s' % repr(response)
1531+
1532+ self.logger.error(
1533+ '%s failed to dispatch (%s)' % (slave, info))
1534+
1535+ self.slaveConversationEnded()
1536+ self._incrementFailureCounts(builder)
1537+ return self.fail_result(slave, info)
1538
1539
1540 class NewBuildersScanner:
1541@@ -294,21 +578,15 @@
1542 self.current_builders = [
1543 builder.name for builder in getUtility(IBuilderSet)]
1544
1545- def stop(self):
1546- """Terminate the LoopingCall."""
1547- self.loop.stop()
1548-
1549 def scheduleScan(self):
1550 """Schedule a callback SCAN_INTERVAL seconds later."""
1551- self.loop = LoopingCall(self.scan)
1552- self.loop.clock = self._clock
1553- self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
1554- return self.stopping_deferred
1555+ return self._clock.callLater(self.SCAN_INTERVAL, self.scan)
1556
1557 def scan(self):
1558 """If a new builder appears, create a SlaveScanner for it."""
1559 new_builders = self.checkForNewBuilders()
1560 self.manager.addScanForBuilders(new_builders)
1561+ self.scheduleScan()
1562
1563 def checkForNewBuilders(self):
1564 """See if any new builders were added."""
1565@@ -331,7 +609,10 @@
1566 manager=self, clock=clock)
1567
1568 def _setupLogger(self):
1569- """Set up a 'slave-scanner' logger that redirects to twisted.
1570+ """Setup a 'slave-scanner' logger that redirects to twisted.
1571+
1572+ It is going to be used locally and within the thread running
1573+ the scan() method.
1574
1575 Make it less verbose to avoid messing too much with the old code.
1576 """
1577@@ -362,29 +643,12 @@
1578 # Events will now fire in the SlaveScanner objects to scan each
1579 # builder.
1580
1581- def stopService(self):
1582- """Callback for when we need to shut down."""
1583- # XXX: lacks unit tests
1584- # All the SlaveScanner objects need to be halted gracefully.
1585- deferreds = [slave.stopping_deferred for slave in self.builder_slaves]
1586- deferreds.append(self.new_builders_scanner.stopping_deferred)
1587-
1588- self.new_builders_scanner.stop()
1589- for slave in self.builder_slaves:
1590- slave.stopCycle()
1591-
1592- # The 'stopping_deferred's are called back when the loops are
1593- # stopped, so we can wait on them all at once here before
1594- # exiting.
1595- d = defer.DeferredList(deferreds, consumeErrors=True)
1596- return d
1597-
1598 def addScanForBuilders(self, builders):
1599 """Set up scanner objects for the builders specified."""
1600 for builder in builders:
1601 slave_scanner = SlaveScanner(builder, self.logger)
1602 self.builder_slaves.append(slave_scanner)
1603- slave_scanner.startCycle()
1604+ slave_scanner.scheduleNextScanCycle()
1605
1606 # Return the slave list for the benefit of tests.
1607 return self.builder_slaves
1608
1609=== modified file 'lib/lp/buildmaster/model/builder.py'
1610--- lib/lp/buildmaster/model/builder.py 2010-10-20 11:54:27 +0000
1611+++ lib/lp/buildmaster/model/builder.py 2010-12-07 16:29:13 +0000
1612@@ -13,11 +13,12 @@
1613 ]
1614
1615 import gzip
1616+import httplib
1617 import logging
1618 import os
1619 import socket
1620+import subprocess
1621 import tempfile
1622-import transaction
1623 import urllib2
1624 import xmlrpclib
1625
1626@@ -33,13 +34,6 @@
1627 Count,
1628 Sum,
1629 )
1630-
1631-from twisted.internet import (
1632- defer,
1633- reactor as default_reactor,
1634- )
1635-from twisted.web import xmlrpc
1636-
1637 from zope.component import getUtility
1638 from zope.interface import implements
1639
1640@@ -64,6 +58,7 @@
1641 from lp.buildmaster.interfaces.builder import (
1642 BuildDaemonError,
1643 BuildSlaveFailure,
1644+ CannotBuild,
1645 CannotFetchFile,
1646 CannotResumeHost,
1647 CorruptBuildCookie,
1648@@ -71,6 +66,9 @@
1649 IBuilderSet,
1650 )
1651 from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet
1652+from lp.buildmaster.interfaces.buildfarmjobbehavior import (
1653+ BuildBehaviorMismatch,
1654+ )
1655 from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
1656 from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior
1657 from lp.buildmaster.model.buildqueue import (
1658@@ -80,9 +78,9 @@
1659 from lp.registry.interfaces.person import validate_public_person
1660 from lp.services.job.interfaces.job import JobStatus
1661 from lp.services.job.model.job import Job
1662+from lp.services.osutils import until_no_eintr
1663 from lp.services.propertycache import cachedproperty
1664-from lp.services.twistedsupport.processmonitor import ProcessWithTimeout
1665-from lp.services.twistedsupport import cancel_on_timeout
1666+from lp.services.twistedsupport.xmlrpc import BlockingProxy
1667 # XXX Michael Nelson 2010-01-13 bug=491330
1668 # These dependencies on soyuz will be removed when getBuildRecords()
1669 # is moved.
1670@@ -94,9 +92,25 @@
1671 from lp.soyuz.model.processor import Processor
1672
1673
1674-class QuietQueryFactory(xmlrpc._QueryFactory):
1675- """XMLRPC client factory that doesn't splatter the log with junk."""
1676- noisy = False
1677+class TimeoutHTTPConnection(httplib.HTTPConnection):
1678+
1679+ def connect(self):
1680+ """Override the standard connect() methods to set a timeout"""
1681+ ret = httplib.HTTPConnection.connect(self)
1682+ self.sock.settimeout(config.builddmaster.socket_timeout)
1683+ return ret
1684+
1685+
1686+class TimeoutHTTP(httplib.HTTP):
1687+ _connection_class = TimeoutHTTPConnection
1688+
1689+
1690+class TimeoutTransport(xmlrpclib.Transport):
1691+ """XMLRPC Transport to setup a socket with defined timeout"""
1692+
1693+ def make_connection(self, host):
1694+ host, extra_headers, x509 = self.get_host_info(host)
1695+ return TimeoutHTTP(host)
1696
1697
1698 class BuilderSlave(object):
1699@@ -111,7 +125,24 @@
1700 # many false positives in your test run and will most likely break
1701 # production.
1702
1703- def __init__(self, proxy, builder_url, vm_host, reactor=None):
1704+ # XXX: This (BuilderSlave) should use composition, rather than
1705+ # inheritance.
1706+
1707+ # XXX: Have a documented interface for the XML-RPC server:
1708+ # - what methods
1709+ # - what return values expected
1710+ # - what faults
1711+ # (see XMLRPCBuildDSlave in lib/canonical/buildd/slave.py).
1712+
1713+ # XXX: Arguably, this interface should be asynchronous
1714+ # (i.e. Deferred-returning). This would mean that Builder (see below)
1715+ # would have to expect Deferreds.
1716+
1717+ # XXX: Once we have a client object with a defined, tested interface, we
1718+ # should make a test double that doesn't do any XML-RPC and can be used to
1719+ # make testing easier & tests faster.
1720+
1721+ def __init__(self, proxy, builder_url, vm_host):
1722 """Initialize a BuilderSlave.
1723
1724 :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must
1725@@ -124,87 +155,63 @@
1726 self._file_cache_url = urlappend(builder_url, 'filecache')
1727 self._server = proxy
1728
1729- if reactor is None:
1730- self.reactor = default_reactor
1731- else:
1732- self.reactor = reactor
1733-
1734 @classmethod
1735- def makeBuilderSlave(cls, builder_url, vm_host, reactor=None, proxy=None):
1736- """Create and return a `BuilderSlave`.
1737-
1738- :param builder_url: The URL of the slave buildd machine,
1739- e.g. http://localhost:8221
1740- :param vm_host: If the slave is virtual, specify its host machine here.
1741- :param reactor: Used by tests to override the Twisted reactor.
1742- :param proxy: Used By tests to override the xmlrpc.Proxy.
1743- """
1744- rpc_url = urlappend(builder_url.encode('utf-8'), 'rpc')
1745- if proxy is None:
1746- server_proxy = xmlrpc.Proxy(rpc_url, allowNone=True)
1747- server_proxy.queryFactory = QuietQueryFactory
1748- else:
1749- server_proxy = proxy
1750- return cls(server_proxy, builder_url, vm_host, reactor)
1751-
1752- def _with_timeout(self, d):
1753- TIMEOUT = config.builddmaster.socket_timeout
1754- return cancel_on_timeout(d, TIMEOUT, self.reactor)
1755+ def makeBlockingSlave(cls, builder_url, vm_host):
1756+ rpc_url = urlappend(builder_url, 'rpc')
1757+ server_proxy = xmlrpclib.ServerProxy(
1758+ rpc_url, transport=TimeoutTransport(), allow_none=True)
1759+ return cls(BlockingProxy(server_proxy), builder_url, vm_host)
1760
1761 def abort(self):
1762 """Abort the current build."""
1763- return self._with_timeout(self._server.callRemote('abort'))
1764+ return self._server.callRemote('abort')
1765
1766 def clean(self):
1767 """Clean up the waiting files and reset the slave's internal state."""
1768- return self._with_timeout(self._server.callRemote('clean'))
1769+ return self._server.callRemote('clean')
1770
1771 def echo(self, *args):
1772 """Echo the arguments back."""
1773- return self._with_timeout(self._server.callRemote('echo', *args))
1774+ return self._server.callRemote('echo', *args)
1775
1776 def info(self):
1777 """Return the protocol version and the builder methods supported."""
1778- return self._with_timeout(self._server.callRemote('info'))
1779+ return self._server.callRemote('info')
1780
1781 def status(self):
1782 """Return the status of the build daemon."""
1783- return self._with_timeout(self._server.callRemote('status'))
1784+ return self._server.callRemote('status')
1785
1786 def ensurepresent(self, sha1sum, url, username, password):
1787- # XXX: Nothing external calls this. Make it private.
1788 """Attempt to ensure the given file is present."""
1789- return self._with_timeout(self._server.callRemote(
1790- 'ensurepresent', sha1sum, url, username, password))
1791+ return self._server.callRemote(
1792+ 'ensurepresent', sha1sum, url, username, password)
1793
1794 def getFile(self, sha_sum):
1795 """Construct a file-like object to return the named file."""
1796- # XXX 2010-10-18 bug=662631
1797- # Change this to do non-blocking IO.
1798 file_url = urlappend(self._file_cache_url, sha_sum)
1799 return urllib2.urlopen(file_url)
1800
1801- def resume(self, clock=None):
1802- """Resume the builder in an asynchronous fashion.
1803-
1804- We use the builddmaster configuration 'socket_timeout' as
1805- the process timeout.
1806-
1807- :param clock: An optional twisted.internet.task.Clock to override
1808- the default clock. For use in tests.
1809-
1810- :return: a Deferred that returns a
1811- (stdout, stderr, subprocess exitcode) triple
1812+ def resume(self):
1813+ """Resume a virtual builder.
1814+
1815+ It uses the configuration command-line (replacing 'vm_host') and
1816+ return its output.
1817+
1818+ :return: a (stdout, stderr, subprocess exitcode) triple
1819 """
1820+ # XXX: This executes the vm_resume_command
1821+ # synchronously. RecordingSlave does so asynchronously. Since we
1822+ # always want to do this asynchronously, there's no need for the
1823+ # duplication.
1824 resume_command = config.builddmaster.vm_resume_command % {
1825 'vm_host': self._vm_host}
1826- # Twisted API requires string but the configuration provides unicode.
1827- resume_argv = [term.encode('utf-8') for term in resume_command.split()]
1828- d = defer.Deferred()
1829- p = ProcessWithTimeout(
1830- d, config.builddmaster.socket_timeout, clock=clock)
1831- p.spawnProcess(resume_argv[0], tuple(resume_argv))
1832- return d
1833+ resume_argv = resume_command.split()
1834+ resume_process = subprocess.Popen(
1835+ resume_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1836+ stdout, stderr = resume_process.communicate()
1837+
1838+ return (stdout, stderr, resume_process.returncode)
1839
1840 def cacheFile(self, logger, libraryfilealias):
1841 """Make sure that the file at 'libraryfilealias' is on the slave.
1842@@ -217,15 +224,13 @@
1843 "Asking builder on %s to ensure it has file %s (%s, %s)" % (
1844 self._file_cache_url, libraryfilealias.filename, url,
1845 libraryfilealias.content.sha1))
1846- return self.sendFileToSlave(libraryfilealias.content.sha1, url)
1847+ self.sendFileToSlave(libraryfilealias.content.sha1, url)
1848
1849 def sendFileToSlave(self, sha1, url, username="", password=""):
1850 """Helper to send the file at 'url' with 'sha1' to this builder."""
1851- d = self.ensurepresent(sha1, url, username, password)
1852- def check_present((present, info)):
1853- if not present:
1854- raise CannotFetchFile(url, info)
1855- return d.addCallback(check_present)
1856+ present, info = self.ensurepresent(sha1, url, username, password)
1857+ if not present:
1858+ raise CannotFetchFile(url, info)
1859
1860 def build(self, buildid, builder_type, chroot_sha1, filemap, args):
1861 """Build a thing on this build slave.
1862@@ -238,18 +243,19 @@
1863 :param args: A dictionary of extra arguments. The contents depend on
1864 the build job type.
1865 """
1866- d = self._with_timeout(self._server.callRemote(
1867- 'build', buildid, builder_type, chroot_sha1, filemap, args))
1868- def got_fault(failure):
1869- failure.trap(xmlrpclib.Fault)
1870- raise BuildSlaveFailure(failure.value)
1871- return d.addErrback(got_fault)
1872+ try:
1873+ return self._server.callRemote(
1874+ 'build', buildid, builder_type, chroot_sha1, filemap, args)
1875+ except xmlrpclib.Fault, info:
1876+ raise BuildSlaveFailure(info)
1877
1878
1879 # This is a separate function since MockBuilder needs to use it too.
1880 # Do not use it -- (Mock)Builder.rescueIfLost should be used instead.
1881 def rescueBuilderIfLost(builder, logger=None):
1882 """See `IBuilder`."""
1883+ status_sentence = builder.slaveStatusSentence()
1884+
1885 # 'ident_position' dict relates the position of the job identifier
1886 # token in the sentence received from status(), according the
1887 # two status we care about. See see lib/canonical/buildd/slave.py
1888@@ -259,58 +265,61 @@
1889 'BuilderStatus.WAITING': 2
1890 }
1891
1892- d = builder.slaveStatusSentence()
1893-
1894- def got_status(status_sentence):
1895- """After we get the status, clean if we have to.
1896-
1897- Always return status_sentence.
1898- """
1899- # Isolate the BuilderStatus string, always the first token in
1900- # see lib/canonical/buildd/slave.py and
1901- # IBuilder.slaveStatusSentence().
1902- status = status_sentence[0]
1903-
1904- # If the cookie test below fails, it will request an abort of the
1905- # builder. This will leave the builder in the aborted state and
1906- # with no assigned job, and we should now "clean" the slave which
1907- # will reset its state back to IDLE, ready to accept new builds.
1908- # This situation is usually caused by a temporary loss of
1909- # communications with the slave and the build manager had to reset
1910- # the job.
1911- if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:
1912- if logger is not None:
1913- logger.info(
1914- "Builder '%s' being cleaned up from ABORTED" %
1915- (builder.name,))
1916- d = builder.cleanSlave()
1917- return d.addCallback(lambda ignored: status_sentence)
1918+ # Isolate the BuilderStatus string, always the first token in
1919+ # see lib/canonical/buildd/slave.py and
1920+ # IBuilder.slaveStatusSentence().
1921+ status = status_sentence[0]
1922+
1923+ # If the cookie test below fails, it will request an abort of the
1924+ # builder. This will leave the builder in the aborted state and
1925+ # with no assigned job, and we should now "clean" the slave which
1926+ # will reset its state back to IDLE, ready to accept new builds.
1927+ # This situation is usually caused by a temporary loss of
1928+ # communications with the slave and the build manager had to reset
1929+ # the job.
1930+ if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:
1931+ builder.cleanSlave()
1932+ if logger is not None:
1933+ logger.info(
1934+ "Builder '%s' cleaned up from ABORTED" % builder.name)
1935+ return
1936+
1937+ # If slave is not building nor waiting, it's not in need of rescuing.
1938+ if status not in ident_position.keys():
1939+ return
1940+
1941+ slave_build_id = status_sentence[ident_position[status]]
1942+
1943+ try:
1944+ builder.verifySlaveBuildCookie(slave_build_id)
1945+ except CorruptBuildCookie, reason:
1946+ if status == 'BuilderStatus.WAITING':
1947+ builder.cleanSlave()
1948 else:
1949- return status_sentence
1950-
1951- def rescue_slave(status_sentence):
1952- # If slave is not building nor waiting, it's not in need of rescuing.
1953- status = status_sentence[0]
1954- if status not in ident_position.keys():
1955- return
1956- slave_build_id = status_sentence[ident_position[status]]
1957- try:
1958- builder.verifySlaveBuildCookie(slave_build_id)
1959- except CorruptBuildCookie, reason:
1960- if status == 'BuilderStatus.WAITING':
1961- d = builder.cleanSlave()
1962- else:
1963- d = builder.requestAbort()
1964- def log_rescue(ignored):
1965- if logger:
1966- logger.info(
1967- "Builder '%s' rescued from '%s': '%s'" %
1968- (builder.name, slave_build_id, reason))
1969- return d.addCallback(log_rescue)
1970-
1971- d.addCallback(got_status)
1972- d.addCallback(rescue_slave)
1973- return d
1974+ builder.requestAbort()
1975+ if logger:
1976+ logger.info(
1977+ "Builder '%s' rescued from '%s': '%s'" %
1978+ (builder.name, slave_build_id, reason))
1979+
1980+
1981+def _update_builder_status(builder, logger=None):
1982+ """Really update the builder status."""
1983+ try:
1984+ builder.checkSlaveAlive()
1985+ builder.rescueIfLost(logger)
1986+ # Catch only known exceptions.
1987+ # XXX cprov 2007-06-15 bug=120571: ValueError & TypeError catching is
1988+ # disturbing in this context. We should spend sometime sanitizing the
1989+ # exceptions raised in the Builder API since we already started the
1990+ # main refactoring of this area.
1991+ except (ValueError, TypeError, xmlrpclib.Fault,
1992+ BuildDaemonError), reason:
1993+ builder.failBuilder(str(reason))
1994+ if logger:
1995+ logger.warn(
1996+ "%s (%s) marked as failed due to: %s",
1997+ builder.name, builder.url, builder.failnotes, exc_info=True)
1998
1999
2000 def updateBuilderStatus(builder, logger=None):
2001@@ -318,7 +327,16 @@
2002 if logger:
2003 logger.debug('Checking %s' % builder.name)
2004
2005- return builder.rescueIfLost(logger)
2006+ MAX_EINTR_RETRIES = 42 # pulling a number out of my a$$ here
2007+ try:
2008+ return until_no_eintr(
2009+ MAX_EINTR_RETRIES, _update_builder_status, builder, logger=logger)
2010+ except socket.error, reason:
2011+ # In Python 2.6 we can use IOError instead. It also has
2012+ # reason.errno but we might be using 2.5 here so use the
2013+ # index hack.
2014+ error_message = str(reason)
2015+ builder.handleTimeout(logger, error_message)
2016
2017
2018 class Builder(SQLBase):
2019@@ -346,10 +364,6 @@
2020 active = BoolCol(dbName='active', notNull=True, default=True)
2021 failure_count = IntCol(dbName='failure_count', default=0, notNull=True)
2022
2023- # The number of times a builder can consecutively fail before we
2024- # give up and mark it builderok=False.
2025- FAILURE_THRESHOLD = 5
2026-
2027 def _getCurrentBuildBehavior(self):
2028 """Return the current build behavior."""
2029 if not safe_hasattr(self, '_current_build_behavior'):
2030@@ -395,13 +409,18 @@
2031 """See `IBuilder`."""
2032 self.failure_count = 0
2033
2034+ def checkSlaveAlive(self):
2035+ """See IBuilder."""
2036+ if self.slave.echo("Test")[0] != "Test":
2037+ raise BuildDaemonError("Failed to echo OK")
2038+
2039 def rescueIfLost(self, logger=None):
2040 """See `IBuilder`."""
2041- return rescueBuilderIfLost(self, logger)
2042+ rescueBuilderIfLost(self, logger)
2043
2044 def updateStatus(self, logger=None):
2045 """See `IBuilder`."""
2046- return updateBuilderStatus(self, logger)
2047+ updateBuilderStatus(self, logger)
2048
2049 def cleanSlave(self):
2050 """See IBuilder."""
2051@@ -421,23 +440,20 @@
2052 def resumeSlaveHost(self):
2053 """See IBuilder."""
2054 if not self.virtualized:
2055- return defer.fail(CannotResumeHost('Builder is not virtualized.'))
2056+ raise CannotResumeHost('Builder is not virtualized.')
2057
2058 if not self.vm_host:
2059- return defer.fail(CannotResumeHost('Undefined vm_host.'))
2060+ raise CannotResumeHost('Undefined vm_host.')
2061
2062 logger = self._getSlaveScannerLogger()
2063 logger.debug("Resuming %s (%s)" % (self.name, self.url))
2064
2065- d = self.slave.resume()
2066- def got_resume_ok((stdout, stderr, returncode)):
2067- return stdout, stderr
2068- def got_resume_bad(failure):
2069- stdout, stderr, code = failure.value
2070+ stdout, stderr, returncode = self.slave.resume()
2071+ if returncode != 0:
2072 raise CannotResumeHost(
2073 "Resuming failed:\nOUT:\n%s\nERR:\n%s\n" % (stdout, stderr))
2074
2075- return d.addCallback(got_resume_ok).addErrback(got_resume_bad)
2076+ return stdout, stderr
2077
2078 @cachedproperty
2079 def slave(self):
2080@@ -446,7 +462,7 @@
2081 # the slave object, which is usually an XMLRPC client, with a
2082 # stub object that removes the need to actually create a buildd
2083 # slave in various states - which can be hard to create.
2084- return BuilderSlave.makeBuilderSlave(self.url, self.vm_host)
2085+ return BuilderSlave.makeBlockingSlave(self.url, self.vm_host)
2086
2087 def setSlaveForTesting(self, proxy):
2088 """See IBuilder."""
2089@@ -467,23 +483,18 @@
2090
2091 # If we are building a virtual build, resume the virtual machine.
2092 if self.virtualized:
2093- d = self.resumeSlaveHost()
2094- else:
2095- d = defer.succeed(None)
2096+ self.resumeSlaveHost()
2097
2098- def resume_done(ignored):
2099- return self.current_build_behavior.dispatchBuildToSlave(
2100+ # Do it.
2101+ build_queue_item.markAsBuilding(self)
2102+ try:
2103+ self.current_build_behavior.dispatchBuildToSlave(
2104 build_queue_item.id, logger)
2105-
2106- def eb_slave_failure(failure):
2107- failure.trap(BuildSlaveFailure)
2108- e = failure.value
2109+ except BuildSlaveFailure, e:
2110+ logger.debug("Disabling builder: %s" % self.url, exc_info=1)
2111 self.failBuilder(
2112 "Exception (%s) when setting up to new job" % (e,))
2113-
2114- def eb_cannot_fetch_file(failure):
2115- failure.trap(CannotFetchFile)
2116- e = failure.value
2117+ except CannotFetchFile, e:
2118 message = """Slave '%s' (%s) was unable to fetch file.
2119 ****** URL ********
2120 %s
2121@@ -492,19 +503,10 @@
2122 *******************
2123 """ % (self.name, self.url, e.file_url, e.error_information)
2124 raise BuildDaemonError(message)
2125-
2126- def eb_socket_error(failure):
2127- failure.trap(socket.error)
2128- e = failure.value
2129+ except socket.error, e:
2130 error_message = "Exception (%s) when setting up new job" % (e,)
2131- d = self.handleTimeout(logger, error_message)
2132- return d.addBoth(lambda ignored: failure)
2133-
2134- d.addCallback(resume_done)
2135- d.addErrback(eb_slave_failure)
2136- d.addErrback(eb_cannot_fetch_file)
2137- d.addErrback(eb_socket_error)
2138- return d
2139+ self.handleTimeout(logger, error_message)
2140+ raise BuildSlaveFailure
2141
2142 def failBuilder(self, reason):
2143 """See IBuilder"""
2144@@ -532,24 +534,22 @@
2145
2146 def slaveStatus(self):
2147 """See IBuilder."""
2148- d = self.slave.status()
2149- def got_status(status_sentence):
2150- status = {'builder_status': status_sentence[0]}
2151-
2152- # Extract detailed status and log information if present.
2153- # Although build_id is also easily extractable here, there is no
2154- # valid reason for anything to use it, so we exclude it.
2155- if status['builder_status'] == 'BuilderStatus.WAITING':
2156- status['build_status'] = status_sentence[1]
2157- else:
2158- if status['builder_status'] == 'BuilderStatus.BUILDING':
2159- status['logtail'] = status_sentence[2]
2160-
2161- self.current_build_behavior.updateSlaveStatus(
2162- status_sentence, status)
2163- return status
2164-
2165- return d.addCallback(got_status)
2166+ builder_version, builder_arch, mechanisms = self.slave.info()
2167+ status_sentence = self.slave.status()
2168+
2169+ status = {'builder_status': status_sentence[0]}
2170+
2171+ # Extract detailed status and log information if present.
2172+ # Although build_id is also easily extractable here, there is no
2173+ # valid reason for anything to use it, so we exclude it.
2174+ if status['builder_status'] == 'BuilderStatus.WAITING':
2175+ status['build_status'] = status_sentence[1]
2176+ else:
2177+ if status['builder_status'] == 'BuilderStatus.BUILDING':
2178+ status['logtail'] = status_sentence[2]
2179+
2180+ self.current_build_behavior.updateSlaveStatus(status_sentence, status)
2181+ return status
2182
2183 def slaveStatusSentence(self):
2184 """See IBuilder."""
2185@@ -562,15 +562,13 @@
2186
2187 def updateBuild(self, queueItem):
2188 """See `IBuilder`."""
2189- return self.current_build_behavior.updateBuild(queueItem)
2190+ self.current_build_behavior.updateBuild(queueItem)
2191
2192 def transferSlaveFileToLibrarian(self, file_sha1, filename, private):
2193 """See IBuilder."""
2194 out_file_fd, out_file_name = tempfile.mkstemp(suffix=".buildlog")
2195 out_file = os.fdopen(out_file_fd, "r+")
2196 try:
2197- # XXX 2010-10-18 bug=662631
2198- # Change this to do non-blocking IO.
2199 slave_file = self.slave.getFile(file_sha1)
2200 copy_and_close(slave_file, out_file)
2201 # If the requested file is the 'buildlog' compress it using gzip
2202@@ -601,17 +599,18 @@
2203
2204 return library_file.id
2205
2206- def isAvailable(self):
2207+ @property
2208+ def is_available(self):
2209 """See `IBuilder`."""
2210 if not self.builderok:
2211- return defer.succeed(False)
2212- d = self.slaveStatusSentence()
2213- def catch_fault(failure):
2214- failure.trap(xmlrpclib.Fault, socket.error)
2215- return False
2216- def check_available(status):
2217- return status[0] == BuilderStatus.IDLE
2218- return d.addCallbacks(check_available, catch_fault)
2219+ return False
2220+ try:
2221+ slavestatus = self.slaveStatusSentence()
2222+ except (xmlrpclib.Fault, socket.error):
2223+ return False
2224+ if slavestatus[0] != BuilderStatus.IDLE:
2225+ return False
2226+ return True
2227
2228 def _getSlaveScannerLogger(self):
2229 """Return the logger instance from buildd-slave-scanner.py."""
2230@@ -622,27 +621,6 @@
2231 logger = logging.getLogger('slave-scanner')
2232 return logger
2233
2234- def acquireBuildCandidate(self):
2235- """Acquire a build candidate in an atomic fashion.
2236-
2237- When retrieiving a candidate we need to mark it as building
2238- immediately so that it is not dispatched by another builder in the
2239- build manager.
2240-
2241- We can consider this to be atomic because although the build manager
2242- is a Twisted app and gives the appearance of doing lots of things at
2243- once, it's still single-threaded so no more than one builder scan
2244- can be in this code at the same time.
2245-
2246- If there's ever more than one build manager running at once, then
2247- this code will need some sort of mutex.
2248- """
2249- candidate = self._findBuildCandidate()
2250- if candidate is not None:
2251- candidate.markAsBuilding(self)
2252- transaction.commit()
2253- return candidate
2254-
2255 def _findBuildCandidate(self):
2256 """Find a candidate job for dispatch to an idle buildd slave.
2257
2258@@ -722,46 +700,52 @@
2259 :param candidate: The job to dispatch.
2260 """
2261 logger = self._getSlaveScannerLogger()
2262- # Using maybeDeferred ensures that any exceptions are also
2263- # wrapped up and caught later.
2264- d = defer.maybeDeferred(self.startBuild, candidate, logger)
2265- return d
2266+ try:
2267+ self.startBuild(candidate, logger)
2268+ except (BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch), err:
2269+ logger.warn('Could not build: %s' % err)
2270
2271 def handleTimeout(self, logger, error_message):
2272 """See IBuilder."""
2273+ builder_should_be_failed = True
2274+
2275 if self.virtualized:
2276 # Virtualized/PPA builder: attempt a reset.
2277 logger.warn(
2278 "Resetting builder: %s -- %s" % (self.url, error_message),
2279 exc_info=True)
2280- d = self.resumeSlaveHost()
2281- return d
2282- else:
2283- # XXX: This should really let the failure bubble up to the
2284- # scan() method that does the failure counting.
2285+ try:
2286+ self.resumeSlaveHost()
2287+ except CannotResumeHost, err:
2288+ # Failed to reset builder.
2289+ logger.warn(
2290+ "Failed to reset builder: %s -- %s" %
2291+ (self.url, str(err)), exc_info=True)
2292+ else:
2293+ # Builder was reset, do *not* mark it as failed.
2294+ builder_should_be_failed = False
2295+
2296+ if builder_should_be_failed:
2297 # Mark builder as 'failed'.
2298 logger.warn(
2299- "Disabling builder: %s -- %s" % (self.url, error_message))
2300+ "Disabling builder: %s -- %s" % (self.url, error_message),
2301+ exc_info=True)
2302 self.failBuilder(error_message)
2303- return defer.succeed(None)
2304
2305 def findAndStartJob(self, buildd_slave=None):
2306 """See IBuilder."""
2307- # XXX This method should be removed in favour of two separately
2308- # called methods that find and dispatch the job. It will
2309- # require a lot of test fixing.
2310 logger = self._getSlaveScannerLogger()
2311- candidate = self.acquireBuildCandidate()
2312+ candidate = self._findBuildCandidate()
2313
2314 if candidate is None:
2315 logger.debug("No build candidates available for builder.")
2316- return defer.succeed(None)
2317+ return None
2318
2319 if buildd_slave is not None:
2320 self.setSlaveForTesting(buildd_slave)
2321
2322- d = self._dispatchBuildCandidate(candidate)
2323- return d.addCallback(lambda ignored: candidate)
2324+ self._dispatchBuildCandidate(candidate)
2325+ return candidate
2326
2327 def getBuildQueue(self):
2328 """See `IBuilder`."""
2329
2330=== modified file 'lib/lp/buildmaster/model/buildfarmjobbehavior.py'
2331--- lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-10-20 11:54:27 +0000
2332+++ lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-12-07 16:29:13 +0000
2333@@ -16,18 +16,13 @@
2334 import socket
2335 import xmlrpclib
2336
2337-from twisted.internet import defer
2338-
2339 from zope.component import getUtility
2340 from zope.interface import implements
2341 from zope.security.proxy import removeSecurityProxy
2342
2343 from canonical import encoding
2344 from canonical.librarian.interfaces import ILibrarianClient
2345-from lp.buildmaster.interfaces.builder import (
2346- BuildSlaveFailure,
2347- CorruptBuildCookie,
2348- )
2349+from lp.buildmaster.interfaces.builder import CorruptBuildCookie
2350 from lp.buildmaster.interfaces.buildfarmjobbehavior import (
2351 BuildBehaviorMismatch,
2352 IBuildFarmJobBehavior,
2353@@ -74,53 +69,54 @@
2354 """See `IBuildFarmJobBehavior`."""
2355 logger = logging.getLogger('slave-scanner')
2356
2357- d = self._builder.slaveStatus()
2358-
2359- def got_failure(failure):
2360- failure.trap(xmlrpclib.Fault, socket.error)
2361- info = failure.value
2362+ try:
2363+ slave_status = self._builder.slaveStatus()
2364+ except (xmlrpclib.Fault, socket.error), info:
2365+ # XXX cprov 2005-06-29:
2366+ # Hmm, a problem with the xmlrpc interface,
2367+ # disable the builder ?? or simple notice the failure
2368+ # with a timestamp.
2369 info = ("Could not contact the builder %s, caught a (%s)"
2370 % (queueItem.builder.url, info))
2371- raise BuildSlaveFailure(info)
2372-
2373- def got_status(slave_status):
2374- builder_status_handlers = {
2375- 'BuilderStatus.IDLE': self.updateBuild_IDLE,
2376- 'BuilderStatus.BUILDING': self.updateBuild_BUILDING,
2377- 'BuilderStatus.ABORTING': self.updateBuild_ABORTING,
2378- 'BuilderStatus.ABORTED': self.updateBuild_ABORTED,
2379- 'BuilderStatus.WAITING': self.updateBuild_WAITING,
2380- }
2381-
2382- builder_status = slave_status['builder_status']
2383- if builder_status not in builder_status_handlers:
2384- logger.critical(
2385- "Builder on %s returned unknown status %s, failing it"
2386- % (self._builder.url, builder_status))
2387- self._builder.failBuilder(
2388- "Unknown status code (%s) returned from status() probe."
2389- % builder_status)
2390- # XXX: This will leave the build and job in a bad state, but
2391- # should never be possible, since our builder statuses are
2392- # known.
2393- queueItem._builder = None
2394- queueItem.setDateStarted(None)
2395- return
2396-
2397- # Since logtail is a xmlrpclib.Binary container and it is
2398- # returned from the IBuilder content class, it arrives
2399- # protected by a Zope Security Proxy, which is not declared,
2400- # thus empty. Before passing it to the status handlers we
2401- # will simply remove the proxy.
2402- logtail = removeSecurityProxy(slave_status.get('logtail'))
2403-
2404- method = builder_status_handlers[builder_status]
2405- return defer.maybeDeferred(
2406- method, queueItem, slave_status, logtail, logger)
2407-
2408- d.addErrback(got_failure)
2409- d.addCallback(got_status)
2410- return d
2411+ logger.debug(info, exc_info=True)
2412+ # keep the job for scan
2413+ return
2414+
2415+ builder_status_handlers = {
2416+ 'BuilderStatus.IDLE': self.updateBuild_IDLE,
2417+ 'BuilderStatus.BUILDING': self.updateBuild_BUILDING,
2418+ 'BuilderStatus.ABORTING': self.updateBuild_ABORTING,
2419+ 'BuilderStatus.ABORTED': self.updateBuild_ABORTED,
2420+ 'BuilderStatus.WAITING': self.updateBuild_WAITING,
2421+ }
2422+
2423+ builder_status = slave_status['builder_status']
2424+ if builder_status not in builder_status_handlers:
2425+ logger.critical(
2426+ "Builder on %s returned unknown status %s, failing it"
2427+ % (self._builder.url, builder_status))
2428+ self._builder.failBuilder(
2429+ "Unknown status code (%s) returned from status() probe."
2430+ % builder_status)
2431+ # XXX: This will leave the build and job in a bad state, but
2432+ # should never be possible, since our builder statuses are
2433+ # known.
2434+ queueItem._builder = None
2435+ queueItem.setDateStarted(None)
2436+ return
2437+
2438+ # Since logtail is a xmlrpclib.Binary container and it is returned
2439+ # from the IBuilder content class, it arrives protected by a Zope
2440+ # Security Proxy, which is not declared, thus empty. Before passing
2441+ # it to the status handlers we will simply remove the proxy.
2442+ logtail = removeSecurityProxy(slave_status.get('logtail'))
2443+
2444+ method = builder_status_handlers[builder_status]
2445+ try:
2446+ method(queueItem, slave_status, logtail, logger)
2447+ except TypeError, e:
2448+ logger.critical("Received wrong number of args in response.")
2449+ logger.exception(e)
2450
2451 def updateBuild_IDLE(self, queueItem, slave_status, logtail, logger):
2452 """Somehow the builder forgot about the build job.
2453@@ -150,13 +146,11 @@
2454
2455 Clean the builder for another jobs.
2456 """
2457- d = queueItem.builder.cleanSlave()
2458- def got_cleaned(ignored):
2459- queueItem.builder = None
2460- if queueItem.job.status != JobStatus.FAILED:
2461- queueItem.job.fail()
2462- queueItem.specific_job.jobAborted()
2463- return d.addCallback(got_cleaned)
2464+ queueItem.builder.cleanSlave()
2465+ queueItem.builder = None
2466+ if queueItem.job.status != JobStatus.FAILED:
2467+ queueItem.job.fail()
2468+ queueItem.specific_job.jobAborted()
2469
2470 def extractBuildStatus(self, slave_status):
2471 """Read build status name.
2472@@ -191,8 +185,6 @@
2473 # XXX: dsilvers 2005-03-02: Confirm the builder has the right build?
2474
2475 build = queueItem.specific_job.build
2476- # XXX 2010-10-18 bug=662631
2477- # Change this to do non-blocking IO.
2478 build.handleStatus(build_status, librarian, slave_status)
2479
2480
2481
2482=== modified file 'lib/lp/buildmaster/model/packagebuild.py'
2483--- lib/lp/buildmaster/model/packagebuild.py 2010-10-26 20:43:50 +0000
2484+++ lib/lp/buildmaster/model/packagebuild.py 2010-12-07 16:29:13 +0000
2485@@ -163,8 +163,6 @@
2486 def getLogFromSlave(package_build):
2487 """See `IPackageBuild`."""
2488 builder = package_build.buildqueue_record.builder
2489- # XXX 2010-10-18 bug=662631
2490- # Change this to do non-blocking IO.
2491 return builder.transferSlaveFileToLibrarian(
2492 SLAVE_LOG_FILENAME,
2493 package_build.buildqueue_record.getLogFileName(),
2494@@ -180,8 +178,6 @@
2495 # log, builder and date_finished are read-only, so we must
2496 # currently remove the security proxy to set them.
2497 naked_build = removeSecurityProxy(build)
2498- # XXX 2010-10-18 bug=662631
2499- # Change this to do non-blocking IO.
2500 naked_build.log = build.getLogFromSlave(build)
2501 naked_build.builder = build.buildqueue_record.builder
2502 # XXX cprov 20060615 bug=120584: Currently buildduration includes
2503@@ -278,8 +274,6 @@
2504 logger.critical("Unknown BuildStatus '%s' for builder '%s'"
2505 % (status, self.buildqueue_record.builder.url))
2506 return
2507- # XXX 2010-10-18 bug=662631
2508- # Change this to do non-blocking IO.
2509 method(librarian, slave_status, logger)
2510
2511 def _handleStatus_OK(self, librarian, slave_status, logger):
2512
2513=== modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
2514--- lib/lp/buildmaster/tests/mock_slaves.py 2010-10-14 15:37:56 +0000
2515+++ lib/lp/buildmaster/tests/mock_slaves.py 2010-12-07 16:29:13 +0000
2516@@ -6,40 +6,21 @@
2517 __metaclass__ = type
2518
2519 __all__ = [
2520- 'AbortedSlave',
2521- 'AbortingSlave',
2522+ 'MockBuilder',
2523+ 'LostBuildingBrokenSlave',
2524 'BrokenSlave',
2525+ 'OkSlave',
2526 'BuildingSlave',
2527- 'CorruptBehavior',
2528- 'DeadProxy',
2529- 'LostBuildingBrokenSlave',
2530- 'MockBuilder',
2531- 'OkSlave',
2532- 'SlaveTestHelpers',
2533- 'TrivialBehavior',
2534+ 'AbortedSlave',
2535 'WaitingSlave',
2536+ 'AbortingSlave',
2537 ]
2538
2539-import fixtures
2540-import os
2541-
2542 from StringIO import StringIO
2543 import xmlrpclib
2544
2545-from testtools.content import Content
2546-from testtools.content_type import UTF8_TEXT
2547-
2548-from twisted.internet import defer
2549-from twisted.web import xmlrpc
2550-
2551-from canonical.buildd.tests.harness import BuilddSlaveTestSetup
2552-
2553-from lp.buildmaster.interfaces.builder import (
2554- CannotFetchFile,
2555- CorruptBuildCookie,
2556- )
2557+from lp.buildmaster.interfaces.builder import CannotFetchFile
2558 from lp.buildmaster.model.builder import (
2559- BuilderSlave,
2560 rescueBuilderIfLost,
2561 updateBuilderStatus,
2562 )
2563@@ -78,9 +59,15 @@
2564 slave_build_id)
2565
2566 def cleanSlave(self):
2567+ # XXX: This should not print anything. The print is only here to make
2568+ # doc/builder.txt a meaningful test.
2569+ print 'Cleaning slave'
2570 return self.slave.clean()
2571
2572 def requestAbort(self):
2573+ # XXX: This should not print anything. The print is only here to make
2574+ # doc/builder.txt a meaningful test.
2575+ print 'Aborting slave'
2576 return self.slave.abort()
2577
2578 def resumeSlave(self, logger):
2579@@ -90,10 +77,10 @@
2580 pass
2581
2582 def rescueIfLost(self, logger=None):
2583- return rescueBuilderIfLost(self, logger)
2584+ rescueBuilderIfLost(self, logger)
2585
2586 def updateStatus(self, logger=None):
2587- return defer.maybeDeferred(updateBuilderStatus, self, logger)
2588+ updateBuilderStatus(self, logger)
2589
2590
2591 # XXX: It would be *really* nice to run some set of tests against the real
2592@@ -108,44 +95,36 @@
2593 self.arch_tag = arch_tag
2594
2595 def status(self):
2596- return defer.succeed(('BuilderStatus.IDLE', ''))
2597+ return ('BuilderStatus.IDLE', '')
2598
2599 def ensurepresent(self, sha1, url, user=None, password=None):
2600 self.call_log.append(('ensurepresent', url, user, password))
2601- return defer.succeed((True, None))
2602+ return True, None
2603
2604 def build(self, buildid, buildtype, chroot, filemap, args):
2605 self.call_log.append(
2606 ('build', buildid, buildtype, chroot, filemap.keys(), args))
2607 info = 'OkSlave BUILDING'
2608- return defer.succeed(('BuildStatus.Building', info))
2609+ return ('BuildStatus.Building', info)
2610
2611 def echo(self, *args):
2612 self.call_log.append(('echo',) + args)
2613- return defer.succeed(args)
2614+ return args
2615
2616 def clean(self):
2617 self.call_log.append('clean')
2618- return defer.succeed(None)
2619
2620 def abort(self):
2621 self.call_log.append('abort')
2622- return defer.succeed(None)
2623
2624 def info(self):
2625 self.call_log.append('info')
2626- return defer.succeed(('1.0', self.arch_tag, 'debian'))
2627-
2628- def resume(self):
2629- self.call_log.append('resume')
2630- return defer.succeed(("", "", 0))
2631+ return ('1.0', self.arch_tag, 'debian')
2632
2633 def sendFileToSlave(self, sha1, url, username="", password=""):
2634- d = self.ensurepresent(sha1, url, username, password)
2635- def check_present((present, info)):
2636- if not present:
2637- raise CannotFetchFile(url, info)
2638- return d.addCallback(check_present)
2639+ present, info = self.ensurepresent(sha1, url, username, password)
2640+ if not present:
2641+ raise CannotFetchFile(url, info)
2642
2643 def cacheFile(self, logger, libraryfilealias):
2644 return self.sendFileToSlave(
2645@@ -162,11 +141,9 @@
2646 def status(self):
2647 self.call_log.append('status')
2648 buildlog = xmlrpclib.Binary("This is a build log")
2649- return defer.succeed(
2650- ('BuilderStatus.BUILDING', self.build_id, buildlog))
2651+ return ('BuilderStatus.BUILDING', self.build_id, buildlog)
2652
2653 def getFile(self, sum):
2654- # XXX: This needs to be updated to return a Deferred.
2655 self.call_log.append('getFile')
2656 if sum == "buildlog":
2657 s = StringIO("This is a build log")
2658@@ -178,15 +155,11 @@
2659 """A mock slave that looks like it's currently waiting."""
2660
2661 def __init__(self, state='BuildStatus.OK', dependencies=None,
2662- build_id='1-1', filemap=None):
2663+ build_id='1-1'):
2664 super(WaitingSlave, self).__init__()
2665 self.state = state
2666 self.dependencies = dependencies
2667 self.build_id = build_id
2668- if filemap is None:
2669- self.filemap = {}
2670- else:
2671- self.filemap = filemap
2672
2673 # By default, the slave only has a buildlog, but callsites
2674 # can update this list as needed.
2675@@ -194,12 +167,10 @@
2676
2677 def status(self):
2678 self.call_log.append('status')
2679- return defer.succeed((
2680- 'BuilderStatus.WAITING', self.state, self.build_id, self.filemap,
2681- self.dependencies))
2682+ return ('BuilderStatus.WAITING', self.state, self.build_id, {},
2683+ self.dependencies)
2684
2685 def getFile(self, hash):
2686- # XXX: This needs to be updated to return a Deferred.
2687 self.call_log.append('getFile')
2688 if hash in self.valid_file_hashes:
2689 content = "This is a %s" % hash
2690@@ -213,19 +184,15 @@
2691
2692 def status(self):
2693 self.call_log.append('status')
2694- return defer.succeed(('BuilderStatus.ABORTING', '1-1'))
2695+ return ('BuilderStatus.ABORTING', '1-1')
2696
2697
2698 class AbortedSlave(OkSlave):
2699 """A mock slave that looks like it's aborted."""
2700
2701- def clean(self):
2702+ def status(self):
2703 self.call_log.append('status')
2704- return defer.succeed(None)
2705-
2706- def status(self):
2707- self.call_log.append('clean')
2708- return defer.succeed(('BuilderStatus.ABORTED', '1-1'))
2709+ return ('BuilderStatus.ABORTED', '1-1')
2710
2711
2712 class LostBuildingBrokenSlave:
2713@@ -239,108 +206,16 @@
2714
2715 def status(self):
2716 self.call_log.append('status')
2717- return defer.succeed(('BuilderStatus.BUILDING', '1000-10000'))
2718+ return ('BuilderStatus.BUILDING', '1000-10000')
2719
2720 def abort(self):
2721 self.call_log.append('abort')
2722- return defer.fail(xmlrpclib.Fault(8002, "Could not abort"))
2723+ raise xmlrpclib.Fault(8002, "Could not abort")
2724
2725
2726 class BrokenSlave:
2727 """A mock slave that reports that it is broken."""
2728
2729- def __init__(self):
2730- self.call_log = []
2731-
2732 def status(self):
2733 self.call_log.append('status')
2734- return defer.fail(xmlrpclib.Fault(8001, "Broken slave"))
2735-
2736-
2737-class CorruptBehavior:
2738-
2739- def verifySlaveBuildCookie(self, cookie):
2740- raise CorruptBuildCookie("Bad value: %r" % (cookie,))
2741-
2742-
2743-class TrivialBehavior:
2744-
2745- def verifySlaveBuildCookie(self, cookie):
2746- pass
2747-
2748-
2749-class DeadProxy(xmlrpc.Proxy):
2750- """An xmlrpc.Proxy that doesn't actually send any messages.
2751-
2752- Used when you want to test timeouts, for example.
2753- """
2754-
2755- def callRemote(self, *args, **kwargs):
2756- return defer.Deferred()
2757-
2758-
2759-class SlaveTestHelpers(fixtures.Fixture):
2760-
2761- # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`.
2762- BASE_URL = 'http://localhost:8221'
2763- TEST_URL = '%s/rpc/' % (BASE_URL,)
2764-
2765- def getServerSlave(self):
2766- """Set up a test build slave server.
2767-
2768- :return: A `BuilddSlaveTestSetup` object.
2769- """
2770- tachandler = BuilddSlaveTestSetup()
2771- tachandler.setUp()
2772- # Basically impossible to do this w/ TrialTestCase. But it would be
2773- # really nice to keep it.
2774- #
2775- # def addLogFile(exc_info):
2776- # self.addDetail(
2777- # 'xmlrpc-log-file',
2778- # Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read()))
2779- # self.addOnException(addLogFile)
2780- self.addCleanup(tachandler.tearDown)
2781- return tachandler
2782-
2783- def getClientSlave(self, reactor=None, proxy=None):
2784- """Return a `BuilderSlave` for use in testing.
2785-
2786- Points to a fixed URL that is also used by `BuilddSlaveTestSetup`.
2787- """
2788- return BuilderSlave.makeBuilderSlave(
2789- self.TEST_URL, 'vmhost', reactor, proxy)
2790-
2791- def makeCacheFile(self, tachandler, filename):
2792- """Make a cache file available on the remote slave.
2793-
2794- :param tachandler: The TacTestSetup object used to start the remote
2795- slave.
2796- :param filename: The name of the file to create in the file cache
2797- area.
2798- """
2799- path = os.path.join(tachandler.root, 'filecache', filename)
2800- fd = open(path, 'w')
2801- fd.write('something')
2802- fd.close()
2803- self.addCleanup(os.unlink, path)
2804-
2805- def triggerGoodBuild(self, slave, build_id=None):
2806- """Trigger a good build on 'slave'.
2807-
2808- :param slave: A `BuilderSlave` instance to trigger the build on.
2809- :param build_id: The build identifier. If not specified, defaults to
2810- an arbitrary string.
2811- :type build_id: str
2812- :return: The build id returned by the slave.
2813- """
2814- if build_id is None:
2815- build_id = 'random-build-id'
2816- tachandler = self.getServerSlave()
2817- chroot_file = 'fake-chroot'
2818- dsc_file = 'thing'
2819- self.makeCacheFile(tachandler, chroot_file)
2820- self.makeCacheFile(tachandler, dsc_file)
2821- return slave.build(
2822- build_id, 'debian', chroot_file, {'.dsc': dsc_file},
2823- {'ogrecomponent': 'main'})
2824+ raise xmlrpclib.Fault(8001, "Broken slave")
2825
2826=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
2827--- lib/lp/buildmaster/tests/test_builder.py 2010-10-18 16:44:22 +0000
2828+++ lib/lp/buildmaster/tests/test_builder.py 2010-12-07 16:29:13 +0000
2829@@ -3,24 +3,20 @@
2830
2831 """Test Builder features."""
2832
2833+import errno
2834 import os
2835-import signal
2836+import socket
2837 import xmlrpclib
2838
2839-from twisted.web.client import getPage
2840-
2841-from twisted.internet.defer import CancelledError
2842-from twisted.internet.task import Clock
2843-from twisted.python.failure import Failure
2844-from twisted.trial.unittest import TestCase as TrialTestCase
2845+from testtools.content import Content
2846+from testtools.content_type import UTF8_TEXT
2847
2848 from zope.component import getUtility
2849 from zope.security.proxy import removeSecurityProxy
2850
2851 from canonical.buildd.slave import BuilderStatus
2852-from canonical.config import config
2853+from canonical.buildd.tests.harness import BuilddSlaveTestSetup
2854 from canonical.database.sqlbase import flush_database_updates
2855-from canonical.launchpad.scripts import QuietFakeLogger
2856 from canonical.launchpad.webapp.interfaces import (
2857 DEFAULT_FLAVOR,
2858 IStoreSelector,
2859@@ -28,38 +24,21 @@
2860 )
2861 from canonical.testing.layers import (
2862 DatabaseFunctionalLayer,
2863- LaunchpadZopelessLayer,
2864- TwistedLaunchpadZopelessLayer,
2865- TwistedLayer,
2866+ LaunchpadZopelessLayer
2867 )
2868 from lp.buildmaster.enums import BuildStatus
2869-from lp.buildmaster.interfaces.builder import (
2870- CannotFetchFile,
2871- IBuilder,
2872- IBuilderSet,
2873- )
2874+from lp.buildmaster.interfaces.builder import IBuilder, IBuilderSet
2875 from lp.buildmaster.interfaces.buildfarmjobbehavior import (
2876 IBuildFarmJobBehavior,
2877 )
2878 from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
2879-from lp.buildmaster.interfaces.builder import CannotResumeHost
2880+from lp.buildmaster.model.builder import BuilderSlave
2881 from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior
2882 from lp.buildmaster.model.buildqueue import BuildQueue
2883 from lp.buildmaster.tests.mock_slaves import (
2884 AbortedSlave,
2885- AbortingSlave,
2886- BrokenSlave,
2887- BuildingSlave,
2888- CorruptBehavior,
2889- DeadProxy,
2890- LostBuildingBrokenSlave,
2891 MockBuilder,
2892- OkSlave,
2893- SlaveTestHelpers,
2894- TrivialBehavior,
2895- WaitingSlave,
2896 )
2897-from lp.services.job.interfaces.job import JobStatus
2898 from lp.soyuz.enums import (
2899 ArchivePurpose,
2900 PackagePublishingStatus,
2901@@ -70,12 +49,9 @@
2902 )
2903 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
2904 from lp.testing import (
2905- ANONYMOUS,
2906- login_as,
2907- logout,
2908+ TestCase,
2909 TestCaseWithFactory,
2910 )
2911-from lp.testing.factory import LaunchpadObjectFactory
2912 from lp.testing.fakemethod import FakeMethod
2913
2914
2915@@ -116,121 +92,42 @@
2916 bq = builder.getBuildQueue()
2917 self.assertIs(None, bq)
2918
2919-
2920-class TestBuilderWithTrial(TrialTestCase):
2921-
2922- layer = TwistedLaunchpadZopelessLayer
2923-
2924- def setUp(self):
2925- super(TestBuilderWithTrial, self)
2926- self.slave_helper = SlaveTestHelpers()
2927- self.slave_helper.setUp()
2928- self.addCleanup(self.slave_helper.cleanUp)
2929- self.factory = LaunchpadObjectFactory()
2930- login_as(ANONYMOUS)
2931- self.addCleanup(logout)
2932-
2933- def test_updateStatus_aborts_lost_and_broken_slave(self):
2934- # A slave that's 'lost' should be aborted; when the slave is
2935- # broken then abort() should also throw a fault.
2936- slave = LostBuildingBrokenSlave()
2937- lostbuilding_builder = MockBuilder(
2938- 'Lost Building Broken Slave', slave, behavior=CorruptBehavior())
2939- d = lostbuilding_builder.updateStatus(QuietFakeLogger())
2940- def check_slave_status(failure):
2941- self.assertIn('abort', slave.call_log)
2942- # 'Fault' comes from the LostBuildingBrokenSlave, this is
2943- # just testing that the value is passed through.
2944- self.assertIsInstance(failure.value, xmlrpclib.Fault)
2945- return d.addBoth(check_slave_status)
2946-
2947- def test_resumeSlaveHost_nonvirtual(self):
2948- builder = self.factory.makeBuilder(virtualized=False)
2949- d = builder.resumeSlaveHost()
2950- return self.assertFailure(d, CannotResumeHost)
2951-
2952- def test_resumeSlaveHost_no_vmhost(self):
2953- builder = self.factory.makeBuilder(virtualized=True, vm_host=None)
2954- d = builder.resumeSlaveHost()
2955- return self.assertFailure(d, CannotResumeHost)
2956-
2957- def test_resumeSlaveHost_success(self):
2958- reset_config = """
2959- [builddmaster]
2960- vm_resume_command: /bin/echo -n parp"""
2961- config.push('reset', reset_config)
2962- self.addCleanup(config.pop, 'reset')
2963-
2964- builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
2965- d = builder.resumeSlaveHost()
2966- def got_resume(output):
2967- self.assertEqual(('parp', ''), output)
2968- return d.addCallback(got_resume)
2969-
2970- def test_resumeSlaveHost_command_failed(self):
2971- reset_fail_config = """
2972- [builddmaster]
2973- vm_resume_command: /bin/false"""
2974- config.push('reset fail', reset_fail_config)
2975- self.addCleanup(config.pop, 'reset fail')
2976- builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
2977- d = builder.resumeSlaveHost()
2978- return self.assertFailure(d, CannotResumeHost)
2979-
2980- def test_handleTimeout_resume_failure(self):
2981- reset_fail_config = """
2982- [builddmaster]
2983- vm_resume_command: /bin/false"""
2984- config.push('reset fail', reset_fail_config)
2985- self.addCleanup(config.pop, 'reset fail')
2986- builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
2987- builder.builderok = True
2988- d = builder.handleTimeout(QuietFakeLogger(), 'blah')
2989- return self.assertFailure(d, CannotResumeHost)
2990-
2991- def _setupRecipeBuildAndBuilder(self):
2992- # Helper function to make a builder capable of building a
2993- # recipe, returning both.
2994- processor = self.factory.makeProcessor(name="i386")
2995- builder = self.factory.makeBuilder(
2996- processor=processor, virtualized=True, vm_host="bladh")
2997- builder.setSlaveForTesting(OkSlave())
2998- distroseries = self.factory.makeDistroSeries()
2999- das = self.factory.makeDistroArchSeries(
3000- distroseries=distroseries, architecturetag="i386",
3001- processorfamily=processor.family)
3002- chroot = self.factory.makeLibraryFileAlias()
3003- das.addOrUpdateChroot(chroot)
3004- distroseries.nominatedarchindep = das
3005- build = self.factory.makeSourcePackageRecipeBuild(
3006- distroseries=distroseries)
3007- return builder, build
3008-
3009- def test_findAndStartJob_returns_candidate(self):
3010- # findAndStartJob finds the next queued job using _findBuildCandidate.
3011- # We don't care about the type of build at all.
3012- builder, build = self._setupRecipeBuildAndBuilder()
3013- candidate = build.queueBuild()
3014- # _findBuildCandidate is tested elsewhere, we just make sure that
3015- # findAndStartJob delegates to it.
3016- removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
3017- result=candidate)
3018- d = builder.findAndStartJob()
3019- return d.addCallback(self.assertEqual, candidate)
3020-
3021- def test_findAndStartJob_starts_job(self):
3022- # findAndStartJob finds the next queued job using _findBuildCandidate
3023- # and then starts it.
3024- # We don't care about the type of build at all.
3025- builder, build = self._setupRecipeBuildAndBuilder()
3026- candidate = build.queueBuild()
3027- removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
3028- result=candidate)
3029- d = builder.findAndStartJob()
3030- def check_build_started(candidate):
3031- self.assertEqual(candidate.builder, builder)
3032- self.assertEqual(BuildStatus.BUILDING, build.status)
3033- return d.addCallback(check_build_started)
3034+ def test_updateBuilderStatus_catches_repeated_EINTR(self):
3035+ # A single EINTR return from a socket operation should cause the
3036+ # operation to be retried, not fail/reset the builder.
3037+ builder = removeSecurityProxy(self.factory.makeBuilder())
3038+ builder.handleTimeout = FakeMethod()
3039+ builder.rescueIfLost = FakeMethod()
3040+
3041+ def _fake_checkSlaveAlive():
3042+ # Raise an EINTR error for all invocations.
3043+ raise socket.error(errno.EINTR, "fake eintr")
3044+
3045+ builder.checkSlaveAlive = _fake_checkSlaveAlive
3046+ builder.updateStatus()
3047+
3048+ # builder.updateStatus should eventually have called
3049+ # handleTimeout()
3050+ self.assertEqual(1, builder.handleTimeout.call_count)
3051+
3052+ def test_updateBuilderStatus_catches_single_EINTR(self):
3053+ builder = removeSecurityProxy(self.factory.makeBuilder())
3054+ builder.handleTimeout = FakeMethod()
3055+ builder.rescueIfLost = FakeMethod()
3056+ self.eintr_returned = False
3057+
3058+ def _fake_checkSlaveAlive():
3059+ # raise an EINTR error for the first invocation only.
3060+ if not self.eintr_returned:
3061+ self.eintr_returned = True
3062+ raise socket.error(errno.EINTR, "fake eintr")
3063+
3064+ builder.checkSlaveAlive = _fake_checkSlaveAlive
3065+ builder.updateStatus()
3066+
3067+ # builder.updateStatus should never call handleTimeout() for a
3068+ # single EINTR.
3069+ self.assertEqual(0, builder.handleTimeout.call_count)
3070
3071 def test_slave(self):
3072 # Builder.slave is a BuilderSlave that points at the actual Builder.
3073@@ -239,147 +136,25 @@
3074 builder = removeSecurityProxy(self.factory.makeBuilder())
3075 self.assertEqual(builder.url, builder.slave.url)
3076
3077+
3078+class Test_rescueBuilderIfLost(TestCaseWithFactory):
3079+ """Tests for lp.buildmaster.model.builder.rescueBuilderIfLost."""
3080+
3081+ layer = LaunchpadZopelessLayer
3082+
3083 def test_recovery_of_aborted_slave(self):
3084 # If a slave is in the ABORTED state, rescueBuilderIfLost should
3085 # clean it if we don't think it's currently building anything.
3086 # See bug 463046.
3087 aborted_slave = AbortedSlave()
3088+ # The slave's clean() method is normally an XMLRPC call, so we
3089+ # can just stub it out and check that it got called.
3090+ aborted_slave.clean = FakeMethod()
3091 builder = MockBuilder("mock_builder", aborted_slave)
3092 builder.currentjob = None
3093- d = builder.rescueIfLost()
3094- def check_slave_calls(ignored):
3095- self.assertIn('clean', aborted_slave.call_log)
3096- return d.addCallback(check_slave_calls)
3097-
3098- def test_recover_ok_slave(self):
3099- # An idle slave is not rescued.
3100- slave = OkSlave()
3101- builder = MockBuilder("mock_builder", slave, TrivialBehavior())
3102- d = builder.rescueIfLost()
3103- def check_slave_calls(ignored):
3104- self.assertNotIn('abort', slave.call_log)
3105- self.assertNotIn('clean', slave.call_log)
3106- return d.addCallback(check_slave_calls)
3107-
3108- def test_recover_waiting_slave_with_good_id(self):
3109- # rescueIfLost does not attempt to abort or clean a builder that is
3110- # WAITING.
3111- waiting_slave = WaitingSlave()
3112- builder = MockBuilder("mock_builder", waiting_slave, TrivialBehavior())
3113- d = builder.rescueIfLost()
3114- def check_slave_calls(ignored):
3115- self.assertNotIn('abort', waiting_slave.call_log)
3116- self.assertNotIn('clean', waiting_slave.call_log)
3117- return d.addCallback(check_slave_calls)
3118-
3119- def test_recover_waiting_slave_with_bad_id(self):
3120- # If a slave is WAITING with a build for us to get, and the build
3121- # cookie cannot be verified, which means we don't recognize the build,
3122- # then rescueBuilderIfLost should attempt to abort it, so that the
3123- # builder is reset for a new build, and the corrupt build is
3124- # discarded.
3125- waiting_slave = WaitingSlave()
3126- builder = MockBuilder("mock_builder", waiting_slave, CorruptBehavior())
3127- d = builder.rescueIfLost()
3128- def check_slave_calls(ignored):
3129- self.assertNotIn('abort', waiting_slave.call_log)
3130- self.assertIn('clean', waiting_slave.call_log)
3131- return d.addCallback(check_slave_calls)
3132-
3133- def test_recover_building_slave_with_good_id(self):
3134- # rescueIfLost does not attempt to abort or clean a builder that is
3135- # BUILDING.
3136- building_slave = BuildingSlave()
3137- builder = MockBuilder("mock_builder", building_slave, TrivialBehavior())
3138- d = builder.rescueIfLost()
3139- def check_slave_calls(ignored):
3140- self.assertNotIn('abort', building_slave.call_log)
3141- self.assertNotIn('clean', building_slave.call_log)
3142- return d.addCallback(check_slave_calls)
3143-
3144- def test_recover_building_slave_with_bad_id(self):
3145- # If a slave is BUILDING with a build id we don't recognize, then we
3146- # abort the build, thus stopping it in its tracks.
3147- building_slave = BuildingSlave()
3148- builder = MockBuilder("mock_builder", building_slave, CorruptBehavior())
3149- d = builder.rescueIfLost()
3150- def check_slave_calls(ignored):
3151- self.assertIn('abort', building_slave.call_log)
3152- self.assertNotIn('clean', building_slave.call_log)
3153- return d.addCallback(check_slave_calls)
3154-
3155-
3156-class TestBuilderSlaveStatus(TestBuilderWithTrial):
3157-
3158- # Verify what IBuilder.slaveStatus returns with slaves in different
3159- # states.
3160-
3161- def assertStatus(self, slave, builder_status=None,
3162- build_status=None, logtail=False, filemap=None,
3163- dependencies=None):
3164- builder = self.factory.makeBuilder()
3165- builder.setSlaveForTesting(slave)
3166- d = builder.slaveStatus()
3167-
3168- def got_status(status_dict):
3169- expected = {}
3170- if builder_status is not None:
3171- expected["builder_status"] = builder_status
3172- if build_status is not None:
3173- expected["build_status"] = build_status
3174- if dependencies is not None:
3175- expected["dependencies"] = dependencies
3176-
3177- # We don't care so much about the content of the logtail,
3178- # just that it's there.
3179- if logtail:
3180- tail = status_dict.pop("logtail")
3181- self.assertIsInstance(tail, xmlrpclib.Binary)
3182-
3183- self.assertEqual(expected, status_dict)
3184-
3185- return d.addCallback(got_status)
3186-
3187- def test_slaveStatus_idle_slave(self):
3188- self.assertStatus(
3189- OkSlave(), builder_status='BuilderStatus.IDLE')
3190-
3191- def test_slaveStatus_building_slave(self):
3192- self.assertStatus(
3193- BuildingSlave(), builder_status='BuilderStatus.BUILDING',
3194- logtail=True)
3195-
3196- def test_slaveStatus_waiting_slave(self):
3197- self.assertStatus(
3198- WaitingSlave(), builder_status='BuilderStatus.WAITING',
3199- build_status='BuildStatus.OK', filemap={})
3200-
3201- def test_slaveStatus_aborting_slave(self):
3202- self.assertStatus(
3203- AbortingSlave(), builder_status='BuilderStatus.ABORTING')
3204-
3205- def test_slaveStatus_aborted_slave(self):
3206- self.assertStatus(
3207- AbortedSlave(), builder_status='BuilderStatus.ABORTED')
3208-
3209- def test_isAvailable_with_not_builderok(self):
3210- # isAvailable() is a wrapper around slaveStatusSentence()
3211- builder = self.factory.makeBuilder()
3212- builder.builderok = False
3213- d = builder.isAvailable()
3214- return d.addCallback(self.assertFalse)
3215-
3216- def test_isAvailable_with_slave_fault(self):
3217- builder = self.factory.makeBuilder()
3218- builder.setSlaveForTesting(BrokenSlave())
3219- d = builder.isAvailable()
3220- return d.addCallback(self.assertFalse)
3221-
3222- def test_isAvailable_with_slave_idle(self):
3223- builder = self.factory.makeBuilder()
3224- builder.setSlaveForTesting(OkSlave())
3225- d = builder.isAvailable()
3226- return d.addCallback(self.assertTrue)
3227+ builder.rescueIfLost()
3228+
3229+ self.assertEqual(1, aborted_slave.clean.call_count)
3230
3231
3232 class TestFindBuildCandidateBase(TestCaseWithFactory):
3233@@ -413,49 +188,6 @@
3234 builder.manual = False
3235
3236
3237-class TestFindBuildCandidateGeneralCases(TestFindBuildCandidateBase):
3238- # Test usage of findBuildCandidate not specific to any archive type.
3239-
3240- def test_findBuildCandidate_supersedes_builds(self):
3241- # IBuilder._findBuildCandidate identifies if there are builds
3242- # for superseded source package releases in the queue and marks
3243- # the corresponding build record as SUPERSEDED.
3244- archive = self.factory.makeArchive()
3245- self.publisher.getPubSource(
3246- sourcename="gedit", status=PackagePublishingStatus.PUBLISHED,
3247- archive=archive).createMissingBuilds()
3248- old_candidate = removeSecurityProxy(
3249- self.frog_builder)._findBuildCandidate()
3250-
3251- # The candidate starts off as NEEDSBUILD:
3252- build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(
3253- old_candidate)
3254- self.assertEqual(BuildStatus.NEEDSBUILD, build.status)
3255-
3256- # Now supersede the source package:
3257- publication = build.current_source_publication
3258- publication.status = PackagePublishingStatus.SUPERSEDED
3259-
3260- # The candidate returned is now a different one:
3261- new_candidate = removeSecurityProxy(
3262- self.frog_builder)._findBuildCandidate()
3263- self.assertNotEqual(new_candidate, old_candidate)
3264-
3265- # And the old_candidate is superseded:
3266- self.assertEqual(BuildStatus.SUPERSEDED, build.status)
3267-
3268- def test_acquireBuildCandidate_marks_building(self):
3269- # acquireBuildCandidate() should call _findBuildCandidate and
3270- # mark the build as building.
3271- archive = self.factory.makeArchive()
3272- self.publisher.getPubSource(
3273- sourcename="gedit", status=PackagePublishingStatus.PUBLISHED,
3274- archive=archive).createMissingBuilds()
3275- candidate = removeSecurityProxy(
3276- self.frog_builder).acquireBuildCandidate()
3277- self.assertEqual(JobStatus.RUNNING, candidate.job.status)
3278-
3279-
3280 class TestFindBuildCandidatePPAWithSingleBuilder(TestCaseWithFactory):
3281
3282 layer = LaunchpadZopelessLayer
3283@@ -588,16 +320,6 @@
3284 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)
3285 self.failUnlessEqual('joesppa', build.archive.name)
3286
3287- def test_findBuildCandidate_with_disabled_archive(self):
3288- # Disabled archives should not be considered for dispatching
3289- # builds.
3290- disabled_job = removeSecurityProxy(self.builder4)._findBuildCandidate()
3291- build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(
3292- disabled_job)
3293- build.archive.disable()
3294- next_job = removeSecurityProxy(self.builder4)._findBuildCandidate()
3295- self.assertNotEqual(disabled_job, next_job)
3296-
3297
3298 class TestFindBuildCandidatePrivatePPA(TestFindBuildCandidatePPABase):
3299
3300@@ -610,14 +332,6 @@
3301 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)
3302 self.failUnlessEqual('joesppa', build.archive.name)
3303
3304- # If the source for the build is still pending, it won't be
3305- # dispatched because the builder has to fetch the source files
3306- # from the (password protected) repo area, not the librarian.
3307- pub = build.current_source_publication
3308- pub.status = PackagePublishingStatus.PENDING
3309- candidate = removeSecurityProxy(self.builder4)._findBuildCandidate()
3310- self.assertNotEqual(next_job.id, candidate.id)
3311-
3312
3313 class TestFindBuildCandidateDistroArchive(TestFindBuildCandidateBase):
3314
3315@@ -760,48 +474,97 @@
3316 self.builder.current_build_behavior, BinaryPackageBuildBehavior)
3317
3318
3319-class TestSlave(TrialTestCase):
3320+class TestSlave(TestCase):
3321 """
3322 Integration tests for BuilderSlave that verify how it works against a
3323 real slave server.
3324 """
3325
3326- layer = TwistedLayer
3327-
3328- def setUp(self):
3329- super(TestSlave, self).setUp()
3330- self.slave_helper = SlaveTestHelpers()
3331- self.slave_helper.setUp()
3332- self.addCleanup(self.slave_helper.cleanUp)
3333-
3334 # XXX: JonathanLange 2010-09-20 bug=643521: There are also tests for
3335 # BuilderSlave in buildd-slave.txt and in other places. The tests here
3336 # ought to become the canonical tests for BuilderSlave vs running buildd
3337 # XML-RPC server interaction.
3338
3339+ # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`.
3340+ TEST_URL = 'http://localhost:8221/rpc/'
3341+
3342+ def getServerSlave(self):
3343+ """Set up a test build slave server.
3344+
3345+ :return: A `BuilddSlaveTestSetup` object.
3346+ """
3347+ tachandler = BuilddSlaveTestSetup()
3348+ tachandler.setUp()
3349+ self.addCleanup(tachandler.tearDown)
3350+ def addLogFile(exc_info):
3351+ self.addDetail(
3352+ 'xmlrpc-log-file',
3353+ Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read()))
3354+ self.addOnException(addLogFile)
3355+ return tachandler
3356+
3357+ def getClientSlave(self):
3358+ """Return a `BuilderSlave` for use in testing.
3359+
3360+ Points to a fixed URL that is also used by `BuilddSlaveTestSetup`.
3361+ """
3362+ return BuilderSlave.makeBlockingSlave(self.TEST_URL, 'vmhost')
3363+
3364+ def makeCacheFile(self, tachandler, filename):
3365+ """Make a cache file available on the remote slave.
3366+
3367+ :param tachandler: The TacTestSetup object used to start the remote
3368+ slave.
3369+ :param filename: The name of the file to create in the file cache
3370+ area.
3371+ """
3372+ path = os.path.join(tachandler.root, 'filecache', filename)
3373+ fd = open(path, 'w')
3374+ fd.write('something')
3375+ fd.close()
3376+ self.addCleanup(os.unlink, path)
3377+
3378+ def triggerGoodBuild(self, slave, build_id=None):
3379+ """Trigger a good build on 'slave'.
3380+
3381+ :param slave: A `BuilderSlave` instance to trigger the build on.
3382+ :param build_id: The build identifier. If not specified, defaults to
3383+ an arbitrary string.
3384+ :type build_id: str
3385+ :return: The build id returned by the slave.
3386+ """
3387+ if build_id is None:
3388+ build_id = self.getUniqueString()
3389+ tachandler = self.getServerSlave()
3390+ chroot_file = 'fake-chroot'
3391+ dsc_file = 'thing'
3392+ self.makeCacheFile(tachandler, chroot_file)
3393+ self.makeCacheFile(tachandler, dsc_file)
3394+ return slave.build(
3395+ build_id, 'debian', chroot_file, {'.dsc': dsc_file},
3396+ {'ogrecomponent': 'main'})
3397+
3398 # XXX 2010-10-06 Julian bug=655559
3399 # This is failing on buildbot but not locally; it's trying to abort
3400 # before the build has started.
3401 def disabled_test_abort(self):
3402- slave = self.slave_helper.getClientSlave()
3403+ slave = self.getClientSlave()
3404 # We need to be in a BUILDING state before we can abort.
3405- d = self.slave_helper.triggerGoodBuild(slave)
3406- d.addCallback(lambda ignored: slave.abort())
3407- d.addCallback(self.assertEqual, BuilderStatus.ABORTING)
3408- return d
3409+ self.triggerGoodBuild(slave)
3410+ result = slave.abort()
3411+ self.assertEqual(result, BuilderStatus.ABORTING)
3412
3413 def test_build(self):
3414 # Calling 'build' with an expected builder type, a good build id,
3415 # valid chroot & filemaps works and returns a BuilderStatus of
3416 # BUILDING.
3417 build_id = 'some-id'
3418- slave = self.slave_helper.getClientSlave()
3419- d = self.slave_helper.triggerGoodBuild(slave, build_id)
3420- return d.addCallback(
3421- self.assertEqual, [BuilderStatus.BUILDING, build_id])
3422+ slave = self.getClientSlave()
3423+ result = self.triggerGoodBuild(slave, build_id)
3424+ self.assertEqual([BuilderStatus.BUILDING, build_id], result)
3425
3426 def test_clean(self):
3427- slave = self.slave_helper.getClientSlave()
3428+ slave = self.getClientSlave()
3429 # XXX: JonathanLange 2010-09-21: Calling clean() on the slave requires
3430 # it to be in either the WAITING or ABORTED states, and both of these
3431 # states are very difficult to achieve in a test environment. For the
3432@@ -811,248 +574,57 @@
3433 def test_echo(self):
3434 # Calling 'echo' contacts the server which returns the arguments we
3435 # gave it.
3436- self.slave_helper.getServerSlave()
3437- slave = self.slave_helper.getClientSlave()
3438- d = slave.echo('foo', 'bar', 42)
3439- return d.addCallback(self.assertEqual, ['foo', 'bar', 42])
3440+ self.getServerSlave()
3441+ slave = self.getClientSlave()
3442+ result = slave.echo('foo', 'bar', 42)
3443+ self.assertEqual(['foo', 'bar', 42], result)
3444
3445 def test_info(self):
3446 # Calling 'info' gets some information about the slave.
3447- self.slave_helper.getServerSlave()
3448- slave = self.slave_helper.getClientSlave()
3449- d = slave.info()
3450+ self.getServerSlave()
3451+ slave = self.getClientSlave()
3452+ result = slave.info()
3453 # We're testing the hard-coded values, since the version is hard-coded
3454 # into the remote slave, the supported build managers are hard-coded
3455 # into the tac file for the remote slave and config is returned from
3456 # the configuration file.
3457- return d.addCallback(
3458- self.assertEqual,
3459+ self.assertEqual(
3460 ['1.0',
3461 'i386',
3462 ['sourcepackagerecipe',
3463- 'translation-templates', 'binarypackage', 'debian']])
3464+ 'translation-templates', 'binarypackage', 'debian']],
3465+ result)
3466
3467 def test_initial_status(self):
3468 # Calling 'status' returns the current status of the slave. The
3469 # initial status is IDLE.
3470- self.slave_helper.getServerSlave()
3471- slave = self.slave_helper.getClientSlave()
3472- d = slave.status()
3473- return d.addCallback(self.assertEqual, [BuilderStatus.IDLE, ''])
3474+ self.getServerSlave()
3475+ slave = self.getClientSlave()
3476+ status = slave.status()
3477+ self.assertEqual([BuilderStatus.IDLE, ''], status)
3478
3479 def test_status_after_build(self):
3480 # Calling 'status' returns the current status of the slave. After a
3481 # build has been triggered, the status is BUILDING.
3482- slave = self.slave_helper.getClientSlave()
3483+ slave = self.getClientSlave()
3484 build_id = 'status-build-id'
3485- d = self.slave_helper.triggerGoodBuild(slave, build_id)
3486- d.addCallback(lambda ignored: slave.status())
3487- def check_status(status):
3488- self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2])
3489- [log_file] = status[2:]
3490- self.assertIsInstance(log_file, xmlrpclib.Binary)
3491- return d.addCallback(check_status)
3492+ self.triggerGoodBuild(slave, build_id)
3493+ status = slave.status()
3494+ self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2])
3495+ [log_file] = status[2:]
3496+ self.assertIsInstance(log_file, xmlrpclib.Binary)
3497
3498 def test_ensurepresent_not_there(self):
3499 # ensurepresent checks to see if a file is there.
3500- self.slave_helper.getServerSlave()
3501- slave = self.slave_helper.getClientSlave()
3502- d = slave.ensurepresent('blahblah', None, None, None)
3503- d.addCallback(self.assertEqual, [False, 'No URL'])
3504- return d
3505+ self.getServerSlave()
3506+ slave = self.getClientSlave()
3507+ result = slave.ensurepresent('blahblah', None, None, None)
3508+ self.assertEqual([False, 'No URL'], result)
3509
3510 def test_ensurepresent_actually_there(self):
3511 # ensurepresent checks to see if a file is there.
3512- tachandler = self.slave_helper.getServerSlave()
3513- slave = self.slave_helper.getClientSlave()
3514- self.slave_helper.makeCacheFile(tachandler, 'blahblah')
3515- d = slave.ensurepresent('blahblah', None, None, None)
3516- d.addCallback(self.assertEqual, [True, 'No URL'])
3517- return d
3518-
3519- def test_sendFileToSlave_not_there(self):
3520- self.slave_helper.getServerSlave()
3521- slave = self.slave_helper.getClientSlave()
3522- d = slave.sendFileToSlave('blahblah', None, None, None)
3523- return self.assertFailure(d, CannotFetchFile)
3524-
3525- def test_sendFileToSlave_actually_there(self):
3526- tachandler = self.slave_helper.getServerSlave()
3527- slave = self.slave_helper.getClientSlave()
3528- self.slave_helper.makeCacheFile(tachandler, 'blahblah')
3529- d = slave.sendFileToSlave('blahblah', None, None, None)
3530- def check_present(ignored):
3531- d = slave.ensurepresent('blahblah', None, None, None)
3532- return d.addCallback(self.assertEqual, [True, 'No URL'])
3533- d.addCallback(check_present)
3534- return d
3535-
3536- def test_resumeHost_success(self):
3537- # On a successful resume resume() fires the returned deferred
3538- # callback with 'None'.
3539- self.slave_helper.getServerSlave()
3540- slave = self.slave_helper.getClientSlave()
3541-
3542- # The configuration testing command-line.
3543- self.assertEqual(
3544- 'echo %(vm_host)s', config.builddmaster.vm_resume_command)
3545-
3546- # On success the response is None.
3547- def check_resume_success(response):
3548- out, err, code = response
3549- self.assertEqual(os.EX_OK, code)
3550- # XXX: JonathanLange 2010-09-23: We should instead pass the
3551- # expected vm_host into the client slave. Not doing this now,
3552- # since the SlaveHelper is being moved around.
3553- self.assertEqual("%s\n" % slave._vm_host, out)
3554- d = slave.resume()
3555- d.addBoth(check_resume_success)
3556- return d
3557-
3558- def test_resumeHost_failure(self):
3559- # On a failed resume, 'resumeHost' fires the returned deferred
3560- # errorback with the `ProcessTerminated` failure.
3561- self.slave_helper.getServerSlave()
3562- slave = self.slave_helper.getClientSlave()
3563-
3564- # Override the configuration command-line with one that will fail.
3565- failed_config = """
3566- [builddmaster]
3567- vm_resume_command: test "%(vm_host)s = 'no-sir'"
3568- """
3569- config.push('failed_resume_command', failed_config)
3570- self.addCleanup(config.pop, 'failed_resume_command')
3571-
3572- # On failures, the response is a twisted `Failure` object containing
3573- # a tuple.
3574- def check_resume_failure(failure):
3575- out, err, code = failure.value
3576- # The process will exit with a return code of "1".
3577- self.assertEqual(code, 1)
3578- d = slave.resume()
3579- d.addBoth(check_resume_failure)
3580- return d
3581-
3582- def test_resumeHost_timeout(self):
3583- # On a resume timeouts, 'resumeHost' fires the returned deferred
3584- # errorback with the `TimeoutError` failure.
3585- self.slave_helper.getServerSlave()
3586- slave = self.slave_helper.getClientSlave()
3587-
3588- # Override the configuration command-line with one that will timeout.
3589- timeout_config = """
3590- [builddmaster]
3591- vm_resume_command: sleep 5
3592- socket_timeout: 1
3593- """
3594- config.push('timeout_resume_command', timeout_config)
3595- self.addCleanup(config.pop, 'timeout_resume_command')
3596-
3597- # On timeouts, the response is a twisted `Failure` object containing
3598- # a `TimeoutError` error.
3599- def check_resume_timeout(failure):
3600- self.assertIsInstance(failure, Failure)
3601- out, err, code = failure.value
3602- self.assertEqual(code, signal.SIGKILL)
3603- clock = Clock()
3604- d = slave.resume(clock=clock)
3605- # Move the clock beyond the socket_timeout but earlier than the
3606- # sleep 5. This stops the test having to wait for the timeout.
3607- # Fast tests FTW!
3608- clock.advance(2)
3609- d.addBoth(check_resume_timeout)
3610- return d
3611-
3612-
3613-class TestSlaveTimeouts(TrialTestCase):
3614- # Testing that the methods that call callRemote() all time out
3615- # as required.
3616-
3617- layer = TwistedLayer
3618-
3619- def setUp(self):
3620- super(TestSlaveTimeouts, self).setUp()
3621- self.slave_helper = SlaveTestHelpers()
3622- self.slave_helper.setUp()
3623- self.addCleanup(self.slave_helper.cleanUp)
3624- self.clock = Clock()
3625- self.proxy = DeadProxy("url")
3626- self.slave = self.slave_helper.getClientSlave(
3627- reactor=self.clock, proxy=self.proxy)
3628-
3629- def assertCancelled(self, d):
3630- self.clock.advance(config.builddmaster.socket_timeout + 1)
3631- return self.assertFailure(d, CancelledError)
3632-
3633- def test_timeout_abort(self):
3634- return self.assertCancelled(self.slave.abort())
3635-
3636- def test_timeout_clean(self):
3637- return self.assertCancelled(self.slave.clean())
3638-
3639- def test_timeout_echo(self):
3640- return self.assertCancelled(self.slave.echo())
3641-
3642- def test_timeout_info(self):
3643- return self.assertCancelled(self.slave.info())
3644-
3645- def test_timeout_status(self):
3646- return self.assertCancelled(self.slave.status())
3647-
3648- def test_timeout_ensurepresent(self):
3649- return self.assertCancelled(
3650- self.slave.ensurepresent(None, None, None, None))
3651-
3652- def test_timeout_build(self):
3653- return self.assertCancelled(
3654- self.slave.build(None, None, None, None, None))
3655-
3656-
3657-class TestSlaveWithLibrarian(TrialTestCase):
3658- """Tests that need more of Launchpad to run."""
3659-
3660- layer = TwistedLaunchpadZopelessLayer
3661-
3662- def setUp(self):
3663- super(TestSlaveWithLibrarian, self)
3664- self.slave_helper = SlaveTestHelpers()
3665- self.slave_helper.setUp()
3666- self.addCleanup(self.slave_helper.cleanUp)
3667- self.factory = LaunchpadObjectFactory()
3668- login_as(ANONYMOUS)
3669- self.addCleanup(logout)
3670-
3671- def test_ensurepresent_librarian(self):
3672- # ensurepresent, when given an http URL for a file will download the
3673- # file from that URL and report that the file is present, and it was
3674- # downloaded.
3675-
3676- # Use the Librarian because it's a "convenient" web server.
3677- lf = self.factory.makeLibraryFileAlias(
3678- 'HelloWorld.txt', content="Hello World")
3679- self.layer.txn.commit()
3680- self.slave_helper.getServerSlave()
3681- slave = self.slave_helper.getClientSlave()
3682- d = slave.ensurepresent(
3683- lf.content.sha1, lf.http_url, "", "")
3684- d.addCallback(self.assertEqual, [True, 'Download'])
3685- return d
3686-
3687- def test_retrieve_files_from_filecache(self):
3688- # Files that are present on the slave can be downloaded with a
3689- # filename made from the sha1 of the content underneath the
3690- # 'filecache' directory.
3691- content = "Hello World"
3692- lf = self.factory.makeLibraryFileAlias(
3693- 'HelloWorld.txt', content=content)
3694- self.layer.txn.commit()
3695- expected_url = '%s/filecache/%s' % (
3696- self.slave_helper.BASE_URL, lf.content.sha1)
3697- self.slave_helper.getServerSlave()
3698- slave = self.slave_helper.getClientSlave()
3699- d = slave.ensurepresent(
3700- lf.content.sha1, lf.http_url, "", "")
3701- def check_file(ignored):
3702- d = getPage(expected_url.encode('utf8'))
3703- return d.addCallback(self.assertEqual, content)
3704- return d.addCallback(check_file)
3705+ tachandler = self.getServerSlave()
3706+ slave = self.getClientSlave()
3707+ self.makeCacheFile(tachandler, 'blahblah')
3708+ result = slave.ensurepresent('blahblah', None, None, None)
3709+ self.assertEqual([True, 'No URL'], result)
3710
3711=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
3712--- lib/lp/buildmaster/tests/test_manager.py 2010-10-19 13:58:21 +0000
3713+++ lib/lp/buildmaster/tests/test_manager.py 2010-12-07 16:29:13 +0000
3714@@ -6,7 +6,6 @@
3715 import os
3716 import signal
3717 import time
3718-import xmlrpclib
3719
3720 import transaction
3721
3722@@ -15,7 +14,9 @@
3723 reactor,
3724 task,
3725 )
3726+from twisted.internet.error import ConnectionClosed
3727 from twisted.internet.task import (
3728+ Clock,
3729 deferLater,
3730 )
3731 from twisted.python.failure import Failure
3732@@ -29,45 +30,577 @@
3733 ANONYMOUS,
3734 login,
3735 )
3736-from canonical.launchpad.scripts.logger import (
3737- QuietFakeLogger,
3738- )
3739+from canonical.launchpad.scripts.logger import BufferLogger
3740 from canonical.testing.layers import (
3741 LaunchpadScriptLayer,
3742- TwistedLaunchpadZopelessLayer,
3743+ LaunchpadZopelessLayer,
3744 TwistedLayer,
3745- ZopelessDatabaseLayer,
3746 )
3747 from lp.buildmaster.enums import BuildStatus
3748 from lp.buildmaster.interfaces.builder import IBuilderSet
3749 from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
3750 from lp.buildmaster.manager import (
3751- assessFailureCounts,
3752+ BaseDispatchResult,
3753+ buildd_success_result_map,
3754 BuilddManager,
3755+ FailDispatchResult,
3756 NewBuildersScanner,
3757+ RecordingSlave,
3758+ ResetDispatchResult,
3759 SlaveScanner,
3760 )
3761-from lp.buildmaster.model.builder import Builder
3762 from lp.buildmaster.tests.harness import BuilddManagerTestSetup
3763-from lp.buildmaster.tests.mock_slaves import (
3764- BrokenSlave,
3765- BuildingSlave,
3766- OkSlave,
3767- )
3768+from lp.buildmaster.tests.mock_slaves import BuildingSlave
3769 from lp.registry.interfaces.distribution import IDistributionSet
3770 from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet
3771-from lp.testing import TestCaseWithFactory
3772+from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
3773+from lp.testing import TestCase as LaunchpadTestCase
3774 from lp.testing.factory import LaunchpadObjectFactory
3775 from lp.testing.fakemethod import FakeMethod
3776 from lp.testing.sampledata import BOB_THE_BUILDER_NAME
3777
3778
3779+class TestRecordingSlaves(TrialTestCase):
3780+ """Tests for the recording slave class."""
3781+ layer = TwistedLayer
3782+
3783+ def setUp(self):
3784+ """Setup a fresh `RecordingSlave` for tests."""
3785+ TrialTestCase.setUp(self)
3786+ self.slave = RecordingSlave(
3787+ 'foo', 'http://foo:8221/rpc', 'foo.host')
3788+
3789+ def test_representation(self):
3790+ """`RecordingSlave` has a custom representation.
3791+
3792+ It encloses builder name and xmlrpc url for debug purposes.
3793+ """
3794+ self.assertEqual('<foo:http://foo:8221/rpc>', repr(self.slave))
3795+
3796+ def assert_ensurepresent(self, func):
3797+ """Helper function to test results from calling ensurepresent."""
3798+ self.assertEqual(
3799+ [True, 'Download'],
3800+ func('boing', 'bar', 'baz'))
3801+ self.assertEqual(
3802+ [('ensurepresent', ('boing', 'bar', 'baz'))],
3803+ self.slave.calls)
3804+
3805+ def test_ensurepresent(self):
3806+ """`RecordingSlave.ensurepresent` always succeeds.
3807+
3808+ It returns the expected success code and records the interaction
3809+ information for later use.
3810+ """
3811+ self.assert_ensurepresent(self.slave.ensurepresent)
3812+
3813+ def test_sendFileToSlave(self):
3814+ """RecordingSlave.sendFileToSlave always succeeds.
3815+
3816+ It calls ensurepresent() and hence returns the same results.
3817+ """
3818+ self.assert_ensurepresent(self.slave.sendFileToSlave)
3819+
3820+ def test_build(self):
3821+ """`RecordingSlave.build` always succeeds.
3822+
3823+ It returns the expected success code and records the interaction
3824+ information for later use.
3825+ """
3826+ self.assertEqual(
3827+ ['BuilderStatus.BUILDING', 'boing'],
3828+ self.slave.build('boing', 'bar', 'baz'))
3829+ self.assertEqual(
3830+ [('build', ('boing', 'bar', 'baz'))],
3831+ self.slave.calls)
3832+
3833+ def test_resume(self):
3834+ """`RecordingSlave.resume` always returns success."""
3835+ # Resume isn't requested in a just-instantiated RecordingSlave.
3836+ self.assertFalse(self.slave.resume_requested)
3837+
3838+ # When resume is called, it returns the success list and marks
3839+ # the slave for resuming.
3840+ self.assertEqual(['', '', os.EX_OK], self.slave.resume())
3841+ self.assertTrue(self.slave.resume_requested)
3842+
3843+ def test_resumeHost_success(self):
3844+ # On a successful resume resumeHost() fires the returned deferred
3845+ # callback with an (out, err, code) tuple.
3846+
3847+ # The configuration testing command-line.
3848+ self.assertEqual(
3849+ 'echo %(vm_host)s', config.builddmaster.vm_resume_command)
3850+
3851+ # On success the response is an (out, err, code) tuple.
3852+ def check_resume_success(response):
3853+ out, err, code = response
3854+ self.assertEqual(os.EX_OK, code)
3855+ self.assertEqual("%s\n" % self.slave.vm_host, out)
3856+ d = self.slave.resumeSlave()
3857+ d.addBoth(check_resume_success)
3858+ return d
3859+
3860+ def test_resumeHost_failure(self):
3861+ # On a failed resume, 'resumeHost' fires the returned deferred
3862+ # errorback with the `ProcessTerminated` failure.
3863+
3864+ # Override the configuration command-line with one that will fail.
3865+ failed_config = """
3866+ [builddmaster]
3867+ vm_resume_command: test "%(vm_host)s = 'no-sir'"
3868+ """
3869+ config.push('failed_resume_command', failed_config)
3870+ self.addCleanup(config.pop, 'failed_resume_command')
3871+
3872+ # On failures, the response is a twisted `Failure` object containing
3873+ # a tuple.
3874+ def check_resume_failure(failure):
3875+ out, err, code = failure.value
3876+ # The process will exit with a return code of "1".
3877+ self.assertEqual(code, 1)
3878+ d = self.slave.resumeSlave()
3879+ d.addBoth(check_resume_failure)
3880+ return d
3881+
3882+ def test_resumeHost_timeout(self):
3883+ # On a resume timeout, 'resumeHost' fires the returned deferred
3884+ # errorback with the `TimeoutError` failure.
3885+
3886+ # Override the configuration command-line with one that will timeout.
3887+ timeout_config = """
3888+ [builddmaster]
3889+ vm_resume_command: sleep 5
3890+ socket_timeout: 1
3891+ """
3892+ config.push('timeout_resume_command', timeout_config)
3893+ self.addCleanup(config.pop, 'timeout_resume_command')
3894+
3895+ # On timeouts, the response is a twisted `Failure` object containing
3896+ # a `TimeoutError` error.
3897+ def check_resume_timeout(failure):
3898+ self.assertIsInstance(failure, Failure)
3899+ out, err, code = failure.value
3900+ self.assertEqual(code, signal.SIGKILL)
3901+ clock = Clock()
3902+ d = self.slave.resumeSlave(clock=clock)
3903+ # Move the clock beyond the socket_timeout but earlier than the
3904+ # sleep 5. This stops the test having to wait for the timeout.
3905+ # Fast tests FTW!
3906+ clock.advance(2)
3907+ d.addBoth(check_resume_timeout)
3908+ return d
3909+
3910+
3911+class TestingXMLRPCProxy:
3912+ """This class mimics a twisted XMLRPC Proxy class."""
3913+
3914+ def __init__(self, failure_info=None):
3915+ self.calls = []
3916+ self.failure_info = failure_info
3917+ self.works = failure_info is None
3918+
3919+ def callRemote(self, *args):
3920+ self.calls.append(args)
3921+ if self.works:
3922+ result = buildd_success_result_map.get(args[0])
3923+ else:
3924+ result = 'boing'
3925+ return defer.succeed([result, self.failure_info])
3926+
3927+
3928+class TestingResetDispatchResult(ResetDispatchResult):
3929+ """Override the evaluation method to simply annotate the call."""
3930+
3931+ def __init__(self, slave, info=None):
3932+ ResetDispatchResult.__init__(self, slave, info)
3933+ self.processed = False
3934+
3935+ def __call__(self):
3936+ self.processed = True
3937+
3938+
3939+class TestingFailDispatchResult(FailDispatchResult):
3940+ """Override the evaluation method to simply annotate the call."""
3941+
3942+ def __init__(self, slave, info=None):
3943+ FailDispatchResult.__init__(self, slave, info)
3944+ self.processed = False
3945+
3946+ def __call__(self):
3947+ self.processed = True
3948+
3949+
3950+class TestingSlaveScanner(SlaveScanner):
3951+ """Override the dispatch result factories """
3952+
3953+ reset_result = TestingResetDispatchResult
3954+ fail_result = TestingFailDispatchResult
3955+
3956+
3957+class TestSlaveScanner(TrialTestCase):
3958+ """Tests for the actual build slave manager."""
3959+ layer = LaunchpadZopelessLayer
3960+
3961+ def setUp(self):
3962+ TrialTestCase.setUp(self)
3963+ self.manager = TestingSlaveScanner(
3964+ BOB_THE_BUILDER_NAME, BufferLogger())
3965+
3966+ self.fake_builder_url = 'http://bob.buildd:8221/'
3967+ self.fake_builder_host = 'bob.host'
3968+
3969+ # We will use an instrumented SlaveScanner instance for tests in
3970+ # this context.
3971+
3972+ # Stop cyclic execution and record the end of the cycle.
3973+ self.stopped = False
3974+
3975+ def testNextCycle():
3976+ self.stopped = True
3977+
3978+ self.manager.scheduleNextScanCycle = testNextCycle
3979+
3980+ # Return the testing Proxy version.
3981+ self.test_proxy = TestingXMLRPCProxy()
3982+
3983+ def testGetProxyForSlave(slave):
3984+ return self.test_proxy
3985+ self.manager._getProxyForSlave = testGetProxyForSlave
3986+
3987+ # Deactivate the 'scan' method.
3988+ def testScan():
3989+ pass
3990+ self.manager.scan = testScan
3991+
3992+ # Stop automatic collection of dispatching results.
3993+ def testslaveConversationEnded():
3994+ pass
3995+ self._realslaveConversationEnded = self.manager.slaveConversationEnded
3996+ self.manager.slaveConversationEnded = testslaveConversationEnded
3997+
3998+ def assertIsDispatchReset(self, result):
3999+ self.assertTrue(
4000+ isinstance(result, TestingResetDispatchResult),
4001+ 'Dispatch failure did not result in a ResetBuildResult object')
4002+
4003+ def assertIsDispatchFail(self, result):
4004+ self.assertTrue(
4005+ isinstance(result, TestingFailDispatchResult),
4006+ 'Dispatch failure did not result in a FailBuildResult object')
4007+
4008+ def test_checkResume(self):
4009+ """`SlaveScanner.checkResume` is chained after resume requests.
4010+
4011+ If the resume request succeeds it returns None, otherwise it returns
4012+ a `ResetDispatchResult` (the one in the test context) that will be
4013+ collected and evaluated later.
4014+
4015+ See `RecordingSlave.resumeHost` for more information about the resume
4016+ result contents.
4017+ """
4018+ slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
4019+
4020+ successful_response = ['', '', os.EX_OK]
4021+ result = self.manager.checkResume(successful_response, slave)
4022+ self.assertEqual(
4023+ None, result, 'Successful resume checks should return None')
4024+
4025+ failed_response = ['stdout', 'stderr', 1]
4026+ result = self.manager.checkResume(failed_response, slave)
4027+ self.assertIsDispatchReset(result)
4028+ self.assertEqual(
4029+ '<foo:http://foo.buildd:8221/> reset failure', repr(result))
4030+ self.assertEqual(
4031+ result.info, "stdout\nstderr")
4032+
4033+ def test_fail_to_resume_slave_resets_slave(self):
4034+ # If an attempt to resume and dispatch a slave fails, we reset the
4035+ # slave by calling self.reset_result(slave)().
4036+
4037+ reset_result_calls = []
4038+
4039+ class LoggingResetResult(BaseDispatchResult):
4040+ """A DispatchResult that logs calls to itself.
4041+
4042+ This *must* subclass BaseDispatchResult, otherwise finishCycle()
4043+ won't treat it like a dispatch result.
4044+ """
4045+
4046+ def __init__(self, slave, info=None):
4047+ self.slave = slave
4048+
4049+ def __call__(self):
4050+ reset_result_calls.append(self.slave)
4051+
4052+ # Make a failing slave that is requesting a resume.
4053+ slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
4054+ slave.resume_requested = True
4055+ slave.resumeSlave = lambda: deferLater(
4056+ reactor, 0, defer.fail, Failure(('out', 'err', 1)))
4057+
4058+ # Make the manager log the reset result calls.
4059+ self.manager.reset_result = LoggingResetResult
4060+
4061+ # We only care about this one slave. Reset the list of manager
4062+ # deferreds in case setUp did something unexpected.
4063+ self.manager._deferred_list = []
4064+
4065+ # Here, we're patching the slaveConversationEnded method so we can
4066+ # get an extra callback at the end of it, so we can
4067+ # verify that the reset_result was really called.
4068+ def _slaveConversationEnded():
4069+ d = self._realslaveConversationEnded()
4070+ return d.addCallback(
4071+ lambda ignored: self.assertEqual([slave], reset_result_calls))
4072+ self.manager.slaveConversationEnded = _slaveConversationEnded
4073+
4074+ self.manager.resumeAndDispatch(slave)
4075+
4076+ def test_failed_to_resume_slave_ready_for_reset(self):
4077+ # When a slave fails to resume, the manager has a Deferred in its
4078+ # Deferred list that is ready to fire with a ResetDispatchResult.
4079+
4080+ # Make a failing slave that is requesting a resume.
4081+ slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
4082+ slave.resume_requested = True
4083+ slave.resumeSlave = lambda: defer.fail(Failure(('out', 'err', 1)))
4084+
4085+ # We only care about this one slave. Reset the list of manager
4086+ # deferreds in case setUp did something unexpected.
4087+ self.manager._deferred_list = []
4088+ # Restore the slaveConversationEnded method. It's very relevant to
4089+ # this test.
4090+ self.manager.slaveConversationEnded = self._realslaveConversationEnded
4091+ self.manager.resumeAndDispatch(slave)
4092+ [d] = self.manager._deferred_list
4093+
4094+ # The Deferred for our failing slave should be ready to fire
4095+ # successfully with a ResetDispatchResult.
4096+ def check_result(result):
4097+ self.assertIsInstance(result, ResetDispatchResult)
4098+ self.assertEqual(slave, result.slave)
4099+ self.assertFalse(result.processed)
4100+ return d.addCallback(check_result)
4101+
4102+ def _setUpSlaveAndBuilder(self, builder_failure_count=None,
4103+ job_failure_count=None):
4104+ # Helper function to set up a builder and its recording slave.
4105+ if builder_failure_count is None:
4106+ builder_failure_count = 0
4107+ if job_failure_count is None:
4108+ job_failure_count = 0
4109+ slave = RecordingSlave(
4110+ BOB_THE_BUILDER_NAME, self.fake_builder_url,
4111+ self.fake_builder_host)
4112+ bob_builder = getUtility(IBuilderSet)[slave.name]
4113+ bob_builder.failure_count = builder_failure_count
4114+ bob_builder.getCurrentBuildFarmJob().failure_count = job_failure_count
4115+ return slave, bob_builder
4116+
4117+ def test_checkDispatch_success(self):
4118+ # SlaveScanner.checkDispatch returns None for a successful
4119+ # dispatch.
4120+
4121+ """
4122+ If the dispatch request fails or an unknown method is given, it
4123+ returns a `FailDispatchResult` (in the test context) that will
4124+ be evaluated later.
4125+
4126+ Builders will be marked as failed if the following response
4127+ categories are received.
4128+
4129+ * Legitimate slave failures: when the response is a list with 2
4130+ elements but the first element ('status') does not correspond to
4131+ the expected 'success' result. See `buildd_success_result_map`.
4132+
4133+ * Unexpected (code) failures: when the given 'method' is unknown
4134+ or the response isn't a 2-element list or Failure instance.
4135+
4136+ Communication failures (a twisted `Failure` instance) will simply
4137+ cause the builder to be reset, a `ResetDispatchResult` object is
4138+ returned. In other words, network failures are ignored in this
4139+ stage, broken builders will be identified and marked as such
4140+ during the 'scan()' stage.
4141+
4142+ On successful dispatch it returns None.
4143+ """
4144+ slave, bob_builder = self._setUpSlaveAndBuilder(
4145+ builder_failure_count=0, job_failure_count=0)
4146+
4147+ # Successful legitimate response, None is returned.
4148+ successful_response = [
4149+ buildd_success_result_map.get('ensurepresent'), 'cool builder']
4150+ result = self.manager.checkDispatch(
4151+ successful_response, 'ensurepresent', slave)
4152+ self.assertEqual(
4153+ None, result, 'Successful dispatch checks should return None')
4154+
4155+ def test_checkDispatch_first_fail(self):
4156+ # Failed legitimate response, results in FailDispatchResult and
4157+ # failure_count on the job and the builder are both incremented.
4158+ slave, bob_builder = self._setUpSlaveAndBuilder(
4159+ builder_failure_count=0, job_failure_count=0)
4160+
4161+ failed_response = [False, 'uncool builder']
4162+ result = self.manager.checkDispatch(
4163+ failed_response, 'ensurepresent', slave)
4164+ self.assertIsDispatchFail(result)
4165+ self.assertEqual(
4166+ repr(result),
4167+ '<bob:%s> failure (uncool builder)' % self.fake_builder_url)
4168+ self.assertEqual(1, bob_builder.failure_count)
4169+ self.assertEqual(
4170+ 1, bob_builder.getCurrentBuildFarmJob().failure_count)
4171+
4172+ def test_checkDispatch_second_reset_fail_by_builder(self):
4173+ # Twisted Failure response, results in a `FailDispatchResult`.
4174+ slave, bob_builder = self._setUpSlaveAndBuilder(
4175+ builder_failure_count=1, job_failure_count=0)
4176+
4177+ twisted_failure = Failure(ConnectionClosed('Boom!'))
4178+ result = self.manager.checkDispatch(
4179+ twisted_failure, 'ensurepresent', slave)
4180+ self.assertIsDispatchFail(result)
4181+ self.assertEqual(
4182+ '<bob:%s> failure (None)' % self.fake_builder_url, repr(result))
4183+ self.assertEqual(2, bob_builder.failure_count)
4184+ self.assertEqual(
4185+ 1, bob_builder.getCurrentBuildFarmJob().failure_count)
4186+
4187+ def test_checkDispatch_second_comms_fail_by_builder(self):
4188+ # Unexpected response, results in a `FailDispatchResult`.
4189+ slave, bob_builder = self._setUpSlaveAndBuilder(
4190+ builder_failure_count=1, job_failure_count=0)
4191+
4192+ unexpected_response = [1, 2, 3]
4193+ result = self.manager.checkDispatch(
4194+ unexpected_response, 'build', slave)
4195+ self.assertIsDispatchFail(result)
4196+ self.assertEqual(
4197+ '<bob:%s> failure '
4198+ '(Unexpected response: [1, 2, 3])' % self.fake_builder_url,
4199+ repr(result))
4200+ self.assertEqual(2, bob_builder.failure_count)
4201+ self.assertEqual(
4202+ 1, bob_builder.getCurrentBuildFarmJob().failure_count)
4203+
4204+ def test_checkDispatch_second_comms_fail_by_job(self):
4205+ # Unknown method was given, results in a `FailDispatchResult`.
4206+ # This could be caused by a faulty job which would fail the job.
4207+ slave, bob_builder = self._setUpSlaveAndBuilder(
4208+ builder_failure_count=0, job_failure_count=1)
4209+
4210+ successful_response = [
4211+ buildd_success_result_map.get('ensurepresent'), 'cool builder']
4212+ result = self.manager.checkDispatch(
4213+ successful_response, 'unknown-method', slave)
4214+ self.assertIsDispatchFail(result)
4215+ self.assertEqual(
4216+ '<bob:%s> failure '
4217+ '(Unknown slave method: unknown-method)' % self.fake_builder_url,
4218+ repr(result))
4219+ self.assertEqual(1, bob_builder.failure_count)
4220+ self.assertEqual(
4221+ 2, bob_builder.getCurrentBuildFarmJob().failure_count)
4222+
4223+ def test_initiateDispatch(self):
4224+ """Check `initiateDispatch` in various scenarios.
4225+
4226+ When there are no recording slaves (i.e. no build got dispatched
4227+ in scan()) it simply finishes the cycle.
4228+
4229+ When there is a recording slave with pending slave calls, they are
4230+ performed and if they all succeed the cycle is finished with no
4231+ errors.
4232+
4233+ On slave call failure the chain is stopped immediately and a
4234+ FailDispatchResult is collected while finishing the cycle.
4235+ """
4236+ def check_no_events(results):
4237+ errors = [
4238+ r for s, r in results if isinstance(r, BaseDispatchResult)]
4239+ self.assertEqual(0, len(errors))
4240+
4241+ def check_events(results):
4242+ [error] = [r for s, r in results if r is not None]
4243+ self.assertEqual(
4244+ '<bob:%s> failure (very broken slave)'
4245+ % self.fake_builder_url,
4246+ repr(error))
4247+ self.assertTrue(error.processed)
4248+
4249+ def _wait_on_deferreds_then_check_no_events():
4250+ dl = self._realslaveConversationEnded()
4251+ dl.addCallback(check_no_events)
4252+
4253+ def _wait_on_deferreds_then_check_events():
4254+ dl = self._realslaveConversationEnded()
4255+ dl.addCallback(check_events)
4256+
4257+ # A functional slave charged with some interactions.
4258+ slave = RecordingSlave(
4259+ BOB_THE_BUILDER_NAME, self.fake_builder_url,
4260+ self.fake_builder_host)
4261+ slave.ensurepresent('arg1', 'arg2', 'arg3')
4262+ slave.build('arg1', 'arg2', 'arg3')
4263+
4264+ # If the previous step (resuming) has failed nothing gets dispatched.
4265+ reset_result = ResetDispatchResult(slave)
4266+ result = self.manager.initiateDispatch(reset_result, slave)
4267+ self.assertTrue(result is reset_result)
4268+ self.assertFalse(slave.resume_requested)
4269+ self.assertEqual(0, len(self.manager._deferred_list))
4270+
4271+ # Operation with the default (functional slave), no resets or
4272+ # failures results are triggered.
4273+ slave.resume()
4274+ result = self.manager.initiateDispatch(None, slave)
4275+ self.assertEqual(None, result)
4276+ self.assertTrue(slave.resume_requested)
4277+ self.assertEqual(
4278+ [('ensurepresent', 'arg1', 'arg2', 'arg3'),
4279+ ('build', 'arg1', 'arg2', 'arg3')],
4280+ self.test_proxy.calls)
4281+ self.assertEqual(2, len(self.manager._deferred_list))
4282+
4283+ # Monkey patch the slaveConversationEnded method so we can chain a
4284+ # callback to check the end of the result chain.
4285+ self.manager.slaveConversationEnded = \
4286+ _wait_on_deferreds_then_check_no_events
4287+ events = self.manager.slaveConversationEnded()
4288+
4289+ # Create a broken slave and insert interaction that will
4290+ # cause the builder to be marked as fail.
4291+ self.test_proxy = TestingXMLRPCProxy('very broken slave')
4292+ slave = RecordingSlave(
4293+ BOB_THE_BUILDER_NAME, self.fake_builder_url,
4294+ self.fake_builder_host)
4295+ slave.ensurepresent('arg1', 'arg2', 'arg3')
4296+ slave.build('arg1', 'arg2', 'arg3')
4297+
4298+ result = self.manager.initiateDispatch(None, slave)
4299+ self.assertEqual(None, result)
4300+ self.assertEqual(3, len(self.manager._deferred_list))
4301+ self.assertEqual(
4302+ [('ensurepresent', 'arg1', 'arg2', 'arg3')],
4303+ self.test_proxy.calls)
4304+
4305+ # Monkey patch the slaveConversationEnded method so we can chain a
4306+ # callback to check the end of the result chain.
4307+ self.manager.slaveConversationEnded = \
4308+ _wait_on_deferreds_then_check_events
4309+ events = self.manager.slaveConversationEnded()
4310+
4311+ return events
4312+
4313+
4314 class TestSlaveScannerScan(TrialTestCase):
4315 """Tests `SlaveScanner.scan` method.
4316
4317 This method uses the old framework for scanning and dispatching builds.
4318 """
4319- layer = TwistedLaunchpadZopelessLayer
4320+ layer = LaunchpadZopelessLayer
4321
4322 def setUp(self):
4323 """Setup TwistedLayer, TrialTestCase and BuilddSlaveTest.
4324@@ -75,18 +608,19 @@
4325 Also adjust the sampledata in a way a build can be dispatched to
4326 'bob' builder.
4327 """
4328- from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
4329 TwistedLayer.testSetUp()
4330 TrialTestCase.setUp(self)
4331 self.slave = BuilddSlaveTestSetup()
4332 self.slave.setUp()
4333
4334 # Creating the required chroots needed for dispatching.
4335+ login('foo.bar@canonical.com')
4336 test_publisher = SoyuzTestPublisher()
4337 ubuntu = getUtility(IDistributionSet).getByName('ubuntu')
4338 hoary = ubuntu.getSeries('hoary')
4339 test_publisher.setUpDefaultDistroSeries(hoary)
4340 test_publisher.addFakeChroots()
4341+ login(ANONYMOUS)
4342
4343 def tearDown(self):
4344 self.slave.tearDown()
4345@@ -94,7 +628,8 @@
4346 TwistedLayer.testTearDown()
4347
4348 def _resetBuilder(self, builder):
4349- """Reset the given builder and its job."""
4350+ """Reset the given builder and it's job."""
4351+ login('foo.bar@canonical.com')
4352
4353 builder.builderok = True
4354 job = builder.currentjob
4355@@ -102,6 +637,7 @@
4356 job.reset()
4357
4358 transaction.commit()
4359+ login(ANONYMOUS)
4360
4361 def assertBuildingJob(self, job, builder, logtail=None):
4362 """Assert the given job is building on the given builder."""
4363@@ -117,25 +653,55 @@
4364 self.assertEqual(build.status, BuildStatus.BUILDING)
4365 self.assertEqual(job.logtail, logtail)
4366
4367- def _getScanner(self, builder_name=None):
4368+ def _getManager(self):
4369 """Instantiate a SlaveScanner object.
4370
4371 Replace its default logging handler by a testing version.
4372 """
4373- if builder_name is None:
4374- builder_name = BOB_THE_BUILDER_NAME
4375- scanner = SlaveScanner(builder_name, QuietFakeLogger())
4376- scanner.logger.name = 'slave-scanner'
4377+ manager = SlaveScanner(BOB_THE_BUILDER_NAME, BufferLogger())
4378+ manager.logger.name = 'slave-scanner'
4379
4380- return scanner
4381+ return manager
4382
4383 def _checkDispatch(self, slave, builder):
4384- # SlaveScanner.scan returns a slave when a dispatch was
4385- # successful. We also check that the builder has a job on it.
4386-
4387- self.assertTrue(slave is not None, "Expected a slave.")
4388+ """`SlaveScanner.scan` returns a `RecordingSlave`.
4389+
4390+ The single slave returned should match the given builder and
4391+ contain interactions that should be performed asynchronously for
4392+ properly dispatching the sampledata job.
4393+ """
4394+ self.assertFalse(
4395+ slave is None, "Unexpected recording_slaves.")
4396+
4397+ self.assertEqual(slave.name, builder.name)
4398+ self.assertEqual(slave.url, builder.url)
4399+ self.assertEqual(slave.vm_host, builder.vm_host)
4400 self.assertEqual(0, builder.failure_count)
4401- self.assertTrue(builder.currentjob is not None)
4402+
4403+ self.assertEqual(
4404+ [('ensurepresent',
4405+ ('0feca720e2c29dafb2c900713ba560e03b758711',
4406+ 'http://localhost:58000/93/fake_chroot.tar.gz',
4407+ '', '')),
4408+ ('ensurepresent',
4409+ ('4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33',
4410+ 'http://localhost:58000/43/alsa-utils_1.0.9a-4ubuntu1.dsc',
4411+ '', '')),
4412+ ('build',
4413+ ('6358a89e2215e19b02bf91e2e4d009640fae5cf8',
4414+ 'binarypackage', '0feca720e2c29dafb2c900713ba560e03b758711',
4415+ {'alsa-utils_1.0.9a-4ubuntu1.dsc':
4416+ '4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33'},
4417+ {'arch_indep': True,
4418+ 'arch_tag': 'i386',
4419+ 'archive_private': False,
4420+ 'archive_purpose': 'PRIMARY',
4421+ 'archives':
4422+ ['deb http://ftpmaster.internal/ubuntu hoary main'],
4423+ 'build_debug_symbols': False,
4424+ 'ogrecomponent': 'main',
4425+ 'suite': u'hoary'}))],
4426+ slave.calls, "Job was not properly dispatched.")
4427
4428 def testScanDispatchForResetBuilder(self):
4429 # A job gets dispatched to the sampledata builder after it's reset.
4430@@ -143,27 +709,26 @@
4431 # Reset sampledata builder.
4432 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
4433 self._resetBuilder(builder)
4434- builder.setSlaveForTesting(OkSlave())
4435 # Set this to 1 here so that _checkDispatch can make sure it's
4436 # reset to 0 after a successful dispatch.
4437 builder.failure_count = 1
4438
4439 # Run 'scan' and check its result.
4440- self.layer.txn.commit()
4441- self.layer.switchDbUser(config.builddmaster.dbuser)
4442- scanner = self._getScanner()
4443- d = defer.maybeDeferred(scanner.scan)
4444+ LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
4445+ manager = self._getManager()
4446+ d = defer.maybeDeferred(manager.scan)
4447 d.addCallback(self._checkDispatch, builder)
4448 return d
4449
4450- def _checkNoDispatch(self, slave, builder):
4451+ def _checkNoDispatch(self, recording_slave, builder):
4452 """Assert that no dispatch has occurred.
4453
4454- 'slave' is None, so no interations would be passed
4455+ 'recording_slave' is None, so no interactions would be passed
4456 to the asynchronous dispatcher and the builder remained active
4457 and IDLE.
4458 """
4459- self.assertTrue(slave is None, "Unexpected slave.")
4460+ self.assertTrue(
4461+ recording_slave is None, "Unexpected recording_slave.")
4462
4463 builder = getUtility(IBuilderSet).get(builder.id)
4464 self.assertTrue(builder.builderok)
4465@@ -188,9 +753,9 @@
4466 login(ANONYMOUS)
4467
4468 # Run 'scan' and check its result.
4469- self.layer.switchDbUser(config.builddmaster.dbuser)
4470- scanner = self._getScanner()
4471- d = defer.maybeDeferred(scanner.singleCycle)
4472+ LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
4473+ manager = self._getManager()
4474+ d = defer.maybeDeferred(manager.scan)
4475 d.addCallback(self._checkNoDispatch, builder)
4476 return d
4477
4478@@ -228,9 +793,9 @@
4479 login(ANONYMOUS)
4480
4481 # Run 'scan' and check its result.
4482- self.layer.switchDbUser(config.builddmaster.dbuser)
4483- scanner = self._getScanner()
4484- d = defer.maybeDeferred(scanner.scan)
4485+ LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
4486+ manager = self._getManager()
4487+ d = defer.maybeDeferred(manager.scan)
4488 d.addCallback(self._checkJobRescued, builder, job)
4489 return d
4490
4491@@ -249,6 +814,8 @@
4492 self.assertBuildingJob(job, builder, logtail='This is a build log')
4493
4494 def testScanUpdatesBuildingJobs(self):
4495+ # The job being built on an active builder is updated by the scan.
4496+
4497 # Enable sampledata builder attached to an appropriate testing
4498 # slave. It will respond as if it was building the sampledata job.
4499 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
4500@@ -263,174 +830,188 @@
4501 self.assertBuildingJob(job, builder)
4502
4503 # Run 'scan' and check its result.
4504- self.layer.switchDbUser(config.builddmaster.dbuser)
4505- scanner = self._getScanner()
4506- d = defer.maybeDeferred(scanner.scan)
4507+ LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
4508+ manager = self._getManager()
4509+ d = defer.maybeDeferred(manager.scan)
4510 d.addCallback(self._checkJobUpdated, builder, job)
4511 return d
4512
4513- def test_scan_with_nothing_to_dispatch(self):
4514- factory = LaunchpadObjectFactory()
4515- builder = factory.makeBuilder()
4516- builder.setSlaveForTesting(OkSlave())
4517- scanner = self._getScanner(builder_name=builder.name)
4518- d = scanner.scan()
4519- return d.addCallback(self._checkNoDispatch, builder)
4520-
4521- def test_scan_with_manual_builder(self):
4522- # Reset sampledata builder.
4523- builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
4524- self._resetBuilder(builder)
4525- builder.setSlaveForTesting(OkSlave())
4526- builder.manual = True
4527- scanner = self._getScanner()
4528- d = scanner.scan()
4529- d.addCallback(self._checkNoDispatch, builder)
4530- return d
4531-
4532- def test_scan_with_not_ok_builder(self):
4533- # Reset sampledata builder.
4534- builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
4535- self._resetBuilder(builder)
4536- builder.setSlaveForTesting(OkSlave())
4537- builder.builderok = False
4538- scanner = self._getScanner()
4539- d = scanner.scan()
4540- # Because the builder is not ok, we can't use _checkNoDispatch.
4541- d.addCallback(
4542- lambda ignored: self.assertIdentical(None, builder.currentjob))
4543- return d
4544-
4545- def test_scan_of_broken_slave(self):
4546- builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
4547- self._resetBuilder(builder)
4548- builder.setSlaveForTesting(BrokenSlave())
4549- builder.failure_count = 0
4550- scanner = self._getScanner(builder_name=builder.name)
4551- d = scanner.scan()
4552- return self.assertFailure(d, xmlrpclib.Fault)
4553-
4554- def _assertFailureCounting(self, builder_count, job_count,
4555- expected_builder_count, expected_job_count):
4556+ def test_scan_assesses_failure_exceptions(self):
4557 # If scan() fails with an exception, failure_counts should be
4558- # incremented. What we do with the results of the failure
4559- # counts is tested below separately, this test just makes sure that
4560- # scan() is setting the counts.
4561+ # incremented and tested.
4562 def failing_scan():
4563- return defer.fail(Exception("fake exception"))
4564- scanner = self._getScanner()
4565- scanner.scan = failing_scan
4566+ raise Exception("fake exception")
4567+ manager = self._getManager()
4568+ manager.scan = failing_scan
4569+ manager.scheduleNextScanCycle = FakeMethod()
4570 from lp.buildmaster import manager as manager_module
4571 self.patch(manager_module, 'assessFailureCounts', FakeMethod())
4572- builder = getUtility(IBuilderSet)[scanner.builder_name]
4573-
4574- builder.failure_count = builder_count
4575- builder.currentjob.specific_job.build.failure_count = job_count
4576- # The _scanFailed() calls abort, so make sure our existing
4577- # failure counts are persisted.
4578- self.layer.txn.commit()
4579-
4580- # singleCycle() calls scan() which is our fake one that throws an
4581+ builder = getUtility(IBuilderSet)[manager.builder_name]
4582+
4583+ # Failure counts start at zero.
4584+ self.assertEqual(0, builder.failure_count)
4585+ self.assertEqual(
4586+ 0, builder.currentjob.specific_job.build.failure_count)
4587+
4588+ # startCycle() calls scan() which is our fake one that throws an
4589 # exception.
4590- d = scanner.singleCycle()
4591+ manager.startCycle()
4592
4593 # Failure counts should be updated, and the assessment method
4594- # should have been called. The actual behaviour is tested below
4595- # in TestFailureAssessments.
4596- def got_scan(ignored):
4597- self.assertEqual(expected_builder_count, builder.failure_count)
4598- self.assertEqual(
4599- expected_job_count,
4600- builder.currentjob.specific_job.build.failure_count)
4601- self.assertEqual(
4602- 1, manager_module.assessFailureCounts.call_count)
4603-
4604- return d.addCallback(got_scan)
4605-
4606- def test_scan_first_fail(self):
4607- # The first failure of a job should result in the failure_count
4608- # on the job and the builder both being incremented.
4609- self._assertFailureCounting(
4610- builder_count=0, job_count=0, expected_builder_count=1,
4611- expected_job_count=1)
4612-
4613- def test_scan_second_builder_fail(self):
4614- # The first failure of a job should result in the failure_count
4615- # on the job and the builder both being incremented.
4616- self._assertFailureCounting(
4617- builder_count=1, job_count=0, expected_builder_count=2,
4618- expected_job_count=1)
4619-
4620- def test_scan_second_job_fail(self):
4621- # The first failure of a job should result in the failure_count
4622- # on the job and the builder both being incremented.
4623- self._assertFailureCounting(
4624- builder_count=0, job_count=1, expected_builder_count=1,
4625- expected_job_count=2)
4626-
4627- def test_scanFailed_handles_lack_of_a_job_on_the_builder(self):
4628- def failing_scan():
4629- return defer.fail(Exception("fake exception"))
4630- scanner = self._getScanner()
4631- scanner.scan = failing_scan
4632- builder = getUtility(IBuilderSet)[scanner.builder_name]
4633- builder.failure_count = Builder.FAILURE_THRESHOLD
4634- builder.currentjob.reset()
4635- self.layer.txn.commit()
4636-
4637- d = scanner.singleCycle()
4638-
4639- def scan_finished(ignored):
4640- self.assertFalse(builder.builderok)
4641-
4642- return d.addCallback(scan_finished)
4643-
4644- def test_fail_to_resume_slave_resets_job(self):
4645- # If an attempt to resume and dispatch a slave fails, it should
4646- # reset the job via job.reset()
4647-
4648- # Make a slave with a failing resume() method.
4649- slave = OkSlave()
4650- slave.resume = lambda: deferLater(
4651- reactor, 0, defer.fail, Failure(('out', 'err', 1)))
4652-
4653- # Reset sampledata builder.
4654- builder = removeSecurityProxy(
4655- getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME])
4656- self._resetBuilder(builder)
4657- self.assertEqual(0, builder.failure_count)
4658- builder.setSlaveForTesting(slave)
4659- builder.vm_host = "fake_vm_host"
4660-
4661- scanner = self._getScanner()
4662-
4663- # Get the next job that will be dispatched.
4664- job = removeSecurityProxy(builder._findBuildCandidate())
4665- job.virtualized = True
4666- builder.virtualized = True
4667- d = scanner.singleCycle()
4668-
4669- def check(ignored):
4670- # The failure_count will have been incremented on the
4671- # builder, we can check that to see that a dispatch attempt
4672- # did indeed occur.
4673- self.assertEqual(1, builder.failure_count)
4674- # There should also be no builder set on the job.
4675- self.assertTrue(job.builder is None)
4676- build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)
4677- self.assertEqual(build.status, BuildStatus.NEEDSBUILD)
4678-
4679- return d.addCallback(check)
4680+ # should have been called.
4681+ self.assertEqual(1, builder.failure_count)
4682+ self.assertEqual(
4683+ 1, builder.currentjob.specific_job.build.failure_count)
4684+
4685+ self.assertEqual(
4686+ 1, manager_module.assessFailureCounts.call_count)
4687+
4688+
4689+class TestDispatchResult(LaunchpadTestCase):
4690+ """Tests `BaseDispatchResult` variations.
4691+
4692+ Variations of `BaseDispatchResult` when evaluated update the database
4693+ information according to their purpose.
4694+ """
4695+
4696+ layer = LaunchpadZopelessLayer
4697+
4698+ def _getBuilder(self, name):
4699+ """Return a fixed `IBuilder` instance from the sampledata.
4700+
4701+ Ensure it's active (builderok=True) and it has an in-progress job.
4702+ """
4703+ login('foo.bar@canonical.com')
4704+
4705+ builder = getUtility(IBuilderSet)[name]
4706+ builder.builderok = True
4707+
4708+ job = builder.currentjob
4709+ build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)
4710+ self.assertEqual(
4711+ 'i386 build of mozilla-firefox 0.9 in ubuntu hoary RELEASE',
4712+ build.title)
4713+
4714+ self.assertEqual('BUILDING', build.status.name)
4715+ self.assertNotEqual(None, job.builder)
4716+ self.assertNotEqual(None, job.date_started)
4717+ self.assertNotEqual(None, job.logtail)
4718+
4719+ transaction.commit()
4720+
4721+ return builder, job.id
4722+
4723+ def assertBuildqueueIsClean(self, buildqueue):
4724+ # Check that the buildqueue is reset.
4725+ self.assertEqual(None, buildqueue.builder)
4726+ self.assertEqual(None, buildqueue.date_started)
4727+ self.assertEqual(None, buildqueue.logtail)
4728+
4729+ def assertBuilderIsClean(self, builder):
4730+ # Check that the builder is ready for a new build.
4731+ self.assertTrue(builder.builderok)
4732+ self.assertIs(None, builder.failnotes)
4733+ self.assertIs(None, builder.currentjob)
4734+
4735+ def testResetDispatchResult(self):
4736+ # Test that `ResetDispatchResult` resets the builder and job.
4737+ builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
4738+ buildqueue_id = builder.currentjob.id
4739+ builder.builderok = True
4740+ builder.failure_count = 1
4741+
4742+ # Set up an interaction to satisfy the 'write_transaction' decorator.
4743+ login(ANONYMOUS)
4744+ slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
4745+ result = ResetDispatchResult(slave)
4746+ result()
4747+
4748+ buildqueue = getUtility(IBuildQueueSet).get(buildqueue_id)
4749+ self.assertBuildqueueIsClean(buildqueue)
4750+
4751+ # XXX Julian
4752+ # Disabled test until bug 586362 is fixed.
4753+ #self.assertFalse(builder.builderok)
4754+ self.assertBuilderIsClean(builder)
4755+
4756+ def testFailDispatchResult(self):
4757+ # Test that `FailDispatchResult` calls assessFailureCounts() so
4758+ # that we know the builders and jobs are failed as necessary
4759+ # when a FailDispatchResult is called at the end of the dispatch
4760+ # chain.
4761+ builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
4762+
4763+ # Set up an interaction to satisfy the 'write_transaction' decorator.
4764+ login(ANONYMOUS)
4765+ slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
4766+ result = FailDispatchResult(slave, 'does not work!')
4767+ result.assessFailureCounts = FakeMethod()
4768+ self.assertEqual(0, result.assessFailureCounts.call_count)
4769+ result()
4770+ self.assertEqual(1, result.assessFailureCounts.call_count)
4771+
4772+ def _setup_failing_dispatch_result(self):
4773+ # assessFailureCounts should fail jobs or builders depending on
4774+ # whether it sees the failure_counts on each increasing.
4775+ builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
4776+ slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
4777+ result = FailDispatchResult(slave, 'does not work!')
4778+ return builder, result
4779+
4780+ def test_assessFailureCounts_equal_failures(self):
4781+ # Basic case where the failure counts are equal and the job is
4782+ # reset to try again & the builder is not failed.
4783+ builder, result = self._setup_failing_dispatch_result()
4784+ buildqueue = builder.currentjob
4785+ build = buildqueue.specific_job.build
4786+ builder.failure_count = 2
4787+ build.failure_count = 2
4788+ result.assessFailureCounts()
4789+
4790+ self.assertBuilderIsClean(builder)
4791+ self.assertEqual('NEEDSBUILD', build.status.name)
4792+ self.assertBuildqueueIsClean(buildqueue)
4793+
4794+ def test_assessFailureCounts_job_failed(self):
4795+ # Case where the job has failed more than the builder.
4796+ builder, result = self._setup_failing_dispatch_result()
4797+ buildqueue = builder.currentjob
4798+ build = buildqueue.specific_job.build
4799+ build.failure_count = 2
4800+ builder.failure_count = 1
4801+ result.assessFailureCounts()
4802+
4803+ self.assertBuilderIsClean(builder)
4804+ self.assertEqual('FAILEDTOBUILD', build.status.name)
4805+ # The buildqueue should have been removed entirely.
4806+ self.assertEqual(
4807+ None, getUtility(IBuildQueueSet).getByBuilder(builder),
4808+ "Buildqueue was not removed when it should be.")
4809+
4810+ def test_assessFailureCounts_builder_failed(self):
4811+ # Case where the builder has failed more than the job.
4812+ builder, result = self._setup_failing_dispatch_result()
4813+ buildqueue = builder.currentjob
4814+ build = buildqueue.specific_job.build
4815+ build.failure_count = 2
4816+ builder.failure_count = 3
4817+ result.assessFailureCounts()
4818+
4819+ self.assertFalse(builder.builderok)
4820+ self.assertEqual('does not work!', builder.failnotes)
4821+ self.assertTrue(builder.currentjob is None)
4822+ self.assertEqual('NEEDSBUILD', build.status.name)
4823+ self.assertBuildqueueIsClean(buildqueue)
4824
4825
4826 class TestBuilddManager(TrialTestCase):
4827
4828- layer = TwistedLaunchpadZopelessLayer
4829+ layer = LaunchpadZopelessLayer
4830
4831 def _stub_out_scheduleNextScanCycle(self):
4832 # stub out the code that adds a callLater, so that later tests
4833 # don't get surprises.
4834- self.patch(SlaveScanner, 'startCycle', FakeMethod())
4835+ self.patch(SlaveScanner, 'scheduleNextScanCycle', FakeMethod())
4836
4837 def test_addScanForBuilders(self):
4838 # Test that addScanForBuilders generates NewBuildersScanner objects.
4839@@ -459,62 +1040,10 @@
4840 self.assertNotEqual(0, manager.new_builders_scanner.scan.call_count)
4841
4842
4843-class TestFailureAssessments(TestCaseWithFactory):
4844-
4845- layer = ZopelessDatabaseLayer
4846-
4847- def setUp(self):
4848- TestCaseWithFactory.setUp(self)
4849- self.builder = self.factory.makeBuilder()
4850- self.build = self.factory.makeSourcePackageRecipeBuild()
4851- self.buildqueue = self.build.queueBuild()
4852- self.buildqueue.markAsBuilding(self.builder)
4853-
4854- def test_equal_failures_reset_job(self):
4855- self.builder.gotFailure()
4856- self.builder.getCurrentBuildFarmJob().gotFailure()
4857-
4858- assessFailureCounts(self.builder, "failnotes")
4859- self.assertIs(None, self.builder.currentjob)
4860- self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
4861-
4862- def test_job_failing_more_than_builder_fails_job(self):
4863- self.builder.getCurrentBuildFarmJob().gotFailure()
4864-
4865- assessFailureCounts(self.builder, "failnotes")
4866- self.assertIs(None, self.builder.currentjob)
4867- self.assertEqual(self.build.status, BuildStatus.FAILEDTOBUILD)
4868-
4869- def test_builder_failing_more_than_job_but_under_fail_threshold(self):
4870- self.builder.failure_count = Builder.FAILURE_THRESHOLD - 1
4871-
4872- assessFailureCounts(self.builder, "failnotes")
4873- self.assertIs(None, self.builder.currentjob)
4874- self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
4875- self.assertTrue(self.builder.builderok)
4876-
4877- def test_builder_failing_more_than_job_but_over_fail_threshold(self):
4878- self.builder.failure_count = Builder.FAILURE_THRESHOLD
4879-
4880- assessFailureCounts(self.builder, "failnotes")
4881- self.assertIs(None, self.builder.currentjob)
4882- self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
4883- self.assertFalse(self.builder.builderok)
4884- self.assertEqual("failnotes", self.builder.failnotes)
4885-
4886- def test_builder_failing_with_no_attached_job(self):
4887- self.buildqueue.reset()
4888- self.builder.failure_count = Builder.FAILURE_THRESHOLD
4889-
4890- assessFailureCounts(self.builder, "failnotes")
4891- self.assertFalse(self.builder.builderok)
4892- self.assertEqual("failnotes", self.builder.failnotes)
4893-
4894-
4895 class TestNewBuilders(TrialTestCase):
4896 """Test detecting of new builders."""
4897
4898- layer = TwistedLaunchpadZopelessLayer
4899+ layer = LaunchpadZopelessLayer
4900
4901 def _getScanner(self, manager=None, clock=None):
4902 return NewBuildersScanner(manager=manager, clock=clock)
4903@@ -555,8 +1084,11 @@
4904 new_builders, builder_scanner.checkForNewBuilders())
4905
4906 def test_scan(self):
4907- # See if scan detects new builders.
4908+ # See if scan detects new builders and schedules the next scan.
4909
4910+ # stub out the addScanForBuilders and scheduleScan methods since
4911+ # they use callLater; we only want to assert that they get
4912+ # called.
4913 def fake_checkForNewBuilders():
4914 return "new_builders"
4915
4916@@ -572,6 +1104,9 @@
4917 builder_scanner.scan()
4918 advance = NewBuildersScanner.SCAN_INTERVAL + 1
4919 clock.advance(advance)
4920+ self.assertNotEqual(
4921+ 0, builder_scanner.scheduleScan.call_count,
4922+ "scheduleScan did not get called")
4923
4924
4925 def is_file_growing(filepath, poll_interval=1, poll_repeat=10):
4926@@ -612,7 +1147,7 @@
4927 return False
4928
4929
4930-class TestBuilddManagerScript(TestCaseWithFactory):
4931+class TestBuilddManagerScript(LaunchpadTestCase):
4932
4933 layer = LaunchpadScriptLayer
4934
4935@@ -621,7 +1156,6 @@
4936 fixture = BuilddManagerTestSetup()
4937 fixture.setUp()
4938 fixture.tearDown()
4939- self.layer.force_dirty_database()
4940
4941 # XXX Julian 2010-08-06 bug=614275
4942 # These next 2 tests are in the wrong place, they should be near the
4943
4944=== modified file 'lib/lp/buildmaster/tests/test_packagebuild.py'
4945--- lib/lp/buildmaster/tests/test_packagebuild.py 2010-10-26 20:43:50 +0000
4946+++ lib/lp/buildmaster/tests/test_packagebuild.py 2010-12-07 16:29:13 +0000
4947@@ -97,8 +97,6 @@
4948 self.assertRaises(
4949 NotImplementedError, self.package_build.verifySuccessfulUpload)
4950 self.assertRaises(NotImplementedError, self.package_build.notify)
4951- # XXX 2010-10-18 bug=662631
4952- # Change this to do non-blocking IO.
4953 self.assertRaises(
4954 NotImplementedError, self.package_build.handleStatus,
4955 None, None, None)
4956@@ -311,8 +309,6 @@
4957 # A filemap with plain filenames should not cause a problem.
4958 # The call to handleStatus will attempt to get the file from
4959 # the slave resulting in a URL error in this test case.
4960- # XXX 2010-10-18 bug=662631
4961- # Change this to do non-blocking IO.
4962 self.build.handleStatus('OK', None, {
4963 'filemap': {'myfile.py': 'test_file_hash'},
4964 })
4965@@ -323,8 +319,6 @@
4966 def test_handleStatus_OK_absolute_filepath(self):
4967 # A filemap that tries to write to files outside of
4968 # the upload directory will result in a failed upload.
4969- # XXX 2010-10-18 bug=662631
4970- # Change this to do non-blocking IO.
4971 self.build.handleStatus('OK', None, {
4972 'filemap': {'/tmp/myfile.py': 'test_file_hash'},
4973 })
4974@@ -335,8 +329,6 @@
4975 def test_handleStatus_OK_relative_filepath(self):
4976 # A filemap that tries to write to files outside of
4977 # the upload directory will result in a failed upload.
4978- # XXX 2010-10-18 bug=662631
4979- # Change this to do non-blocking IO.
4980 self.build.handleStatus('OK', None, {
4981 'filemap': {'../myfile.py': 'test_file_hash'},
4982 })
4983@@ -347,8 +339,6 @@
4984 # The build log is set during handleStatus.
4985 removeSecurityProxy(self.build).log = None
4986 self.assertEqual(None, self.build.log)
4987- # XXX 2010-10-18 bug=662631
4988- # Change this to do non-blocking IO.
4989 self.build.handleStatus('OK', None, {
4990 'filemap': {'myfile.py': 'test_file_hash'},
4991 })
4992@@ -358,8 +348,6 @@
4993 # The date finished is updated during handleStatus_OK.
4994 removeSecurityProxy(self.build).date_finished = None
4995 self.assertEqual(None, self.build.date_finished)
4996- # XXX 2010-10-18 bug=662631
4997- # Change this to do non-blocking IO.
4998 self.build.handleStatus('OK', None, {
4999 'filemap': {'myfile.py': 'test_file_hash'},
5000 })
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches

to all changes: