Merge lp:~mars/launchpad/test-ghost-update-2 into lp:~launchpad/launchpad/ghost-line

Proposed by Māris Fogels
Status: Merged
Approved by: Māris Fogels
Approved revision: 11787
Merged at revision: 11784
Proposed branch: lp:~mars/launchpad/test-ghost-update-2
Merge into: lp:~launchpad/launchpad/ghost-line
Diff against target: 7763 lines (+3923/-2238)
36 files modified
lib/canonical/launchpad/icing/style-3-0.css.in (+6/-0)
lib/lp/app/browser/configure.zcml (+6/-0)
lib/lp/app/browser/linkchecker.py (+77/-0)
lib/lp/app/browser/stringformatter.py (+3/-1)
lib/lp/app/browser/tests/test_linkchecker.py (+83/-0)
lib/lp/app/configure.zcml (+0/-14)
lib/lp/app/doc/displaying-paragraphs-of-text.txt (+11/-11)
lib/lp/app/javascript/lp-links.js (+105/-0)
lib/lp/app/templates/base-layout-macros.pt (+9/-0)
lib/lp/bugs/windmill/tests/test_bug_commenting.py (+1/-1)
lib/lp/buildmaster/doc/builder.txt (+118/-2)
lib/lp/buildmaster/interfaces/builder.py (+62/-83)
lib/lp/buildmaster/manager.py (+468/-204)
lib/lp/buildmaster/model/builder.py (+224/-240)
lib/lp/buildmaster/model/buildfarmjobbehavior.py (+52/-60)
lib/lp/buildmaster/model/packagebuild.py (+0/-6)
lib/lp/buildmaster/tests/mock_slaves.py (+32/-157)
lib/lp/buildmaster/tests/test_builder.py (+154/-582)
lib/lp/buildmaster/tests/test_manager.py (+782/-248)
lib/lp/buildmaster/tests/test_packagebuild.py (+0/-12)
lib/lp/code/model/recipebuilder.py (+28/-32)
lib/lp/code/windmill/tests/test_branch_broken_links.py (+113/-0)
lib/lp/code/windmill/tests/test_branchmergeproposal_review.py (+1/-1)
lib/lp/soyuz/browser/tests/test_builder_views.py (+1/-1)
lib/lp/soyuz/doc/buildd-dispatching.txt (+371/-0)
lib/lp/soyuz/doc/buildd-slavescanner.txt (+876/-0)
lib/lp/soyuz/model/binarypackagebuildbehavior.py (+41/-59)
lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+8/-290)
lib/lp/soyuz/tests/test_doc.py (+6/-0)
lib/lp/testing/factory.py (+2/-8)
lib/lp/translations/doc/translationtemplatesbuildbehavior.txt (+114/-0)
lib/lp/translations/model/translationtemplatesbuildbehavior.py (+14/-20)
lib/lp/translations/stories/buildfarm/xx-build-summary.txt (+1/-1)
lib/lp/translations/tests/test_translationtemplatesbuildbehavior.py (+153/-202)
lib/lp_sitecustomize.py (+0/-3)
utilities/migrater/file-ownership.txt (+1/-0)
To merge this branch: bzr merge lp:~mars/launchpad/test-ghost-update-2
Reviewer Review Type Date Requested Status
Māris Fogels (community) Approve
Review via email: mp+42515@code.launchpad.net

Commit message

Testing the bundle-merge tarmac command

Description of the change

Testing the bundle-merge Tarmac command

To post a comment you must log in.
Revision history for this message
Māris Fogels (mars) :
review: Approve
Revision history for this message
Launchpad PQM Bot (launchpad-pqm) wrote :

No approved revision specified.

11786. By Māris Fogels

Merged r11806

11787. By Māris Fogels

Added a file for testing

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/canonical/launchpad/icing/style-3-0.css.in'
--- lib/canonical/launchpad/icing/style-3-0.css.in 2010-09-23 11:17:45 +0000
+++ lib/canonical/launchpad/icing/style-3-0.css.in 2010-12-07 16:29:13 +0000
@@ -284,6 +284,12 @@
284a.help.icon, a.sprite.maybe.help {284a.help.icon, a.sprite.maybe.help {
285 border: none;285 border: none;
286}286}
287a.invalid-link {
288 disabled: True;
289 color: #909090;
290 text-decoration: none;
291 cursor: default;
292 }
287img, a img {293img, a img {
288 /* No border on images that are links. */294 /* No border on images that are links. */
289 border: none;295 border: none;
290296
=== modified file 'lib/lp/app/browser/configure.zcml'
--- lib/lp/app/browser/configure.zcml 2010-10-15 01:27:04 +0000
+++ lib/lp/app/browser/configure.zcml 2010-12-07 16:29:13 +0000
@@ -98,6 +98,12 @@
98 template="../templates/launchpad-search-form.pt"98 template="../templates/launchpad-search-form.pt"
99 permission="zope.Public" />99 permission="zope.Public" />
100100
101 <browser:page
102 for="*"
103 name="+check-links"
104 class="lp.app.browser.linkchecker.LinkCheckerAPI"
105 permission="zope.Public"/>
106
101 <!-- TALES namespaces. -->107 <!-- TALES namespaces. -->
102108
103 <!-- TALES lp: namespace (should be deprecated) -->109 <!-- TALES lp: namespace (should be deprecated) -->
104110
=== added file 'lib/lp/app/browser/linkchecker.py'
--- lib/lp/app/browser/linkchecker.py 1970-01-01 00:00:00 +0000
+++ lib/lp/app/browser/linkchecker.py 2010-12-07 16:29:13 +0000
@@ -0,0 +1,77 @@
1# Copyright 2009 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4# pylint: disable-msg=E0211,E0213
5
6__metaclass__ = type
7__all__ = [
8 'LinkCheckerAPI',
9 ]
10
11import simplejson
12from zope.component import getUtility
13
14from lp.app.errors import NotFoundError
15from lp.code.errors import (
16 CannotHaveLinkedBranch,
17 InvalidNamespace,
18 NoLinkedBranch,
19 NoSuchBranch,
20 )
21from lp.code.interfaces.branchlookup import IBranchLookup
22from lp.registry.interfaces.product import InvalidProductName
23
24
25class LinkCheckerAPI:
26 """Validates Launchpad shortcut links.
27
28 This class provides the endpoint of an Ajax call to .../+check-links.
29 When invoked with a collection of links harvested from a page, it will
30 check the validity of each one and send a response containing those that
 31 are invalid. JavaScript on the page will set the style of invalid links to
32 something appropriate.
33
34 This initial implementation supports processing links like the following:
35 /+branch/foo/bar
36
37 The implementation can easily be extended to handle other forms by
38 providing a method to handle the link type extracted from the json
39 request.
40 """
41
42 def __init__(self, context, request):
43 # We currently only use the request.
44 # self.context = context
45 self.request = request
46
 47 # Each link type has its own validation method.
48 self.link_checkers = dict(
49 branch_links=self.check_branch_links,
50 )
51
52 def __call__(self):
53 result = {}
54 links_to_check_data = self.request.get('link_hrefs')
55 links_to_check = simplejson.loads(links_to_check_data)
56
57 for link_type in links_to_check:
58 links = links_to_check[link_type]
59 invalid_links = self.link_checkers[link_type](links)
60 result['invalid_'+link_type] = invalid_links
61
62 self.request.response.setHeader('Content-type', 'application/json')
63 return simplejson.dumps(result)
64
65 def check_branch_links(self, links):
66 """Check links of the form /+branch/foo/bar"""
67 invalid_links = []
68 branch_lookup = getUtility(IBranchLookup)
69 for link in links:
70 path = link.strip('/')[len('+branch/'):]
71 try:
72 branch_lookup.getByLPPath(path)
73 except (CannotHaveLinkedBranch, InvalidNamespace,
74 InvalidProductName, NoLinkedBranch, NoSuchBranch,
75 NotFoundError):
76 invalid_links.append(link)
77 return invalid_links
078
=== modified file 'lib/lp/app/browser/stringformatter.py'
--- lib/lp/app/browser/stringformatter.py 2010-09-25 14:29:32 +0000
+++ lib/lp/app/browser/stringformatter.py 2010-12-07 16:29:13 +0000
@@ -274,7 +274,9 @@
274 return FormattersAPI._linkify_bug_number(274 return FormattersAPI._linkify_bug_number(
275 lp_url, path, trailers)275 lp_url, path, trailers)
276 url = '/+branch/%s' % path276 url = '/+branch/%s' % path
277 return '<a href="%s">%s</a>%s' % (277 # Mark the links with a 'branch-short-link' class so they can be
278 # harvested and validated when the page is rendered.
279 return '<a href="%s" class="branch-short-link">%s</a>%s' % (
278 cgi.escape(url, quote=True),280 cgi.escape(url, quote=True),
279 cgi.escape(lp_url),281 cgi.escape(lp_url),
280 cgi.escape(trailers))282 cgi.escape(trailers))
281283
=== added file 'lib/lp/app/browser/tests/test_linkchecker.py'
--- lib/lp/app/browser/tests/test_linkchecker.py 1970-01-01 00:00:00 +0000
+++ lib/lp/app/browser/tests/test_linkchecker.py 2010-12-07 16:29:13 +0000
@@ -0,0 +1,83 @@
1# Copyright 2010 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Unit tests for the LinkCheckerAPI."""
5
6__metaclass__ = type
7
8from random import shuffle
9
10import simplejson
11from zope.security.proxy import removeSecurityProxy
12
13from canonical.launchpad.webapp.servers import LaunchpadTestRequest
14from canonical.testing.layers import DatabaseFunctionalLayer
15from lp.app.browser.linkchecker import LinkCheckerAPI
16from lp.testing import TestCaseWithFactory
17
18
19class TestLinkCheckerAPI(TestCaseWithFactory):
20
21 layer = DatabaseFunctionalLayer
22
23 BRANCH_URL_TEMPLATE = '/+branch/%s'
24
25 def check_invalid_links(self, result_json, link_type, invalid_links):
26 link_dict = simplejson.loads(result_json)
27 links_to_check = link_dict[link_type]
28 self.assertEqual(len(invalid_links), len(links_to_check))
29 self.assertEqual(set(invalid_links), set(links_to_check))
30
31 def make_valid_links(self):
32 branch = self.factory.makeProductBranch()
33 valid_branch_url = self.BRANCH_URL_TEMPLATE % branch.unique_name
34 product = self.factory.makeProduct()
35 product_branch = self.factory.makeProductBranch(product=product)
36 removeSecurityProxy(product).development_focus.branch = product_branch
37 valid_product_url = self.BRANCH_URL_TEMPLATE % product.name
38
39 return [
40 valid_branch_url,
41 valid_product_url,
42 ]
43
44 def make_invalid_links(self):
45 return [
46 self.BRANCH_URL_TEMPLATE % 'foo',
47 self.BRANCH_URL_TEMPLATE % 'bar',
48 ]
49
50 def invoke_branch_link_checker(
51 self, valid_branch_urls=None, invalid_branch_urls=None):
52 if valid_branch_urls is None:
53 valid_branch_urls = {}
54 if invalid_branch_urls is None:
55 invalid_branch_urls = {}
56
57 branch_urls = list(valid_branch_urls)
58 branch_urls.extend(invalid_branch_urls)
59 shuffle(branch_urls)
60
61 links_to_check = dict(branch_links=branch_urls)
62 link_json = simplejson.dumps(links_to_check)
63
64 request = LaunchpadTestRequest(link_hrefs=link_json)
65 link_checker = LinkCheckerAPI(object(), request)
66 result_json = link_checker()
67 self.check_invalid_links(
68 result_json, 'invalid_branch_links', invalid_branch_urls)
69
70 def test_only_valid_branchlinks(self):
71 branch_urls = self.make_valid_links()
72 self.invoke_branch_link_checker(valid_branch_urls=branch_urls)
73
74 def test_only_invalid_branchlinks(self):
75 branch_urls = self.make_invalid_links()
76 self.invoke_branch_link_checker(invalid_branch_urls=branch_urls)
77
78 def test_valid_and_invald_branchlinks(self):
79 valid_branch_urls = self.make_valid_links()
80 invalid_branch_urls = self.make_invalid_links()
81 self.invoke_branch_link_checker(
82 valid_branch_urls=valid_branch_urls,
83 invalid_branch_urls=invalid_branch_urls)
084
=== added file 'lib/lp/app/configure.zcml'
--- lib/lp/app/configure.zcml 1970-01-01 00:00:00 +0000
+++ lib/lp/app/configure.zcml 2010-12-07 16:29:13 +0000
@@ -0,0 +1,14 @@
1<!-- Copyright 2009 Canonical Ltd. This software is licensed under the
2 GNU Affero General Public License version 3 (see the file LICENSE).
3-->
4
5<configure
6 xmlns="http://namespaces.zope.org/zope"
7 xmlns:browser="http://namespaces.zope.org/browser"
8 xmlns:i18n="http://namespaces.zope.org/i18n"
9 xmlns:xmlrpc="http://namespaces.zope.org/xmlrpc"
10 xmlns:lp="http://namespaces.canonical.com/lp"
11 i18n_domain="launchpad">
12 <include
13 package=".browser"/>
14</configure>
015
=== removed file 'lib/lp/app/configure.zcml'
--- lib/lp/app/configure.zcml 2009-07-17 02:25:09 +0000
+++ lib/lp/app/configure.zcml 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
1<!-- Copyright 2009 Canonical Ltd. This software is licensed under the
2 GNU Affero General Public License version 3 (see the file LICENSE).
3-->
4
5<configure
6 xmlns="http://namespaces.zope.org/zope"
7 xmlns:browser="http://namespaces.zope.org/browser"
8 xmlns:i18n="http://namespaces.zope.org/i18n"
9 xmlns:xmlrpc="http://namespaces.zope.org/xmlrpc"
10 xmlns:lp="http://namespaces.canonical.com/lp"
11 i18n_domain="launchpad">
12 <include
13 package=".browser"/>
14</configure>
150
=== modified file 'lib/lp/app/doc/displaying-paragraphs-of-text.txt'
--- lib/lp/app/doc/displaying-paragraphs-of-text.txt 2010-10-09 16:36:22 +0000
+++ lib/lp/app/doc/displaying-paragraphs-of-text.txt 2010-12-07 16:29:13 +0000
@@ -357,17 +357,17 @@
357 ... 'lp:///foo\n'357 ... 'lp:///foo\n'
358 ... 'lp:/foo\n')358 ... 'lp:/foo\n')
359 >>> print test_tales('foo/fmt:text-to-html', foo=text)359 >>> print test_tales('foo/fmt:text-to-html', foo=text)
360 <p><a href="/+branch/~foo/bar/baz">lp:~foo/bar/baz</a><br />360 <p><a href="/+branch/~foo/bar/baz" class="...">lp:~foo/bar/baz</a><br />
361 <a href="/+branch/~foo/bar/bug-123">lp:~foo/bar/bug-123</a><br />361 <a href="/+branch/~foo/bar/bug-123" class="...">lp:~foo/bar/bug-123</a><br />
362 <a href="/+branch/~foo/+junk/baz">lp:~foo/+junk/baz</a><br />362 <a href="/+branch/~foo/+junk/baz" class="...">lp:~foo/+junk/baz</a><br />
363 <a href="/+branch/~foo/ubuntu/jaunty/evolution/baz">lp:~foo/ubuntu/jaunty/evolution/baz</a><br />363 <a href="/+branch/~foo/ubuntu/jaunty/evolution/baz" class="...">lp:~foo/ubuntu/jaunty/evolution/baz</a><br />
364 <a href="/+branch/foo/bar">lp:foo/bar</a><br />364 <a href="/+branch/foo/bar" class="...">lp:foo/bar</a><br />
365 <a href="/+branch/foo">lp:foo</a><br />365 <a href="/+branch/foo" class="...">lp:foo</a><br />
366 <a href="/+branch/foo">lp:foo</a>,<br />366 <a href="/+branch/foo" class="...">lp:foo</a>,<br />
367 <a href="/+branch/foo/bar">lp:foo/bar</a>.<br />367 <a href="/+branch/foo/bar" class="...">lp:foo/bar</a>.<br />
368 <a href="/+branch/foo/bar/baz">lp:foo/bar/baz</a><br />368 <a href="/+branch/foo/bar/baz" class="...">lp:foo/bar/baz</a><br />
369 <a href="/+branch/foo">lp:///foo</a><br />369 <a href="/+branch/foo" class="...">lp:///foo</a><br />
370 <a href="/+branch/foo">lp:/foo</a></p>370 <a href="/+branch/foo" class="...">lp:/foo</a></p>
371371
372Text that looks like a branch reference, but is followed only by digits is372Text that looks like a branch reference, but is followed only by digits is
373treated as a link to a bug.373treated as a link to a bug.
374374
=== added file 'lib/lp/app/javascript/lp-links.js'
--- lib/lp/app/javascript/lp-links.js 1970-01-01 00:00:00 +0000
+++ lib/lp/app/javascript/lp-links.js 2010-12-07 16:29:13 +0000
@@ -0,0 +1,105 @@
1/**
2 * Launchpad utilities for manipulating links.
3 *
4 * @module app
5 * @submodule links
6 */
7
8YUI.add('lp.app.links', function(Y) {
9
10 function harvest_links(Y, links_holder, link_class, link_type) {
11 // Get any links of the specified link_class and store them as the
12 // specified link_type in the specified links_holder
13 var link_info = new Array();
14 Y.all('.'+link_class).each(function(link) {
15 var href = link.getAttribute('href');
16 if( link_info.indexOf(href)<0 ) {
17 link_info.push(href);
18 }
19 });
20 if( link_info.length > 0 ) {
21 links_holder[link_type] = link_info;
22 }
23 }
24
25 function process_invalid_links(
26 Y, link_info, link_class, link_type, title) {
27 // We have a collection of invalid links possibly containing links of
28 // type link_type, so we need to remove the existing link_class,
29 // replace it with an invalid-link class, and set the link title.
30 var invalid_links = Y.Array(link_info['invalid_'+link_type]);
31
32 if( invalid_links.length > 0) {
33 Y.all('.'+link_class).each(function(link) {
34 var href = link.getAttribute('href');
35 if( invalid_links.indexOf(href)>=0 ) {
36 var msg = title + href;
37 link.removeClass(link_class);
38 link.addClass('invalid-link');
39 link.title = msg
40 link.on('click', function(e) {
41 e.halt();
42 alert(msg);
43 });
44 }
45 });
46 }
47 }
48
49 var links = Y.namespace('lp.app.links');
50
51 links.check_valid_lp_links = function() {
52 // Grabs any lp: style links on the page and checks that they are
53 // valid. Invalid ones have their class changed to "invalid-link".
54 // ATM, we only handle +branch links.
55
56 var links_to_check = {}
57
58 // We get all the links with defined css classes.
59 // At the moment, we just handle branch links, but in future...
60 harvest_links(Y, links_to_check, 'branch-short-link', 'branch_links');
61
62 // Do we have anything to do?
63 if( Y.Object.size(links_to_check) == 0 ) {
64 return;
65 }
66
67 // Get the final json to send
68 var json_link_info = Y.JSON.stringify(links_to_check);
69 var qs = '';
70 qs = LP.client.append_qs(qs, 'link_hrefs', json_link_info);
71
72 var config = {
73 on: {
74 failure: function(id, response, args) {
75 // If we have firebug installed, log the error.
76 if( console != undefined ) {
77 console.log("Link Check Error: " + args + ': '
78 + response.status + ' - ' +
79 response.statusText + ' - '
80 + response.responseXML);
81 }
82 },
83 success: function(id, response) {
84 var link_info = Y.JSON.parse(response.responseText)
85 // ATM, we just handle branch links, but in future...
86 process_invalid_links(Y, link_info, 'branch-short-link',
87 'branch_links', "Invalid branch: ");
88 }
89 }
90 }
91 var uri = '+check-links';
92 var on = Y.merge(config.on);
93 var client = this;
94 var y_config = { method: "POST",
95 headers: {'Accept': 'application/json'},
96 on: on,
97 'arguments': [client, uri],
98 data: qs};
99 Y.io(uri, y_config);
100 };
101
102}, "0.1", {"requires": [
103 "base", "node", "io", "dom", "json"
104 ]});
105
0106
=== modified file 'lib/lp/app/templates/base-layout-macros.pt'
--- lib/lp/app/templates/base-layout-macros.pt 2010-10-25 13:16:10 +0000
+++ lib/lp/app/templates/base-layout-macros.pt 2010-12-07 16:29:13 +0000
@@ -175,6 +175,8 @@
175 <script type="text/javascript"175 <script type="text/javascript"
176 tal:attributes="src string:${lp_js}/app/lp-mochi.js"></script>176 tal:attributes="src string:${lp_js}/app/lp-mochi.js"></script>
177 <script type="text/javascript"177 <script type="text/javascript"
178 tal:attributes="src string:${lp_js}/app/lp-links.js"></script>
179 <script type="text/javascript"
178 tal:attributes="src string:${lp_js}/app/dragscroll.js"></script>180 tal:attributes="src string:${lp_js}/app/dragscroll.js"></script>
179 <script type="text/javascript"181 <script type="text/javascript"
180 tal:attributes="src string:${lp_js}/app/picker.js"></script>182 tal:attributes="src string:${lp_js}/app/picker.js"></script>
@@ -304,6 +306,13 @@
304 // anywhere outside of it.306 // anywhere outside of it.
305 Y.on('click', handleClickOnPage, window);307 Y.on('click', handleClickOnPage, window);
306 });308 });
309
310 LPS.use('lp.app.links',
311 function(Y) {
312 Y.on('load', function(e) {
313 Y.lp.app.links.check_valid_lp_links();
314 }, window);
315 });
307 </script>316 </script>
308</metal:page-javascript>317</metal:page-javascript>
309318
310319
=== modified file 'lib/lp/bugs/windmill/tests/test_bug_commenting.py'
--- lib/lp/bugs/windmill/tests/test_bug_commenting.py 2010-08-20 20:31:18 +0000
+++ lib/lp/bugs/windmill/tests/test_bug_commenting.py 2010-12-07 16:29:13 +0000
@@ -18,7 +18,7 @@
18WAIT_ELEMENT_COMPLETE = u'30000'18WAIT_ELEMENT_COMPLETE = u'30000'
19WAIT_CHECK_CHANGE = u'1000'19WAIT_CHECK_CHANGE = u'1000'
20ADD_COMMENT_BUTTON = (20ADD_COMMENT_BUTTON = (
21 u'//input[@id="field.actions.save" and @class="button js-action"]')21 u'//input[@id="field.actions.save" and contains(@class, "button")]')
2222
2323
24class TestBugCommenting(WindmillTestCase):24class TestBugCommenting(WindmillTestCase):
2525
=== modified file 'lib/lp/buildmaster/doc/builder.txt'
--- lib/lp/buildmaster/doc/builder.txt 2010-09-24 12:10:52 +0000
+++ lib/lp/buildmaster/doc/builder.txt 2010-12-07 16:29:13 +0000
@@ -19,6 +19,9 @@
19As expected, it implements IBuilder.19As expected, it implements IBuilder.
2020
21 >>> from canonical.launchpad.webapp.testing import verifyObject21 >>> from canonical.launchpad.webapp.testing import verifyObject
22 >>> from lp.buildmaster.interfaces.builder import IBuilder
23 >>> verifyObject(IBuilder, builder)
24 True
2225
23 >>> print builder.name26 >>> print builder.name
24 bob27 bob
@@ -83,7 +86,7 @@
83The 'new' method will create a new builder in the database.86The 'new' method will create a new builder in the database.
8487
85 >>> bnew = builderset.new(1, 'http://dummy.com:8221/', 'dummy',88 >>> bnew = builderset.new(1, 'http://dummy.com:8221/', 'dummy',
86 ... 'Dummy Title', 'eh ?', 1)89 ... 'Dummy Title', 'eh ?', 1)
87 >>> bnew.name90 >>> bnew.name
88 u'dummy'91 u'dummy'
8992
@@ -167,7 +170,7 @@
167 >>> recipe_bq.processor = i386_family.processors[0]170 >>> recipe_bq.processor = i386_family.processors[0]
168 >>> recipe_bq.virtualized = True171 >>> recipe_bq.virtualized = True
169 >>> transaction.commit()172 >>> transaction.commit()
170173
171 >>> queue_sizes = builderset.getBuildQueueSizes()174 >>> queue_sizes = builderset.getBuildQueueSizes()
172 >>> print queue_sizes['virt']['386']175 >>> print queue_sizes['virt']['386']
173 (1L, datetime.timedelta(0, 64))176 (1L, datetime.timedelta(0, 64))
@@ -185,3 +188,116 @@
185188
186 >>> print queue_sizes['virt']['386']189 >>> print queue_sizes['virt']['386']
187 (2L, datetime.timedelta(0, 128))190 (2L, datetime.timedelta(0, 128))
191
192
193Resuming buildd slaves
194======================
195
196Virtual slaves are resumed using a command specified in the
197configuration profile. Production configuration uses an SSH trigger
198account accessed via a private key available on the builddmaster
199machine (which uses the ftpmaster configuration profile) as in:
200
201{{{
202ssh -i ~/.ssh/ppa-reset-key ppa@%(vm_host)s
203}}}
204
205The test configuration uses a fake command that can be performed on a
206development machine and allows us to test the important features used
207in production, such as 'vm_host' variable replacement.
208
209 >>> from canonical.config import config
210 >>> config.builddmaster.vm_resume_command
211 'echo %(vm_host)s'
212
213Before performing the command, it checks if the builder is indeed
214virtual and raises CannotResumeHost if it isn't.
215
216 >>> bob = getUtility(IBuilderSet)['bob']
217 >>> bob.resumeSlaveHost()
218 Traceback (most recent call last):
219 ...
220 CannotResumeHost: Builder is not virtualized.
221
222For testing purposes resumeSlaveHost returns the stdout and stderr
223buffer resulted from the command.
224
225 >>> frog = getUtility(IBuilderSet)['frog']
226 >>> out, err = frog.resumeSlaveHost()
227 >>> print out.strip()
228 localhost-host.ppa
229
230If the specified command fails, resumeSlaveHost also raises
231CannotResumeHost exception with the results stdout and stderr.
232
233 # The command must have a vm_host dict key and when executed,
234 # have a returncode that is not 0.
235 >>> vm_resume_command = """
236 ... [builddmaster]
237 ... vm_resume_command: test "%(vm_host)s = 'false'"
238 ... """
239 >>> config.push('vm_resume_command', vm_resume_command)
240 >>> frog.resumeSlaveHost()
241 Traceback (most recent call last):
242 ...
243 CannotResumeHost: Resuming failed:
244 OUT:
245 <BLANKLINE>
246 ERR:
247 <BLANKLINE>
248
249Restore default value for resume command.
250
251 >>> config_data = config.pop('vm_resume_command')
252
253
254Rescuing lost slaves
255====================
256
257Builder.rescueIfLost() checks the build ID reported in the slave status
258against the database. If it isn't building what we think it should be,
259the current build will be aborted and the slave cleaned in preparation
260for a new task. The decision about the slave's correctness is left up
261to IBuildFarmJobBehavior.verifySlaveBuildCookie -- for these examples we
262will use a special behavior that just checks if the cookie reads 'good'.
263
264 >>> import logging
265 >>> from lp.buildmaster.interfaces.builder import CorruptBuildCookie
266 >>> from lp.buildmaster.tests.mock_slaves import (
267 ... BuildingSlave, MockBuilder, OkSlave, WaitingSlave)
268
269 >>> class TestBuildBehavior:
270 ... def verifySlaveBuildCookie(self, cookie):
271 ... if cookie != 'good':
272 ... raise CorruptBuildCookie('Bad value')
273
274 >>> def rescue_slave_if_lost(slave):
275 ... builder = MockBuilder('mock', slave, TestBuildBehavior())
276 ... builder.rescueIfLost(logging.getLogger())
277
278An idle slave is not rescued.
279
280 >>> rescue_slave_if_lost(OkSlave())
281
282Slaves building or having built the correct build are not rescued
283either.
284
285 >>> rescue_slave_if_lost(BuildingSlave(build_id='good'))
286 >>> rescue_slave_if_lost(WaitingSlave(build_id='good'))
287
288But if a slave is building the wrong ID, it is declared lost and
289an abort is attempted. MockSlave prints out a message when it is aborted
290or cleaned.
291
292 >>> rescue_slave_if_lost(BuildingSlave(build_id='bad'))
293 Aborting slave
294 INFO:root:Builder 'mock' rescued from 'bad': 'Bad value'
295
296Slaves having completed an incorrect build are also declared lost,
297but there's no need to abort a completed build. Such builders are
298instead simply cleaned, ready for the next build.
299
300 >>> rescue_slave_if_lost(WaitingSlave(build_id='bad'))
301 Cleaning slave
302 INFO:root:Builder 'mock' rescued from 'bad': 'Bad value'
303
188304
=== modified file 'lib/lp/buildmaster/interfaces/builder.py'
--- lib/lp/buildmaster/interfaces/builder.py 2010-10-18 11:57:09 +0000
+++ lib/lp/buildmaster/interfaces/builder.py 2010-12-07 16:29:13 +0000
@@ -154,6 +154,11 @@
154154
155 currentjob = Attribute("BuildQueue instance for job being processed.")155 currentjob = Attribute("BuildQueue instance for job being processed.")
156156
157 is_available = Bool(
158 title=_("Whether or not a builder is available for building "
159 "new jobs. "),
160 required=False)
161
157 failure_count = Int(162 failure_count = Int(
158 title=_('Failure Count'), required=False, default=0,163 title=_('Failure Count'), required=False, default=0,
159 description=_("Number of consecutive failures for this builder."))164 description=_("Number of consecutive failures for this builder."))
@@ -168,74 +173,32 @@
168 def resetFailureCount():173 def resetFailureCount():
169 """Set the failure_count back to zero."""174 """Set the failure_count back to zero."""
170175
171 def failBuilder(reason):176 def checkSlaveAlive():
172 """Mark builder as failed for a given reason."""177 """Check that the buildd slave is alive.
173178
174 def setSlaveForTesting(proxy):179 This pings the slave over the network via the echo method and looks
175 """Sets the RPC proxy through which to operate the build slave."""180 for the sent message as the reply.
176181
177 def verifySlaveBuildCookie(slave_build_id):182 :raises BuildDaemonError: When the slave is down.
178 """Verify that a slave's build cookie is consistent.
179
180 This should delegate to the current `IBuildFarmJobBehavior`.
181 """
182
183 def transferSlaveFileToLibrarian(file_sha1, filename, private):
184 """Transfer a file from the slave to the librarian.
185
186 :param file_sha1: The file's sha1, which is how the file is addressed
187 in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog'
188 will cause the build log to be retrieved and gzipped.
189 :param filename: The name of the file to be given to the librarian file
190 alias.
191 :param private: True if the build is for a private archive.
192 :return: A librarian file alias.
193 """
194
195 def getBuildQueue():
196 """Return a `BuildQueue` if there's an active job on this builder.
197
198 :return: A BuildQueue, or None.
199 """
200
201 def getCurrentBuildFarmJob():
202 """Return a `BuildFarmJob` for this builder."""
203
204 # All methods below here return Deferred.
205
206 def isAvailable():
207 """Whether or not a builder is available for building new jobs.
208
209 :return: A Deferred that fires with True or False, depending on
210 whether the builder is available or not.
211 """183 """
212184
213 def rescueIfLost(logger=None):185 def rescueIfLost(logger=None):
214 """Reset the slave if its job information doesn't match the DB.186 """Reset the slave if its job information doesn't match the DB.
215187
216 This checks the build ID reported in the slave status against the188 If the builder is BUILDING or WAITING but has a build ID string
217 database. If it isn't building what we think it should be, the current189 that doesn't match what is stored in the DB, we have to dismiss
218 build will be aborted and the slave cleaned in preparation for a new190 its current actions and clean the slave for another job, assuming
219 task. The decision about the slave's correctness is left up to191 the XMLRPC is working properly at this point.
220 `IBuildFarmJobBehavior.verifySlaveBuildCookie`.
221
222 :return: A Deferred that fires when the dialog with the slave is
223 finished. It does not have a return value.
224 """192 """
225193
226 def updateStatus(logger=None):194 def updateStatus(logger=None):
227 """Update the builder's status by probing it.195 """Update the builder's status by probing it."""
228
229 :return: A Deferred that fires when the dialog with the slave is
230 finished. It does not have a return value.
231 """
232196
233 def cleanSlave():197 def cleanSlave():
234 """Clean any temporary files from the slave.198 """Clean any temporary files from the slave."""
235 199
236 :return: A Deferred that fires when the dialog with the slave is200 def failBuilder(reason):
237 finished. It does not have a return value.201 """Mark builder as failed for a given reason."""
238 """
239202
240 def requestAbort():203 def requestAbort():
241 """Ask that a build be aborted.204 """Ask that a build be aborted.
@@ -243,9 +206,6 @@
243 This takes place asynchronously: Actually killing everything running206 This takes place asynchronously: Actually killing everything running
244 can take some time so the slave status should be queried again to207 can take some time so the slave status should be queried again to
245 detect when the abort has taken effect. (Look for status ABORTED).208 detect when the abort has taken effect. (Look for status ABORTED).
246
247 :return: A Deferred that fires when the dialog with the slave is
248 finished. It does not have a return value.
249 """209 """
250210
251 def resumeSlaveHost():211 def resumeSlaveHost():
@@ -257,35 +217,37 @@
257 :raises: CannotResumeHost: if builder is not virtual or if the217 :raises: CannotResumeHost: if builder is not virtual or if the
258 configuration command has failed.218 configuration command has failed.
259219
260 :return: A Deferred that fires when the resume operation finishes,220 :return: command stdout and stderr buffers as a tuple.
261 whose value is a (stdout, stderr) tuple for success, or a Failure
262 whose value is a CannotResumeHost exception.
263 """221 """
264222
223 def setSlaveForTesting(proxy):
224 """Sets the RPC proxy through which to operate the build slave."""
225
265 def slaveStatus():226 def slaveStatus():
266 """Get the slave status for this builder.227 """Get the slave status for this builder.
267228
268 :return: A Deferred which fires when the slave dialog is complete.229 :return: a dict containing at least builder_status, but potentially
269 Its value is a dict containing at least builder_status, but230 other values included by the current build behavior.
270 potentially other values included by the current build
271 behavior.
272 """231 """
273232
274 def slaveStatusSentence():233 def slaveStatusSentence():
275 """Get the slave status sentence for this builder.234 """Get the slave status sentence for this builder.
276235
277 :return: A Deferred which fires when the slave dialog is complete.236 :return: A tuple with the first element containing the slave status,
278 Its value is a tuple with the first element containing the237 build_id-queue-id and then optionally more elements depending on
279 slave status, build_id-queue-id and then optionally more238 the status.
280 elements depending on the status.239 """
240
241 def verifySlaveBuildCookie(slave_build_id):
242 """Verify that a slave's build cookie is consistent.
243
244 This should delegate to the current `IBuildFarmJobBehavior`.
281 """245 """
282246
283 def updateBuild(queueItem):247 def updateBuild(queueItem):
284 """Verify the current build job status.248 """Verify the current build job status.
285249
286 Perform the required actions for each state.250 Perform the required actions for each state.
287
288 :return: A Deferred that fires when the slave dialog is finished.
289 """251 """
290252
291 def startBuild(build_queue_item, logger):253 def startBuild(build_queue_item, logger):
@@ -293,10 +255,21 @@
293255
294 :param build_queue_item: A BuildQueueItem to build.256 :param build_queue_item: A BuildQueueItem to build.
295 :param logger: A logger to be used to log diagnostic information.257 :param logger: A logger to be used to log diagnostic information.
296258 :raises BuildSlaveFailure: When the build slave fails.
297 :return: A Deferred that fires after the dispatch has completed whose259 :raises CannotBuild: When a build cannot be started for some reason
298 value is None, or a Failure that contains an exception260 other than the build slave failing.
299 explaining what went wrong.261 """
262
263 def transferSlaveFileToLibrarian(file_sha1, filename, private):
264 """Transfer a file from the slave to the librarian.
265
266 :param file_sha1: The file's sha1, which is how the file is addressed
267 in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog'
268 will cause the build log to be retrieved and gzipped.
269 :param filename: The name of the file to be given to the librarian file
270 alias.
271 :param private: True if the build is for a private archive.
272 :return: A librarian file alias.
300 """273 """
301274
302 def handleTimeout(logger, error_message):275 def handleTimeout(logger, error_message):
@@ -311,8 +284,6 @@
311284
312 :param logger: The logger object to be used for logging.285 :param logger: The logger object to be used for logging.
313 :param error_message: The error message to be used for logging.286 :param error_message: The error message to be used for logging.
314 :return: A Deferred that fires after the virtual slave was resumed
315 or immediately if it's a non-virtual slave.
316 """287 """
317288
318 def findAndStartJob(buildd_slave=None):289 def findAndStartJob(buildd_slave=None):
@@ -320,9 +291,17 @@
320291
321 :param buildd_slave: An optional buildd slave that this builder should292 :param buildd_slave: An optional buildd slave that this builder should
322 talk to.293 talk to.
323 :return: A Deferred whose value is the `IBuildQueue` instance294 :return: the `IBuildQueue` instance found or None if no job was found.
324 found or None if no job was found.295 """
325 """296
297 def getBuildQueue():
298 """Return a `BuildQueue` if there's an active job on this builder.
299
300 :return: A BuildQueue, or None.
301 """
302
303 def getCurrentBuildFarmJob():
304 """Return a `BuildFarmJob` for this builder."""
326305
327306
328class IBuilderSet(Interface):307class IBuilderSet(Interface):
329308
=== modified file 'lib/lp/buildmaster/manager.py'
--- lib/lp/buildmaster/manager.py 2010-10-20 12:28:46 +0000
+++ lib/lp/buildmaster/manager.py 2010-12-07 16:29:13 +0000
@@ -10,10 +10,13 @@
10 'BuilddManager',10 'BuilddManager',
11 'BUILDD_MANAGER_LOG_NAME',11 'BUILDD_MANAGER_LOG_NAME',
12 'FailDispatchResult',12 'FailDispatchResult',
13 'RecordingSlave',
13 'ResetDispatchResult',14 'ResetDispatchResult',
15 'buildd_success_result_map',
14 ]16 ]
1517
16import logging18import logging
19import os
1720
18import transaction21import transaction
19from twisted.application import service22from twisted.application import service
@@ -21,27 +24,129 @@
21 defer,24 defer,
22 reactor,25 reactor,
23 )26 )
24from twisted.internet.task import LoopingCall27from twisted.protocols.policies import TimeoutMixin
25from twisted.python import log28from twisted.python import log
29from twisted.python.failure import Failure
30from twisted.web import xmlrpc
26from zope.component import getUtility31from zope.component import getUtility
2732
33from canonical.config import config
34from canonical.launchpad.webapp import urlappend
35from lp.services.database import write_transaction
28from lp.buildmaster.enums import BuildStatus36from lp.buildmaster.enums import BuildStatus
29from lp.buildmaster.interfaces.buildfarmjobbehavior import (37from lp.services.twistedsupport.processmonitor import ProcessWithTimeout
30 BuildBehaviorMismatch,
31 )
32from lp.buildmaster.model.builder import Builder
33from lp.buildmaster.interfaces.builder import (
34 BuildDaemonError,
35 BuildSlaveFailure,
36 CannotBuild,
37 CannotFetchFile,
38 CannotResumeHost,
39 )
4038
4139
42BUILDD_MANAGER_LOG_NAME = "slave-scanner"40BUILDD_MANAGER_LOG_NAME = "slave-scanner"
4341
4442
43buildd_success_result_map = {
44 'ensurepresent': True,
45 'build': 'BuilderStatus.BUILDING',
46 }
47
48
49class QueryWithTimeoutProtocol(xmlrpc.QueryProtocol, TimeoutMixin):
50 """XMLRPC query protocol with a configurable timeout.
51
52 XMLRPC queries using this protocol will be unconditionally closed
53 when the timeout is elapsed. The timeout is fetched from the context
54 Launchpad configuration file (`config.builddmaster.socket_timeout`).
55 """
56 def connectionMade(self):
57 xmlrpc.QueryProtocol.connectionMade(self)
58 self.setTimeout(config.builddmaster.socket_timeout)
59
60
61class QueryFactoryWithTimeout(xmlrpc._QueryFactory):
62 """XMLRPC client factory with timeout support."""
63 # Make this factory quiet.
64 noisy = False
65 # Use the protocol with timeout support.
66 protocol = QueryWithTimeoutProtocol
67
68
69class RecordingSlave:
70 """An RPC proxy for buildd slaves that records instructions to the latter.
71
72 The idea here is to merely record the instructions that the slave-scanner
73 issues to the buildd slaves and "replay" them a bit later in asynchronous
74 and parallel fashion.
75
76 By dealing with a number of buildd slaves in parallel we remove *the*
77 major slave-scanner throughput issue while avoiding large-scale changes to
78 its code base.
79 """
80
81 def __init__(self, name, url, vm_host):
82 self.name = name
83 self.url = url
84 self.vm_host = vm_host
85
86 self.resume_requested = False
87 self.calls = []
88
89 def __repr__(self):
90 return '<%s:%s>' % (self.name, self.url)
91
92 def cacheFile(self, logger, libraryfilealias):
93 """Cache the file on the server."""
94 self.ensurepresent(
95 libraryfilealias.content.sha1, libraryfilealias.http_url, '', '')
96
97 def sendFileToSlave(self, *args):
98 """Helper to send a file to this builder."""
99 return self.ensurepresent(*args)
100
101 def ensurepresent(self, *args):
102 """Download files needed for the build."""
103 self.calls.append(('ensurepresent', args))
104 result = buildd_success_result_map.get('ensurepresent')
105 return [result, 'Download']
106
107 def build(self, *args):
108 """Perform the build."""
109 # XXX: This method does not appear to be used.
110 self.calls.append(('build', args))
111 result = buildd_success_result_map.get('build')
112 return [result, args[0]]
113
114 def resume(self):
115 """Record the request to resume the builder..
116
117 Always succeed.
118
119 :return: a (stdout, stderr, subprocess exitcode) triple
120 """
121 self.resume_requested = True
122 return ['', '', 0]
123
124 def resumeSlave(self, clock=None):
125 """Resume the builder in a asynchronous fashion.
126
127 Used the configuration command-line in the same way
128 `BuilddSlave.resume` does.
129
130 Also use the builddmaster configuration 'socket_timeout' as
131 the process timeout.
132
133 :param clock: An optional twisted.internet.task.Clock to override
134 the default clock. For use in tests.
135
136 :return: a Deferred
137 """
138 resume_command = config.builddmaster.vm_resume_command % {
139 'vm_host': self.vm_host}
140 # Twisted API require string and the configuration provides unicode.
141 resume_argv = [str(term) for term in resume_command.split()]
142
143 d = defer.Deferred()
144 p = ProcessWithTimeout(
145 d, config.builddmaster.socket_timeout, clock=clock)
146 p.spawnProcess(resume_argv[0], tuple(resume_argv))
147 return d
148
149
45def get_builder(name):150def get_builder(name):
46 """Helper to return the builder given the slave for this request."""151 """Helper to return the builder given the slave for this request."""
47 # Avoiding circular imports.152 # Avoiding circular imports.
@@ -54,12 +159,9 @@
54 # builder.currentjob hides a complicated query, don't run it twice.159 # builder.currentjob hides a complicated query, don't run it twice.
55 # See bug 623281.160 # See bug 623281.
56 current_job = builder.currentjob161 current_job = builder.currentjob
57 if current_job is None:162 build_job = current_job.specific_job.build
58 job_failure_count = 0
59 else:
60 job_failure_count = current_job.specific_job.build.failure_count
61163
62 if builder.failure_count == job_failure_count and current_job is not None:164 if builder.failure_count == build_job.failure_count:
63 # If the failure count for the builder is the same as the165 # If the failure count for the builder is the same as the
64 # failure count for the job being built, then we cannot166 # failure count for the job being built, then we cannot
65 # tell whether the job or the builder is at fault. The best167 # tell whether the job or the builder is at fault. The best
@@ -68,28 +170,17 @@
68 current_job.reset()170 current_job.reset()
69 return171 return
70172
71 if builder.failure_count > job_failure_count:173 if builder.failure_count > build_job.failure_count:
72 # The builder has failed more than the jobs it's been174 # The builder has failed more than the jobs it's been
73 # running.175 # running, so let's disable it and re-schedule the build.
74176 builder.failBuilder(fail_notes)
75 # Re-schedule the build if there is one.177 current_job.reset()
76 if current_job is not None:
77 current_job.reset()
78
79 # We are a little more tolerant with failing builders than
80 # failing jobs because sometimes they get unresponsive due to
81 # human error, flaky networks etc. We expect the builder to get
82 # better, whereas jobs are very unlikely to get better.
83 if builder.failure_count >= Builder.FAILURE_THRESHOLD:
84 # It's also gone over the threshold so let's disable it.
85 builder.failBuilder(fail_notes)
86 else:178 else:
87 # The job is the culprit! Override its status to 'failed'179 # The job is the culprit! Override its status to 'failed'
88 # to make sure it won't get automatically dispatched again,180 # to make sure it won't get automatically dispatched again,
89 # and remove the buildqueue request. The failure should181 # and remove the buildqueue request. The failure should
90 # have already caused any relevant slave data to be stored182 # have already caused any relevant slave data to be stored
91 # on the build record so don't worry about that here.183 # on the build record so don't worry about that here.
92 build_job = current_job.specific_job.build
93 build_job.status = BuildStatus.FAILEDTOBUILD184 build_job.status = BuildStatus.FAILEDTOBUILD
94 builder.currentjob.destroySelf()185 builder.currentjob.destroySelf()
95186
@@ -99,108 +190,133 @@
99 # next buildd scan.190 # next buildd scan.
100191
101192
193class BaseDispatchResult:
194 """Base class for *DispatchResult variations.
195
196 It will be extended to represent dispatching results and allow
197 homogeneous processing.
198 """
199
200 def __init__(self, slave, info=None):
201 self.slave = slave
202 self.info = info
203
204 def _cleanJob(self, job):
205 """Clean up in case of builder reset or dispatch failure."""
206 if job is not None:
207 job.reset()
208
209 def assessFailureCounts(self):
210 """View builder/job failure_count and work out which needs to die.
211
212 :return: True if we disabled something, False if we did not.
213 """
214 builder = get_builder(self.slave.name)
215 assessFailureCounts(builder, self.info)
216
217 def ___call__(self):
218 raise NotImplementedError(
219 "Call sites must define an evaluation method.")
220
221
222class FailDispatchResult(BaseDispatchResult):
223 """Represents a communication failure while dispatching a build job..
224
225 When evaluated this object mark the corresponding `IBuilder` as
226 'NOK' with the given text as 'failnotes'. It also cleans up the running
227 job (`IBuildQueue`).
228 """
229
230 def __repr__(self):
231 return '%r failure (%s)' % (self.slave, self.info)
232
233 @write_transaction
234 def __call__(self):
235 self.assessFailureCounts()
236
237
238class ResetDispatchResult(BaseDispatchResult):
239 """Represents a failure to reset a builder.
240
241 When evaluated this object simply cleans up the running job
242 (`IBuildQueue`) and marks the builder down.
243 """
244
245 def __repr__(self):
246 return '%r reset failure' % self.slave
247
248 @write_transaction
249 def __call__(self):
250 builder = get_builder(self.slave.name)
251 # Builders that fail to reset should be disabled as per bug
252 # 563353.
253 # XXX Julian bug=586362
254 # This is disabled until this code is not also used for dispatch
255 # failures where we *don't* want to disable the builder.
256 # builder.failBuilder(self.info)
257 self._cleanJob(builder.currentjob)
258
259
102class SlaveScanner:260class SlaveScanner:
103 """A manager for a single builder."""261 """A manager for a single builder."""
104262
105 # The interval between each poll cycle, in seconds. We'd ideally
106 # like this to be lower but 5 seems a reasonable compromise between
107 # responsivity and load on the database server, since in each cycle
108 # we can run quite a few queries.
109 SCAN_INTERVAL = 5263 SCAN_INTERVAL = 5
110264
265 # These are for the benefit of tests; see `TestingSlaveScanner`.
266 # It pokes fake versions in here so that it can verify methods were
267 # called. The tests should really be using FakeMethod() though.
268 reset_result = ResetDispatchResult
269 fail_result = FailDispatchResult
270
111 def __init__(self, builder_name, logger):271 def __init__(self, builder_name, logger):
112 self.builder_name = builder_name272 self.builder_name = builder_name
113 self.logger = logger273 self.logger = logger
274 self._deferred_list = []
275
276 def scheduleNextScanCycle(self):
277 """Schedule another scan of the builder some time in the future."""
278 self._deferred_list = []
279 # XXX: Change this to use LoopingCall.
280 reactor.callLater(self.SCAN_INTERVAL, self.startCycle)
114281
115 def startCycle(self):282 def startCycle(self):
116 """Scan the builder and dispatch to it or deal with failures."""283 """Scan the builder and dispatch to it or deal with failures."""
117 self.loop = LoopingCall(self.singleCycle)
118 self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
119 return self.stopping_deferred
120
121 def stopCycle(self):
122 """Terminate the LoopingCall."""
123 self.loop.stop()
124
125 def singleCycle(self):
126 self.logger.debug("Scanning builder: %s" % self.builder_name)284 self.logger.debug("Scanning builder: %s" % self.builder_name)
127 d = self.scan()285
128286 try:
129 d.addErrback(self._scanFailed)287 slave = self.scan()
130 return d288 if slave is None:
131289 self.scheduleNextScanCycle()
132 def _scanFailed(self, failure):290 else:
133 """Deal with failures encountered during the scan cycle.291 # XXX: Ought to return Deferred.
134292 self.resumeAndDispatch(slave)
135 1. Print the error in the log293 except:
136 2. Increment and assess failure counts on the builder and job.294 error = Failure()
137 """
138 # Make sure that pending database updates are removed as it
139 # could leave the database in an inconsistent state (e.g. The
140 # job says it's running but the buildqueue has no builder set).
141 transaction.abort()
142
143 # If we don't recognise the exception include a stack trace with
144 # the error.
145 error_message = failure.getErrorMessage()
146 if failure.check(
147 BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch,
148 CannotResumeHost, BuildDaemonError, CannotFetchFile):
149 self.logger.info("Scanning failed with: %s" % error_message)
150 else:
151 self.logger.info("Scanning failed with: %s\n%s" %295 self.logger.info("Scanning failed with: %s\n%s" %
152 (failure.getErrorMessage(), failure.getTraceback()))296 (error.getErrorMessage(), error.getTraceback()))
153297
154 # Decide if we need to terminate the job or fail the
155 # builder.
156 try:
157 builder = get_builder(self.builder_name)298 builder = get_builder(self.builder_name)
158 builder.gotFailure()299
159 if builder.currentjob is not None:300 # Decide if we need to terminate the job or fail the
160 build_farm_job = builder.getCurrentBuildFarmJob()301 # builder.
161 build_farm_job.gotFailure()302 self._incrementFailureCounts(builder)
162 self.logger.info(303 self.logger.info(
163 "builder %s failure count: %s, "304 "builder failure count: %s, job failure count: %s" % (
164 "job '%s' failure count: %s" % (305 builder.failure_count,
165 self.builder_name,306 builder.getCurrentBuildFarmJob().failure_count))
166 builder.failure_count,307 assessFailureCounts(builder, error.getErrorMessage())
167 build_farm_job.title,
168 build_farm_job.failure_count))
169 else:
170 self.logger.info(
171 "Builder %s failed a probe, count: %s" % (
172 self.builder_name, builder.failure_count))
173 assessFailureCounts(builder, failure.getErrorMessage())
174 transaction.commit()308 transaction.commit()
175 except:309
176 # Catastrophic code failure! Not much we can do.310 self.scheduleNextScanCycle()
177 self.logger.error(311
178 "Miserable failure when trying to examine failure counts:\n",312 @write_transaction
179 exc_info=True)
180 transaction.abort()
181
182 def scan(self):313 def scan(self):
183 """Probe the builder and update/dispatch/collect as appropriate.314 """Probe the builder and update/dispatch/collect as appropriate.
184315
185 There are several steps to scanning:316 The whole method is wrapped in a transaction, but we do partial
186317 commits to avoid holding locks on tables.
187 1. If the builder is marked as "ok" then probe it to see what state318
188 it's in. This is where lost jobs are rescued if we think the319 :return: A `RecordingSlave` if we dispatched a job to it, or None.
189 builder is doing something that it later tells us it's not,
190 and also where the multi-phase abort procedure happens.
191 See IBuilder.rescueIfLost, which is called by
192 IBuilder.updateStatus().
193 2. If the builder is still happy, we ask it if it has an active build
194 and then either update the build in Launchpad or collect the
195 completed build. (builder.updateBuild)
196 3. If the builder is not happy or it was marked as unavailable
197 mid-build, we need to reset the job that we thought it had, so
198 that the job is dispatched elsewhere.
199 4. If the builder is idle and we have another build ready, dispatch
200 it.
201
202 :return: A Deferred that fires when the scan is complete, whose
203 value is A `BuilderSlave` if we dispatched a job to it, or None.
204 """320 """
205 # We need to re-fetch the builder object on each cycle as the321 # We need to re-fetch the builder object on each cycle as the
206 # Storm store is invalidated over transaction boundaries.322 # Storm store is invalidated over transaction boundaries.
@@ -208,72 +324,240 @@
208 self.builder = get_builder(self.builder_name)324 self.builder = get_builder(self.builder_name)
209325
210 if self.builder.builderok:326 if self.builder.builderok:
211 d = self.builder.updateStatus(self.logger)327 self.builder.updateStatus(self.logger)
328 transaction.commit()
329
330 # See if we think there's an active build on the builder.
331 buildqueue = self.builder.getBuildQueue()
332
333 # XXX Julian 2010-07-29 bug=611258
334 # We're not using the RecordingSlave until dispatching, which
335 # means that this part blocks until we've received a response
336 # from the builder. updateBuild() needs to be made
337 # asyncronous.
338
339 # Scan the slave and get the logtail, or collect the build if
340 # it's ready. Yes, "updateBuild" is a bad name.
341 if buildqueue is not None:
342 self.builder.updateBuild(buildqueue)
343 transaction.commit()
344
345 # If the builder is in manual mode, don't dispatch anything.
346 if self.builder.manual:
347 self.logger.debug(
348 '%s is in manual mode, not dispatching.' % self.builder.name)
349 return None
350
351 # If the builder is marked unavailable, don't dispatch anything.
352 # Additionaly, because builders can be removed from the pool at
353 # any time, we need to see if we think there was a build running
354 # on it before it was marked unavailable. In this case we reset
355 # the build thusly forcing it to get re-dispatched to another
356 # builder.
357 if not self.builder.is_available:
358 job = self.builder.currentjob
359 if job is not None and not self.builder.builderok:
360 self.logger.info(
361 "%s was made unavailable, resetting attached "
362 "job" % self.builder.name)
363 job.reset()
364 transaction.commit()
365 return None
366
367 # See if there is a job we can dispatch to the builder slave.
368
369 # XXX: Rather than use the slave actually associated with the builder
370 # (which, incidentally, shouldn't be a property anyway), we make a new
371 # RecordingSlave so we can get access to its asynchronous
372 # "resumeSlave" method. Blech.
373 slave = RecordingSlave(
374 self.builder.name, self.builder.url, self.builder.vm_host)
375 # XXX: Passing buildd_slave=slave overwrites the 'slave' property of
376 # self.builder. Not sure why this is needed yet.
377 self.builder.findAndStartJob(buildd_slave=slave)
378 if self.builder.currentjob is not None:
379 # After a successful dispatch we can reset the
380 # failure_count.
381 self.builder.resetFailureCount()
382 transaction.commit()
383 return slave
384
385 return None
386
387 def resumeAndDispatch(self, slave):
388 """Chain the resume and dispatching Deferreds."""
389 # XXX: resumeAndDispatch makes Deferreds without returning them.
390 if slave.resume_requested:
391 # The slave needs to be reset before we can dispatch to
392 # it (e.g. a virtual slave)
393
394 # XXX: Two problems here. The first is that 'resumeSlave' only
395 # exists on RecordingSlave (BuilderSlave calls it 'resume').
396 d = slave.resumeSlave()
397 d.addBoth(self.checkResume, slave)
212 else:398 else:
399 # No resume required, build dispatching can commence.
213 d = defer.succeed(None)400 d = defer.succeed(None)
214401
215 def status_updated(ignored):402 # Dispatch the build to the slave asynchronously.
216 # Commit the changes done while possibly rescuing jobs, to403 d.addCallback(self.initiateDispatch, slave)
217 # avoid holding table locks.404 # Store this deferred so we can wait for it along with all
218 transaction.commit()405 # the others that will be generated by RecordingSlave during
219406 # the dispatch process, and chain a callback after they've
220 # See if we think there's an active build on the builder.407 # all fired.
221 buildqueue = self.builder.getBuildQueue()408 self._deferred_list.append(d)
222409
223 # Scan the slave and get the logtail, or collect the build if410 def initiateDispatch(self, resume_result, slave):
224 # it's ready. Yes, "updateBuild" is a bad name.411 """Start dispatching a build to a slave.
225 if buildqueue is not None:412
226 return self.builder.updateBuild(buildqueue)413 If the previous task in chain (slave resuming) has failed it will
227414 receive a `ResetBuilderRequest` instance as 'resume_result' and
228 def build_updated(ignored):415 will immediately return that so the subsequent callback can collect
229 # Commit changes done while updating the build, to avoid416 it.
230 # holding table locks.417
231 transaction.commit()418 If the slave resuming succeeded, it starts the XMLRPC dialogue. The
232419 dialogue may consist of many calls to the slave before the build
233 # If the builder is in manual mode, don't dispatch anything.420 starts. Each call is done via a Deferred event, where slave calls
234 if self.builder.manual:421 are sent in callSlave(), and checked in checkDispatch() which will
235 self.logger.debug(422 keep firing events via callSlave() until all the events are done or
236 '%s is in manual mode, not dispatching.' %423 an error occurs.
237 self.builder.name)424 """
238 return425 if resume_result is not None:
239426 self.slaveConversationEnded()
240 # If the builder is marked unavailable, don't dispatch anything.427 return resume_result
241 # Additionaly, because builders can be removed from the pool at428
242 # any time, we need to see if we think there was a build running429 self.logger.info('Dispatching: %s' % slave)
243 # on it before it was marked unavailable. In this case we reset430 self.callSlave(slave)
244 # the build thusly forcing it to get re-dispatched to another431
245 # builder.432 def _getProxyForSlave(self, slave):
246433 """Return a twisted.web.xmlrpc.Proxy for the buildd slave.
247 return self.builder.isAvailable().addCallback(got_available)434
248435 Uses a protocol with timeout support, See QueryFactoryWithTimeout.
249 def got_available(available):436 """
250 if not available:437 proxy = xmlrpc.Proxy(str(urlappend(slave.url, 'rpc')))
251 job = self.builder.currentjob438 proxy.queryFactory = QueryFactoryWithTimeout
252 if job is not None and not self.builder.builderok:439 return proxy
253 self.logger.info(440
254 "%s was made unavailable, resetting attached "441 def callSlave(self, slave):
255 "job" % self.builder.name)442 """Dispatch the next XMLRPC for the given slave."""
256 job.reset()443 if len(slave.calls) == 0:
257 transaction.commit()444 # That's the end of the dialogue with the slave.
258 return445 self.slaveConversationEnded()
259446 return
260 # See if there is a job we can dispatch to the builder slave.447
261448 # Get an XMLRPC proxy for the buildd slave.
262 d = self.builder.findAndStartJob()449 proxy = self._getProxyForSlave(slave)
263 def job_started(candidate):450 method, args = slave.calls.pop(0)
264 if self.builder.currentjob is not None:451 d = proxy.callRemote(method, *args)
265 # After a successful dispatch we can reset the452 d.addBoth(self.checkDispatch, method, slave)
266 # failure_count.453 self._deferred_list.append(d)
267 self.builder.resetFailureCount()454 self.logger.debug('%s -> %s(%s)' % (slave, method, args))
268 transaction.commit()455
269 return self.builder.slave456 def slaveConversationEnded(self):
270 else:457 """After all the Deferreds are set up, chain a callback on them."""
458 dl = defer.DeferredList(self._deferred_list, consumeErrors=True)
459 dl.addBoth(self.evaluateDispatchResult)
460 return dl
461
462 def evaluateDispatchResult(self, deferred_list_results):
463 """Process the DispatchResult for this dispatch chain.
464
465 After waiting for the Deferred chain to finish, we'll have a
466 DispatchResult to evaluate, which deals with the result of
467 dispatching.
468 """
469 # The `deferred_list_results` is what we get when waiting on a
470 # DeferredList. It's a list of tuples of (status, result) where
471 # result is what the last callback in that chain returned.
472
473 # If the result is an instance of BaseDispatchResult we need to
474 # evaluate it, as there's further action required at the end of
475 # the dispatch chain. None, resulting from successful chains,
476 # are discarded.
477
478 dispatch_results = [
479 result for status, result in deferred_list_results
480 if isinstance(result, BaseDispatchResult)]
481
482 for result in dispatch_results:
483 self.logger.info("%r" % result)
484 result()
485
486 # At this point, we're done dispatching, so we can schedule the
487 # next scan cycle.
488 self.scheduleNextScanCycle()
489
490 # For the test suite so that it can chain callback results.
491 return deferred_list_results
492
493 def checkResume(self, response, slave):
494 """Check the result of resuming a slave.
495
496 If there's a problem resuming, we return a ResetDispatchResult which
497 will get evaluated at the end of the scan, or None if the resume
498 was OK.
499
500 :param response: the tuple that's constructed in
501 ProcessWithTimeout.processEnded(), or a Failure that
502 contains the tuple.
503 :param slave: the slave object we're talking to
504 """
505 if isinstance(response, Failure):
506 out, err, code = response.value
507 else:
508 out, err, code = response
509 if code == os.EX_OK:
510 return None
511
512 error_text = '%s\n%s' % (out, err)
513 self.logger.error('%s resume failure: %s' % (slave, error_text))
514 return self.reset_result(slave, error_text)
515
516 def _incrementFailureCounts(self, builder):
517 builder.gotFailure()
518 builder.getCurrentBuildFarmJob().gotFailure()
519
520 def checkDispatch(self, response, method, slave):
521 """Verify the results of a slave xmlrpc call.
522
523 If it failed and it compromises the slave then return a corresponding
524 `FailDispatchResult`, if it was a communication failure, simply
525 reset the slave by returning a `ResetDispatchResult`.
526 """
527 from lp.buildmaster.interfaces.builder import IBuilderSet
528 builder = getUtility(IBuilderSet)[slave.name]
529
530 # XXX these DispatchResult classes are badly named and do the
531 # same thing. We need to fix that.
532 self.logger.debug(
533 '%s response for "%s": %s' % (slave, method, response))
534
535 if isinstance(response, Failure):
536 self.logger.warn(
537 '%s communication failed (%s)' %
538 (slave, response.getErrorMessage()))
539 self.slaveConversationEnded()
540 self._incrementFailureCounts(builder)
541 return self.fail_result(slave)
542
543 if isinstance(response, list) and len(response) == 2:
544 if method in buildd_success_result_map:
545 expected_status = buildd_success_result_map.get(method)
546 status, info = response
547 if status == expected_status:
548 self.callSlave(slave)
271 return None549 return None
272 return d.addCallback(job_started)550 else:
273551 info = 'Unknown slave method: %s' % method
274 d.addCallback(status_updated)552 else:
275 d.addCallback(build_updated)553 info = 'Unexpected response: %s' % repr(response)
276 return d554
555 self.logger.error(
556 '%s failed to dispatch (%s)' % (slave, info))
557
558 self.slaveConversationEnded()
559 self._incrementFailureCounts(builder)
560 return self.fail_result(slave, info)
277561
278562
279class NewBuildersScanner:563class NewBuildersScanner:
@@ -294,21 +578,15 @@
294 self.current_builders = [578 self.current_builders = [
295 builder.name for builder in getUtility(IBuilderSet)]579 builder.name for builder in getUtility(IBuilderSet)]
296580
297 def stop(self):
298 """Terminate the LoopingCall."""
299 self.loop.stop()
300
301 def scheduleScan(self):581 def scheduleScan(self):
302 """Schedule a callback SCAN_INTERVAL seconds later."""582 """Schedule a callback SCAN_INTERVAL seconds later."""
303 self.loop = LoopingCall(self.scan)583 return self._clock.callLater(self.SCAN_INTERVAL, self.scan)
304 self.loop.clock = self._clock
305 self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL)
306 return self.stopping_deferred
307584
308 def scan(self):585 def scan(self):
309 """If a new builder appears, create a SlaveScanner for it."""586 """If a new builder appears, create a SlaveScanner for it."""
310 new_builders = self.checkForNewBuilders()587 new_builders = self.checkForNewBuilders()
311 self.manager.addScanForBuilders(new_builders)588 self.manager.addScanForBuilders(new_builders)
589 self.scheduleScan()
312590
313 def checkForNewBuilders(self):591 def checkForNewBuilders(self):
314 """See if any new builders were added."""592 """See if any new builders were added."""
@@ -331,7 +609,10 @@
331 manager=self, clock=clock)609 manager=self, clock=clock)
332610
333 def _setupLogger(self):611 def _setupLogger(self):
334 """Set up a 'slave-scanner' logger that redirects to twisted.612 """Setup a 'slave-scanner' logger that redirects to twisted.
613
614 It is going to be used locally and within the thread running
615 the scan() method.
335616
336 Make it less verbose to avoid messing too much with the old code.617 Make it less verbose to avoid messing too much with the old code.
337 """618 """
@@ -362,29 +643,12 @@
362 # Events will now fire in the SlaveScanner objects to scan each643 # Events will now fire in the SlaveScanner objects to scan each
363 # builder.644 # builder.
364645
365 def stopService(self):
366 """Callback for when we need to shut down."""
367 # XXX: lacks unit tests
368 # All the SlaveScanner objects need to be halted gracefully.
369 deferreds = [slave.stopping_deferred for slave in self.builder_slaves]
370 deferreds.append(self.new_builders_scanner.stopping_deferred)
371
372 self.new_builders_scanner.stop()
373 for slave in self.builder_slaves:
374 slave.stopCycle()
375
376 # The 'stopping_deferred's are called back when the loops are
377 # stopped, so we can wait on them all at once here before
378 # exiting.
379 d = defer.DeferredList(deferreds, consumeErrors=True)
380 return d
381
382 def addScanForBuilders(self, builders):646 def addScanForBuilders(self, builders):
383 """Set up scanner objects for the builders specified."""647 """Set up scanner objects for the builders specified."""
384 for builder in builders:648 for builder in builders:
385 slave_scanner = SlaveScanner(builder, self.logger)649 slave_scanner = SlaveScanner(builder, self.logger)
386 self.builder_slaves.append(slave_scanner)650 self.builder_slaves.append(slave_scanner)
387 slave_scanner.startCycle()651 slave_scanner.scheduleNextScanCycle()
388652
389 # Return the slave list for the benefit of tests.653 # Return the slave list for the benefit of tests.
390 return self.builder_slaves654 return self.builder_slaves
391655
=== modified file 'lib/lp/buildmaster/model/builder.py'
--- lib/lp/buildmaster/model/builder.py 2010-10-20 11:54:27 +0000
+++ lib/lp/buildmaster/model/builder.py 2010-12-07 16:29:13 +0000
@@ -13,11 +13,12 @@
13 ]13 ]
1414
15import gzip15import gzip
16import httplib
16import logging17import logging
17import os18import os
18import socket19import socket
20import subprocess
19import tempfile21import tempfile
20import transaction
21import urllib222import urllib2
22import xmlrpclib23import xmlrpclib
2324
@@ -33,13 +34,6 @@
33 Count,34 Count,
34 Sum,35 Sum,
35 )36 )
36
37from twisted.internet import (
38 defer,
39 reactor as default_reactor,
40 )
41from twisted.web import xmlrpc
42
43from zope.component import getUtility37from zope.component import getUtility
44from zope.interface import implements38from zope.interface import implements
4539
@@ -64,6 +58,7 @@
64from lp.buildmaster.interfaces.builder import (58from lp.buildmaster.interfaces.builder import (
65 BuildDaemonError,59 BuildDaemonError,
66 BuildSlaveFailure,60 BuildSlaveFailure,
61 CannotBuild,
67 CannotFetchFile,62 CannotFetchFile,
68 CannotResumeHost,63 CannotResumeHost,
69 CorruptBuildCookie,64 CorruptBuildCookie,
@@ -71,6 +66,9 @@
71 IBuilderSet,66 IBuilderSet,
72 )67 )
73from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet68from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet
69from lp.buildmaster.interfaces.buildfarmjobbehavior import (
70 BuildBehaviorMismatch,
71 )
74from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet72from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
75from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior73from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior
76from lp.buildmaster.model.buildqueue import (74from lp.buildmaster.model.buildqueue import (
@@ -80,9 +78,9 @@
80from lp.registry.interfaces.person import validate_public_person78from lp.registry.interfaces.person import validate_public_person
81from lp.services.job.interfaces.job import JobStatus79from lp.services.job.interfaces.job import JobStatus
82from lp.services.job.model.job import Job80from lp.services.job.model.job import Job
81from lp.services.osutils import until_no_eintr
83from lp.services.propertycache import cachedproperty82from lp.services.propertycache import cachedproperty
84from lp.services.twistedsupport.processmonitor import ProcessWithTimeout83from lp.services.twistedsupport.xmlrpc import BlockingProxy
85from lp.services.twistedsupport import cancel_on_timeout
86# XXX Michael Nelson 2010-01-13 bug=49133084# XXX Michael Nelson 2010-01-13 bug=491330
87# These dependencies on soyuz will be removed when getBuildRecords()85# These dependencies on soyuz will be removed when getBuildRecords()
88# is moved.86# is moved.
@@ -94,9 +92,25 @@
94from lp.soyuz.model.processor import Processor92from lp.soyuz.model.processor import Processor
9593
9694
97class QuietQueryFactory(xmlrpc._QueryFactory):95class TimeoutHTTPConnection(httplib.HTTPConnection):
98 """XMLRPC client factory that doesn't splatter the log with junk."""96
99 noisy = False97 def connect(self):
98 """Override the standard connect() methods to set a timeout"""
99 ret = httplib.HTTPConnection.connect(self)
100 self.sock.settimeout(config.builddmaster.socket_timeout)
101 return ret
102
103
104class TimeoutHTTP(httplib.HTTP):
105 _connection_class = TimeoutHTTPConnection
106
107
108class TimeoutTransport(xmlrpclib.Transport):
109 """XMLRPC Transport to setup a socket with defined timeout"""
110
111 def make_connection(self, host):
112 host, extra_headers, x509 = self.get_host_info(host)
113 return TimeoutHTTP(host)
100114
101115
102class BuilderSlave(object):116class BuilderSlave(object):
@@ -111,7 +125,24 @@
111 # many false positives in your test run and will most likely break125 # many false positives in your test run and will most likely break
112 # production.126 # production.
113127
114 def __init__(self, proxy, builder_url, vm_host, reactor=None):128 # XXX: This (BuilderSlave) should use composition, rather than
129 # inheritance.
130
131 # XXX: Have a documented interface for the XML-RPC server:
132 # - what methods
133 # - what return values expected
134 # - what faults
135 # (see XMLRPCBuildDSlave in lib/canonical/buildd/slave.py).
136
137 # XXX: Arguably, this interface should be asynchronous
138 # (i.e. Deferred-returning). This would mean that Builder (see below)
139 # would have to expect Deferreds.
140
141 # XXX: Once we have a client object with a defined, tested interface, we
142 # should make a test double that doesn't do any XML-RPC and can be used to
143 # make testing easier & tests faster.
144
145 def __init__(self, proxy, builder_url, vm_host):
115 """Initialize a BuilderSlave.146 """Initialize a BuilderSlave.
116147
117 :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must148 :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must
@@ -124,87 +155,63 @@
124 self._file_cache_url = urlappend(builder_url, 'filecache')155 self._file_cache_url = urlappend(builder_url, 'filecache')
125 self._server = proxy156 self._server = proxy
126157
127 if reactor is None:
128 self.reactor = default_reactor
129 else:
130 self.reactor = reactor
131
132 @classmethod158 @classmethod
133 def makeBuilderSlave(cls, builder_url, vm_host, reactor=None, proxy=None):159 def makeBlockingSlave(cls, builder_url, vm_host):
134 """Create and return a `BuilderSlave`.160 rpc_url = urlappend(builder_url, 'rpc')
135161 server_proxy = xmlrpclib.ServerProxy(
136 :param builder_url: The URL of the slave buildd machine,162 rpc_url, transport=TimeoutTransport(), allow_none=True)
137 e.g. http://localhost:8221163 return cls(BlockingProxy(server_proxy), builder_url, vm_host)
138 :param vm_host: If the slave is virtual, specify its host machine here.
139 :param reactor: Used by tests to override the Twisted reactor.
140 :param proxy: Used By tests to override the xmlrpc.Proxy.
141 """
142 rpc_url = urlappend(builder_url.encode('utf-8'), 'rpc')
143 if proxy is None:
144 server_proxy = xmlrpc.Proxy(rpc_url, allowNone=True)
145 server_proxy.queryFactory = QuietQueryFactory
146 else:
147 server_proxy = proxy
148 return cls(server_proxy, builder_url, vm_host, reactor)
149
150 def _with_timeout(self, d):
151 TIMEOUT = config.builddmaster.socket_timeout
152 return cancel_on_timeout(d, TIMEOUT, self.reactor)
153164
154 def abort(self):165 def abort(self):
155 """Abort the current build."""166 """Abort the current build."""
156 return self._with_timeout(self._server.callRemote('abort'))167 return self._server.callRemote('abort')
157168
158 def clean(self):169 def clean(self):
159 """Clean up the waiting files and reset the slave's internal state."""170 """Clean up the waiting files and reset the slave's internal state."""
160 return self._with_timeout(self._server.callRemote('clean'))171 return self._server.callRemote('clean')
161172
162 def echo(self, *args):173 def echo(self, *args):
163 """Echo the arguments back."""174 """Echo the arguments back."""
164 return self._with_timeout(self._server.callRemote('echo', *args))175 return self._server.callRemote('echo', *args)
165176
166 def info(self):177 def info(self):
167 """Return the protocol version and the builder methods supported."""178 """Return the protocol version and the builder methods supported."""
168 return self._with_timeout(self._server.callRemote('info'))179 return self._server.callRemote('info')
169180
170 def status(self):181 def status(self):
171 """Return the status of the build daemon."""182 """Return the status of the build daemon."""
172 return self._with_timeout(self._server.callRemote('status'))183 return self._server.callRemote('status')
173184
174 def ensurepresent(self, sha1sum, url, username, password):185 def ensurepresent(self, sha1sum, url, username, password):
175 # XXX: Nothing external calls this. Make it private.
176 """Attempt to ensure the given file is present."""186 """Attempt to ensure the given file is present."""
177 return self._with_timeout(self._server.callRemote(187 return self._server.callRemote(
178 'ensurepresent', sha1sum, url, username, password))188 'ensurepresent', sha1sum, url, username, password)
179189
180 def getFile(self, sha_sum):190 def getFile(self, sha_sum):
181 """Construct a file-like object to return the named file."""191 """Construct a file-like object to return the named file."""
182 # XXX 2010-10-18 bug=662631
183 # Change this to do non-blocking IO.
184 file_url = urlappend(self._file_cache_url, sha_sum)192 file_url = urlappend(self._file_cache_url, sha_sum)
185 return urllib2.urlopen(file_url)193 return urllib2.urlopen(file_url)
186194
187 def resume(self, clock=None):195 def resume(self):
188 """Resume the builder in an asynchronous fashion.196 """Resume a virtual builder.
189197
190 We use the builddmaster configuration 'socket_timeout' as198 It uses the configuration command-line (replacing 'vm_host') and
191 the process timeout.199 return its output.
192200
193 :param clock: An optional twisted.internet.task.Clock to override201 :return: a (stdout, stderr, subprocess exitcode) triple
194 the default clock. For use in tests.
195
196 :return: a Deferred that returns a
197 (stdout, stderr, subprocess exitcode) triple
198 """202 """
203 # XXX: This executes the vm_resume_command
204 # synchronously. RecordingSlave does so asynchronously. Since we
205 # always want to do this asynchronously, there's no need for the
206 # duplication.
199 resume_command = config.builddmaster.vm_resume_command % {207 resume_command = config.builddmaster.vm_resume_command % {
200 'vm_host': self._vm_host}208 'vm_host': self._vm_host}
201 # Twisted API requires string but the configuration provides unicode.209 resume_argv = resume_command.split()
202 resume_argv = [term.encode('utf-8') for term in resume_command.split()]210 resume_process = subprocess.Popen(
203 d = defer.Deferred()211 resume_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
204 p = ProcessWithTimeout(212 stdout, stderr = resume_process.communicate()
205 d, config.builddmaster.socket_timeout, clock=clock)213
206 p.spawnProcess(resume_argv[0], tuple(resume_argv))214 return (stdout, stderr, resume_process.returncode)
207 return d
208215
209 def cacheFile(self, logger, libraryfilealias):216 def cacheFile(self, logger, libraryfilealias):
210 """Make sure that the file at 'libraryfilealias' is on the slave.217 """Make sure that the file at 'libraryfilealias' is on the slave.
@@ -217,15 +224,13 @@
217 "Asking builder on %s to ensure it has file %s (%s, %s)" % (224 "Asking builder on %s to ensure it has file %s (%s, %s)" % (
218 self._file_cache_url, libraryfilealias.filename, url,225 self._file_cache_url, libraryfilealias.filename, url,
219 libraryfilealias.content.sha1))226 libraryfilealias.content.sha1))
220 return self.sendFileToSlave(libraryfilealias.content.sha1, url)227 self.sendFileToSlave(libraryfilealias.content.sha1, url)
221228
222 def sendFileToSlave(self, sha1, url, username="", password=""):229 def sendFileToSlave(self, sha1, url, username="", password=""):
223 """Helper to send the file at 'url' with 'sha1' to this builder."""230 """Helper to send the file at 'url' with 'sha1' to this builder."""
224 d = self.ensurepresent(sha1, url, username, password)231 present, info = self.ensurepresent(sha1, url, username, password)
225 def check_present((present, info)):232 if not present:
226 if not present:233 raise CannotFetchFile(url, info)
227 raise CannotFetchFile(url, info)
228 return d.addCallback(check_present)
229234
230 def build(self, buildid, builder_type, chroot_sha1, filemap, args):235 def build(self, buildid, builder_type, chroot_sha1, filemap, args):
231 """Build a thing on this build slave.236 """Build a thing on this build slave.
@@ -238,18 +243,19 @@
238 :param args: A dictionary of extra arguments. The contents depend on243 :param args: A dictionary of extra arguments. The contents depend on
239 the build job type.244 the build job type.
240 """245 """
241 d = self._with_timeout(self._server.callRemote(246 try:
242 'build', buildid, builder_type, chroot_sha1, filemap, args))247 return self._server.callRemote(
243 def got_fault(failure):248 'build', buildid, builder_type, chroot_sha1, filemap, args)
244 failure.trap(xmlrpclib.Fault)249 except xmlrpclib.Fault, info:
245 raise BuildSlaveFailure(failure.value)250 raise BuildSlaveFailure(info)
246 return d.addErrback(got_fault)
247251
248252
249# This is a separate function since MockBuilder needs to use it too.253# This is a separate function since MockBuilder needs to use it too.
250# Do not use it -- (Mock)Builder.rescueIfLost should be used instead.254# Do not use it -- (Mock)Builder.rescueIfLost should be used instead.
251def rescueBuilderIfLost(builder, logger=None):255def rescueBuilderIfLost(builder, logger=None):
252 """See `IBuilder`."""256 """See `IBuilder`."""
257 status_sentence = builder.slaveStatusSentence()
258
253 # 'ident_position' dict relates the position of the job identifier259 # 'ident_position' dict relates the position of the job identifier
254 # token in the sentence received from status(), according the260 # token in the sentence received from status(), according the
255 # two status we care about. See see lib/canonical/buildd/slave.py261 # two status we care about. See see lib/canonical/buildd/slave.py
@@ -259,58 +265,61 @@
259 'BuilderStatus.WAITING': 2265 'BuilderStatus.WAITING': 2
260 }266 }
261267
262 d = builder.slaveStatusSentence()268 # Isolate the BuilderStatus string, always the first token in
263269 # see lib/canonical/buildd/slave.py and
264 def got_status(status_sentence):270 # IBuilder.slaveStatusSentence().
265 """After we get the status, clean if we have to.271 status = status_sentence[0]
266272
267 Always return status_sentence.273 # If the cookie test below fails, it will request an abort of the
268 """274 # builder. This will leave the builder in the aborted state and
269 # Isolate the BuilderStatus string, always the first token in275 # with no assigned job, and we should now "clean" the slave which
270 # see lib/canonical/buildd/slave.py and276 # will reset its state back to IDLE, ready to accept new builds.
271 # IBuilder.slaveStatusSentence().277 # This situation is usually caused by a temporary loss of
272 status = status_sentence[0]278 # communications with the slave and the build manager had to reset
273279 # the job.
274 # If the cookie test below fails, it will request an abort of the280 if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:
275 # builder. This will leave the builder in the aborted state and281 builder.cleanSlave()
276 # with no assigned job, and we should now "clean" the slave which282 if logger is not None:
277 # will reset its state back to IDLE, ready to accept new builds.283 logger.info(
278 # This situation is usually caused by a temporary loss of284 "Builder '%s' cleaned up from ABORTED" % builder.name)
279 # communications with the slave and the build manager had to reset285 return
280 # the job.286
281 if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:287 # If slave is not building nor waiting, it's not in need of rescuing.
282 if logger is not None:288 if status not in ident_position.keys():
283 logger.info(289 return
284 "Builder '%s' being cleaned up from ABORTED" %290
285 (builder.name,))291 slave_build_id = status_sentence[ident_position[status]]
286 d = builder.cleanSlave()292
287 return d.addCallback(lambda ignored: status_sentence)293 try:
294 builder.verifySlaveBuildCookie(slave_build_id)
295 except CorruptBuildCookie, reason:
296 if status == 'BuilderStatus.WAITING':
297 builder.cleanSlave()
288 else:298 else:
289 return status_sentence299 builder.requestAbort()
290300 if logger:
291 def rescue_slave(status_sentence):301 logger.info(
292 # If slave is not building nor waiting, it's not in need of rescuing.302 "Builder '%s' rescued from '%s': '%s'" %
293 status = status_sentence[0]303 (builder.name, slave_build_id, reason))
294 if status not in ident_position.keys():304
295 return305
296 slave_build_id = status_sentence[ident_position[status]]306def _update_builder_status(builder, logger=None):
297 try:307 """Really update the builder status."""
298 builder.verifySlaveBuildCookie(slave_build_id)308 try:
299 except CorruptBuildCookie, reason:309 builder.checkSlaveAlive()
300 if status == 'BuilderStatus.WAITING':310 builder.rescueIfLost(logger)
301 d = builder.cleanSlave()311 # Catch only known exceptions.
302 else:312 # XXX cprov 2007-06-15 bug=120571: ValueError & TypeError catching is
303 d = builder.requestAbort()313 # disturbing in this context. We should spend sometime sanitizing the
304 def log_rescue(ignored):314 # exceptions raised in the Builder API since we already started the
305 if logger:315 # main refactoring of this area.
306 logger.info(316 except (ValueError, TypeError, xmlrpclib.Fault,
307 "Builder '%s' rescued from '%s': '%s'" %317 BuildDaemonError), reason:
308 (builder.name, slave_build_id, reason))318 builder.failBuilder(str(reason))
309 return d.addCallback(log_rescue)319 if logger:
310320 logger.warn(
311 d.addCallback(got_status)321 "%s (%s) marked as failed due to: %s",
312 d.addCallback(rescue_slave)322 builder.name, builder.url, builder.failnotes, exc_info=True)
313 return d
314323
315324
316def updateBuilderStatus(builder, logger=None):325def updateBuilderStatus(builder, logger=None):
@@ -318,7 +327,16 @@
318 if logger:327 if logger:
319 logger.debug('Checking %s' % builder.name)328 logger.debug('Checking %s' % builder.name)
320329
321 return builder.rescueIfLost(logger)330 MAX_EINTR_RETRIES = 42 # pulling a number out of my a$$ here
331 try:
332 return until_no_eintr(
333 MAX_EINTR_RETRIES, _update_builder_status, builder, logger=logger)
334 except socket.error, reason:
335 # In Python 2.6 we can use IOError instead. It also has
336 # reason.errno but we might be using 2.5 here so use the
337 # index hack.
338 error_message = str(reason)
339 builder.handleTimeout(logger, error_message)
322340
323341
324class Builder(SQLBase):342class Builder(SQLBase):
@@ -346,10 +364,6 @@
346 active = BoolCol(dbName='active', notNull=True, default=True)364 active = BoolCol(dbName='active', notNull=True, default=True)
347 failure_count = IntCol(dbName='failure_count', default=0, notNull=True)365 failure_count = IntCol(dbName='failure_count', default=0, notNull=True)
348366
349 # The number of times a builder can consecutively fail before we
350 # give up and mark it builderok=False.
351 FAILURE_THRESHOLD = 5
352
353 def _getCurrentBuildBehavior(self):367 def _getCurrentBuildBehavior(self):
354 """Return the current build behavior."""368 """Return the current build behavior."""
355 if not safe_hasattr(self, '_current_build_behavior'):369 if not safe_hasattr(self, '_current_build_behavior'):
@@ -395,13 +409,18 @@
395 """See `IBuilder`."""409 """See `IBuilder`."""
396 self.failure_count = 0410 self.failure_count = 0
397411
412 def checkSlaveAlive(self):
413 """See IBuilder."""
414 if self.slave.echo("Test")[0] != "Test":
415 raise BuildDaemonError("Failed to echo OK")
416
398 def rescueIfLost(self, logger=None):417 def rescueIfLost(self, logger=None):
399 """See `IBuilder`."""418 """See `IBuilder`."""
400 return rescueBuilderIfLost(self, logger)419 rescueBuilderIfLost(self, logger)
401420
402 def updateStatus(self, logger=None):421 def updateStatus(self, logger=None):
403 """See `IBuilder`."""422 """See `IBuilder`."""
404 return updateBuilderStatus(self, logger)423 updateBuilderStatus(self, logger)
405424
406 def cleanSlave(self):425 def cleanSlave(self):
407 """See IBuilder."""426 """See IBuilder."""
@@ -421,23 +440,20 @@
421 def resumeSlaveHost(self):440 def resumeSlaveHost(self):
422 """See IBuilder."""441 """See IBuilder."""
423 if not self.virtualized:442 if not self.virtualized:
424 return defer.fail(CannotResumeHost('Builder is not virtualized.'))443 raise CannotResumeHost('Builder is not virtualized.')
425444
426 if not self.vm_host:445 if not self.vm_host:
427 return defer.fail(CannotResumeHost('Undefined vm_host.'))446 raise CannotResumeHost('Undefined vm_host.')
428447
429 logger = self._getSlaveScannerLogger()448 logger = self._getSlaveScannerLogger()
430 logger.debug("Resuming %s (%s)" % (self.name, self.url))449 logger.debug("Resuming %s (%s)" % (self.name, self.url))
431450
432 d = self.slave.resume()451 stdout, stderr, returncode = self.slave.resume()
433 def got_resume_ok((stdout, stderr, returncode)):452 if returncode != 0:
434 return stdout, stderr
435 def got_resume_bad(failure):
436 stdout, stderr, code = failure.value
437 raise CannotResumeHost(453 raise CannotResumeHost(
438 "Resuming failed:\nOUT:\n%s\nERR:\n%s\n" % (stdout, stderr))454 "Resuming failed:\nOUT:\n%s\nERR:\n%s\n" % (stdout, stderr))
439455
440 return d.addCallback(got_resume_ok).addErrback(got_resume_bad)456 return stdout, stderr
441457
442 @cachedproperty458 @cachedproperty
443 def slave(self):459 def slave(self):
@@ -446,7 +462,7 @@
446 # the slave object, which is usually an XMLRPC client, with a462 # the slave object, which is usually an XMLRPC client, with a
447 # stub object that removes the need to actually create a buildd463 # stub object that removes the need to actually create a buildd
448 # slave in various states - which can be hard to create.464 # slave in various states - which can be hard to create.
449 return BuilderSlave.makeBuilderSlave(self.url, self.vm_host)465 return BuilderSlave.makeBlockingSlave(self.url, self.vm_host)
450466
451 def setSlaveForTesting(self, proxy):467 def setSlaveForTesting(self, proxy):
452 """See IBuilder."""468 """See IBuilder."""
@@ -467,23 +483,18 @@
467483
468 # If we are building a virtual build, resume the virtual machine.484 # If we are building a virtual build, resume the virtual machine.
469 if self.virtualized:485 if self.virtualized:
470 d = self.resumeSlaveHost()486 self.resumeSlaveHost()
471 else:
472 d = defer.succeed(None)
473487
474 def resume_done(ignored):488 # Do it.
475 return self.current_build_behavior.dispatchBuildToSlave(489 build_queue_item.markAsBuilding(self)
490 try:
491 self.current_build_behavior.dispatchBuildToSlave(
476 build_queue_item.id, logger)492 build_queue_item.id, logger)
477493 except BuildSlaveFailure, e:
478 def eb_slave_failure(failure):494 logger.debug("Disabling builder: %s" % self.url, exc_info=1)
479 failure.trap(BuildSlaveFailure)
480 e = failure.value
481 self.failBuilder(495 self.failBuilder(
482 "Exception (%s) when setting up to new job" % (e,))496 "Exception (%s) when setting up to new job" % (e,))
483497 except CannotFetchFile, e:
484 def eb_cannot_fetch_file(failure):
485 failure.trap(CannotFetchFile)
486 e = failure.value
487 message = """Slave '%s' (%s) was unable to fetch file.498 message = """Slave '%s' (%s) was unable to fetch file.
488 ****** URL ********499 ****** URL ********
489 %s500 %s
@@ -492,19 +503,10 @@
492 *******************503 *******************
493 """ % (self.name, self.url, e.file_url, e.error_information)504 """ % (self.name, self.url, e.file_url, e.error_information)
494 raise BuildDaemonError(message)505 raise BuildDaemonError(message)
495506 except socket.error, e:
496 def eb_socket_error(failure):
497 failure.trap(socket.error)
498 e = failure.value
499 error_message = "Exception (%s) when setting up new job" % (e,)507 error_message = "Exception (%s) when setting up new job" % (e,)
500 d = self.handleTimeout(logger, error_message)508 self.handleTimeout(logger, error_message)
501 return d.addBoth(lambda ignored: failure)509 raise BuildSlaveFailure
502
503 d.addCallback(resume_done)
504 d.addErrback(eb_slave_failure)
505 d.addErrback(eb_cannot_fetch_file)
506 d.addErrback(eb_socket_error)
507 return d
508510
509 def failBuilder(self, reason):511 def failBuilder(self, reason):
510 """See IBuilder"""512 """See IBuilder"""
@@ -532,24 +534,22 @@
532534
533 def slaveStatus(self):535 def slaveStatus(self):
534 """See IBuilder."""536 """See IBuilder."""
535 d = self.slave.status()537 builder_version, builder_arch, mechanisms = self.slave.info()
536 def got_status(status_sentence):538 status_sentence = self.slave.status()
537 status = {'builder_status': status_sentence[0]}539
538540 status = {'builder_status': status_sentence[0]}
539 # Extract detailed status and log information if present.541
540 # Although build_id is also easily extractable here, there is no542 # Extract detailed status and log information if present.
541 # valid reason for anything to use it, so we exclude it.543 # Although build_id is also easily extractable here, there is no
542 if status['builder_status'] == 'BuilderStatus.WAITING':544 # valid reason for anything to use it, so we exclude it.
543 status['build_status'] = status_sentence[1]545 if status['builder_status'] == 'BuilderStatus.WAITING':
544 else:546 status['build_status'] = status_sentence[1]
545 if status['builder_status'] == 'BuilderStatus.BUILDING':547 else:
546 status['logtail'] = status_sentence[2]548 if status['builder_status'] == 'BuilderStatus.BUILDING':
547549 status['logtail'] = status_sentence[2]
548 self.current_build_behavior.updateSlaveStatus(550
549 status_sentence, status)551 self.current_build_behavior.updateSlaveStatus(status_sentence, status)
550 return status552 return status
551
552 return d.addCallback(got_status)
553553
554 def slaveStatusSentence(self):554 def slaveStatusSentence(self):
555 """See IBuilder."""555 """See IBuilder."""
@@ -562,15 +562,13 @@
562562
563 def updateBuild(self, queueItem):563 def updateBuild(self, queueItem):
564 """See `IBuilder`."""564 """See `IBuilder`."""
565 return self.current_build_behavior.updateBuild(queueItem)565 self.current_build_behavior.updateBuild(queueItem)
566566
567 def transferSlaveFileToLibrarian(self, file_sha1, filename, private):567 def transferSlaveFileToLibrarian(self, file_sha1, filename, private):
568 """See IBuilder."""568 """See IBuilder."""
569 out_file_fd, out_file_name = tempfile.mkstemp(suffix=".buildlog")569 out_file_fd, out_file_name = tempfile.mkstemp(suffix=".buildlog")
570 out_file = os.fdopen(out_file_fd, "r+")570 out_file = os.fdopen(out_file_fd, "r+")
571 try:571 try:
572 # XXX 2010-10-18 bug=662631
573 # Change this to do non-blocking IO.
574 slave_file = self.slave.getFile(file_sha1)572 slave_file = self.slave.getFile(file_sha1)
575 copy_and_close(slave_file, out_file)573 copy_and_close(slave_file, out_file)
576 # If the requested file is the 'buildlog' compress it using gzip574 # If the requested file is the 'buildlog' compress it using gzip
@@ -601,17 +599,18 @@
601599
602 return library_file.id600 return library_file.id
603601
604 def isAvailable(self):602 @property
603 def is_available(self):
605 """See `IBuilder`."""604 """See `IBuilder`."""
606 if not self.builderok:605 if not self.builderok:
607 return defer.succeed(False)606 return False
608 d = self.slaveStatusSentence()607 try:
609 def catch_fault(failure):608 slavestatus = self.slaveStatusSentence()
610 failure.trap(xmlrpclib.Fault, socket.error)609 except (xmlrpclib.Fault, socket.error):
611 return False610 return False
612 def check_available(status):611 if slavestatus[0] != BuilderStatus.IDLE:
613 return status[0] == BuilderStatus.IDLE612 return False
614 return d.addCallbacks(check_available, catch_fault)613 return True
615614
616 def _getSlaveScannerLogger(self):615 def _getSlaveScannerLogger(self):
617 """Return the logger instance from buildd-slave-scanner.py."""616 """Return the logger instance from buildd-slave-scanner.py."""
@@ -622,27 +621,6 @@
622 logger = logging.getLogger('slave-scanner')621 logger = logging.getLogger('slave-scanner')
623 return logger622 return logger
624623
625 def acquireBuildCandidate(self):
626 """Acquire a build candidate in an atomic fashion.
627
628 When retrieiving a candidate we need to mark it as building
629 immediately so that it is not dispatched by another builder in the
630 build manager.
631
632 We can consider this to be atomic because although the build manager
633 is a Twisted app and gives the appearance of doing lots of things at
634 once, it's still single-threaded so no more than one builder scan
635 can be in this code at the same time.
636
637 If there's ever more than one build manager running at once, then
638 this code will need some sort of mutex.
639 """
640 candidate = self._findBuildCandidate()
641 if candidate is not None:
642 candidate.markAsBuilding(self)
643 transaction.commit()
644 return candidate
645
646 def _findBuildCandidate(self):624 def _findBuildCandidate(self):
647 """Find a candidate job for dispatch to an idle buildd slave.625 """Find a candidate job for dispatch to an idle buildd slave.
648626
@@ -722,46 +700,52 @@
722 :param candidate: The job to dispatch.700 :param candidate: The job to dispatch.
723 """701 """
724 logger = self._getSlaveScannerLogger()702 logger = self._getSlaveScannerLogger()
725 # Using maybeDeferred ensures that any exceptions are also703 try:
726 # wrapped up and caught later.704 self.startBuild(candidate, logger)
727 d = defer.maybeDeferred(self.startBuild, candidate, logger)705 except (BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch), err:
728 return d706 logger.warn('Could not build: %s' % err)
729707
730 def handleTimeout(self, logger, error_message):708 def handleTimeout(self, logger, error_message):
731 """See IBuilder."""709 """See IBuilder."""
710 builder_should_be_failed = True
711
732 if self.virtualized:712 if self.virtualized:
733 # Virtualized/PPA builder: attempt a reset.713 # Virtualized/PPA builder: attempt a reset.
734 logger.warn(714 logger.warn(
735 "Resetting builder: %s -- %s" % (self.url, error_message),715 "Resetting builder: %s -- %s" % (self.url, error_message),
736 exc_info=True)716 exc_info=True)
737 d = self.resumeSlaveHost()717 try:
738 return d718 self.resumeSlaveHost()
739 else:719 except CannotResumeHost, err:
740 # XXX: This should really let the failure bubble up to the720 # Failed to reset builder.
741 # scan() method that does the failure counting.721 logger.warn(
722 "Failed to reset builder: %s -- %s" %
723 (self.url, str(err)), exc_info=True)
724 else:
725 # Builder was reset, do *not* mark it as failed.
726 builder_should_be_failed = False
727
728 if builder_should_be_failed:
742 # Mark builder as 'failed'.729 # Mark builder as 'failed'.
743 logger.warn(730 logger.warn(
744 "Disabling builder: %s -- %s" % (self.url, error_message))731 "Disabling builder: %s -- %s" % (self.url, error_message),
732 exc_info=True)
745 self.failBuilder(error_message)733 self.failBuilder(error_message)
746 return defer.succeed(None)
747734
748 def findAndStartJob(self, buildd_slave=None):735 def findAndStartJob(self, buildd_slave=None):
749 """See IBuilder."""736 """See IBuilder."""
750 # XXX This method should be removed in favour of two separately
751 # called methods that find and dispatch the job. It will
752 # require a lot of test fixing.
753 logger = self._getSlaveScannerLogger()737 logger = self._getSlaveScannerLogger()
754 candidate = self.acquireBuildCandidate()738 candidate = self._findBuildCandidate()
755739
756 if candidate is None:740 if candidate is None:
757 logger.debug("No build candidates available for builder.")741 logger.debug("No build candidates available for builder.")
758 return defer.succeed(None)742 return None
759743
760 if buildd_slave is not None:744 if buildd_slave is not None:
761 self.setSlaveForTesting(buildd_slave)745 self.setSlaveForTesting(buildd_slave)
762746
763 d = self._dispatchBuildCandidate(candidate)747 self._dispatchBuildCandidate(candidate)
764 return d.addCallback(lambda ignored: candidate)748 return candidate
765749
766 def getBuildQueue(self):750 def getBuildQueue(self):
767 """See `IBuilder`."""751 """See `IBuilder`."""
768752
=== modified file 'lib/lp/buildmaster/model/buildfarmjobbehavior.py'
--- lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-10-20 11:54:27 +0000
+++ lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-12-07 16:29:13 +0000
@@ -16,18 +16,13 @@
16import socket16import socket
17import xmlrpclib17import xmlrpclib
1818
19from twisted.internet import defer
20
21from zope.component import getUtility19from zope.component import getUtility
22from zope.interface import implements20from zope.interface import implements
23from zope.security.proxy import removeSecurityProxy21from zope.security.proxy import removeSecurityProxy
2422
25from canonical import encoding23from canonical import encoding
26from canonical.librarian.interfaces import ILibrarianClient24from canonical.librarian.interfaces import ILibrarianClient
27from lp.buildmaster.interfaces.builder import (25from lp.buildmaster.interfaces.builder import CorruptBuildCookie
28 BuildSlaveFailure,
29 CorruptBuildCookie,
30 )
31from lp.buildmaster.interfaces.buildfarmjobbehavior import (26from lp.buildmaster.interfaces.buildfarmjobbehavior import (
32 BuildBehaviorMismatch,27 BuildBehaviorMismatch,
33 IBuildFarmJobBehavior,28 IBuildFarmJobBehavior,
@@ -74,53 +69,54 @@
74 """See `IBuildFarmJobBehavior`."""69 """See `IBuildFarmJobBehavior`."""
75 logger = logging.getLogger('slave-scanner')70 logger = logging.getLogger('slave-scanner')
7671
77 d = self._builder.slaveStatus()72 try:
7873 slave_status = self._builder.slaveStatus()
79 def got_failure(failure):74 except (xmlrpclib.Fault, socket.error), info:
80 failure.trap(xmlrpclib.Fault, socket.error)75 # XXX cprov 2005-06-29:
81 info = failure.value76 # Hmm, a problem with the xmlrpc interface,
77 # disable the builder ?? or simple notice the failure
78 # with a timestamp.
82 info = ("Could not contact the builder %s, caught a (%s)"79 info = ("Could not contact the builder %s, caught a (%s)"
83 % (queueItem.builder.url, info))80 % (queueItem.builder.url, info))
84 raise BuildSlaveFailure(info)81 logger.debug(info, exc_info=True)
8582 # keep the job for scan
86 def got_status(slave_status):83 return
87 builder_status_handlers = {84
88 'BuilderStatus.IDLE': self.updateBuild_IDLE,85 builder_status_handlers = {
89 'BuilderStatus.BUILDING': self.updateBuild_BUILDING,86 'BuilderStatus.IDLE': self.updateBuild_IDLE,
90 'BuilderStatus.ABORTING': self.updateBuild_ABORTING,87 'BuilderStatus.BUILDING': self.updateBuild_BUILDING,
91 'BuilderStatus.ABORTED': self.updateBuild_ABORTED,88 'BuilderStatus.ABORTING': self.updateBuild_ABORTING,
92 'BuilderStatus.WAITING': self.updateBuild_WAITING,89 'BuilderStatus.ABORTED': self.updateBuild_ABORTED,
93 }90 'BuilderStatus.WAITING': self.updateBuild_WAITING,
9491 }
95 builder_status = slave_status['builder_status']92
96 if builder_status not in builder_status_handlers:93 builder_status = slave_status['builder_status']
97 logger.critical(94 if builder_status not in builder_status_handlers:
98 "Builder on %s returned unknown status %s, failing it"95 logger.critical(
99 % (self._builder.url, builder_status))96 "Builder on %s returned unknown status %s, failing it"
100 self._builder.failBuilder(97 % (self._builder.url, builder_status))
101 "Unknown status code (%s) returned from status() probe."98 self._builder.failBuilder(
102 % builder_status)99 "Unknown status code (%s) returned from status() probe."
103 # XXX: This will leave the build and job in a bad state, but100 % builder_status)
104 # should never be possible, since our builder statuses are101 # XXX: This will leave the build and job in a bad state, but
105 # known.102 # should never be possible, since our builder statuses are
106 queueItem._builder = None103 # known.
107 queueItem.setDateStarted(None)104 queueItem._builder = None
108 return105 queueItem.setDateStarted(None)
109106 return
110 # Since logtail is a xmlrpclib.Binary container and it is107
111 # returned from the IBuilder content class, it arrives108 # Since logtail is a xmlrpclib.Binary container and it is returned
112 # protected by a Zope Security Proxy, which is not declared,109 # from the IBuilder content class, it arrives protected by a Zope
113 # thus empty. Before passing it to the status handlers we110 # Security Proxy, which is not declared, thus empty. Before passing
114 # will simply remove the proxy.111 # it to the status handlers we will simply remove the proxy.
115 logtail = removeSecurityProxy(slave_status.get('logtail'))112 logtail = removeSecurityProxy(slave_status.get('logtail'))
116113
117 method = builder_status_handlers[builder_status]114 method = builder_status_handlers[builder_status]
118 return defer.maybeDeferred(115 try:
119 method, queueItem, slave_status, logtail, logger)116 method(queueItem, slave_status, logtail, logger)
120117 except TypeError, e:
121 d.addErrback(got_failure)118 logger.critical("Received wrong number of args in response.")
122 d.addCallback(got_status)119 logger.exception(e)
123 return d
124120
125 def updateBuild_IDLE(self, queueItem, slave_status, logtail, logger):121 def updateBuild_IDLE(self, queueItem, slave_status, logtail, logger):
126 """Somehow the builder forgot about the build job.122 """Somehow the builder forgot about the build job.
@@ -150,13 +146,11 @@
150146
151 Clean the builder for another jobs.147 Clean the builder for another jobs.
152 """148 """
153 d = queueItem.builder.cleanSlave()149 queueItem.builder.cleanSlave()
154 def got_cleaned(ignored):150 queueItem.builder = None
155 queueItem.builder = None151 if queueItem.job.status != JobStatus.FAILED:
156 if queueItem.job.status != JobStatus.FAILED:152 queueItem.job.fail()
157 queueItem.job.fail()153 queueItem.specific_job.jobAborted()
158 queueItem.specific_job.jobAborted()
159 return d.addCallback(got_cleaned)
160154
161 def extractBuildStatus(self, slave_status):155 def extractBuildStatus(self, slave_status):
162 """Read build status name.156 """Read build status name.
@@ -191,8 +185,6 @@
191 # XXX: dsilvers 2005-03-02: Confirm the builder has the right build?185 # XXX: dsilvers 2005-03-02: Confirm the builder has the right build?
192186
193 build = queueItem.specific_job.build187 build = queueItem.specific_job.build
194 # XXX 2010-10-18 bug=662631
195 # Change this to do non-blocking IO.
196 build.handleStatus(build_status, librarian, slave_status)188 build.handleStatus(build_status, librarian, slave_status)
197189
198190
199191
=== modified file 'lib/lp/buildmaster/model/packagebuild.py'
--- lib/lp/buildmaster/model/packagebuild.py 2010-10-26 20:43:50 +0000
+++ lib/lp/buildmaster/model/packagebuild.py 2010-12-07 16:29:13 +0000
@@ -163,8 +163,6 @@
163 def getLogFromSlave(package_build):163 def getLogFromSlave(package_build):
164 """See `IPackageBuild`."""164 """See `IPackageBuild`."""
165 builder = package_build.buildqueue_record.builder165 builder = package_build.buildqueue_record.builder
166 # XXX 2010-10-18 bug=662631
167 # Change this to do non-blocking IO.
168 return builder.transferSlaveFileToLibrarian(166 return builder.transferSlaveFileToLibrarian(
169 SLAVE_LOG_FILENAME,167 SLAVE_LOG_FILENAME,
170 package_build.buildqueue_record.getLogFileName(),168 package_build.buildqueue_record.getLogFileName(),
@@ -180,8 +178,6 @@
180 # log, builder and date_finished are read-only, so we must178 # log, builder and date_finished are read-only, so we must
181 # currently remove the security proxy to set them.179 # currently remove the security proxy to set them.
182 naked_build = removeSecurityProxy(build)180 naked_build = removeSecurityProxy(build)
183 # XXX 2010-10-18 bug=662631
184 # Change this to do non-blocking IO.
185 naked_build.log = build.getLogFromSlave(build)181 naked_build.log = build.getLogFromSlave(build)
186 naked_build.builder = build.buildqueue_record.builder182 naked_build.builder = build.buildqueue_record.builder
187 # XXX cprov 20060615 bug=120584: Currently buildduration includes183 # XXX cprov 20060615 bug=120584: Currently buildduration includes
@@ -278,8 +274,6 @@
278 logger.critical("Unknown BuildStatus '%s' for builder '%s'"274 logger.critical("Unknown BuildStatus '%s' for builder '%s'"
279 % (status, self.buildqueue_record.builder.url))275 % (status, self.buildqueue_record.builder.url))
280 return276 return
281 # XXX 2010-10-18 bug=662631
282 # Change this to do non-blocking IO.
283 method(librarian, slave_status, logger)277 method(librarian, slave_status, logger)
284278
285 def _handleStatus_OK(self, librarian, slave_status, logger):279 def _handleStatus_OK(self, librarian, slave_status, logger):
286280
=== modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
--- lib/lp/buildmaster/tests/mock_slaves.py 2010-10-14 15:37:56 +0000
+++ lib/lp/buildmaster/tests/mock_slaves.py 2010-12-07 16:29:13 +0000
@@ -6,40 +6,21 @@
6__metaclass__ = type6__metaclass__ = type
77
8__all__ = [8__all__ = [
9 'AbortedSlave',9 'MockBuilder',
10 'AbortingSlave',10 'LostBuildingBrokenSlave',
11 'BrokenSlave',11 'BrokenSlave',
12 'OkSlave',
12 'BuildingSlave',13 'BuildingSlave',
13 'CorruptBehavior',14 'AbortedSlave',
14 'DeadProxy',
15 'LostBuildingBrokenSlave',
16 'MockBuilder',
17 'OkSlave',
18 'SlaveTestHelpers',
19 'TrivialBehavior',
20 'WaitingSlave',15 'WaitingSlave',
16 'AbortingSlave',
21 ]17 ]
2218
23import fixtures
24import os
25
26from StringIO import StringIO19from StringIO import StringIO
27import xmlrpclib20import xmlrpclib
2821
29from testtools.content import Content22from lp.buildmaster.interfaces.builder import CannotFetchFile
30from testtools.content_type import UTF8_TEXT
31
32from twisted.internet import defer
33from twisted.web import xmlrpc
34
35from canonical.buildd.tests.harness import BuilddSlaveTestSetup
36
37from lp.buildmaster.interfaces.builder import (
38 CannotFetchFile,
39 CorruptBuildCookie,
40 )
41from lp.buildmaster.model.builder import (23from lp.buildmaster.model.builder import (
42 BuilderSlave,
43 rescueBuilderIfLost,24 rescueBuilderIfLost,
44 updateBuilderStatus,25 updateBuilderStatus,
45 )26 )
@@ -78,9 +59,15 @@
78 slave_build_id)59 slave_build_id)
7960
80 def cleanSlave(self):61 def cleanSlave(self):
62 # XXX: This should not print anything. The print is only here to make
63 # doc/builder.txt a meaningful test.
64 print 'Cleaning slave'
81 return self.slave.clean()65 return self.slave.clean()
8266
83 def requestAbort(self):67 def requestAbort(self):
68 # XXX: This should not print anything. The print is only here to make
69 # doc/builder.txt a meaningful test.
70 print 'Aborting slave'
84 return self.slave.abort()71 return self.slave.abort()
8572
86 def resumeSlave(self, logger):73 def resumeSlave(self, logger):
@@ -90,10 +77,10 @@
90 pass77 pass
9178
92 def rescueIfLost(self, logger=None):79 def rescueIfLost(self, logger=None):
93 return rescueBuilderIfLost(self, logger)80 rescueBuilderIfLost(self, logger)
9481
95 def updateStatus(self, logger=None):82 def updateStatus(self, logger=None):
96 return defer.maybeDeferred(updateBuilderStatus, self, logger)83 updateBuilderStatus(self, logger)
9784
9885
99# XXX: It would be *really* nice to run some set of tests against the real86# XXX: It would be *really* nice to run some set of tests against the real
@@ -108,44 +95,36 @@
108 self.arch_tag = arch_tag95 self.arch_tag = arch_tag
10996
110 def status(self):97 def status(self):
111 return defer.succeed(('BuilderStatus.IDLE', ''))98 return ('BuilderStatus.IDLE', '')
11299
113 def ensurepresent(self, sha1, url, user=None, password=None):100 def ensurepresent(self, sha1, url, user=None, password=None):
114 self.call_log.append(('ensurepresent', url, user, password))101 self.call_log.append(('ensurepresent', url, user, password))
115 return defer.succeed((True, None))102 return True, None
116103
117 def build(self, buildid, buildtype, chroot, filemap, args):104 def build(self, buildid, buildtype, chroot, filemap, args):
118 self.call_log.append(105 self.call_log.append(
119 ('build', buildid, buildtype, chroot, filemap.keys(), args))106 ('build', buildid, buildtype, chroot, filemap.keys(), args))
120 info = 'OkSlave BUILDING'107 info = 'OkSlave BUILDING'
121 return defer.succeed(('BuildStatus.Building', info))108 return ('BuildStatus.Building', info)
122109
123 def echo(self, *args):110 def echo(self, *args):
124 self.call_log.append(('echo',) + args)111 self.call_log.append(('echo',) + args)
125 return defer.succeed(args)112 return args
126113
127 def clean(self):114 def clean(self):
128 self.call_log.append('clean')115 self.call_log.append('clean')
129 return defer.succeed(None)
130116
131 def abort(self):117 def abort(self):
132 self.call_log.append('abort')118 self.call_log.append('abort')
133 return defer.succeed(None)
134119
135 def info(self):120 def info(self):
136 self.call_log.append('info')121 self.call_log.append('info')
137 return defer.succeed(('1.0', self.arch_tag, 'debian'))122 return ('1.0', self.arch_tag, 'debian')
138
139 def resume(self):
140 self.call_log.append('resume')
141 return defer.succeed(("", "", 0))
142123
143 def sendFileToSlave(self, sha1, url, username="", password=""):124 def sendFileToSlave(self, sha1, url, username="", password=""):
144 d = self.ensurepresent(sha1, url, username, password)125 present, info = self.ensurepresent(sha1, url, username, password)
145 def check_present((present, info)):126 if not present:
146 if not present:127 raise CannotFetchFile(url, info)
147 raise CannotFetchFile(url, info)
148 return d.addCallback(check_present)
149128
150 def cacheFile(self, logger, libraryfilealias):129 def cacheFile(self, logger, libraryfilealias):
151 return self.sendFileToSlave(130 return self.sendFileToSlave(
@@ -162,11 +141,9 @@
162 def status(self):141 def status(self):
163 self.call_log.append('status')142 self.call_log.append('status')
164 buildlog = xmlrpclib.Binary("This is a build log")143 buildlog = xmlrpclib.Binary("This is a build log")
165 return defer.succeed(144 return ('BuilderStatus.BUILDING', self.build_id, buildlog)
166 ('BuilderStatus.BUILDING', self.build_id, buildlog))
167145
168 def getFile(self, sum):146 def getFile(self, sum):
169 # XXX: This needs to be updated to return a Deferred.
170 self.call_log.append('getFile')147 self.call_log.append('getFile')
171 if sum == "buildlog":148 if sum == "buildlog":
172 s = StringIO("This is a build log")149 s = StringIO("This is a build log")
@@ -178,15 +155,11 @@
178 """A mock slave that looks like it's currently waiting."""155 """A mock slave that looks like it's currently waiting."""
179156
180 def __init__(self, state='BuildStatus.OK', dependencies=None,157 def __init__(self, state='BuildStatus.OK', dependencies=None,
181 build_id='1-1', filemap=None):158 build_id='1-1'):
182 super(WaitingSlave, self).__init__()159 super(WaitingSlave, self).__init__()
183 self.state = state160 self.state = state
184 self.dependencies = dependencies161 self.dependencies = dependencies
185 self.build_id = build_id162 self.build_id = build_id
186 if filemap is None:
187 self.filemap = {}
188 else:
189 self.filemap = filemap
190163
191 # By default, the slave only has a buildlog, but callsites164 # By default, the slave only has a buildlog, but callsites
192 # can update this list as needed.165 # can update this list as needed.
@@ -194,12 +167,10 @@
194167
195 def status(self):168 def status(self):
196 self.call_log.append('status')169 self.call_log.append('status')
197 return defer.succeed((170 return ('BuilderStatus.WAITING', self.state, self.build_id, {},
198 'BuilderStatus.WAITING', self.state, self.build_id, self.filemap,171 self.dependencies)
199 self.dependencies))
200172
201 def getFile(self, hash):173 def getFile(self, hash):
202 # XXX: This needs to be updated to return a Deferred.
203 self.call_log.append('getFile')174 self.call_log.append('getFile')
204 if hash in self.valid_file_hashes:175 if hash in self.valid_file_hashes:
205 content = "This is a %s" % hash176 content = "This is a %s" % hash
@@ -213,19 +184,15 @@
213184
214 def status(self):185 def status(self):
215 self.call_log.append('status')186 self.call_log.append('status')
216 return defer.succeed(('BuilderStatus.ABORTING', '1-1'))187 return ('BuilderStatus.ABORTING', '1-1')
217188
218189
219class AbortedSlave(OkSlave):190class AbortedSlave(OkSlave):
220 """A mock slave that looks like it's aborted."""191 """A mock slave that looks like it's aborted."""
221192
222 def clean(self):193 def status(self):
223 self.call_log.append('status')194 self.call_log.append('status')
224 return defer.succeed(None)195 return ('BuilderStatus.ABORTED', '1-1')
225
226 def status(self):
227 self.call_log.append('clean')
228 return defer.succeed(('BuilderStatus.ABORTED', '1-1'))
229196
230197
231class LostBuildingBrokenSlave:198class LostBuildingBrokenSlave:
@@ -239,108 +206,16 @@
239206
240 def status(self):207 def status(self):
241 self.call_log.append('status')208 self.call_log.append('status')
242 return defer.succeed(('BuilderStatus.BUILDING', '1000-10000'))209 return ('BuilderStatus.BUILDING', '1000-10000')
243210
244 def abort(self):211 def abort(self):
245 self.call_log.append('abort')212 self.call_log.append('abort')
246 return defer.fail(xmlrpclib.Fault(8002, "Could not abort"))213 raise xmlrpclib.Fault(8002, "Could not abort")
247214
248215
249class BrokenSlave:216class BrokenSlave:
250 """A mock slave that reports that it is broken."""217 """A mock slave that reports that it is broken."""
251218
252 def __init__(self):
253 self.call_log = []
254
255 def status(self):219 def status(self):
256 self.call_log.append('status')220 self.call_log.append('status')
257 return defer.fail(xmlrpclib.Fault(8001, "Broken slave"))221 raise xmlrpclib.Fault(8001, "Broken slave")
258
259
260class CorruptBehavior:
261
262 def verifySlaveBuildCookie(self, cookie):
263 raise CorruptBuildCookie("Bad value: %r" % (cookie,))
264
265
266class TrivialBehavior:
267
268 def verifySlaveBuildCookie(self, cookie):
269 pass
270
271
272class DeadProxy(xmlrpc.Proxy):
273 """An xmlrpc.Proxy that doesn't actually send any messages.
274
275 Used when you want to test timeouts, for example.
276 """
277
278 def callRemote(self, *args, **kwargs):
279 return defer.Deferred()
280
281
282class SlaveTestHelpers(fixtures.Fixture):
283
284 # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`.
285 BASE_URL = 'http://localhost:8221'
286 TEST_URL = '%s/rpc/' % (BASE_URL,)
287
288 def getServerSlave(self):
289 """Set up a test build slave server.
290
291 :return: A `BuilddSlaveTestSetup` object.
292 """
293 tachandler = BuilddSlaveTestSetup()
294 tachandler.setUp()
295 # Basically impossible to do this w/ TrialTestCase. But it would be
296 # really nice to keep it.
297 #
298 # def addLogFile(exc_info):
299 # self.addDetail(
300 # 'xmlrpc-log-file',
301 # Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read()))
302 # self.addOnException(addLogFile)
303 self.addCleanup(tachandler.tearDown)
304 return tachandler
305
306 def getClientSlave(self, reactor=None, proxy=None):
307 """Return a `BuilderSlave` for use in testing.
308
309 Points to a fixed URL that is also used by `BuilddSlaveTestSetup`.
310 """
311 return BuilderSlave.makeBuilderSlave(
312 self.TEST_URL, 'vmhost', reactor, proxy)
313
314 def makeCacheFile(self, tachandler, filename):
315 """Make a cache file available on the remote slave.
316
317 :param tachandler: The TacTestSetup object used to start the remote
318 slave.
319 :param filename: The name of the file to create in the file cache
320 area.
321 """
322 path = os.path.join(tachandler.root, 'filecache', filename)
323 fd = open(path, 'w')
324 fd.write('something')
325 fd.close()
326 self.addCleanup(os.unlink, path)
327
328 def triggerGoodBuild(self, slave, build_id=None):
329 """Trigger a good build on 'slave'.
330
331 :param slave: A `BuilderSlave` instance to trigger the build on.
332 :param build_id: The build identifier. If not specified, defaults to
333 an arbitrary string.
334 :type build_id: str
335 :return: The build id returned by the slave.
336 """
337 if build_id is None:
338 build_id = 'random-build-id'
339 tachandler = self.getServerSlave()
340 chroot_file = 'fake-chroot'
341 dsc_file = 'thing'
342 self.makeCacheFile(tachandler, chroot_file)
343 self.makeCacheFile(tachandler, dsc_file)
344 return slave.build(
345 build_id, 'debian', chroot_file, {'.dsc': dsc_file},
346 {'ogrecomponent': 'main'})
347222
=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
--- lib/lp/buildmaster/tests/test_builder.py 2010-10-18 16:44:22 +0000
+++ lib/lp/buildmaster/tests/test_builder.py 2010-12-07 16:29:13 +0000
@@ -3,24 +3,20 @@
33
4"""Test Builder features."""4"""Test Builder features."""
55
6import errno
6import os7import os
7import signal8import socket
8import xmlrpclib9import xmlrpclib
910
10from twisted.web.client import getPage11from testtools.content import Content
1112from testtools.content_type import UTF8_TEXT
12from twisted.internet.defer import CancelledError
13from twisted.internet.task import Clock
14from twisted.python.failure import Failure
15from twisted.trial.unittest import TestCase as TrialTestCase
1613
17from zope.component import getUtility14from zope.component import getUtility
18from zope.security.proxy import removeSecurityProxy15from zope.security.proxy import removeSecurityProxy
1916
20from canonical.buildd.slave import BuilderStatus17from canonical.buildd.slave import BuilderStatus
21from canonical.config import config18from canonical.buildd.tests.harness import BuilddSlaveTestSetup
22from canonical.database.sqlbase import flush_database_updates19from canonical.database.sqlbase import flush_database_updates
23from canonical.launchpad.scripts import QuietFakeLogger
24from canonical.launchpad.webapp.interfaces import (20from canonical.launchpad.webapp.interfaces import (
25 DEFAULT_FLAVOR,21 DEFAULT_FLAVOR,
26 IStoreSelector,22 IStoreSelector,
@@ -28,38 +24,21 @@
28 )24 )
29from canonical.testing.layers import (25from canonical.testing.layers import (
30 DatabaseFunctionalLayer,26 DatabaseFunctionalLayer,
31 LaunchpadZopelessLayer,27 LaunchpadZopelessLayer
32 TwistedLaunchpadZopelessLayer,
33 TwistedLayer,
34 )28 )
35from lp.buildmaster.enums import BuildStatus29from lp.buildmaster.enums import BuildStatus
36from lp.buildmaster.interfaces.builder import (30from lp.buildmaster.interfaces.builder import IBuilder, IBuilderSet
37 CannotFetchFile,
38 IBuilder,
39 IBuilderSet,
40 )
41from lp.buildmaster.interfaces.buildfarmjobbehavior import (31from lp.buildmaster.interfaces.buildfarmjobbehavior import (
42 IBuildFarmJobBehavior,32 IBuildFarmJobBehavior,
43 )33 )
44from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet34from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
45from lp.buildmaster.interfaces.builder import CannotResumeHost35from lp.buildmaster.model.builder import BuilderSlave
46from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior36from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior
47from lp.buildmaster.model.buildqueue import BuildQueue37from lp.buildmaster.model.buildqueue import BuildQueue
48from lp.buildmaster.tests.mock_slaves import (38from lp.buildmaster.tests.mock_slaves import (
49 AbortedSlave,39 AbortedSlave,
50 AbortingSlave,
51 BrokenSlave,
52 BuildingSlave,
53 CorruptBehavior,
54 DeadProxy,
55 LostBuildingBrokenSlave,
56 MockBuilder,40 MockBuilder,
57 OkSlave,
58 SlaveTestHelpers,
59 TrivialBehavior,
60 WaitingSlave,
61 )41 )
62from lp.services.job.interfaces.job import JobStatus
63from lp.soyuz.enums import (42from lp.soyuz.enums import (
64 ArchivePurpose,43 ArchivePurpose,
65 PackagePublishingStatus,44 PackagePublishingStatus,
@@ -70,12 +49,9 @@
70 )49 )
71from lp.soyuz.tests.test_publishing import SoyuzTestPublisher50from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
72from lp.testing import (51from lp.testing import (
73 ANONYMOUS,52 TestCase,
74 login_as,
75 logout,
76 TestCaseWithFactory,53 TestCaseWithFactory,
77 )54 )
78from lp.testing.factory import LaunchpadObjectFactory
79from lp.testing.fakemethod import FakeMethod55from lp.testing.fakemethod import FakeMethod
8056
8157
@@ -116,121 +92,42 @@
116 bq = builder.getBuildQueue()92 bq = builder.getBuildQueue()
117 self.assertIs(None, bq)93 self.assertIs(None, bq)
11894
11995 def test_updateBuilderStatus_catches_repeated_EINTR(self):
120class TestBuilderWithTrial(TrialTestCase):96 # A single EINTR return from a socket operation should cause the
12197 # operation to be retried, not fail/reset the builder.
122 layer = TwistedLaunchpadZopelessLayer98 builder = removeSecurityProxy(self.factory.makeBuilder())
12399 builder.handleTimeout = FakeMethod()
124 def setUp(self):100 builder.rescueIfLost = FakeMethod()
125 super(TestBuilderWithTrial, self)101
126 self.slave_helper = SlaveTestHelpers()102 def _fake_checkSlaveAlive():
127 self.slave_helper.setUp()103 # Raise an EINTR error for all invocations.
128 self.addCleanup(self.slave_helper.cleanUp)104 raise socket.error(errno.EINTR, "fake eintr")
129 self.factory = LaunchpadObjectFactory()105
130 login_as(ANONYMOUS)106 builder.checkSlaveAlive = _fake_checkSlaveAlive
131 self.addCleanup(logout)107 builder.updateStatus()
132108
133 def test_updateStatus_aborts_lost_and_broken_slave(self):109 # builder.updateStatus should eventually have called
134 # A slave that's 'lost' should be aborted; when the slave is110 # handleTimeout()
135 # broken then abort() should also throw a fault.111 self.assertEqual(1, builder.handleTimeout.call_count)
136 slave = LostBuildingBrokenSlave()112
137 lostbuilding_builder = MockBuilder(113 def test_updateBuilderStatus_catches_single_EINTR(self):
138 'Lost Building Broken Slave', slave, behavior=CorruptBehavior())114 builder = removeSecurityProxy(self.factory.makeBuilder())
139 d = lostbuilding_builder.updateStatus(QuietFakeLogger())115 builder.handleTimeout = FakeMethod()
140 def check_slave_status(failure):116 builder.rescueIfLost = FakeMethod()
141 self.assertIn('abort', slave.call_log)117 self.eintr_returned = False
142 # 'Fault' comes from the LostBuildingBrokenSlave, this is118
143 # just testing that the value is passed through.119 def _fake_checkSlaveAlive():
144 self.assertIsInstance(failure.value, xmlrpclib.Fault)120 # raise an EINTR error for the first invocation only.
145 return d.addBoth(check_slave_status)121 if not self.eintr_returned:
146122 self.eintr_returned = True
147 def test_resumeSlaveHost_nonvirtual(self):123 raise socket.error(errno.EINTR, "fake eintr")
148 builder = self.factory.makeBuilder(virtualized=False)124
149 d = builder.resumeSlaveHost()125 builder.checkSlaveAlive = _fake_checkSlaveAlive
150 return self.assertFailure(d, CannotResumeHost)126 builder.updateStatus()
151127
152 def test_resumeSlaveHost_no_vmhost(self):128 # builder.updateStatus should never call handleTimeout() for a
153 builder = self.factory.makeBuilder(virtualized=True, vm_host=None)129 # single EINTR.
154 d = builder.resumeSlaveHost()130 self.assertEqual(0, builder.handleTimeout.call_count)
155 return self.assertFailure(d, CannotResumeHost)
156
157 def test_resumeSlaveHost_success(self):
158 reset_config = """
159 [builddmaster]
160 vm_resume_command: /bin/echo -n parp"""
161 config.push('reset', reset_config)
162 self.addCleanup(config.pop, 'reset')
163
164 builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
165 d = builder.resumeSlaveHost()
166 def got_resume(output):
167 self.assertEqual(('parp', ''), output)
168 return d.addCallback(got_resume)
169
170 def test_resumeSlaveHost_command_failed(self):
171 reset_fail_config = """
172 [builddmaster]
173 vm_resume_command: /bin/false"""
174 config.push('reset fail', reset_fail_config)
175 self.addCleanup(config.pop, 'reset fail')
176 builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
177 d = builder.resumeSlaveHost()
178 return self.assertFailure(d, CannotResumeHost)
179
180 def test_handleTimeout_resume_failure(self):
181 reset_fail_config = """
182 [builddmaster]
183 vm_resume_command: /bin/false"""
184 config.push('reset fail', reset_fail_config)
185 self.addCleanup(config.pop, 'reset fail')
186 builder = self.factory.makeBuilder(virtualized=True, vm_host="pop")
187 builder.builderok = True
188 d = builder.handleTimeout(QuietFakeLogger(), 'blah')
189 return self.assertFailure(d, CannotResumeHost)
190
191 def _setupRecipeBuildAndBuilder(self):
192 # Helper function to make a builder capable of building a
193 # recipe, returning both.
194 processor = self.factory.makeProcessor(name="i386")
195 builder = self.factory.makeBuilder(
196 processor=processor, virtualized=True, vm_host="bladh")
197 builder.setSlaveForTesting(OkSlave())
198 distroseries = self.factory.makeDistroSeries()
199 das = self.factory.makeDistroArchSeries(
200 distroseries=distroseries, architecturetag="i386",
201 processorfamily=processor.family)
202 chroot = self.factory.makeLibraryFileAlias()
203 das.addOrUpdateChroot(chroot)
204 distroseries.nominatedarchindep = das
205 build = self.factory.makeSourcePackageRecipeBuild(
206 distroseries=distroseries)
207 return builder, build
208
209 def test_findAndStartJob_returns_candidate(self):
210 # findAndStartJob finds the next queued job using _findBuildCandidate.
211 # We don't care about the type of build at all.
212 builder, build = self._setupRecipeBuildAndBuilder()
213 candidate = build.queueBuild()
214 # _findBuildCandidate is tested elsewhere, we just make sure that
215 # findAndStartJob delegates to it.
216 removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
217 result=candidate)
218 d = builder.findAndStartJob()
219 return d.addCallback(self.assertEqual, candidate)
220
221 def test_findAndStartJob_starts_job(self):
222 # findAndStartJob finds the next queued job using _findBuildCandidate
223 # and then starts it.
224 # We don't care about the type of build at all.
225 builder, build = self._setupRecipeBuildAndBuilder()
226 candidate = build.queueBuild()
227 removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
228 result=candidate)
229 d = builder.findAndStartJob()
230 def check_build_started(candidate):
231 self.assertEqual(candidate.builder, builder)
232 self.assertEqual(BuildStatus.BUILDING, build.status)
233 return d.addCallback(check_build_started)
234131
235 def test_slave(self):132 def test_slave(self):
236 # Builder.slave is a BuilderSlave that points at the actual Builder.133 # Builder.slave is a BuilderSlave that points at the actual Builder.
@@ -239,147 +136,25 @@
239 builder = removeSecurityProxy(self.factory.makeBuilder())136 builder = removeSecurityProxy(self.factory.makeBuilder())
240 self.assertEqual(builder.url, builder.slave.url)137 self.assertEqual(builder.url, builder.slave.url)
241138
139
140class Test_rescueBuilderIfLost(TestCaseWithFactory):
141 """Tests for lp.buildmaster.model.builder.rescueBuilderIfLost."""
142
143 layer = LaunchpadZopelessLayer
144
242 def test_recovery_of_aborted_slave(self):145 def test_recovery_of_aborted_slave(self):
243 # If a slave is in the ABORTED state, rescueBuilderIfLost should146 # If a slave is in the ABORTED state, rescueBuilderIfLost should
244 # clean it if we don't think it's currently building anything.147 # clean it if we don't think it's currently building anything.
245 # See bug 463046.148 # See bug 463046.
246 aborted_slave = AbortedSlave()149 aborted_slave = AbortedSlave()
150 # The slave's clean() method is normally an XMLRPC call, so we
151 # can just stub it out and check that it got called.
152 aborted_slave.clean = FakeMethod()
247 builder = MockBuilder("mock_builder", aborted_slave)153 builder = MockBuilder("mock_builder", aborted_slave)
248 builder.currentjob = None154 builder.currentjob = None
249 d = builder.rescueIfLost()155 builder.rescueIfLost()
250 def check_slave_calls(ignored):156
251 self.assertIn('clean', aborted_slave.call_log)157 self.assertEqual(1, aborted_slave.clean.call_count)
252 return d.addCallback(check_slave_calls)
253
254 def test_recover_ok_slave(self):
255 # An idle slave is not rescued.
256 slave = OkSlave()
257 builder = MockBuilder("mock_builder", slave, TrivialBehavior())
258 d = builder.rescueIfLost()
259 def check_slave_calls(ignored):
260 self.assertNotIn('abort', slave.call_log)
261 self.assertNotIn('clean', slave.call_log)
262 return d.addCallback(check_slave_calls)
263
264 def test_recover_waiting_slave_with_good_id(self):
265 # rescueIfLost does not attempt to abort or clean a builder that is
266 # WAITING.
267 waiting_slave = WaitingSlave()
268 builder = MockBuilder("mock_builder", waiting_slave, TrivialBehavior())
269 d = builder.rescueIfLost()
270 def check_slave_calls(ignored):
271 self.assertNotIn('abort', waiting_slave.call_log)
272 self.assertNotIn('clean', waiting_slave.call_log)
273 return d.addCallback(check_slave_calls)
274
275 def test_recover_waiting_slave_with_bad_id(self):
276 # If a slave is WAITING with a build for us to get, and the build
277 # cookie cannot be verified, which means we don't recognize the build,
278 # then rescueBuilderIfLost should attempt to abort it, so that the
279 # builder is reset for a new build, and the corrupt build is
280 # discarded.
281 waiting_slave = WaitingSlave()
282 builder = MockBuilder("mock_builder", waiting_slave, CorruptBehavior())
283 d = builder.rescueIfLost()
284 def check_slave_calls(ignored):
285 self.assertNotIn('abort', waiting_slave.call_log)
286 self.assertIn('clean', waiting_slave.call_log)
287 return d.addCallback(check_slave_calls)
288
289 def test_recover_building_slave_with_good_id(self):
290 # rescueIfLost does not attempt to abort or clean a builder that is
291 # BUILDING.
292 building_slave = BuildingSlave()
293 builder = MockBuilder("mock_builder", building_slave, TrivialBehavior())
294 d = builder.rescueIfLost()
295 def check_slave_calls(ignored):
296 self.assertNotIn('abort', building_slave.call_log)
297 self.assertNotIn('clean', building_slave.call_log)
298 return d.addCallback(check_slave_calls)
299
300 def test_recover_building_slave_with_bad_id(self):
301 # If a slave is BUILDING with a build id we don't recognize, then we
302 # abort the build, thus stopping it in its tracks.
303 building_slave = BuildingSlave()
304 builder = MockBuilder("mock_builder", building_slave, CorruptBehavior())
305 d = builder.rescueIfLost()
306 def check_slave_calls(ignored):
307 self.assertIn('abort', building_slave.call_log)
308 self.assertNotIn('clean', building_slave.call_log)
309 return d.addCallback(check_slave_calls)
310
311
312class TestBuilderSlaveStatus(TestBuilderWithTrial):
313
314 # Verify what IBuilder.slaveStatus returns with slaves in different
315 # states.
316
317 def assertStatus(self, slave, builder_status=None,
318 build_status=None, logtail=False, filemap=None,
319 dependencies=None):
320 builder = self.factory.makeBuilder()
321 builder.setSlaveForTesting(slave)
322 d = builder.slaveStatus()
323
324 def got_status(status_dict):
325 expected = {}
326 if builder_status is not None:
327 expected["builder_status"] = builder_status
328 if build_status is not None:
329 expected["build_status"] = build_status
330 if dependencies is not None:
331 expected["dependencies"] = dependencies
332
333 # We don't care so much about the content of the logtail,
334 # just that it's there.
335 if logtail:
336 tail = status_dict.pop("logtail")
337 self.assertIsInstance(tail, xmlrpclib.Binary)
338
339 self.assertEqual(expected, status_dict)
340
341 return d.addCallback(got_status)
342
343 def test_slaveStatus_idle_slave(self):
344 self.assertStatus(
345 OkSlave(), builder_status='BuilderStatus.IDLE')
346
347 def test_slaveStatus_building_slave(self):
348 self.assertStatus(
349 BuildingSlave(), builder_status='BuilderStatus.BUILDING',
350 logtail=True)
351
352 def test_slaveStatus_waiting_slave(self):
353 self.assertStatus(
354 WaitingSlave(), builder_status='BuilderStatus.WAITING',
355 build_status='BuildStatus.OK', filemap={})
356
357 def test_slaveStatus_aborting_slave(self):
358 self.assertStatus(
359 AbortingSlave(), builder_status='BuilderStatus.ABORTING')
360
361 def test_slaveStatus_aborted_slave(self):
362 self.assertStatus(
363 AbortedSlave(), builder_status='BuilderStatus.ABORTED')
364
365 def test_isAvailable_with_not_builderok(self):
366 # isAvailable() is a wrapper around slaveStatusSentence()
367 builder = self.factory.makeBuilder()
368 builder.builderok = False
369 d = builder.isAvailable()
370 return d.addCallback(self.assertFalse)
371
372 def test_isAvailable_with_slave_fault(self):
373 builder = self.factory.makeBuilder()
374 builder.setSlaveForTesting(BrokenSlave())
375 d = builder.isAvailable()
376 return d.addCallback(self.assertFalse)
377
378 def test_isAvailable_with_slave_idle(self):
379 builder = self.factory.makeBuilder()
380 builder.setSlaveForTesting(OkSlave())
381 d = builder.isAvailable()
382 return d.addCallback(self.assertTrue)
383158
384159
385class TestFindBuildCandidateBase(TestCaseWithFactory):160class TestFindBuildCandidateBase(TestCaseWithFactory):
@@ -413,49 +188,6 @@
413 builder.manual = False188 builder.manual = False
414189
415190
416class TestFindBuildCandidateGeneralCases(TestFindBuildCandidateBase):
417 # Test usage of findBuildCandidate not specific to any archive type.
418
419 def test_findBuildCandidate_supersedes_builds(self):
420 # IBuilder._findBuildCandidate identifies if there are builds
421 # for superseded source package releases in the queue and marks
422 # the corresponding build record as SUPERSEDED.
423 archive = self.factory.makeArchive()
424 self.publisher.getPubSource(
425 sourcename="gedit", status=PackagePublishingStatus.PUBLISHED,
426 archive=archive).createMissingBuilds()
427 old_candidate = removeSecurityProxy(
428 self.frog_builder)._findBuildCandidate()
429
430 # The candidate starts off as NEEDSBUILD:
431 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(
432 old_candidate)
433 self.assertEqual(BuildStatus.NEEDSBUILD, build.status)
434
435 # Now supersede the source package:
436 publication = build.current_source_publication
437 publication.status = PackagePublishingStatus.SUPERSEDED
438
439 # The candidate returned is now a different one:
440 new_candidate = removeSecurityProxy(
441 self.frog_builder)._findBuildCandidate()
442 self.assertNotEqual(new_candidate, old_candidate)
443
444 # And the old_candidate is superseded:
445 self.assertEqual(BuildStatus.SUPERSEDED, build.status)
446
447 def test_acquireBuildCandidate_marks_building(self):
448 # acquireBuildCandidate() should call _findBuildCandidate and
449 # mark the build as building.
450 archive = self.factory.makeArchive()
451 self.publisher.getPubSource(
452 sourcename="gedit", status=PackagePublishingStatus.PUBLISHED,
453 archive=archive).createMissingBuilds()
454 candidate = removeSecurityProxy(
455 self.frog_builder).acquireBuildCandidate()
456 self.assertEqual(JobStatus.RUNNING, candidate.job.status)
457
458
459class TestFindBuildCandidatePPAWithSingleBuilder(TestCaseWithFactory):191class TestFindBuildCandidatePPAWithSingleBuilder(TestCaseWithFactory):
460192
461 layer = LaunchpadZopelessLayer193 layer = LaunchpadZopelessLayer
@@ -588,16 +320,6 @@
588 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)320 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)
589 self.failUnlessEqual('joesppa', build.archive.name)321 self.failUnlessEqual('joesppa', build.archive.name)
590322
591 def test_findBuildCandidate_with_disabled_archive(self):
592 # Disabled archives should not be considered for dispatching
593 # builds.
594 disabled_job = removeSecurityProxy(self.builder4)._findBuildCandidate()
595 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(
596 disabled_job)
597 build.archive.disable()
598 next_job = removeSecurityProxy(self.builder4)._findBuildCandidate()
599 self.assertNotEqual(disabled_job, next_job)
600
601323
602class TestFindBuildCandidatePrivatePPA(TestFindBuildCandidatePPABase):324class TestFindBuildCandidatePrivatePPA(TestFindBuildCandidatePPABase):
603325
@@ -610,14 +332,6 @@
610 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)332 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job)
611 self.failUnlessEqual('joesppa', build.archive.name)333 self.failUnlessEqual('joesppa', build.archive.name)
612334
613 # If the source for the build is still pending, it won't be
614 # dispatched because the builder has to fetch the source files
615 # from the (password protected) repo area, not the librarian.
616 pub = build.current_source_publication
617 pub.status = PackagePublishingStatus.PENDING
618 candidate = removeSecurityProxy(self.builder4)._findBuildCandidate()
619 self.assertNotEqual(next_job.id, candidate.id)
620
621335
622class TestFindBuildCandidateDistroArchive(TestFindBuildCandidateBase):336class TestFindBuildCandidateDistroArchive(TestFindBuildCandidateBase):
623337
@@ -760,48 +474,97 @@
760 self.builder.current_build_behavior, BinaryPackageBuildBehavior)474 self.builder.current_build_behavior, BinaryPackageBuildBehavior)
761475
762476
763class TestSlave(TrialTestCase):477class TestSlave(TestCase):
764 """478 """
765 Integration tests for BuilderSlave that verify how it works against a479 Integration tests for BuilderSlave that verify how it works against a
766 real slave server.480 real slave server.
767 """481 """
768482
769 layer = TwistedLayer
770
771 def setUp(self):
772 super(TestSlave, self).setUp()
773 self.slave_helper = SlaveTestHelpers()
774 self.slave_helper.setUp()
775 self.addCleanup(self.slave_helper.cleanUp)
776
777 # XXX: JonathanLange 2010-09-20 bug=643521: There are also tests for483 # XXX: JonathanLange 2010-09-20 bug=643521: There are also tests for
778 # BuilderSlave in buildd-slave.txt and in other places. The tests here484 # BuilderSlave in buildd-slave.txt and in other places. The tests here
779 # ought to become the canonical tests for BuilderSlave vs running buildd485 # ought to become the canonical tests for BuilderSlave vs running buildd
780 # XML-RPC server interaction.486 # XML-RPC server interaction.
781487
488 # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`.
489 TEST_URL = 'http://localhost:8221/rpc/'
490
491 def getServerSlave(self):
492 """Set up a test build slave server.
493
494 :return: A `BuilddSlaveTestSetup` object.
495 """
496 tachandler = BuilddSlaveTestSetup()
497 tachandler.setUp()
498 self.addCleanup(tachandler.tearDown)
499 def addLogFile(exc_info):
500 self.addDetail(
501 'xmlrpc-log-file',
502 Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read()))
503 self.addOnException(addLogFile)
504 return tachandler
505
506 def getClientSlave(self):
507 """Return a `BuilderSlave` for use in testing.
508
509 Points to a fixed URL that is also used by `BuilddSlaveTestSetup`.
510 """
511 return BuilderSlave.makeBlockingSlave(self.TEST_URL, 'vmhost')
512
513 def makeCacheFile(self, tachandler, filename):
514 """Make a cache file available on the remote slave.
515
516 :param tachandler: The TacTestSetup object used to start the remote
517 slave.
518 :param filename: The name of the file to create in the file cache
519 area.
520 """
521 path = os.path.join(tachandler.root, 'filecache', filename)
522 fd = open(path, 'w')
523 fd.write('something')
524 fd.close()
525 self.addCleanup(os.unlink, path)
526
527 def triggerGoodBuild(self, slave, build_id=None):
528 """Trigger a good build on 'slave'.
529
530 :param slave: A `BuilderSlave` instance to trigger the build on.
531 :param build_id: The build identifier. If not specified, defaults to
532 an arbitrary string.
533 :type build_id: str
534 :return: The build id returned by the slave.
535 """
536 if build_id is None:
537 build_id = self.getUniqueString()
538 tachandler = self.getServerSlave()
539 chroot_file = 'fake-chroot'
540 dsc_file = 'thing'
541 self.makeCacheFile(tachandler, chroot_file)
542 self.makeCacheFile(tachandler, dsc_file)
543 return slave.build(
544 build_id, 'debian', chroot_file, {'.dsc': dsc_file},
545 {'ogrecomponent': 'main'})
546
782 # XXX 2010-10-06 Julian bug=655559547 # XXX 2010-10-06 Julian bug=655559
783 # This is failing on buildbot but not locally; it's trying to abort548 # This is failing on buildbot but not locally; it's trying to abort
784 # before the build has started.549 # before the build has started.
785 def disabled_test_abort(self):550 def disabled_test_abort(self):
786 slave = self.slave_helper.getClientSlave()551 slave = self.getClientSlave()
787 # We need to be in a BUILDING state before we can abort.552 # We need to be in a BUILDING state before we can abort.
788 d = self.slave_helper.triggerGoodBuild(slave)553 self.triggerGoodBuild(slave)
789 d.addCallback(lambda ignored: slave.abort())554 result = slave.abort()
790 d.addCallback(self.assertEqual, BuilderStatus.ABORTING)555 self.assertEqual(result, BuilderStatus.ABORTING)
791 return d
792556
793 def test_build(self):557 def test_build(self):
794 # Calling 'build' with an expected builder type, a good build id,558 # Calling 'build' with an expected builder type, a good build id,
795 # valid chroot & filemaps works and returns a BuilderStatus of559 # valid chroot & filemaps works and returns a BuilderStatus of
796 # BUILDING.560 # BUILDING.
797 build_id = 'some-id'561 build_id = 'some-id'
798 slave = self.slave_helper.getClientSlave()562 slave = self.getClientSlave()
799 d = self.slave_helper.triggerGoodBuild(slave, build_id)563 result = self.triggerGoodBuild(slave, build_id)
800 return d.addCallback(564 self.assertEqual([BuilderStatus.BUILDING, build_id], result)
801 self.assertEqual, [BuilderStatus.BUILDING, build_id])
802565
803 def test_clean(self):566 def test_clean(self):
804 slave = self.slave_helper.getClientSlave()567 slave = self.getClientSlave()
805 # XXX: JonathanLange 2010-09-21: Calling clean() on the slave requires568 # XXX: JonathanLange 2010-09-21: Calling clean() on the slave requires
806 # it to be in either the WAITING or ABORTED states, and both of these569 # it to be in either the WAITING or ABORTED states, and both of these
807 # states are very difficult to achieve in a test environment. For the570 # states are very difficult to achieve in a test environment. For the
@@ -811,248 +574,57 @@
811 def test_echo(self):574 def test_echo(self):
812 # Calling 'echo' contacts the server which returns the arguments we575 # Calling 'echo' contacts the server which returns the arguments we
813 # gave it.576 # gave it.
814 self.slave_helper.getServerSlave()577 self.getServerSlave()
815 slave = self.slave_helper.getClientSlave()578 slave = self.getClientSlave()
816 d = slave.echo('foo', 'bar', 42)579 result = slave.echo('foo', 'bar', 42)
817 return d.addCallback(self.assertEqual, ['foo', 'bar', 42])580 self.assertEqual(['foo', 'bar', 42], result)
818581
819 def test_info(self):582 def test_info(self):
820 # Calling 'info' gets some information about the slave.583 # Calling 'info' gets some information about the slave.
821 self.slave_helper.getServerSlave()584 self.getServerSlave()
822 slave = self.slave_helper.getClientSlave()585 slave = self.getClientSlave()
823 d = slave.info()586 result = slave.info()
824 # We're testing the hard-coded values, since the version is hard-coded587 # We're testing the hard-coded values, since the version is hard-coded
825 # into the remote slave, the supported build managers are hard-coded588 # into the remote slave, the supported build managers are hard-coded
826 # into the tac file for the remote slave and config is returned from589 # into the tac file for the remote slave and config is returned from
827 # the configuration file.590 # the configuration file.
828 return d.addCallback(591 self.assertEqual(
829 self.assertEqual,
830 ['1.0',592 ['1.0',
831 'i386',593 'i386',
832 ['sourcepackagerecipe',594 ['sourcepackagerecipe',
833 'translation-templates', 'binarypackage', 'debian']])595 'translation-templates', 'binarypackage', 'debian']],
596 result)
834597
835 def test_initial_status(self):598 def test_initial_status(self):
836 # Calling 'status' returns the current status of the slave. The599 # Calling 'status' returns the current status of the slave. The
837 # initial status is IDLE.600 # initial status is IDLE.
838 self.slave_helper.getServerSlave()601 self.getServerSlave()
839 slave = self.slave_helper.getClientSlave()602 slave = self.getClientSlave()
840 d = slave.status()603 status = slave.status()
841 return d.addCallback(self.assertEqual, [BuilderStatus.IDLE, ''])604 self.assertEqual([BuilderStatus.IDLE, ''], status)
842605
843 def test_status_after_build(self):606 def test_status_after_build(self):
844 # Calling 'status' returns the current status of the slave. After a607 # Calling 'status' returns the current status of the slave. After a
845 # build has been triggered, the status is BUILDING.608 # build has been triggered, the status is BUILDING.
846 slave = self.slave_helper.getClientSlave()609 slave = self.getClientSlave()
847 build_id = 'status-build-id'610 build_id = 'status-build-id'
848 d = self.slave_helper.triggerGoodBuild(slave, build_id)611 self.triggerGoodBuild(slave, build_id)
849 d.addCallback(lambda ignored: slave.status())612 status = slave.status()
850 def check_status(status):613 self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2])
851 self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2])614 [log_file] = status[2:]
852 [log_file] = status[2:]615 self.assertIsInstance(log_file, xmlrpclib.Binary)
853 self.assertIsInstance(log_file, xmlrpclib.Binary)
854 return d.addCallback(check_status)
855616
856 def test_ensurepresent_not_there(self):617 def test_ensurepresent_not_there(self):
857 # ensurepresent checks to see if a file is there.618 # ensurepresent checks to see if a file is there.
858 self.slave_helper.getServerSlave()619 self.getServerSlave()
859 slave = self.slave_helper.getClientSlave()620 slave = self.getClientSlave()
860 d = slave.ensurepresent('blahblah', None, None, None)621 result = slave.ensurepresent('blahblah', None, None, None)
861 d.addCallback(self.assertEqual, [False, 'No URL'])622 self.assertEqual([False, 'No URL'], result)
862 return d
863623
864 def test_ensurepresent_actually_there(self):624 def test_ensurepresent_actually_there(self):
865 # ensurepresent checks to see if a file is there.625 # ensurepresent checks to see if a file is there.
866 tachandler = self.slave_helper.getServerSlave()626 tachandler = self.getServerSlave()
867 slave = self.slave_helper.getClientSlave()627 slave = self.getClientSlave()
868 self.slave_helper.makeCacheFile(tachandler, 'blahblah')628 self.makeCacheFile(tachandler, 'blahblah')
869 d = slave.ensurepresent('blahblah', None, None, None)629 result = slave.ensurepresent('blahblah', None, None, None)
870 d.addCallback(self.assertEqual, [True, 'No URL'])630 self.assertEqual([True, 'No URL'], result)
871 return d
872
873 def test_sendFileToSlave_not_there(self):
874 self.slave_helper.getServerSlave()
875 slave = self.slave_helper.getClientSlave()
876 d = slave.sendFileToSlave('blahblah', None, None, None)
877 return self.assertFailure(d, CannotFetchFile)
878
879 def test_sendFileToSlave_actually_there(self):
880 tachandler = self.slave_helper.getServerSlave()
881 slave = self.slave_helper.getClientSlave()
882 self.slave_helper.makeCacheFile(tachandler, 'blahblah')
883 d = slave.sendFileToSlave('blahblah', None, None, None)
884 def check_present(ignored):
885 d = slave.ensurepresent('blahblah', None, None, None)
886 return d.addCallback(self.assertEqual, [True, 'No URL'])
887 d.addCallback(check_present)
888 return d
889
890 def test_resumeHost_success(self):
891 # On a successful resume resume() fires the returned deferred
892 # callback with 'None'.
893 self.slave_helper.getServerSlave()
894 slave = self.slave_helper.getClientSlave()
895
896 # The configuration testing command-line.
897 self.assertEqual(
898 'echo %(vm_host)s', config.builddmaster.vm_resume_command)
899
900 # On success the response is None.
901 def check_resume_success(response):
902 out, err, code = response
903 self.assertEqual(os.EX_OK, code)
904 # XXX: JonathanLange 2010-09-23: We should instead pass the
905 # expected vm_host into the client slave. Not doing this now,
906 # since the SlaveHelper is being moved around.
907 self.assertEqual("%s\n" % slave._vm_host, out)
908 d = slave.resume()
909 d.addBoth(check_resume_success)
910 return d
911
912 def test_resumeHost_failure(self):
913 # On a failed resume, 'resumeHost' fires the returned deferred
914 # errorback with the `ProcessTerminated` failure.
915 self.slave_helper.getServerSlave()
916 slave = self.slave_helper.getClientSlave()
917
918 # Override the configuration command-line with one that will fail.
919 failed_config = """
920 [builddmaster]
921 vm_resume_command: test "%(vm_host)s = 'no-sir'"
922 """
923 config.push('failed_resume_command', failed_config)
924 self.addCleanup(config.pop, 'failed_resume_command')
925
926 # On failures, the response is a twisted `Failure` object containing
927 # a tuple.
928 def check_resume_failure(failure):
929 out, err, code = failure.value
930 # The process will exit with a return code of "1".
931 self.assertEqual(code, 1)
932 d = slave.resume()
933 d.addBoth(check_resume_failure)
934 return d
935
936 def test_resumeHost_timeout(self):
937 # On a resume timeouts, 'resumeHost' fires the returned deferred
938 # errorback with the `TimeoutError` failure.
939 self.slave_helper.getServerSlave()
940 slave = self.slave_helper.getClientSlave()
941
942 # Override the configuration command-line with one that will timeout.
943 timeout_config = """
944 [builddmaster]
945 vm_resume_command: sleep 5
946 socket_timeout: 1
947 """
948 config.push('timeout_resume_command', timeout_config)
949 self.addCleanup(config.pop, 'timeout_resume_command')
950
951 # On timeouts, the response is a twisted `Failure` object containing
952 # a `TimeoutError` error.
953 def check_resume_timeout(failure):
954 self.assertIsInstance(failure, Failure)
955 out, err, code = failure.value
956 self.assertEqual(code, signal.SIGKILL)
957 clock = Clock()
958 d = slave.resume(clock=clock)
959 # Move the clock beyond the socket_timeout but earlier than the
960 # sleep 5. This stops the test having to wait for the timeout.
961 # Fast tests FTW!
962 clock.advance(2)
963 d.addBoth(check_resume_timeout)
964 return d
965
966
967class TestSlaveTimeouts(TrialTestCase):
968 # Testing that the methods that call callRemote() all time out
969 # as required.
970
971 layer = TwistedLayer
972
973 def setUp(self):
974 super(TestSlaveTimeouts, self).setUp()
975 self.slave_helper = SlaveTestHelpers()
976 self.slave_helper.setUp()
977 self.addCleanup(self.slave_helper.cleanUp)
978 self.clock = Clock()
979 self.proxy = DeadProxy("url")
980 self.slave = self.slave_helper.getClientSlave(
981 reactor=self.clock, proxy=self.proxy)
982
983 def assertCancelled(self, d):
984 self.clock.advance(config.builddmaster.socket_timeout + 1)
985 return self.assertFailure(d, CancelledError)
986
987 def test_timeout_abort(self):
988 return self.assertCancelled(self.slave.abort())
989
990 def test_timeout_clean(self):
991 return self.assertCancelled(self.slave.clean())
992
993 def test_timeout_echo(self):
994 return self.assertCancelled(self.slave.echo())
995
996 def test_timeout_info(self):
997 return self.assertCancelled(self.slave.info())
998
999 def test_timeout_status(self):
1000 return self.assertCancelled(self.slave.status())
1001
1002 def test_timeout_ensurepresent(self):
1003 return self.assertCancelled(
1004 self.slave.ensurepresent(None, None, None, None))
1005
1006 def test_timeout_build(self):
1007 return self.assertCancelled(
1008 self.slave.build(None, None, None, None, None))
1009
1010
1011class TestSlaveWithLibrarian(TrialTestCase):
1012 """Tests that need more of Launchpad to run."""
1013
1014 layer = TwistedLaunchpadZopelessLayer
1015
1016 def setUp(self):
1017 super(TestSlaveWithLibrarian, self)
1018 self.slave_helper = SlaveTestHelpers()
1019 self.slave_helper.setUp()
1020 self.addCleanup(self.slave_helper.cleanUp)
1021 self.factory = LaunchpadObjectFactory()
1022 login_as(ANONYMOUS)
1023 self.addCleanup(logout)
1024
1025 def test_ensurepresent_librarian(self):
1026 # ensurepresent, when given an http URL for a file will download the
1027 # file from that URL and report that the file is present, and it was
1028 # downloaded.
1029
1030 # Use the Librarian because it's a "convenient" web server.
1031 lf = self.factory.makeLibraryFileAlias(
1032 'HelloWorld.txt', content="Hello World")
1033 self.layer.txn.commit()
1034 self.slave_helper.getServerSlave()
1035 slave = self.slave_helper.getClientSlave()
1036 d = slave.ensurepresent(
1037 lf.content.sha1, lf.http_url, "", "")
1038 d.addCallback(self.assertEqual, [True, 'Download'])
1039 return d
1040
1041 def test_retrieve_files_from_filecache(self):
1042 # Files that are present on the slave can be downloaded with a
1043 # filename made from the sha1 of the content underneath the
1044 # 'filecache' directory.
1045 content = "Hello World"
1046 lf = self.factory.makeLibraryFileAlias(
1047 'HelloWorld.txt', content=content)
1048 self.layer.txn.commit()
1049 expected_url = '%s/filecache/%s' % (
1050 self.slave_helper.BASE_URL, lf.content.sha1)
1051 self.slave_helper.getServerSlave()
1052 slave = self.slave_helper.getClientSlave()
1053 d = slave.ensurepresent(
1054 lf.content.sha1, lf.http_url, "", "")
1055 def check_file(ignored):
1056 d = getPage(expected_url.encode('utf8'))
1057 return d.addCallback(self.assertEqual, content)
1058 return d.addCallback(check_file)
1059631
=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
--- lib/lp/buildmaster/tests/test_manager.py 2010-10-19 13:58:21 +0000
+++ lib/lp/buildmaster/tests/test_manager.py 2010-12-07 16:29:13 +0000
@@ -6,7 +6,6 @@
6import os6import os
7import signal7import signal
8import time8import time
9import xmlrpclib
109
11import transaction10import transaction
1211
@@ -15,7 +14,9 @@
15 reactor,14 reactor,
16 task,15 task,
17 )16 )
17from twisted.internet.error import ConnectionClosed
18from twisted.internet.task import (18from twisted.internet.task import (
19 Clock,
19 deferLater,20 deferLater,
20 )21 )
21from twisted.python.failure import Failure22from twisted.python.failure import Failure
@@ -29,45 +30,577 @@
29 ANONYMOUS,30 ANONYMOUS,
30 login,31 login,
31 )32 )
32from canonical.launchpad.scripts.logger import (33from canonical.launchpad.scripts.logger import BufferLogger
33 QuietFakeLogger,
34 )
35from canonical.testing.layers import (34from canonical.testing.layers import (
36 LaunchpadScriptLayer,35 LaunchpadScriptLayer,
37 TwistedLaunchpadZopelessLayer,36 LaunchpadZopelessLayer,
38 TwistedLayer,37 TwistedLayer,
39 ZopelessDatabaseLayer,
40 )38 )
41from lp.buildmaster.enums import BuildStatus39from lp.buildmaster.enums import BuildStatus
42from lp.buildmaster.interfaces.builder import IBuilderSet40from lp.buildmaster.interfaces.builder import IBuilderSet
43from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet41from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
44from lp.buildmaster.manager import (42from lp.buildmaster.manager import (
45 assessFailureCounts,43 BaseDispatchResult,
44 buildd_success_result_map,
46 BuilddManager,45 BuilddManager,
46 FailDispatchResult,
47 NewBuildersScanner,47 NewBuildersScanner,
48 RecordingSlave,
49 ResetDispatchResult,
48 SlaveScanner,50 SlaveScanner,
49 )51 )
50from lp.buildmaster.model.builder import Builder
51from lp.buildmaster.tests.harness import BuilddManagerTestSetup52from lp.buildmaster.tests.harness import BuilddManagerTestSetup
52from lp.buildmaster.tests.mock_slaves import (53from lp.buildmaster.tests.mock_slaves import BuildingSlave
53 BrokenSlave,
54 BuildingSlave,
55 OkSlave,
56 )
57from lp.registry.interfaces.distribution import IDistributionSet54from lp.registry.interfaces.distribution import IDistributionSet
58from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet55from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet
59from lp.testing import TestCaseWithFactory56from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
57from lp.testing import TestCase as LaunchpadTestCase
60from lp.testing.factory import LaunchpadObjectFactory58from lp.testing.factory import LaunchpadObjectFactory
61from lp.testing.fakemethod import FakeMethod59from lp.testing.fakemethod import FakeMethod
62from lp.testing.sampledata import BOB_THE_BUILDER_NAME60from lp.testing.sampledata import BOB_THE_BUILDER_NAME
6361
6462
63class TestRecordingSlaves(TrialTestCase):
64 """Tests for the recording slave class."""
65 layer = TwistedLayer
66
67 def setUp(self):
68 """Setup a fresh `RecordingSlave` for tests."""
69 TrialTestCase.setUp(self)
70 self.slave = RecordingSlave(
71 'foo', 'http://foo:8221/rpc', 'foo.host')
72
73 def test_representation(self):
74 """`RecordingSlave` has a custom representation.
75
76 It encloses builder name and xmlrpc url for debug purposes.
77 """
78 self.assertEqual('<foo:http://foo:8221/rpc>', repr(self.slave))
79
80 def assert_ensurepresent(self, func):
81 """Helper function to test results from calling ensurepresent."""
82 self.assertEqual(
83 [True, 'Download'],
84 func('boing', 'bar', 'baz'))
85 self.assertEqual(
86 [('ensurepresent', ('boing', 'bar', 'baz'))],
87 self.slave.calls)
88
89 def test_ensurepresent(self):
90 """`RecordingSlave.ensurepresent` always succeeds.
91
92 It returns the expected succeed code and records the interaction
93 information for later use.
94 """
95 self.assert_ensurepresent(self.slave.ensurepresent)
96
97 def test_sendFileToSlave(self):
 98 """RecordingSlave.sendFileToSlave always succeeds.
99
100 It calls ensurepresent() and hence returns the same results.
101 """
102 self.assert_ensurepresent(self.slave.sendFileToSlave)
103
104 def test_build(self):
105 """`RecordingSlave.build` always succeeds.
106
107 It returns the expected succeed code and records the interaction
108 information for later use.
109 """
110 self.assertEqual(
111 ['BuilderStatus.BUILDING', 'boing'],
112 self.slave.build('boing', 'bar', 'baz'))
113 self.assertEqual(
114 [('build', ('boing', 'bar', 'baz'))],
115 self.slave.calls)
116
117 def test_resume(self):
 118 """`RecordingSlave.resume` always returns success."""
119 # Resume isn't requested in a just-instantiated RecordingSlave.
120 self.assertFalse(self.slave.resume_requested)
121
 122 # When resume is called, it returns the success list and marks
123 # the slave for resuming.
124 self.assertEqual(['', '', os.EX_OK], self.slave.resume())
125 self.assertTrue(self.slave.resume_requested)
126
127 def test_resumeHost_success(self):
128 # On a successful resume resumeHost() fires the returned deferred
129 # callback with 'None'.
130
131 # The configuration testing command-line.
132 self.assertEqual(
133 'echo %(vm_host)s', config.builddmaster.vm_resume_command)
134
135 # On success the response is None.
136 def check_resume_success(response):
137 out, err, code = response
138 self.assertEqual(os.EX_OK, code)
139 self.assertEqual("%s\n" % self.slave.vm_host, out)
140 d = self.slave.resumeSlave()
141 d.addBoth(check_resume_success)
142 return d
143
144 def test_resumeHost_failure(self):
145 # On a failed resume, 'resumeHost' fires the returned deferred
146 # errorback with the `ProcessTerminated` failure.
147
148 # Override the configuration command-line with one that will fail.
149 failed_config = """
150 [builddmaster]
151 vm_resume_command: test "%(vm_host)s = 'no-sir'"
152 """
153 config.push('failed_resume_command', failed_config)
154 self.addCleanup(config.pop, 'failed_resume_command')
155
156 # On failures, the response is a twisted `Failure` object containing
157 # a tuple.
158 def check_resume_failure(failure):
159 out, err, code = failure.value
160 # The process will exit with a return code of "1".
161 self.assertEqual(code, 1)
162 d = self.slave.resumeSlave()
163 d.addBoth(check_resume_failure)
164 return d
165
166 def test_resumeHost_timeout(self):
167 # On a resume timeouts, 'resumeHost' fires the returned deferred
168 # errorback with the `TimeoutError` failure.
169
170 # Override the configuration command-line with one that will timeout.
171 timeout_config = """
172 [builddmaster]
173 vm_resume_command: sleep 5
174 socket_timeout: 1
175 """
176 config.push('timeout_resume_command', timeout_config)
177 self.addCleanup(config.pop, 'timeout_resume_command')
178
179 # On timeouts, the response is a twisted `Failure` object containing
180 # a `TimeoutError` error.
181 def check_resume_timeout(failure):
182 self.assertIsInstance(failure, Failure)
183 out, err, code = failure.value
184 self.assertEqual(code, signal.SIGKILL)
185 clock = Clock()
186 d = self.slave.resumeSlave(clock=clock)
187 # Move the clock beyond the socket_timeout but earlier than the
188 # sleep 5. This stops the test having to wait for the timeout.
189 # Fast tests FTW!
190 clock.advance(2)
191 d.addBoth(check_resume_timeout)
192 return d
193
194
195class TestingXMLRPCProxy:
196 """This class mimics a twisted XMLRPC Proxy class."""
197
198 def __init__(self, failure_info=None):
199 self.calls = []
200 self.failure_info = failure_info
201 self.works = failure_info is None
202
203 def callRemote(self, *args):
204 self.calls.append(args)
205 if self.works:
206 result = buildd_success_result_map.get(args[0])
207 else:
208 result = 'boing'
209 return defer.succeed([result, self.failure_info])
210
211
212class TestingResetDispatchResult(ResetDispatchResult):
213 """Override the evaluation method to simply annotate the call."""
214
215 def __init__(self, slave, info=None):
216 ResetDispatchResult.__init__(self, slave, info)
217 self.processed = False
218
219 def __call__(self):
220 self.processed = True
221
222
223class TestingFailDispatchResult(FailDispatchResult):
224 """Override the evaluation method to simply annotate the call."""
225
226 def __init__(self, slave, info=None):
227 FailDispatchResult.__init__(self, slave, info)
228 self.processed = False
229
230 def __call__(self):
231 self.processed = True
232
233
234class TestingSlaveScanner(SlaveScanner):
 235 """Override the dispatch result factories."""
236
237 reset_result = TestingResetDispatchResult
238 fail_result = TestingFailDispatchResult
239
240
241class TestSlaveScanner(TrialTestCase):
242 """Tests for the actual build slave manager."""
243 layer = LaunchpadZopelessLayer
244
245 def setUp(self):
246 TrialTestCase.setUp(self)
247 self.manager = TestingSlaveScanner(
248 BOB_THE_BUILDER_NAME, BufferLogger())
249
250 self.fake_builder_url = 'http://bob.buildd:8221/'
251 self.fake_builder_host = 'bob.host'
252
253 # We will use an instrumented SlaveScanner instance for tests in
254 # this context.
255
256 # Stop cyclic execution and record the end of the cycle.
257 self.stopped = False
258
259 def testNextCycle():
260 self.stopped = True
261
262 self.manager.scheduleNextScanCycle = testNextCycle
263
264 # Return the testing Proxy version.
265 self.test_proxy = TestingXMLRPCProxy()
266
267 def testGetProxyForSlave(slave):
268 return self.test_proxy
269 self.manager._getProxyForSlave = testGetProxyForSlave
270
271 # Deactivate the 'scan' method.
272 def testScan():
273 pass
274 self.manager.scan = testScan
275
276 # Stop automatic collection of dispatching results.
277 def testslaveConversationEnded():
278 pass
279 self._realslaveConversationEnded = self.manager.slaveConversationEnded
280 self.manager.slaveConversationEnded = testslaveConversationEnded
281
282 def assertIsDispatchReset(self, result):
283 self.assertTrue(
284 isinstance(result, TestingResetDispatchResult),
285 'Dispatch failure did not result in a ResetBuildResult object')
286
287 def assertIsDispatchFail(self, result):
288 self.assertTrue(
289 isinstance(result, TestingFailDispatchResult),
290 'Dispatch failure did not result in a FailBuildResult object')
291
292 def test_checkResume(self):
293 """`SlaveScanner.checkResume` is chained after resume requests.
294
295 If the resume request succeed it returns None, otherwise it returns
296 a `ResetBuildResult` (the one in the test context) that will be
 297 collected and evaluated later.
298
299 See `RecordingSlave.resumeHost` for more information about the resume
300 result contents.
301 """
302 slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
303
304 successful_response = ['', '', os.EX_OK]
305 result = self.manager.checkResume(successful_response, slave)
306 self.assertEqual(
307 None, result, 'Successful resume checks should return None')
308
309 failed_response = ['stdout', 'stderr', 1]
310 result = self.manager.checkResume(failed_response, slave)
311 self.assertIsDispatchReset(result)
312 self.assertEqual(
313 '<foo:http://foo.buildd:8221/> reset failure', repr(result))
314 self.assertEqual(
315 result.info, "stdout\nstderr")
316
317 def test_fail_to_resume_slave_resets_slave(self):
318 # If an attempt to resume and dispatch a slave fails, we reset the
319 # slave by calling self.reset_result(slave)().
320
321 reset_result_calls = []
322
323 class LoggingResetResult(BaseDispatchResult):
324 """A DispatchResult that logs calls to itself.
325
326 This *must* subclass BaseDispatchResult, otherwise finishCycle()
327 won't treat it like a dispatch result.
328 """
329
330 def __init__(self, slave, info=None):
331 self.slave = slave
332
333 def __call__(self):
334 reset_result_calls.append(self.slave)
335
336 # Make a failing slave that is requesting a resume.
337 slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
338 slave.resume_requested = True
339 slave.resumeSlave = lambda: deferLater(
340 reactor, 0, defer.fail, Failure(('out', 'err', 1)))
341
342 # Make the manager log the reset result calls.
343 self.manager.reset_result = LoggingResetResult
344
345 # We only care about this one slave. Reset the list of manager
346 # deferreds in case setUp did something unexpected.
347 self.manager._deferred_list = []
348
349 # Here, we're patching the slaveConversationEnded method so we can
350 # get an extra callback at the end of it, so we can
351 # verify that the reset_result was really called.
352 def _slaveConversationEnded():
353 d = self._realslaveConversationEnded()
354 return d.addCallback(
355 lambda ignored: self.assertEqual([slave], reset_result_calls))
356 self.manager.slaveConversationEnded = _slaveConversationEnded
357
358 self.manager.resumeAndDispatch(slave)
359
360 def test_failed_to_resume_slave_ready_for_reset(self):
361 # When a slave fails to resume, the manager has a Deferred in its
362 # Deferred list that is ready to fire with a ResetDispatchResult.
363
364 # Make a failing slave that is requesting a resume.
365 slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host')
366 slave.resume_requested = True
367 slave.resumeSlave = lambda: defer.fail(Failure(('out', 'err', 1)))
368
369 # We only care about this one slave. Reset the list of manager
370 # deferreds in case setUp did something unexpected.
371 self.manager._deferred_list = []
372 # Restore the slaveConversationEnded method. It's very relevant to
373 # this test.
374 self.manager.slaveConversationEnded = self._realslaveConversationEnded
375 self.manager.resumeAndDispatch(slave)
376 [d] = self.manager._deferred_list
377
378 # The Deferred for our failing slave should be ready to fire
379 # successfully with a ResetDispatchResult.
380 def check_result(result):
381 self.assertIsInstance(result, ResetDispatchResult)
382 self.assertEqual(slave, result.slave)
383 self.assertFalse(result.processed)
384 return d.addCallback(check_result)
385
386 def _setUpSlaveAndBuilder(self, builder_failure_count=None,
387 job_failure_count=None):
388 # Helper function to set up a builder and its recording slave.
389 if builder_failure_count is None:
390 builder_failure_count = 0
391 if job_failure_count is None:
392 job_failure_count = 0
393 slave = RecordingSlave(
394 BOB_THE_BUILDER_NAME, self.fake_builder_url,
395 self.fake_builder_host)
396 bob_builder = getUtility(IBuilderSet)[slave.name]
397 bob_builder.failure_count = builder_failure_count
398 bob_builder.getCurrentBuildFarmJob().failure_count = job_failure_count
399 return slave, bob_builder
400
401 def test_checkDispatch_success(self):
402 # SlaveScanner.checkDispatch returns None for a successful
403 # dispatch.
404
405 """
406 If the dispatch request fails or a unknown method is given, it
407 returns a `FailDispatchResult` (in the test context) that will
408 be evaluated later.
409
410 Builders will be marked as failed if the following responses
411 categories are received.
412
413 * Legitimate slave failures: when the response is a list with 2
414 elements but the first element ('status') does not correspond to
415 the expected 'success' result. See `buildd_success_result_map`.
416
417 * Unexpected (code) failures: when the given 'method' is unknown
418 or the response isn't a 2-element list or Failure instance.
419
420 Communication failures (a twisted `Failure` instance) will simply
421 cause the builder to be reset, a `ResetDispatchResult` object is
422 returned. In other words, network failures are ignored in this
423 stage, broken builders will be identified and marked as so
424 during 'scan()' stage.
425
426 On success dispatching it returns None.
427 """
428 slave, bob_builder = self._setUpSlaveAndBuilder(
429 builder_failure_count=0, job_failure_count=0)
430
431 # Successful legitimate response, None is returned.
432 successful_response = [
433 buildd_success_result_map.get('ensurepresent'), 'cool builder']
434 result = self.manager.checkDispatch(
435 successful_response, 'ensurepresent', slave)
436 self.assertEqual(
437 None, result, 'Successful dispatch checks should return None')
438
439 def test_checkDispatch_first_fail(self):
440 # Failed legitimate response, results in FailDispatchResult and
441 # failure_count on the job and the builder are both incremented.
442 slave, bob_builder = self._setUpSlaveAndBuilder(
443 builder_failure_count=0, job_failure_count=0)
444
445 failed_response = [False, 'uncool builder']
446 result = self.manager.checkDispatch(
447 failed_response, 'ensurepresent', slave)
448 self.assertIsDispatchFail(result)
449 self.assertEqual(
450 repr(result),
451 '<bob:%s> failure (uncool builder)' % self.fake_builder_url)
452 self.assertEqual(1, bob_builder.failure_count)
453 self.assertEqual(
454 1, bob_builder.getCurrentBuildFarmJob().failure_count)
455
456 def test_checkDispatch_second_reset_fail_by_builder(self):
457 # Twisted Failure response, results in a `FailDispatchResult`.
458 slave, bob_builder = self._setUpSlaveAndBuilder(
459 builder_failure_count=1, job_failure_count=0)
460
461 twisted_failure = Failure(ConnectionClosed('Boom!'))
462 result = self.manager.checkDispatch(
463 twisted_failure, 'ensurepresent', slave)
464 self.assertIsDispatchFail(result)
465 self.assertEqual(
466 '<bob:%s> failure (None)' % self.fake_builder_url, repr(result))
467 self.assertEqual(2, bob_builder.failure_count)
468 self.assertEqual(
469 1, bob_builder.getCurrentBuildFarmJob().failure_count)
470
471 def test_checkDispatch_second_comms_fail_by_builder(self):
472 # Unexpected response, results in a `FailDispatchResult`.
473 slave, bob_builder = self._setUpSlaveAndBuilder(
474 builder_failure_count=1, job_failure_count=0)
475
476 unexpected_response = [1, 2, 3]
477 result = self.manager.checkDispatch(
478 unexpected_response, 'build', slave)
479 self.assertIsDispatchFail(result)
480 self.assertEqual(
481 '<bob:%s> failure '
482 '(Unexpected response: [1, 2, 3])' % self.fake_builder_url,
483 repr(result))
484 self.assertEqual(2, bob_builder.failure_count)
485 self.assertEqual(
486 1, bob_builder.getCurrentBuildFarmJob().failure_count)
487
488 def test_checkDispatch_second_comms_fail_by_job(self):
489 # Unknown method was given, results in a `FailDispatchResult`.
490 # This could be caused by a faulty job which would fail the job.
491 slave, bob_builder = self._setUpSlaveAndBuilder(
492 builder_failure_count=0, job_failure_count=1)
493
494 successful_response = [
495 buildd_success_result_map.get('ensurepresent'), 'cool builder']
496 result = self.manager.checkDispatch(
497 successful_response, 'unknown-method', slave)
498 self.assertIsDispatchFail(result)
499 self.assertEqual(
500 '<bob:%s> failure '
501 '(Unknown slave method: unknown-method)' % self.fake_builder_url,
502 repr(result))
503 self.assertEqual(1, bob_builder.failure_count)
504 self.assertEqual(
505 2, bob_builder.getCurrentBuildFarmJob().failure_count)
506
507 def test_initiateDispatch(self):
508 """Check `dispatchBuild` in various scenarios.
509
510 When there are no recording slaves (i.e. no build got dispatched
511 in scan()) it simply finishes the cycle.
512
513 When there is a recording slave with pending slave calls, they are
514 performed and if they all succeed the cycle is finished with no
515 errors.
516
517 On slave call failure the chain is stopped immediately and an
518 FailDispatchResult is collected while finishing the cycle.
519 """
520 def check_no_events(results):
521 errors = [
522 r for s, r in results if isinstance(r, BaseDispatchResult)]
523 self.assertEqual(0, len(errors))
524
525 def check_events(results):
526 [error] = [r for s, r in results if r is not None]
527 self.assertEqual(
528 '<bob:%s> failure (very broken slave)'
529 % self.fake_builder_url,
530 repr(error))
531 self.assertTrue(error.processed)
532
533 def _wait_on_deferreds_then_check_no_events():
534 dl = self._realslaveConversationEnded()
535 dl.addCallback(check_no_events)
536
537 def _wait_on_deferreds_then_check_events():
538 dl = self._realslaveConversationEnded()
539 dl.addCallback(check_events)
540
541 # A functional slave charged with some interactions.
542 slave = RecordingSlave(
543 BOB_THE_BUILDER_NAME, self.fake_builder_url,
544 self.fake_builder_host)
545 slave.ensurepresent('arg1', 'arg2', 'arg3')
546 slave.build('arg1', 'arg2', 'arg3')
547
548 # If the previous step (resuming) has failed nothing gets dispatched.
549 reset_result = ResetDispatchResult(slave)
550 result = self.manager.initiateDispatch(reset_result, slave)
551 self.assertTrue(result is reset_result)
552 self.assertFalse(slave.resume_requested)
553 self.assertEqual(0, len(self.manager._deferred_list))
554
 555 # Operation with the default (functional slave), no resets or
556 # failures results are triggered.
557 slave.resume()
558 result = self.manager.initiateDispatch(None, slave)
559 self.assertEqual(None, result)
560 self.assertTrue(slave.resume_requested)
561 self.assertEqual(
562 [('ensurepresent', 'arg1', 'arg2', 'arg3'),
563 ('build', 'arg1', 'arg2', 'arg3')],
564 self.test_proxy.calls)
565 self.assertEqual(2, len(self.manager._deferred_list))
566
567 # Monkey patch the slaveConversationEnded method so we can chain a
568 # callback to check the end of the result chain.
569 self.manager.slaveConversationEnded = \
570 _wait_on_deferreds_then_check_no_events
571 events = self.manager.slaveConversationEnded()
572
573 # Create a broken slave and insert interaction that will
574 # cause the builder to be marked as fail.
575 self.test_proxy = TestingXMLRPCProxy('very broken slave')
576 slave = RecordingSlave(
577 BOB_THE_BUILDER_NAME, self.fake_builder_url,
578 self.fake_builder_host)
579 slave.ensurepresent('arg1', 'arg2', 'arg3')
580 slave.build('arg1', 'arg2', 'arg3')
581
582 result = self.manager.initiateDispatch(None, slave)
583 self.assertEqual(None, result)
584 self.assertEqual(3, len(self.manager._deferred_list))
585 self.assertEqual(
586 [('ensurepresent', 'arg1', 'arg2', 'arg3')],
587 self.test_proxy.calls)
588
589 # Monkey patch the slaveConversationEnded method so we can chain a
590 # callback to check the end of the result chain.
591 self.manager.slaveConversationEnded = \
592 _wait_on_deferreds_then_check_events
593 events = self.manager.slaveConversationEnded()
594
595 return events
596
597
65class TestSlaveScannerScan(TrialTestCase):598class TestSlaveScannerScan(TrialTestCase):
66 """Tests `SlaveScanner.scan` method.599 """Tests `SlaveScanner.scan` method.
67600
68 This method uses the old framework for scanning and dispatching builds.601 This method uses the old framework for scanning and dispatching builds.
69 """602 """
70 layer = TwistedLaunchpadZopelessLayer603 layer = LaunchpadZopelessLayer
71604
72 def setUp(self):605 def setUp(self):
73 """Setup TwistedLayer, TrialTestCase and BuilddSlaveTest.606 """Setup TwistedLayer, TrialTestCase and BuilddSlaveTest.
@@ -75,18 +608,19 @@
75 Also adjust the sampledata in a way a build can be dispatched to608 Also adjust the sampledata in a way a build can be dispatched to
76 'bob' builder.609 'bob' builder.
77 """610 """
78 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
79 TwistedLayer.testSetUp()611 TwistedLayer.testSetUp()
80 TrialTestCase.setUp(self)612 TrialTestCase.setUp(self)
81 self.slave = BuilddSlaveTestSetup()613 self.slave = BuilddSlaveTestSetup()
82 self.slave.setUp()614 self.slave.setUp()
83615
84 # Creating the required chroots needed for dispatching.616 # Creating the required chroots needed for dispatching.
617 login('foo.bar@canonical.com')
85 test_publisher = SoyuzTestPublisher()618 test_publisher = SoyuzTestPublisher()
86 ubuntu = getUtility(IDistributionSet).getByName('ubuntu')619 ubuntu = getUtility(IDistributionSet).getByName('ubuntu')
87 hoary = ubuntu.getSeries('hoary')620 hoary = ubuntu.getSeries('hoary')
88 test_publisher.setUpDefaultDistroSeries(hoary)621 test_publisher.setUpDefaultDistroSeries(hoary)
89 test_publisher.addFakeChroots()622 test_publisher.addFakeChroots()
623 login(ANONYMOUS)
90624
91 def tearDown(self):625 def tearDown(self):
92 self.slave.tearDown()626 self.slave.tearDown()
@@ -94,7 +628,8 @@
94 TwistedLayer.testTearDown()628 TwistedLayer.testTearDown()
95629
96 def _resetBuilder(self, builder):630 def _resetBuilder(self, builder):
97 """Reset the given builder and its job."""631 """Reset the given builder and it's job."""
632 login('foo.bar@canonical.com')
98633
99 builder.builderok = True634 builder.builderok = True
100 job = builder.currentjob635 job = builder.currentjob
@@ -102,6 +637,7 @@
102 job.reset()637 job.reset()
103638
104 transaction.commit()639 transaction.commit()
640 login(ANONYMOUS)
105641
106 def assertBuildingJob(self, job, builder, logtail=None):642 def assertBuildingJob(self, job, builder, logtail=None):
107 """Assert the given job is building on the given builder."""643 """Assert the given job is building on the given builder."""
@@ -117,25 +653,55 @@
117 self.assertEqual(build.status, BuildStatus.BUILDING)653 self.assertEqual(build.status, BuildStatus.BUILDING)
118 self.assertEqual(job.logtail, logtail)654 self.assertEqual(job.logtail, logtail)
119655
120 def _getScanner(self, builder_name=None):656 def _getManager(self):
121 """Instantiate a SlaveScanner object.657 """Instantiate a SlaveScanner object.
122658
123 Replace its default logging handler by a testing version.659 Replace its default logging handler by a testing version.
124 """660 """
125 if builder_name is None:661 manager = SlaveScanner(BOB_THE_BUILDER_NAME, BufferLogger())
126 builder_name = BOB_THE_BUILDER_NAME662 manager.logger.name = 'slave-scanner'
127 scanner = SlaveScanner(builder_name, QuietFakeLogger())
128 scanner.logger.name = 'slave-scanner'
129663
130 return scanner664 return manager
131665
132 def _checkDispatch(self, slave, builder):666 def _checkDispatch(self, slave, builder):
133 # SlaveScanner.scan returns a slave when a dispatch was667 """`SlaveScanner.scan` returns a `RecordingSlave`.
134 # successful. We also check that the builder has a job on it.668
135669 The single slave returned should match the given builder and
136 self.assertTrue(slave is not None, "Expected a slave.")670 contain interactions that should be performed asynchronously for
671 properly dispatching the sampledata job.
672 """
673 self.assertFalse(
674 slave is None, "Unexpected recording_slaves.")
675
676 self.assertEqual(slave.name, builder.name)
677 self.assertEqual(slave.url, builder.url)
678 self.assertEqual(slave.vm_host, builder.vm_host)
137 self.assertEqual(0, builder.failure_count)679 self.assertEqual(0, builder.failure_count)
138 self.assertTrue(builder.currentjob is not None)680
681 self.assertEqual(
682 [('ensurepresent',
683 ('0feca720e2c29dafb2c900713ba560e03b758711',
684 'http://localhost:58000/93/fake_chroot.tar.gz',
685 '', '')),
686 ('ensurepresent',
687 ('4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33',
688 'http://localhost:58000/43/alsa-utils_1.0.9a-4ubuntu1.dsc',
689 '', '')),
690 ('build',
691 ('6358a89e2215e19b02bf91e2e4d009640fae5cf8',
692 'binarypackage', '0feca720e2c29dafb2c900713ba560e03b758711',
693 {'alsa-utils_1.0.9a-4ubuntu1.dsc':
694 '4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33'},
695 {'arch_indep': True,
696 'arch_tag': 'i386',
697 'archive_private': False,
698 'archive_purpose': 'PRIMARY',
699 'archives':
700 ['deb http://ftpmaster.internal/ubuntu hoary main'],
701 'build_debug_symbols': False,
702 'ogrecomponent': 'main',
703 'suite': u'hoary'}))],
704 slave.calls, "Job was not properly dispatched.")
139705
140 def testScanDispatchForResetBuilder(self):706 def testScanDispatchForResetBuilder(self):
141 # A job gets dispatched to the sampledata builder after it's reset.707 # A job gets dispatched to the sampledata builder after it's reset.
@@ -143,27 +709,26 @@
143 # Reset sampledata builder.709 # Reset sampledata builder.
144 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]710 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
145 self._resetBuilder(builder)711 self._resetBuilder(builder)
146 builder.setSlaveForTesting(OkSlave())
147 # Set this to 1 here so that _checkDispatch can make sure it's712 # Set this to 1 here so that _checkDispatch can make sure it's
148 # reset to 0 after a successful dispatch.713 # reset to 0 after a successful dispatch.
149 builder.failure_count = 1714 builder.failure_count = 1
150715
151 # Run 'scan' and check its result.716 # Run 'scan' and check its result.
152 self.layer.txn.commit()717 LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
153 self.layer.switchDbUser(config.builddmaster.dbuser)718 manager = self._getManager()
154 scanner = self._getScanner()719 d = defer.maybeDeferred(manager.scan)
155 d = defer.maybeDeferred(scanner.scan)
156 d.addCallback(self._checkDispatch, builder)720 d.addCallback(self._checkDispatch, builder)
157 return d721 return d
158722
159 def _checkNoDispatch(self, slave, builder):723 def _checkNoDispatch(self, recording_slave, builder):
160 """Assert that no dispatch has occurred.724 """Assert that no dispatch has occurred.
161725
 162 'slave' is None, so no interactions would be passed726 'recording_slave' is None, so no interactions would be passed
 163 to the asynchronous dispatcher and the builder remained active727 to the asynchronous dispatcher and the builder remained active
164 and IDLE.728 and IDLE.
165 """729 """
166 self.assertTrue(slave is None, "Unexpected slave.")730 self.assertTrue(
731 recording_slave is None, "Unexpected recording_slave.")
167732
168 builder = getUtility(IBuilderSet).get(builder.id)733 builder = getUtility(IBuilderSet).get(builder.id)
169 self.assertTrue(builder.builderok)734 self.assertTrue(builder.builderok)
@@ -188,9 +753,9 @@
188 login(ANONYMOUS)753 login(ANONYMOUS)
189754
190 # Run 'scan' and check its result.755 # Run 'scan' and check its result.
191 self.layer.switchDbUser(config.builddmaster.dbuser)756 LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
192 scanner = self._getScanner()757 manager = self._getManager()
193 d = defer.maybeDeferred(scanner.singleCycle)758 d = defer.maybeDeferred(manager.scan)
194 d.addCallback(self._checkNoDispatch, builder)759 d.addCallback(self._checkNoDispatch, builder)
195 return d760 return d
196761
@@ -228,9 +793,9 @@
228 login(ANONYMOUS)793 login(ANONYMOUS)
229794
230 # Run 'scan' and check its result.795 # Run 'scan' and check its result.
231 self.layer.switchDbUser(config.builddmaster.dbuser)796 LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
232 scanner = self._getScanner()797 manager = self._getManager()
233 d = defer.maybeDeferred(scanner.scan)798 d = defer.maybeDeferred(manager.scan)
234 d.addCallback(self._checkJobRescued, builder, job)799 d.addCallback(self._checkJobRescued, builder, job)
235 return d800 return d
236801
@@ -249,6 +814,8 @@
249 self.assertBuildingJob(job, builder, logtail='This is a build log')814 self.assertBuildingJob(job, builder, logtail='This is a build log')
250815
251 def testScanUpdatesBuildingJobs(self):816 def testScanUpdatesBuildingJobs(self):
817 # The job assigned to a broken builder is rescued.
818
252 # Enable sampledata builder attached to an appropriate testing819 # Enable sampledata builder attached to an appropriate testing
253 # slave. It will respond as if it was building the sampledata job.820 # slave. It will respond as if it was building the sampledata job.
254 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]821 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
@@ -263,174 +830,188 @@
263 self.assertBuildingJob(job, builder)830 self.assertBuildingJob(job, builder)
264831
265 # Run 'scan' and check its result.832 # Run 'scan' and check its result.
266 self.layer.switchDbUser(config.builddmaster.dbuser)833 LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser)
267 scanner = self._getScanner()834 manager = self._getManager()
268 d = defer.maybeDeferred(scanner.scan)835 d = defer.maybeDeferred(manager.scan)
269 d.addCallback(self._checkJobUpdated, builder, job)836 d.addCallback(self._checkJobUpdated, builder, job)
270 return d837 return d
271838
272 def test_scan_with_nothing_to_dispatch(self):839 def test_scan_assesses_failure_exceptions(self):
273 factory = LaunchpadObjectFactory()
274 builder = factory.makeBuilder()
275 builder.setSlaveForTesting(OkSlave())
276 scanner = self._getScanner(builder_name=builder.name)
277 d = scanner.scan()
278 return d.addCallback(self._checkNoDispatch, builder)
279
280 def test_scan_with_manual_builder(self):
281 # Reset sampledata builder.
282 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
283 self._resetBuilder(builder)
284 builder.setSlaveForTesting(OkSlave())
285 builder.manual = True
286 scanner = self._getScanner()
287 d = scanner.scan()
288 d.addCallback(self._checkNoDispatch, builder)
289 return d
290
291 def test_scan_with_not_ok_builder(self):
292 # Reset sampledata builder.
293 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
294 self._resetBuilder(builder)
295 builder.setSlaveForTesting(OkSlave())
296 builder.builderok = False
297 scanner = self._getScanner()
298 d = scanner.scan()
299 # Because the builder is not ok, we can't use _checkNoDispatch.
300 d.addCallback(
301 lambda ignored: self.assertIdentical(None, builder.currentjob))
302 return d
303
304 def test_scan_of_broken_slave(self):
305 builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]
306 self._resetBuilder(builder)
307 builder.setSlaveForTesting(BrokenSlave())
308 builder.failure_count = 0
309 scanner = self._getScanner(builder_name=builder.name)
310 d = scanner.scan()
311 return self.assertFailure(d, xmlrpclib.Fault)
312
313 def _assertFailureCounting(self, builder_count, job_count,
314 expected_builder_count, expected_job_count):
315 # If scan() fails with an exception, failure_counts should be840 # If scan() fails with an exception, failure_counts should be
316 # incremented. What we do with the results of the failure841 # incremented and tested.
317 # counts is tested below separately, this test just makes sure that
318 # scan() is setting the counts.
319 def failing_scan():842 def failing_scan():
320 return defer.fail(Exception("fake exception"))843 raise Exception("fake exception")
321 scanner = self._getScanner()844 manager = self._getManager()
322 scanner.scan = failing_scan845 manager.scan = failing_scan
846 manager.scheduleNextScanCycle = FakeMethod()
323 from lp.buildmaster import manager as manager_module847 from lp.buildmaster import manager as manager_module
324 self.patch(manager_module, 'assessFailureCounts', FakeMethod())848 self.patch(manager_module, 'assessFailureCounts', FakeMethod())
325 builder = getUtility(IBuilderSet)[scanner.builder_name]849 builder = getUtility(IBuilderSet)[manager.builder_name]
326850
327 builder.failure_count = builder_count851 # Failure counts start at zero.
328 builder.currentjob.specific_job.build.failure_count = job_count852 self.assertEqual(0, builder.failure_count)
329 # The _scanFailed() calls abort, so make sure our existing853 self.assertEqual(
330 # failure counts are persisted.854 0, builder.currentjob.specific_job.build.failure_count)
331 self.layer.txn.commit()855
332856 # startCycle() calls scan() which is our fake one that throws an
333 # singleCycle() calls scan() which is our fake one that throws an
334 # exception.857 # exception.
335 d = scanner.singleCycle()858 manager.startCycle()
336859
337 # Failure counts should be updated, and the assessment method860 # Failure counts should be updated, and the assessment method
338 # should have been called. The actual behaviour is tested below861 # should have been called.
339 # in TestFailureAssessments.862 self.assertEqual(1, builder.failure_count)
340 def got_scan(ignored):863 self.assertEqual(
341 self.assertEqual(expected_builder_count, builder.failure_count)864 1, builder.currentjob.specific_job.build.failure_count)
342 self.assertEqual(865
343 expected_job_count,866 self.assertEqual(
344 builder.currentjob.specific_job.build.failure_count)867 1, manager_module.assessFailureCounts.call_count)
345 self.assertEqual(868
346 1, manager_module.assessFailureCounts.call_count)869
347870class TestDispatchResult(LaunchpadTestCase):
348 return d.addCallback(got_scan)871 """Tests `BaseDispatchResult` variations.
349872
350 def test_scan_first_fail(self):873 Variations of `BaseDispatchResult` when evaluated update the database
351 # The first failure of a job should result in the failure_count874 information according to their purpose.
352 # on the job and the builder both being incremented.875 """
353 self._assertFailureCounting(876
354 builder_count=0, job_count=0, expected_builder_count=1,877 layer = LaunchpadZopelessLayer
355 expected_job_count=1)878
356879 def _getBuilder(self, name):
357 def test_scan_second_builder_fail(self):880 """Return a fixed `IBuilder` instance from the sampledata.
358 # The first failure of a job should result in the failure_count881
359 # on the job and the builder both being incremented.882 Ensure it's active (builderok=True) and it has a in-progress job.
360 self._assertFailureCounting(883 """
361 builder_count=1, job_count=0, expected_builder_count=2,884 login('foo.bar@canonical.com')
362 expected_job_count=1)885
363886 builder = getUtility(IBuilderSet)[name]
364 def test_scan_second_job_fail(self):887 builder.builderok = True
365 # The first failure of a job should result in the failure_count888
366 # on the job and the builder both being incremented.889 job = builder.currentjob
367 self._assertFailureCounting(890 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)
368 builder_count=0, job_count=1, expected_builder_count=1,891 self.assertEqual(
369 expected_job_count=2)892 'i386 build of mozilla-firefox 0.9 in ubuntu hoary RELEASE',
370893 build.title)
371 def test_scanFailed_handles_lack_of_a_job_on_the_builder(self):894
372 def failing_scan():895 self.assertEqual('BUILDING', build.status.name)
373 return defer.fail(Exception("fake exception"))896 self.assertNotEqual(None, job.builder)
374 scanner = self._getScanner()897 self.assertNotEqual(None, job.date_started)
375 scanner.scan = failing_scan898 self.assertNotEqual(None, job.logtail)
376 builder = getUtility(IBuilderSet)[scanner.builder_name]899
377 builder.failure_count = Builder.FAILURE_THRESHOLD900 transaction.commit()
378 builder.currentjob.reset()901
379 self.layer.txn.commit()902 return builder, job.id
380903
381 d = scanner.singleCycle()904 def assertBuildqueueIsClean(self, buildqueue):
382905 # Check that the buildqueue is reset.
383 def scan_finished(ignored):906 self.assertEqual(None, buildqueue.builder)
384 self.assertFalse(builder.builderok)907 self.assertEqual(None, buildqueue.date_started)
385908 self.assertEqual(None, buildqueue.logtail)
386 return d.addCallback(scan_finished)909
387910 def assertBuilderIsClean(self, builder):
388 def test_fail_to_resume_slave_resets_job(self):911 # Check that the builder is ready for a new build.
389 # If an attempt to resume and dispatch a slave fails, it should912 self.assertTrue(builder.builderok)
390 # reset the job via job.reset()913 self.assertIs(None, builder.failnotes)
391914 self.assertIs(None, builder.currentjob)
392 # Make a slave with a failing resume() method.915
393 slave = OkSlave()916 def testResetDispatchResult(self):
394 slave.resume = lambda: deferLater(917 # Test that `ResetDispatchResult` resets the builder and job.
395 reactor, 0, defer.fail, Failure(('out', 'err', 1)))918 builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
396919 buildqueue_id = builder.currentjob.id
397 # Reset sampledata builder.920 builder.builderok = True
398 builder = removeSecurityProxy(921 builder.failure_count = 1
399 getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME])922
400 self._resetBuilder(builder)923 # Setup a interaction to satisfy 'write_transaction' decorator.
401 self.assertEqual(0, builder.failure_count)924 login(ANONYMOUS)
402 builder.setSlaveForTesting(slave)925 slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
403 builder.vm_host = "fake_vm_host"926 result = ResetDispatchResult(slave)
404927 result()
405 scanner = self._getScanner()928
406929 buildqueue = getUtility(IBuildQueueSet).get(buildqueue_id)
407 # Get the next job that will be dispatched.930 self.assertBuildqueueIsClean(buildqueue)
408 job = removeSecurityProxy(builder._findBuildCandidate())931
409 job.virtualized = True932 # XXX Julian
410 builder.virtualized = True933 # Disabled test until bug 586362 is fixed.
411 d = scanner.singleCycle()934 #self.assertFalse(builder.builderok)
412935 self.assertBuilderIsClean(builder)
413 def check(ignored):936
414 # The failure_count will have been incremented on the937 def testFailDispatchResult(self):
415 # builder, we can check that to see that a dispatch attempt938 # Test that `FailDispatchResult` calls assessFailureCounts() so
416 # did indeed occur.939 # that we know the builders and jobs are failed as necessary
417 self.assertEqual(1, builder.failure_count)940 # when a FailDispatchResult is called at the end of the dispatch
418 # There should also be no builder set on the job.941 # chain.
419 self.assertTrue(job.builder is None)942 builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
420 build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)943
421 self.assertEqual(build.status, BuildStatus.NEEDSBUILD)944 # Setup a interaction to satisfy 'write_transaction' decorator.
422945 login(ANONYMOUS)
423 return d.addCallback(check)946 slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
947 result = FailDispatchResult(slave, 'does not work!')
948 result.assessFailureCounts = FakeMethod()
949 self.assertEqual(0, result.assessFailureCounts.call_count)
950 result()
951 self.assertEqual(1, result.assessFailureCounts.call_count)
952
953 def _setup_failing_dispatch_result(self):
954 # assessFailureCounts should fail jobs or builders depending on
955 # whether it sees the failure_counts on each increasing.
956 builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME)
957 slave = RecordingSlave(builder.name, builder.url, builder.vm_host)
958 result = FailDispatchResult(slave, 'does not work!')
959 return builder, result
960
961 def test_assessFailureCounts_equal_failures(self):
962 # Basic case where the failure counts are equal and the job is
963 # reset to try again & the builder is not failed.
964 builder, result = self._setup_failing_dispatch_result()
965 buildqueue = builder.currentjob
966 build = buildqueue.specific_job.build
967 builder.failure_count = 2
968 build.failure_count = 2
969 result.assessFailureCounts()
970
971 self.assertBuilderIsClean(builder)
972 self.assertEqual('NEEDSBUILD', build.status.name)
973 self.assertBuildqueueIsClean(buildqueue)
974
975 def test_assessFailureCounts_job_failed(self):
976 # Case where the job has failed more than the builder.
977 builder, result = self._setup_failing_dispatch_result()
978 buildqueue = builder.currentjob
979 build = buildqueue.specific_job.build
980 build.failure_count = 2
981 builder.failure_count = 1
982 result.assessFailureCounts()
983
984 self.assertBuilderIsClean(builder)
985 self.assertEqual('FAILEDTOBUILD', build.status.name)
986 # The buildqueue should have been removed entirely.
987 self.assertEqual(
988 None, getUtility(IBuildQueueSet).getByBuilder(builder),
989 "Buildqueue was not removed when it should be.")
990
991 def test_assessFailureCounts_builder_failed(self):
992 # Case where the builder has failed more than the job.
993 builder, result = self._setup_failing_dispatch_result()
994 buildqueue = builder.currentjob
995 build = buildqueue.specific_job.build
996 build.failure_count = 2
997 builder.failure_count = 3
998 result.assessFailureCounts()
999
1000 self.assertFalse(builder.builderok)
1001 self.assertEqual('does not work!', builder.failnotes)
1002 self.assertTrue(builder.currentjob is None)
1003 self.assertEqual('NEEDSBUILD', build.status.name)
1004 self.assertBuildqueueIsClean(buildqueue)
4241005
4251006
426class TestBuilddManager(TrialTestCase):1007class TestBuilddManager(TrialTestCase):
4271008
428 layer = TwistedLaunchpadZopelessLayer1009 layer = LaunchpadZopelessLayer
4291010
430 def _stub_out_scheduleNextScanCycle(self):1011 def _stub_out_scheduleNextScanCycle(self):
431 # stub out the code that adds a callLater, so that later tests1012 # stub out the code that adds a callLater, so that later tests
432 # don't get surprises.1013 # don't get surprises.
433 self.patch(SlaveScanner, 'startCycle', FakeMethod())1014 self.patch(SlaveScanner, 'scheduleNextScanCycle', FakeMethod())
4341015
435 def test_addScanForBuilders(self):1016 def test_addScanForBuilders(self):
436 # Test that addScanForBuilders generates NewBuildersScanner objects.1017 # Test that addScanForBuilders generates NewBuildersScanner objects.
@@ -459,62 +1040,10 @@
459 self.assertNotEqual(0, manager.new_builders_scanner.scan.call_count)1040 self.assertNotEqual(0, manager.new_builders_scanner.scan.call_count)
4601041
4611042
462class TestFailureAssessments(TestCaseWithFactory):
463
464 layer = ZopelessDatabaseLayer
465
466 def setUp(self):
467 TestCaseWithFactory.setUp(self)
468 self.builder = self.factory.makeBuilder()
469 self.build = self.factory.makeSourcePackageRecipeBuild()
470 self.buildqueue = self.build.queueBuild()
471 self.buildqueue.markAsBuilding(self.builder)
472
473 def test_equal_failures_reset_job(self):
474 self.builder.gotFailure()
475 self.builder.getCurrentBuildFarmJob().gotFailure()
476
477 assessFailureCounts(self.builder, "failnotes")
478 self.assertIs(None, self.builder.currentjob)
479 self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
480
481 def test_job_failing_more_than_builder_fails_job(self):
482 self.builder.getCurrentBuildFarmJob().gotFailure()
483
484 assessFailureCounts(self.builder, "failnotes")
485 self.assertIs(None, self.builder.currentjob)
486 self.assertEqual(self.build.status, BuildStatus.FAILEDTOBUILD)
487
488 def test_builder_failing_more_than_job_but_under_fail_threshold(self):
489 self.builder.failure_count = Builder.FAILURE_THRESHOLD - 1
490
491 assessFailureCounts(self.builder, "failnotes")
492 self.assertIs(None, self.builder.currentjob)
493 self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
494 self.assertTrue(self.builder.builderok)
495
496 def test_builder_failing_more_than_job_but_over_fail_threshold(self):
497 self.builder.failure_count = Builder.FAILURE_THRESHOLD
498
499 assessFailureCounts(self.builder, "failnotes")
500 self.assertIs(None, self.builder.currentjob)
501 self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD)
502 self.assertFalse(self.builder.builderok)
503 self.assertEqual("failnotes", self.builder.failnotes)
504
505 def test_builder_failing_with_no_attached_job(self):
506 self.buildqueue.reset()
507 self.builder.failure_count = Builder.FAILURE_THRESHOLD
508
509 assessFailureCounts(self.builder, "failnotes")
510 self.assertFalse(self.builder.builderok)
511 self.assertEqual("failnotes", self.builder.failnotes)
512
513
514class TestNewBuilders(TrialTestCase):1043class TestNewBuilders(TrialTestCase):
515 """Test detecting of new builders."""1044 """Test detecting of new builders."""
5161045
517 layer = TwistedLaunchpadZopelessLayer1046 layer = LaunchpadZopelessLayer
5181047
519 def _getScanner(self, manager=None, clock=None):1048 def _getScanner(self, manager=None, clock=None):
520 return NewBuildersScanner(manager=manager, clock=clock)1049 return NewBuildersScanner(manager=manager, clock=clock)
@@ -555,8 +1084,11 @@
555 new_builders, builder_scanner.checkForNewBuilders())1084 new_builders, builder_scanner.checkForNewBuilders())
5561085
557 def test_scan(self):1086 def test_scan(self):
558 # See if scan detects new builders.1087 # See if scan detects new builders and schedules the next scan.
5591088
1089 # stub out the addScanForBuilders and scheduleScan methods since
1090 # they use callLater; we only want to assert that they get
1091 # called.
560 def fake_checkForNewBuilders():1092 def fake_checkForNewBuilders():
561 return "new_builders"1093 return "new_builders"
5621094
@@ -572,6 +1104,9 @@
572 builder_scanner.scan()1104 builder_scanner.scan()
573 advance = NewBuildersScanner.SCAN_INTERVAL + 11105 advance = NewBuildersScanner.SCAN_INTERVAL + 1
574 clock.advance(advance)1106 clock.advance(advance)
1107 self.assertNotEqual(
1108 0, builder_scanner.scheduleScan.call_count,
1109 "scheduleScan did not get called")
5751110
5761111
577def is_file_growing(filepath, poll_interval=1, poll_repeat=10):1112def is_file_growing(filepath, poll_interval=1, poll_repeat=10):
@@ -612,7 +1147,7 @@
612 return False1147 return False
6131148
6141149
615class TestBuilddManagerScript(TestCaseWithFactory):1150class TestBuilddManagerScript(LaunchpadTestCase):
6161151
617 layer = LaunchpadScriptLayer1152 layer = LaunchpadScriptLayer
6181153
@@ -621,7 +1156,6 @@
621 fixture = BuilddManagerTestSetup()1156 fixture = BuilddManagerTestSetup()
622 fixture.setUp()1157 fixture.setUp()
623 fixture.tearDown()1158 fixture.tearDown()
624 self.layer.force_dirty_database()
6251159
626 # XXX Julian 2010-08-06 bug=6142751160 # XXX Julian 2010-08-06 bug=614275
627 # These next 2 tests are in the wrong place, they should be near the1161 # These next 2 tests are in the wrong place, they should be near the
6281162
=== modified file 'lib/lp/buildmaster/tests/test_packagebuild.py'
--- lib/lp/buildmaster/tests/test_packagebuild.py 2010-10-26 20:43:50 +0000
+++ lib/lp/buildmaster/tests/test_packagebuild.py 2010-12-07 16:29:13 +0000
@@ -97,8 +97,6 @@
97 self.assertRaises(97 self.assertRaises(
98 NotImplementedError, self.package_build.verifySuccessfulUpload)98 NotImplementedError, self.package_build.verifySuccessfulUpload)
99 self.assertRaises(NotImplementedError, self.package_build.notify)99 self.assertRaises(NotImplementedError, self.package_build.notify)
100 # XXX 2010-10-18 bug=662631
101 # Change this to do non-blocking IO.
102 self.assertRaises(100 self.assertRaises(
103 NotImplementedError, self.package_build.handleStatus,101 NotImplementedError, self.package_build.handleStatus,
104 None, None, None)102 None, None, None)
@@ -311,8 +309,6 @@
311 # A filemap with plain filenames should not cause a problem.309 # A filemap with plain filenames should not cause a problem.
312 # The call to handleStatus will attempt to get the file from310 # The call to handleStatus will attempt to get the file from
313 # the slave resulting in a URL error in this test case.311 # the slave resulting in a URL error in this test case.
314 # XXX 2010-10-18 bug=662631
315 # Change this to do non-blocking IO.
316 self.build.handleStatus('OK', None, {312 self.build.handleStatus('OK', None, {
317 'filemap': {'myfile.py': 'test_file_hash'},313 'filemap': {'myfile.py': 'test_file_hash'},
318 })314 })
@@ -323,8 +319,6 @@
323 def test_handleStatus_OK_absolute_filepath(self):319 def test_handleStatus_OK_absolute_filepath(self):
324 # A filemap that tries to write to files outside of320 # A filemap that tries to write to files outside of
325 # the upload directory will result in a failed upload.321 # the upload directory will result in a failed upload.
326 # XXX 2010-10-18 bug=662631
327 # Change this to do non-blocking IO.
328 self.build.handleStatus('OK', None, {322 self.build.handleStatus('OK', None, {
329 'filemap': {'/tmp/myfile.py': 'test_file_hash'},323 'filemap': {'/tmp/myfile.py': 'test_file_hash'},
330 })324 })
@@ -335,8 +329,6 @@
335 def test_handleStatus_OK_relative_filepath(self):329 def test_handleStatus_OK_relative_filepath(self):
336 # A filemap that tries to write to files outside of330 # A filemap that tries to write to files outside of
337 # the upload directory will result in a failed upload.331 # the upload directory will result in a failed upload.
338 # XXX 2010-10-18 bug=662631
339 # Change this to do non-blocking IO.
340 self.build.handleStatus('OK', None, {332 self.build.handleStatus('OK', None, {
341 'filemap': {'../myfile.py': 'test_file_hash'},333 'filemap': {'../myfile.py': 'test_file_hash'},
342 })334 })
@@ -347,8 +339,6 @@
347 # The build log is set during handleStatus.339 # The build log is set during handleStatus.
348 removeSecurityProxy(self.build).log = None340 removeSecurityProxy(self.build).log = None
349 self.assertEqual(None, self.build.log)341 self.assertEqual(None, self.build.log)
350 # XXX 2010-10-18 bug=662631
351 # Change this to do non-blocking IO.
352 self.build.handleStatus('OK', None, {342 self.build.handleStatus('OK', None, {
353 'filemap': {'myfile.py': 'test_file_hash'},343 'filemap': {'myfile.py': 'test_file_hash'},
354 })344 })
@@ -358,8 +348,6 @@
358 # The date finished is updated during handleStatus_OK.348 # The date finished is updated during handleStatus_OK.
359 removeSecurityProxy(self.build).date_finished = None349 removeSecurityProxy(self.build).date_finished = None
360 self.assertEqual(None, self.build.date_finished)350 self.assertEqual(None, self.build.date_finished)
361 # XXX 2010-10-18 bug=662631
362 # Change this to do non-blocking IO.
363 self.build.handleStatus('OK', None, {351 self.build.handleStatus('OK', None, {
364 'filemap': {'myfile.py': 'test_file_hash'},352 'filemap': {'myfile.py': 'test_file_hash'},
365 })353 })
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches

to all changes: