Some quick comments, almost all of which are shallow. This doesn't count as a proper code review. Importantly, I haven't rigorously checked that the deleted tests have been properly replaced, and I haven't checked a lot of the Deferred-transformation work for correctness. Will respond soon with a review of the new manager & test_manager. > === modified file 'lib/lp/buildmaster/interfaces/builder.py' > --- lib/lp/buildmaster/interfaces/builder.py 2010-09-23 18:17:21 +0000 > +++ lib/lp/buildmaster/interfaces/builder.py 2010-10-08 17:29:52 +0000 > @@ -154,11 +154,6 @@ class IBuilder(IHasOwner): > Could you please go through this interface and: * Make sure that all methods that return Deferreds are documented as doing so. * Possibly, group all of the methods that return Deferreds. I think it will make the interface clearer. > === modified file 'lib/lp/buildmaster/model/builder.py' > --- lib/lp/buildmaster/model/builder.py 2010-09-24 13:39:27 +0000 > +++ lib/lp/buildmaster/model/builder.py 2010-10-18 10:09:54 +0000 ... > @@ -125,24 +112,17 @@ class BuilderSlave(object): > # many false positives in your test run and will most likely break > # production. > > # XXX: Have a documented interface for the XML-RPC server: > # - what methods > # - what return values expected > # - what faults > # (see XMLRPCBuildDSlave in lib/canonical/buildd/slave.py). > I've filed bug https://bugs.edge.launchpad.net/soyuz/+bug/662599 for this. I don't think having the XXX here helps. > # XXX: Once we have a client object with a defined, tested interface, we > # should make a test double that doesn't do any XML-RPC and can be used to > # make testing easier & tests faster. > This XXX can be safely deleted, I think. > def getFile(self, sha_sum): > """Construct a file-like object to return the named file.""" > + # XXX: Change this to do non-blocking IO. Please file a bug. ... > + # Twisted API requires string but the configuration provides unicode. > + resume_argv = [str(term) for term in resume_command.split()] It's more explicit to do .encode('utf-8'), rather than str(). > def updateStatus(self, logger=None): > """See `IBuilder`.""" > - updateBuilderStatus(self, logger) > + # updateBuilderStatus returns a Deferred if the builder timed > + # out, otherwise it returns a thing that we can wrap in a > + # defer.succeed. maybeDeferred() handles this for us. > + return defer.maybeDeferred(updateBuilderStatus, self, logger) > This comment seems bogus. As far as I can tell, updateBuilderStatus always returns a Deferred. > - if builder_should_be_failed: > + d = self.resumeSlaveHost() > + return d > + else: > + # XXX: This should really let the failure bubble up to the > + # scan() method that does the failure counting. > # Mark builder as 'failed'. Are you going to fix this in this branch or in another? If so, when? > logger.warn( > - "Disabling builder: %s -- %s" % (self.url, error_message), > - exc_info=True) > + "Disabling builder: %s -- %s" % (self.url, error_message)) > self.failBuilder(error_message) > + return defer.succeed(None) > > def findAndStartJob(self, buildd_slave=None): > """See IBuilder.""" > + # XXX This method should be removed in favour of two separately > + # called methods that find and dispatch the job. But I don't > + # want to do that right now because I don't want to fix all the > + # tests. If you're going to use 'I', please say who you are. In fact, you should say who you are anyway. > === modified file 'lib/lp/buildmaster/model/buildfarmjobbehavior.py' > --- lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-08-20 20:31:18 +0000 > +++ lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-10-08 17:29:52 +0000 ... > @@ -69,9 +71,11 @@ class BuildFarmJobBehaviorBase: > """See `IBuildFarmJobBehavior`.""" > logger = logging.getLogger('slave-scanner') > > - try: > - slave_status = self._builder.slaveStatus() > - except (xmlrpclib.Fault, socket.error), info: > + d = self._builder.slaveStatus() > + > + def got_failure(failure): > + failure.trap(xmlrpclib.Fault, socket.error) > + info = failure.value > # XXX cprov 2005-06-29: > # Hmm, a problem with the xmlrpc interface, > # disable the builder ?? or simple notice the failure Is this XXX comment still relevant? I know you've done work consolidating the error handling, and it's odd to see the failure get swallowed like that. > === modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py' > --- lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-10-04 19:50:45 +0000 > +++ lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py 2010-10-12 16:30:44 +0000 > @@ -7,27 +7,43 @@ from __future__ import with_statement ... > @@ -140,6 +159,31 @@ class TestBinaryBuildPackageBehavior(tri > builder, build, lf, archive, ArchivePurpose.PRIMARY, 'universe') > return d > > + def test_virtual_ppa_dispatch(self): > + # This is testing to make sure the builder slave gets reset > + # before a build is dispatched to it. "This is testing to make sure" is redundant. > @@ -196,9 +239,242 @@ class TestBinaryBuildPackageBehavior(tri ... > + > +class TestBinaryBuildPackageBehaviorBuildCollection(trialtest.TestCase): > + """Tests for the BinaryPackageBuildBehavior. > + > + Using various mock slaves, we check how updateBuild() behaves in > + various scenarios. > + """ > + > + # XXX: These tests replace part of the old buildd-slavescanner.txt > + # It was checking that each call to updateBuild was sending 3 (!) > + # emails but this behaviour is so ill-defined and dependent on the > + # sample data that I've not replicated that here. We need to > + # examine that behaviour separately somehow. > + Thanks for flagging this here. If you weren't around and I saw this comment, I wouldn't really know how to resolve it, and more importantly, I wouldn't know how to tell if it was resolved. Does "examine that behaviour separately" mean "figure out what on earth the system is doing?" or "figure out what the old tests were doing?" or "we know what the old tests were doing, we just need to write tests for it"? > + layer = TwistedLaunchpadZopelessLayer > + > + def setUp(self): > + super(TestBinaryBuildPackageBehaviorBuildCollection, self).setUp() > + self.factory = LaunchpadObjectFactory() > + login_as(ANONYMOUS) > + self.addCleanup(logout) > + self.layer.switchDbUser('testadmin') > + > + self.builder = self.factory.makeBuilder() > + self.build = self.factory.makeBinaryPackageBuild( > + builder=self.builder, pocket=PackagePublishingPocket.RELEASE) > + lf = self.factory.makeLibraryFileAlias() > + transaction.commit() > + self.build.distro_arch_series.addOrUpdateChroot(lf) > + self.candidate = self.build.queueBuild() > + self.candidate.markAsBuilding(self.builder) > + self.behavior = self.candidate.required_build_behavior > + self.behavior.setBuilder(self.builder) > + > + def assertBuildProperties(self, build): Docstring please. It's odd to see a test assert simple non-nullity. > + def test_log_file_collection(self): > + self.builder.setSlaveForTesting(WaitingSlave('BuildStatus.OK')) > + self.build.status = BuildStatus.FULLYBUILT > + old_tmps = sorted(os.listdir('/tmp')) > + > + # Grabbing logs should not leave new files in /tmp (bug #172798) > + logfile_lfa_id = self.build.getLogFromSlave(self.build) > + logfile_lfa = getUtility(ILibraryFileAliasSet)[logfile_lfa_id] > + new_tmps = sorted(os.listdir('/tmp')) > + self.assertEqual(old_tmps, new_tmps) > + > + # The new librarian file is stored compressed with a .gz > + # extension and text/plain file type for easy viewing in > + # browsers, as it decompresses and displays the file inline. > + self.assertTrue(logfile_lfa.filename).endswith('_FULLYBUILT.txt.gz') > + self.assertEqual('text/plain', logfile_lfa.mimetype) > + self.layer.txn.commit() > + > + # LibrarianFileAlias does not implement tell() or seek(), which > + # are required by gzip.open(), so we need to read the file out > + # of the librarian first. > + fd, fname = tempfile.mkstemp() > + self.addCleanup(os.remove, fname) > + tmp = os.fdopen(fd, 'wb') > + tmp.write(logfile_lfa.read()) > + tmp.close() > + uncompressed_file = gzip.open(fname).read() > + > + # XXX: When the mock slave is changed to return a Deferred, > + # update this test too. Could you please file a bug for this and update the comment? It'll be easier to find later. > === modified file 'lib/lp/translations/model/translationtemplatesbuildbehavior.py' > --- lib/lp/translations/model/translationtemplatesbuildbehavior.py 2010-10-06 18:53:53 +0000 > +++ lib/lp/translations/model/translationtemplatesbuildbehavior.py 2010-10-15 11:20:08 +0000 ... > @@ -129,5 +131,7 @@ class TranslationTemplatesBuildBehavior( > queue_item.specific_job.branch, tarball, logger) > logger.debug("Upload complete.") > > - queue_item.builder.cleanSlave() > - queue_item.destroySelf() > + # XXX: bigjools 2010-09-24: This call is not necessary, the build > + # manager will restart the virtual machine. > + d = queue_item.builder.cleanSlave() > + return d.addCallback(lambda ignored: queue_item.destroySelf()) > Is this a bug? What should be done about the unnecessary call? jml