Merge ~cjwatson/launchpad:fix-build-worker-test-race into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: a6ff5f26133c325e65a608ccceb42442c1dabdb1
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:fix-build-worker-test-race
Merge into: launchpad:master
Diff against target: 176 lines (+100/-13)
2 files modified
lib/lp/buildmaster/tests/mock_workers.py (+97/-2)
lib/lp/buildmaster/tests/test_interactor.py (+3/-11)
Reviewer Review Type Date Requested Status
Jürgen Gmach Approve
Review via email: mp+433863@code.launchpad.net

Commit message

Fix race in build worker tests

Description of the change

Our buildbot frequently hits a race in `TestWorker.test_abort`. This race occurs when `WorkerTestHelpers.triggerGoodBuild` tells the worker to start a build, which then runs through launchpad-buildd and eventually attempts to execute launchpad-buildd's helper programs to unpack a chroot and run an actual build in it; because the Launchpad test suite isn't set up to run full builds itself (and we wouldn't want it to even if it were), this fails. If launchpad-buildd manages to do all this before responding to the subsequent `worker.abort()` call from the test, then the abort call fails because the builder is unexpectedly not in the `BUILDING` state any more.

To fix this, we insert a substitute version of the "builder-prep" program that launchpad-buildd calls at the start of a build, which just reads a line from a named pipe and then exits non-zero after doing so. This allows us to control the progress of the build by only writing to that named pipe during test cleanup.

While launchpad-buildd's test harness is now set up to support this, unfortunately the necessary code only landed after we dropped Python 3.5 support from launchpad-buildd, and we still need that in Launchpad. For now, copy the test harness code to avoid this, which is well worth it given how often we run into this test race.

This change also allows simplifying `TestWorker.test_status_after_build`, which previously had to work around a similar race; see https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/419665.

To post a comment you must log in.
Revision history for this message
Jürgen Gmach (jugmac00) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/buildmaster/tests/mock_workers.py b/lib/lp/buildmaster/tests/mock_workers.py
2index 2251200..01c9732 100644
3--- a/lib/lp/buildmaster/tests/mock_workers.py
4+++ b/lib/lp/buildmaster/tests/mock_workers.py
5@@ -18,16 +18,23 @@ __all__ = [
6 ]
7
8 import os
9+import shlex
10 import sys
11 import xmlrpc.client
12 from collections import OrderedDict
13 from copy import copy
14+from textwrap import dedent
15+
16+try:
17+ from importlib import resources
18+except ImportError:
19+ import importlib_resources as resources
20
21 import fixtures
22-from lpbuildd.tests.harness import BuilddTestSetup
23 from testtools.content import attach_file
24 from twisted.internet import defer
25 from twisted.web.xmlrpc import Proxy
26+from txfixtures.tachandler import TacTestFixture
27
28 from lp.buildmaster.enums import BuilderCleanStatus, BuilderResetProtocol
29 from lp.buildmaster.interactor import BuilderWorker
30@@ -315,12 +322,75 @@ class DeadProxy(Proxy):
31 return defer.Deferred()
32
33
34-class LPBuilddTestSetup(BuilddTestSetup):
35+# XXX cjwatson 2022-11-30:
36+# https://git.launchpad.net/launchpad-buildd/commit?id=a42da402b9 made the
37+# harness less stateful in a way that's useful for some of our tests, but
38+# unfortunately it was after launchpad-buildd began to require Python >= 3.6
39+# which we can't yet do in Launchpad itself. Copy launchpad-buildd's code
40+# for now, but once Launchpad is running on Python >= 3.6 we should go back
41+# to subclassing it. (For similar reasons, we're currently stuck with some
42+# non-inclusive terminology here until we can upgrade the version of
43+# launchpad-buildd in our virtualenv.)
44+class LPBuilddTestSetup(TacTestFixture):
45 """A BuilddTestSetup that uses the LP virtualenv."""
46
47+ _root = None
48+
49 def setUp(self):
50 super().setUp(python_path=sys.executable, twistd_script=twistd_script)
51
52+ def setUpRoot(self):
53+ filecache = os.path.join(self.root, "filecache")
54+ os.mkdir(filecache)
55+ self.useFixture(fixtures.EnvironmentVariable("HOME", self.root))
56+ test_conffile = os.path.join(self.root, "buildd.conf")
57+ with open(test_conffile, "w") as f:
58+ f.write(
59+ dedent(
60+ """\
61+ [builder]
62+ architecturetag = i386
63+ filecache = {filecache}
64+ bindhost = localhost
65+ bindport = {self.daemon_port}
66+ sharepath = {self.root}
67+ """.format(
68+ filecache=filecache, self=self
69+ )
70+ )
71+ )
72+ self.useFixture(
73+ fixtures.EnvironmentVariable("BUILDD_SLAVE_CONFIG", test_conffile)
74+ )
75+
76+ @property
77+ def root(self):
78+ if self._root is None:
79+ self._root = self.useFixture(fixtures.TempDir()).path
80+ return self._root
81+
82+ @property
83+ def tacfile(self):
84+ # importlib.resources.path makes no guarantees about whether the
85+ # path is still valid after exiting the context manager (it might be
86+ # a temporary file), but in practice this works fine in Launchpad's
87+ # virtualenv setup.
88+ with resources.path("lpbuildd", "buildd-slave.tac") as tacpath:
89+ pass
90+ return tacpath.as_posix()
91+
92+ @property
93+ def pidfile(self):
94+ return os.path.join(self.root, "buildd.pid")
95+
96+ @property
97+ def logfile(self):
98+ return "/var/tmp/buildd.log"
99+
100+ @property
101+ def daemon_port(self):
102+ return 8321
103+
104
105 class WorkerTestHelpers(fixtures.Fixture):
106 @property
107@@ -370,6 +440,30 @@ class WorkerTestHelpers(fixtures.Fixture):
108 fd.write(contents)
109 self.addCleanup(os.unlink, path)
110
111+ def configureWaitingBuilder(self, tachandler):
112+ """Set up a builder to wait forever until told to stop."""
113+ fifo = os.path.join(tachandler.root, "builder-prep.fifo")
114+ os.mkfifo(fifo)
115+ builder_prep = os.path.join(tachandler.root, "bin", "builder-prep")
116+ os.makedirs(os.path.dirname(builder_prep), exist_ok=True)
117+ with open(builder_prep, "w") as f:
118+ f.write("#! /bin/sh\nread x <%s\nexit 1\n" % shlex.quote(fifo))
119+ os.fchmod(f.fileno(), 0o755)
120+ # This is run on cleanup, and we don't want it to fail.
121+ in_target = os.path.join(tachandler.root, "bin", "in-target")
122+ os.symlink("/bin/true", in_target)
123+ self.addCleanup(self.continueBuild, tachandler)
124+
125+ def continueBuild(self, tachandler):
126+ """Continue a build set up to wait via `configureWaitingBuilder`."""
127+ flag = os.path.join(tachandler.root, "builder-prep.continued")
128+ if not os.path.exists(flag):
129+ fifo = os.path.join(tachandler.root, "builder-prep.fifo")
130+ with open(fifo, "w") as f:
131+ f.write("\n")
132+ with open(flag, "w"):
133+ pass
134+
135 def triggerGoodBuild(self, worker, build_id=None):
136 """Trigger a good build on 'worker'.
137
138@@ -386,6 +480,7 @@ class WorkerTestHelpers(fixtures.Fixture):
139 dsc_file = "thing"
140 self.makeCacheFile(tachandler, chroot_file)
141 self.makeCacheFile(tachandler, dsc_file)
142+ self.configureWaitingBuilder(tachandler)
143 extra_args = {
144 "distribution": "ubuntu",
145 "series": "precise",
146diff --git a/lib/lp/buildmaster/tests/test_interactor.py b/lib/lp/buildmaster/tests/test_interactor.py
147index e8c6ea6..dc45dcc 100644
148--- a/lib/lp/buildmaster/tests/test_interactor.py
149+++ b/lib/lp/buildmaster/tests/test_interactor.py
150@@ -635,23 +635,15 @@ class TestWorker(TestCase):
151 @defer.inlineCallbacks
152 def test_status_after_build(self):
153 # Calling 'status' returns the current status of the worker. After a
154- # build has been triggered, the status is BUILDING. (It may also be
155- # WAITING if the build finishes before we have a chance to check its
156- # status.)
157+ # build has been triggered, the status is BUILDING.
158 worker = self.worker_helper.getClientWorker()
159 build_id = "status-build-id"
160 response = yield self.worker_helper.triggerGoodBuild(worker, build_id)
161 self.assertEqual([BuilderStatus.BUILDING, build_id], response)
162 status = yield worker.status()
163- self.assertIn(
164- status["builder_status"],
165- {BuilderStatus.BUILDING, BuilderStatus.WAITING},
166- )
167+ self.assertEqual(BuilderStatus.BUILDING, status["builder_status"])
168 self.assertEqual(build_id, status["build_id"])
169- # We only see a logtail if the build is still in the BUILDING
170- # status.
171- if "logtail" in status:
172- self.assertIsInstance(status["logtail"], xmlrpc.client.Binary)
173+ self.assertIsInstance(status["logtail"], xmlrpc.client.Binary)
174
175 @defer.inlineCallbacks
176 def test_ensurepresent_not_there(self):

Subscribers

People subscribed via source and target branches

to status/vote changes: