Merge lp:~cjwatson/launchpad/buildmaster-twisted-agent into lp:launchpad

Proposed by Colin Watson on 2016-05-24
Status: Merged
Merged at revision: 18068
Proposed branch: lp:~cjwatson/launchpad/buildmaster-twisted-agent
Merge into: lp:launchpad
Diff against target: 363 lines (+189/-20)
5 files modified
daemons/buildd-manager.tac (+13/-4)
lib/lp/buildmaster/interactor.py (+112/-10)
lib/lp/buildmaster/tests/mock_slaves.py (+3/-3)
lib/lp/buildmaster/tests/test_interactor.py (+53/-3)
lib/lp/services/config/schema-lazr.conf (+8/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/buildmaster-twisted-agent
Reviewer Review Type Date Requested Status
William Grant code 2016-05-24 Approve on 2016-05-25
Celso Providelo (community) Approve on 2016-05-24
Review via email: mp+295593@code.launchpad.net

Commit message

Convert BuilderSlave to twisted.web.client.Agent, causing it to use a connection pool rather than trying to download everything at once.

Description of the change

Convert BuilderSlave to twisted.web.client.Agent, causing it to use a connection pool rather than trying to download everything at once. The old approach could overflow open-file limits.

We'll need to give this a good workout on dogfood.

To post a comment you must log in.
Celso Providelo (cprov) wrote :

Thanks Colin, great solution!

From http://twistedmatrix.com/documents/15.2.0/web/howto/client.html#multiple-connections-to-the-same-server it seems that it will sustain at most 2 simultaneous downloads from each slave. Is that reasonable and sufficient ?

review: Approve
William Grant (wgrant) :
review: Approve (code)
Colin Watson (cjwatson) :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'daemons/buildd-manager.tac'
2--- daemons/buildd-manager.tac 2011-12-29 05:29:36 +0000
3+++ daemons/buildd-manager.tac 2016-05-25 15:34:01 +0000
4@@ -1,14 +1,18 @@
5-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 # Twisted Application Configuration file.
10 # Use with "twistd2.4 -y <file.tac>", e.g. "twistd -noy server.tac"
11
12+import resource
13+
14 from twisted.application import service
15 from twisted.scripts.twistd import ServerOptions
16-from twisted.web import server
17
18-from lp.services.config import dbconfig
19+from lp.services.config import (
20+ config,
21+ dbconfig,
22+ )
23 from lp.services.daemons import readyservice
24 from lp.services.scripts import execute_zcml_for_scripts
25 from lp.buildmaster.manager import BuilddManager
26@@ -21,6 +25,12 @@
27 # Should be removed from callsites verified to not need it.
28 set_immediate_mail_delivery(True)
29
30+# Allow generous slack for database connections, idle download connections,
31+# etc.
32+soft_nofile = config.builddmaster.download_connections + 1024
33+_, hard_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)
34+resource.setrlimit(resource.RLIMIT_NOFILE, (soft_nofile, hard_nofile))
35+
36 options = ServerOptions()
37 options.parseOptions()
38
39@@ -34,4 +44,3 @@
40 # Service for scanning buildd slaves.
41 service = BuilddManager()
42 service.setServiceParent(application)
43-
44
45=== modified file 'lib/lp/buildmaster/interactor.py'
46--- lib/lp/buildmaster/interactor.py 2015-02-17 05:38:37 +0000
47+++ lib/lp/buildmaster/interactor.py 2016-05-25 15:34:01 +0000
48@@ -1,4 +1,4 @@
49-# Copyright 2009-2014 Canonical Ltd. This software is licensed under the
50+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
51 # GNU Affero General Public License version 3 (see the file LICENSE).
52
53 __metaclass__ = type
54@@ -13,9 +13,17 @@
55 from urlparse import urlparse
56
57 import transaction
58-from twisted.internet import defer
59+from twisted.internet import (
60+ defer,
61+ reactor as default_reactor,
62+ )
63+from twisted.internet.protocol import Protocol
64 from twisted.web import xmlrpc
65-from twisted.web.client import downloadPage
66+from twisted.web.client import (
67+ Agent,
68+ HTTPConnectionPool,
69+ ResponseDone,
70+ )
71 from zope.security.proxy import (
72 isinstance as zope_isinstance,
73 removeSecurityProxy,
74@@ -46,6 +54,89 @@
75 noisy = False
76
77
78+class FileWritingProtocol(Protocol):
79+ """A protocol that saves data to a file."""
80+
81+ def __init__(self, finished, filename):
82+ self.finished = finished
83+ self.filename = filename
84+ self.file = None
85+
86+ def dataReceived(self, data):
87+ if self.file is None:
88+ self.file = open(self.filename, "wb")
89+ try:
90+ self.file.write(data)
91+ except IOError:
92+ try:
93+ self.file.close()
94+ except IOError:
95+ pass
96+ self.file = None
97+ self.finished.errback()
98+
99+ def connectionLost(self, reason):
100+ try:
101+ self.file.close()
102+ except IOError:
103+ self.finished.errback()
104+ else:
105+ if reason.check(ResponseDone):
106+ self.finished.callback(None)
107+ else:
108+ self.finished.errback(reason)
109+
110+
111+class LimitedHTTPConnectionPool(HTTPConnectionPool):
112+ """A connection pool with an upper limit on open connections."""
113+
114+ # XXX cjwatson 2016-05-25: This actually only limits active connections,
115+ # and doesn't count idle but open connections towards the limit; this is
116+ # because it's very difficult to do the latter with HTTPConnectionPool's
117+ # current design. Users of this pool must therefore expect some
118+ # additional file descriptors to be open for idle connections.
119+
120+ def __init__(self, reactor, limit, persistent=True):
121+ super(LimitedHTTPConnectionPool, self).__init__(
122+ reactor, persistent=persistent)
123+ self._semaphore = defer.DeferredSemaphore(limit)
124+
125+ def getConnection(self, key, endpoint):
126+ d = self._semaphore.acquire()
127+ d.addCallback(
128+ lambda _: super(LimitedHTTPConnectionPool, self).getConnection(
129+ key, endpoint))
130+ return d
131+
132+ def _putConnection(self, key, connection):
133+ super(LimitedHTTPConnectionPool, self)._putConnection(key, connection)
134+ # Only release the semaphore in the next main loop iteration; if we
135+ # release it here then the next request may start using this
136+ # connection's parser before this request has quite finished with
137+ # it.
138+ self._reactor.callLater(0, self._semaphore.release)
139+
140+
141+_default_pool = None
142+
143+
144+def default_pool(reactor=None):
145+ global _default_pool
146+ if reactor is None:
147+ reactor = default_reactor
148+ if _default_pool is None:
149+ # Circular import.
150+ from lp.buildmaster.manager import SlaveScanner
151+ # Short cached connection timeout to avoid potential weirdness with
152+ # virtual builders that reboot frequently.
153+ _default_pool = LimitedHTTPConnectionPool(
154+ reactor, config.builddmaster.download_connections)
155+ _default_pool.maxPersistentPerHost = (
156+ config.builddmaster.idle_download_connections_per_builder)
157+ _default_pool.cachedConnectionTimeout = SlaveScanner.SCAN_INTERVAL
158+ return _default_pool
159+
160+
161 class BuilderSlave(object):
162 """Add in a few useful methods for the XMLRPC slave.
163
164@@ -58,7 +149,7 @@
165 # many false positives in your test run and will most likely break
166 # production.
167
168- def __init__(self, proxy, builder_url, vm_host, timeout, reactor):
169+ def __init__(self, proxy, builder_url, vm_host, timeout, reactor, pool):
170 """Initialize a BuilderSlave.
171
172 :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must
173@@ -71,11 +162,16 @@
174 self._file_cache_url = urlappend(builder_url, 'filecache')
175 self._server = proxy
176 self.timeout = timeout
177+ if reactor is None:
178+ reactor = default_reactor
179 self.reactor = reactor
180+ if pool is None:
181+ pool = default_pool(reactor=reactor)
182+ self.pool = pool
183
184 @classmethod
185 def makeBuilderSlave(cls, builder_url, vm_host, timeout, reactor=None,
186- proxy=None):
187+ proxy=None, pool=None):
188 """Create and return a `BuilderSlave`.
189
190 :param builder_url: The URL of the slave buildd machine,
191@@ -84,6 +180,7 @@
192 here.
193 :param reactor: Used by tests to override the Twisted reactor.
194 :param proxy: Used By tests to override the xmlrpc.Proxy.
195+ :param pool: Used by tests to override the HTTPConnectionPool.
196 """
197 rpc_url = urlappend(builder_url.encode('utf-8'), 'rpc')
198 if proxy is None:
199@@ -92,7 +189,7 @@
200 server_proxy.queryFactory = QuietQueryFactory
201 else:
202 server_proxy = proxy
203- return cls(server_proxy, builder_url, vm_host, timeout, reactor)
204+ return cls(server_proxy, builder_url, vm_host, timeout, reactor, pool)
205
206 def _with_timeout(self, d, timeout=None):
207 return cancel_on_timeout(d, timeout or self.timeout, self.reactor)
208@@ -138,10 +235,15 @@
209 errback with the error string.
210 """
211 file_url = urlappend(self._file_cache_url, sha_sum).encode('utf8')
212- # If desired we can pass a param "timeout" here but let's leave
213- # it at the default value if it becomes obvious we need to
214- # change it.
215- return downloadPage(file_url, file_to_write, followRedirect=0)
216+ d = Agent(self.reactor, pool=self.pool).request("GET", file_url)
217+
218+ def got_response(response):
219+ finished = defer.Deferred()
220+ response.deliverBody(FileWritingProtocol(finished, file_to_write))
221+ return finished
222+
223+ d.addCallback(got_response)
224+ return d
225
226 def getFiles(self, files):
227 """Fetch many files from the builder.
228
229=== modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
230--- lib/lp/buildmaster/tests/mock_slaves.py 2015-09-28 17:38:45 +0000
231+++ lib/lp/buildmaster/tests/mock_slaves.py 2016-05-25 15:34:01 +0000
232@@ -1,4 +1,4 @@
233-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
234+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
235 # GNU Affero General Public License version 3 (see the file LICENSE).
236
237 """Mock Build objects for tests soyuz buildd-system."""
238@@ -290,14 +290,14 @@
239 lambda: open(tachandler.logfile, 'r').readlines()))
240 return tachandler
241
242- def getClientSlave(self, reactor=None, proxy=None):
243+ def getClientSlave(self, reactor=None, proxy=None, pool=None):
244 """Return a `BuilderSlave` for use in testing.
245
246 Points to a fixed URL that is also used by `BuilddSlaveTestSetup`.
247 """
248 return BuilderSlave.makeBuilderSlave(
249 self.BASE_URL, 'vmhost', config.builddmaster.socket_timeout,
250- reactor, proxy)
251+ reactor=reactor, proxy=proxy, pool=pool)
252
253 def makeCacheFile(self, tachandler, filename):
254 """Make a cache file available on the remote slave.
255
256=== modified file 'lib/lp/buildmaster/tests/test_interactor.py'
257--- lib/lp/buildmaster/tests/test_interactor.py 2015-11-04 14:30:09 +0000
258+++ lib/lp/buildmaster/tests/test_interactor.py 2016-05-25 15:34:01 +0000
259@@ -1,4 +1,4 @@
260-# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
261+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
262 # GNU Affero General Public License version 3 (see the file LICENSE).
263
264 """Test BuilderInteractor features."""
265@@ -20,9 +20,16 @@
266 AsynchronousDeferredRunTestForBrokenTwisted,
267 SynchronousDeferredRunTest,
268 )
269-from testtools.matchers import ContainsAll
270+from testtools.matchers import (
271+ ContainsAll,
272+ HasLength,
273+ MatchesDict,
274+ )
275 from testtools.testcase import ExpectedException
276-from twisted.internet import defer
277+from twisted.internet import (
278+ defer,
279+ reactor as default_reactor,
280+ )
281 from twisted.internet.task import Clock
282 from twisted.python.failure import Failure
283 from twisted.web.client import getPage
284@@ -38,6 +45,7 @@
285 BuilderInteractor,
286 BuilderSlave,
287 extract_vitals_from_db,
288+ LimitedHTTPConnectionPool,
289 )
290 from lp.buildmaster.interfaces.builder import (
291 BuildDaemonIsolationError,
292@@ -789,6 +797,7 @@
293 for sha1, local_file in files:
294 with open(local_file) as f:
295 self.assertEqual(content_map[sha1], f.read())
296+ return slave.pool.closeCachedConnections()
297
298 def finished_uploading(ignored):
299 d = slave.getFiles(files)
300@@ -812,3 +821,44 @@
301 dl.append(d)
302
303 return defer.DeferredList(dl).addCallback(finished_uploading)
304+
305+ def test_getFiles_open_connections(self):
306+ # getFiles honours the configured limit on active download
307+ # connections.
308+ pool = LimitedHTTPConnectionPool(default_reactor, 2)
309+ contents = [self.factory.getUniqueString() for _ in range(10)]
310+ self.slave_helper.getServerSlave()
311+ slave = self.slave_helper.getClientSlave(pool=pool)
312+ files = []
313+ content_map = {}
314+
315+ def got_files(ignored):
316+ # Called back when getFiles finishes. Make sure all the
317+ # content is as expected.
318+ for sha1, local_file in files:
319+ with open(local_file) as f:
320+ self.assertEqual(content_map[sha1], f.read())
321+ # Only two connections were used.
322+ self.assertThat(
323+ slave.pool._connections,
324+ MatchesDict({("http", "localhost", 8221): HasLength(2)}))
325+ return slave.pool.closeCachedConnections()
326+
327+ def finished_uploading(ignored):
328+ d = slave.getFiles(files)
329+ return d.addCallback(got_files)
330+
331+ # Set up some files on the builder and store details in
332+ # content_map so we can compare downloads later.
333+ dl = []
334+ for content in contents:
335+ filename = content + '.txt'
336+ lf = self.factory.makeLibraryFileAlias(filename, content=content)
337+ content_map[lf.content.sha1] = content
338+ files.append((lf.content.sha1, tempfile.mkstemp()[1]))
339+ self.addCleanup(os.remove, files[-1][1])
340+ self.layer.txn.commit()
341+ d = slave.ensurepresent(lf.content.sha1, lf.http_url, "", "")
342+ dl.append(d)
343+
344+ return defer.DeferredList(dl).addCallback(finished_uploading)
345
346=== modified file 'lib/lp/services/config/schema-lazr.conf'
347--- lib/lp/services/config/schema-lazr.conf 2016-05-18 03:59:44 +0000
348+++ lib/lp/services/config/schema-lazr.conf 2016-05-25 15:34:01 +0000
349@@ -72,6 +72,14 @@
350 # datatype: integer
351 virtualized_socket_timeout: 30
352
353+# The maximum number of idle file download connections per builder that
354+# may be kept open.
355+idle_download_connections_per_builder: 10
356+
357+# The maximum number of file download connections that may be open
358+# across all builders.
359+download_connections: 2048
360+
361 # Activate the Build Notification system.
362 # datatype: boolean
363 send_build_notification: True