Merge ~twom/launchpad:stats-in-buildd-manager into launchpad:master

Proposed by Tom Wardill
Status: Merged
Approved by: Tom Wardill
Approved revision: aef55cbb6e7e9d12113a9995466204785ac577ff
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~twom/launchpad:stats-in-buildd-manager
Merge into: launchpad:master
Diff against target: 392 lines (+218/-5)
10 files modified
constraints.txt (+1/-0)
lib/lp/buildmaster/interactor.py (+2/-2)
lib/lp/buildmaster/manager.py (+40/-0)
lib/lp/buildmaster/tests/mock_slaves.py (+3/-1)
lib/lp/buildmaster/tests/test_manager.py (+90/-1)
lib/lp/services/config/schema-lazr.conf (+6/-1)
lib/lp/services/statsd/__init__.py (+35/-0)
lib/lp/services/statsd/tests/__init__.py (+0/-0)
lib/lp/services/statsd/tests/test_lp_statsd.py (+40/-0)
setup.py (+1/-0)
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Ioana Lasc (community) Approve
Review via email: mp+389011@code.launchpad.net

Commit message

Add statsd totals for builder stats by processor

Description of the change

It'd be useful to graph the builder states over time, by architecture. Create a statsd client that is configured from settings (with a dummy one to prevent network traffic if not).
Use this in builddmanager in an extra (tunable) loop to prevent impingement on other manager work.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

looks good to me but i'd get a review from Colin as well.

review: Approve
Revision history for this message
Colin Watson (cjwatson) :
review: Approve
bf1b6d3... by Tom Wardill

Remove unnecessary empty config

Revision history for this message
Tom Wardill (twom) :
9eea3aa... by Tom Wardill

Add startService test, fix client method name

5f433b1... by Tom Wardill

Actually include the value, not the name

eeba945... by Tom Wardill

Change to use labels/tags for gauge names

aef55cb... by Tom Wardill

Change tag format, include more attributes as tags

Revision history for this message
Colin Watson (cjwatson) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/constraints.txt b/constraints.txt
2index 8a9077d..79ed49e 100644
3--- a/constraints.txt
4+++ b/constraints.txt
5@@ -297,6 +297,7 @@ SimpleTAL==4.3; python_version < "3"
6 SimpleTAL==5.2; python_version >= "3"
7 soupmatchers==0.4
8 soupsieve==1.9
9+statsd==3.3.0
10 # lp:~launchpad-committers/storm/lp
11 storm==0.24+lp416
12 subprocess32==3.2.6
13diff --git a/lib/lp/buildmaster/interactor.py b/lib/lp/buildmaster/interactor.py
14index 7450a0b..b1d9ccd 100644
15--- a/lib/lp/buildmaster/interactor.py
16+++ b/lib/lp/buildmaster/interactor.py
17@@ -321,7 +321,7 @@ BuilderVitals = namedtuple(
18 'BuilderVitals',
19 ('name', 'url', 'processor_names', 'virtualized', 'vm_host',
20 'vm_reset_protocol', 'builderok', 'manual', 'build_queue', 'version',
21- 'clean_status'))
22+ 'clean_status', 'active'))
23
24 _BQ_UNSPECIFIED = object()
25
26@@ -334,7 +334,7 @@ def extract_vitals_from_db(builder, build_queue=_BQ_UNSPECIFIED):
27 [processor.name for processor in builder.processors],
28 builder.virtualized, builder.vm_host, builder.vm_reset_protocol,
29 builder.builderok, builder.manual, build_queue, builder.version,
30- builder.clean_status)
31+ builder.clean_status, builder.active)
32
33
34 class BuilderInteractor(object):
35diff --git a/lib/lp/buildmaster/manager.py b/lib/lp/buildmaster/manager.py
36index a4b96e3..3ace0d8 100644
37--- a/lib/lp/buildmaster/manager.py
38+++ b/lib/lp/buildmaster/manager.py
39@@ -61,6 +61,7 @@ from lp.services.database.stormexpr import (
40 Values,
41 )
42 from lp.services.propertycache import get_property_cache
43+from lp.services.statsd import get_statsd_client
44
45
46 BUILDD_MANAGER_LOG_NAME = "slave-scanner"
47@@ -691,6 +692,9 @@ class BuilddManager(service.Service):
48 # How often to flush logtail updates, in seconds.
49 FLUSH_LOGTAILS_INTERVAL = 15
50
51+ # How often to update stats, in seconds
52+ UPDATE_STATS_INTERVAL = 60
53+
54 def __init__(self, clock=None, builder_factory=None):
55 # Use the clock if provided, it's so that tests can
56 # advance it. Use the reactor by default.
57@@ -702,6 +706,7 @@ class BuilddManager(service.Service):
58 self.logger = self._setupLogger()
59 self.current_builders = []
60 self.pending_logtails = {}
61+ self.statsd_client = get_statsd_client()
62
63 def _setupLogger(self):
64 """Set up a 'slave-scanner' logger that redirects to twisted.
65@@ -721,6 +726,36 @@ class BuilddManager(service.Service):
66 logger.setLevel(level)
67 return logger
68
69+ def updateStats(self):
70+ """Update statsd with the builder statuses."""
71+ self.logger.debug("Updating builder stats.")
72+ counts_by_processor = {}
73+ for builder in self.builder_factory.iterVitals():
74+ if not builder.active:
75+ continue
76+ for processor_name in builder.processor_names:
77+ counts = counts_by_processor.setdefault(
78+ "{},virtualized={}".format(
79+ processor_name,
80+ builder.virtualized),
81+ {'cleaning': 0, 'idle': 0, 'disabled': 0, 'building': 0})
82+ if not builder.builderok:
83+ counts['disabled'] += 1
84+ elif builder.clean_status == BuilderCleanStatus.CLEANING:
85+ counts['cleaning'] += 1
86+ elif (builder.build_queue and
87+ builder.build_queue.status == BuildQueueStatus.RUNNING):
88+ counts['building'] += 1
89+ elif builder.clean_status == BuilderCleanStatus.CLEAN:
90+ counts['idle'] += 1
91+ for processor, counts in counts_by_processor.items():
92+ for count_name, count_value in counts.items():
93+ gauge_name = "builders.{},arch={}".format(
94+ count_name, processor)
95+ self.logger.debug("{}: {}".format(gauge_name, count_value))
96+ self.statsd_client.gauge(gauge_name, count_value)
97+ self.logger.debug("Builder stats update complete.")
98+
99 def checkForNewBuilders(self):
100 """Add and return any new builders."""
101 new_builders = set(
102@@ -790,6 +825,9 @@ class BuilddManager(service.Service):
103 # Schedule bulk flushes for build queue logtail updates.
104 self.flush_logtails_loop, self.flush_logtails_deferred = (
105 self._startLoop(self.FLUSH_LOGTAILS_INTERVAL, self.flushLogTails))
106+ # Schedule stats updates.
107+ self.stats_update_loop, self.stats_update_deferred = (
108+ self._startLoop(self.UPDATE_STATS_INTERVAL, self.updateStats))
109
110 def stopService(self):
111 """Callback for when we need to shut down."""
112@@ -798,9 +836,11 @@ class BuilddManager(service.Service):
113 deferreds = [slave.stopping_deferred for slave in self.builder_slaves]
114 deferreds.append(self.scan_builders_deferred)
115 deferreds.append(self.flush_logtails_deferred)
116+ deferreds.append(self.stats_update_deferred)
117
118 self.flush_logtails_loop.stop()
119 self.scan_builders_loop.stop()
120+ self.stats_update_loop.stop()
121 for slave in self.builder_slaves:
122 slave.stopCycle()
123
124diff --git a/lib/lp/buildmaster/tests/mock_slaves.py b/lib/lp/buildmaster/tests/mock_slaves.py
125index 2794137..2e935e4 100644
126--- a/lib/lp/buildmaster/tests/mock_slaves.py
127+++ b/lib/lp/buildmaster/tests/mock_slaves.py
128@@ -58,7 +58,8 @@ class MockBuilder:
129 processors=None, virtualized=True, vm_host=None,
130 url='http://fake:0000', version=None,
131 clean_status=BuilderCleanStatus.DIRTY,
132- vm_reset_protocol=BuilderResetProtocol.PROTO_1_1):
133+ vm_reset_protocol=BuilderResetProtocol.PROTO_1_1,
134+ active=True):
135 self.currentjob = None
136 self.builderok = builderok
137 self.manual = manual
138@@ -71,6 +72,7 @@ class MockBuilder:
139 self.failnotes = None
140 self.version = version
141 self.clean_status = clean_status
142+ self.active = active
143
144 def setCleanStatus(self, clean_status):
145 self.clean_status = clean_status
146diff --git a/lib/lp/buildmaster/tests/test_manager.py b/lib/lp/buildmaster/tests/test_manager.py
147index b0d56ed..06b4e2d 100644
148--- a/lib/lp/buildmaster/tests/test_manager.py
149+++ b/lib/lp/buildmaster/tests/test_manager.py
150@@ -13,8 +13,12 @@ import os
151 import signal
152 import time
153
154+from fixtures import MockPatch
155 from six.moves import xmlrpc_client
156-from testtools.matchers import Equals
157+from testtools.matchers import (
158+ Equals,
159+ MatchesListwise,
160+ )
161 from testtools.testcase import ExpectedException
162 from testtools.twistedsupport import AsynchronousDeferredRunTest
163 import transaction
164@@ -45,6 +49,7 @@ from lp.buildmaster.interfaces.builder import (
165 IBuilderSet,
166 )
167 from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
168+from lp.buildmaster.interfaces.processor import IProcessorSet
169 from lp.buildmaster.manager import (
170 BuilddManager,
171 BUILDER_FAILURE_THRESHOLD,
172@@ -1207,6 +1212,22 @@ class TestBuilddManager(TestCase):
173 clock.advance(advance)
174 self.assertNotEqual(0, manager.flushLogTails.call_count)
175
176+ def test_startService_adds_updateStats_loop(self):
177+ # When startService is called, the manager will start up a
178+ # updateStats loop.
179+ self._stub_out_scheduleNextScanCycle()
180+ clock = task.Clock()
181+ manager = BuilddManager(clock=clock)
182+
183+ # Replace updateStats() with FakeMethod so we can see if it was
184+ # called.
185+ manager.updateStats = FakeMethod()
186+
187+ manager.startService()
188+ advance = BuilddManager.UPDATE_STATS_INTERVAL + 1
189+ clock.advance(advance)
190+ self.assertNotEqual(0, manager.updateStats.call_count)
191+
192
193 class TestFailureAssessments(TestCaseWithFactory):
194
195@@ -1596,3 +1617,71 @@ class TestBuilddManagerScript(TestCaseWithFactory):
196 self.assertFalse(
197 os.access(rotated_logfilepath, os.F_OK),
198 "Twistd's log file was rotated by twistd.")
199+
200+
201+class TestStats(TestCaseWithFactory):
202+
203+ layer = ZopelessDatabaseLayer
204+ run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=20)
205+
206+ def setUp(self):
207+ super(TestStats, self).setUp()
208+ self.stats_client = self.useFixture(
209+ MockPatch(
210+ 'lp.buildmaster.manager.get_statsd_client'
211+ )).mock()
212+
213+ def test_single_processor(self):
214+ builder = self.factory.makeBuilder()
215+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
216+ self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
217+ transaction.commit()
218+ clock = task.Clock()
219+ manager = BuilddManager(clock=clock)
220+ manager.builder_factory.update()
221+ manager.updateStats()
222+
223+ self.assertEqual(8, self.stats_client.gauge.call_count)
224+ for call in self.stats_client.mock.gauge.call_args_list:
225+ self.assertIn('386', call[0][0])
226+
227+ def test_multiple_processor(self):
228+ builder = self.factory.makeBuilder(
229+ processors=[getUtility(IProcessorSet).getByName('amd64')])
230+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
231+ self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
232+ transaction.commit()
233+ clock = task.Clock()
234+ manager = BuilddManager(clock=clock)
235+ manager.builder_factory.update()
236+ manager.updateStats()
237+
238+ self.assertEqual(12, self.stats_client.gauge.call_count)
239+ i386_calls = [c for c in self.stats_client.gauge.call_args_list
240+ if '386' in c[0][0]]
241+ amd64_calls = [c for c in self.stats_client.gauge.call_args_list
242+ if 'amd64' in c[0][0]]
243+ self.assertEqual(8, len(i386_calls))
244+ self.assertEqual(4, len(amd64_calls))
245+
246+ def test_correct_values(self):
247+ builder = self.factory.makeBuilder(
248+ processors=[getUtility(IProcessorSet).getByName('amd64')])
249+ builder.setCleanStatus(BuilderCleanStatus.CLEANING)
250+ self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
251+ transaction.commit()
252+ clock = task.Clock()
253+ manager = BuilddManager(clock=clock)
254+ manager.builder_factory.update()
255+ manager.updateStats()
256+
257+ self.assertEqual(12, self.stats_client.gauge.call_count)
258+ calls = [c[0] for c in self.stats_client.gauge.call_args_list
259+ if 'amd64' in c[0][0]]
260+ self.assertThat(
261+ calls, MatchesListwise(
262+ [Equals(('builders.disabled,arch=amd64,virtualized=True', 0)),
263+ Equals(('builders.building,arch=amd64,virtualized=True', 0)),
264+ Equals(('builders.idle,arch=amd64,virtualized=True', 0)),
265+ Equals(('builders.cleaning,arch=amd64,virtualized=True', 1))
266+ ]))
267diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
268index 80bc7e2..bb8afcd 100644
269--- a/lib/lp/services/config/schema-lazr.conf
270+++ b/lib/lp/services/config/schema-lazr.conf
271@@ -113,7 +113,6 @@ authentication_endpoint: none
272 # authserver.
273 authentication_timeout: 15
274
275-
276 [canonical]
277 # datatype: boolean
278 show_tracebacks: False
279@@ -2022,3 +2021,9 @@ concurrency: 1
280 timeout: 86400
281 fallback_queue:
282 concurrency: 1
283+
284+# Statsd connection details
285+[statsd]
286+host: none
287+port: none
288+prefix: none
289diff --git a/lib/lp/services/statsd/__init__.py b/lib/lp/services/statsd/__init__.py
290new file mode 100644
291index 0000000..22c839f
292--- /dev/null
293+++ b/lib/lp/services/statsd/__init__.py
294@@ -0,0 +1,35 @@
295+# Copyright 2020 Canonical Ltd. This software is licensed under the
296+# GNU Affero General Public License version 3 (see the file LICENSE).
297+
298+"""Statsd client wrapper with Launchpad configuration"""
299+
300+from __future__ import absolute_import, print_function, unicode_literals
301+
302+__metaclass__ = type
303+__all__ = ['get_statsd_client']
304+
305+
306+from statsd import StatsClient
307+
308+from lp.services.config import config
309+
310+
311+class UnconfiguredStatsdClient:
312+ """Dummy client for if statsd is not configured in the environment.
313+
314+ This client will be used if the statsd settings are not available to
315+ Launchpad. Prevents unnecessary network traffic.
316+ """
317+
318+ def __getattr__(self, name):
319+ return lambda *args, **kwargs: None
320+
321+
322+def get_statsd_client():
323+ if config.statsd.host:
324+ return StatsClient(
325+ host=config.statsd.host,
326+ port=config.statsd.port,
327+ prefix=config.statsd.prefix)
328+ else:
329+ return UnconfiguredStatsdClient()
330diff --git a/lib/lp/services/statsd/tests/__init__.py b/lib/lp/services/statsd/tests/__init__.py
331new file mode 100644
332index 0000000..e69de29
333--- /dev/null
334+++ b/lib/lp/services/statsd/tests/__init__.py
335diff --git a/lib/lp/services/statsd/tests/test_lp_statsd.py b/lib/lp/services/statsd/tests/test_lp_statsd.py
336new file mode 100644
337index 0000000..691e93d
338--- /dev/null
339+++ b/lib/lp/services/statsd/tests/test_lp_statsd.py
340@@ -0,0 +1,40 @@
341+# Copyright 2020 Canonical Ltd. This software is licensed under the
342+# GNU Affero General Public License version 3 (see the file LICENSE).
343+
344+"""Tests for the Launchpad statsd client"""
345+
346+from __future__ import absolute_import, print_function, unicode_literals
347+
348+__metaclass__ = type
349+
350+from statsd import StatsClient
351+
352+from lp.services.config import config
353+from lp.services.statsd import (
354+ get_statsd_client,
355+ UnconfiguredStatsdClient,
356+ )
357+from lp.testing import TestCase
358+from lp.testing.layers import BaseLayer
359+
360+
361+class TestClientConfiguration(TestCase):
362+
363+ layer = BaseLayer
364+
365+ def test_get_correct_instance_unconfigured(self):
366+ """Test that we get the correct client, depending on config values."""
367+ config.push(
368+ 'statsd_test',
369+ "[statsd]\nhost:")
370+ client = get_statsd_client()
371+ self.assertEqual(
372+ type(client), UnconfiguredStatsdClient)
373+
374+ def test_get_correct_instance_configured(self):
375+ config.push(
376+ 'statsd_test',
377+ "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
378+ client = get_statsd_client()
379+ self.assertEqual(
380+ type(client), StatsClient)
381diff --git a/setup.py b/setup.py
382index db7963e..0f15f7f 100644
383--- a/setup.py
384+++ b/setup.py
385@@ -232,6 +232,7 @@ setup(
386 'six',
387 'soupmatchers',
388 'Sphinx',
389+ 'statsd',
390 'storm',
391 # XXX cjwatson 2020-08-07: Temporarily dropped on Python 3 until
392 # codeimport can be ported to Breezy.

Subscribers

People subscribed via source and target branches

to status/vote changes: