Merge ~twom/launchpad:stats-in-buildd-manager into launchpad:master

Proposed by Tom Wardill
Status: Merged
Approved by: Tom Wardill
Approved revision: aef55cbb6e7e9d12113a9995466204785ac577ff
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~twom/launchpad:stats-in-buildd-manager
Merge into: launchpad:master
Diff against target: 392 lines (+218/-5)
10 files modified
constraints.txt (+1/-0)
lib/lp/buildmaster/interactor.py (+2/-2)
lib/lp/buildmaster/manager.py (+40/-0)
lib/lp/buildmaster/tests/mock_slaves.py (+3/-1)
lib/lp/buildmaster/tests/test_manager.py (+90/-1)
lib/lp/services/config/schema-lazr.conf (+6/-1)
lib/lp/services/statsd/__init__.py (+35/-0)
lib/lp/services/statsd/tests/__init__.py (+0/-0)
lib/lp/services/statsd/tests/test_lp_statsd.py (+40/-0)
setup.py (+1/-0)
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Ioana Lasc (community) Approve
Review via email: mp+389011@code.launchpad.net

Commit message

Add statsd totals for builder stats by processor

Description of the change

It'd be useful to graph the builder states over time, by architecture. Create a statsd client that is configured from settings (with a dummy one to prevent network traffic if not).
Use this in builddmanager in an extra (tunable) loop to prevent impingement on other manager work.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

looks good to me but i'd get a review from Colin as well.

review: Approve
Revision history for this message
Colin Watson (cjwatson) :
review: Approve
bf1b6d3... by Tom Wardill

Remove unnecessary empty config

Revision history for this message
Tom Wardill (twom) :
9eea3aa... by Tom Wardill

Add startService test, fix client method name

5f433b1... by Tom Wardill

Actually include the value, not the name

eeba945... by Tom Wardill

Change to use labels/tags for gauge names

aef55cb... by Tom Wardill

Change tag format, include more attributes as tags

Revision history for this message
Colin Watson (cjwatson) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/constraints.txt b/constraints.txt
index 8a9077d..79ed49e 100644
--- a/constraints.txt
+++ b/constraints.txt
@@ -297,6 +297,7 @@ SimpleTAL==4.3; python_version < "3"
297SimpleTAL==5.2; python_version >= "3"297SimpleTAL==5.2; python_version >= "3"
298soupmatchers==0.4298soupmatchers==0.4
299soupsieve==1.9299soupsieve==1.9
300statsd==3.3.0
300# lp:~launchpad-committers/storm/lp301# lp:~launchpad-committers/storm/lp
301storm==0.24+lp416302storm==0.24+lp416
302subprocess32==3.2.6303subprocess32==3.2.6
diff --git a/lib/lp/buildmaster/interactor.py b/lib/lp/buildmaster/interactor.py
index 7450a0b..b1d9ccd 100644
--- a/lib/lp/buildmaster/interactor.py
+++ b/lib/lp/buildmaster/interactor.py
@@ -321,7 +321,7 @@ BuilderVitals = namedtuple(
321 'BuilderVitals',321 'BuilderVitals',
322 ('name', 'url', 'processor_names', 'virtualized', 'vm_host',322 ('name', 'url', 'processor_names', 'virtualized', 'vm_host',
323 'vm_reset_protocol', 'builderok', 'manual', 'build_queue', 'version',323 'vm_reset_protocol', 'builderok', 'manual', 'build_queue', 'version',
324 'clean_status'))324 'clean_status', 'active'))
325325
326_BQ_UNSPECIFIED = object()326_BQ_UNSPECIFIED = object()
327327
@@ -334,7 +334,7 @@ def extract_vitals_from_db(builder, build_queue=_BQ_UNSPECIFIED):
334 [processor.name for processor in builder.processors],334 [processor.name for processor in builder.processors],
335 builder.virtualized, builder.vm_host, builder.vm_reset_protocol,335 builder.virtualized, builder.vm_host, builder.vm_reset_protocol,
336 builder.builderok, builder.manual, build_queue, builder.version,336 builder.builderok, builder.manual, build_queue, builder.version,
337 builder.clean_status)337 builder.clean_status, builder.active)
338338
339339
340class BuilderInteractor(object):340class BuilderInteractor(object):
diff --git a/lib/lp/buildmaster/manager.py b/lib/lp/buildmaster/manager.py
index a4b96e3..3ace0d8 100644
--- a/lib/lp/buildmaster/manager.py
+++ b/lib/lp/buildmaster/manager.py
@@ -61,6 +61,7 @@ from lp.services.database.stormexpr import (
61 Values,61 Values,
62 )62 )
63from lp.services.propertycache import get_property_cache63from lp.services.propertycache import get_property_cache
64from lp.services.statsd import get_statsd_client
6465
6566
66BUILDD_MANAGER_LOG_NAME = "slave-scanner"67BUILDD_MANAGER_LOG_NAME = "slave-scanner"
@@ -691,6 +692,9 @@ class BuilddManager(service.Service):
691 # How often to flush logtail updates, in seconds.692 # How often to flush logtail updates, in seconds.
692 FLUSH_LOGTAILS_INTERVAL = 15693 FLUSH_LOGTAILS_INTERVAL = 15
693694
695 # How often to update stats, in seconds
696 UPDATE_STATS_INTERVAL = 60
697
694 def __init__(self, clock=None, builder_factory=None):698 def __init__(self, clock=None, builder_factory=None):
695 # Use the clock if provided, it's so that tests can699 # Use the clock if provided, it's so that tests can
696 # advance it. Use the reactor by default.700 # advance it. Use the reactor by default.
@@ -702,6 +706,7 @@ class BuilddManager(service.Service):
702 self.logger = self._setupLogger()706 self.logger = self._setupLogger()
703 self.current_builders = []707 self.current_builders = []
704 self.pending_logtails = {}708 self.pending_logtails = {}
709 self.statsd_client = get_statsd_client()
705710
706 def _setupLogger(self):711 def _setupLogger(self):
707 """Set up a 'slave-scanner' logger that redirects to twisted.712 """Set up a 'slave-scanner' logger that redirects to twisted.
@@ -721,6 +726,36 @@ class BuilddManager(service.Service):
721 logger.setLevel(level)726 logger.setLevel(level)
722 return logger727 return logger
723728
729 def updateStats(self):
730 """Update statsd with the builder statuses."""
731 self.logger.debug("Updating builder stats.")
732 counts_by_processor = {}
733 for builder in self.builder_factory.iterVitals():
734 if not builder.active:
735 continue
736 for processor_name in builder.processor_names:
737 counts = counts_by_processor.setdefault(
738 "{},virtualized={}".format(
739 processor_name,
740 builder.virtualized),
741 {'cleaning': 0, 'idle': 0, 'disabled': 0, 'building': 0})
742 if not builder.builderok:
743 counts['disabled'] += 1
744 elif builder.clean_status == BuilderCleanStatus.CLEANING:
745 counts['cleaning'] += 1
746 elif (builder.build_queue and
747 builder.build_queue.status == BuildQueueStatus.RUNNING):
748 counts['building'] += 1
749 elif builder.clean_status == BuilderCleanStatus.CLEAN:
750 counts['idle'] += 1
751 for processor, counts in counts_by_processor.items():
752 for count_name, count_value in counts.items():
753 gauge_name = "builders.{},arch={}".format(
754 count_name, processor)
755 self.logger.debug("{}: {}".format(gauge_name, count_value))
756 self.statsd_client.gauge(gauge_name, count_value)
757 self.logger.debug("Builder stats update complete.")
758
724 def checkForNewBuilders(self):759 def checkForNewBuilders(self):
725 """Add and return any new builders."""760 """Add and return any new builders."""
726 new_builders = set(761 new_builders = set(
@@ -790,6 +825,9 @@ class BuilddManager(service.Service):
790 # Schedule bulk flushes for build queue logtail updates.825 # Schedule bulk flushes for build queue logtail updates.
791 self.flush_logtails_loop, self.flush_logtails_deferred = (826 self.flush_logtails_loop, self.flush_logtails_deferred = (
792 self._startLoop(self.FLUSH_LOGTAILS_INTERVAL, self.flushLogTails))827 self._startLoop(self.FLUSH_LOGTAILS_INTERVAL, self.flushLogTails))
828 # Schedule stats updates.
829 self.stats_update_loop, self.stats_update_deferred = (
830 self._startLoop(self.UPDATE_STATS_INTERVAL, self.updateStats))
793831
794 def stopService(self):832 def stopService(self):
795 """Callback for when we need to shut down."""833 """Callback for when we need to shut down."""
@@ -798,9 +836,11 @@ class BuilddManager(service.Service):
798 deferreds = [slave.stopping_deferred for slave in self.builder_slaves]836 deferreds = [slave.stopping_deferred for slave in self.builder_slaves]
799 deferreds.append(self.scan_builders_deferred)837 deferreds.append(self.scan_builders_deferred)
800 deferreds.append(self.flush_logtails_deferred)838 deferreds.append(self.flush_logtails_deferred)
839 deferreds.append(self.stats_update_deferred)
801840
802 self.flush_logtails_loop.stop()841 self.flush_logtails_loop.stop()
803 self.scan_builders_loop.stop()842 self.scan_builders_loop.stop()
843 self.stats_update_loop.stop()
804 for slave in self.builder_slaves:844 for slave in self.builder_slaves:
805 slave.stopCycle()845 slave.stopCycle()
806846
diff --git a/lib/lp/buildmaster/tests/mock_slaves.py b/lib/lp/buildmaster/tests/mock_slaves.py
index 2794137..2e935e4 100644
--- a/lib/lp/buildmaster/tests/mock_slaves.py
+++ b/lib/lp/buildmaster/tests/mock_slaves.py
@@ -58,7 +58,8 @@ class MockBuilder:
58 processors=None, virtualized=True, vm_host=None,58 processors=None, virtualized=True, vm_host=None,
59 url='http://fake:0000', version=None,59 url='http://fake:0000', version=None,
60 clean_status=BuilderCleanStatus.DIRTY,60 clean_status=BuilderCleanStatus.DIRTY,
61 vm_reset_protocol=BuilderResetProtocol.PROTO_1_1):61 vm_reset_protocol=BuilderResetProtocol.PROTO_1_1,
62 active=True):
62 self.currentjob = None63 self.currentjob = None
63 self.builderok = builderok64 self.builderok = builderok
64 self.manual = manual65 self.manual = manual
@@ -71,6 +72,7 @@ class MockBuilder:
71 self.failnotes = None72 self.failnotes = None
72 self.version = version73 self.version = version
73 self.clean_status = clean_status74 self.clean_status = clean_status
75 self.active = active
7476
75 def setCleanStatus(self, clean_status):77 def setCleanStatus(self, clean_status):
76 self.clean_status = clean_status78 self.clean_status = clean_status
diff --git a/lib/lp/buildmaster/tests/test_manager.py b/lib/lp/buildmaster/tests/test_manager.py
index b0d56ed..06b4e2d 100644
--- a/lib/lp/buildmaster/tests/test_manager.py
+++ b/lib/lp/buildmaster/tests/test_manager.py
@@ -13,8 +13,12 @@ import os
13import signal13import signal
14import time14import time
1515
16from fixtures import MockPatch
16from six.moves import xmlrpc_client17from six.moves import xmlrpc_client
17from testtools.matchers import Equals18from testtools.matchers import (
19 Equals,
20 MatchesListwise,
21 )
18from testtools.testcase import ExpectedException22from testtools.testcase import ExpectedException
19from testtools.twistedsupport import AsynchronousDeferredRunTest23from testtools.twistedsupport import AsynchronousDeferredRunTest
20import transaction24import transaction
@@ -45,6 +49,7 @@ from lp.buildmaster.interfaces.builder import (
45 IBuilderSet,49 IBuilderSet,
46 )50 )
47from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet51from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet
52from lp.buildmaster.interfaces.processor import IProcessorSet
48from lp.buildmaster.manager import (53from lp.buildmaster.manager import (
49 BuilddManager,54 BuilddManager,
50 BUILDER_FAILURE_THRESHOLD,55 BUILDER_FAILURE_THRESHOLD,
@@ -1207,6 +1212,22 @@ class TestBuilddManager(TestCase):
1207 clock.advance(advance)1212 clock.advance(advance)
1208 self.assertNotEqual(0, manager.flushLogTails.call_count)1213 self.assertNotEqual(0, manager.flushLogTails.call_count)
12091214
1215 def test_startService_adds_updateStats_loop(self):
1216 # When startService is called, the manager will start up a
1217 # updateStats loop.
1218 self._stub_out_scheduleNextScanCycle()
1219 clock = task.Clock()
1220 manager = BuilddManager(clock=clock)
1221
1222 # Replace updateStats() with FakeMethod so we can see if it was
1223 # called.
1224 manager.updateStats = FakeMethod()
1225
1226 manager.startService()
1227 advance = BuilddManager.UPDATE_STATS_INTERVAL + 1
1228 clock.advance(advance)
1229 self.assertNotEqual(0, manager.updateStats.call_count)
1230
12101231
1211class TestFailureAssessments(TestCaseWithFactory):1232class TestFailureAssessments(TestCaseWithFactory):
12121233
@@ -1596,3 +1617,71 @@ class TestBuilddManagerScript(TestCaseWithFactory):
1596 self.assertFalse(1617 self.assertFalse(
1597 os.access(rotated_logfilepath, os.F_OK),1618 os.access(rotated_logfilepath, os.F_OK),
1598 "Twistd's log file was rotated by twistd.")1619 "Twistd's log file was rotated by twistd.")
1620
1621
1622class TestStats(TestCaseWithFactory):
1623
1624 layer = ZopelessDatabaseLayer
1625 run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=20)
1626
1627 def setUp(self):
1628 super(TestStats, self).setUp()
1629 self.stats_client = self.useFixture(
1630 MockPatch(
1631 'lp.buildmaster.manager.get_statsd_client'
1632 )).mock()
1633
1634 def test_single_processor(self):
1635 builder = self.factory.makeBuilder()
1636 builder.setCleanStatus(BuilderCleanStatus.CLEAN)
1637 self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
1638 transaction.commit()
1639 clock = task.Clock()
1640 manager = BuilddManager(clock=clock)
1641 manager.builder_factory.update()
1642 manager.updateStats()
1643
1644 self.assertEqual(8, self.stats_client.gauge.call_count)
1645 for call in self.stats_client.mock.gauge.call_args_list:
1646 self.assertIn('386', call[0][0])
1647
1648 def test_multiple_processor(self):
1649 builder = self.factory.makeBuilder(
1650 processors=[getUtility(IProcessorSet).getByName('amd64')])
1651 builder.setCleanStatus(BuilderCleanStatus.CLEAN)
1652 self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
1653 transaction.commit()
1654 clock = task.Clock()
1655 manager = BuilddManager(clock=clock)
1656 manager.builder_factory.update()
1657 manager.updateStats()
1658
1659 self.assertEqual(12, self.stats_client.gauge.call_count)
1660 i386_calls = [c for c in self.stats_client.gauge.call_args_list
1661 if '386' in c[0][0]]
1662 amd64_calls = [c for c in self.stats_client.gauge.call_args_list
1663 if 'amd64' in c[0][0]]
1664 self.assertEqual(8, len(i386_calls))
1665 self.assertEqual(4, len(amd64_calls))
1666
1667 def test_correct_values(self):
1668 builder = self.factory.makeBuilder(
1669 processors=[getUtility(IProcessorSet).getByName('amd64')])
1670 builder.setCleanStatus(BuilderCleanStatus.CLEANING)
1671 self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
1672 transaction.commit()
1673 clock = task.Clock()
1674 manager = BuilddManager(clock=clock)
1675 manager.builder_factory.update()
1676 manager.updateStats()
1677
1678 self.assertEqual(12, self.stats_client.gauge.call_count)
1679 calls = [c[0] for c in self.stats_client.gauge.call_args_list
1680 if 'amd64' in c[0][0]]
1681 self.assertThat(
1682 calls, MatchesListwise(
1683 [Equals(('builders.disabled,arch=amd64,virtualized=True', 0)),
1684 Equals(('builders.building,arch=amd64,virtualized=True', 0)),
1685 Equals(('builders.idle,arch=amd64,virtualized=True', 0)),
1686 Equals(('builders.cleaning,arch=amd64,virtualized=True', 1))
1687 ]))
diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
index 80bc7e2..bb8afcd 100644
--- a/lib/lp/services/config/schema-lazr.conf
+++ b/lib/lp/services/config/schema-lazr.conf
@@ -113,7 +113,6 @@ authentication_endpoint: none
113# authserver.113# authserver.
114authentication_timeout: 15114authentication_timeout: 15
115115
116
117[canonical]116[canonical]
118# datatype: boolean117# datatype: boolean
119show_tracebacks: False118show_tracebacks: False
@@ -2022,3 +2021,9 @@ concurrency: 1
2022timeout: 864002021timeout: 86400
2023fallback_queue:2022fallback_queue:
2024concurrency: 12023concurrency: 1
2024
2025# Statsd connection details
2026[statsd]
2027host: none
2028port: none
2029prefix: none
diff --git a/lib/lp/services/statsd/__init__.py b/lib/lp/services/statsd/__init__.py
2025new file mode 1006442030new file mode 100644
index 0000000..22c839f
--- /dev/null
+++ b/lib/lp/services/statsd/__init__.py
@@ -0,0 +1,35 @@
1# Copyright 2020 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Statsd client wrapper with Launchpad configuration"""
5
6from __future__ import absolute_import, print_function, unicode_literals
7
8__metaclass__ = type
9__all__ = ['get_statsd_client']
10
11
12from statsd import StatsClient
13
14from lp.services.config import config
15
16
17class UnconfiguredStatsdClient:
18 """Dummy client for if statsd is not configured in the environment.
19
20 This client will be used if the statsd settings are not available to
21 Launchpad. Prevents unnecessary network traffic.
22 """
23
24 def __getattr__(self, name):
25 return lambda *args, **kwargs: None
26
27
28def get_statsd_client():
29 if config.statsd.host:
30 return StatsClient(
31 host=config.statsd.host,
32 port=config.statsd.port,
33 prefix=config.statsd.prefix)
34 else:
35 return UnconfiguredStatsdClient()
diff --git a/lib/lp/services/statsd/tests/__init__.py b/lib/lp/services/statsd/tests/__init__.py
0new file mode 10064436new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/lp/services/statsd/tests/__init__.py
diff --git a/lib/lp/services/statsd/tests/test_lp_statsd.py b/lib/lp/services/statsd/tests/test_lp_statsd.py
1new file mode 10064437new file mode 100644
index 0000000..691e93d
--- /dev/null
+++ b/lib/lp/services/statsd/tests/test_lp_statsd.py
@@ -0,0 +1,40 @@
1# Copyright 2020 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Tests for the Launchpad statsd client"""
5
6from __future__ import absolute_import, print_function, unicode_literals
7
8__metaclass__ = type
9
10from statsd import StatsClient
11
12from lp.services.config import config
13from lp.services.statsd import (
14 get_statsd_client,
15 UnconfiguredStatsdClient,
16 )
17from lp.testing import TestCase
18from lp.testing.layers import BaseLayer
19
20
21class TestClientConfiguration(TestCase):
22
23 layer = BaseLayer
24
25 def test_get_correct_instance_unconfigured(self):
26 """Test that we get the correct client, depending on config values."""
27 config.push(
28 'statsd_test',
29 "[statsd]\nhost:")
30 client = get_statsd_client()
31 self.assertEqual(
32 type(client), UnconfiguredStatsdClient)
33
34 def test_get_correct_instance_configured(self):
35 config.push(
36 'statsd_test',
37 "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
38 client = get_statsd_client()
39 self.assertEqual(
40 type(client), StatsClient)
diff --git a/setup.py b/setup.py
index db7963e..0f15f7f 100644
--- a/setup.py
+++ b/setup.py
@@ -232,6 +232,7 @@ setup(
232 'six',232 'six',
233 'soupmatchers',233 'soupmatchers',
234 'Sphinx',234 'Sphinx',
235 'statsd',
235 'storm',236 'storm',
236 # XXX cjwatson 2020-08-07: Temporarily dropped on Python 3 until237 # XXX cjwatson 2020-08-07: Temporarily dropped on Python 3 until
237 # codeimport can be ported to Breezy.238 # codeimport can be ported to Breezy.

Subscribers

People subscribed via source and target branches

to status/vote changes: