Merge ~twom/launchpad:stats-better-builder-stats-stacking into launchpad:master

Proposed by Tom Wardill
Status: Merged
Approved by: Tom Wardill
Approved revision: e8163c3a596f6d86126bac3b4820625c391a894f
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~twom/launchpad:stats-better-builder-stats-stacking
Merge into: launchpad:master
Diff against target: 233 lines (+62/-24)
10 files modified
lib/lp/buildmaster/manager.py (+6/-3)
lib/lp/buildmaster/model/buildfarmjobbehaviour.py (+5/-3)
lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py (+2/-1)
lib/lp/services/config/schema-lazr.conf (+1/-0)
lib/lp/services/statsd/model/statsd_client.py (+6/-0)
lib/lp/services/statsd/numbercruncher.py (+5/-5)
lib/lp/services/statsd/tests/__init__.py (+1/-0)
lib/lp/services/statsd/tests/test_numbercruncher.py (+14/-6)
lib/lp/services/statsd/tests/test_statsd_client.py (+14/-2)
lib/lp/services/webapp/publication.py (+8/-4)
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Ioana Lasc (community) Approve
Review via email: mp+392892@code.launchpad.net

Commit message

Move to use statsd environment config variable

Description of the change

The prefix setting in statsd doesn't let us do the final queries in quite the way we wanted, so add another setting for it and apply it as a label in all the metrics.

Also increase number cruncher logging level to DEBUG to see if we can work out why it appears to stop from the logs.

To post a comment you must log in.
Revision history for this message
Ioana Lasc (ilasc) wrote :

LGTM

review: Approve
Revision history for this message
Colin Watson (cjwatson) wrote :

Ideally I'd like the LP client to fill in the extra label automatically rather than having to repeat it everywhere, but this will do fine for now.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/buildmaster/manager.py b/lib/lp/buildmaster/manager.py
2index 7bbe4aa..e9732ea 100644
3--- a/lib/lp/buildmaster/manager.py
4+++ b/lib/lp/buildmaster/manager.py
5@@ -532,10 +532,13 @@ class SlaveScanner:
6 if builder.current_build is not None:
7 builder.current_build.gotFailure()
8 self.statsd_client.incr(
9- 'builders.judged_failed,build=True,arch={}'.format(
10- builder.current_build.processor.name))
11+ 'builders.judged_failed,build=True,arch={},env={}'.format(
12+ builder.current_build.processor.name,
13+ self.statsd_client.lp_environment))
14 else:
15- self.statsd_client.incr('builders.judged_failed,build=False')
16+ self.statsd_client.incr(
17+ 'builders.judged_failed,build=False,env={}'.format(
18+ self.statsd_client.lp_environment))
19 recover_failure(self.logger, vitals, builder, retry, failure.value)
20 transaction.commit()
21 except Exception:
22diff --git a/lib/lp/buildmaster/model/buildfarmjobbehaviour.py b/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
23index edff53b..88ede6a 100644
24--- a/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
25+++ b/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
26@@ -156,10 +156,12 @@ class BuildFarmJobBehaviourBase:
27 # Update stats
28 job_type = getattr(self.build, 'job_type', None)
29 job_type_name = job_type.name if job_type else 'UNKNOWN'
30- getUtility(IStatsdClient).incr(
31- 'build.count,job_type={},builder_name={}'.format(
32+ statsd_client = getUtility(IStatsdClient)
33+ statsd_client.incr(
34+ 'build.count,job_type={},builder_name={},env={}'.format(
35 job_type_name,
36- self._builder.name))
37+ self._builder.name,
38+ statsd_client.lp_environment))
39
40 logger.info(
41 "Job %s (%s) started on %s: %s %s"
42diff --git a/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py b/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
43index 3bdf53d..06b0e3f 100644
44--- a/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
45+++ b/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
46@@ -275,7 +275,8 @@ class TestDispatchBuildToSlave(StatsMixin, TestCase):
47 self.assertEqual(1, self.stats_client.incr.call_count)
48 self.assertEqual(
49 self.stats_client.incr.call_args_list[0][0],
50- ('build.count,job_type=UNKNOWN,builder_name=mock-builder',))
51+ ('build.count,job_type=UNKNOWN,'
52+ 'builder_name=mock-builder,env=test',))
53
54
55 class TestGetUploadMethodsMixin:
56diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
57index c16270a..aaa9d30 100644
58--- a/lib/lp/services/config/schema-lazr.conf
59+++ b/lib/lp/services/config/schema-lazr.conf
60@@ -2028,3 +2028,4 @@ concurrency: 1
61 host: none
62 port: none
63 prefix: none
64+environment: none
65diff --git a/lib/lp/services/statsd/model/statsd_client.py b/lib/lp/services/statsd/model/statsd_client.py
66index 868af30..6ccf027 100644
67--- a/lib/lp/services/statsd/model/statsd_client.py
68+++ b/lib/lp/services/statsd/model/statsd_client.py
69@@ -43,6 +43,12 @@ class StatsdClient:
70 self._make_client()
71
72 def __getattr__(self, name):
73+ # This is a convenience to keep all statsd client related
74+ # items on the client. Having this separate from prefix
75+ # allows us to use it as a label, rather than as part of the
76+ # gauge name
77+ if name == 'lp_environment':
78+ return config.statsd.environment
79 if self._client is not None:
80 return getattr(self._client, name)
81 else:
82diff --git a/lib/lp/services/statsd/numbercruncher.py b/lib/lp/services/statsd/numbercruncher.py
83index d2301d0..0a0552c 100644
84--- a/lib/lp/services/statsd/numbercruncher.py
85+++ b/lib/lp/services/statsd/numbercruncher.py
86@@ -47,7 +47,7 @@ class NumberCruncher(service.Service):
87 def _setupLogger(self):
88 """Set up a 'number-cruncher' logger that redirects to twisted.
89 """
90- level = logging.INFO
91+ level = logging.DEBUG
92 logger = logging.getLogger(NUMBER_CRUNCHER_LOG_NAME)
93 logger.propagate = False
94
95@@ -74,8 +74,8 @@ class NumberCruncher(service.Service):
96 for queue_type, contents in queue_details.items():
97 virt = queue_type == 'virt'
98 for arch, value in contents.items():
99- gauge_name = "buildqueue,virtualized={},arch={}".format(
100- virt, arch)
101+ gauge_name = "buildqueue,virtualized={},arch={},env={}".format(
102+ virt, arch, self.statsd_client.lp_environment)
103 self.logger.debug("{}: {}".format(gauge_name, value[0]))
104 self.statsd_client.gauge(gauge_name, value[0])
105 self.logger.debug("Build queue stats update complete.")
106@@ -107,8 +107,8 @@ class NumberCruncher(service.Service):
107 counts['idle'] += 1
108 for processor, counts in counts_by_processor.items():
109 for count_name, count_value in counts.items():
110- gauge_name = "builders.{},arch={}".format(
111- count_name, processor)
112+ gauge_name = "builders,status={},arch={},env={}".format(
113+ count_name, processor, self.statsd_client.lp_environment)
114 self.logger.debug("{}: {}".format(gauge_name, count_value))
115 self.statsd_client.gauge(gauge_name, count_value)
116 self.logger.debug("Builder stats update complete.")
117diff --git a/lib/lp/services/statsd/tests/__init__.py b/lib/lp/services/statsd/tests/__init__.py
118index 20885a4..23fb067 100644
119--- a/lib/lp/services/statsd/tests/__init__.py
120+++ b/lib/lp/services/statsd/tests/__init__.py
121@@ -19,5 +19,6 @@ class StatsMixin:
122 # Install a mock statsd client so we can assert against the call
123 # counts and args.
124 self.stats_client = mock.Mock()
125+ self.stats_client.lp_environment = "test"
126 self.useFixture(
127 ZopeUtilityFixture(self.stats_client, IStatsdClient))
128diff --git a/lib/lp/services/statsd/tests/test_numbercruncher.py b/lib/lp/services/statsd/tests/test_numbercruncher.py
129index 47c633f..c5a4ff6 100644
130--- a/lib/lp/services/statsd/tests/test_numbercruncher.py
131+++ b/lib/lp/services/statsd/tests/test_numbercruncher.py
132@@ -85,10 +85,18 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
133 if 'amd64' in c[0][0]]
134 self.assertThat(
135 calls, MatchesListwise(
136- [Equals(('builders.disabled,arch=amd64,virtualized=True', 0)),
137- Equals(('builders.building,arch=amd64,virtualized=True', 0)),
138- Equals(('builders.idle,arch=amd64,virtualized=True', 0)),
139- Equals(('builders.cleaning,arch=amd64,virtualized=True', 1))
140+ [Equals((
141+ 'builders,status=disabled,arch=amd64,'
142+ 'virtualized=True,env=test', 0)),
143+ Equals((
144+ 'builders,status=building,arch=amd64,'
145+ 'virtualized=True,env=test', 0)),
146+ Equals((
147+ 'builders,status=idle,arch=amd64,'
148+ 'virtualized=True,env=test', 0)),
149+ Equals((
150+ 'builders,status=cleaning,arch=amd64,'
151+ 'virtualized=True,env=test', 1))
152 ]))
153
154 def test_updateBuilderQueues(self):
155@@ -109,9 +117,9 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
156 self.assertThat(
157 [x[0] for x in self.stats_client.gauge.call_args_list],
158 MatchesListwise(
159- [Equals(('buildqueue,virtualized=True,arch={}'.format(
160+ [Equals(('buildqueue,virtualized=True,arch={},env=test'.format(
161 build.processor.name), 1)),
162- Equals(('buildqueue,virtualized=False,arch=386', 1))
163+ Equals(('buildqueue,virtualized=False,arch=386,env=test', 1))
164 ]))
165
166 def test_startService_starts_update_queues_loop(self):
167diff --git a/lib/lp/services/statsd/tests/test_statsd_client.py b/lib/lp/services/statsd/tests/test_statsd_client.py
168index 6852af7..2e1a8e1 100644
169--- a/lib/lp/services/statsd/tests/test_statsd_client.py
170+++ b/lib/lp/services/statsd/tests/test_statsd_client.py
171@@ -26,7 +26,8 @@ class TestClientConfiguration(TestCase):
172 self.addCleanup(client.reload)
173 config.push(
174 'statsd_test',
175- "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
176+ "[statsd]\nhost: 127.0.01\n"
177+ "port: 9999\nprefix: test\nenvironment: test\n")
178 client.reload()
179 self.assertIsInstance(client._client, StatsClient)
180
181@@ -45,6 +46,17 @@ class TestClientConfiguration(TestCase):
182 self.addCleanup(client.reload)
183 config.push(
184 'statsd_test',
185- "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
186+ "[statsd]\nhost: 127.0.01\n"
187+ "port: 9999\nprefix: test\nenvironment: test\n")
188 client.reload()
189 self.assertIsInstance(client._client, StatsClient)
190+
191+ def test_lp_environment_is_available(self):
192+ client = getUtility(IStatsdClient)
193+ self.addCleanup(client.reload)
194+ config.push(
195+ 'statsd_test',
196+ "[statsd]\nhost: 127.0.01\n"
197+ "port: 9999\nprefix: test\nenvironment: test\n")
198+ client.reload()
199+ self.assertEqual(client.lp_environment, "test")
200diff --git a/lib/lp/services/webapp/publication.py b/lib/lp/services/webapp/publication.py
201index 7325d9f..9a9a0e3 100644
202--- a/lib/lp/services/webapp/publication.py
203+++ b/lib/lp/services/webapp/publication.py
204@@ -795,13 +795,16 @@ class LaunchpadBrowserPublication(
205 status = request.response.getStatus()
206 if status == 404: # Not Found
207 OpStats.stats['404s'] += 1
208- statsd_client.incr('errors.404')
209+ statsd_client.incr('errors.404,env={}'.format(
210+ statsd_client.lp_environment))
211 elif status == 500: # Unhandled exceptions
212 OpStats.stats['500s'] += 1
213- statsd_client.incr('errors.500')
214+ statsd_client.incr('errors.500,env={}'.format(
215+ statsd_client.lp_environment))
216 elif status == 503: # Timeouts
217 OpStats.stats['503s'] += 1
218- statsd_client.incr('errors.503')
219+ statsd_client.incr('errors.503,env={}'.format(
220+ statsd_client.lp_environment))
221
222 # Increment counters for status code groups.
223 status_group = str(status)[0] + 'XXs'
224@@ -810,7 +813,8 @@ class LaunchpadBrowserPublication(
225 # Increment counter for 5XXs_b.
226 if is_browser(request) and status_group == '5XXs':
227 OpStats.stats['5XXs_b'] += 1
228- statsd_client.incr('errors.5XX')
229+ statsd_client.incr('errors.5XX,env={}'.format(
230+ statsd_client.lp_environment))
231
232 # Make sure our databases are in a sane state for the next request.
233 thread_name = threading.current_thread().name

Subscribers

People subscribed via source and target branches

to status/vote changes: