Merge ~tcuthbert/turku/+git/turku-api:main into ~turku/turku/+git/turku-api:main

Proposed by Thomas Cuthbert
Status: Approved
Approved by: Thomas Cuthbert
Approved revision: 147397ef167c5cfde36c5bc5eaaf6a1431cbca63
Proposed branch: ~tcuthbert/turku/+git/turku-api:main
Merge into: ~turku/turku/+git/turku-api:main
Diff against target: 133 lines (+127/-0)
1 file modified
scripts/turku_sick_sources (+127/-0)
Reviewer Review Type Date Requested Status
Barry Price Approve
Review via email: mp+411978@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Barry Price (barryprice) wrote :

LGTM +1

review: Approve

Unmerged commits

147397e... by Thomas Cuthbert

Import from Bazaar to Git

f249756... by Thomas Cuthbert

Refactor the metrics based on prometheus querying.

Reviewed-on: https://code.launchpad.net/~tcuthbert/turku/turku-api/+merge/397467
Reviewed-by: Junien Fridrick <email address hidden>

b1cdb06... by Thomas Cuthbert

Refactor the metrics based on findings with prometheus querying.

fc68957... by Thomas Cuthbert

Add unhealthy turku backup sources monitoring script

Reviewed-on: https://code.launchpad.net/~tcuthbert/turku/turku-api/+merge/397366
Reviewed-by: Haw Loeung <email address hidden>

2de52a4... by Thomas Cuthbert

Reduce indentation level

03b53d0... by Thomas Cuthbert

Add unhealthy turku backup sources monitoring script

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/scripts/turku_sick_sources b/scripts/turku_sick_sources
2new file mode 100755
3index 0000000..fc44712
4--- /dev/null
5+++ b/scripts/turku_sick_sources
6@@ -0,0 +1,127 @@
7+#!/usr/bin/env python2
8+
9+# Turku backups
10+# Copyright 2021 Canonical Ltd.
11+#
12+# This program is free software: you can redistribute it and/or modify it
13+# under the terms of the GNU General Public License version 3, as published by
14+# the Free Software Foundation.
15+#
16+# This program is distributed in the hope that it will be useful, but WITHOUT
17+# ANY WARRANTY; without even the implied warranties of MERCHANTABILITY,
18+# SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19+# General Public License for more details.
20+#
21+# You should have received a copy of the GNU General Public License along with
22+# this program. If not, see <http://www.gnu.org/licenses/>.
23+
24+import argparse
25+import os
26+import sys
27+
28+from time import mktime
29+
30+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
31+sys.path.append(BASE_DIR)
32+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "turku_api.settings")
33+
34+from turku_api.models import Source # NOQA: E402
35+
36+
37+MEASUREMENT_NAME = "turku_sick"
38+
39+
def options():
    """Parse the command line and return the resulting argparse namespace.

    The SickSources class docstring is reused verbatim as the --help
    description, so the raw text formatter is used to keep its line breaks.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=SickSources.__doc__,
    )
    # No default is given, so the attribute is None when the flag is omitted.
    cli.add_argument(
        "--metrics_dir",
        type=str,
        help="directory where metrics will be dumped",
    )
    parsed = cli.parse_args()
    return parsed
48+
49+
class SickSources:
    """
    turku_sick_sources dumps metrics in the telegraf line format for consumption by telegraf.

    The tool emits metrics for turku backup sources that are active and published, and failing their backups.

    The tool has a --metrics_dir option that can be used for writing metrics to a file. This is useful when
    you want to use cron + the telegraf file input rather than exec. The metrics will be written to a file named
    sick_sources.out
    """

    # NOTE: the docstring above doubles as the --help text (see options()), so
    # implementation notes live in comments rather than in the docstring.

    # Telegraf doesn't like boolean metrics so use this primitive enum-like object
    # for readability.
    _health_enum = {0: "no", 1: "yes"}

    # Name of the file created inside --metrics_dir.
    _metrics_filename = "sick_sources.out"

    @staticmethod
    def _escape_tag(value):
        """Return value rendered as text and escaped for use as a tag value.

        The line protocol requires commas, equals signs and spaces inside tag
        values to be backslash-escaped; left raw they corrupt the whole line.
        """
        text = "{}".format(value)
        for special in (",", "=", " "):
            text = text.replace(special, "\\" + special)
        return text

    def _collect_sick(self):
        """Return (sources_total, sick_rows) for active, published sources.

        Each entry of sick_rows is a tuple of
        (machine unit name, source name, health label, last backup timestamp)
        describing one source whose healthy() check is falsy.
        """
        sources_total = 0
        sick_rows = []

        for source in Source.objects.filter(
            machine__active=True, machine__published=True, active=True, published=True,
        ):
            sources_total += 1
            # Evaluate the health check once per source and reuse the answer.
            is_healthy = source.healthy()
            if is_healthy:
                continue

            # Rather than mixing types, initialise timestamp as epoch at default precision.
            date_last_backed_up = 0.0
            if source.date_last_backed_up is not None:
                # NOTE(review): mktime() interprets the time tuple in the system's
                # local timezone - confirm that matches how the datetime is stored.
                date_last_backed_up = mktime(source.date_last_backed_up.timetuple())

            sick_rows.append(
                (
                    source.machine.unit_name,
                    source.name,
                    self._health_enum[int(bool(is_healthy))],
                    date_last_backed_up,
                )
            )

        return sources_total, sick_rows

    def run(self, options):
        """Print metrics for unhealthy sources and optionally dump them to a file.

        options is the namespace returned by options(); only its metrics_dir
        attribute is read. Metrics always go to stdout. They are additionally
        written to <metrics_dir>/sick_sources.out when metrics_dir names an
        existing directory.

        Always returns None, so the script exits with status 0.
        """
        sources_total, sick_rows = self._collect_sick()

        if not sick_rows:
            # Nothing is emitted and the metrics file is left untouched here.
            sys.stdout.write("Nothing to do, all turku sources are healthy\n")
            return

        data = [
            "{measurement} sources_unhealthy={sources_sick},sources_total={sources_total}\n".format(
                measurement=MEASUREMENT_NAME,
                sources_sick=len(sick_rows),
                sources_total=sources_total,
            )
        ]

        for machine_unit_name, name, health, date_last_backed_up in sick_rows:
            # The metric we are exporting is the unix timestamp of the date last backed up.
            # Include a healthy label for concise query filtering.
            data.append(
                (
                    "{measurement},source_machine_name={machine_unit_name},source_name={name},healthy={health} "
                    "date_last_backed_up={date_last_backed_up}\n"
                ).format(
                    measurement=MEASUREMENT_NAME,
                    machine_unit_name=self._escape_tag(machine_unit_name),
                    name=self._escape_tag(name),
                    health=health,
                    date_last_backed_up=date_last_backed_up,
                )
            )

        sys.stdout.write("".join(data))

        # --metrics_dir is optional; os.path.isdir(None) raises TypeError, so
        # bail out before touching the filesystem when it was not supplied.
        metrics_dir = getattr(options, "metrics_dir", None)
        if not metrics_dir:
            return

        if not os.path.isdir(metrics_dir):
            # Tell the operator instead of silently skipping the dump.
            sys.stderr.write(
                "Not writing metrics file, {} is not a directory\n".format(metrics_dir)
            )
            return

        file_p = os.path.join(metrics_dir, self._metrics_filename)
        with open(file_p, "w") as f:
            f.writelines(data)

        sys.stdout.write(
            "Sick turku sources successfully dumped to: {}\n".format(file_p)
        )
131+
if __name__ == "__main__":
    # Script entry point: build the reporter, parse the command line, and hand
    # whatever run() returns to sys.exit() as the process exit status.
    reporter = SickSources()
    exit_status = reporter.run(options())
    sys.exit(exit_status)

Subscribers

People subscribed via source and target branches

to all changes: