Merge ~paulgear/ntp-charm/+git/ntp-charm:master into ntp-charm:master

Proposed by Paul Gear
Status: Merged
Merged at revision: b6cf82268f08a744bb58a25f404d7b5c247d5ab7
Proposed branch: ~paulgear/ntp-charm/+git/ntp-charm:master
Merge into: ntp-charm:master
Diff against target: 307 lines (+99/-31)
2 files modified
files/nagios/check_ntpmon.py (+55/-28)
unit_tests/test_check_ntpmon.py (+44/-3)
Reviewer Review Type Date Requested Status
Barry Price Approve
Paul Collins lgtm Approve
Review via email: mp+317085@code.launchpad.net

This proposal supersedes a proposal from 2017-02-13.

Commit message

Selectively downgrade missing sync peer to WARNING

This uses comparison of more recent and less recent polls to determine
whether reachability is improving. If so, a missing sync peer is
considered WARNING rather than CRITICAL, on the assumption that ntpd will
correct the problem shortly. Thanks to Paul Collins for the idea and
initial implementation of this.

Update the test suite to check for this, and add python3 compatibility
for print statements.

To post a comment you must log in.
Revision history for this message
Paul Collins (pjdc) :
review: Approve (lgtm)
Revision history for this message
Barry Price (barryprice) wrote :

Makes sense, +1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/files/nagios/check_ntpmon.py b/files/nagios/check_ntpmon.py
2index 83613dd..8d24df4 100755
3--- a/files/nagios/check_ntpmon.py
4+++ b/files/nagios/check_ntpmon.py
5@@ -19,6 +19,8 @@
6 # this program. If not, see <http://www.gnu.org/licenses/>.
7 #
8
9+from __future__ import print_function
10+
11 import argparse
12 import psutil
13 import re
14@@ -43,6 +45,15 @@ def isipaddressy(name):
15 return re.search(r'^[0-9a-f.:]*$', name) is not None
16
17
18+def report_reachability(old, new):
19+ if old > new:
20+ return "decreasing"
21+ elif new > old:
22+ return "increasing"
23+ else:
24+ return "stable"
25+
26+
27 class CheckNTPMonSilent(object):
28
29 def __init__(self,
30@@ -109,12 +120,12 @@ class CheckNTPMonSilent(object):
31 return True
32
33 def dump(self):
34- print "warnpeers = %d" % (self.warnpeers)
35- print "okpeers = %d" % (self.okpeers)
36- print "warnoffset = %g" % (self.warnoffset)
37- print "critoffset = %g" % (self.critoffset)
38- print "warnreach = %g" % (self.warnreach)
39- print "critreach = %g" % (self.critreach)
40+ print("warnpeers = %d" % (self.warnpeers))
41+ print("okpeers = %d" % (self.okpeers))
42+ print("warnoffset = %g" % (self.warnoffset))
43+ print("critoffset = %g" % (self.critoffset))
44+ print("warnreach = %g" % (self.warnreach))
45+ print("critreach = %g" % (self.critreach))
46
47 @classmethod
48 def clone(cls, obj):
49@@ -151,7 +162,7 @@ class CheckNTPMon(CheckNTPMonSilent):
50 Return 1 if the number of peers is WARNING
51 Return 2 if the number of peers is CRITICAL"""
52 code, msg = CheckNTPMonSilent.peers(self, n)
53- print msg
54+ print(msg)
55 return code
56
57 def offset(self, offset):
58@@ -159,7 +170,7 @@ class CheckNTPMon(CheckNTPMonSilent):
59 Return 1 if the offset is WARNING
60 Return 2 if the offset is CRITICAL"""
61 code, msg = CheckNTPMonSilent.offset(self, offset)
62- print msg
63+ print(msg)
64 return code
65
66 def reachability(self, percent):
67@@ -168,14 +179,14 @@ class CheckNTPMon(CheckNTPMonSilent):
68 Return 2 if the reachability percentage is critical
69 Raise a ValueError if reachability is not a percentage"""
70 code, msg = CheckNTPMonSilent.reachability(self, percent)
71- print msg
72+ print(msg)
73 return code
74
75 def sync(self, synchost):
76 """Return 0 if the synchost is non-zero in length and is a roughly valid host identifier
77 Return 2 otherwise"""
78 code, msg = CheckNTPMonSilent.sync(self, synchost)
79- print msg
80+ print(msg)
81 return code
82
83 def is_silent(self):
84@@ -252,6 +263,8 @@ class NTPPeers(object):
85 'peers': 0,
86 'offsetall': 0,
87 'totalreach': 0,
88+ 'oldreach': 0,
89+ 'newreach': 0,
90 }
91 self.check = check
92
93@@ -284,8 +297,12 @@ class NTPPeers(object):
94 # reachability - this counts the number of bits set in the reachability field
95 # (which is displayed in octal in the ntpq output)
96 # http://stackoverflow.com/questions/9829578/fast-way-of-counting-bits-in-python
97- self.ntpdata['totalreach'] += bin(int(peerdata['reach'],
98- 8)).count("1")
99+ reachint = int(peerdata['reach'], 8)
100+ self.ntpdata['totalreach'] += bin(reachint).count("1")
101+
102+ # count the 4 oldest & 4 newest polls
103+ self.ntpdata['oldreach'] += bin((reachint >> 4) & 0x0f).count("1")
104+ self.ntpdata['newreach'] += bin(reachint & 0x0f).count("1")
105
106 # average offsets
107 if self.ntpdata['survivors'] > 0:
108@@ -309,24 +326,29 @@ class NTPPeers(object):
109
110 def dumppart(self, peertype, peertypeoffset, displaytype):
111 if self.ntpdata[peertype] > 0:
112- print "%d %s peers, average offset %g ms" % (
113+ print("%d %s peers, average offset %g ms" % (
114 self.ntpdata[peertype],
115 displaytype,
116 self.ntpdata[peertypeoffset],
117- )
118+ ))
119
120 def dump(self):
121 if self.ntpdata.get('syncpeer'):
122- print "Synced to: %s, offset %g ms" % (
123- self.ntpdata['syncpeer'], self.ntpdata['offsetsyncpeer'])
124+ print("Synced to: %s, offset %g ms" % (
125+ self.ntpdata['syncpeer'], self.ntpdata['offsetsyncpeer']))
126 else:
127- print "No remote sync peer"
128+ print("No remote sync peer")
129 self.dumppart('peers', 'averageoffset', 'total')
130 self.dumppart('survivors', 'averageoffsetsurvivors', 'good')
131 self.dumppart('backups', 'averageoffsetbackups', 'backup')
132 self.dumppart('discards', 'averageoffsetdiscards', 'discarded')
133- print "Average reachability of all peers: %d%%" % (
134- self.ntpdata['reachability'])
135+ print("Average reachability of all peers: %d%%" % (
136+ self.ntpdata['reachability']))
137+ print("Reachability is %s (%d vs %d)" % (
138+ report_reachability(self.ntpdata['oldreach'], self.ntpdata['newreach']),
139+ self.ntpdata['oldreach'],
140+ self.ntpdata['newreach'],
141+ ))
142
143 def check_peers(self, check=None):
144 """Check the number of usable peers"""
145@@ -358,14 +380,14 @@ class NTPPeers(object):
146 msg + " (" + result[1] + ")"
147 ]
148 else:
149- print msg
150+ print(msg)
151 return 1 if result < 1 else result
152 else:
153 ret = [2, "CRITICAL: No peers for which to check offset"]
154 if check.is_silent():
155 return ret
156 else:
157- print ret[1]
158+ print(ret[1])
159 return ret[0]
160
161 def check_reachability(self, check=None):
162@@ -379,11 +401,16 @@ class NTPPeers(object):
163 if check is None:
164 check = self.check if self.check else CheckNTPMon()
165 if self.ntpdata.get('syncpeer') is None:
166- ret = [2, "CRITICAL: No sync peer"]
167+ if self.ntpdata['oldreach'] < self.ntpdata['newreach']:
168+ # reachability is improving, we should be in sync soon
169+ ret = [1, "WARNING: No sync peer"]
170+ else:
171+ # reachability is decreasing or stable
172+ ret = [2, "CRITICAL: No sync peer"]
173 if check.is_silent():
174 return ret
175 else:
176- print ret[1]
177+ print(ret[1])
178 return ret[0]
179 return check.sync(self.ntpdata['syncpeer'])
180
181@@ -412,9 +439,9 @@ class NTPPeers(object):
182 msg = result[1]
183
184 if msg is None:
185- print "%s returned no results - please report a bug" % (method)
186+ print("%s returned no results - please report a bug" % (method))
187 return 3
188- print msg
189+ print(msg)
190 return ret
191
192 @staticmethod
193@@ -521,14 +548,14 @@ def main():
194 lines = NTPPeers.query() if not args.test else [x.rstrip() for x in sys.stdin.readlines()]
195 if lines is None:
196 # Unknown result
197- print "CRITICAL: Cannot get peers from ntpq. Please check that an NTP server is installed and running."
198+ print("CRITICAL: Cannot get peers from ntpq. Please check that an NTP server is installed and running.")
199 sys.exit(2)
200
201 # Don't report anything other than OK until ntpd has been running for at
202 # least enough time for 8 polling intervals of 64 seconds each.
203 age = NTPProcess().runtime()
204 if age > 0 and age <= args.run_time:
205- print "OK: ntpd still starting up (running %d seconds)" % age
206+ print("OK: ntpd still starting up (running %d seconds)" % age)
207 sys.exit(0)
208
209 # initialise our object with the results of ntpq and our preferred check
210@@ -536,7 +563,7 @@ def main():
211 ntp = NTPPeers(lines, checkntpmon)
212
213 if args.debug:
214- print "\n".join(lines)
215+ print("\n".join(lines))
216 checkntpmon.dump()
217 ntp.dump()
218
219diff --git a/unit_tests/test_check_ntpmon.py b/unit_tests/test_check_ntpmon.py
220index 4c41fc0..3194b31 100755
221--- a/unit_tests/test_check_ntpmon.py
222+++ b/unit_tests/test_check_ntpmon.py
223@@ -19,6 +19,8 @@
224 # this program. If not, see <http://www.gnu.org/licenses/>.
225 #
226
227+from __future__ import print_function
228+
229 import argparse
230 import unittest
231 import sys
232@@ -66,12 +68,25 @@ testdata = [
233 218.189.210.3 .INIT. 16 u - 64 0 0.000 0.000 0.000
234 103.224.117.98 .INIT. 16 u - 64 0 0.000 0.000 0.000
235 118.143.17.82 .INIT. 16 u - 64 0 0.000 0.000 0.000
236- 91.189.89.199 192.93.2.20 2 u 21 64 7 336.631 0.913 0.223
237+ 91.189.89.199 192.93.2.20 2 u 21 64 301 336.631 0.913 0.223
238 175.45.85.97 .INIT. 16 u - 64 0 0.000 0.000 0.000
239 192.189.54.33 .INIT. 16 u - 64 0 0.000 0.000 0.000
240 129.250.35.250 .INIT. 16 u - 64 0 0.000 0.000 0.000
241 54.252.165.245 .INIT. 16 u - 64 0 0.000 0.000 0.000
242 """,
243+"""
244+ remote refid st t when poll reach delay offset jitter
245+==============================================================================
246+ 0.ubuntu.pool.n .POOL. 16 p - 64 0 0.000 0.000 0.000
247+ 1.ubuntu.pool.n .POOL. 16 p - 64 0 0.000 0.000 0.000
248+ 2.ubuntu.pool.n .POOL. 16 p - 64 0 0.000 0.000 0.000
249+ 3.ubuntu.pool.n .POOL. 16 p - 64 0 0.000 0.000 0.000
250+ ntp.ubuntu.com .POOL. 16 p - 64 0 0.000 0.000 0.000
251++91.189.91.157 132.246.11.231 2 u 228 256 377 264.277 1.996 27.598
252++218.189.210.3 118.143.17.82 2 u 42 256 377 3.297 -3.598 1.100
253+ 209.58.185.100 130.133.1.10 2 u 62 64 3 2.237 28.653 0.088
254+ 203.95.213.129 193.62.22.74 2 u 67 64 3 4.002 -1.256 0.041
255+""",
256 ]
257
258 demodata = [
259@@ -342,6 +357,32 @@ class TestCheckNTPMon(unittest.TestCase):
260 self.assertEqual(ntp.check_peers(), 2, 'Low peers non-critical')
261 self.assertEqual(ntp.check_reachability(), 2, 'Low reachability non-critical')
262
263+ def test_NTPPeer4(self):
264+ # check the parsing done by NTPPeers
265+ ntp = NTPPeers(testdata[4].split("\n"))
266+ self.assertEqual(ntp.ntpdata.get('syncpeer'), None)
267+ self.assertEqual(ntp.ntpdata.get('offsetsyncpeer'), None)
268+ self.assertEqual(ntp.ntpdata['survivors'], 2)
269+ self.assertEqual(ntp.ntpdata.get('averageoffsetsurvivors'), 2.7969999999999997)
270+ self.assertEqual(ntp.ntpdata['discards'], 2)
271+ self.assertEqual(ntp.ntpdata['averageoffsetdiscards'], 14.9545)
272+ self.assertEqual(ntp.ntpdata['peers'], 4)
273+ self.assertEqual(ntp.ntpdata['averageoffset'], 8.87575)
274+ self.assertEqual(ntp.ntpdata['reachability'], 62.5)
275+
276+ # run checks on the data
277+ check = CheckNTPMon()
278+ self.assertEqual(check.offset(ntp.ntpdata['averageoffsetdiscards']), 1, 'Discards offset non-warning')
279+ self.assertEqual(check.offset(ntp.ntpdata['averageoffset']), 0, 'Offset non-OK')
280+ self.assertEqual(check.peers(ntp.ntpdata['peers']), 0, 'Peers non-OK')
281+ self.assertEqual(check.reachability(ntp.ntpdata['reachability']), 1, 'Reachability non-warning')
282+
283+ # run overall health checks
284+ self.assertEqual(ntp.check_sync(), 1, 'Missing sync peer non-warning')
285+ self.assertEqual(ntp.check_offset(), 0, 'Normal offset non-OK')
286+ self.assertEqual(ntp.check_peers(), 0, 'OK peer count non-OK')
287+ self.assertEqual(ntp.check_reachability(), 1, 'Low reachability non-critical')
288+
289 def test_defaults(self):
290 c = CheckNTPMon()
291 self.assertEqual(c.warnpeers, 2)
292@@ -463,7 +504,7 @@ def demo():
293 """Duplicate of test_demos which shows full output"""
294 i = 0
295 for d in demodata:
296- print "Parsing demo data %d: %s" % (i, d)
297+ print("Parsing demo data %d: %s" % (i, d))
298 ntp = NTPPeers(d.split("\n"))
299 i += 1
300 ntp.dump()
301@@ -472,7 +513,7 @@ def demo():
302 for method in methods:
303 ret = method()
304 if ret not in [0, 1, 2]:
305- print "Method %s returned invalid result parsing demo data:\n%s" % (method, d)
306+ print("Method %s returned invalid result parsing demo data:\n%s" % (method, d))
307 sys.exit(3)
308
309

Subscribers

People subscribed via source and target branches