Merge ~jillrouleau/charm-nrpe:xfs_checks into ~nrpe-charmers/charm-nrpe:master

Proposed by Jill Rouleau
Status: Merged
Approved by: Xav Paice
Approved revision: fdc3cda34b22235ba2d7777e048abce3ae71c053
Merged at revision: 5bd86d0b09166224a866c6e83c6c9b0bf0d3f93d
Proposed branch: ~jillrouleau/charm-nrpe:xfs_checks
Merge into: ~nrpe-charmers/charm-nrpe:master
Diff against target: 84 lines (+59/-0)
3 files modified
config.yaml (+7/-0)
files/plugins/check_xfs_errors.py (+46/-0)
hooks/nrpe_helpers.py (+6/-0)
Reviewer | Review Type | Date Requested | Status
Xav Paice (community) | | | Approve
Review via email: mp+333717@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Xav Paice (xavpaice) wrote :

LGTM

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/config.yaml b/config.yaml
index 3a55f70..b679aae 100644
--- a/config.yaml
+++ b/config.yaml
@@ -143,3 +143,10 @@ options:
143 description: |
144 A string to be appended onto all the nrpe checks created by this charm
145 to avoid potential clashes with existing checks
146 xfs_errors:
147 default: "120"
148 type: string
149 description: |
150 dmesg history length to check for xfs errors, in minutes
151 .
152 Set to '' in order to disable this check.
diff --git a/files/plugins/check_xfs_errors.py b/files/plugins/check_xfs_errors.py
new file mode 100755
index 0000000..c031336
--- /dev/null
+++ b/files/plugins/check_xfs_errors.py
@@ -0,0 +1,46 @@
1#!/usr/bin/env python3
2#
3# Copyright 2017 Canonical Ltd
4#
5# Author: Jill Rouleau <jill.rouleau@canonical.com>
6#
7# Check for xfs errors and alert
8#
9
10import sys
11import re
12import datetime
13import subprocess
14
15# error messages commonly seen in dmesg on xfs errors
16raw_xfs_errors = ['XFS_WANT_CORRUPTED_',
17 'xfs_error_report',
18 'corruption detected at xfs_',
19 'Unmount and run xfs_repair']
20
21xfs_regex = [re.compile(i) for i in raw_xfs_errors]
22
23# nagios can't read from kern.log, so we look at dmesg - this does present
24# a known limitation if a node is rebooted or dmesg is otherwise cleared.
25log_lines = [line for line in subprocess.getoutput(['dmesg -T']).split('\n')]
26
27err_results = [line for line in log_lines for rgx in xfs_regex if
28 re.search(rgx, line)]
29
30# Look for errors within the last N minutes, specified in the check definition
31check_delta = int(sys.argv[1])
32
33# dmesg -T formatted timestamps are inside [], so we need to add them
34datetime_delta = '['+(datetime.datetime.now() -
35 datetime.timedelta(minutes=check_delta)
36 ).strftime('%c')+']'
37
38recent_logs = [i for i in err_results if i >= datetime_delta]
39
40if recent_logs:
41 print('CRITCAL: Recent XFS errors in kern.log.'+'\n'+'{}'.format(
42 recent_logs))
43 sys.exit(2)
44else:
45 print('OK')
46 sys.exit(0)
diff --git a/hooks/nrpe_helpers.py b/hooks/nrpe_helpers.py
index 60008f9..cfcb348 100644
--- a/hooks/nrpe_helpers.py
+++ b/hooks/nrpe_helpers.py
@@ -337,6 +337,12 @@ class SubordinateCheckDefinitions(dict):
337 'cmd_exec': local_plugin_dir + 'check_conntrack.sh',
338 'cmd_params': hookenv.config('conntrack'),
339 },
340 {
341 'description': 'XFS Errors',
342 'cmd_name': 'check_xfs_errors',
343 'cmd_exec': local_plugin_dir + 'check_xfs_errors.py',
344 'cmd_params': hookenv.config('xfs_errors'),
345 },
346 ]
347 self['checks'] = []
348 sub_postfix = str(hookenv.config("sub_postfix"))

Subscribers

People subscribed via source and target branches