Merge charm-hw-health:unknown-on-tool-error into charm-hw-health:master

Proposed by Facundo Ciccioli
Status: Work in progress
Proposed branch: charm-hw-health:unknown-on-tool-error
Merge into: charm-hw-health:master
Diff against target: 122 lines (+39/-7)
4 files modified
src/files/megacli/check_megacli.py (+7/-4)
src/files/perccli/check_perccli.py (+14/-1)
src/tests/unit/test_check_megacli.py (+5/-2)
src/tests/unit/test_check_perccli.py (+13/-0)
Reviewer Review Type Date Requested Status
Xav Paice (community) Needs Fixing
Stephan Pampel (community) Approve
🤖 prod-jenkaas-bootstack (community) continuous-integration Approve
BootStack Reviewers mr tracking; do not claim Pending
BootStack Reviewers Pending
Review via email: mp+415103@code.launchpad.net

Commit message

For megacli and perccli, report UNKNOWN when the tool's execution fails

To post a comment you must log in.
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

This merge proposal is being monitored by mergebot. Change the status to Approved to merge.

Revision history for this message
🤖 prod-jenkaas-bootstack (prod-jenkaas-bootstack) wrote :

A CI job is currently in progress. A follow up comment will be added when it completes.

Revision history for this message
🤖 prod-jenkaas-bootstack (prod-jenkaas-bootstack) wrote :
review: Approve (continuous-integration)
Revision history for this message
Stephan Pampel (stephanpampel) wrote :

LGTM

review: Approve
Revision history for this message
Xav Paice (xavpaice) wrote :

The change makes sense, I agree UNKNOWN is a more appropriate state, and will alert where WARNING will not.

Let's not provide the location of the failed command output, unless we're able to show that output somehow without needing ssh to the unit. Nagios check output needs to be very brief and a single line, and the location of the file doesn't change according to the result of the test.

review: Needs Fixing
Revision history for this message
Alvaro Uria (aluria) wrote :

Marked WIP until Xav's concerns are addressed.

Unmerged commits

2140e14... by Facundo Ciccioli

For megacli and perccli, report UNKNOWN when the tool's execution fails

A warning is not enough. We want to know if the charm decided to install
the wrong tool or if the host's hardware ceased to be supported by the
tool.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/src/files/megacli/check_megacli.py b/src/files/megacli/check_megacli.py
2index be8c065..e5a83e5 100755
3--- a/src/files/megacli/check_megacli.py
4+++ b/src/files/megacli/check_megacli.py
5@@ -7,7 +7,7 @@ import os
6 import re
7 import sys
8
9-from nagios_plugin3 import CriticalError, WarnError, try_check
10+from nagios_plugin3 import CriticalError, UnknownError, WarnError, try_check
11
12 try:
13 # shared lib will be under $NAGIOS_PLUGIN_DIR at runtime
14@@ -62,8 +62,11 @@ def check_cron_return_code(rc_file=RC_FILE):
15 with open(rc_file) as return_code_file:
16 rc = return_code_file.read().strip()
17 if rc != 0:
18- msg = "WARNING: megacli cron job failed, return code {}".format(rc)
19- raise WarnError(msg)
20+ msg = (
21+ "UNKNOWN: megacli cron job failed, return code {}. "
22+ "Last job output on {}".format(rc, INPUT_FILE)
23+ )
24+ raise UnknownError(msg)
25
26
27 def parse_output(policy=False): # noqa:C901
28@@ -151,7 +154,7 @@ def parse_args(argv=None):
29 def main(argv, policy=False):
30 """Define main subroutine."""
31 parse_args(argv)
32- check_cron_return_code(RC_FILE)
33+ try_check(check_cron_return_code, RC_FILE)
34 try_check(parse_output, policy)
35
36
37diff --git a/src/files/perccli/check_perccli.py b/src/files/perccli/check_perccli.py
38index 7cc2c9c..5133830 100755
39--- a/src/files/perccli/check_perccli.py
40+++ b/src/files/perccli/check_perccli.py
41@@ -6,7 +6,7 @@ import json
42 import os
43 import sys
44
45-from nagios_plugin3 import CriticalError, WarnError, try_check
46+from nagios_plugin3 import CriticalError, UnknownError, WarnError, try_check
47
48 try:
49 # shared lib will be under $NAGIOS_PLUGIN_DIR at runtime
50@@ -21,6 +21,7 @@ except ImportError:
51 from hw_health_lib import HWCheckArgumentParser
52
53 INPUT_FILE = "/var/lib/nagios/perccli.out"
54+RC_FILE = "/var/lib/nagios/perccli.retcode"
55 ARGS = argparse.Namespace()
56
57
58@@ -56,6 +57,17 @@ def handle_results(nlines, match, critical, errors, num_ldrive, num_pdrive, poli
59 print(msg)
60
61
62+def check_cron_return_code(rc_file=RC_FILE):
63+ with open(rc_file) as return_code_file:
64+ rc = return_code_file.read().strip()
65+ if rc != 0:
66+ msg = (
67+ "UNKNOWN: perccli cron job failed, return code {}. "
68+ "Last job output on {}".format(rc, INPUT_FILE)
69+ )
70+ raise UnknownError(msg)
71+
72+
73 def parse_output(policy=False): # noqa:C901
74 num_pdrive = num_ldrive = failed_ld = wrg_policy_ld = 0
75 nlines = 0
76@@ -110,6 +122,7 @@ def parse_args(argv=None):
77 def main(argv, policy=False):
78 """Define main subroutine."""
79 parse_args(argv)
80+ try_check(check_cron_return_code, RC_FILE)
81 try_check(parse_output, policy)
82
83
84diff --git a/src/tests/unit/test_check_megacli.py b/src/tests/unit/test_check_megacli.py
85index 1f77b9f..a6b7d22 100644
86--- a/src/tests/unit/test_check_megacli.py
87+++ b/src/tests/unit/test_check_megacli.py
88@@ -48,8 +48,11 @@ class TestCheckMegaCLI(unittest.TestCase):
89 def test_check_cron_return_code(self, mock_print):
90 file_content = "127"
91 mock_open = mock.mock_open(read_data=file_content)
92- expected = "WARNING: megacli cron job failed, return code 127"
93+ expected = (
94+ "UNKNOWN: megacli cron job failed, return code 127. "
95+ "Last job output on {}".format(check_megacli.INPUT_FILE)
96+ )
97 with mock.patch("builtins.open", mock_open):
98- with self.assertRaises(nagios_plugin3.WarnError) as context:
99+ with self.assertRaises(nagios_plugin3.UnknownError) as context:
100 check_megacli.check_cron_return_code("/path/to/return_code_file")
101 self.assertEqual(expected, str(context.exception))
102diff --git a/src/tests/unit/test_check_perccli.py b/src/tests/unit/test_check_perccli.py
103index 9220d6b..35b4297 100644
104--- a/src/tests/unit/test_check_perccli.py
105+++ b/src/tests/unit/test_check_perccli.py
106@@ -43,3 +43,16 @@ class TestCheckPercCli(unittest.TestCase):
107 with self.assertRaises(nagios_plugin3.CriticalError) as context:
108 check_perccli.parse_output()
109 self.assertEqual(expected, str(context.exception))
110+
111+ @mock.patch("sys.stdout", new_callable=io.StringIO)
112+ def test_check_cron_return_code(self, mock_print):
113+ file_content = "127"
114+ mock_open = mock.mock_open(read_data=file_content)
115+ expected = (
116+ "UNKNOWN: perccli cron job failed, return code 127. "
117+ "Last job output on {}".format(check_perccli.INPUT_FILE)
118+ )
119+ with mock.patch("builtins.open", mock_open):
120+ with self.assertRaises(nagios_plugin3.UnknownError) as context:
121+ check_perccli.check_cron_return_code("/path/to/return_code_file")
122+ self.assertEqual(expected, str(context.exception))

Subscribers

No one subscribed via source and target branches