Merge ~mertkirpici/charm-nrpe:topic/cpu_governor_check into charm-nrpe:master

Proposed by Mert Kirpici
Status: Merged
Approved by: Eric Chen
Approved revision: b0502704eb60b760b83d5c1bff87ec70ef9b0cb0
Merged at revision: 07830068f89c297e6b60b65a532a9f98eff1dce9
Proposed branch: ~mertkirpici/charm-nrpe:topic/cpu_governor_check
Merge into: charm-nrpe:master
Diff against target: 240 lines (+185/-10)
3 files modified
config.yaml (+9/-3)
hooks/nrpe_helpers.py (+24/-7)
tests/unit/test_nrpe_helpers.py (+152/-0)
Reviewer Review Type Date Requested Status
Facundo Ciccioli Approve
Robert Gildein Approve
Eric Chen Approve
BootStack Reviewers Pending
Review via email: mp+427905@code.launchpad.net

Commit message

Set default cpu_governor values for specific principle charms

Description of the change

The old behavior was to get this default value through the
nrpe-external-master relation. This new approach is introducing
inconsistent behavior for the default value however, this is saving us
from updating every single principle charm code that this is relevant.

To post a comment you must log in.
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

This merge proposal is being monitored by mergebot. Change the status to Approved to merge.

Revision history for this message
Eric Chen (eric-chen) :
review: Approve
Revision history for this message
Robert Gildein (rgildein) wrote :

Can we add at least unit test for these changes?

Revision history for this message
Robert Gildein (rgildein) wrote :

Also commit title should not be longer than 50 characters.

Revision history for this message
Mert Kirpici (mertkirpici) wrote :

Fixed the commit message title and added unit tests in the second commit

Revision history for this message
Mert Kirpici (mertkirpici) wrote :

Current state of the unit tests:
https://pastebin.ubuntu.com/p/24rb7rWPF3/

Lint:
https://pastebin.ubuntu.com/p/D7zJwKPdnH/

Failing unit test was introduced by a recent change, not relevant to this change.
Failing lint rule has been there for some time..

Revision history for this message
Robert Gildein (rgildein) wrote :

Thanks Mert. LGTM

review: Approve
Revision history for this message
Eric Chen (eric-chen) wrote :

Mert, Can you rebase this PR can test the unit test & lint rule again to make sure it is pass? Thanks!

Revision history for this message
Mert Kirpici (mertkirpici) wrote :
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

Change successfully merged at revision 07830068f89c297e6b60b65a532a9f98eff1dce9

Revision history for this message
Facundo Ciccioli (fandanbango) wrote :

My comments are non-blocking. This looks good to me, great job :)

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/config.yaml b/config.yaml
2index ba882f5..b81e3f3 100644
3--- a/config.yaml
4+++ b/config.yaml
5@@ -254,9 +254,15 @@ options:
6 description: |
7 CPU governor check. The string value here will be checked against all CPUs in
8 /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor. The supported values are
9- 'ondemand', 'performance', 'powersave'. Unset value means the check will be disabled.
10- There is a relation key called requested_cpu_governor='string', but the charm config value
11- will take precedence over the relation data.
12+ 'ondemand', 'performance', 'powersave'. Although this value is initially unset for
13+ most principal charms, for the following principal charms a default check for
14+ 'performance' will be deployed to aid the operator since for most use cases these
15+ applications will require all the available cpu power they need. If this value is set,
16+ the set value will take precedence over the default.
17+ - nova-compute
18+ - rabbitmq-server
19+ - kubernetes-worker
20+ - percona-cluster
21 cis_audit_enabled:
22 default: False
23 type: boolean
24diff --git a/hooks/nrpe_helpers.py b/hooks/nrpe_helpers.py
25index b980912..3aa9238 100644
26--- a/hooks/nrpe_helpers.py
27+++ b/hooks/nrpe_helpers.py
28@@ -637,16 +637,33 @@ class SubordinateCheckDefinitions(dict):
29 checks.append(netlink_check)
30
31 # Checking if CPU governor is supported by the system and add nrpe check
32- cpu_governor_paths = "/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
33- cpu_governor_supported = glob.glob(cpu_governor_paths)
34- requested_cpu_governor = hookenv.relation_get("requested_cpu_governor")
35- cpu_governor_config = hookenv.config("cpu_governor")
36- wanted_cpu_governor = cpu_governor_config or requested_cpu_governor
37- if wanted_cpu_governor and cpu_governor_supported:
38+ cpu_governor_supported = glob.glob(
39+ "/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor"
40+ )
41+ cpu_governor_setting = hookenv.config("cpu_governor")
42+ if not cpu_governor_setting:
43+ principal_unit = hookenv.principal_unit()
44+ principal_charm_name = hookenv._metadata_unit(principal_unit).get("name")
45+ if principal_charm_name in [
46+ "nova-compute",
47+ "kubernetes-worker",
48+ "rabbitmq-server",
49+ "percona-cluster",
50+ ]:
51+ hookenv.log(
52+ "Setting default cpu freq scaling governor to 'performance' \
53+ for unit:[{}] with charm name:[{}]".format(
54+ principal_unit, principal_charm_name
55+ ),
56+ level=hookenv.DEBUG,
57+ )
58+ cpu_governor_setting = "performance"
59+
60+ if cpu_governor_setting and cpu_governor_supported:
61 description = "Check CPU governor scaler"
62 cmd_name = "check_cpu_governor"
63 cmd_exec = local_plugin_dir + "check_cpu_governor.py"
64- cmd_params = "--governor {}".format(wanted_cpu_governor)
65+ cmd_params = "--governor {}".format(cpu_governor_setting)
66 cpu_governor_check = {
67 "description": description,
68 "cmd_name": cmd_name,
69diff --git a/tests/unit/test_nrpe_helpers.py b/tests/unit/test_nrpe_helpers.py
70index fa5a54f..d9ea79a 100644
71--- a/tests/unit/test_nrpe_helpers.py
72+++ b/tests/unit/test_nrpe_helpers.py
73@@ -1,4 +1,5 @@
74 """Unit tests for hooks/nrpe_helpers.py module."""
75+import os
76 import unittest
77 from unittest import mock
78
79@@ -7,6 +8,8 @@ import netifaces
80 import nrpe_helpers
81 from nrpe_helpers import match_cidr_to_ifaces
82
83+import yaml
84+
85
86 class TestMatchCidrToIfaces(unittest.TestCase):
87 """Test match_cidr_to_ifaces helper function."""
88@@ -289,3 +292,152 @@ class TestDiskSpaceCheck(unittest.TestCase):
89 self.assertEqual("/" in result, True)
90 self.assertEqual("/srv/instances" in result, True)
91 self.assertEqual("/srv/jammy" in result, True)
92+
93+
94+def load_default_config():
95+ """Load the default config values from the charm config.yaml.
96+
97+ Returns a dict of {config:default}
98+ """
99+ with open(os.path.join(os.getcwd(), "config.yaml"), "r") as f:
100+ config_raw = yaml.safe_load(f)
101+ return {key: value["default"] for key, value in config_raw["options"].items()}
102+
103+
104+class TestSubordinateCheckDefinitions(unittest.TestCase):
105+ """Test SubordinateCheckDefinitions() related code."""
106+
107+ def glob_valid_cpufreq_path(self, arg):
108+ """Return a valid list of cpufreq sysfs paths.
109+
110+ This function is meant to be used as a side_effect when
111+ mocking glob.glob(), to provide a more controlled unit
112+ testing environment.
113+ """
114+ ret = []
115+ if arg == "/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor":
116+ ret = ["/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"]
117+ return ret
118+
119+ @mock.patch("nrpe_helpers.glob.glob")
120+ @mock.patch("nrpe_helpers.hookenv._metadata_unit")
121+ @mock.patch("nrpe_helpers.hookenv.principal_unit")
122+ @mock.patch("nrpe_helpers.hookenv.config")
123+ def test_cpu_governor_default_enabled_for_principle_charms(
124+ self, mock_config, mock_principal_unit, mock__metadata_unit, mock_glob
125+ ):
126+ """Test cpu_governor check is enabled by default for principle charms.
127+
128+ A cpu scaling governor value of 'performance' is expected
129+ by default for the charms nova-compute, kubernetes-worker,
130+ rabbitmq-server, percona-cluster even if the charm config
131+ is unset
132+ """
133+ config = load_default_config()
134+ mock_config.side_effect = lambda key: config[key]
135+ mock_glob.side_effect = self.glob_valid_cpufreq_path
136+
137+ principal_charms = [
138+ "nova-compute",
139+ "rabbitmq-server",
140+ "kubernetes-worker",
141+ "percona-cluster",
142+ ]
143+ for charm in principal_charms:
144+ mock_principal_unit.return_value = charm + "/0"
145+ mock__metadata_unit.return_value = {"name": charm}
146+
147+ check = {
148+ "description": "Check CPU governor scaler (sub)",
149+ "cmd_name": "check_cpu_governor",
150+ "cmd_exec": "/usr/local/lib/nagios/plugins/check_cpu_governor.py",
151+ "cmd_params": "--governor performance",
152+ "matching_files": [],
153+ }
154+ checks = nrpe_helpers.SubordinateCheckDefinitions()["checks"]
155+ self.assertIn(check, checks)
156+
157+ mock_principal_unit.reset_mock(return_value=True)
158+ mock__metadata_unit.reset_mock(return_value=True)
159+
160+ @mock.patch("nrpe_helpers.glob.glob")
161+ @mock.patch("nrpe_helpers.hookenv._metadata_unit")
162+ @mock.patch("nrpe_helpers.hookenv.principal_unit")
163+ @mock.patch("nrpe_helpers.hookenv.config")
164+ def test_cpu_governor_disabled_default_for_non_principle_charms(
165+ self, mock_config, mock_principal_unit, mock__metadata_unit, mock_glob
166+ ):
167+ """Test cpu_governor check is disabled by default for non principle charms.
168+
169+ A cpu scaling governor value check should be disabled by default
170+ by most charms.
171+ """
172+ config = load_default_config()
173+ mock_config.side_effect = lambda key: config[key]
174+ mock_glob.side_effect = self.glob_valid_cpufreq_path
175+ mock_principal_unit.return_value = "mongodb/0"
176+ mock__metadata_unit.return_value = {"name": "mongodb"}
177+
178+ checks = nrpe_helpers.SubordinateCheckDefinitions()["checks"]
179+ check_in_list = False
180+ for check in checks:
181+ if check["cmd_name"].startswith("check_cpu_governor"):
182+ check_in_list = True
183+ break
184+ self.assertFalse(check_in_list)
185+
186+ @mock.patch("nrpe_helpers.glob.glob")
187+ @mock.patch("nrpe_helpers.hookenv._metadata_unit")
188+ @mock.patch("nrpe_helpers.hookenv.principal_unit")
189+ @mock.patch("nrpe_helpers.hookenv.config")
190+ def test_cpu_governor_config_overrides_default(
191+ self, mock_config, mock_principal_unit, mock__metadata_unit, mock_glob
192+ ):
193+ """Test cpu_governor default value overriden by config.
194+
195+ The value coming from the configuration should take precedence
196+ in case it is set for principle charms.
197+ """
198+ config = load_default_config()
199+ config["cpu_governor"] = "ondemand"
200+ mock_config.side_effect = lambda key: config[key]
201+ mock_glob.side_effect = self.glob_valid_cpufreq_path
202+ mock_principal_unit.return_value = "nova-compute/0"
203+ mock__metadata_unit.return_value = {"name": "nova-compute"}
204+
205+ check = {
206+ "description": "Check CPU governor scaler (sub)",
207+ "cmd_name": "check_cpu_governor",
208+ "cmd_exec": "/usr/local/lib/nagios/plugins/check_cpu_governor.py",
209+ "cmd_params": "--governor ondemand",
210+ "matching_files": [],
211+ }
212+ checks = nrpe_helpers.SubordinateCheckDefinitions()["checks"]
213+ self.assertIn(check, checks)
214+
215+ @mock.patch("nrpe_helpers.glob.glob")
216+ @mock.patch("nrpe_helpers.hookenv._metadata_unit")
217+ @mock.patch("nrpe_helpers.hookenv.principal_unit")
218+ @mock.patch("nrpe_helpers.hookenv.config")
219+ def test_cpu_governor_check_disabled_in_vm(
220+ self, mock_config, mock_principal_unit, mock__metadata_unit, mock_glob
221+ ):
222+ """Test cpu_governor check is disabled for virtual environments.
223+
224+ When running in VM, this check should be disabled even if it is
225+ set through the config.
226+ """
227+ config = load_default_config()
228+ config["cpu_governor"] = "performance"
229+ mock_config.side_effect = lambda key: config[key]
230+ mock_glob.side_effect = lambda arg: [] # cpufreq paths no exist in VM
231+ mock_principal_unit.return_value = "nova-compute/0"
232+ mock__metadata_unit.return_value = {"name": "nova-compute"}
233+
234+ check_in_list = False
235+ checks = nrpe_helpers.SubordinateCheckDefinitions()["checks"]
236+ for check in checks:
237+ if check["cmd_name"].startswith("check_cpu_governor"):
238+ check_in_list = True
239+ break
240+ self.assertFalse(check_in_list)

Subscribers

People subscribed via source and target branches

to all changes: