Merge ~addyess/charm-openstack-service-checks:bugfix/lp1882822-keystone-client-errors-block-resolving-hooks into ~llama-charmers/charm-openstack-service-checks:master

Proposed by Adam Dyess on 2020-07-06
Status: Merged
Approved by: Chris Sanders on 2020-07-09
Approved revision: def538077257afefd3ef71eb03ba3ea974fc79eb
Merged at revision: def538077257afefd3ef71eb03ba3ea974fc79eb
Proposed branch: ~addyess/charm-openstack-service-checks:bugfix/lp1882822-keystone-client-errors-block-resolving-hooks
Merge into: ~llama-charmers/charm-openstack-service-checks:master
Diff against target: 409 lines (+196/-56)
4 files modified
lib/lib_openstack_service_checks.py (+49/-12)
reactive/openstack_service_checks.py (+17/-6)
tests/functional/test_deploy.py (+105/-38)
tests/unit/test_lib.py (+25/-0)
Reviewer Review Type Date Requested Status
Chris Sanders 2020-07-06 Approve on 2020-07-09
Giuseppe Petralia 2020-07-06 Approve on 2020-07-08
Zachary Zehring 2020-07-07 Pending
Review via email: mp+386897@code.launchpad.net
To post a comment you must log in.
Giuseppe Petralia (peppepetra86) wrote :

Not sure if i get the point of this MR. Maybe a functional test to cover its functionality will help with this.

What i have done is:
- i have deployed os-s-c as long as a small openstack bundle as the one created for existing func tests
- i have stopped keystone

I can't see the workload_status updated to the exception message if an update-status or a config-changed hooks execute.

review: Needs Fixing
Giuseppe Petralia (peppepetra86) wrote :

After some chat, I understood that this the workload status is updated only when a change on the relation with keystone is triggered.

I have reproduced with the following steps:
- stopped keystone
- triggered a config changed on the keystone service port
and the workload status of os-s-c has been updated to:

Keystone server error was encountered trying to list keystone resources. Check keystone server health. View juju logs for more info.

Adam Dyess (addyess) wrote :

> After some chat, I understood that this the workload status is updated only
> when a change on the relation with keystone is triggered.
>
> I have reproduced with the following steps:
> - stopped keystone
> - triggered a config changed on the keystone service port
> and the workload status of os-s-c has been updated to:
>
> Keystone server error was encountered trying to list keystone resources. Check
> keystone server health. View juju logs for more info.

I've added a functional tests which does what we chatted about:
  the test pauses keystone
  applies a change to keystone service-port
  verifies that the osc workload status message is updated
  then reverses the config change and pause of the service.

Giuseppe Petralia (peppepetra86) wrote :

lgtm.

Thanks.

review: Approve
Chris Sanders (chris.sanders) wrote :

looks good, thanks for adding test results

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lib_openstack_service_checks.py b/lib/lib_openstack_service_checks.py
2index 47a136b..64dda9e 100644
3--- a/lib/lib_openstack_service_checks.py
4+++ b/lib/lib_openstack_service_checks.py
5@@ -22,8 +22,34 @@ class OSCCredentialsError(Exception):
6 pass
7
8
9-class OSCEndpointError(OSCCredentialsError):
10- pass
11+class OSCKeystoneError(Exception):
12+ @property
13+ def workload_status(self):
14+ raise NotImplementedError
15+
16+
17+class OSCKeystoneServerError(OSCKeystoneError):
18+ @property
19+ def workload_status(self):
20+ return 'Keystone server error was encountered trying to list keystone '\
21+ 'resources. Check keystone server health. '\
22+ 'View juju logs for more info.'
23+
24+
25+class OSCKeystoneClientError(OSCKeystoneError):
26+ @property
27+ def workload_status(self):
28+ return 'Keystone client request error was encountered trying to '\
29+ 'keystone resources. Check keystone auth creds and url.'\
30+ 'View juju logs for more info.'
31+
32+
33+class OSCSslError(OSCKeystoneError):
34+ @property
35+ def workload_status(self):
36+ return 'SSL error was encountered when requesting Keystone for ' \
37+ 'resource list. Check trusted_ssl_ca config option. ' \
38+ 'View juju logs for more info.'
39
40
41 class OSCHelper():
42@@ -409,25 +435,36 @@ class OSCHelper():
43 self._keystone_client = client.Client(session=sess)
44
45 if self._keystone_client is None:
46- raise OSCEndpointError('Unable to list the endpoint errors, yet: '
47- 'could not connect to the Identity Service')
48+ raise OSCKeystoneServerError('Unable to list the endpoints yet: '
49+ 'could not connect to the Identity Service')
50
51 @property
52 def keystone_endpoints(self):
53- try:
54- endpoints = self._keystone_client.endpoints.list()
55- hookenv.log("Endpoints from keystone: {}".format(endpoints))
56- return endpoints
57- except keystoneauth1.exceptions.http.InternalServerError as error:
58- raise OSCEndpointError(
59- 'Unable to list the keystone endpoints, yet: {}'.format(error))
60+ endpoints = self._safe_keystone_client_list('endpoints')
61+ hookenv.log("Endpoints from keystone: {}".format(endpoints))
62+ return endpoints
63
64 @property
65 def keystone_services(self):
66- services = self._keystone_client.services.list()
67+ services = self._safe_keystone_client_list('services')
68 hookenv.log("Services from keystone: {}".format(services))
69 return services
70
71+ def _safe_keystone_client_list(self, object_type):
72+ list_command = getattr(self._keystone_client, object_type).list
73+ try:
74+ response = list_command()
75+ except (keystoneauth1.exceptions.http.InternalServerError,
76+ keystoneauth1.exceptions.connection.ConnectFailure) as server_error:
77+ raise OSCKeystoneServerError(
78+ 'Keystone server unable to list keystone {}: {}'.format(server_error, object_type))
79+ except keystoneauth1.exceptions.http.BadRequest as client_error:
80+ raise OSCKeystoneClientError(
81+ 'Keystone client error when listing {}: {}'.format(client_error, object_type))
82+ except keystoneauth1.exceptions.connection.SSLError as ssl_error:
83+ raise OSCSslError('Keystone ssl error when listing {}: {}'.format(ssl_error, object_type))
84+ return response
85+
86 @property
87 def _load_envvars(self, novarc='/var/lib/nagios/nagios.novarc'):
88 if not os.path.exists(novarc):
89diff --git a/reactive/openstack_service_checks.py b/reactive/openstack_service_checks.py
90index 95dfaf0..cce8b28 100644
91--- a/reactive/openstack_service_checks.py
92+++ b/reactive/openstack_service_checks.py
93@@ -24,7 +24,7 @@ from charms.reactive import any_flags_set, clear_flag, is_flag_set, set_flag, wh
94 from lib_openstack_service_checks import (
95 OSCHelper,
96 OSCCredentialsError,
97- OSCEndpointError
98+ OSCKeystoneError
99 )
100
101 CERT_FILE = '/usr/local/share/ca-certificates/openstack-service-checks.crt'
102@@ -176,8 +176,8 @@ def render_config():
103 try:
104 helper.render_checks(creds)
105 set_flag('openstack-service-checks.endpoints.configured')
106- except OSCEndpointError as error:
107- hookenv.log(error)
108+ except OSCKeystoneError as keystone_error:
109+ _set_keystone_error_workload_status(keystone_error)
110
111 if not helper.deploy_rally():
112 # Rally could not be installed (if enabled). No further actions taken
113@@ -207,8 +207,8 @@ def configure_nrpe_endpoints():
114 helper.create_endpoint_checks(creds)
115 set_flag('openstack-service-checks.endpoints.configured')
116 clear_flag('openstack-service-checks.started')
117- except OSCEndpointError as error:
118- hookenv.log(error)
119+ except OSCKeystoneError as keystone_error:
120+ _set_keystone_error_workload_status(keystone_error)
121
122
123 @when('identity-notifications.available.updated')
124@@ -221,10 +221,15 @@ def endpoints_changed():
125 def do_restart():
126 hookenv.log('Reloading nagios-nrpe-server')
127 host.service_restart('nagios-nrpe-server')
128- hookenv.status_set('active', 'Unit is ready')
129 set_flag('openstack-service-checks.started')
130
131
132+@when('openstack-service-checks.started')
133+@when('openstack-service-checks.endpoints.configured')
134+def set_active():
135+ hookenv.status_set('active', 'Unit is ready')
136+
137+
138 @when('nrpe-external-master.available')
139 def do_reconfigure_nrpe():
140 os_credentials_flag = 'config.changed.os-credentials'
141@@ -285,3 +290,9 @@ def parse_hooks():
142
143 # render configs again
144 do_reconfigure_nrpe()
145+
146+
147+def _set_keystone_error_workload_status(keystone_error):
148+ error_status_message = 'Failed to create endpoint checks due issue communicating with Keystone'
149+ hookenv.log('{}. Error:\n{}'.format(error_status_message, keystone_error), level=hookenv.ERROR)
150+ hookenv.status_set('blocked', keystone_error.workload_status)
151diff --git a/tests/functional/test_deploy.py b/tests/functional/test_deploy.py
152index 163f130..d5bb74f 100644
153--- a/tests/functional/test_deploy.py
154+++ b/tests/functional/test_deploy.py
155@@ -23,20 +23,60 @@ async def osc_apps(request, model):
156 return app
157
158
159-@pytest.fixture(scope='module', params=SERIES)
160-async def units(request, apps):
161- return apps.units
162+class ActionFailed(Exception):
163+ """Exception raised when action fails."""
164+
165+ def __init__(self, action):
166+ """Set information about action failure in message and raise."""
167+ params = {key: getattr(action, key, "<not-set>")
168+ for key in ['name', 'parameters', 'receiver',
169+ 'message', 'id', 'status',
170+ 'enqueued', 'started', 'completed']}
171+ message = ('Run of action "{name}" with parameters "{parameters}" on '
172+ '"{receiver}" failed with "{message}" (id={id} '
173+ 'status={status} enqueued={enqueued} started={started} '
174+ 'completed={completed})'
175+ .format(**params))
176+ super(ActionFailed, self).__init__(message)
177+
178+
179+class Agent:
180+ def __init__(self, unit):
181+ self.unit = unit
182+
183+ async def _act(self, action, **kwargs):
184+ action_obj = await self.unit.run_action(action, **kwargs)
185+ await action_obj.wait()
186+ if action_obj.status != 'completed':
187+ raise ActionFailed(action_obj)
188+
189+ def status(self, status):
190+ return (self.unit.workload_status == status and
191+ self.unit.agent_status == 'idle')
192+
193+ async def pause(self):
194+ await self._act('pause')
195+
196+ async def resume(self):
197+ await self._act('resume')
198+
199+ async def block_until(self, lambda_f, timeout=600, wait_period=1):
200+ await self.unit.model.block_until(
201+ lambda_f, timeout=timeout, wait_period=wait_period
202+ )
203
204
205 def app_names(series=None):
206- apps = dict([(app, app)
207- for app in ('keystone neutron-api nova-cloud-controller percona-cluster'
208- ' rabbitmq-server nagios ceph-radosgw'.split())])
209+ apps = {
210+ app: app for app in
211+ ['keystone', 'neutron-api', 'nova-cloud-controller',
212+ 'percona-cluster', 'rabbitmq-server', 'nagios', 'ceph-radosgw']
213+ }
214 if not series:
215 return apps
216
217- apps.update(dict([(app, '{}-{}'.format(app, series))
218- for app in 'openstack-service-checks nrpe'.split()]))
219+ for app in ['openstack-service-checks', 'nrpe']:
220+ apps[app] = '{}-{}'.format(app, series)
221 return apps
222
223
224@@ -80,7 +120,9 @@ async def deploy_openstack(model):
225
226
227 @pytest.fixture(scope='module', params=SERIES)
228-async def deploy_app(request, model):
229+async def deploy_app(request, deploy_openstack, model):
230+ await model.block_until(lambda: all([app.status == 'active' for app in deploy_openstack]),
231+ timeout=1200)
232 series = request.param
233 apps = app_names(series)
234
235@@ -107,8 +149,13 @@ async def deploy_app(request, model):
236 yield osc_app
237
238
239-# Tests
240+def unit_from(model, name):
241+ unit = [unit for unit in model.units.values() if unit.entity_id.startswith(name)]
242+ assert len(unit) == 1
243+ return unit[0]
244+
245
246+# Tests
247 async def test_openstackservicechecks_deploy_openstack(deploy_openstack, model):
248 await model.block_until(lambda: all([app.status == 'active' for app in deploy_openstack]),
249 timeout=1200)
250@@ -119,11 +166,7 @@ async def test_openstackservicechecks_deploy(deploy_app, model):
251
252
253 async def test_openstackservicechecks_verify_default_nrpe_checks(deploy_app, model, file_stat):
254- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
255- if len(unit) != 1:
256- assert False
257-
258- unit = unit[0]
259+ unit = unit_from(model, deploy_app.name)
260 endpoint_checks_config = ['check_{endpoint}_urls'.format(endpoint=endpoint)
261 for endpoint in 'admin internal public'.split()]
262 await deploy_app.reset_config(endpoint_checks_config)
263@@ -145,9 +188,7 @@ async def test_openstackservicechecks_verify_default_nrpe_checks(deploy_app, mod
264
265
266 async def test_openstackservicechecks_update_endpoint(deploy_app, model, file_stat):
267- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
268- assert len(unit) == 1
269- unit = unit[0]
270+ unit = unit_from(model, deploy_app.name)
271 keystone = model.applications['keystone']
272 assert len(keystone.units) == 1
273 kst_unit = keystone.units[0]
274@@ -179,11 +220,7 @@ async def test_openstackservicechecks_update_endpoint(deploy_app, model, file_st
275
276
277 async def test_openstackservicechecks_remove_endpoint_checks(deploy_app, model, file_stat):
278- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
279- if len(unit) != 1:
280- assert False
281-
282- unit = unit[0]
283+ unit = unit_from(model, deploy_app.name)
284 endpoint_checks_config = {'check_{endpoint}_urls'.format(endpoint=endpoint): 'false'
285 for endpoint in 'admin internal public'.split()}
286 await deploy_app.set_config(endpoint_checks_config)
287@@ -208,11 +245,7 @@ async def test_openstackservicechecks_remove_endpoint_checks(deploy_app, model,
288
289
290 async def test_openstackservicechecks_enable_rally(deploy_app, model, file_stat):
291- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
292- if len(unit) != 1:
293- assert False
294-
295- unit = unit[0]
296+ unit = unit_from(model, deploy_app.name)
297 filenames = ['/etc/cron.d/osc_rally', '/etc/nagios/nrpe.d/check_rally.cfg']
298
299 # disable rally nrpe check if it was enabled (ie. from a previous run of functests)
300@@ -241,11 +274,7 @@ async def test_openstackservicechecks_enable_rally(deploy_app, model, file_stat)
301
302
303 async def test_openstackservicechecks_enable_contrail_analytics_vip(deploy_app, model, file_stat):
304- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
305- if len(unit) != 1:
306- assert False
307-
308- unit = unit[0]
309+ unit = unit_from(model, deploy_app.name)
310 filename = '/etc/nagios/nrpe.d/check_contrail_analytics_alarms.cfg'
311
312 # disable contrail nrpe check if it was enabled (ie. from a previous run of functests)
313@@ -272,11 +301,7 @@ async def test_openstackservicechecks_enable_contrail_analytics_vip(deploy_app,
314
315
316 async def test_openstackservicechecks_disable_check_neutron_agents(deploy_app, model, file_stat):
317- unit = [unit for unit in model.units.values() if unit.entity_id.startswith(deploy_app.name)]
318- if len(unit) != 1:
319- assert False
320-
321- unit = unit[0]
322+ unit = unit_from(model, deploy_app.name)
323 filename = '/etc/nagios/nrpe.d/check_neutron_agents.cfg'
324
325 # disable neutron_agents nrpe check if it was enabled (ie. from a previous run of functests)
326@@ -300,3 +325,45 @@ async def test_openstackservicechecks_disable_check_neutron_agents(deploy_app, m
327 # Check AFTER enabling neutron_agents check
328 test_stat = await file_stat(filename, unit)
329 assert test_stat['size'] > 0
330+
331+
332+@pytest.fixture(scope='module')
333+async def paused_keystone(deploy_app, deploy_openstack, model):
334+ await model.block_until(lambda: all([app.status == 'active' for app in deploy_openstack]),
335+ timeout=1200)
336+ keystone = model.applications['keystone']
337+ agent = Agent(unit_from(model, 'keystone'))
338+
339+ # get default port
340+ kst_cfg = await keystone.get_config()
341+ default_port = kst_cfg['service-port'].get('value') or kst_cfg['service-port'].get('default')
342+ new_svc_port = int(default_port) + 1
343+
344+ # adjust keystone config service-port
345+ await keystone.set_config({'service-port': str(new_svc_port)})
346+ await agent.block_until(lambda: agent.status('active'))
347+
348+ # pause keystone
349+ await agent.pause()
350+ await agent.block_until(lambda: agent.status('maintenance'))
351+
352+ yield keystone
353+
354+ # resume keystone
355+ await agent.resume()
356+ await agent.block_until(lambda: agent.status('active'))
357+
358+ # restore service-port
359+ await keystone.set_config({'service-port': str(default_port)})
360+ await agent.block_until(lambda: agent.status('active'))
361+
362+
363+@pytest.mark.usefixtures("paused_keystone")
364+async def test_openstackservicechecks_invalid_keystone_workload_status(model, deploy_app):
365+ agent = Agent(unit_from(model, deploy_app.name))
366+
367+ # Wait for osc app to block with expected workload-status
368+ await agent.block_until(lambda: agent.status('blocked'))
369+ assert agent.unit.workload_status_message == \
370+ 'Keystone server error was encountered trying to list keystone ' \
371+ 'resources. Check keystone server health. View juju logs for more info.'
372diff --git a/tests/unit/test_lib.py b/tests/unit/test_lib.py
373index effd5ea..5d4767a 100644
374--- a/tests/unit/test_lib.py
375+++ b/tests/unit/test_lib.py
376@@ -1,4 +1,9 @@
377+from unittest.mock import MagicMock
378+
379 import pytest
380+import keystoneauth1
381+
382+from lib_openstack_service_checks import OSCKeystoneServerError, OSCKeystoneClientError, OSCSslError
383
384
385 def test_openstackservicechecks_common_properties(openstackservicechecks):
386@@ -61,3 +66,23 @@ def test_get_rally_checks_context(skip_rally, result, openstackservicechecks):
387 expected = {comp: result[num]
388 for num, comp in enumerate('cinder glance nova neutron'.split())}
389 assert openstackservicechecks._get_rally_checks_context() == expected
390+
391+
392+@pytest.mark.parametrize('keystone_auth_exception,expected_raised_exception', [
393+ (keystoneauth1.exceptions.http.InternalServerError, OSCKeystoneServerError),
394+ (keystoneauth1.exceptions.connection.ConnectFailure, OSCKeystoneServerError),
395+ (keystoneauth1.exceptions.http.BadRequest, OSCKeystoneClientError),
396+ (keystoneauth1.exceptions.connection.SSLError, OSCSslError),
397+])
398+@pytest.mark.parametrize('source', ["endpoints", "services"])
399+def test_keystone_client_exceptions(
400+ keystone_auth_exception, expected_raised_exception,
401+ openstackservicechecks, source):
402+ mock_keystone_client = MagicMock()
403+ getattr(mock_keystone_client, source).list.side_effect = keystone_auth_exception
404+ openstackservicechecks._keystone_client = mock_keystone_client
405+ with pytest.raises(expected_raised_exception):
406+ if source == 'endpoints':
407+ openstackservicechecks.keystone_endpoints
408+ else:
409+ openstackservicechecks.keystone_services

Subscribers

People subscribed via source and target branches