Merge lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227 into lp:~openstack-charmers-archive/charms/trusty/neutron-openvswitch/next
- Trusty Tahr (14.04)
- add-nrpe-checks-lp1530227
- Merge into next
Proposed by
JuanJo Ciarlante
Status: | Work in progress |
---|---|
Proposed branch: | lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227 |
Merge into: | lp:~openstack-charmers-archive/charms/trusty/neutron-openvswitch/next |
Diff against target: |
998 lines (+914/-0) 7 files modified
charm-helpers-hooks.yaml (+1/-0) config.yaml (+6/-0) files/nrpe-external-master/neutron-check-tun_ids.py (+219/-0) hooks/charmhelpers/contrib/charmsupport/__init__.py (+15/-0) hooks/charmhelpers/contrib/charmsupport/nrpe.py (+458/-0) hooks/charmhelpers/contrib/charmsupport/volumes.py (+175/-0) hooks/neutron_ovs_hooks.py (+40/-0) |
To merge this branch: | bzr merge lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
OpenStack Charmers | Pending | ||
Review via email: mp+281931@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Unmerged revisions
- 99. By JuanJo Ciarlante
-
[jjo] WIP: add NRPE support via NRPESet passed at relation time to nova-compute principal lp#1530227
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'charm-helpers-hooks.yaml' |
2 | --- charm-helpers-hooks.yaml 2015-09-28 09:47:21 +0000 |
3 | +++ charm-helpers-hooks.yaml 2016-01-07 21:40:19 +0000 |
4 | @@ -11,3 +11,4 @@ |
5 | - payload.execd |
6 | - contrib.network.ip |
7 | - contrib.python.packages |
8 | + - contrib.charmsupport |
9 | |
10 | === modified file 'config.yaml' |
11 | --- config.yaml 2015-09-15 07:47:30 +0000 |
12 | +++ config.yaml 2016-01-07 21:40:19 +0000 |
13 | @@ -106,3 +106,9 @@ |
14 | which do not include a neutron-gateway (do not require l3, lbaas or vpnaas |
15 | services) and should only be used in conjunction with flat or VLAN provider |
16 | networks configurations. |
17 | + enable-nrpe-checks: |
18 | + type: boolean |
19 | + default: true |
20 | + description: | |
21 | + Provide nrpe data to main charm (ie nova-compute) using 'neutron-plugin' |
22 | + relation via the 'nrpe-checks' key |
23 | |
24 | === added directory 'files' |
25 | === added directory 'files/nrpe-external-master' |
26 | === added file 'files/nrpe-external-master/neutron-check-tun_ids.py' |
27 | --- files/nrpe-external-master/neutron-check-tun_ids.py 1970-01-01 00:00:00 +0000 |
28 | +++ files/nrpe-external-master/neutron-check-tun_ids.py 2016-01-07 21:40:19 +0000 |
29 | @@ -0,0 +1,219 @@ |
30 | +#!/usr/bin/python |
31 | +# vim: si et sw=4 ts=4 |
32 | +# |
33 | +# Author: JuanJo Ciarlante <jjo@canonical.com> |
34 | +# Copyright (C) 2015 Canonical |
35 | +# License: GPLv3 |
36 | +""" |
37 | +Verify that all local OVS tun flows match the ones expected from locally |
38 | +running nova instances |
39 | + |
40 | +Example usage: |
41 | +{0} |
42 | +{0} -i br-tun # peek at other OVS interface |
43 | +{0} --conf-file=/etc/nova/nova.conf # config file to peek creds |
44 | + |
45 | +""" |
46 | +import sys |
47 | +import os |
48 | +import re |
49 | +import logging |
50 | +import argparse |
51 | +import ConfigParser |
52 | +import socket |
53 | +import subprocess |
54 | + |
55 | +from neutronclient.v2_0 import client as neutron_client |
56 | +from novaclient import client as nova_client |
57 | + |
58 | + |
59 | +(STATUS_OK, STATUS_WARN, STATUS_CRIT, STATUS_UNKNOWN) = range(0, 4) |
60 | + |
61 | + |
62 | +def get_creds(args): |
63 | + """ return creds dictionary from conf-file (/etc/nova/nova.conf), |
64 | + overridden by OS_ environment vars """ |
65 | + config = ConfigParser.RawConfigParser() |
66 | + config.read(args.conf_file) |
67 | + config_creds_section_prefix = { |
68 | + 'DEFAULT': 'neutron_admin_', |
69 | + 'neutron': 'admin_', |
70 | + } |
71 | + |
72 | + creds = {} |
73 | + for key in ('auth_url', 'username', 'password', 'tenant_name'): |
74 | + env_key = "OS_{}".format(key.upper()) |
75 | + value = os.environ.get(env_key) |
76 | + # If no creds from environment, try known possible config_creds_keys |
77 | + if value: |
78 | + logging.info("get_creds: found {} as env['{}']={}".format( |
79 | + key, env_key, value if key != 'password' else '...')) |
80 | + if not value: |
81 | + for section, prefix in config_creds_section_prefix.iteritems(): |
82 | + try: |
83 | + value = config.get(section, prefix + key) |
84 | + logging.info("get_creds: found {}.{}={}".format( |
85 | + section, prefix + key, |
86 | + value if key != 'password' else '...')) |
87 | + break |
88 | + except ConfigParser.NoOptionError: |
89 | + pass |
90 | + except ConfigParser.NoSectionError: |
91 | + pass |
92 | + if value: |
93 | + creds[key] = value |
94 | + else: |
95 | + raise KeyError("Couldn't find config value for '{}'".format(key)) |
96 | + |
97 | + logging.debug("creds: username={username} tenant_name={tenant_name} " |
98 | + "auth_url={auth_url} password=...".format(**creds)) |
99 | + |
100 | + return creds |
101 | + |
102 | + |
103 | +def nova_list_instances(nova_cli, host): |
104 | + "return instances ids running at host" |
105 | + logging.info('getting all instances running at host="{}" ...'.format( |
106 | + socket.gethostname())) |
107 | + search_opts = {'all_tenants': 1, 'host': host} |
108 | + instances = [server.id for server in |
109 | + nova_cli.servers.list(search_opts=search_opts) |
110 | + if server.status == 'ACTIVE'] |
111 | + logging.info('instances count={}'.format(len(instances))) |
112 | + logging.debug('instances: {}'.format(instances)) |
113 | + return instances |
114 | + |
115 | + |
116 | +def instances_port_nets(neutron_cli, instances): |
117 | + "return instances ports attached network ids" |
118 | + logging.info('getting all instances networks ...') |
119 | + instances_nets = set() |
120 | + for instance in instances: |
121 | + for ports in neutron_cli.list_ports( |
122 | + device_id=instance, fields=['id', 'network_id']).values(): |
123 | + for port in ports: |
124 | + instances_nets.add(port['network_id']) |
125 | + logging.info('instances networks count={}'.format(len(instances_nets))) |
126 | + logging.debug('instances networks: {}'.format(instances_nets)) |
127 | + return instances_nets |
128 | + |
129 | + |
130 | +def neutron_networks_by_id(neutron_cli): |
131 | + "return all neutron networks, keyed by id" |
132 | + logging.info('getting all neutron networks ...') |
133 | + all_nets = neutron_cli.list_networks().get('networks') |
134 | + logging.info('neutron networks count={}'.format(len(all_nets))) |
135 | + logging.debug('neutron networks: {}'.format(all_nets)) |
136 | + networks_by_id = {net['id']: net for net in all_nets} |
137 | + return networks_by_id |
138 | + |
139 | + |
140 | +def get_instances_tun_ids(instances_nets, all_nets_by_id): |
141 | +"""return tun_ids for passed instances networks |
142 | + by looking up all_nets_by_id info""" |
143 | + logging.info('getting network segmentation_id info for all instances...') |
144 | + SEG_ID = 'provider:segmentation_id' |
145 | + NET_TYPE = 'provider:network_type' |
146 | + instances_tun_ids = {all_nets_by_id[net_id].get(SEG_ID) |
147 | + for net_id in instances_nets |
148 | + if all_nets_by_id[net_id].get(NET_TYPE) in |
149 | + ('gre', 'vxlan')} |
150 | + logging.info('instances_tun_ids: {}'.format(instances_tun_ids)) |
151 | + return instances_tun_ids |
152 | + |
153 | + |
154 | +def get_ovs_tun_ids(interface): |
155 | + """get local tun_ids from ovs-ofctl output, ala: |
156 | + ovs-ofctl dump-flows br-tun |egrep -o 'tun_id=\w+' """ |
157 | + logging.info('local tun_ids: running: ovs-ofctl dump-flows {}'.format( |
158 | + interface)) |
159 | + ovs_dump = subprocess.Popen(["ovs-ofctl", "dump-flows", interface], |
160 | + stdin=None, |
161 | + stdout=subprocess.PIPE, |
162 | + stderr=subprocess.PIPE) |
163 | + ovs_tun_ids = set() |
164 | + # match lines with: ... tun_id=0x<TUN_ID> ... |
165 | + for line in ovs_dump.stdout: |
166 | + match = re.search("tun_id=(?P<tun_id>0x\w+)", line) |
167 | + if match: |
168 | + ovs_tun_ids.add(int(match.group(1), 16)) |
169 | + logging.info('ovs_tun_ids: {}'.format(ovs_tun_ids)) |
170 | + return ovs_tun_ids |
171 | + |
172 | + |
173 | +def nrpe_check_tun_ids(expected_tun_ids, local_tun_ids, all_nets_by_id): |
174 | + # order is important: subtract local_tun_ids from expected_tun_ids, |
175 | + # result should be empty |
176 | + tun_ids_diff = expected_tun_ids.difference(local_tun_ids) |
177 | + rc = STATUS_OK |
178 | + msg = [] |
179 | + if tun_ids_diff: |
180 | + tun_ids_str = ' '.join(['tun_id=0x{0:x}'.format(x) |
181 | + for x in tun_ids_diff]) |
182 | + msg.append('CRITICAL: host={} missing local tun_ids: {}'.format( |
183 | + socket.gethostname(), tun_ids_str)) |
184 | + # helper dict by tun_id |
185 | + net_by_tun_id = {net_val.get('provider:segmentation_id'): net_val |
186 | + for net_id, net_val in all_nets_by_id.iteritems()} |
187 | + for tun_id in tun_ids_diff: |
188 | + net = net_by_tun_id.get(tun_id, {}) |
189 | + msg.append('CRITICAL: tun_id=0x{0:x} network.id={id} ' |
190 | + 'network.name="{name}"'.format(tun_id, **net)) |
191 | + logging.info('exp_tun_ids: {}'.format(sorted(expected_tun_ids))) |
192 | + logging.info('loc_tun_ids: {}'.format(sorted(local_tun_ids))) |
193 | + rc = STATUS_CRIT |
194 | + else: |
195 | + msg.append('OK: host={} all needed tun_ids present: {}' |
196 | + ''.format(socket.gethostname(), list(local_tun_ids))) |
197 | + return (rc, msg) |
198 | + |
199 | + |
200 | +def parse_args(): |
201 | + parser = argparse.ArgumentParser( |
202 | + description=__doc__.format(*sys.argv), |
203 | + formatter_class=argparse.RawDescriptionHelpFormatter) |
204 | + parser.add_argument('--conf-file', default='/etc/nova/nova.conf', |
205 | + help='config file to peek creds from') |
206 | + parser.add_argument('-i', '--interface', default='br-tun', |
207 | + help='OVS iface where to find tun_ids, as: ' |
208 | + 'ovs-ofctl dump-flows <interface>') |
209 | + parser.add_argument('--test', default=False, action='store_true', |
210 | + help='simulate missing local tun_ids, force CRITICAL') |
211 | + parser.add_argument('--verbose', default=False, action='store_true') |
212 | + parser.add_argument('--debug', default=False, action='store_true') |
213 | + return parser.parse_args() |
214 | + |
215 | + |
216 | +if __name__ == '__main__': |
217 | + args = parse_args() |
218 | + if args.verbose: |
219 | + logging.basicConfig(level=logging.INFO) |
220 | + if args.debug: |
221 | + logging.basicConfig(level=logging.DEBUG) |
222 | + |
223 | + # initialize needed clients |
224 | + creds = get_creds(args) |
225 | + logging.info("initializing nova_client") |
226 | + nova_cli = nova_client.Client(1.1, creds['username'], creds['password'], |
227 | + creds['tenant_name'], creds['auth_url']) |
228 | + logging.info("initializing neutron_client") |
229 | + neutron_cli = neutron_client.Client(**creds) |
230 | + |
231 | + # instances: local instances id-s (ie running at this host) |
232 | + # instances_nets: local instances' networks id-s |
233 | + # all_nets_by_id: all neutron networks, keyed by id |
234 | + # exp_tun_ids: local instances' networks' segmentation_id-s |
235 | + # loc_tun_ids: locally present tun_ids from ovs-ofctl dump-flows br-tun |
236 | + instances = nova_list_instances(nova_cli, socket.gethostname()) |
237 | + instances_nets = instances_port_nets(neutron_cli, instances) |
238 | + all_nets_by_id = neutron_networks_by_id(neutron_cli) |
239 | + exp_tun_ids = get_instances_tun_ids(instances_nets, all_nets_by_id) |
240 | + loc_tun_ids = get_ovs_tun_ids(args.interface) |
241 | + |
242 | + if args.test: |
243 | + logging.info('TEST: remove a local tun_id'.format(args.interface)) |
244 | + loc_tun_ids.pop() |
245 | + |
246 | + rc, msg = nrpe_check_tun_ids(exp_tun_ids, loc_tun_ids, all_nets_by_id) |
247 | + print "\n".join(msg) |
248 | + sys.exit(rc) |
249 | |
250 | === added directory 'hooks/charmhelpers/contrib/charmsupport' |
251 | === added file 'hooks/charmhelpers/contrib/charmsupport/__init__.py' |
252 | --- hooks/charmhelpers/contrib/charmsupport/__init__.py 1970-01-01 00:00:00 +0000 |
253 | +++ hooks/charmhelpers/contrib/charmsupport/__init__.py 2016-01-07 21:40:19 +0000 |
254 | @@ -0,0 +1,15 @@ |
255 | +# Copyright 2014-2015 Canonical Limited. |
256 | +# |
257 | +# This file is part of charm-helpers. |
258 | +# |
259 | +# charm-helpers is free software: you can redistribute it and/or modify |
260 | +# it under the terms of the GNU Lesser General Public License version 3 as |
261 | +# published by the Free Software Foundation. |
262 | +# |
263 | +# charm-helpers is distributed in the hope that it will be useful, |
264 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
265 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
266 | +# GNU Lesser General Public License for more details. |
267 | +# |
268 | +# You should have received a copy of the GNU Lesser General Public License |
269 | +# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>. |
270 | |
271 | === added file 'hooks/charmhelpers/contrib/charmsupport/nrpe.py' |
272 | --- hooks/charmhelpers/contrib/charmsupport/nrpe.py 1970-01-01 00:00:00 +0000 |
273 | +++ hooks/charmhelpers/contrib/charmsupport/nrpe.py 2016-01-07 21:40:19 +0000 |
274 | @@ -0,0 +1,458 @@ |
275 | +# Copyright 2014-2015 Canonical Limited. |
276 | +# |
277 | +# This file is part of charm-helpers. |
278 | +# |
279 | +# charm-helpers is free software: you can redistribute it and/or modify |
280 | +# it under the terms of the GNU Lesser General Public License version 3 as |
281 | +# published by the Free Software Foundation. |
282 | +# |
283 | +# charm-helpers is distributed in the hope that it will be useful, |
284 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
285 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
286 | +# GNU Lesser General Public License for more details. |
287 | +# |
288 | +# You should have received a copy of the GNU Lesser General Public License |
289 | +# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>. |
290 | + |
291 | +"""Compatibility with the nrpe-external-master charm""" |
292 | +# Copyright 2012 Canonical Ltd. |
293 | +# |
294 | +# Authors: |
295 | +# Matthew Wedgwood <matthew.wedgwood@canonical.com> |
296 | + |
297 | +import subprocess |
298 | +import pwd |
299 | +import grp |
300 | +import os |
301 | +import glob |
302 | +import shutil |
303 | +import re |
304 | +import shlex |
305 | +import yaml |
306 | + |
307 | +from charmhelpers.core.hookenv import ( |
308 | + config, |
309 | + local_unit, |
310 | + log, |
311 | + relation_ids, |
312 | + relation_set, |
313 | + relations_of_type, |
314 | +) |
315 | + |
316 | +from charmhelpers.core.host import ( |
317 | + service, |
318 | + rsync, |
319 | +) |
320 | +NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' |
321 | + |
322 | +# This module adds compatibility with the nrpe-external-master and plain nrpe |
323 | +# subordinate charms. To use it in your charm: |
324 | +# |
325 | +# 1. Update metadata.yaml |
326 | +# |
327 | +# provides: |
328 | +# (...) |
329 | +# nrpe-external-master: |
330 | +# interface: nrpe-external-master |
331 | +# scope: container |
332 | +# |
333 | +# and/or |
334 | +# |
335 | +# provides: |
336 | +# (...) |
337 | +# local-monitors: |
338 | +# interface: local-monitors |
339 | +# scope: container |
340 | + |
341 | +# |
342 | +# 2. Add the following to config.yaml |
343 | +# |
344 | +# nagios_context: |
345 | +# default: "juju" |
346 | +# type: string |
347 | +# description: | |
348 | +# Used by the nrpe subordinate charms. |
349 | +# A string that will be prepended to instance name to set the host name |
350 | +# in nagios. So for instance the hostname would be something like: |
351 | +# juju-myservice-0 |
352 | +# If you're running multiple environments with the same services in them |
353 | +# this allows you to differentiate between them. |
354 | +# nagios_servicegroups: |
355 | +# default: "" |
356 | +# type: string |
357 | +# description: | |
358 | +# A comma-separated list of nagios servicegroups. |
359 | +# If left empty, the nagios_context will be used as the servicegroup |
360 | +# |
361 | +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master |
362 | +# |
363 | +# 4. Update your hooks.py with something like this: |
364 | +# |
365 | +# from charmsupport.nrpe import NRPE |
366 | +# (...) |
367 | +# def update_nrpe_config(): |
368 | +# nrpe_compat = NRPE() |
369 | +# nrpe_compat.add_check( |
370 | +# shortname = "myservice", |
371 | +# description = "Check MyService", |
372 | +# check_cmd = "check_http -w 2 -c 10 http://localhost" |
373 | +# ) |
374 | +# nrpe_compat.add_check( |
375 | +# "myservice_other", |
376 | +# "Check for widget failures", |
377 | +# check_cmd = "/srv/myapp/scripts/widget_check" |
378 | +# ) |
379 | +# nrpe_compat.write() |
380 | +# |
381 | +# def config_changed(): |
382 | +# (...) |
383 | +# update_nrpe_config() |
384 | +# |
385 | +# def nrpe_external_master_relation_changed(): |
386 | +# update_nrpe_config() |
387 | +# |
388 | +# def local_monitors_relation_changed(): |
389 | +# update_nrpe_config() |
390 | +# |
391 | +# 5. ln -s hooks.py nrpe-external-master-relation-changed |
392 | +# ln -s hooks.py local-monitors-relation-changed |
393 | + |
394 | + |
395 | +class CheckException(Exception): |
396 | + pass |
397 | + |
398 | + |
399 | +class Check(object): |
400 | + shortname_re = '[A-Za-z0-9-_]+$' |
401 | + service_template = (""" |
402 | +#--------------------------------------------------- |
403 | +# This file is Juju managed |
404 | +#--------------------------------------------------- |
405 | +define service {{ |
406 | + use active-service |
407 | + host_name {nagios_hostname} |
408 | + service_description {nagios_hostname}[{shortname}] """ |
409 | + """{description} |
410 | + check_command check_nrpe!{command} |
411 | + servicegroups {nagios_servicegroup} |
412 | +}} |
413 | +""") |
414 | + |
415 | + def __init__(self, shortname, description, check_cmd): |
416 | + super(Check, self).__init__() |
417 | + # XXX: could be better to calculate this from the service name |
418 | + if not re.match(self.shortname_re, shortname): |
419 | + raise CheckException("shortname must match {}".format( |
420 | + Check.shortname_re)) |
421 | + self.shortname = shortname |
422 | + self.command = "check_{}".format(shortname) |
423 | + # Note: a set of invalid characters is defined by the |
424 | + # Nagios server config |
425 | + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= |
426 | + self.description = description |
427 | + self.check_cmd = self._locate_cmd(check_cmd) |
428 | + |
429 | + def _locate_cmd(self, check_cmd): |
430 | + search_path = ( |
431 | + '/usr/lib/nagios/plugins', |
432 | + '/usr/local/lib/nagios/plugins', |
433 | + ) |
434 | + parts = shlex.split(check_cmd) |
435 | + for path in search_path: |
436 | + if os.path.exists(os.path.join(path, parts[0])): |
437 | + command = os.path.join(path, parts[0]) |
438 | + if len(parts) > 1: |
439 | + command += " " + " ".join(parts[1:]) |
440 | + return command |
441 | + log('Check command not found: {}'.format(parts[0])) |
442 | + return '' |
443 | + |
444 | + def write(self, nagios_context, hostname, nagios_servicegroups): |
445 | + nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format( |
446 | + self.command) |
447 | + with open(nrpe_check_file, 'w') as nrpe_check_config: |
448 | + nrpe_check_config.write("# check {}\n".format(self.shortname)) |
449 | + nrpe_check_config.write("command[{}]={}\n".format( |
450 | + self.command, self.check_cmd)) |
451 | + |
452 | + if not os.path.exists(NRPE.nagios_exportdir): |
453 | + log('Not writing service config as {} is not accessible'.format( |
454 | + NRPE.nagios_exportdir)) |
455 | + else: |
456 | + self.write_service_config(nagios_context, hostname, |
457 | + nagios_servicegroups) |
458 | + |
459 | + def write_service_config(self, nagios_context, hostname, |
460 | + nagios_servicegroups): |
461 | + for f in os.listdir(NRPE.nagios_exportdir): |
462 | + if re.search('.*{}.cfg'.format(self.command), f): |
463 | + os.remove(os.path.join(NRPE.nagios_exportdir, f)) |
464 | + |
465 | + templ_vars = { |
466 | + 'nagios_hostname': hostname, |
467 | + 'nagios_servicegroup': nagios_servicegroups, |
468 | + 'description': self.description, |
469 | + 'shortname': self.shortname, |
470 | + 'command': self.command, |
471 | + } |
472 | + nrpe_service_text = Check.service_template.format(**templ_vars) |
473 | + nrpe_service_file = '{}/service__{}_{}.cfg'.format( |
474 | + NRPE.nagios_exportdir, hostname, self.command) |
475 | + with open(nrpe_service_file, 'w') as nrpe_service_config: |
476 | + nrpe_service_config.write(str(nrpe_service_text)) |
477 | + |
478 | + def run(self): |
479 | + subprocess.call(self.check_cmd) |
480 | + |
481 | + |
482 | +class Cron(object): |
483 | + def __init__(self, filename, cron_freq, cron_user, cron_cmd): |
484 | + super(Cron, self).__init__() |
485 | + self.filename = filename |
486 | + self.cron_freq = cron_freq |
487 | + self.cron_user = cron_user |
488 | + self.cron_cmd = cron_cmd |
489 | + |
490 | + def write(self): |
491 | + cron_filename = '/etc/cron.d/{}'.format(self.filename) |
492 | + log("cron.write: {}".format(cron_filename)) |
493 | + with open(cron_filename, 'w') as cron_file: |
494 | + cron_file.write("# cron {}\n".format(self.filename)) |
495 | + cron_file.write("{} {} {}\n".format(self.cron_freq, |
496 | + self.cron_user, |
497 | + self.cron_cmd)) |
498 | + |
499 | +class NagiosPlugin(object): |
500 | + def __init__(self, filename): |
501 | + super(NagiosPlugin, self).__init__() |
502 | + self.filename = filename |
503 | + |
504 | + def write(self): |
505 | + if not os.path.exists(NAGIOS_PLUGINS): |
506 | + os.makedirs(NAGIOS_PLUGINS) |
507 | + if os.path.exists(self.filename): |
508 | + log("NagiosPlugin.write: {} {}".format(self.filename, NAGIOS_PLUGINS)) |
509 | + rsync(self.filename, NAGIOS_PLUGINS) |
510 | + else: |
511 | + log("SKIPPED: NagiosPlugin.write: {} {}".format(self.filename, NAGIOS_PLUGINS)) |
512 | + |
513 | + |
514 | +class NRPE(object): |
515 | + nagios_logdir = '/var/log/nagios' |
516 | + nagios_exportdir = '/var/lib/nagios/export' |
517 | + nrpe_confdir = '/etc/nagios/nrpe.d' |
518 | + |
519 | + def __init__(self, hostname=None): |
520 | + super(NRPE, self).__init__() |
521 | + self.config = config() |
522 | + self.nagios_context = self.config['nagios_context'] |
523 | + if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']: |
524 | + self.nagios_servicegroups = self.config['nagios_servicegroups'] |
525 | + else: |
526 | + self.nagios_servicegroups = self.nagios_context |
527 | + self.unit_name = local_unit().replace('/', '-') |
528 | + if hostname: |
529 | + self.hostname = hostname |
530 | + else: |
531 | + self.hostname = "{}-{}".format(self.nagios_context, self.unit_name) |
532 | + self.checks = [] |
533 | + self.crons = [] |
534 | + self.nagios_plugins = [] |
535 | + |
536 | + def add_check(self, *args, **kwargs): |
537 | + self.checks.append(Check(*args, **kwargs)) |
538 | + |
539 | + def add_cron(self, *args, **kwargs): |
540 | + self.crons.append(Cron(*args, **kwargs)) |
541 | + |
542 | + def add_nagios_plugin(self, *args, **kwargs): |
543 | + self.nagios_plugins.append(NagiosPlugin(*args, **kwargs)) |
544 | + |
545 | + def add_from_config(self, config_key): |
546 | + saved_config = config() |
547 | + nrpe_set = NRPESet(saved_config.get(config_key)) |
548 | + log('NRPE.add_from_config: nrpe_set={}'.format(str(nrpe_set))) |
549 | + if nrpe_set: |
550 | + for check in nrpe_set.checks: |
551 | + self.add_check(*check) |
552 | + for cron in nrpe_set.crons: |
553 | + self.add_cron(*cron) |
554 | + for nagios_plugins in nrpe_set.nagios_plugins: |
555 | + self.add_nagios_plugin(*nagios_plugins) |
556 | + |
557 | + def write(self): |
558 | + try: |
559 | + nagios_uid = pwd.getpwnam('nagios').pw_uid |
560 | + nagios_gid = grp.getgrnam('nagios').gr_gid |
561 | + except: |
562 | + log("Nagios user not set up, nrpe checks not updated") |
563 | + return |
564 | + |
565 | + if not os.path.exists(NRPE.nagios_logdir): |
566 | + os.mkdir(NRPE.nagios_logdir) |
567 | + os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid) |
568 | + |
569 | + nrpe_monitors = {} |
570 | + monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}} |
571 | + for nrpecheck in self.checks: |
572 | + nrpecheck.write(self.nagios_context, self.hostname, |
573 | + self.nagios_servicegroups) |
574 | + nrpe_monitors[nrpecheck.shortname] = { |
575 | + "command": nrpecheck.command, |
576 | + } |
577 | + |
578 | + for cron in self.crons: |
579 | + cron.write() |
580 | + |
581 | + for nagios_plugins in self.nagios_plugins: |
582 | + nagios_plugins.write() |
583 | + |
584 | + service('restart', 'nagios-nrpe-server') |
585 | + |
586 | + monitor_ids = relation_ids("local-monitors") + \ |
587 | + relation_ids("nrpe-external-master") |
588 | + for rid in monitor_ids: |
589 | + relation_set(relation_id=rid, monitors=yaml.dump(monitors)) |
590 | + |
591 | + |
592 | +def get_nagios_hostcontext(relation_name='nrpe-external-master'): |
593 | + """ |
594 | + Query relation with nrpe subordinate, return the nagios_host_context |
595 | + |
596 | + :param str relation_name: Name of relation nrpe sub joined to |
597 | + """ |
598 | + for rel in relations_of_type(relation_name): |
599 | + if 'nagios_hostname' in rel: |
600 | + return rel['nagios_host_context'] |
601 | + |
602 | + |
603 | +def get_nagios_hostname(relation_name='nrpe-external-master'): |
604 | + """ |
605 | + Query relation with nrpe subordinate, return the nagios_hostname |
606 | + |
607 | + :param str relation_name: Name of relation nrpe sub joined to |
608 | + """ |
609 | + for rel in relations_of_type(relation_name): |
610 | + if 'nagios_hostname' in rel: |
611 | + return rel['nagios_hostname'] |
612 | + |
613 | + |
614 | +def get_nagios_unit_name(relation_name='nrpe-external-master'): |
615 | + """ |
616 | + Return the nagios unit name prepended with host_context if needed |
617 | + |
618 | + :param str relation_name: Name of relation nrpe sub joined to |
619 | + """ |
620 | + host_context = get_nagios_hostcontext(relation_name) |
621 | + if host_context: |
622 | + unit = "%s:%s" % (host_context, local_unit()) |
623 | + else: |
624 | + unit = local_unit() |
625 | + return unit |
626 | + |
627 | + |
628 | +def add_init_service_checks(nrpe, services, unit_name): |
629 | + """ |
630 | + Add checks for each service in list |
631 | + |
632 | + :param NRPE nrpe: NRPE object to add check to |
633 | + :param list services: List of services to check |
634 | + :param str unit_name: Unit name to use in check description |
635 | + """ |
636 | + for svc in services: |
637 | + upstart_init = '/etc/init/%s.conf' % svc |
638 | + sysv_init = '/etc/init.d/%s' % svc |
639 | + if os.path.exists(upstart_init): |
640 | + nrpe.add_check( |
641 | + shortname=svc, |
642 | + description='process check {%s}' % unit_name, |
643 | + check_cmd='check_upstart_job %s' % svc |
644 | + ) |
645 | + elif os.path.exists(sysv_init): |
646 | + cronpath = '/etc/cron.d/nagios-service-check-%s' % svc |
647 | + cron_file = ('*/5 * * * * root ' |
648 | + '/usr/local/lib/nagios/plugins/check_exit_status.pl ' |
649 | + '-s /etc/init.d/%s status > ' |
650 | + '/var/lib/nagios/service-check-%s.txt\n' % (svc, |
651 | + svc) |
652 | + ) |
653 | + f = open(cronpath, 'w') |
654 | + f.write(cron_file) |
655 | + f.close() |
656 | + nrpe.add_check( |
657 | + shortname=svc, |
658 | + description='process check {%s}' % unit_name, |
659 | + check_cmd='check_status_file.py -f ' |
660 | + '/var/lib/nagios/service-check-%s.txt' % svc, |
661 | + ) |
662 | + |
663 | + |
664 | +def copy_nrpe_checks(): |
665 | + """ |
666 | + Copy the nrpe checks into place |
667 | + |
668 | + """ |
669 | + nrpe_files_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks', |
670 | + 'charmhelpers', 'contrib', 'openstack', |
671 | + 'files') |
672 | + |
673 | + if not os.path.exists(NAGIOS_PLUGINS): |
674 | + os.makedirs(NAGIOS_PLUGINS) |
675 | + for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")): |
676 | + if os.path.isfile(fname): |
677 | + shutil.copy2(fname, |
678 | + os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))) |
679 | + |
680 | + |
681 | +def add_haproxy_checks(nrpe, unit_name): |
682 | + """ |
683 | + Add checks for each service in list |
684 | + |
685 | + :param NRPE nrpe: NRPE object to add check to |
686 | + :param str unit_name: Unit name to use in check description |
687 | + """ |
688 | + nrpe.add_check( |
689 | + shortname='haproxy_servers', |
690 | + description='Check HAProxy {%s}' % unit_name, |
691 | + check_cmd='check_haproxy.sh') |
692 | + nrpe.add_check( |
693 | + shortname='haproxy_queue', |
694 | + description='Check HAProxy queue depth {%s}' % unit_name, |
695 | + check_cmd='check_haproxy_queue_depth.sh') |
696 | + |
697 | +class NRPESet: |
698 | + checks = [] |
699 | + crons = [] |
700 | + nagios_plugins = [] |
701 | + def __init__(self, yaml_str='null'): |
702 | + init_values = None |
703 | + if yaml_str: |
704 | + init_values = yaml.safe_load(yaml_str) |
705 | + log('NRPESet: init_values={}'.format(init_values)) |
706 | + if type(init_values) == type({}): |
707 | + self.checks = init_values.get('checks', []) |
708 | + self.crons = init_values.get('crons', []) |
709 | + self.nagios_plugins = init_values.get('nagios_plugins', []) |
710 | + |
711 | + def add_check(self, shortname, description, check_cmd): |
712 | + log('NRPESet: add_check({})'.format((shortname, description, check_cmd))) |
713 | + self.checks.append((shortname, description, check_cmd)) |
714 | + |
715 | + def add_init_service_checks(self, services, unit_name=None): |
716 | + if not unit_name: |
717 | + unit_name = local_unit().replace('/', '-') |
718 | + log('NRPESet: add_init_service_checks({}, {})'.format(services, unit_name)) |
719 | + add_init_service_checks(self, services, unit_name) |
720 | + |
721 | + def add_cron(self, filename, cron_freq, cron_user, cron_cmd): |
722 | + log('NRPESet: add_cron({}, ...)'.format(filename)) |
723 | + self.crons.append((filename, cron_freq, cron_user, cron_cmd)) |
724 | + |
725 | + def add_nagios_plugin(self, filename): |
726 | + log('NRPESet: add_nagios_plugin({})'.format(filename)) |
727 | + self.nagios_plugins.append((filename,)) |
728 | + |
729 | + def __str__(self): |
730 | + return yaml.safe_dump({'checks': self.checks, |
731 | + 'crons': self.crons, |
732 | + 'nagios_plugins': self.nagios_plugins}) |
733 | |
734 | === added file 'hooks/charmhelpers/contrib/charmsupport/volumes.py' |
735 | --- hooks/charmhelpers/contrib/charmsupport/volumes.py 1970-01-01 00:00:00 +0000 |
736 | +++ hooks/charmhelpers/contrib/charmsupport/volumes.py 2016-01-07 21:40:19 +0000 |
737 | @@ -0,0 +1,175 @@ |
738 | +# Copyright 2014-2015 Canonical Limited. |
739 | +# |
740 | +# This file is part of charm-helpers. |
741 | +# |
742 | +# charm-helpers is free software: you can redistribute it and/or modify |
743 | +# it under the terms of the GNU Lesser General Public License version 3 as |
744 | +# published by the Free Software Foundation. |
745 | +# |
746 | +# charm-helpers is distributed in the hope that it will be useful, |
747 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
748 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
749 | +# GNU Lesser General Public License for more details. |
750 | +# |
751 | +# You should have received a copy of the GNU Lesser General Public License |
752 | +# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>. |
753 | + |
754 | +''' |
755 | +Functions for managing volumes in juju units. One volume is supported per unit. |
756 | +Subordinates may have their own storage, provided it is on its own partition. |
757 | + |
758 | +Configuration stanzas:: |
759 | + |
760 | + volume-ephemeral: |
761 | + type: boolean |
762 | + default: true |
763 | + description: > |
764 | +        If false, a volume is mounted as specified in "volume-map"
765 | + If true, ephemeral storage will be used, meaning that log data |
766 | + will only exist as long as the machine. YOU HAVE BEEN WARNED. |
767 | + volume-map: |
768 | + type: string |
769 | + default: {} |
770 | + description: > |
771 | + YAML map of units to device names, e.g: |
772 | + "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }" |
773 | + Service units will raise a configure-error if volume-ephemeral |
774 | + is 'true' and no volume-map value is set. Use 'juju set' to set a |
775 | + value and 'juju resolved' to complete configuration. |
776 | + |
777 | +Usage:: |
778 | + |
779 | + from charmsupport.volumes import configure_volume, VolumeConfigurationError |
780 | + from charmsupport.hookenv import log, ERROR |
781 | +    def pre_mount_hook():
782 | + stop_service('myservice') |
783 | + def post_mount_hook(): |
784 | + start_service('myservice') |
785 | + |
786 | + if __name__ == '__main__': |
787 | + try: |
788 | + configure_volume(before_change=pre_mount_hook, |
789 | + after_change=post_mount_hook) |
790 | + except VolumeConfigurationError: |
791 | + log('Storage could not be configured', ERROR) |
792 | + |
793 | +''' |
794 | + |
795 | +# XXX: Known limitations |
796 | +# - fstab is neither consulted nor updated |
797 | + |
798 | +import os |
799 | +from charmhelpers.core import hookenv |
800 | +from charmhelpers.core import host |
801 | +import yaml |
802 | + |
803 | + |
804 | +MOUNT_BASE = '/srv/juju/volumes' |
805 | + |
806 | + |
class VolumeConfigurationError(Exception):
    """Raised when volume configuration data is missing or invalid."""
810 | + |
811 | + |
def get_config():
    '''Gather and sanity-check volume configuration data.

    Reads the charm config options 'volume-ephemeral' and 'volume-map'
    and returns a dict with keys:
      - 'ephemeral': True when ephemeral storage was requested
      - 'device': block device mapped to this unit (or None)
      - 'mountpoint': per-unit mountpoint under MOUNT_BASE
    Returns None when the configuration is inconsistent or unparseable.
    '''
    volume_config = {}
    config = hookenv.config()

    errors = False

    # Accept a few common truthy spellings for the boolean option.
    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
        volume_config['ephemeral'] = True
    else:
        volume_config['ephemeral'] = False

    # Pre-set a default so a YAML parse failure below cannot leave
    # volume_map unbound (previously this raised UnboundLocalError at
    # the 'volume_map is None' test after a yaml.YAMLError).
    volume_map = {}
    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        errors = True
    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)), hookenv.ERROR)
        errors = True
        # Fall back to an empty map so the .get() below cannot raise
        # AttributeError on a non-dict value (e.g. a YAML list).
        volume_map = {}

    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log('A volume is defined for this unit, but ephemeral '
                    'storage was requested', hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for persistent storage but did not define a volume
        # (message previously said "Ephemeral storage was requested",
        # which described the opposite branch)
        hookenv.log('Persistent storage was requested, but there is no '
                    'volume defined for this unit.', hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config
856 | + |
857 | + |
def mount_volume(config):
    '''Mount config['device'] at config['mountpoint'].

    Creates the mountpoint directory when missing, unmounts anything
    already mounted there, and raises VolumeConfigurationError when the
    mountpoint is not a directory or the mount itself fails.
    '''
    mountpoint = config['mountpoint']
    if not os.path.exists(mountpoint):
        host.mkdir(mountpoint)
    elif not os.path.isdir(mountpoint):
        hookenv.log('Not a directory: {}'.format(mountpoint))
        raise VolumeConfigurationError()
    # Remount cleanly if something is already mounted here.
    if os.path.ismount(mountpoint):
        unmount_volume(config)
    if not host.mount(config['device'], mountpoint, persist=True):
        raise VolumeConfigurationError()
869 | + |
870 | + |
def unmount_volume(config):
    '''Unmount config['mountpoint'] if it is currently a mount point.

    Raises VolumeConfigurationError when the unmount fails; a no-op when
    nothing is mounted there.
    '''
    mountpoint = config['mountpoint']
    if not os.path.ismount(mountpoint):
        return
    if not host.umount(mountpoint, persist=True):
        raise VolumeConfigurationError()
875 | + |
876 | + |
def managed_mounts():
    '''Return the mounts (from host.mounts()) whose mountpoint lives
    under MOUNT_BASE, i.e. the volumes managed by this charm.'''
    def _is_managed(mount):
        # mount[0] is the mountpoint path reported by host.mounts().
        return mount[0].startswith(MOUNT_BASE)
    # Keep filter() so the py2 (list) / py3 (iterator) return type is
    # the same as before.
    return filter(_is_managed, host.mounts())
880 | + |
881 | + |
def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Set up storage (or don't) according to the charm's volume
    configuration.

    Returns the mount point, or the string 'ephemeral' when ephemeral
    storage is in use. before_change/after_change are optional callables
    invoked around any change to the volume configuration. Raises
    VolumeConfigurationError when the configuration cannot be read.
    '''
    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    mountpoint = config['mountpoint']

    if config['ephemeral']:
        # Ephemeral storage: make sure no managed volume stays mounted.
        if os.path.ismount(mountpoint):
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'

    # Persistent storage requested.
    if os.path.ismount(mountpoint):
        mounts = dict(managed_mounts())
        if mounts.get(mountpoint) != config['device']:
            # A different device is mounted here: swap it out.
            before_change()
            unmount_volume(config)
            mount_volume(config)
            after_change()
    else:
        before_change()
        mount_volume(config)
        after_change()
    return mountpoint
913 | |
914 | === modified file 'hooks/neutron_ovs_hooks.py' |
915 | --- hooks/neutron_ovs_hooks.py 2015-11-12 09:33:27 +0000 |
916 | +++ hooks/neutron_ovs_hooks.py 2016-01-07 21:40:19 +0000 |
917 | @@ -1,5 +1,6 @@ |
918 | #!/usr/bin/python |
919 | |
920 | +import os |
921 | import sys |
922 | |
923 | from copy import deepcopy |
924 | @@ -16,6 +17,7 @@ |
925 | log, |
926 | relation_set, |
927 | relation_ids, |
928 | + local_unit, |
929 | ) |
930 | |
931 | from charmhelpers.core.host import ( |
932 | @@ -45,6 +47,7 @@ |
933 | REQUIRED_INTERFACES, |
934 | check_optional_relations, |
935 | ) |
936 | +from charmhelpers.contrib.charmsupport import nrpe |
937 | |
938 | hooks = Hooks() |
939 | CONFIGS = register_configs() |
940 | @@ -72,6 +75,10 @@ |
941 | for rid in relation_ids('neutron-plugin'): |
942 | neutron_plugin_joined(relation_id=rid) |
943 | |
944 | + if config_value_changed('enable-nrpe-checks'): |
945 | + for rid in relation_ids('neutron-plugin'): |
946 | + neutron_plugin_joined(rid) |
947 | + |
948 | |
949 | @hooks.hook('neutron-plugin-api-relation-changed') |
950 | @restart_on_change(restart_map()) |
951 | @@ -87,6 +94,37 @@ |
952 | neutron_plugin_joined(relation_id=rid) |
953 | |
954 | |
def neutron_plugin_nrpe_checks():
    """Build the NRPE check set for this unit, YAML-serialized.

    Returns '' when the 'enable-nrpe-checks' charm option is off;
    otherwise returns str(NRPESet) describing the tun_ids plugin, its
    cron job, the status-file check and init-service checks, for the
    principal charm to render via the neutron-plugin relation.
    """
    enabled = config('enable-nrpe-checks')
    log('neutron_plugin_nrpe_checks: enable-nrpe-checks={}'.format(enabled))
    if not enabled:
        return ''
    current_unit = local_unit().replace('/', '-')
    plugin_path = os.path.join(os.getenv('CHARM_DIR'),
                               'files', 'nrpe-external-master',
                               'neutron-check-tun_ids.py')
    nrpe_set = nrpe.NRPESet()
    nrpe_set.add_nagios_plugin(plugin_path)
    # Cron writes the plugin output to a status file that the NRPE
    # check below reads, so nagios never runs the plugin directly.
    nrpe_set.add_cron(
        filename='nagios-check-tun_ids',
        cron_freq='*/5 * * * *',
        cron_user='root',
        cron_cmd='{}/neutron-check-tun_ids.py > '
                 '/var/lib/nagios/neutron-check-tun_ids.txt'
                 ''.format(nrpe.NAGIOS_PLUGINS),
    )
    nrpe_set.add_check(
        shortname='neutron_tun_ids',
        description='Check neutron ovs tun_ids {%s}' % current_unit,
        check_cmd='check_status_file.py -f '
                  '/var/lib/nagios/neutron-check-tun_ids.txt'
    )
    nrpe_set.add_init_service_checks(
        ['openvswitch-switch', 'neutron-plugin-openvswitch-agent'])
    rendered = str(nrpe_set)
    log('neutron_plugin_nrpe_checks: nrpe_set={}'.format(rendered))
    return rendered
984 | + |
985 | + |
986 | @hooks.hook('neutron-plugin-relation-joined') |
987 | def neutron_plugin_joined(relation_id=None): |
988 | if enable_local_dhcp(): |
989 | @@ -100,8 +138,10 @@ |
990 | pkgs.extend(METADATA_PACKAGES) |
991 | purge_packages(pkgs) |
992 | secret = get_shared_secret() if enable_nova_metadata() else None |
993 | + nrpe_checks = neutron_plugin_nrpe_checks() |
994 | rel_data = { |
995 | 'metadata-shared-secret': secret, |
996 | + 'nrpe-checks': nrpe_checks, |
997 | } |
998 | relation_set(relation_id=relation_id, **rel_data) |
999 |