Merge lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227 into lp:~openstack-charmers-archive/charms/trusty/neutron-openvswitch/next

Proposed by JuanJo Ciarlante
Status: Work in progress
Proposed branch: lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227
Merge into: lp:~openstack-charmers-archive/charms/trusty/neutron-openvswitch/next
Diff against target: 998 lines (+914/-0)
7 files modified
charm-helpers-hooks.yaml (+1/-0)
config.yaml (+6/-0)
files/nrpe-external-master/neutron-check-tun_ids.py (+219/-0)
hooks/charmhelpers/contrib/charmsupport/__init__.py (+15/-0)
hooks/charmhelpers/contrib/charmsupport/nrpe.py (+458/-0)
hooks/charmhelpers/contrib/charmsupport/volumes.py (+175/-0)
hooks/neutron_ovs_hooks.py (+40/-0)
To merge this branch: bzr merge lp:~jjo/charms/trusty/neutron-openvswitch/add-nrpe-checks-lp1530227
Reviewer Review Type Date Requested Status
OpenStack Charmers Pending
Review via email: mp+281931@code.launchpad.net
To post a comment you must log in.

Unmerged revisions

99. By JuanJo Ciarlante

[jjo] WIP: add NRPE support via NRPESet passed at relation time to nova-compute principal lp#1530227

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'charm-helpers-hooks.yaml'
2--- charm-helpers-hooks.yaml 2015-09-28 09:47:21 +0000
3+++ charm-helpers-hooks.yaml 2016-01-07 21:40:19 +0000
4@@ -11,3 +11,4 @@
5 - payload.execd
6 - contrib.network.ip
7 - contrib.python.packages
8+ - contrib.charmsupport
9
10=== modified file 'config.yaml'
11--- config.yaml 2015-09-15 07:47:30 +0000
12+++ config.yaml 2016-01-07 21:40:19 +0000
13@@ -106,3 +106,9 @@
14 which do not include a neutron-gateway (do not require l3, lbaas or vpnaas
15 services) and should only be used in-conjunction with flat or VLAN provider
16 networks configurations.
17+ enable-nrpe-checks:
18+ type: boolean
19+ default: true
20+ description: |
21+ Provide nrpe data to main charm (ie nova-compute) using 'neutron-plugin'
22+ relation via the 'nrpe-checks' key
23
24=== added directory 'files'
25=== added directory 'files/nrpe-external-master'
26=== added file 'files/nrpe-external-master/neutron-check-tun_ids.py'
27--- files/nrpe-external-master/neutron-check-tun_ids.py 1970-01-01 00:00:00 +0000
28+++ files/nrpe-external-master/neutron-check-tun_ids.py 2016-01-07 21:40:19 +0000
29@@ -0,0 +1,219 @@
30+#!/usr/bin/python
31+# vim: si et sw=4 ts=4
32+#
33+# Author: JuanJo Ciarlante <jjo@canonical.com>
34+# Copyright (C) 2015 Canonical
35+# License: GPLv3
36+"""
37+Verify that all local OVS tun flows match the ones expected from locally
38+running nova instances
39+
40+Example usage:
41+{0}
42+{0} -i br-tun # peek at other OVS interface
43+{0} --conf-file=/etc/nova/nova.conf # config file to peek creds
44+
45+"""
46+import sys
47+import os
48+import re
49+import logging
50+import argparse
51+import ConfigParser
52+import socket
53+import subprocess
54+
55+from neutronclient.v2_0 import client as neutron_client
56+from novaclient import client as nova_client
57+
58+
59+(STATUS_OK, STATUS_WARN, STATUS_CRIT, STATUS_UNKNOWN) = range(0, 4)
60+
61+
def get_creds(args):
    """Return the credentials needed to build the nova/neutron clients.

    Each of ``auth_url``, ``username``, ``password`` and ``tenant_name`` is
    taken from the matching ``OS_<KEY>`` environment variable when that is
    set and non-empty; otherwise it is looked up in ``args.conf_file``.

    :param args: parsed arguments; only ``args.conf_file`` is read
    :returns: dict holding the four credential keys
    :raises KeyError: if a credential is found in neither place
    """
    config = ConfigParser.RawConfigParser()
    config.read(args.conf_file)
    # (section, option prefix) pairs, tried in this fixed order.  A tuple is
    # used instead of a dict so the precedence between sections does not
    # depend on dict iteration order.
    config_creds_section_prefix = (
        ('DEFAULT', 'neutron_admin_'),
        ('neutron', 'admin_'),
    )

    creds = {}
    for key in ('auth_url', 'username', 'password', 'tenant_name'):
        env_key = "OS_{}".format(key.upper())
        value = os.environ.get(env_key)
        if value:
            # never log the password itself
            logging.info("get_creds: found {} as env['{}']={}".format(
                key, env_key, value if key != 'password' else '...'))
        else:
            # nothing usable in the environment: try the known config keys
            for section, prefix in config_creds_section_prefix:
                try:
                    value = config.get(section, prefix + key)
                except (ConfigParser.NoOptionError,
                        ConfigParser.NoSectionError):
                    continue
                logging.info("get_creds: found {}.{}={}".format(
                    section, prefix + key,
                    value if key != 'password' else '...'))
                break
        if not value:
            raise KeyError("Couldn't find config value for '{}'".format(key))
        creds[key] = value

    logging.debug("creds: username={username} tenant_name={tenant_name} "
                  "auth_url={auth_url} password=...".format(**creds))

    return creds
102+
def nova_list_instances(nova_cli, host):
    """Return the ids of the ACTIVE instances nova reports for ``host``.

    :param nova_cli: nova client; ``nova_cli.servers.list(search_opts=...)``
        is the only call made
    :param host: hypervisor host name used as the ``host`` search option
    :returns: list of server ids whose ``status`` is ``'ACTIVE'``
    """
    # Log the host that is actually queried (the argument), not the local
    # hostname, so the message stays truthful if a caller passes another host.
    logging.info('getting all instances running at host="{}" ...'.format(
        host))
    search_opts = {'all_tenants': 1, 'host': host}
    instances = [server.id for server in
                 nova_cli.servers.list(search_opts=search_opts)
                 if server.status == 'ACTIVE']
    logging.info('instances count={}'.format(len(instances)))
    logging.debug('instances: {}'.format(instances))
    return instances
114+
115+
def instances_port_nets(neutron_cli, instances):
    """Return the set of network ids that the given instances' ports use.

    :param neutron_cli: neutron client; ``list_ports(device_id=...,
        fields=[...])`` is called once per instance
    :param instances: iterable of instance ids
    :returns: set of ``network_id`` values taken from those ports
    """
    logging.info('getting all instances networks ...')
    instances_nets = set()
    for instance in instances:
        # Read the 'ports' key explicitly (as neutron_networks_by_id does
        # with 'networks') rather than walking every value of the response,
        # so any additional top-level key is not mistaken for a port list.
        ports = neutron_cli.list_ports(
            device_id=instance, fields=['id', 'network_id']).get('ports', [])
        instances_nets.update(port['network_id'] for port in ports)
    logging.info('instances networks count={}'.format(len(instances_nets)))
    logging.debug('instances networks: {}'.format(instances_nets))
    return instances_nets
128+
129+
def neutron_networks_by_id(neutron_cli):
    """Fetch every neutron network and index the result by network id.

    :param neutron_cli: neutron client; ``list_networks()`` is called once
    :returns: dict mapping each network's ``'id'`` to its network dict
    """
    logging.info('getting all neutron networks ...')
    networks = neutron_cli.list_networks().get('networks')
    logging.info('neutron networks count={}'.format(len(networks)))
    logging.debug('neutron networks: {}'.format(networks))
    return dict((network['id'], network) for network in networks)
138+
139+
def get_instances_tun_ids(instances_nets, all_nets_by_id):
    """Return the segmentation ids of the tunnelled networks in use.

    Every id in ``instances_nets`` is looked up in ``all_nets_by_id``; only
    networks whose ``provider:network_type`` is ``gre`` or ``vxlan``
    contribute their ``provider:segmentation_id``.

    :param instances_nets: iterable of network ids
    :param all_nets_by_id: dict of network dicts keyed by network id
    :returns: set of segmentation ids
    """
    logging.info('getting network segmentation_id info for all instances...')
    tunnel_types = ('gre', 'vxlan')
    instances_tun_ids = set()
    for net_id in instances_nets:
        network = all_nets_by_id[net_id]
        if network.get('provider:network_type') in tunnel_types:
            instances_tun_ids.add(network.get('provider:segmentation_id'))
    logging.info('instances_tun_ids: {}'.format(instances_tun_ids))
    return instances_tun_ids
152+
153+
def get_ovs_tun_ids(interface):
    r"""Return the tun_ids present in the local OVS flows of ``interface``.

    Equivalent to:
        ovs-ofctl dump-flows br-tun | egrep -o 'tun_id=\w+'

    :param interface: OVS bridge name handed to ``ovs-ofctl dump-flows``
    :returns: set of tun_ids as ints (parsed from their ``0x...`` form)
    """
    logging.info('local tun_ids: running: ovs-ofctl dump-flows {}'.format(
        interface))
    ovs_dump = subprocess.Popen(["ovs-ofctl", "dump-flows", interface],
                                stdin=None,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    # communicate() drains both pipes and waits for the child, so a chatty
    # stderr cannot block the command and no unreaped process is left behind.
    out, err = ovs_dump.communicate()
    if ovs_dump.returncode != 0:
        # Kept best-effort like before (an empty set is returned), but the
        # failure is no longer silent.
        logging.warning('ovs-ofctl dump-flows {} exited rc={}: {}'.format(
            interface, ovs_dump.returncode, err.strip()))
    # match lines with: ... tun_id=0x<TUN_ID> ...
    # Only hex digits are captured so int(..., 16) cannot fail on the match.
    tun_id_re = re.compile(r"tun_id=(?P<tun_id>0x[0-9a-fA-F]+)")
    ovs_tun_ids = set()
    for line in out.splitlines():
        match = tun_id_re.search(line)
        if match:
            ovs_tun_ids.add(int(match.group('tun_id'), 16))
    logging.info('ovs_tun_ids: {}'.format(ovs_tun_ids))
    return ovs_tun_ids
171+
172+
def nrpe_check_tun_ids(expected_tun_ids, local_tun_ids, all_nets_by_id):
    """Compare expected against locally present tun_ids, Nagios style.

    :param expected_tun_ids: set of tun_ids the local instances need
    :param local_tun_ids: set of tun_ids found in the local OVS flows
    :param all_nets_by_id: dict of network dicts keyed by network id; used
        only to name the networks behind any missing tun_id
    :returns: ``(rc, msg)`` where ``rc`` is ``STATUS_OK`` or ``STATUS_CRIT``
        and ``msg`` is a list of output lines
    """
    hostname = socket.gethostname()
    # order is important: subtract local_tun_ids from expected_tun_ids,
    # result should be empty (extra local tun_ids are not an error)
    tun_ids_diff = sorted(set(expected_tun_ids).difference(local_tun_ids))
    if not tun_ids_diff:
        return (STATUS_OK,
                ['OK: host={} all needed tun_ids present: {}'.format(
                    hostname, sorted(local_tun_ids))])

    tun_ids_str = ' '.join(['tun_id=0x{0:x}'.format(x)
                            for x in tun_ids_diff])
    msg = ['CRITICAL: host={} missing local tun_ids: {}'.format(
        hostname, tun_ids_str)]
    # helper dict by tun_id; restricted to tunnelled networks so that e.g. a
    # VLAN network sharing the same segmentation_id is not reported instead
    net_by_tun_id = {}
    for net_val in all_nets_by_id.values():
        if net_val.get('provider:network_type') in ('gre', 'vxlan'):
            net_by_tun_id[net_val.get('provider:segmentation_id')] = net_val
    for tun_id in tun_ids_diff:
        net = net_by_tun_id.get(tun_id, {})
        # .get() with a placeholder: an unmatched tun_id must still yield a
        # CRITICAL line instead of raising KeyError while formatting
        msg.append('CRITICAL: tun_id=0x{0:x} network.id={1} '
                   'network.name="{2}"'.format(
                       tun_id, net.get('id', 'unknown'),
                       net.get('name', 'unknown')))
    logging.info('exp_tun_ids: {}'.format(sorted(expected_tun_ids)))
    logging.info('loc_tun_ids: {}'.format(sorted(local_tun_ids)))
    return (STATUS_CRIT, msg)
198+
199+
def parse_args():
    """Parse the command line (``sys.argv``) and return the namespace.

    Options: ``--conf-file``, ``-i/--interface``, ``--test``, ``--verbose``
    and ``--debug``.  The module docstring, with ``{0}`` replaced by the
    program name, is used as the help description.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description rather than crashing before any check has run.
    description = (__doc__ or '').format(sys.argv[0])
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--conf-file', default='/etc/nova/nova.conf',
                        help='config file to peek creds from')
    parser.add_argument('-i', '--interface', default='br-tun',
                        help='OVS iface where to find tun_ids, as: '
                        'ovs-ofctl dump-flows <interface>')
    parser.add_argument('--test', default=False, action='store_true',
                        help='simulate missing local tun_ids, force CRITICAL')
    parser.add_argument('--verbose', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    return parser.parse_args()
214+
215+
if __name__ == '__main__':
    args = parse_args()
    # logging.basicConfig() does nothing once the root logger has a handler,
    # so the level must be chosen in a single call; --debug wins over
    # --verbose when both are given.
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)

    # initialize needed clients
    creds = get_creds(args)
    logging.info("initializing nova_client")
    nova_cli = nova_client.Client(1.1, creds['username'], creds['password'],
                                  creds['tenant_name'], creds['auth_url'])
    logging.info("initializing neutron_client")
    neutron_cli = neutron_client.Client(**creds)

    # instances: local instances id-s (ie running at this host)
    # instances_nets: local instances' networks id-s
    # all_nets_by_id: all neutron networks, keyed by id
    # exp_tun_ids: local instances' networks' segmentation_id-s
    # loc_tun_ids: locally present tun_ids from ovs-ofctl dump-flows br-tun
    instances = nova_list_instances(nova_cli, socket.gethostname())
    instances_nets = instances_port_nets(neutron_cli, instances)
    all_nets_by_id = neutron_networks_by_id(neutron_cli)
    exp_tun_ids = get_instances_tun_ids(instances_nets, all_nets_by_id)
    loc_tun_ids = get_ovs_tun_ids(args.interface)

    if args.test:
        # Drop an *expected* tun_id from the local set: removing an arbitrary
        # local one may leave every expected id in place (no CRITICAL), and
        # set.pop() raises KeyError when no local tun_id exists at all.
        if exp_tun_ids:
            dropped = next(iter(exp_tun_ids))
            logging.info('TEST: remove a local tun_id: {}'.format(dropped))
            loc_tun_ids.discard(dropped)
        else:
            logging.info('TEST: no expected tun_ids, nothing to remove')

    rc, msg = nrpe_check_tun_ids(exp_tun_ids, loc_tun_ids, all_nets_by_id)
    print("\n".join(msg))
    sys.exit(rc)
249
250=== added directory 'hooks/charmhelpers/contrib/charmsupport'
251=== added file 'hooks/charmhelpers/contrib/charmsupport/__init__.py'
252--- hooks/charmhelpers/contrib/charmsupport/__init__.py 1970-01-01 00:00:00 +0000
253+++ hooks/charmhelpers/contrib/charmsupport/__init__.py 2016-01-07 21:40:19 +0000
254@@ -0,0 +1,15 @@
255+# Copyright 2014-2015 Canonical Limited.
256+#
257+# This file is part of charm-helpers.
258+#
259+# charm-helpers is free software: you can redistribute it and/or modify
260+# it under the terms of the GNU Lesser General Public License version 3 as
261+# published by the Free Software Foundation.
262+#
263+# charm-helpers is distributed in the hope that it will be useful,
264+# but WITHOUT ANY WARRANTY; without even the implied warranty of
265+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
266+# GNU Lesser General Public License for more details.
267+#
268+# You should have received a copy of the GNU Lesser General Public License
269+# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
270
271=== added file 'hooks/charmhelpers/contrib/charmsupport/nrpe.py'
272--- hooks/charmhelpers/contrib/charmsupport/nrpe.py 1970-01-01 00:00:00 +0000
273+++ hooks/charmhelpers/contrib/charmsupport/nrpe.py 2016-01-07 21:40:19 +0000
274@@ -0,0 +1,458 @@
275+# Copyright 2014-2015 Canonical Limited.
276+#
277+# This file is part of charm-helpers.
278+#
279+# charm-helpers is free software: you can redistribute it and/or modify
280+# it under the terms of the GNU Lesser General Public License version 3 as
281+# published by the Free Software Foundation.
282+#
283+# charm-helpers is distributed in the hope that it will be useful,
284+# but WITHOUT ANY WARRANTY; without even the implied warranty of
285+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
286+# GNU Lesser General Public License for more details.
287+#
288+# You should have received a copy of the GNU Lesser General Public License
289+# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
290+
291+"""Compatibility with the nrpe-external-master charm"""
292+# Copyright 2012 Canonical Ltd.
293+#
294+# Authors:
295+# Matthew Wedgwood <matthew.wedgwood@canonical.com>
296+
297+import subprocess
298+import pwd
299+import grp
300+import os
301+import glob
302+import shutil
303+import re
304+import shlex
305+import yaml
306+
307+from charmhelpers.core.hookenv import (
308+ config,
309+ local_unit,
310+ log,
311+ relation_ids,
312+ relation_set,
313+ relations_of_type,
314+)
315+
316+from charmhelpers.core.host import (
317+ service,
318+ rsync,
319+)
320+NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
321+
322+# This module adds compatibility with the nrpe-external-master and plain nrpe
323+# subordinate charms. To use it in your charm:
324+#
325+# 1. Update metadata.yaml
326+#
327+# provides:
328+# (...)
329+# nrpe-external-master:
330+# interface: nrpe-external-master
331+# scope: container
332+#
333+# and/or
334+#
335+# provides:
336+# (...)
337+# local-monitors:
338+# interface: local-monitors
339+# scope: container
340+
341+#
342+# 2. Add the following to config.yaml
343+#
344+# nagios_context:
345+# default: "juju"
346+# type: string
347+# description: |
348+# Used by the nrpe subordinate charms.
349+# A string that will be prepended to instance name to set the host name
350+# in nagios. So for instance the hostname would be something like:
351+# juju-myservice-0
352+# If you're running multiple environments with the same services in them
353+# this allows you to differentiate between them.
354+# nagios_servicegroups:
355+# default: ""
356+# type: string
357+# description: |
358+# A comma-separated list of nagios servicegroups.
359+# If left empty, the nagios_context will be used as the servicegroup
360+#
361+# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
362+#
363+# 4. Update your hooks.py with something like this:
364+#
365+# from charmsupport.nrpe import NRPE
366+# (...)
367+# def update_nrpe_config():
368+# nrpe_compat = NRPE()
369+# nrpe_compat.add_check(
370+# shortname = "myservice",
371+# description = "Check MyService",
372+# check_cmd = "check_http -w 2 -c 10 http://localhost"
373+# )
374+# nrpe_compat.add_check(
375+# "myservice_other",
376+# "Check for widget failures",
377+# check_cmd = "/srv/myapp/scripts/widget_check"
378+# )
379+# nrpe_compat.write()
380+#
381+# def config_changed():
382+# (...)
383+# update_nrpe_config()
384+#
385+# def nrpe_external_master_relation_changed():
386+# update_nrpe_config()
387+#
388+# def local_monitors_relation_changed():
389+# update_nrpe_config()
390+#
391+# 5. ln -s hooks.py nrpe-external-master-relation-changed
392+# ln -s hooks.py local-monitors-relation-changed
393+
394+
class CheckException(Exception):
    """Raised by Check when it is given an invalid shortname."""
397+
398+
class Check(object):
    """One NRPE check.

    Holds the nrpe command definition (written under /etc/nagios/nrpe.d) and
    renders the Nagios service definition that is exported for it.
    """
    # allowed characters for a check shortname
    shortname_re = '[A-Za-z0-9-_]+$'
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use active-service
    host_name {nagios_hostname}
    service_description {nagios_hostname}[{shortname}] """
                        """{description}
    check_command check_nrpe!{command}
    servicegroups {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        """
        :param str shortname: check identifier; must match ``shortname_re``
        :param str description: text appended to the service description
        :param str check_cmd: plugin name (or path) followed by its arguments
        :raises CheckException: if ``shortname`` has invalid characters
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _locate_cmd(self, check_cmd):
        """Return ``check_cmd`` with its first word resolved against the
        Nagios plugin directories, or '' (after logging) if it is in none."""
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        for path in search_path:
            if os.path.exists(os.path.join(path, parts[0])):
                command = os.path.join(path, parts[0])
                if len(parts) > 1:
                    command += " " + " ".join(parts[1:])
                return command
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the nrpe.d command file and, when the export directory
        exists, the matching Nagios service definition."""
        nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
            self.command)
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Replace any exported service file for this command with a fresh
        one rendered from ``service_template``."""
        # Escape the literal parts so '.' only matches a real dot.
        stale_re = re.compile(r'.*{}\.cfg'.format(re.escape(self.command)))
        for f in os.listdir(NRPE.nagios_exportdir):
            if stale_re.search(f):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = '{}/service__{}_{}.cfg'.format(
            NRPE.nagios_exportdir, hostname, self.command)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the check command and return its exit status.

        ``check_cmd`` is a full command line, so it is split into an argument
        list first: handing the whole string to subprocess.call() without a
        shell makes it look for an executable named like the entire line.
        Returns None (after logging) when no command was located.
        """
        if not self.check_cmd:
            log('Check {} has no command to run'.format(self.shortname))
            return None
        return subprocess.call(shlex.split(self.check_cmd))
480+
481+
class Cron(object):
    """A cron.d entry: schedule, user and command, stored under a file name."""

    def __init__(self, filename, cron_freq, cron_user, cron_cmd):
        super(Cron, self).__init__()
        self.filename, self.cron_freq = filename, cron_freq
        self.cron_user, self.cron_cmd = cron_user, cron_cmd

    def write(self):
        """Write ``/etc/cron.d/<filename>``: a comment line followed by the
        single "<freq> <user> <cmd>" entry."""
        target = '/etc/cron.d/{}'.format(self.filename)
        log("cron.write: {}".format(target))
        header = "# cron {}\n".format(self.filename)
        entry = "{} {} {}\n".format(self.cron_freq,
                                    self.cron_user,
                                    self.cron_cmd)
        with open(target, 'w') as cron_file:
            cron_file.write(header)
            cron_file.write(entry)
498+
class NagiosPlugin(object):
    """A plugin file to be copied into the NAGIOS_PLUGINS directory."""

    def __init__(self, filename):
        super(NagiosPlugin, self).__init__()
        self.filename = filename

    def write(self):
        """Create NAGIOS_PLUGINS if missing, then rsync the plugin file into
        it; a source file that does not exist is logged and skipped."""
        if not os.path.exists(NAGIOS_PLUGINS):
            os.makedirs(NAGIOS_PLUGINS)
        if not os.path.exists(self.filename):
            log("SKIPPED: NagiosPlugin.write: {} {}".format(self.filename, NAGIOS_PLUGINS))
            return
        log("NagiosPlugin.write: {} {}".format(self.filename, NAGIOS_PLUGINS))
        rsync(self.filename, NAGIOS_PLUGINS)
512+
513+
class NRPE(object):
    """Collects checks, cron entries and plugin files, and writes them out
    for the nrpe subordinate charm."""
    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self, hostname=None):
        """
        :param str hostname: Nagios host name to use; when not given it is
            built as "<nagios_context>-<unit name with '/' replaced by '-'>"
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
            self.nagios_servicegroups = self.config['nagios_servicegroups']
        else:
            # no servicegroups configured: fall back to the context
            self.nagios_servicegroups = self.nagios_context
        self.unit_name = local_unit().replace('/', '-')
        if hostname:
            self.hostname = hostname
        else:
            self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []
        self.crons = []
        self.nagios_plugins = []

    def add_check(self, *args, **kwargs):
        """Queue a Check built from the given arguments."""
        self.checks.append(Check(*args, **kwargs))

    def add_cron(self, *args, **kwargs):
        """Queue a Cron built from the given arguments."""
        self.crons.append(Cron(*args, **kwargs))

    def add_nagios_plugin(self, *args, **kwargs):
        """Queue a NagiosPlugin built from the given arguments."""
        self.nagios_plugins.append(NagiosPlugin(*args, **kwargs))

    def add_from_config(self, config_key):
        """Queue every check, cron and plugin found in the YAML-serialized
        NRPESet stored under ``config_key`` in the charm config."""
        saved_config = config()
        nrpe_set = NRPESet(saved_config.get(config_key))
        log('NRPE.add_from_config: nrpe_set={}'.format(str(nrpe_set)))
        if nrpe_set:
            for check in nrpe_set.checks:
                self.add_check(*check)
            for cron in nrpe_set.crons:
                self.add_cron(*cron)
            for nagios_plugins in nrpe_set.nagios_plugins:
                self.add_nagios_plugin(*nagios_plugins)

    def write(self):
        """Write all queued checks, crons and plugins, restart
        nagios-nrpe-server and publish the monitors on the nrpe relations.

        Returns early (after logging) when the 'nagios' user or group does
        not exist.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except KeyError:
            # getpwnam()/getgrnam() raise KeyError for an unknown name; only
            # that case means "nagios not set up".  Anything else propagates
            # instead of being swallowed by a bare except.
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        for cron in self.crons:
            cron.write()

        for nagios_plugins in self.nagios_plugins:
            nagios_plugins.write()

        service('restart', 'nagios-nrpe-server')

        monitor_ids = relation_ids("local-monitors") + \
            relation_ids("nrpe-external-master")
        for rid in monitor_ids:
            relation_set(relation_id=rid, monitors=yaml.dump(monitors))
590+
591+
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_host_context

    Returns None when no related unit publishes that key.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    for rel in relations_of_type(relation_name):
        # Test for the key that is actually read: checking 'nagios_hostname'
        # raised KeyError for units publishing only the hostname.
        if 'nagios_host_context' in rel:
            return rel['nagios_host_context']
601+
602+
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Return the nagios_hostname published on the nrpe subordinate relation

    The first related unit carrying the key wins; None is returned when
    there is none.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    related = relations_of_type(relation_name)
    published = [rel['nagios_hostname'] for rel in related
                 if 'nagios_hostname' in rel][:1]
    if published:
        return published[0]
    return None
612+
613+
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Build the unit name used for nagios: "<host_context>:<local unit>" when
    the relation provides a host context, otherwise just the local unit

    :param str relation_name: Name of relation nrpe sub joined to
    """
    host_context = get_nagios_hostcontext(relation_name)
    if not host_context:
        return local_unit()
    return "%s:%s" % (host_context, local_unit())
626+
627+
def add_init_service_checks(nrpe, services, unit_name):
    """
    Add checks for each service in list

    A service with an upstart job gets a check_upstart_job check.  A service
    with only a sysv init script gets a cron.d file that records its
    ``status`` output every 5 minutes, plus a check reading that record.
    Services with neither are skipped.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    """
    for svc in services:
        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc
        if os.path.exists(upstart_init):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_upstart_job %s' % svc
            )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            cron_file = ('*/5 * * * * root '
                         '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                         '-s /etc/init.d/%s status > '
                         '/var/lib/nagios/service-check-%s.txt\n' % (svc,
                                                                     svc)
                         )
            # context manager: the handle is closed even if write() raises
            with open(cronpath, 'w') as f:
                f.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_status_file.py -f '
                          '/var/lib/nagios/service-check-%s.txt' % svc,
            )
662+
663+
def copy_nrpe_checks():
    """
    Copy every regular file named check_* from
    $CHARM_DIR/hooks/charmhelpers/contrib/openstack/files into
    NAGIOS_PLUGINS, creating that directory first if needed

    """
    charm_dir = os.getenv('CHARM_DIR')
    nrpe_files_dir = os.path.join(charm_dir, 'hooks', 'charmhelpers',
                                  'contrib', 'openstack', 'files')

    if not os.path.exists(NAGIOS_PLUGINS):
        os.makedirs(NAGIOS_PLUGINS)
    pattern = os.path.join(nrpe_files_dir, "check_*")
    for fname in glob.glob(pattern):
        if not os.path.isfile(fname):
            continue
        destination = os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))
        shutil.copy2(fname, destination)
679+
680+
def add_haproxy_checks(nrpe, unit_name):
    """
    Add the two HAProxy checks (servers and queue depth)

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    haproxy_checks = (
        ('haproxy_servers', 'Check HAProxy {%s}', 'check_haproxy.sh'),
        ('haproxy_queue', 'Check HAProxy queue depth {%s}',
         'check_haproxy_queue_depth.sh'),
    )
    for shortname, description, check_cmd in haproxy_checks:
        nrpe.add_check(
            shortname=shortname,
            description=description % unit_name,
            check_cmd=check_cmd)
696+
class NRPESet(object):
    """Serializable collection of NRPE checks, cron entries and plugins.

    ``str(obj)`` yields a YAML document that can be handed to another charm
    (e.g. over a relation) and turned back into an NRPESet by passing it to
    the constructor.
    """

    def __init__(self, yaml_str='null'):
        """
        :param str yaml_str: YAML mapping with optional 'checks', 'crons'
            and 'nagios_plugins' lists; anything that is not a mapping
            (including None or '') gives an empty set
        """
        # Always start from per-instance lists.  Class-level lists would be
        # shared: appending through one instance would leak into all others.
        self.checks = []
        self.crons = []
        self.nagios_plugins = []
        init_values = None
        if yaml_str:
            init_values = yaml.safe_load(yaml_str)
        log('NRPESet: init_values={}'.format(init_values))
        if isinstance(init_values, dict):
            # "or []" also covers keys explicitly set to null
            self.checks = list(init_values.get('checks') or [])
            self.crons = list(init_values.get('crons') or [])
            self.nagios_plugins = list(
                init_values.get('nagios_plugins') or [])

    def add_check(self, shortname, description, check_cmd):
        """Record a check as a (shortname, description, check_cmd) tuple."""
        log('NRPESet: add_check({})'.format((shortname, description, check_cmd)))
        self.checks.append((shortname, description, check_cmd))

    def add_init_service_checks(self, services, unit_name=None):
        """Record init-service checks via the module-level
        add_init_service_checks(); ``unit_name`` defaults to the local unit
        name with '/' replaced by '-'."""
        if not unit_name:
            unit_name = local_unit().replace('/', '-')
        log('NRPESet: add_init_service_checks({}, {})'.format(services, unit_name))
        add_init_service_checks(self, services, unit_name)

    def add_cron(self, filename, cron_freq, cron_user, cron_cmd):
        """Record a cron entry as a 4-tuple."""
        log('NRPESet: add_cron({}, ...)'.format(filename))
        self.crons.append((filename, cron_freq, cron_user, cron_cmd))

    def add_nagios_plugin(self, filename):
        """Record a plugin file as a 1-tuple."""
        log('NRPESet: add_nagios_plugin({})'.format(filename))
        self.nagios_plugins.append((filename,))

    def __str__(self):
        return yaml.safe_dump({'checks': self.checks,
                               'crons': self.crons,
                               'nagios_plugins': self.nagios_plugins})
733
734=== added file 'hooks/charmhelpers/contrib/charmsupport/volumes.py'
735--- hooks/charmhelpers/contrib/charmsupport/volumes.py 1970-01-01 00:00:00 +0000
736+++ hooks/charmhelpers/contrib/charmsupport/volumes.py 2016-01-07 21:40:19 +0000
737@@ -0,0 +1,175 @@
738+# Copyright 2014-2015 Canonical Limited.
739+#
740+# This file is part of charm-helpers.
741+#
742+# charm-helpers is free software: you can redistribute it and/or modify
743+# it under the terms of the GNU Lesser General Public License version 3 as
744+# published by the Free Software Foundation.
745+#
746+# charm-helpers is distributed in the hope that it will be useful,
747+# but WITHOUT ANY WARRANTY; without even the implied warranty of
748+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
749+# GNU Lesser General Public License for more details.
750+#
751+# You should have received a copy of the GNU Lesser General Public License
752+# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
753+
754+'''
755+Functions for managing volumes in juju units. One volume is supported per unit.
756+Subordinates may have their own storage, provided it is on its own partition.
757+
758+Configuration stanzas::
759+
760+ volume-ephemeral:
761+ type: boolean
762+ default: true
763+ description: >
764+ If false, a volume is mounted as specified in "volume-map"
765+ If true, ephemeral storage will be used, meaning that log data
766+ will only exist as long as the machine. YOU HAVE BEEN WARNED.
767+ volume-map:
768+ type: string
769+ default: {}
770+ description: >
771+ YAML map of units to device names, e.g:
772+ "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
773+ Service units will raise a configure-error if volume-ephemeral
774+ is 'false' and no volume-map value is set. Use 'juju set' to set a
775+ value and 'juju resolved' to complete configuration.
776+
777+Usage::
778+
779+ from charmsupport.volumes import configure_volume, VolumeConfigurationError
780+ from charmsupport.hookenv import log, ERROR
781+ def pre_mount_hook():
782+ stop_service('myservice')
783+ def post_mount_hook():
784+ start_service('myservice')
785+
786+ if __name__ == '__main__':
787+ try:
788+ configure_volume(before_change=pre_mount_hook,
789+ after_change=post_mount_hook)
790+ except VolumeConfigurationError:
791+ log('Storage could not be configured', ERROR)
792+
793+'''
794+
795+# XXX: Known limitations
796+# - fstab is neither consulted nor updated
797+
798+import os
799+from charmhelpers.core import hookenv
800+from charmhelpers.core import host
801+import yaml
802+
803+
804+MOUNT_BASE = '/srv/juju/volumes'
805+
806+
class VolumeConfigurationError(Exception):
    '''Raised when the volume configuration is missing or invalid, or when a
    mount/unmount requested from it fails'''
810+
811+
def get_config():
    '''Gather and sanity-check volume configuration data

    Reads 'volume-ephemeral' and 'volume-map' from the charm config and
    returns a dict with keys 'ephemeral' (bool), 'device' (this unit's entry
    in the volume map, or None) and 'mountpoint'.  Returns None when any
    sanity check fails; each failure is logged.
    '''
    volume_config = {}
    config = hookenv.config()

    errors = False

    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
        volume_config['ephemeral'] = True
    else:
        volume_config['ephemeral'] = False

    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        errors = True
        # keep the name bound so the checks below do not raise NameError
        volume_map = {}
    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)))
        errors = True
        # not a mapping: there is nothing to look the unit up in
        volume_map = {}

    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log('A volume is defined for this unit, but ephemeral '
                    'storage was requested', hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for permanent storage but did not define volume ID
        hookenv.log('Persistent storage was requested, but there is no '
                    'volume defined for this unit.', hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config
856+
857+
def mount_volume(config):
    '''Mount config['device'] on config['mountpoint'].

    The mountpoint is created when missing; whatever is mounted there is
    unmounted first.  Raises VolumeConfigurationError when the mountpoint
    exists but is not a directory, or when the mount fails.
    '''
    mountpoint = config['mountpoint']
    if not os.path.exists(mountpoint):
        host.mkdir(mountpoint)
    elif not os.path.isdir(mountpoint):
        hookenv.log('Not a directory: {}'.format(mountpoint))
        raise VolumeConfigurationError()
    if os.path.ismount(mountpoint):
        unmount_volume(config)
    mounted = host.mount(config['device'], mountpoint, persist=True)
    if not mounted:
        raise VolumeConfigurationError()
869+
870+
def unmount_volume(config):
    '''Unmount config['mountpoint'] if something is mounted there.

    Raises VolumeConfigurationError when the unmount fails; does nothing
    when the path is not a mount point.
    '''
    mountpoint = config['mountpoint']
    if not os.path.ismount(mountpoint):
        return
    if not host.umount(mountpoint, persist=True):
        raise VolumeConfigurationError()
875+
876+
def managed_mounts():
    '''List of all mounted managed volumes

    Returns the entries of host.mounts() whose first element starts with
    MOUNT_BASE.
    '''
    # A list comprehension gives a real list on both Python 2 and 3;
    # filter() returns a one-shot iterator on Python 3.
    return [mount for mount in host.mounts()
            if mount[0].startswith(MOUNT_BASE)]
880+
881+
def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Bring the unit's storage in line with the charm's volume configuration.

    Returns "ephemeral" when ephemeral storage is configured, otherwise the
    mount point.  before_change and after_change are optional callables,
    invoked around any unmount/mount this function performs.  Raises
    VolumeConfigurationError when the configuration cannot be read.
    '''

    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    mountpoint = config['mountpoint']
    currently_mounted = os.path.ismount(mountpoint)

    if config['ephemeral']:
        # ephemeral storage: make sure nothing of ours stays mounted
        if currently_mounted:
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'

    # persistent storage
    if not currently_mounted:
        before_change()
        mount_volume(config)
        after_change()
        return mountpoint

    mounts = dict(managed_mounts())
    if mounts.get(mountpoint) != config['device']:
        # a different device is mounted there: swap it
        before_change()
        unmount_volume(config)
        mount_volume(config)
        after_change()
    return mountpoint
913
914=== modified file 'hooks/neutron_ovs_hooks.py'
915--- hooks/neutron_ovs_hooks.py 2015-11-12 09:33:27 +0000
916+++ hooks/neutron_ovs_hooks.py 2016-01-07 21:40:19 +0000
917@@ -1,5 +1,6 @@
918 #!/usr/bin/python
919
920+import os
921 import sys
922
923 from copy import deepcopy
924@@ -16,6 +17,7 @@
925 log,
926 relation_set,
927 relation_ids,
928+ local_unit,
929 )
930
931 from charmhelpers.core.host import (
932@@ -45,6 +47,7 @@
933 REQUIRED_INTERFACES,
934 check_optional_relations,
935 )
936+from charmhelpers.contrib.charmsupport import nrpe
937
938 hooks = Hooks()
939 CONFIGS = register_configs()
940@@ -72,6 +75,10 @@
941 for rid in relation_ids('neutron-plugin'):
942 neutron_plugin_joined(relation_id=rid)
943
944+ if config_value_changed('enable-nrpe-checks'):
945+ for rid in relation_ids('neutron-plugin'):
946+ neutron_plugin_joined(rid)
947+
948
949 @hooks.hook('neutron-plugin-api-relation-changed')
950 @restart_on_change(restart_map())
951@@ -87,6 +94,37 @@
952 neutron_plugin_joined(relation_id=rid)
953
954
def neutron_plugin_nrpe_checks():
    """Return the YAML-serialized NRPESet for this unit, or '' when the
    'enable-nrpe-checks' config option is false.

    The set contains the neutron-check-tun_ids.py plugin, a cron entry that
    runs it every 5 minutes into a status file, a check reading that status
    file, and init-service checks for the OVS services.
    """
    log('neutron_plugin_nrpe_checks: enable-nrpe-checks={}'.format(
        config('enable-nrpe-checks')))
    if not config('enable-nrpe-checks'):
        return ''
    current_unit = local_unit().replace('/', '-')
    # Build from an empty mapping rather than the default argument: with the
    # default, NRPESet keeps using its class-level lists, so every call made
    # during one hook run (one per 'neutron-plugin' relation id) would append
    # the same entries again.
    nrpe_set = nrpe.NRPESet('{}')
    nrpe_set.add_nagios_plugin(
        os.path.join(os.getenv('CHARM_DIR'),
                     'files', 'nrpe-external-master',
                     'neutron-check-tun_ids.py'))
    nrpe_set.add_cron(
        filename='nagios-check-tun_ids',
        cron_freq='*/5 * * * *',
        cron_user='root',
        cron_cmd='{}/neutron-check-tun_ids.py > '
                 '/var/lib/nagios/neutron-check-tun_ids.txt'
                 ''.format(nrpe.NAGIOS_PLUGINS),
    )
    nrpe_set.add_check(
        shortname='neutron_tun_ids',
        description='Check neutron ovs tun_ids {%s}' % current_unit,
        check_cmd='check_status_file.py -f '
                  '/var/lib/nagios/neutron-check-tun_ids.txt'
    )
    nrpe_set.add_init_service_checks(
        ['openvswitch-switch', 'neutron-plugin-openvswitch-agent'])
    log('neutron_plugin_nrpe_checks: nrpe_set={}'.format(str(nrpe_set)))
    return str(nrpe_set)
984+
985+
986 @hooks.hook('neutron-plugin-relation-joined')
987 def neutron_plugin_joined(relation_id=None):
988 if enable_local_dhcp():
989@@ -100,8 +138,10 @@
990 pkgs.extend(METADATA_PACKAGES)
991 purge_packages(pkgs)
992 secret = get_shared_secret() if enable_nova_metadata() else None
993+ nrpe_checks = neutron_plugin_nrpe_checks()
994 rel_data = {
995 'metadata-shared-secret': secret,
996+ 'nrpe-checks': nrpe_checks,
997 }
998 relation_set(relation_id=relation_id, **rel_data)
999

Subscribers

People subscribed via source and target branches