Merge lp:~abentley/charms/precise/charmworld/nagios into lp:~juju-jitsu/charms/precise/charmworld/trunk

Proposed by Aaron Bentley
Status: Merged
Merged at revision: 36
Proposed branch: lp:~abentley/charms/precise/charmworld/nagios
Merge into: lp:~juju-jitsu/charms/precise/charmworld/trunk
Diff against target: 550 lines (+375/-84)
13 files modified
charmsupport/hookenv.py (+150/-0)
charmsupport/nrpe.py (+169/-0)
config.yaml (+10/-0)
files/nrpe-external-master/check_ingest.sh (+5/-0)
hooks/config-changed (+3/-1)
hooks/install (+1/-1)
hooks/nrpe-external-master-relation-changed (+2/-0)
hooks/upgrade-charm (+3/-0)
metadata.yaml (+3/-0)
revision (+1/-1)
run-write-errors (+16/-0)
shhh.py (+0/-81)
update-nrpe.py (+12/-0)
To merge this branch: bzr merge lp:~abentley/charms/precise/charmworld/nagios
Reviewer Review Type Date Requested Status
Juju-Jitsu Hackers Pending
Review via email: mp+145970@code.launchpad.net

Description of the change

Implement nagios support

Change the way errors are reported so that any errors running ingest cause a
file, ~ubuntu/var/ingest-errors, to be created. The presence of this file is
used for a nagios check.

The files in charmsupport are copied almost verbatim from lp:charmsupport r27.
(Exception: the change in lp:~pjdc/charmsupport/hookenv-vs-nrpe was applied.)

Ideally we will switch to a packaged version of charmsupport, but not if its
nagios support is broken!

The shhh.py file is deleted because it is no longer needed.

https://codereview.appspot.com/7241058/

To post a comment you must log in.
Revision history for this message
Richard Harding (rharding) wrote :

lgtm but I'd like to move the new files into a scripts directory. I've
started one in my branch as well. Things like the run-write-errors and
update-nrpe.py could find a home there and help prevent polluting the
root of the file tree.

https://codereview.appspot.com/7241058/

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added directory 'charmsupport'
=== added file 'charmsupport/__init__.py'
=== added file 'charmsupport/hookenv.py'
--- charmsupport/hookenv.py 1970-01-01 00:00:00 +0000
+++ charmsupport/hookenv.py 2013-01-31 20:16:33 +0000
@@ -0,0 +1,150 @@
1"Interactions with the Juju environment"
2# source: 27:lp:charmsupport
3# Copyright 2012 Canonical Ltd.
4#
5# Authors:
6# Matthew Wedgwood <matthew.wedgwood@canonical.com>
7
8import os
9import json
10import yaml
11import subprocess
12
13CRITICAL = "CRITICAL"
14ERROR = "ERROR"
15WARNING = "WARNING"
16INFO = "INFO"
17DEBUG = "DEBUG"
def log(message, level=DEBUG):
    """Record *message* in the juju log by running ``juju-log``.

    level is passed to ``juju-log -l`` and defaults to DEBUG.
    """
    command = ['juju-log', '-l', level, message]
    subprocess.call(command)
21
class Serializable(object):
    """Wrap an object so that it can be serialized to YAML or JSON.

    Keys of the wrapped object can be read by subscription
    (``wrapper['key']``) or as attributes (``wrapper.key``), and can be set
    by subscription.
    """

    def __init__(self, obj):
        """Store *obj* as the wrapped object."""
        super(Serializable, self).__init__()
        self._wrapped_obj = obj

    def __getattr__(self, attr):
        """Proxy unknown attribute names to keys of the wrapped object.

        Python only calls this after normal attribute lookup has failed, so
        real attributes and methods never reach it.

        Raises AttributeError when the wrapped object has no such key, so
        that hasattr() and getattr() with a default behave as expected.
        """
        # Looking up self._wrapped_obj before __init__ has set it (for
        # example while copy or pickle rebuild an instance) would re-enter
        # this method and recurse without end.
        if attr == '_wrapped_obj':
            raise AttributeError(attr)
        try:
            return self._wrapped_obj[attr]
        except (KeyError, IndexError, TypeError):
            raise AttributeError(attr)

    def __getitem__(self, key):
        """Return ``key`` from the wrapped object."""
        return self._wrapped_obj[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` in the wrapped object."""
        self._wrapped_obj[key] = value

    def json(self):
        """Serialize the wrapped object to JSON."""
        return json.dumps(self._wrapped_obj)

    def yaml(self):
        """Serialize the wrapped object to YAML."""
        return yaml.dump(self._wrapped_obj)
46
def execution_environment():
    """Bundle the current execution context into a single dict.

    Keys: 'conf' (charm configuration), 'unit' (local unit name),
    'rel' (relation data from relations_of_type()) and 'env' (os.environ).
    """
    return {
        'conf': config(),
        'unit': local_unit(),
        'rel': relations_of_type(),
        'env': os.environ,
    }
55
def in_relation_hook():
    """Determine whether we're running in a relation hook.

    Returns True when JUJU_RELATION is present in the process environment.
    """
    # dict.has_key() is deprecated and absent from Python 3; ``in`` works
    # on every Python version.
    return 'JUJU_RELATION' in os.environ
59
def relation_type():
    """Return the value of JUJU_RELATION for the current relation hook.

    Raises KeyError when the variable is not set.
    """
    environment = os.environ
    return environment['JUJU_RELATION']
def relation_id():
    """Return the value of JUJU_RELATION_ID for the current relation hook.

    Raises KeyError when the variable is not set.
    """
    environment = os.environ
    return environment['JUJU_RELATION_ID']
def local_unit():
    """Return the local unit ID, read from JUJU_UNIT_NAME.

    Raises KeyError when the variable is not set.
    """
    environment = os.environ
    return environment['JUJU_UNIT_NAME']
def remote_unit():
    """Return the remote unit for the current relation hook.

    The value is read from JUJU_REMOTE_UNIT; KeyError is raised when the
    variable is not set.
    """
    environment = os.environ
    return environment['JUJU_REMOTE_UNIT']
72
def config(scope=None):
    """Return the Juju charm configuration.

    scope: optional name of a single setting; when given it is passed to
        ``config-get`` so that only that setting is fetched.

    Returns the decoded JSON output of ``config-get`` wrapped in a
    Serializable.

    Raises ValueError when the output is not valid JSON, OSError when
    ``config-get`` cannot be run and subprocess.CalledProcessError when it
    exits with a non-zero status. The error is logged at ERROR level first.
    """
    config_cmd_line = ['config-get']
    if scope is not None:
        config_cmd_line.append(scope)
    config_cmd_line.append('--format=json')
    try:
        config_data = json.loads(subprocess.check_output(config_cmd_line))
    except (ValueError, OSError, subprocess.CalledProcessError) as err:
        log(str(err), level=ERROR)
        # A bare raise keeps the original traceback; ``raise err`` would
        # restart it from this line on Python 2.
        raise
    return Serializable(config_data)
85
def relation_ids(reltype=None):
    """Return a list of relation IDs as reported by ``relation-ids``.

    reltype: relation name to query; when omitted or empty the value of
        relation_type() is used.
    """
    if not reltype:
        reltype = relation_type()
    output = subprocess.check_output(
        ['relation-ids', '--format=json', reltype])
    return list(json.loads(output))
93
def related_units(relid=None):
    """Return the related units as reported by ``relation-list``.

    relid: relation ID to query; when omitted or empty the value of
        relation_id() is used.
    """
    if not relid:
        relid = relation_id()
    output = subprocess.check_output(
        ['relation-list', '--format=json', '-r', relid])
    return json.loads(output)
100
def relation_for_unit(unit=None):
    """Get the JSON representation of a unit's relation data.

    unit: the unit whose settings are read; when omitted or empty the value
        of remote_unit() is used.

    Returns a Serializable wrapping the decoded ``relation-get`` output.
    Values whose key ends in '-list' are split on whitespace into Python
    lists, and the unit name is stored under '__unit__'.

    Raises ValueError, OSError or subprocess.CalledProcessError when
    ``relation-get`` cannot be run or its output cannot be decoded. The
    error is logged at ERROR level first.
    """
    unit = unit or remote_unit()
    relation_cmd_line = ['relation-get', '--format=json', '-', unit]
    try:
        relation = json.loads(subprocess.check_output(relation_cmd_line))
    # ``as`` is accepted by Python 2.6+ and Python 3; the comma form is
    # Python 2 only.
    except (ValueError, OSError, subprocess.CalledProcessError) as err:
        log(str(err), level=ERROR)
        # A bare raise keeps the original traceback.
        raise
    for key in relation:
        if key.endswith('-list'):
            relation[key] = relation[key].split()
    relation['__unit__'] = unit
    return Serializable(relation)
115
def relations_for_id(relid=None):
    """Get the relation data of every unit related through one relation ID.

    relid: the relation ID to inspect; when omitted or empty the ID of the
        current relation hook (relation_id()) is used.

    Returns a list with one relation_for_unit() result per related unit,
    each tagged with the relation ID under '__relid__'.
    """
    # The fallback has to be the single current ID: relation_ids() returns
    # a list, which cannot be passed to ``relation-list -r``.
    relid = relid or relation_id()
    relation_data = []
    for unit in related_units(relid):
        # NOTE(review): relation_for_unit() is not told which relation to
        # read, so this presumably only yields the right settings for the
        # relation the hook is running in -- confirm.
        unit_data = relation_for_unit(unit)
        # relation_for_unit() returns a Serializable wrapper; the tag is
        # stored on the wrapped dict, which is where item reads are proxied.
        unit_data._wrapped_obj['__relid__'] = relid
        relation_data.append(unit_data)
    return relation_data
125
def relations_of_type(reltype=None):
    """Get the relation data of every unit on relations of one type.

    reltype: relation name to inspect; when omitted or empty the type of
        the current relation hook is used.

    Returns a list of relations_for_id() entries, each tagged with its
    relation ID under '__relid__'. Outside a relation hook, with no reltype
    given, there is nothing to look up and an empty list is returned.
    """
    relation_data = []
    if not reltype:
        if not in_relation_hook():
            # relation_type() would raise KeyError here, which made
            # execution_environment() unusable in non-relation hooks.
            return relation_data
        reltype = relation_type()
    for relid in relation_ids(reltype):
        for relation in relations_for_id(relid):
            # Entries are Serializable wrappers; the tag is stored on the
            # wrapped dict, which is where item reads are proxied.
            relation._wrapped_obj['__relid__'] = relid
            relation_data.append(relation)
    return relation_data
136
class UnregisteredHookError(Exception):
    """Raised by Hooks.execute() for a hook name that was never registered."""
138
class Hooks(object):
    """Registry that maps hook names to the callables implementing them."""

    def __init__(self):
        super(Hooks, self).__init__()
        self._hooks = {}

    def register(self, name, function):
        """Associate *function* with the hook called *name*."""
        self._hooks[name] = function

    def execute(self, args):
        """Run the hook named by the basename of ``args[0]``.

        Raises UnregisteredHookError when no function was registered under
        that name.
        """
        hook_name = os.path.basename(args[0])
        if hook_name not in self._hooks:
            raise UnregisteredHookError(hook_name)
        handler = self._hooks[hook_name]
        handler()
0151
=== added file 'charmsupport/nrpe.py'
--- charmsupport/nrpe.py 1970-01-01 00:00:00 +0000
+++ charmsupport/nrpe.py 2013-01-31 20:16:33 +0000
@@ -0,0 +1,169 @@
1"""Compatibility with the nrpe-external-master charm"""
2# source: 27:lp:charmsupport
3# Copyright 2012 Canonical Ltd.
4#
5# Authors:
6# Matthew Wedgwood <matthew.wedgwood@canonical.com>
7
8import subprocess
9import pwd
10import grp
11import os
12import re
13import shlex
14
15from hookenv import config, local_unit
16
17# This module adds compatibility with the nrpe_external_master
18# subordinate charm. To use it in your charm:
19#
20# 1. Update metadata.yaml
21#
22# provides:
23# (...)
24# nrpe-external-master:
25# interface: nrpe-external-master
26# scope: container
27#
28# 2. Add the following to config.yaml
29#
30# nagios_context:
31# default: "juju"
32# type: string
33# description: |
34# Used by the nrpe-external-master subordinate charm.
35# A string that will be prepended to instance name to set the host name
36# in nagios. So for instance the hostname would be something like:
37# juju-myservice-0
38# If you're running multiple environments with the same services in them
39# this allows you to differentiate between them.
40#
41# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
42#
43# 4. Update your hooks.py with something like this:
44#
45# import nrpe
46# (...)
47# def update_nrpe_config():
48# nrpe_compat = nrpe.NRPE()
49# nrpe_compat.add_check(
50# shortname = "myservice",
51# description = "Check MyService",
52# check_cmd = "check_http -w 2 -c 10 http://localhost"
53# )
54# nrpe_compat.add_check(
55# "myservice_other",
56# "Check for widget failures",
57# check_cmd = "/srv/myapp/scripts/widget_check"
58# )
59# nrpe_compat.write()
60#
61# def config_changed():
62# (...)
63# update_nrpe_config()
64# def nrpe_external_master_relation_changed():
65# update_nrpe_config()
66#
67# 5. ln -s hooks.py nrpe-external-master-relation-changed
68
class CheckException(Exception):
    """Raised when a Check is created with an invalid short name."""


class Check(object):
    """One Nagios check exported through the nrpe-external-master charm.

    shortname: identifier used in the file names and in the NRPE command
        name; letters, digits, '-' and '_' only, at least one character.
    description: free text appended to the Nagios service description.
    check_cmd: the plugin command line. Its first word is looked up in '/',
        in $CHARM_DIR/files/nrpe-external-master and in
        /usr/lib/nagios/plugins, in that order.
    """

    # '+' and the end anchor make the pattern cover the whole name; with
    # '*' and no anchor, re.match() succeeded on every string.
    shortname_re = r'[A-Za-z0-9-_]+\Z'
    # Doubled braces are literal braces for str.format().
    service_template = """
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use active-service
    host_name {nagios_hostname}
    service_description {nagios_hostname}[{shortname}] {description}
    check_command check_nrpe!check_{shortname}
    servicegroups {nagios_servicegroup}
}}
"""

    def __init__(self, shortname, description, check_cmd):
        """Validate the short name and resolve the check command.

        Raises CheckException when shortname does not match shortname_re.
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException(
                "shortname must match {}".format(Check.shortname_re))
        self.shortname = shortname
        # Note: a set of invalid characters is defined by the Nagios server
        # config. The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _locate_cmd(self, check_cmd):
        """Return check_cmd with its executable expanded to a full path.

        Returns '' and writes a juju-log message when the executable is not
        found in any search directory, or when check_cmd is empty.
        """
        search_path = (
            # Joining '/' with an absolute path yields that path unchanged,
            # so commands given with a full path are found here.
            '/',
            os.path.join(os.environ['CHARM_DIR'],
                         'files/nrpe-external-master'),
            '/usr/lib/nagios/plugins',
        )
        command = shlex.split(check_cmd)
        if not command:
            subprocess.call(
                ['juju-log', 'Check command not found: {}'.format(check_cmd)])
            return ''
        for path in search_path:
            candidate = os.path.join(path, command[0])
            if os.path.exists(candidate):
                # NOTE(review): the arguments are re-joined with plain
                # spaces, so quoting present in check_cmd is not preserved.
                return " ".join([candidate] + command[1:])
        subprocess.call(
            ['juju-log', 'Check command not found: {}'.format(command[0])])
        return ''

    def write(self, nagios_context, hostname):
        """Write the Nagios service definition and the NRPE command file.

        nagios_context: written as the service's servicegroups value.
        hostname: written as host_name and used in the export file name.

        Export files previously written for this short name are removed
        from NRPE.nagios_exportdir first.
        """
        # Escape the name and the dot so they are matched literally.
        stale = re.compile(
            r'check_{}\.cfg$'.format(re.escape(self.shortname)))
        for f in os.listdir(NRPE.nagios_exportdir):
            if stale.search(f):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_context,
            'description': self.description,
            'shortname': self.shortname,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = '{}/service__{}_check_{}.cfg'.format(
            NRPE.nagios_exportdir, hostname, self.shortname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

        nrpe_check_file = '{}/check_{}.cfg'.format(
            NRPE.nrpe_confdir, self.shortname)
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[check_{}]={}\n".format(
                self.shortname, self.check_cmd))

    def run(self):
        """Run the check command and return its exit status."""
        # check_cmd is one string that may carry arguments; without a shell
        # subprocess needs it split into an argument list.
        return subprocess.call(shlex.split(self.check_cmd))
133
class NRPE(object):
    """Collects Check objects and writes them out for nrpe-external-master.

    The Nagios host name is built from the charm's ``nagios_context``
    setting and the local unit name, with '/' replaced by '-'.
    """

    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self):
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        self.unit_name = local_unit().replace('/', '-')
        self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []

    def add_check(self, *args, **kwargs):
        """Create a Check from the given arguments and queue it for write()."""
        self.checks.append(Check(*args, **kwargs))

    def write(self):
        """Write every queued check and reload the NRPE server.

        Logs a message and returns without writing anything when the
        'nagios' user or group does not exist, or when the export directory
        is missing. The Nagios log directory is created, owned by nagios,
        when it is absent.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        # getpwnam()/getgrnam() raise KeyError for an unknown name; a bare
        # except would also hide unrelated errors and KeyboardInterrupt.
        except KeyError:
            subprocess.call(
                ['juju-log',
                 "Nagios user not set up, nrpe checks not updated"])
            return

        if not os.path.exists(NRPE.nagios_exportdir):
            subprocess.call(
                ['juju-log', 'Exiting as {} is not accessible'.format(
                    NRPE.nagios_exportdir)])
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname)

        if os.path.isfile('/etc/init.d/nagios-nrpe-server'):
            subprocess.call(['service', 'nagios-nrpe-server', 'reload'])
0170
=== modified file 'config.yaml'
--- config.yaml 2013-01-23 21:21:06 +0000
+++ config.yaml 2013-01-31 20:16:33 +0000
@@ -15,3 +15,13 @@
15 type: string15 type: string
16 default: ""16 default: ""
17 description: "Address to mail errors to."17 description: "Address to mail errors to."
18 nagios_context:
19 default: "juju"
20 type: string
21 description: |
22 Used by the nrpe-external-master subordinate charm.
23 A string that will be prepended to instance name to set the host name
24 in nagios. So for instance the hostname would be something like:
25 juju-myservice-0
26 If you're running multiple environments with the same services in them
27 this allows you to differentiate between them.
1828
=== added directory 'files'
=== added directory 'files/nrpe-external-master'
=== added file 'files/nrpe-external-master/check_ingest.sh'
--- files/nrpe-external-master/check_ingest.sh 1970-01-01 00:00:00 +0000
+++ files/nrpe-external-master/check_ingest.sh 2013-01-31 20:16:33 +0000
@@ -0,0 +1,5 @@
#!/bin/sh
# Nagios plugin: report a problem while /home/ubuntu/var/ingest-errors
# exists. The charm's cron entry runs ingest through run-write-errors with
# that path as its error file.
if [ -f /home/ubuntu/var/ingest-errors ]; then
    echo Charmworld ingest failing
    exit 1
fi
# Nagios plugins are expected to print a status line on every run,
# including the healthy one.
echo Charmworld ingest OK
exit 0
06
=== modified file 'hooks/config-changed'
--- hooks/config-changed 2013-01-29 15:45:19 +0000
+++ hooks/config-changed 2013-01-31 20:16:33 +0000
@@ -79,9 +79,11 @@
79PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin79PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
80$mailto80$mailto
8181
82*/$interval * * * * ubuntu HOME=/home/ubuntu INI=$CONFIG_FILE ~ubuntu/shhh.py $project_dir/scripts/ingest82*/$interval * * * * ubuntu HOME=/home/ubuntu INI=$CONFIG_FILE ~ubuntu/run-write-errors /home/ubuntu/var/ingest-errors $project_dir/scripts/ingest
83@daily ubuntu HOME=/home/ubuntu INI=$project_dir/production.ini $project_dir/bin/python $project_dir/charmworld/jobs/cstat.py83@daily ubuntu HOME=/home/ubuntu INI=$project_dir/production.ini $project_dir/bin/python $project_dir/charmworld/jobs/cstat.py
8484
85EOF85EOF
8686
87fi87fi
88# nagios_context may have changed.
89./update-nrpe.py
8890
=== modified file 'hooks/install'
--- hooks/install 2013-01-24 17:34:53 +0000
+++ hooks/install 2013-01-31 20:16:33 +0000
@@ -17,4 +17,4 @@
17postfix reload17postfix reload
18mkdir -p ~ubuntu/var/charms18mkdir -p ~ubuntu/var/charms
19chown -R ubuntu.ubuntu ~ubuntu/var19chown -R ubuntu.ubuntu ~ubuntu/var
20cp shhh.py ~ubuntu20install -o ubuntu -g ubuntu run-write-errors ~ubuntu
2121
=== added file 'hooks/nrpe-external-master-relation-changed'
--- hooks/nrpe-external-master-relation-changed 1970-01-01 00:00:00 +0000
+++ hooks/nrpe-external-master-relation-changed 2013-01-31 20:16:33 +0000
@@ -0,0 +1,2 @@
1#!/bin/sh
2./update-nrpe.py
03
=== modified file 'hooks/upgrade-charm'
--- hooks/upgrade-charm 2013-01-24 17:34:53 +0000
+++ hooks/upgrade-charm 2013-01-31 20:16:33 +0000
@@ -10,3 +10,6 @@
10fi10fi
11hooks/config-changed11hooks/config-changed
12hooks/restart12hooks/restart
13if [ -f $HOME/shhh.py ]; then
14 rm $HOME/shhh.py
15fi
1316
=== modified file 'metadata.yaml'
--- metadata.yaml 2012-08-20 19:57:46 +0000
+++ metadata.yaml 2013-01-31 20:16:33 +0000
@@ -6,6 +6,9 @@
6provides:6provides:
7 website:7 website:
8 interface: http8 interface: http
9 nrpe-external-master:
10 interface: nrpe-external-master
11 scope: container
9requires:12requires:
10 database:13 database:
11 interface: mongodb14 interface: mongodb
1215
=== modified file 'revision'
--- revision 2013-01-29 15:32:47 +0000
+++ revision 2013-01-31 20:16:33 +0000
@@ -1,1 +1,1 @@
118119
22
=== added file 'run-write-errors'
--- run-write-errors 1970-01-01 00:00:00 +0000
+++ run-write-errors 2013-01-31 20:16:33 +0000
@@ -0,0 +1,16 @@
#!/bin/sh
# Usage: run-write-errors ERROR_FILE COMMAND [ARG...]
#
# Run COMMAND and, if it exits with non-zero status, leave its combined
# stdout and stderr behind in ERROR_FILE. If it succeeds, any existing
# ERROR_FILE is removed. The exit status is that of COMMAND.
if [ $# -lt 2 ]; then
    echo "usage: $0 ERROR_FILE COMMAND [ARG...]" >&2
    exit 2
fi
error_file=$1
shift
# Output goes to a temporary file first so that ERROR_FILE only ever
# appears once the command has finished and failed. Expansions are quoted
# so that paths containing spaces or glob characters are handled.
"$@" > "$error_file.tmp" 2>&1
status=$?
if [ $status -ne 0 ]; then
    mv "$error_file.tmp" "$error_file"
else
    # -f: a missing file is not an error.
    rm -f "$error_file.tmp" "$error_file"
fi
exit $status
017
=== removed file 'shhh.py'
--- shhh.py 2013-01-24 21:00:17 +0000
+++ shhh.py 1970-01-01 00:00:00 +0000
@@ -1,81 +0,0 @@
1#! /usr/bin/python -S
2#
3# Copyright 2009 Canonical Ltd. This software is licensed under the
4# GNU Affero General Public License version 3 (see the file LICENSE).
5
6"""
7Run a command and suppress output unless it returns a non-zero exit status
8"""
9
10__metaclass__ = type
11
12from subprocess import (
13 PIPE,
14 Popen,
15 )
16import sys
17
18
19def shhh(cmd):
20 r"""Run a command and suppress output unless it returns a non-zero exitcode
21
22 If output is generated, stderr will be output before stdout, so output
23 order may be messed up if the command attempts to control order by
24 flushing stdout at points or setting it to unbuffered.
25
26
27 To test, we invoke both this method and this script with some commands
28 and examine the output and exitvalue
29
30 >>> python = sys.executable
31
32 >>> def shhh_script(cmd):
33 ... from subprocess import Popen, PIPE
34 ... script = '%s %s' % (python, __file__)
35 ... cmd = "%s '%s'" % (script, cmd)
36 ... p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
37 ... (out, err) = p.communicate()
38 ... return (out, err, p.returncode)
39
40 >>> cmd = '''%s -c "import sys; sys.exit(%d)"''' % (python, 0)
41 >>> shhh(cmd)
42 0
43 >>> shhh_script(cmd)
44 ('', '', 0)
45
46 >>> cmd = '''%s -c "import sys; sys.exit(%d)"''' % (python, 1)
47 >>> shhh(cmd)
48 1
49 >>> shhh_script(cmd)
50 ('', '', 1)
51
52 >>> cmd = '''%s -c "import sys; print 666; sys.exit(%d)"''' % (
53 ... python, 42)
54 >>> shhh(cmd)
55 666
56 42
57 >>> shhh_script(cmd)
58 ('666\n', '', 42)
59
60 >>> cmd = (
61 ... '''%s -c "import sys; print 666; '''
62 ... '''print >> sys.stderr, 667; sys.exit(42)"''' % python
63 ... )
64 >>> shhh_script(cmd)
65 ('666\n', '667\n', 42)
66 """
67
68 process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
69 (out, err) = process.communicate()
70 if process.returncode == 0:
71 return 0
72 else:
73 sys.stderr.write(err)
74 sys.stdout.write(out)
75 return process.returncode
76
77
78if __name__ == '__main__':
79 cmd = ' '.join(sys.argv[1:])
80 sys.exit(shhh(cmd))
81
820
=== added file 'update-nrpe.py'
--- update-nrpe.py 1970-01-01 00:00:00 +0000
+++ update-nrpe.py 2013-01-31 20:16:33 +0000
@@ -0,0 +1,12 @@
1#!/usr/bin/env python
2from charmsupport import nrpe
3
4
def update_nrpe_config():
    """Register the charmworld ingest check and write the NRPE config."""
    exporter = nrpe.NRPE()
    exporter.add_check(
        shortname='ingest',
        description='Check ingest runs',
        check_cmd='check_ingest.sh',
    )
    exporter.write()


if __name__ == '__main__':
    update_nrpe_config()

Subscribers

People subscribed via source and target branches

to all changes: