Merge lp:~abentley/charms/precise/charmworld/nagios into lp:~juju-jitsu/charms/precise/charmworld/trunk

Proposed by Aaron Bentley
Status: Merged
Merged at revision: 36
Proposed branch: lp:~abentley/charms/precise/charmworld/nagios
Merge into: lp:~juju-jitsu/charms/precise/charmworld/trunk
Diff against target: 550 lines (+375/-84)
13 files modified
charmsupport/hookenv.py (+150/-0)
charmsupport/nrpe.py (+169/-0)
config.yaml (+10/-0)
files/nrpe-external-master/check_ingest.sh (+5/-0)
hooks/config-changed (+3/-1)
hooks/install (+1/-1)
hooks/nrpe-external-master-relation-changed (+2/-0)
hooks/upgrade-charm (+3/-0)
metadata.yaml (+3/-0)
revision (+1/-1)
run-write-errors (+16/-0)
shhh.py (+0/-81)
update-nrpe.py (+12/-0)
To merge this branch: bzr merge lp:~abentley/charms/precise/charmworld/nagios
Reviewer | Review Type | Date Requested | Status
Juju-Jitsu Hackers | | | Pending
Review via email: mp+145970@code.launchpad.net

Description of the change

Implement nagios support

Change the way errors are reported so that any errors running ingest cause a
file, ~ubuntu/var/ingest-errors, to be created. The presence of this file is
used for a nagios check.

The files in charmsupport are copied almost verbatim from lp:charmsupport r27.
(Exception: the change in lp:~pjdc/charmsupport/hookenv-vs-nrpe was applied.)

Ideally we will switch to a packaged version of charmsupport, but not if its
nagios support is broken!

The shhh.py file is deleted because it is no longer needed.

https://codereview.appspot.com/7241058/

To post a comment you must log in.
Revision history for this message
Richard Harding (rharding) wrote :

LGTM, but I'd like to move the new files into a scripts directory. I've
started one in my branch as well. Files like run-write-errors and
update-nrpe.py could find a home there, which would help prevent polluting
the root of the file tree.

https://codereview.appspot.com/7241058/

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added directory 'charmsupport'
2=== added file 'charmsupport/__init__.py'
3=== added file 'charmsupport/hookenv.py'
4--- charmsupport/hookenv.py 1970-01-01 00:00:00 +0000
5+++ charmsupport/hookenv.py 2013-01-31 20:16:33 +0000
6@@ -0,0 +1,150 @@
7+"Interactions with the Juju environment"
8+# source: 27:lp:charmsupport
9+# Copyright 2012 Canonical Ltd.
10+#
11+# Authors:
12+# Matthew Wedgwood <matthew.wedgwood@canonical.com>
13+
14+import os
15+import json
16+import yaml
17+import subprocess
18+
19+CRITICAL = "CRITICAL"
20+ERROR = "ERROR"
21+WARNING = "WARNING"
22+INFO = "INFO"
23+DEBUG = "DEBUG"
24+def log(message, level=DEBUG):
25+ "Write a message to the juju log"
26+ subprocess.call( [ 'juju-log', '-l', level, message ] )
27+
28+class Serializable(object):
29+ "Wrapper, an object that can be serialized to yaml or json"
30+ def __init__(self, obj):
31+ # wrap the object
32+ super(Serializable, self).__init__()
33+ self._wrapped_obj = obj
34+
35+ def __getattr__(self, attr):
36+ # see if this object has attr
37+ if attr in self.__dict__:
38+ return getattr(self, attr)
39+ # proxy to the wrapped object
40+ return self[attr]
41+
42+ def __getitem__(self, key):
43+ return self._wrapped_obj[key]
44+
45+ def json(self):
46+ "Serialize the object to json"
47+ return json.dumps(self._wrapped_obj)
48+
49+ def yaml(self):
50+ "Serialize the object to yaml"
51+ return yaml.dump(self._wrapped_obj)
52+
53+def execution_environment():
54+ """A convenient bundling of the current execution context"""
55+ context = {}
56+ context['conf'] = config()
57+ context['unit'] = local_unit()
58+ context['rel'] = relations_of_type()
59+ context['env'] = os.environ
60+ return context
61+
62+def in_relation_hook():
63+ "Determine whether we're running in a relation hook"
64+ return os.environ.has_key('JUJU_RELATION')
65+
66+def relation_type():
67+ "The scope for the current relation hook"
68+ return os.environ['JUJU_RELATION']
69+def relation_id():
70+ "The relation ID for the current relation hook"
71+ return os.environ['JUJU_RELATION_ID']
72+def local_unit():
73+ "Local unit ID"
74+ return os.environ['JUJU_UNIT_NAME']
75+def remote_unit():
76+ "The remote unit for the current relation hook"
77+ return os.environ['JUJU_REMOTE_UNIT']
78+
79+def config(scope=None):
80+ "Juju charm configuration"
81+ config_cmd_line = ['config-get']
82+ if scope is not None:
83+ config_cmd_line.append(scope)
84+ config_cmd_line.append('--format=json')
85+ try:
86+ config_data = json.loads(subprocess.check_output(config_cmd_line))
87+ except (ValueError, OSError, subprocess.CalledProcessError) as err:
88+ log(str(err), level=ERROR)
89+ raise err
90+ return Serializable(config_data)
91+
92+def relation_ids(reltype=None):
93+ "A list of relation_ids"
94+ reltype = reltype or relation_type()
95+ relids = []
96+ relid_cmd_line = ['relation-ids', '--format=json', reltype]
97+ relids.extend(json.loads(subprocess.check_output(relid_cmd_line)))
98+ return relids
99+
100+def related_units(relid=None):
101+ "A list of related units"
102+ relid = relid or relation_id()
103+ units_cmd_line = ['relation-list', '--format=json', '-r', relid]
104+ units = json.loads(subprocess.check_output(units_cmd_line))
105+ return units
106+
107+def relation_for_unit(unit=None):
108+ "Get the json represenation of a unit's relation"
109+ unit = unit or remote_unit()
110+ relation_cmd_line = ['relation-get', '--format=json', '-', unit]
111+ try:
112+ relation = json.loads(subprocess.check_output(relation_cmd_line))
113+ except (ValueError, OSError, subprocess.CalledProcessError), err:
114+ log(str(err), level=ERROR)
115+ raise err
116+ for key in relation:
117+ if key.endswith('-list'):
118+ relation[key] = relation[key].split()
119+ relation['__unit__'] = unit
120+ return Serializable(relation)
121+
122+def relations_for_id(relid=None):
123+ "Get relations of a specific relation ID"
124+ relation_data = []
125+ relid = relid or relation_ids()
126+ for unit in related_units(relid):
127+ unit_data = relation_for_unit(unit)
128+ unit_data['__relid__'] = relid
129+ relation_data.append(unit_data)
130+ return relation_data
131+
132+def relations_of_type(reltype=None):
133+ "Get relations of a specific type"
134+ relation_data = []
135+ if in_relation_hook():
136+ reltype = reltype or relation_type()
137+ for relid in relation_ids(reltype):
138+ for relation in relations_for_id(relid):
139+ relation['__relid__'] = relid
140+ relation_data.append(relation)
141+ return relation_data
142+
143+class UnregisteredHookError(Exception): pass
144+
145+class Hooks(object):
146+ def __init__(self):
147+ super(Hooks, self).__init__()
148+ self._hooks = {}
149+ def register(self, name, function):
150+ self._hooks[name] = function
151+ def execute(self, args):
152+ hook_name = os.path.basename(args[0])
153+ if hook_name in self._hooks:
154+ self._hooks[hook_name]()
155+ else:
156+ raise UnregisteredHookError(hook_name)
157
158=== added file 'charmsupport/nrpe.py'
159--- charmsupport/nrpe.py 1970-01-01 00:00:00 +0000
160+++ charmsupport/nrpe.py 2013-01-31 20:16:33 +0000
161@@ -0,0 +1,169 @@
162+"""Compatibility with the nrpe-external-master charm"""
163+# source: 27:lp:charmsupport
164+# Copyright 2012 Canonical Ltd.
165+#
166+# Authors:
167+# Matthew Wedgwood <matthew.wedgwood@canonical.com>
168+
169+import subprocess
170+import pwd
171+import grp
172+import os
173+import re
174+import shlex
175+
176+from hookenv import config, local_unit
177+
178+# This module adds compatibility with the nrpe_external_master
179+# subordinate charm. To use it in your charm:
180+#
181+# 1. Update metadata.yaml
182+#
183+# provides:
184+# (...)
185+# nrpe-external-master:
186+# interface: nrpe-external-master
187+# scope: container
188+#
189+# 2. Add the following to config.yaml
190+#
191+# nagios_context:
192+# default: "juju"
193+# type: string
194+# description: |
195+# Used by the nrpe-external-master subordinate charm.
196+# A string that will be prepended to instance name to set the host name
197+# in nagios. So for instance the hostname would be something like:
198+# juju-myservice-0
199+# If you're running multiple environments with the same services in them
200+# this allows you to differentiate between them.
201+#
202+# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
203+#
204+# 4. Update your hooks.py with something like this:
205+#
206+# import nrpe
207+# (...)
208+# def update_nrpe_config():
209+# nrpe_compat = NRPE("myservice")
210+# nrpe_compat.add_check(
211+# shortname = "myservice",
212+# description = "Check MyService",
213+# check_cmd = "check_http -w 2 -c 10 http://localhost"
214+# )
215+# nrpe_compat.add_check(
216+# "myservice_other",
217+# "Check for widget failures",
218+# check_cmd = "/srv/myapp/scripts/widget_check"
219+# )
220+# nrpe_compat.write()
221+#
222+# def config_changed():
223+# (...)
224+# update_nrpe_config()
225+# def nrpe_external_master_relation_changed():
226+# update_nrpe_config()
227+#
228+# 5. ln -s hooks.py nrpe-external-master-relation-changed
229+
230+class CheckException(Exception): pass
231+class Check(object):
232+ shortname_re = '[A-Za-z0-9-_]*'
233+ service_template = """
234+#---------------------------------------------------
235+# This file is Juju managed
236+#---------------------------------------------------
237+define service {{
238+ use active-service
239+ host_name {nagios_hostname}
240+ service_description {nagios_hostname}[{shortname}] {description}
241+ check_command check_nrpe!check_{shortname}
242+ servicegroups {nagios_servicegroup}
243+}}
244+"""
245+ def __init__(self, shortname, description, check_cmd):
246+ super(Check, self).__init__()
247+ # XXX: could be better to calculate this from the service name
248+ if not re.match(self.shortname_re, shortname):
249+ raise CheckException("shortname must match {}".format(Check.shortname_re))
250+ self.shortname = shortname
251+ # Note: a set of invalid characters is defined by the Nagios server config
252+ # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
253+ self.description = description
254+ self.check_cmd = self._locate_cmd(check_cmd)
255+
256+ def _locate_cmd(self, check_cmd):
257+ search_path = (
258+ '/',
259+ os.path.join(os.environ['CHARM_DIR'], 'files/nrpe-external-master'),
260+ '/usr/lib/nagios/plugins',
261+ )
262+ command = shlex.split(check_cmd)
263+ for path in search_path:
264+ if os.path.exists(os.path.join(path,command[0])):
265+ return os.path.join(path, command[0]) + " " + " ".join(command[1:])
266+ subprocess.call(['juju-log', 'Check command not found: {}'.format(command[0])])
267+ return ''
268+
269+ def write(self, nagios_context, hostname):
270+ for f in os.listdir(NRPE.nagios_exportdir):
271+ if re.search('.*check_{}.cfg'.format(self.shortname), f):
272+ os.remove(os.path.join(NRPE.nagios_exportdir, f))
273+
274+ templ_vars = {
275+ 'nagios_hostname': hostname,
276+ 'nagios_servicegroup': nagios_context,
277+ 'description': self.description,
278+ 'shortname': self.shortname,
279+ }
280+ nrpe_service_text = Check.service_template.format(**templ_vars)
281+ nrpe_service_file = '{}/service__{}_check_{}.cfg'.format(
282+ NRPE.nagios_exportdir, hostname, self.shortname)
283+ with open(nrpe_service_file, 'w') as nrpe_service_config:
284+ nrpe_service_config.write(str(nrpe_service_text))
285+
286+ nrpe_check_file = '/etc/nagios/nrpe.d/check_{}.cfg'.format(self.shortname)
287+ with open(nrpe_check_file, 'w') as nrpe_check_config:
288+ nrpe_check_config.write("# check {}\n".format(self.shortname))
289+ nrpe_check_config.write("command[check_{}]={}\n".format(
290+ self.shortname, self.check_cmd))
291+
292+ def run(self):
293+ subprocess.call(self.check_cmd)
294+
295+class NRPE(object):
296+ nagios_logdir = '/var/log/nagios'
297+ nagios_exportdir = '/var/lib/nagios/export'
298+ nrpe_confdir = '/etc/nagios/nrpe.d'
299+ def __init__(self):
300+ super(NRPE, self).__init__()
301+ self.config = config()
302+ self.nagios_context = self.config['nagios_context']
303+ self.unit_name = local_unit().replace('/', '-')
304+ self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
305+ self.checks = []
306+
307+ def add_check(self, *args, **kwargs):
308+ self.checks.append( Check(*args, **kwargs) )
309+
310+ def write(self):
311+ try:
312+ nagios_uid = pwd.getpwnam('nagios').pw_uid
313+ nagios_gid = grp.getgrnam('nagios').gr_gid
314+ except:
315+ subprocess.call(['juju-log', "Nagios user not set up, nrpe checks not updated"])
316+ return
317+
318+ if not os.path.exists(NRPE.nagios_exportdir):
319+ subprocess.call(['juju-log', 'Exiting as {} is not accessible'.format(NRPE.nagios_exportdir)])
320+ return
321+
322+ if not os.path.exists(NRPE.nagios_logdir):
323+ os.mkdir(NRPE.nagios_logdir)
324+ os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
325+
326+ for nrpecheck in self.checks:
327+ nrpecheck.write(self.nagios_context, self.hostname)
328+
329+ if os.path.isfile('/etc/init.d/nagios-nrpe-server'):
330+ subprocess.call(['service', 'nagios-nrpe-server', 'reload'])
331
332=== modified file 'config.yaml'
333--- config.yaml 2013-01-23 21:21:06 +0000
334+++ config.yaml 2013-01-31 20:16:33 +0000
335@@ -15,3 +15,13 @@
336 type: string
337 default: ""
338 description: "Address to mail errors to."
339+ nagios_context:
340+ default: "juju"
341+ type: string
342+ description: |
343+ Used by the nrpe-external-master subordinate charm.
344+ A string that will be prepended to instance name to set the host name
345+ in nagios. So for instance the hostname would be something like:
346+ juju-myservice-0
347+ If you're running multiple environments with the same services in them
348+ this allows you to differentiate between them.
349
350=== added directory 'files'
351=== added directory 'files/nrpe-external-master'
352=== added file 'files/nrpe-external-master/check_ingest.sh'
353--- files/nrpe-external-master/check_ingest.sh 1970-01-01 00:00:00 +0000
354+++ files/nrpe-external-master/check_ingest.sh 2013-01-31 20:16:33 +0000
355@@ -0,0 +1,5 @@
356+#!/bin/sh
357+if [ -f /home/ubuntu/var/ingest-errors ]; then
358+ echo Charmworld ingest failing
359+ exit 1
360+fi
361
362=== modified file 'hooks/config-changed'
363--- hooks/config-changed 2013-01-29 15:45:19 +0000
364+++ hooks/config-changed 2013-01-31 20:16:33 +0000
365@@ -79,9 +79,11 @@
366 PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
367 $mailto
368
369-*/$interval * * * * ubuntu HOME=/home/ubuntu INI=$CONFIG_FILE ~ubuntu/shhh.py $project_dir/scripts/ingest
370+*/$interval * * * * ubuntu HOME=/home/ubuntu INI=$CONFIG_FILE ~ubuntu/run-write-errors /home/ubuntu/var/ingest-errors $project_dir/scripts/ingest
371 @daily ubuntu HOME=/home/ubuntu INI=$project_dir/production.ini $project_dir/bin/python $project_dir/charmworld/jobs/cstat.py
372
373 EOF
374
375 fi
376+# nagios_context may have changed.
377+./update-nrpe.py
378
379=== modified file 'hooks/install'
380--- hooks/install 2013-01-24 17:34:53 +0000
381+++ hooks/install 2013-01-31 20:16:33 +0000
382@@ -17,4 +17,4 @@
383 postfix reload
384 mkdir -p ~ubuntu/var/charms
385 chown -R ubuntu.ubuntu ~ubuntu/var
386-cp shhh.py ~ubuntu
387+install -o ubuntu -g ubuntu run-write-errors ~ubuntu
388
389=== added file 'hooks/nrpe-external-master-relation-changed'
390--- hooks/nrpe-external-master-relation-changed 1970-01-01 00:00:00 +0000
391+++ hooks/nrpe-external-master-relation-changed 2013-01-31 20:16:33 +0000
392@@ -0,0 +1,2 @@
393+#!/bin/sh
394+./update-nrpe.py
395
396=== modified file 'hooks/upgrade-charm'
397--- hooks/upgrade-charm 2013-01-24 17:34:53 +0000
398+++ hooks/upgrade-charm 2013-01-31 20:16:33 +0000
399@@ -10,3 +10,6 @@
400 fi
401 hooks/config-changed
402 hooks/restart
403+if [ -f $HOME/shhh.py ]; then
404+ rm $HOME/shhh.py
405+fi
406
407=== modified file 'metadata.yaml'
408--- metadata.yaml 2012-08-20 19:57:46 +0000
409+++ metadata.yaml 2013-01-31 20:16:33 +0000
410@@ -6,6 +6,9 @@
411 provides:
412 website:
413 interface: http
414+ nrpe-external-master:
415+ interface: nrpe-external-master
416+ scope: container
417 requires:
418 database:
419 interface: mongodb
420
421=== modified file 'revision'
422--- revision 2013-01-29 15:32:47 +0000
423+++ revision 2013-01-31 20:16:33 +0000
424@@ -1,1 +1,1 @@
425-18
426+19
427
428=== added file 'run-write-errors'
429--- run-write-errors 1970-01-01 00:00:00 +0000
430+++ run-write-errors 2013-01-31 20:16:33 +0000
431@@ -0,0 +1,16 @@
432+#!/bin/sh
433+# Run a command, and if it exits with non-zero status, leave its output behind
434+# in the file specified. Any existing file will be removed.
435+error_file=$1
436+shift
437+"$@" > $error_file.tmp 2>&1
438+status=$?
439+if [ $status -ne 0 ]; then
440+ mv $error_file.tmp $error_file
441+else
442+ rm $error_file.tmp
443+ if [ -f $error_file ]; then
444+ rm $error_file
445+ fi
446+fi
447+exit $status
448
449=== removed file 'shhh.py'
450--- shhh.py 2013-01-24 21:00:17 +0000
451+++ shhh.py 1970-01-01 00:00:00 +0000
452@@ -1,81 +0,0 @@
453-#! /usr/bin/python -S
454-#
455-# Copyright 2009 Canonical Ltd. This software is licensed under the
456-# GNU Affero General Public License version 3 (see the file LICENSE).
457-
458-"""
459-Run a command and suppress output unless it returns a non-zero exit status
460-"""
461-
462-__metaclass__ = type
463-
464-from subprocess import (
465- PIPE,
466- Popen,
467- )
468-import sys
469-
470-
471-def shhh(cmd):
472- r"""Run a command and suppress output unless it returns a non-zero exitcode
473-
474- If output is generated, stderr will be output before stdout, so output
475- order may be messed up if the command attempts to control order by
476- flushing stdout at points or setting it to unbuffered.
477-
478-
479- To test, we invoke both this method and this script with some commands
480- and examine the output and exitvalue
481-
482- >>> python = sys.executable
483-
484- >>> def shhh_script(cmd):
485- ... from subprocess import Popen, PIPE
486- ... script = '%s %s' % (python, __file__)
487- ... cmd = "%s '%s'" % (script, cmd)
488- ... p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
489- ... (out, err) = p.communicate()
490- ... return (out, err, p.returncode)
491-
492- >>> cmd = '''%s -c "import sys; sys.exit(%d)"''' % (python, 0)
493- >>> shhh(cmd)
494- 0
495- >>> shhh_script(cmd)
496- ('', '', 0)
497-
498- >>> cmd = '''%s -c "import sys; sys.exit(%d)"''' % (python, 1)
499- >>> shhh(cmd)
500- 1
501- >>> shhh_script(cmd)
502- ('', '', 1)
503-
504- >>> cmd = '''%s -c "import sys; print 666; sys.exit(%d)"''' % (
505- ... python, 42)
506- >>> shhh(cmd)
507- 666
508- 42
509- >>> shhh_script(cmd)
510- ('666\n', '', 42)
511-
512- >>> cmd = (
513- ... '''%s -c "import sys; print 666; '''
514- ... '''print >> sys.stderr, 667; sys.exit(42)"''' % python
515- ... )
516- >>> shhh_script(cmd)
517- ('666\n', '667\n', 42)
518- """
519-
520- process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
521- (out, err) = process.communicate()
522- if process.returncode == 0:
523- return 0
524- else:
525- sys.stderr.write(err)
526- sys.stdout.write(out)
527- return process.returncode
528-
529-
530-if __name__ == '__main__':
531- cmd = ' '.join(sys.argv[1:])
532- sys.exit(shhh(cmd))
533-
534
535=== added file 'update-nrpe.py'
536--- update-nrpe.py 1970-01-01 00:00:00 +0000
537+++ update-nrpe.py 2013-01-31 20:16:33 +0000
538@@ -0,0 +1,12 @@
539+#!/usr/bin/env python
540+from charmsupport import nrpe
541+
542+
543+def update_nrpe_config():
544+ nrpe_compat = nrpe.NRPE()
545+ nrpe_compat.add_check('ingest', 'Check ingest runs', 'check_ingest.sh')
546+ nrpe_compat.write()
547+
548+
549+if __name__ == '__main__':
550+ update_nrpe_config()

Subscribers

People subscribed via source and target branches

to all changes: