Merge lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement into lp:orange-box

Proposed by Darryl Weaver
Status: Merged
Merged at revision: 513
Proposed branch: lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement
Merge into: lp:orange-box
Diff against target: 207 lines (+84/-90)
2 files modified
etc/init/orange-box-amt-monitor.conf (+0/-11)
usr/bin/orange-box-status-all-nodes (+84/-79)
To merge this branch: bzr merge lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement
Reviewer Review Type Date Requested Status
Dustin Kirkland  Pending
Review via email: mp+239512@code.launchpad.net

Description of the change

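Improve node-status handling and error reporting in orange-box-status-all-nodes: an AMT power state of S0 is now reported as running, S5 is reported as not running (and the remaining OS checks for that node are skipped), and any other value is reported as an error.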
Disable the orange-box-amt-monitor service for MAAS 1.7 environments, by removing its init job file (etc/init/orange-box-amt-monitor.conf), to reduce the chance of AMT lockups.

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== removed file 'etc/init/orange-box-amt-monitor.conf'
2--- etc/init/orange-box-amt-monitor.conf 2014-04-16 22:26:52 +0000
3+++ etc/init/orange-box-amt-monitor.conf 1970-01-01 00:00:00 +0000
4@@ -1,11 +0,0 @@
5-# orange-box-amt-monitor - monitor the AMT port
6-
7-description "Orange Box AMT Monitor"
8-
9-start on runlevel [2345]
10-stop on runlevel [!2345]
11-
12-respawn
13-respawn limit 10 5
14-
15-exec orange-box-amt-monitor
16
17=== modified file 'usr/bin/orange-box-status-all-nodes'
18--- usr/bin/orange-box-status-all-nodes 2014-10-10 08:46:06 +0000
19+++ usr/bin/orange-box-status-all-nodes 2014-10-23 22:21:21 +0000
20@@ -4,6 +4,7 @@
21 # Copyright (C) 2014 Canonical Ltd.
22 #
23 # Authors: Ronald McCollam <ronald.mccollam@canonical.com>
24+# Darryl Weaver <darryl.weaver@canonical.com>
25 #
26 # This program is free software: you can redistribute it and/or modify
27 # it under the terms of the GNU General Public License as published by
28@@ -22,96 +23,100 @@
29 ERRORS=0
30
31 okay() {
32- echo "OK: $@"
33+ echo "OK: $@"
34 }
35
36 error() {
37- echo "ERROR: $@"
38- ERRORS=$((ERRORS+1))
39+ echo "ERROR: $@"
40+ ERRORS=$((ERRORS+1))
41 }
42
43
44 status_all_nodes() {
45- # Check all VM nodes
46- for i in $(seq 0 2); do
47- if virsh list --all | grep node0vm${i} | grep running >/dev/null 2>&1; then
48- okay "[node0vm${i}]: Is running"
49- uptime=$(ssh node0vm${i}.maas "uptime" 2>/dev/null)
50- if [ -n "$uptime" ]; then
51- okay "[node0vm${i}]: SSH to node successful; uptime=[$uptime]"
52- else
53- error "[node0vm${i}]: SSH to node unsuccessful; uptime=[$uptime]"
54- fi
55- else
56- error "[node0vm${i}]: Is not running"
57- fi
58- done
59- # Check all AMT nodes
60- for i in $(seq 1 9); do
61- hostname="node${i}.maas"
62- # (1) Ping AMT IP Addresses
63- amt_ip="10.14.4.1$i"
64- if ping -c 1 -q $amt_ip >/dev/null 2>&1; then
65- okay "[$hostname]: AMT responding to ping [$amt_ip]"
66- else
67- error "[$hostname]: AMT not responding to ping [$amt_ip]"
68- continue
69- fi
70- # (2) If AMT pingable, check for AMT open port 16992
71- if netcat -z -v $amt_ip 16992 >/dev/null 2>&1; then
72- okay "[$hostname]: AMT listening on port [16992] [$amt_ip]"
73- else
74- error "[$hostname]: AMT not listening on port [16992] [$amt_ip]"
75- continue
76- fi
77- amtoutput=$(yes | amttool $amt_ip info)
78- amtpowerstate=$(echo "$amtoutput" | grep "^Powerstate:" | awk '{print $2}')
79- # (3) If AMT port open, check AMT for power state
80- if [ -n "$amtpowerstate" ]; then
81- okay "[$hostname]: AMT powerstate [$amtpowerstate] on [$amt_ip]"
82- else
83- error "[$hostname]: AMT powerstate unknown [$amtpowerstate] on [$amt_ip]"
84- continue
85- fi
86- # (4) If power state is running, test pingable OS
87- ip=$(host $hostname | tail -n1 | sed -e "s/.* //")
88- if ping -c 1 -q $hostname >/dev/null 2>&1; then
89- okay "[$hostname]: OS responding to ping, ip=[$ip]"
90- else
91- error "[$hostname]: OS not responding to ping, ip=[$ip]"
92- continue
93- fi
94- uptime=$(ssh $hostname "uptime" 2>/dev/null)
95- # (5) If OS is pingable, test SSH to OS
96- if [ -n "$uptime" ]; then
97- okay "[$hostname]: SSH to node successful; uptime=[$uptime]"
98- else
99- error "[$hostname]: SSH to node unsuccessful; uptime=[$uptime]"
100- continue
101- fi
102- memory=$(ssh $hostname "grep -m1 '^MemTotal:' /proc/meminfo | sed -e 's/.*:\s\+//'" 2>/dev/null)
103- # (6) If SSH works, test memory
104- if [ -n "$memory" ]; then
105- okay "[$hostname]: Memory check of node successful; memory=[$memory]"
106- else
107- error "[$hostname]: Memory check of node unsuccessful; memory=[$memory]"
108- continue
109- fi
110- # (7) If SSH works, test disk
111- disk=$(ssh $hostname "sudo fdisk -l | grep '^Disk /' | sed -e 's/^Disk //' -e 's/,.*//'" 2>/dev/null)
112- if [ -n "$disk" ]; then
113- okay "[$hostname]: Disk check of node successful; disk=[$(echo $disk | tr '\n' ' ')]"
114- else
115- error "[$hostname]: Disk check of node unsuccessful; disk=[$disk]"
116- continue
117- fi
118- done
119+ # Check all VM nodes
120+ for i in $(seq 0 2); do
121+ if virsh list --all | grep node0vm${i} | grep running >/dev/null 2>&1; then
122+ okay "[node0vm${i}]: Is running"
123+ uptime=$(ssh node0vm${i}.maas "uptime" 2>/dev/null)
124+ if [ -n "$uptime" ]; then
125+ okay "[node0vm${i}]: SSH to node successful; uptime=[$uptime]"
126+ else
127+ error "[node0vm${i}]: SSH to node unsuccessful; uptime=[$uptime]"
128+ fi
129+ else
130+ error "[node0vm${i}]: Is not running"
131+ fi
132+ done
133+ # Check all AMT nodes
134+ for i in $(seq 1 9); do
135+ hostname="node${i}.maas"
136+ # (1) Ping AMT IP Addresses
137+ amt_ip="10.14.4.1$i"
138+ if ping -c 1 -q $amt_ip >/dev/null 2>&1; then
139+ okay "[$hostname]: AMT responding to ping [$amt_ip]"
140+ else
141+ error "[$hostname]: AMT not responding to ping [$amt_ip]"
142+ continue
143+ fi
144+ # (2) If AMT pingable, check for AMT open port 16992
145+ if netcat -z -v $amt_ip 16992 >/dev/null 2>&1; then
146+ okay "[$hostname]: AMT listening on port [16992] [$amt_ip]"
147+ else
148+ error "[$hostname]: AMT not listening on port [16992] [$amt_ip]"
149+ continue
150+ fi
151+ amtoutput=$(yes | amttool $amt_ip info)
152+ amtpowerstate=$(echo "$amtoutput" | grep "^Powerstate:" | awk '{print $2}')
153+ # (3) If AMT port open, check AMT for power state
154+ if [ "$amtpowerstate" = "S0" ]; then
155+ okay "[$hostname]: AMT powerstate [$amtpowerstate](running) on [$amt_ip]"
156+ elif [ "$amtpowerstate" = "S5" ]; then
157+ okay "[$hostname]: AMT powerstate [$amtpowerstate](not running) on [$amt_ip]"
158+ continue
159+ else
160+ error "[$hostname]: AMT powerstate unknown [$amtpowerstate] on [$amt_ip]"
161+ continue
162+ fi
163+ # (4) If power state is running, test pingable OS
164+ ip=$(host $hostname | tail -n1 | sed -e "s/.* //")
165+ if ping -c 1 -q $hostname >/dev/null 2>&1; then
166+ okay "[$hostname]: OS responding to ping, ip=[$ip]"
167+ else
168+ error "[$hostname]: OS not responding to ping, ip=[$ip]"
169+ continue
170+ fi
171+ uptime=$(ssh $hostname "uptime" 2>/dev/null)
172+ # (5) If OS is pingable, test SSH to OS
173+ if [ -n "$uptime" ]; then
174+ okay "[$hostname]: SSH to node successful; uptime=[$uptime]"
175+ else
176+ error "[$hostname]: SSH to node unsuccessful; uptime=[$uptime]"
177+ continue
178+ fi
179+ memory=$(ssh $hostname "grep -m1 '^MemTotal:' /proc/meminfo | sed -e 's/.*:\s\+//'" 2>/dev/null)
180+ # (6) If SSH works, test memory
181+ if [ -n "$memory" ]; then
182+ okay "[$hostname]: Memory check of node successful; memory=[$memory]"
183+ else
184+ error "[$hostname]: Memory check of node unsuccessful; memory=[$memory]"
185+ continue
186+ fi
187+ # (7) If SSH works, test disk
188+ disk=$(ssh $hostname "sudo fdisk -l | grep '^Disk /' | sed -e 's/^Disk //' -e 's/,.*//'" 2>/dev/null)
189+ if [ -n "$disk" ]; then
190+ okay "[$hostname]: Disk check of node successful; disk=[$(echo $disk | tr '\n' ' ')]"
191+ else
192+ error "[$hostname]: Disk check of node unsuccessful; disk=[$disk]"
193+ continue
194+ fi
195+ done
196 }
197
198 status_all_nodes | ccze -A
199
200 if [ $ERRORS -eq 0 ]; then
201- exit 0
202+ exit 0
203 else
204- exit $ERRORS
205+ exit $ERRORS
206 fi
207+

Subscribers

People subscribed via source and target branches