Merge lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement into lp:orange-box

Proposed by Darryl Weaver
Status: Merged
Merged at revision: 513
Proposed branch: lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement
Merge into: lp:orange-box
Diff against target: 207 lines (+84/-90)
2 files modified
etc/init/orange-box-amt-monitor.conf (+0/-11)
usr/bin/orange-box-status-all-nodes (+84/-79)
To merge this branch: bzr merge lp:~dweaver/orange-box/orange-box-status-all-nodes-improvement
| Reviewer        | Review Type | Date Requested | Status  |
|-----------------|-------------|----------------|---------|
| Dustin Kirkland |             |                | Pending |
Review via email: mp+239512@code.launchpad.net

Description of the change

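Improve power-state handling and error reporting in orange-box-status-all-nodes: an AMT powerstate of S0 is now reported as running, S5 is reported as not running (and the OS-level checks are skipped for that node), and any other state is reported as an error.
Disable the orange-box-amt-monitor service (by removing its upstart job) for MAAS 1.7 environments, to reduce the chance of AMT lockups.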

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== removed file 'etc/init/orange-box-amt-monitor.conf'
--- etc/init/orange-box-amt-monitor.conf 2014-04-16 22:26:52 +0000
+++ etc/init/orange-box-amt-monitor.conf 1970-01-01 00:00:00 +0000
@@ -1,11 +0,0 @@
-# orange-box-amt-monitor - monitor the AMT port
-
-description "Orange Box AMT Monitor"
-
-start on runlevel [2345]
-stop on runlevel [!2345]
-
-respawn
-respawn limit 10 5
-
-exec orange-box-amt-monitor
120
=== modified file 'usr/bin/orange-box-status-all-nodes'
--- usr/bin/orange-box-status-all-nodes 2014-10-10 08:46:06 +0000
+++ usr/bin/orange-box-status-all-nodes 2014-10-23 22:21:21 +0000
@@ -4,6 +4,7 @@
4# Copyright (C) 2014 Canonical Ltd.4# Copyright (C) 2014 Canonical Ltd.
5#5#
6# Authors: Ronald McCollam <ronald.mccollam@canonical.com>6# Authors: Ronald McCollam <ronald.mccollam@canonical.com>
7# Darryl Weaver <darryl.weaver@canonical.com>
7#8#
8# This program is free software: you can redistribute it and/or modify9# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by10# it under the terms of the GNU General Public License as published by
@@ -22,96 +23,100 @@
22ERRORS=023ERRORS=0
2324
24okay() {25okay() {
25 echo "OK: $@"26 echo "OK: $@"
26}27}
2728
28error() {29error() {
29 echo "ERROR: $@"30 echo "ERROR: $@"
30 ERRORS=$((ERRORS+1))31 ERRORS=$((ERRORS+1))
31}32}
3233
3334
34status_all_nodes() {35status_all_nodes() {
35 # Check all VM nodes36 # Check all VM nodes
36 for i in $(seq 0 2); do37 for i in $(seq 0 2); do
37 if virsh list --all | grep node0vm${i} | grep running >/dev/null 2>&1; then38 if virsh list --all | grep node0vm${i} | grep running >/dev/null 2>&1; then
38 okay "[node0vm${i}]: Is running"39 okay "[node0vm${i}]: Is running"
39 uptime=$(ssh node0vm${i}.maas "uptime" 2>/dev/null)40 uptime=$(ssh node0vm${i}.maas "uptime" 2>/dev/null)
40 if [ -n "$uptime" ]; then41 if [ -n "$uptime" ]; then
41 okay "[node0vm${i}]: SSH to node successful; uptime=[$uptime]"42 okay "[node0vm${i}]: SSH to node successful; uptime=[$uptime]"
42 else43 else
43 error "[node0vm${i}]: SSH to node unsuccessful; uptime=[$uptime]"44 error "[node0vm${i}]: SSH to node unsuccessful; uptime=[$uptime]"
44 fi45 fi
45 else46 else
46 error "[node0vm${i}]: Is not running"47 error "[node0vm${i}]: Is not running"
47 fi48 fi
48 done49 done
49 # Check all AMT nodes50 # Check all AMT nodes
50 for i in $(seq 1 9); do51 for i in $(seq 1 9); do
51 hostname="node${i}.maas"52 hostname="node${i}.maas"
52 # (1) Ping AMT IP Addresses53 # (1) Ping AMT IP Addresses
53 amt_ip="10.14.4.1$i"54 amt_ip="10.14.4.1$i"
54 if ping -c 1 -q $amt_ip >/dev/null 2>&1; then55 if ping -c 1 -q $amt_ip >/dev/null 2>&1; then
55 okay "[$hostname]: AMT responding to ping [$amt_ip]"56 okay "[$hostname]: AMT responding to ping [$amt_ip]"
56 else57 else
57 error "[$hostname]: AMT not responding to ping [$amt_ip]"58 error "[$hostname]: AMT not responding to ping [$amt_ip]"
58 continue59 continue
59 fi60 fi
60 # (2) If AMT pingable, check for AMT open port 1699261 # (2) If AMT pingable, check for AMT open port 16992
61 if netcat -z -v $amt_ip 16992 >/dev/null 2>&1; then62 if netcat -z -v $amt_ip 16992 >/dev/null 2>&1; then
62 okay "[$hostname]: AMT listening on port [16992] [$amt_ip]"63 okay "[$hostname]: AMT listening on port [16992] [$amt_ip]"
63 else64 else
64 error "[$hostname]: AMT not listening on port [16992] [$amt_ip]"65 error "[$hostname]: AMT not listening on port [16992] [$amt_ip]"
65 continue66 continue
66 fi67 fi
67 amtoutput=$(yes | amttool $amt_ip info)68 amtoutput=$(yes | amttool $amt_ip info)
68 amtpowerstate=$(echo "$amtoutput" | grep "^Powerstate:" | awk '{print $2}')69 amtpowerstate=$(echo "$amtoutput" | grep "^Powerstate:" | awk '{print $2}')
69 # (3) If AMT port open, check AMT for power state70 # (3) If AMT port open, check AMT for power state
70 if [ -n "$amtpowerstate" ]; then71 if [ "$amtpowerstate" = "S0" ]; then
71 okay "[$hostname]: AMT powerstate [$amtpowerstate] on [$amt_ip]"72 okay "[$hostname]: AMT powerstate [$amtpowerstate](running) on [$amt_ip]"
72 else73 elif [ "$amtpowerstate" = "S5" ]; then
73 error "[$hostname]: AMT powerstate unknown [$amtpowerstate] on [$amt_ip]"74 okay "[$hostname]: AMT powerstate [$amtpowerstate](not running) on [$amt_ip]"
74 continue75 continue
75 fi76 else
76 # (4) If power state is running, test pingable OS77 error "[$hostname]: AMT powerstate unknown [$amtpowerstate] on [$amt_ip]"
77 ip=$(host $hostname | tail -n1 | sed -e "s/.* //")78 continue
78 if ping -c 1 -q $hostname >/dev/null 2>&1; then79 fi
79 okay "[$hostname]: OS responding to ping, ip=[$ip]"80 # (4) If power state is running, test pingable OS
80 else81 ip=$(host $hostname | tail -n1 | sed -e "s/.* //")
81 error "[$hostname]: OS not responding to ping, ip=[$ip]"82 if ping -c 1 -q $hostname >/dev/null 2>&1; then
82 continue83 okay "[$hostname]: OS responding to ping, ip=[$ip]"
83 fi84 else
84 uptime=$(ssh $hostname "uptime" 2>/dev/null)85 error "[$hostname]: OS not responding to ping, ip=[$ip]"
85 # (5) If OS is pingable, test SSH to OS86 continue
86 if [ -n "$uptime" ]; then87 fi
87 okay "[$hostname]: SSH to node successful; uptime=[$uptime]"88 uptime=$(ssh $hostname "uptime" 2>/dev/null)
88 else89 # (5) If OS is pingable, test SSH to OS
89 error "[$hostname]: SSH to node unsuccessful; uptime=[$uptime]"90 if [ -n "$uptime" ]; then
90 continue91 okay "[$hostname]: SSH to node successful; uptime=[$uptime]"
91 fi92 else
92 memory=$(ssh $hostname "grep -m1 '^MemTotal:' /proc/meminfo | sed -e 's/.*:\s\+//'" 2>/dev/null)93 error "[$hostname]: SSH to node unsuccessful; uptime=[$uptime]"
93 # (6) If SSH works, test memory94 continue
94 if [ -n "$memory" ]; then95 fi
95 okay "[$hostname]: Memory check of node successful; memory=[$memory]"96 memory=$(ssh $hostname "grep -m1 '^MemTotal:' /proc/meminfo | sed -e 's/.*:\s\+//'" 2>/dev/null)
96 else97 # (6) If SSH works, test memory
97 error "[$hostname]: Memory check of node unsuccessful; memory=[$memory]"98 if [ -n "$memory" ]; then
98 continue99 okay "[$hostname]: Memory check of node successful; memory=[$memory]"
99 fi100 else
100 # (7) If SSH works, test disk101 error "[$hostname]: Memory check of node unsuccessful; memory=[$memory]"
101 disk=$(ssh $hostname "sudo fdisk -l | grep '^Disk /' | sed -e 's/^Disk //' -e 's/,.*//'" 2>/dev/null)102 continue
102 if [ -n "$disk" ]; then103 fi
103 okay "[$hostname]: Disk check of node successful; disk=[$(echo $disk | tr '\n' ' ')]"104 # (7) If SSH works, test disk
104 else105 disk=$(ssh $hostname "sudo fdisk -l | grep '^Disk /' | sed -e 's/^Disk //' -e 's/,.*//'" 2>/dev/null)
105 error "[$hostname]: Disk check of node unsuccessful; disk=[$disk]"106 if [ -n "$disk" ]; then
106 continue107 okay "[$hostname]: Disk check of node successful; disk=[$(echo $disk | tr '\n' ' ')]"
107 fi108 else
108 done109 error "[$hostname]: Disk check of node unsuccessful; disk=[$disk]"
110 continue
111 fi
112 done
109}113}
110114
111status_all_nodes | ccze -A115status_all_nodes | ccze -A
112116
113if [ $ERRORS -eq 0 ]; then117if [ $ERRORS -eq 0 ]; then
114 exit 0118 exit 0
115else119else
116 exit $ERRORS120 exit $ERRORS
117fi121fi
122

Subscribers

People subscribed via source and target branches