Merge ~ack/maas:1871423-supervisord-backoff into maas:master

Proposed by Alberto Donato
Status: Merged
Approved by: Alberto Donato
Approved revision: 61f73ef7c776182f7d4176188be3df44649c90cc
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~ack/maas:1871423-supervisord-backoff
Merge into: maas:master
Diff against target: 132 lines (+38/-0)
3 files modified
snap/local/tree/usr/share/maas/supervisord.conf.template (+10/-0)
src/provisioningserver/utils/service_monitor.py (+1/-0)
src/provisioningserver/utils/tests/test_service_monitor.py (+27/-0)
Reviewer Review Type Date Requested Status
Björn Tillenius Approve
MAAS Lander Approve
Review via email: mp+381881@code.launchpad.net

Commit message

LP: #1871423 - handle BACKOFF status from supervisord
LP: #1871582 - add "startsecs" to supervisor program stanzas to avoid quick respawns

To post a comment you must log in.
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b 1871423-supervisord-backoff lp:~ack/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: SUCCESS
COMMIT: 61f73ef7c776182f7d4176188be3df44649c90cc

review: Approve
Revision history for this message
Björn Tillenius (bjornt) wrote :

Discussed on IRC that mapping BACKOFF to DEAD seems a bit weird. But considering that the systemd states "activating" and "reloading" map to DEAD as well, I guess it's not a problem.

So +1 for now, but we should rethink how we do service monitoring at some point.

review: Approve

There was an error fetching revisions from git servers. Please try again in a few minutes. If the problem persists, contact Launchpad support.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/snap/local/tree/usr/share/maas/supervisord.conf.template b/snap/local/tree/usr/share/maas/supervisord.conf.template
index d911830..78f5eac 100644
--- a/snap/local/tree/usr/share/maas/supervisord.conf.template
+++ b/snap/local/tree/usr/share/maas/supervisord.conf.template
@@ -24,6 +24,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/postgresql.log
+startsecs=10
 {{endif}}
 
 
@@ -36,6 +37,7 @@ killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/regiond.log
 serverurl=unix://%(ENV_SNAP_DATA)s/supervisord/sock
+startsecs=10
 {{endif}}
 
 
@@ -48,6 +50,7 @@ killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/rackd.log
 serverurl=unix://%(ENV_SNAP_DATA)s/supervisord/sock
+startsecs=10
 
 [program:dhcpd]
 process_name=dhcpd
@@ -57,6 +60,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/dhcpd.log
+startsecs=10
 
 [program:dhcpd6]
 process_name=dhcpd6
@@ -66,6 +70,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/dhcpd6.log
+startsecs=10
 
 [program:http]
 process_name=http
@@ -74,6 +79,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/nginx.log
+startsecs=10
 {{endif}}
 
 {{if rackd or regiond}}
@@ -84,6 +90,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/named.log
+startsecs=10
 
 [program:ntp]
 process_name=ntp
@@ -92,6 +99,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/chrony.log
+startsecs=10
 
 [program:proxy]
 process_name=proxy
@@ -101,6 +109,7 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/proxy.log
+startsecs=10
 
 [program:syslog]
 process_name=syslog
@@ -110,4 +119,5 @@ stopasgroup=true
 killasgroup=true
 redirect_stderr=true
 stdout_logfile=%(ENV_SNAP_COMMON)s/log/rsyslog.log
+startsecs=10
 {{endif}}
diff --git a/src/provisioningserver/utils/service_monitor.py b/src/provisioningserver/utils/service_monitor.py
index 5770f01..5ce21ed 100644
--- a/src/provisioningserver/utils/service_monitor.py
+++ b/src/provisioningserver/utils/service_monitor.py
@@ -243,6 +243,7 @@ class ServiceMonitor:
     # Used to convert the supervisor state to the `SERVICE_STATE` enum.
     SUPERVISOR_TO_STATE = {
         "STARTING": SERVICE_STATE.ON,
+        "BACKOFF": SERVICE_STATE.DEAD,
         "RUNNING": SERVICE_STATE.ON,
         "STOPPED": SERVICE_STATE.OFF,
         "FATAL": SERVICE_STATE.DEAD,
diff --git a/src/provisioningserver/utils/tests/test_service_monitor.py b/src/provisioningserver/utils/tests/test_service_monitor.py
index 8dda950..c06f75e 100644
--- a/src/provisioningserver/utils/tests/test_service_monitor.py
+++ b/src/provisioningserver/utils/tests/test_service_monitor.py
@@ -1248,6 +1248,33 @@ class TestServiceMonitor(MAASTestCase):
         self.assertEqual("Result: exit-code", process_state)
 
     @inlineCallbacks
+    def test___loadSupervisorServiceState_backoff_returns_dead(self):
+        service = make_fake_service(SERVICE_STATE.ON)
+        service_monitor = self.make_service_monitor([service])
+        supervisor_status_output = (
+            dedent(
+                """\
+            %s BACKOFF Respawning too fast
+            """
+            )
+            % (service.snap_service_name)
+        )
+
+        mock_execSupervisorServiceAction = self.patch(
+            service_monitor, "_execSupervisorServiceAction"
+        )
+        mock_execSupervisorServiceAction.return_value = (
+            1,
+            supervisor_status_output,
+            "",
+        )
+        active_state, process_state = yield (
+            service_monitor._loadSupervisorServiceState(service)
+        )
+        self.assertEqual(SERVICE_STATE.DEAD, active_state)
+        self.assertEqual("Result: exit-code", process_state)
+
+    @inlineCallbacks
     def test___ensureService_logs_warning_in_mismatch_process_state(self):
         service = make_fake_service(SERVICE_STATE.ON)
         service_monitor = self.make_service_monitor([service])

Subscribers

People subscribed via source and target branches