Merge ~blake-rouse/maas:bind9-restart into maas:master

Proposed by Blake Rouse
Status: Merged
Approved by: Blake Rouse
Approved revision: e9e8b973d4fb446e9b067daf812ba8b4f9b216b7
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~blake-rouse/maas:bind9-restart
Merge into: maas:master
Diff against target: 738 lines (+260/-43)
15 files modified
debian/extras/99-maas-common-sudoers (+3/-2)
src/maasserver/dns/config.py (+4/-4)
src/maasserver/dns/tests/test_config.py (+5/-2)
src/maasserver/region_controller.py (+25/-4)
src/maasserver/service_monitor.py (+3/-0)
src/maasserver/tests/test_region_controller.py (+48/-8)
src/provisioningserver/dns/actions.py (+11/-5)
src/provisioningserver/dns/config.py (+2/-2)
src/provisioningserver/dns/tests/test_actions.py (+4/-4)
src/provisioningserver/dns/tests/test_config.py (+6/-2)
src/provisioningserver/service_monitor.py (+3/-0)
src/provisioningserver/utils/service_monitor.py (+56/-3)
src/provisioningserver/utils/shell.py (+2/-1)
src/provisioningserver/utils/tests/test_service_monitor.py (+79/-6)
src/provisioningserver/utils/tests/test_shell.py (+9/-0)
Reviewer Review Type Date Requested Status
Lee Trager (community) Approve
MAAS Lander Approve
Review via email: mp+371218@code.launchpad.net

Commit message

Fixes LP: #1710278 - Notice when bind9 is hung, force kill the service, ensure it starts again, then perform a reload.

To post a comment you must log in.
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b bind9-restart lp:~blake-rouse/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: SUCCESS
COMMIT: e9e8b973d4fb446e9b067daf812ba8b4f9b216b7

review: Approve
Revision history for this message
Lee Trager (ltrager) wrote :

LGTM!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/debian/extras/99-maas-common-sudoers b/debian/extras/99-maas-common-sudoers
2index 47387d2..4c2bfd3 100644
3--- a/debian/extras/99-maas-common-sudoers
4+++ b/debian/extras/99-maas-common-sudoers
5@@ -18,10 +18,11 @@ maas ALL= NOPASSWD: /bin/systemctl restart chrony
6 maas ALL= NOPASSWD: /bin/systemctl start chrony
7 maas ALL= NOPASSWD: /bin/systemctl status chrony
8
9-# Control of the DNS server: MAAS needs to track status and start it
10-# again if stopped manually.
11+# Control of the DNS server: MAAS needs to track status, start it
12+# again if stopped manually, or if MAAS must brutally kill it!
13 maas ALL= NOPASSWD: /bin/systemctl status bind9
14 maas ALL= NOPASSWD: /bin/systemctl start bind9
15+maas ALL= NOPASSWD: /bin/systemctl kill -s SIGKILL bind9
16
17 # Control of the Proxy server: MAAS needs to reconfigure it after editing
18 # its configuration file, start it and stop it.
19diff --git a/src/maasserver/dns/config.py b/src/maasserver/dns/config.py
20index 702b194..36aaacf 100644
21--- a/src/maasserver/dns/config.py
22+++ b/src/maasserver/dns/config.py
23@@ -54,7 +54,7 @@ def dns_force_reload():
24 DNSPublication(source="Force reload").save()
25
26
27-def dns_update_all_zones(reload_retry=False):
28+def dns_update_all_zones(reload_retry=False, reload_timeout=2):
29 """Update all zone files for all domains.
30
31 Serving these zone files means updating BIND's configuration to include
32@@ -96,12 +96,12 @@ def dns_update_all_zones(reload_retry=False):
33 # actually needed but it seems safer to maintain this behaviour until we
34 # have a better understanding.
35 if reload_retry:
36- bind_reload_with_retries()
37+ reloaded = bind_reload_with_retries(timeout=reload_timeout)
38 else:
39- bind_reload()
40+ reloaded = bind_reload(timeout=reload_timeout)
41
42 # Return the current serial and list of domain names.
43- return serial, [
44+ return serial, reloaded, [
45 domain.name
46 for domain in domains
47 ]
48diff --git a/src/maasserver/dns/tests/test_config.py b/src/maasserver/dns/tests/test_config.py
49index 6bb5bfb..6d0bdfb 100644
50--- a/src/maasserver/dns/tests/test_config.py
51+++ b/src/maasserver/dns/tests/test_config.py
52@@ -62,6 +62,7 @@ from testtools.matchers import (
53 Equals,
54 FileContains,
55 HasLength,
56+ Is,
57 MatchesSetwise,
58 MatchesStructure,
59 )
60@@ -307,7 +308,8 @@ class TestDNSConfigModifications(TestDNSServer):
61 bind_reload_with_retries = self.patch_autospec(
62 dns_config_module, "bind_reload_with_retries")
63 dns_update_all_zones(reload_retry=True)
64- self.assertThat(bind_reload_with_retries, MockCalledOnceWith())
65+ self.assertThat(
66+ bind_reload_with_retries, MockCalledOnceWith(timeout=2))
67
68 def test_dns_update_all_zones_passes_upstream_dns_parameter(self):
69 self.patch(settings, 'DNS_CONNECT', True)
70@@ -377,8 +379,9 @@ class TestDNSConfigModifications(TestDNSServer):
71 self.patch(
72 dns_config_module,
73 "current_zone_serial").return_value = fake_serial
74- serial, domains = dns_update_all_zones()
75+ serial, reloaded, domains = dns_update_all_zones()
76 self.assertThat(serial, Equals(fake_serial))
77+ self.assertThat(reloaded, Is(True))
78 self.assertThat(domains, MatchesSetwise(*[
79 Equals(domain.name)
80 for domain in Domain.objects.filter(authoritative=True)
81diff --git a/src/maasserver/region_controller.py b/src/maasserver/region_controller.py
82index 9a41c72..2989b6e 100644
83--- a/src/maasserver/region_controller.py
84+++ b/src/maasserver/region_controller.py
85@@ -48,6 +48,7 @@ from maasserver.rbac import (
86 Resource,
87 SyncConflictError,
88 )
89+from maasserver.service_monitor import service_monitor
90 from maasserver.utils import synchronised
91 from maasserver.utils.orm import (
92 transactional,
93@@ -191,7 +192,11 @@ class RegionControllerService(Service):
94 d = deferToDatabase(transactional(dns_update_all_zones))
95 d.addCallback(self._checkSerial)
96 d.addCallback(self._logDNSReload)
97+ # Order here matters, first needsDNSUpdate is set then pass the
98+ # failure onto `_onDNSReloadFailure` to do the correct thing
99+ # with the DNS server.
100 d.addErrback(_onFailureRetry, 'needsDNSUpdate')
101+ d.addErrback(self._onDNSReloadFailure)
102 d.addErrback(
103 log.err,
104 "Failed configuring DNS.")
105@@ -229,7 +234,10 @@ class RegionControllerService(Service):
106 """Check that the serial of the domain is updated."""
107 if result is None:
108 return None
109- serial, domain_names = result
110+ serial, reloaded, domain_names = result
111+ if not reloaded:
112+ raise DNSReloadError(
113+ "Failed to reload DNS; timeout or rdnc command failed.")
114 not_matching_domains = set(domain_names)
115 loop = 0
116 while len(not_matching_domains) > 0 and loop != 30:
117@@ -252,13 +260,13 @@ class RegionControllerService(Service):
118 raise DNSReloadError(
119 "Failed to reload DNS; serial mismatch "
120 "on domains %s" % ', '.join(not_matching_domains))
121- return serial, domain_names
122+ return result
123
124 def _logDNSReload(self, result):
125 """Log the reason DNS was reloaded."""
126 if result is None:
127 return None
128- serial, domain_names = result
129+ serial, _, domain_names = result
130 if self.previousSerial is None:
131 # This was the first load for starting the service.
132 self.previousSerial = serial
133@@ -269,7 +277,10 @@ class RegionControllerService(Service):
134 # reason for the reload.
135
136 def _logReason(reasons):
137- if len(reasons) == 1:
138+ if len(reasons) == 0:
139+ msg = (
140+ "Reloaded DNS configuration; previous failure (retry)")
141+ elif len(reasons) == 1:
142 msg = "Reloaded DNS configuration; %s" % reasons[0]
143 else:
144 msg = 'Reloaded DNS configuration: \n' + '\n'.join(
145@@ -286,6 +297,16 @@ class RegionControllerService(Service):
146 self.previousSerial = serial
147 return d
148
149+ def _onDNSReloadFailure(self, failure):
150+ """Force kill and restart bind9."""
151+ failure.trap(DNSReloadError)
152+ if not self.retryOnFailure:
153+ return failure
154+ log.err(failure, "Failed configuring DNS; killing and restarting")
155+ d = service_monitor.killService('bind9')
156+ d.addErrback(log.err, "Failed to kill and restart DNS.")
157+ return d
158+
159 @transactional
160 def _getReloadReasons(self, previousSerial, currentSerial):
161 return [
162diff --git a/src/maasserver/service_monitor.py b/src/maasserver/service_monitor.py
163index 92baa43..c902f86 100644
164--- a/src/maasserver/service_monitor.py
165+++ b/src/maasserver/service_monitor.py
166@@ -30,6 +30,9 @@ class BIND9Service(AlwaysOnService):
167 service_name = "bind9"
168 snap_service_name = "bind9"
169
170+ # Pass SIGKILL directly to parent.
171+ kill_extra_opts = ('-s', 'SIGKILL')
172+
173
174 class NTPServiceOnRegion(AlwaysOnService):
175 """Monitored NTP service on a region controller host."""
176diff --git a/src/maasserver/tests/test_region_controller.py b/src/maasserver/tests/test_region_controller.py
177index 2c109db..b00fb2d 100644
178--- a/src/maasserver/tests/test_region_controller.py
179+++ b/src/maasserver/tests/test_region_controller.py
180@@ -212,7 +212,7 @@ class TestRegionControllerService(MAASServerTestCase):
181 service = self.make_service(sentinel.listener)
182 service.needsDNSUpdate = True
183 dns_result = (
184- random.randint(1, 1000), [
185+ random.randint(1, 1000), True, [
186 factory.make_name('domain')
187 for _ in range(3)
188 ])
189@@ -234,6 +234,46 @@ class TestRegionControllerService(MAASServerTestCase):
190
191 @wait_for_reactor
192 @inlineCallbacks
193+ def test_process_zones_kills_bind_on_failed_reload(self):
194+ service = self.make_service(sentinel.listener)
195+ service.needsDNSUpdate = True
196+ service.retryOnFailure = True
197+ dns_result_0 = (
198+ random.randint(1, 1000), False, [
199+ factory.make_name('domain')
200+ for _ in range(3)
201+ ])
202+ dns_result_1 = (dns_result_0[0], True, dns_result_0[2])
203+ mock_dns_update_all_zones = self.patch(
204+ region_controller, "dns_update_all_zones")
205+ mock_dns_update_all_zones.side_effect = [dns_result_0, dns_result_1]
206+
207+ service._checkSerialCalled = False
208+ orig_checkSerial = service._checkSerial
209+
210+ def _checkSerial(result):
211+ if service._checkSerialCalled:
212+ return dns_result_1
213+ service._checkSerialCalled = True
214+ return orig_checkSerial(result)
215+
216+ mock_check_serial = self.patch(service, "_checkSerial")
217+ mock_check_serial.side_effect = _checkSerial
218+ mock_killService = self.patch(
219+ region_controller.service_monitor, 'killService')
220+ mock_killService.return_value = succeed(None)
221+ service.startProcessing()
222+ yield service.processingDefer
223+ self.assertThat(
224+ mock_dns_update_all_zones, MockCallsMatch(call(), call()))
225+ self.assertThat(
226+ mock_check_serial,
227+ MockCallsMatch(call(dns_result_0), call(dns_result_1)))
228+ self.assertThat(
229+ mock_killService, MockCalledOnceWith("bind9"))
230+
231+ @wait_for_reactor
232+ @inlineCallbacks
233 def test_process_updates_proxy(self):
234 service = self.make_service(sentinel.listener)
235 service.needsProxyUpdate = True
236@@ -348,7 +388,7 @@ class TestRegionControllerService(MAASServerTestCase):
237 service.needsProxyUpdate = True
238 service.needsRBACUpdate = True
239 dns_result = (
240- random.randint(1, 1000), [
241+ random.randint(1, 1000), True, [
242 factory.make_name('domain')
243 for _ in range(3)
244 ])
245@@ -407,7 +447,7 @@ class TestRegionControllerService(MAASServerTestCase):
246 succeed(([self.make_soa_result(result_serial)], [], [])),
247 ]
248 # Error should not be raised.
249- return service._checkSerial((formatted_serial, dns_names))
250+ return service._checkSerial((formatted_serial, True, dns_names))
251
252 @wait_for_reactor
253 @inlineCallbacks
254@@ -425,7 +465,7 @@ class TestRegionControllerService(MAASServerTestCase):
255 mock_lookup.side_effect = lambda *args: succeed(([], [], []))
256 # Error should not be raised.
257 with ExpectedException(DNSReloadError):
258- yield service._checkSerial((formatted_serial, dns_names))
259+ yield service._checkSerial((formatted_serial, True, dns_names))
260
261 @wait_for_reactor
262 @inlineCallbacks
263@@ -443,7 +483,7 @@ class TestRegionControllerService(MAASServerTestCase):
264 mock_lookup.side_effect = ValueError()
265 # Error should not be raised.
266 with ExpectedException(DNSReloadError):
267- yield service._checkSerial((formatted_serial, dns_names))
268+ yield service._checkSerial((formatted_serial, True, dns_names))
269
270 @wait_for_reactor
271 @inlineCallbacks
272@@ -461,7 +501,7 @@ class TestRegionControllerService(MAASServerTestCase):
273 mock_lookup.side_effect = TimeoutError()
274 # Error should not be raised.
275 with ExpectedException(DNSReloadError):
276- yield service._checkSerial((formatted_serial, dns_names))
277+ yield service._checkSerial((formatted_serial, True, dns_names))
278
279 def test__getRBACClient_returns_None_when_no_url(self):
280 service = self.make_service(sentinel.listener)
281@@ -565,7 +605,7 @@ class TestRegionControllerServiceTransactional(MAASTransactionServerTestCase):
282 service.needsDNSUpdate = True
283 service.previousSerial = publications[0].serial
284 dns_result = (
285- publications[-1].serial, [
286+ publications[-1].serial, True, [
287 factory.make_name('domain')
288 for _ in range(3)
289 ])
290@@ -602,7 +642,7 @@ class TestRegionControllerServiceTransactional(MAASTransactionServerTestCase):
291 service.needsDNSUpdate = True
292 service.previousSerial = publications[0].serial
293 dns_result = (
294- publications[-1].serial, [
295+ publications[-1].serial, True, [
296 factory.make_name('domain')
297 for _ in range(3)
298 ])
299diff --git a/src/provisioningserver/dns/actions.py b/src/provisioningserver/dns/actions.py
300index a955023..d8439f8 100644
301--- a/src/provisioningserver/dns/actions.py
302+++ b/src/provisioningserver/dns/actions.py
303@@ -13,7 +13,10 @@ __all__ = [
304 ]
305
306 import collections
307-from subprocess import CalledProcessError
308+from subprocess import (
309+ CalledProcessError,
310+ TimeoutExpired,
311+)
312 from time import sleep
313
314 from provisioningserver.dns.config import (
315@@ -48,7 +51,7 @@ def bind_reconfigure():
316 raise
317
318
319-def bind_reload():
320+def bind_reload(timeout=2):
321 """Ask BIND to reload its configuration and all zone files. This operation
322 is 'best effort' (with logging) as the server may not be running, and there
323 is often no context for reporting.
324@@ -56,21 +59,24 @@ def bind_reload():
325 :return: True if success, False otherwise.
326 """
327 try:
328- execute_rndc_command(("reload",))
329+ execute_rndc_command(("reload",), timeout=timeout)
330 return True
331 except CalledProcessError as exc:
332 maaslog.error("Reloading BIND failed (is it running?): %s", exc)
333 return False
334+ except TimeoutExpired as exc:
335+ maaslog.error("Reloading BIND timed out (is it locked?): %s", exc)
336+ return False
337
338
339-def bind_reload_with_retries(attempts=10, interval=2):
340+def bind_reload_with_retries(attempts=10, interval=2, timeout=2):
341 """Ask BIND to reload its configuration and all zone files.
342
343 :param attempts: The number of attempts.
344 :param interval: The time in seconds to sleep between each attempt.
345 """
346 for countdown in range(attempts - 1, -1, -1):
347- if bind_reload():
348+ if bind_reload(timeout=timeout):
349 break
350 if countdown == 0:
351 break
352diff --git a/src/provisioningserver/dns/config.py b/src/provisioningserver/dns/config.py
353index 715b36e..f196902 100644
354--- a/src/provisioningserver/dns/config.py
355+++ b/src/provisioningserver/dns/config.py
356@@ -177,12 +177,12 @@ def set_up_rndc():
357 f.write(named_content)
358
359
360-def execute_rndc_command(arguments):
361+def execute_rndc_command(arguments, timeout=None):
362 """Execute a rndc command."""
363 rndc_conf = get_rndc_conf_path()
364 rndc_cmd = ['rndc', '-c', rndc_conf]
365 rndc_cmd.extend(arguments)
366- call_and_check(rndc_cmd)
367+ call_and_check(rndc_cmd, timeout=timeout)
368
369
370 def set_up_options_conf(overwrite=True, **kwargs):
371diff --git a/src/provisioningserver/dns/tests/test_actions.py b/src/provisioningserver/dns/tests/test_actions.py
372index 472bb56..58d216e 100644
373--- a/src/provisioningserver/dns/tests/test_actions.py
374+++ b/src/provisioningserver/dns/tests/test_actions.py
375@@ -78,7 +78,7 @@ class TestReload(MAASTestCase):
376 actions.bind_reload()
377 self.assertThat(
378 actions.execute_rndc_command,
379- MockCalledOnceWith(("reload",)))
380+ MockCalledOnceWith(("reload",), timeout=2))
381
382 def test__logs_subprocess_error(self):
383 erc = self.patch_autospec(actions, "execute_rndc_command")
384@@ -105,7 +105,7 @@ class TestReloadWithRetries(MAASTestCase):
385 bind_reload.return_value = False
386 attempts = randint(3, 13)
387 actions.bind_reload_with_retries(attempts=attempts)
388- expected_calls = [call()] * attempts
389+ expected_calls = [call(timeout=2)] * attempts
390 self.assertThat(
391 actions.bind_reload,
392 MockCallsMatch(*expected_calls))
393@@ -114,11 +114,11 @@ class TestReloadWithRetries(MAASTestCase):
394 self.patch_autospec(actions, "sleep") # Disable.
395 bind_reload = self.patch(actions, "bind_reload")
396 bind_reload_return_values = [False, False, True, ]
397- bind_reload.side_effect = lambda: (
398+ bind_reload.side_effect = lambda *args, **kwargs: (
399 bind_reload_return_values.pop(0))
400
401 actions.bind_reload_with_retries(attempts=5)
402- expected_calls = [call(), call(), call()]
403+ expected_calls = [call(timeout=2), call(timeout=2), call(timeout=2)]
404 self.assertThat(
405 actions.bind_reload,
406 MockCallsMatch(*expected_calls))
407diff --git a/src/provisioningserver/dns/tests/test_config.py b/src/provisioningserver/dns/tests/test_config.py
408index f569394..902ab12 100644
409--- a/src/provisioningserver/dns/tests/test_config.py
410+++ b/src/provisioningserver/dns/tests/test_config.py
411@@ -9,7 +9,10 @@ import errno
412 import os.path
413 import random
414 from textwrap import dedent
415-from unittest.mock import Mock
416+from unittest.mock import (
417+ Mock,
418+ sentinel,
419+)
420
421 from fixtures import EnvironmentVariable
422 from maastesting.factory import factory
423@@ -278,10 +281,11 @@ class TestRNDCUtilities(MAASTestCase):
424 fake_dir = patch_dns_config_path(self)
425 self.patch(config, 'call_and_check', recorder)
426 command = factory.make_string()
427- execute_rndc_command([command])
428+ execute_rndc_command([command], timeout=sentinel.timeout)
429 rndc_conf_path = os.path.join(fake_dir, MAAS_RNDC_CONF_NAME)
430 expected_command = ['rndc', '-c', rndc_conf_path, command]
431 self.assertEqual((expected_command,), recorder.calls[0][0])
432+ self.assertEqual({'timeout': sentinel.timeout}, recorder.calls[0][1])
433
434 def test_extract_suggested_named_conf_extracts_section(self):
435 named_part = factory.make_string()
436diff --git a/src/provisioningserver/service_monitor.py b/src/provisioningserver/service_monitor.py
437index 75caab2..701fe85 100644
438--- a/src/provisioningserver/service_monitor.py
439+++ b/src/provisioningserver/service_monitor.py
440@@ -50,6 +50,9 @@ class DNSServiceOnRack(ToggleableService):
441 service_name = "bind9"
442 snap_service_name = "bind9"
443
444+ # Pass SIGKILL directly to parent.
445+ kill_extra_opts = ('-s', 'SIGKILL')
446+
447
448 class ProxyServiceOnRack(ToggleableService):
449 """Monitored proxy service on a rack controller host."""
450diff --git a/src/provisioningserver/utils/service_monitor.py b/src/provisioningserver/utils/service_monitor.py
451index a224632..2959679 100644
452--- a/src/provisioningserver/utils/service_monitor.py
453+++ b/src/provisioningserver/utils/service_monitor.py
454@@ -413,6 +413,26 @@ class ServiceMonitor:
455 raise ServiceActionError(error_msg)
456 yield self._performServiceAction(service, "reload")
457
458+ @asynchronous
459+ @inlineCallbacks
460+ def killService(self, name):
461+ """Kill service.
462+
463+ Service will be killed then its state will be ensured to be in its
464+ expected state. This is a way of getting a broken related service to be
465+ responsive.
466+ """
467+ service = self.getServiceByName(name)
468+ try:
469+ yield self._performServiceAction(service, "kill")
470+ except ServiceActionError:
471+ # Kill action is allowed to fail, as its possible the service is
472+ # already dead or not responding to correct signals to check
473+ # the current status.
474+ pass
475+ state = yield self.ensureService(name)
476+ return state
477+
478 @inlineCallbacks
479 def _execCmd(self, cmd, env, timeout=8, retries=3):
480 """Execute the `cmd` with the `env`."""
481@@ -468,14 +488,46 @@ class ServiceMonitor:
482 return self._execCmd(cmd, env)
483
484 @asynchronous
485- def _execSupervisorServiceAction(self, service_name, action):
486+ def _execSupervisorServiceAction(
487+ self, service_name, action, extra_opts=None):
488 """Perform the action with the run-supervisorctl command.
489
490 :return: tuple (exit code, std-output, std-error)
491 """
492 env = get_env_with_bytes_locale()
493 cmd = os.path.join(snappy.get_snap_path(), "bin", "run-supervisorctl")
494- cmd = (cmd, action, service_name)
495+
496+ # supervisord doesn't support native kill like systemd. Emulate this
497+ # behaviour by getting the PID of the process and then killing the PID.
498+ if action == 'kill':
499+
500+ def _kill_pid(result):
501+ exit_code, stdout, _ = result
502+ if exit_code != 0:
503+ return result
504+ try:
505+ pid = int(stdout.strip())
506+ except ValueError:
507+ pid = 0
508+ if pid == 0:
509+ # supervisorctl returns 0 when the process is already dead
510+ # or we where not able to get the actual pid. Nothing to
511+ # do, as its already dead.
512+ return 0, "", ""
513+ cmd = ('kill',)
514+ if extra_opts:
515+ cmd += extra_opts
516+ cmd += ('%s' % pid,)
517+ return self._execCmd(cmd, env)
518+
519+ d = self._execCmd((cmd, 'pid', service_name), env)
520+ d.addCallback(_kill_pid)
521+ return d
522+
523+ cmd = (cmd, action)
524+ if extra_opts is not None:
525+ cmd += extra_opts
526+ cmd += (service_name,)
527 return self._execCmd(cmd, env)
528
529 @inlineCallbacks
530@@ -488,8 +540,9 @@ class ServiceMonitor:
531 else:
532 exec_action = self._execSystemDServiceAction
533 service_name = service.service_name
534+ extra_opts = getattr(service, '%s_extra_opts' % action, None)
535 exit_code, output, error = yield lock.run(
536- exec_action, service_name, action)
537+ exec_action, service_name, action, extra_opts=extra_opts)
538 if exit_code != 0:
539 error_msg = (
540 "Service '%s' failed to %s: %s" % (
541diff --git a/src/provisioningserver/utils/shell.py b/src/provisioningserver/utils/shell.py
542index 0ec7c39..488afb6 100644
543--- a/src/provisioningserver/utils/shell.py
544+++ b/src/provisioningserver/utils/shell.py
545@@ -105,8 +105,9 @@ def call_and_check(command, *args, **kwargs):
546 :return: The command's output from standard output.
547 :raise ExternalProcessError: If the command returns nonzero.
548 """
549+ timeout = kwargs.pop('timeout', None)
550 process = Popen(command, *args, stdout=PIPE, stderr=PIPE, **kwargs)
551- stdout, stderr = process.communicate()
552+ stdout, stderr = process.communicate(timeout=timeout)
553 stderr = stderr.strip()
554 if process.returncode != 0:
555 raise ExternalProcessError(process.returncode, command, output=stderr)
556diff --git a/src/provisioningserver/utils/tests/test_service_monitor.py b/src/provisioningserver/utils/tests/test_service_monitor.py
557index eb984ed..11e71da 100644
558--- a/src/provisioningserver/utils/tests/test_service_monitor.py
559+++ b/src/provisioningserver/utils/tests/test_service_monitor.py
560@@ -10,6 +10,7 @@ import os
561 import random
562 from textwrap import dedent
563 from unittest.mock import (
564+ call,
565 Mock,
566 sentinel,
567 )
568@@ -18,6 +19,7 @@ from fixtures import FakeLogger
569 from maastesting.factory import factory
570 from maastesting.matchers import (
571 MockCalledOnceWith,
572+ MockCallsMatch,
573 MockNotCalled,
574 )
575 from maastesting.runtest import MAASTwistedRunTest
576@@ -50,6 +52,7 @@ from twisted.internet import reactor
577 from twisted.internet.defer import (
578 CancelledError,
579 DeferredLock,
580+ fail,
581 inlineCallbacks,
582 succeed,
583 )
584@@ -452,6 +455,42 @@ class TestServiceMonitor(MAASTestCase):
585 yield service_monitor.reloadService(fake_service.name, if_on=True)
586
587 @inlineCallbacks
588+ def test__killService_performs_kill_then_ensureService(self):
589+ fake_service = make_fake_service(SERVICE_STATE.ON)
590+ service_monitor = self.make_service_monitor([fake_service])
591+ mock_performServiceAction = self.patch(
592+ service_monitor, "_performServiceAction")
593+ mock_performServiceAction.return_value = succeed(None)
594+ mock_ensureService = self.patch(service_monitor, "ensureService")
595+ mock_ensureService.return_value = succeed(
596+ ServiceState(SERVICE_STATE.ON, "running"))
597+ yield service_monitor.killService(fake_service.name)
598+ self.assertThat(
599+ mock_performServiceAction,
600+ MockCalledOnceWith(fake_service, "kill"))
601+ self.assertThat(
602+ mock_ensureService,
603+ MockCalledOnceWith(fake_service.name))
604+
605+ @inlineCallbacks
606+ def test__killService_doesnt_fail_on_ServiceActionError(self):
607+ fake_service = make_fake_service(SERVICE_STATE.ON)
608+ service_monitor = self.make_service_monitor([fake_service])
609+ mock_performServiceAction = self.patch(
610+ service_monitor, "_performServiceAction")
611+ mock_performServiceAction.return_value = fail(ServiceActionError())
612+ mock_ensureService = self.patch(service_monitor, "ensureService")
613+ mock_ensureService.return_value = succeed(
614+ ServiceState(SERVICE_STATE.ON, "running"))
615+ yield service_monitor.killService(fake_service.name)
616+ self.assertThat(
617+ mock_performServiceAction,
618+ MockCalledOnceWith(fake_service, "kill"))
619+ self.assertThat(
620+ mock_ensureService,
621+ MockCalledOnceWith(fake_service.name))
622+
623+ @inlineCallbacks
624 def test___execCmd_times_out(self):
625 monitor = ServiceMonitor(make_fake_service())
626 with ExpectedException(ServiceActionError):
627@@ -535,16 +574,44 @@ class TestServiceMonitor(MAASTestCase):
628 mock_getProcessOutputAndValue = self.patch(
629 service_monitor_module, "getProcessOutputAndValue")
630 mock_getProcessOutputAndValue.return_value = succeed((b"", b"", 0))
631+ extra_opts = ('--extra', factory.make_name('extra'))
632 yield service_monitor._execSupervisorServiceAction(
633- service_name, action)
634+ service_name, action, extra_opts=extra_opts)
635 cmd = os.path.join(snap_path, 'bin', 'run-supervisorctl')
636- cmd = cmd, action, service_name
637+ cmd = (cmd, action) + extra_opts + (service_name,)
638 self.assertThat(
639 mock_getProcessOutputAndValue, MockCalledOnceWith(
640 # The environment contains LC_ALL and LANG too.
641 cmd[0], cmd[1:], env=get_env_with_bytes_locale()))
642
643 @inlineCallbacks
644+ def test___execSupervisorServiceAction_emulates_kill(self):
645+ snap_path = factory.make_name("path")
646+ self.patch(snappy, "get_snap_path").return_value = snap_path
647+ service_monitor = self.make_service_monitor()
648+ service_name = factory.make_name("service")
649+ fake_pid = random.randint(1, 100)
650+ mock_getProcessOutputAndValue = self.patch(
651+ service_monitor_module, "getProcessOutputAndValue")
652+ mock_getProcessOutputAndValue.side_effect = [
653+ succeed((("%s" % fake_pid).encode('utf-8'), b"", 0)),
654+ succeed((b"", b"", 0)),
655+ ]
656+ extra_opts = ('-s', factory.make_name('SIGKILL'))
657+ yield service_monitor._execSupervisorServiceAction(
658+ service_name, 'kill', extra_opts=extra_opts)
659+ cmd = os.path.join(snap_path, 'bin', 'run-supervisorctl')
660+ self.assertThat(
661+ mock_getProcessOutputAndValue, MockCallsMatch(
662+ call(
663+ cmd, ("pid", service_name),
664+ env=get_env_with_bytes_locale()),
665+ call(
666+ "kill", extra_opts + ("%s" % fake_pid,),
667+ env=get_env_with_bytes_locale()),
668+ ))
669+
670+ @inlineCallbacks
671 def test___execSupervisorServiceAction_decodes_stdout_and_stderr(self):
672 # From https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt.
673 example_text = (
674@@ -579,13 +646,16 @@ class TestServiceMonitor(MAASTestCase):
675 service_monitor, "_execSystemDServiceAction")
676 mock_execSystemDServiceAction.return_value = (0, "", "")
677 action = factory.make_name("action")
678+ extra_opts = ('--option', factory.make_name("option"))
679+ setattr(service, '%s_extra_opts' % action, extra_opts)
680 yield service_monitor._performServiceAction(service, action)
681 self.assertThat(service_lock.run, MockCalledOnceWith(
682 service_monitor._execSystemDServiceAction,
683- service.service_name, action))
684+ service.service_name, action, extra_opts=extra_opts))
685 self.assertThat(
686 mock_execSystemDServiceAction,
687- MockCalledOnceWith(service.service_name, action))
688+ MockCalledOnceWith(
689+ service.service_name, action, extra_opts=extra_opts))
690
691 @inlineCallbacks
692 def test___performServiceAction_holds_lock_perform_supervisor_action(self):
693@@ -599,13 +669,16 @@ class TestServiceMonitor(MAASTestCase):
694 service_monitor, "_execSupervisorServiceAction")
695 mock_execSupervisorServiceAction.return_value = (0, "", "")
696 action = factory.make_name("action")
697+ extra_opts = ('--option', factory.make_name("option"))
698+ setattr(service, '%s_extra_opts' % action, extra_opts)
699 yield service_monitor._performServiceAction(service, action)
700 self.assertThat(service_lock.run, MockCalledOnceWith(
701 service_monitor._execSupervisorServiceAction,
702- service.service_name, action))
703+ service.service_name, action, extra_opts=extra_opts))
704 self.assertThat(
705 mock_execSupervisorServiceAction,
706- MockCalledOnceWith(service.service_name, action))
707+ MockCalledOnceWith(
708+ service.service_name, action, extra_opts=extra_opts))
709
710 @inlineCallbacks
711 def test___performServiceAction_raises_ServiceActionError_if_fails(self):
712diff --git a/src/provisioningserver/utils/tests/test_shell.py b/src/provisioningserver/utils/tests/test_shell.py
713index 4492b3a..03f67fd 100644
714--- a/src/provisioningserver/utils/tests/test_shell.py
715+++ b/src/provisioningserver/utils/tests/test_shell.py
716@@ -6,6 +6,7 @@
717 __all__ = []
718
719 import os
720+import random
721 from subprocess import CalledProcessError
722
723 from fixtures import EnvironmentVariable
724@@ -54,6 +55,14 @@ class TestCallAndCheck(MAASTestCase):
725 self.assertEqual(command, error.cmd)
726 self.assertEqual(message, error.output)
727
728+ def test__passes_timeout_to_communicate(self):
729+ command = factory.make_name('command')
730+ process = self.patch_popen()
731+ timeout = random.randint(1, 10)
732+ call_and_check(command, timeout=timeout)
733+ self.assertThat(
734+ process.communicate, MockCalledOnceWith(timeout=timeout))
735+
736 def test__reports_stderr_on_failure(self):
737 nonfile = os.path.join(self.make_dir(), factory.make_name('nonesuch'))
738 error = self.assertRaises(

Subscribers

People subscribed via source and target branches