Merge lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18749
Proposed branch: lp:~cjwatson/launchpad/loggerhead-shutdown-race
Merge into: lp:launchpad
Diff against target: 83 lines (+24/-12)
2 files modified
lib/lp/services/osutils.py (+13/-6)
scripts/stop-loggerhead.py (+11/-6)
To merge this branch: bzr merge lp:~cjwatson/launchpad/loggerhead-shutdown-race
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+352884@code.launchpad.net

Commit message

Fix stop-loggerhead to do a two-stage kill.

Description of the change

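Previously stop-loggerhead only sent SIGTERM and exited immediately, so during deployments it could return before the old process had actually stopped. It now uses two_stage_kill, which polls until the process is gone and escalates to SIGKILL if it does not exit in time.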

To post a comment you must log in.
Revision history for this message
William Grant (wgrant):
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/osutils.py'
2--- lib/lp/services/osutils.py 2018-06-06 12:46:56 +0000
3+++ lib/lp/services/osutils.py 2018-08-10 10:18:30 +0000
4@@ -110,13 +110,15 @@
5 raise
6
7
8-def two_stage_kill(pid, poll_interval=0.1, num_polls=50):
9+def two_stage_kill(pid, poll_interval=0.1, num_polls=50, get_status=True):
10 """Kill process 'pid' with SIGTERM. If it doesn't die, SIGKILL it.
11
12 :param pid: The pid of the process to kill.
13 :param poll_interval: The polling interval used to check if the
14 process is still around.
15 :param num_polls: The number of polls to do before doing a SIGKILL.
16+ :param get_status: If True, collect the process' exit status (which
17+ requires it to be a child of the process running this function).
18 """
19 # Kill the process.
20 _kill_may_race(pid, SIGTERM)
21@@ -124,11 +126,16 @@
22 # Poll until the process has ended.
23 for i in range(num_polls):
24 try:
25- # Reap the child process and get its return value. If it's not
26- # gone yet, continue.
27- new_pid, result = os.waitpid(pid, os.WNOHANG)
28- if new_pid:
29- return result
30+ if get_status:
31+ # Reap the child process and get its return value. If it's
32+ # not gone yet, continue.
33+ new_pid, result = os.waitpid(pid, os.WNOHANG)
34+ if new_pid:
35+ return result
36+ else:
37+ # If the process isn't gone yet, continue.
38+ if not process_exists(pid):
39+ return
40 time.sleep(poll_interval)
41 except OSError as e:
42 if e.errno in (errno.ESRCH, errno.ECHILD):
43
44=== modified file 'scripts/stop-loggerhead.py'
45--- scripts/stop-loggerhead.py 2018-06-06 12:46:56 +0000
46+++ scripts/stop-loggerhead.py 2018-08-10 10:18:30 +0000
47@@ -8,10 +8,12 @@
48 import _pythonpath
49
50 from optparse import OptionParser
51-import os
52-import signal
53 import sys
54
55+from lp.services.osutils import (
56+ process_exists,
57+ two_stage_kill,
58+ )
59 from lp.services.pidfile import get_pid
60
61
62@@ -20,9 +22,11 @@
63
64 pid = get_pid("codebrowse")
65
66-try:
67- os.kill(pid, 0)
68-except OSError as e:
69+if pid is None:
70+ # Already stopped.
71+ sys.exit(0)
72+
73+if not process_exists(pid):
74 print('Stale pid file; server is not running.')
75 sys.exit(1)
76
77@@ -30,4 +34,5 @@
78 print('Shutting down previous server @ pid %d.' % (pid,))
79 print()
80
81-os.kill(pid, signal.SIGTERM)
82+# A busy gunicorn can take a while to shut down.
83+two_stage_kill(pid, poll_interval=0.5, num_polls=120, get_status=False)