Merge lp:~jameinel/launchpad/lp-serve-child-hangup into lp:launchpad
| Status: | Superseded |
|---|---|
| Proposed branch: | lp:~jameinel/launchpad/lp-serve-child-hangup |
| Merge into: | lp:launchpad |
| Prerequisite: | lp:~jameinel/launchpad/lp-forking-serve-cleaner-childre |
| Diff against target: | 312 lines (+180/-22), 2 files modified: bzrplugins/lpserve/__init__.py (+94/-19), bzrplugins/lpserve/test_lpserve.py (+86/-3) |
| To merge this branch: | bzr merge lp:~jameinel/launchpad/lp-serve-child-hangup |
| Related bugs: | |

| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| Launchpad code reviewers | Pending | | |

Review via email: mp+50055@code.launchpad.net
This proposal has been superseded by a proposal from 2011-03-02.
Commit message
Children spawned from launchpad-
Description of the change
This branch builds on my earlier branch, which has the children spawned by LPForkingService clean themselves up more readily.
This adds code so that the blocking calls will be unblocked after 2 minutes. At the moment, if the Conch server requests a fork, but is unable to actually connect to the child, that child ends up hanging forever on a file handle that it will never open.
Originally, I got this working using a loop and O_NONBLOCK. But after doing some testing, fcntl.fcntl(...) was unable to actually change the file descriptor back to blocking. It looks right in the man page, and it does unset the flag, but I still get EAGAIN failures in the smart server code.
I don't really like spawning a thread to send SIGUSR1 back to myself, but it seemed the best tradeoff. If we want, we could even make it SIGTERM or something, since we know we are going to kill the process if the connection fails.
I figured 2 minutes was reasonable. This is the time from a successful ssh handshake until we actually connect to a newly spawned child. Even the worst-case time that I've seen was about 30s for a child to be spawned. So this gives us a 4x margin of error.
This is cleanup related to bug #717345, but it doesn't directly fix the problem. That will be in my next branch.
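The mechanism described above can be sketched as follows. This is a minimal standalone illustration, not the branch's code: `open_fifo_with_timeout` is a hypothetical name, and unlike the Python 2 code under review (which sees EINTR directly), modern Python retries interrupted system calls, so the handler must raise to break out of the blocked `os.open()`.

```python
import os
import signal
import tempfile

def open_fifo_with_timeout(path, flags, timeout):
    """Open a FIFO, giving up if nobody connects within `timeout` seconds.

    os.open() on a FIFO blocks until the other end is opened. SIGALRM
    interrupts the blocked call; raising from the handler propagates out
    of os.open() (merely returning would let Python retry the call).
    """
    def on_alarm(signum, frame):
        raise TimeoutError('nobody connected to %s within %ds'
                           % (path, timeout))

    old_handler = signal.signal(signal.SIGALRM, on_alarm)
    signal.alarm(timeout)
    try:
        return os.open(path, flags)
    finally:
        signal.alarm(0)  # cancel the pending alarm
        signal.signal(signal.SIGALRM, old_handler)

# Nobody ever opens the write side, so this times out after ~1s.
fifo_dir = tempfile.mkdtemp(prefix='lpserve-sketch-')
fifo_path = os.path.join(fifo_dir, 'stdin')
os.mkfifo(fifo_path)
try:
    open_fifo_with_timeout(fifo_path, os.O_RDONLY, 1)
    result = 'opened'
except TimeoutError:
    result = 'timed out'
print(result)
```

Cancelling the alarm in the `finally` block mirrors the `signal.alarm(0)` call in the proposed `_open_handles`.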
Martin Pool (mbp) wrote:
I don't really like that either, especially as this is already getting to be a fairly complex set of processes.
Why can't you make the file handles nonblocking and just leave them so? Is it because they're later inherited by some other process?
If you do need a signal to interrupt things, how about just using alarm(2)?
John A Meinel (jameinel) wrote:
...
>> I don't really like spawning a thread to send SIGUSR1 back to myself, but it seemed the best tradeoff. If we want, we could even make it SIGTERM or something, since we know we are going to kill the process if the connection fails.
>
> I don't really like that either, especially this is already getting to
> be a fairly complex set of processes.
>
> Why can't you make the file handles nonblocking and just leave them
> so? Is it because they're later inherited by some other process?
Because the Smart server code (SmartServerPipe...) expects blocking file handles.
>
> If you do need a signal to interrupt things, how about just using alarm(2)?
Because alarm doesn't seem to be exposed to Python, and I want the
ability to cancel the callback. threading.Timer() gives me that exact
functionality.
I would be fine using SIGALRM if you prefer that to SIGUSR1.
John
=:->
Robert Collins (lifeless) wrote:
What about using dup2 to duplicate the fd, make one copy nonblocking
and use that until we're up and running?
John A Meinel (jameinel) wrote:
On 2/16/2011 4:28 PM, Robert Collins wrote:
> What about using dup2 to duplicate the fd, make one copy nonblocking
> and use that until we're up and running?
>
The issue is we want non-blocking in the 'os.open()' call, until
everything is open, then we want to reset it to blocking.
You can use tricks with fcntl.fcntl(... F_SETFD) according to the man
pages. But testing showed it doesn't actually set the file back to
blocking mode.
I would expect something similar from dup2.
John
=:->
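An editorial aside that may explain the behaviour John describes: O_NONBLOCK is a file *status* flag, manipulated via F_GETFL/F_SETFL, while F_GETFD/F_SETFD only touch file *descriptor* flags such as FD_CLOEXEC. Clearing O_NONBLOCK through F_SETFD (as the diff's `_set_blocking` does) is therefore a silent no-op. A small sketch, illustrative rather than taken from the branch, of actually resetting an fd to blocking mode:

```python
import fcntl
import os

def set_blocking(fd):
    # O_NONBLOCK lives in the file *status* flags (F_GETFL/F_SETFL).
    # F_GETFD/F_SETFD manage descriptor flags (e.g. FD_CLOEXEC) and
    # silently ignore O_NONBLOCK, which makes a reset appear to
    # succeed while leaving the fd non-blocking.
    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, flags & ~os.O_NONBLOCK)

# Demonstrate the round trip on a pipe: non-blocking, then back.
r, w = os.pipe()
flags = fcntl.fcntl(r, fcntl.F_GETFL)
fcntl.fcntl(r, fcntl.F_SETFL, flags | os.O_NONBLOCK)  # make it non-blocking
set_blocking(r)                                       # and back again
is_blocking = not (fcntl.fcntl(r, fcntl.F_GETFL) & os.O_NONBLOCK)
print(is_blocking)
```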
Robert Collins (lifeless) wrote:
On Thu, Feb 17, 2011 at 11:38 AM, John A Meinel <email address hidden> wrote:
> I would expect something similar from dup2.
I would expect dup2 to work around those bugs :) - it's why I suggested it.
-Rob
Martin Pool (mbp) wrote:
On 17 February 2011 10:01, Robert Collins <email address hidden> wrote:
> On Thu, Feb 17, 2011 at 11:38 AM, John A Meinel <email address hidden> wrote:
>> I would expect something similar from dup2.
>
> I would expect dup2 to workaround those bugs :) - its why I suggested it.
We want to do a nonblocking open, so we don't have an fd to dup until
we've done that.
After opening it, maybe we could dup it, then make the new copy
blocking, then close the original. It's possible that would work
around it. On the other hand if in general we can't make open fds
blocking, it may not fix it.
John A Meinel (jameinel) wrote:
On 2/16/2011 5:43 PM, Martin Pool wrote:
> On 17 February 2011 10:01, Robert Collins <email address hidden> wrote:
>> On Thu, Feb 17, 2011 at 11:38 AM, John A Meinel <email address hidden> wrote:
>>> I would expect something similar from dup2.
>>
>> I would expect dup2 to workaround those bugs :) - its why I suggested it.
>
> We want to do a nonblocking open, so we don't have an fd to dup until
> we've done that.
>
> After opening it, maybe we could dup it, then make the new copy
> blocking, then close the original. It's possible that would work
> around it. On the other hand if in general we can't make open fds
> blocking, it may not fix it.
>
Right, it *might* work to open non-blocking, set it to blocking, then
dup2 it and use the new duplicated one. I can try it, but honestly, the
code is much clearer with the Timer rather than the blocking/non-blocking
handling. Rather than just getting a SIG* after 120s of inactivity.
If you prefer SIGALRM, or alarm(2), then we can use ctypes/
John
=:->
John A Meinel (jameinel) wrote:
...
>
> If you do need a signal to interrupt things, how about just using alarm(2)?
>
It turns out it is exposed, but only conditionally, and not where I
expected it.
import signal
signal.alarm()
If you prefer, I'd be happy to use that instead. Should I be catching
SIGALRM, or should I just let it propagate to kill the child?
John
=:->
Martin Pool (mbp) wrote:
> It turns out it is exposed, but only conditionally, and not where I
> expected it.
>
> import signal
> signal.alarm()
>
> If you prefer, I'd be happy to use that instead. Should I be catching
> SIGALRM, or should I just let it propagate to kill the child?
Since it's supposed to be an unusual case that the connection fails
half way through, I think just letting it kill the client should be
fine. The parent should wait on them and log their exit status.
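The wait-and-log step Martin describes might look like the following. This is a hedged standalone sketch, not the LPForkingService code; the `'exited\n<status>\n'` string merely mirrors the format the branch's tests expect.

```python
import os
import signal

# Fork a child that dies from SIGALRM (as a timed-out child would),
# then have the parent reap it and build a status report.
pid = os.fork()
if pid == 0:
    # Child: restore the default SIGALRM action and signal ourselves.
    signal.signal(signal.SIGALRM, signal.SIG_DFL)
    os.kill(os.getpid(), signal.SIGALRM)
    os._exit(0)  # never reached; SIGALRM terminates the process

# Parent: wait on the child and record how it exited.
_, status = os.waitpid(pid, 0)
if os.WIFSIGNALED(status):
    report = 'exited\n%s\n' % (os.WTERMSIG(status),)
else:
    report = 'exited\n%s\n' % (os.WEXITSTATUS(status),)
print(report)
```

Distinguishing WIFSIGNALED from WIFEXITED matters here, since a child killed by the alarm and a child that exited cleanly should be logged differently.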
Preview Diff
=== modified file 'bzrplugins/lpserve/__init__.py'
--- bzrplugins/lpserve/__init__.py	2011-02-17 21:47:07 +0000
+++ bzrplugins/lpserve/__init__.py	2011-03-02 13:07:04 +0000
@@ -15,6 +15,7 @@
 
 
 import errno
+import fcntl
 import logging
 import os
 import resource
@@ -31,6 +32,7 @@
 from bzrlib.option import Option
 from bzrlib import (
     commands,
+    errors,
     lockdir,
     osutils,
     trace,
@@ -309,6 +311,11 @@
     SLEEP_FOR_CHILDREN_TIMEOUT = 1.0
     WAIT_FOR_REQUEST_TIMEOUT = 1.0 # No request should take longer than this to
                                    # be read
+    CHILD_CONNECT_TIMEOUT = 120    # If we get a fork() request, but nobody
+                                   # connects just exit
+                                   # On a heavily loaded server, it could take a
+                                   # couple secs, but it should never take
+                                   # minutes
 
     _fork_function = os.fork
 
@@ -324,6 +331,7 @@
         # Map from pid => (temp_path_for_handles, request_socket)
         self._child_processes = {}
         self._children_spawned = 0
+        self._child_connect_timeout = self.CHILD_CONNECT_TIMEOUT
 
     def _create_master_socket(self):
         self._server_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
@@ -372,28 +380,94 @@
         signal.signal(signal.SIGCHLD, signal.SIG_DFL)
         signal.signal(signal.SIGTERM, signal.SIG_DFL)
 
+    def _compute_paths(self, base_path):
+        stdin_path = os.path.join(base_path, 'stdin')
+        stdout_path = os.path.join(base_path, 'stdout')
+        stderr_path = os.path.join(base_path, 'stderr')
+        return (stdin_path, stdout_path, stderr_path)
+
     def _create_child_file_descriptors(self, base_path):
-        stdin_path = os.path.join(base_path, 'stdin')
-        stdout_path = os.path.join(base_path, 'stdout')
-        stderr_path = os.path.join(base_path, 'stderr')
+        stdin_path, stdout_path, stderr_path = self._compute_paths(base_path)
         os.mkfifo(stdin_path)
         os.mkfifo(stdout_path)
         os.mkfifo(stderr_path)
 
-    def _bind_child_file_descriptors(self, base_path):
-        stdin_path = os.path.join(base_path, 'stdin')
-        stdout_path = os.path.join(base_path, 'stdout')
-        stderr_path = os.path.join(base_path, 'stderr')
+    def _set_blocking(self, fd):
+        """Change the file descriptor to unset the O_NONBLOCK flag."""
+        flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+        flags = flags & (~os.O_NONBLOCK)
+        fcntl.fcntl(fd, fcntl.F_SETFD, flags)
+
+    def _get_child_connect_timeout(self):
+        """signal.alarm only supports 1s granularity.
+
+        We have to make sure we don't ever send 0, which would not generate an
+        alarm.
+        """
+        timeout = int(self._child_connect_timeout)
+        if timeout <= 0:
+            timeout = 1
+        return timeout
+
+    def _open_handles(self, base_path):
+        """Open the given file handles.
+
+        This will attempt to open all of these file handles, but will not block
+        while opening them, timing out after self._child_connect_timeout
+        seconds.
+
+        :param base_path: The directory where all FIFOs are located
+        :return: (stdin_fid, stdout_fid, stderr_fid)
+        """
+        stdin_path, stdout_path, stderr_path = self._compute_paths(base_path)
         # These open calls will block until another process connects (which
         # must connect in the same order)
-        stdin_fid = os.open(stdin_path, os.O_RDONLY)
-        stdout_fid = os.open(stdout_path, os.O_WRONLY)
-        stderr_fid = os.open(stderr_path, os.O_WRONLY)
+        fids = []
+        to_open = [(stdin_path, os.O_RDONLY), (stdout_path, os.O_WRONLY),
+                   (stderr_path, os.O_WRONLY)]
+        signal.alarm(self._get_child_connect_timeout())
+        tstart = time.time()
+        for path, flags in to_open:
+            try:
+                fids.append(os.open(path, flags))
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    error = ('After %.3fs we failed to open %s, exiting'
+                             % (time.time() - tstart, path,))
+                    trace.warning(error)
+                    for fid in fids:
+                        try:
+                            os.close(fid)
+                        except OSError:
+                            pass
+                    self._cleanup_fifos(base_path)
+                    raise errors.BzrError(error)
+                raise
+        # If we get to here, that means all the handles were opened
+        # successfully, so cancel the wakeup call.
+        signal.alarm(0)
+        return fids
+
+    def _cleanup_fifos(self, base_path):
+        """Remove the FIFO objects and directory from disk."""
+        stdin_path, stdout_path, stderr_path = self._compute_paths(base_path)
+        # Now that we've opened the handles, delete everything so that we don't
+        # leave garbage around. Because the open() is done in blocking mode, we
+        # know that someone has already connected to them, and we don't want
+        # anyone else getting confused and connecting.
+        # See [Decision #5]
+        os.remove(stdin_path)
+        os.remove(stdout_path)
+        os.remove(stderr_path)
+        os.rmdir(base_path)
+
+    def _bind_child_file_descriptors(self, base_path):
         # Note: by this point bzrlib has opened stderr for logging
         # (as part of starting the service process in the first place).
         # As such, it has a stream handler that writes to stderr. logging
         # tries to flush and close that, but the file is already closed.
         # This just supresses that exception
+        stdin_fid, stdout_fid, stderr_fid = self._open_handles(base_path)
         logging.raiseExceptions = False
         sys.stdin.close()
         sys.stdout.close()
@@ -407,15 +481,7 @@
         ui.ui_factory.stdin = sys.stdin
         ui.ui_factory.stdout = sys.stdout
         ui.ui_factory.stderr = sys.stderr
-        # Now that we've opened the handles, delete everything so that we don't
-        # leave garbage around. Because the open() is done in blocking mode, we
-        # know that someone has already connected to them, and we don't want
-        # anyone else getting confused and connecting.
-        # See [Decision #5]
-        os.remove(stderr_path)
-        os.remove(stdout_path)
-        os.remove(stdin_path)
-        os.rmdir(base_path)
+        self._cleanup_fifos(base_path)
 
     def _close_child_file_descriptors(self):
         sys.stdin.close()
@@ -724,6 +790,15 @@
             self._should_terminate.set()
             conn.sendall('ok\nquit command requested... exiting\n')
             conn.close()
+        elif request.startswith('child_connect_timeout '):
+            try:
+                value = int(request.split(' ', 1)[1])
+            except ValueError, e:
+                conn.sendall('FAILURE: %r\n' % (e,))
+            else:
+                self._child_connect_timeout = value
+                conn.sendall('ok\n')
+            conn.close()
         elif request.startswith('fork ') or request.startswith('fork-env '):
             command_argv, env = self._parse_fork_request(conn, client_addr,
                                                          request)
 
=== modified file 'bzrplugins/lpserve/test_lpserve.py'
--- bzrplugins/lpserve/test_lpserve.py	2010-11-08 22:43:58 +0000
+++ bzrplugins/lpserve/test_lpserve.py	2011-03-02 13:07:04 +0000
@@ -3,6 +3,7 @@
 
 import errno
 import os
+import shutil
 import signal
 import socket
 import subprocess
@@ -13,6 +14,7 @@
 from testtools import content
 
 from bzrlib import (
+    errors,
     osutils,
     tests,
     trace,
@@ -263,6 +265,46 @@
                                 one_byte_at_a_time=True)
         self.assertStartsWith(response, 'FAILURE\n')
 
+    def test_child_connection_timeout(self):
+        self.assertEqual(self.service.CHILD_CONNECT_TIMEOUT,
+                         self.service._child_connect_timeout)
+        response = self.send_message_to_service('child_connect_timeout 1\n')
+        self.assertEqual('ok\n', response)
+        self.assertEqual(1, self.service._child_connect_timeout)
+
+    def test_child_connection_timeout_bad_float(self):
+        self.assertEqual(self.service.CHILD_CONNECT_TIMEOUT,
+                         self.service._child_connect_timeout)
+        response = self.send_message_to_service('child_connect_timeout 1.2\n')
+        self.assertStartsWith(response, 'FAILURE:')
+
+    def test_child_connection_timeout_no_val(self):
+        response = self.send_message_to_service('child_connect_timeout \n')
+        self.assertStartsWith(response, 'FAILURE:')
+
+    def test_child_connection_timeout_bad_val(self):
+        response = self.send_message_to_service('child_connect_timeout b\n')
+        self.assertStartsWith(response, 'FAILURE:')
+
+    def test__open_handles_will_timeout(self):
+        # signal.alarm() has only 1-second granularity. :(
+        self.service._child_connect_timeout = 1
+        tempdir = tempfile.mkdtemp(prefix='testlpserve-')
+        self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)
+        os.mkfifo(os.path.join(tempdir, 'stdin'))
+        os.mkfifo(os.path.join(tempdir, 'stdout'))
+        os.mkfifo(os.path.join(tempdir, 'stderr'))
+        def noop_on_alarm(signal, frame):
+            return
+        signal.signal(signal.SIGALRM, noop_on_alarm)
+        self.addCleanup(signal.signal, signal.SIGALRM, signal.SIG_DFL)
+        e = self.assertRaises(errors.BzrError,
+                              self.service._open_handles, tempdir)
+        self.assertContainsRe(str(e), r'After \d+.\d+s we failed to open.*')
+        # Even though it timed out, we still cleanup the temp dir
+        self.assertFalse(os.path.exists(tempdir))
+
+
 
 class TestCaseWithSubprocess(tests.TestCaseWithTransport):
     """Override the bzr start_bzr_subprocess command.
@@ -452,9 +494,9 @@
         stderr_path = os.path.join(path, 'stderr')
         # The ordering must match the ordering of the service or we get a
         # deadlock.
-        child_stdin = open(stdin_path, 'wb')
-        child_stdout = open(stdout_path, 'rb')
-        child_stderr = open(stderr_path, 'rb')
+        child_stdin = open(stdin_path, 'wb', 0)
+        child_stdout = open(stdout_path, 'rb', 0)
+        child_stderr = open(stderr_path, 'rb', 0)
         return child_stdin, child_stdout, child_stderr
 
     def communicate_with_fork(self, path, stdin=None):
@@ -484,6 +526,24 @@
         self.assertEqual('', stderr_content)
         self.assertReturnCode(0, sock)
 
+    def DONT_test_fork_lp_serve_multiple_hello(self):
+        # This ensures that the fifos are all set to blocking mode
+        # We can't actually run this test, because by default 'bzr serve
+        # --inet' does not flush after each message. So we end up blocking
+        # forever waiting for the server to finish responding to the first
+        # request.
+        path, _, sock = self.send_fork_request('lp-serve --inet 2')
+        child_stdin, child_stdout, child_stderr = self._get_fork_handles(path)
+        child_stdin.write('hello\n')
+        child_stdin.flush()
+        self.assertEqual('ok\x012\n', child_stdout.read())
+        child_stdin.write('hello\n')
+        self.assertEqual('ok\x012\n', child_stdout.read())
+        child_stdin.close()
+        self.assertEqual('', child_stderr.read())
+        child_stdout.close()
+        child_stderr.close()
+
     def test_fork_replay(self):
         path, _, sock = self.send_fork_request('launchpad-replay')
         stdout_content, stderr_content = self.communicate_with_fork(path,
@@ -540,6 +600,29 @@
     def test_sigint_exits_nicely(self):
         self._check_exits_nicely(signal.SIGINT)
 
+    def test_child_exits_eventually(self):
+        # We won't ever bind to the socket the child wants, and after some
+        # time, the child should exit cleanly.
+        # First, tell the subprocess that we want children to exit quickly.
+        # *sigh* signal.alarm only has 1s resolution, so this test is slow.
+        response = self.send_message_to_service('child_connect_timeout 1\n')
+        self.assertEqual('ok\n', response)
+        # Now request a fork
+        path, pid, sock = self.send_fork_request('rocks')
+        # Open one handle, but not all of them
+        stdin_path = os.path.join(path, 'stdin')
+        stdout_path = os.path.join(path, 'stdout')
+        stderr_path = os.path.join(path, 'stderr')
+        child_stdin = open(stdin_path, 'wb')
+        # We started opening the child, but stop before we get all handles
+        # open. After 1 second, the child should get signaled and die.
+        # The master process should notice, and tell us the status of the
+        # exited child.
+        val = sock.recv(4096)
+        self.assertEqual('exited\n%s\n' % (signal.SIGALRM,), val)
+        # The master process should clean up after the now deceased child.
+        self.failIfExists(path)
+
 
 class TestCaseWithLPForkingServiceDaemon(
     TestCaseWithLPForkingServiceSubprocess):
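The `child_connect_timeout` handler added above speaks a line-oriented protocol over the service's Unix socket. The sketch below illustrates that exchange; the client helper name and the in-process stand-in server are assumptions made so the example is self-contained, not part of the branch.

```python
import os
import socket
import tempfile
import threading

def send_message(socket_path, message):
    """Send one request over the service's Unix socket, return the reply."""
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    s.connect(socket_path)
    s.sendall(message)
    response = s.recv(1024)
    s.close()
    return response

# Stand-in server that understands only 'child_connect_timeout <int>\n',
# mirroring the request handler added in the diff above.
sock_path = os.path.join(tempfile.mkdtemp(), 'service.sock')
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
server.bind(sock_path)
server.listen(1)

def serve_one():
    conn, _ = server.accept()
    request = conn.recv(1024).decode()
    if request.startswith('child_connect_timeout '):
        try:
            int(request.split(' ', 1)[1])
        except ValueError as e:
            conn.sendall(('FAILURE: %r\n' % (e,)).encode())
        else:
            conn.sendall(b'ok\n')
    conn.close()

t = threading.Thread(target=serve_one)
t.start()
reply = send_message(sock_path, b'child_connect_timeout 1\n')
t.join()
server.close()
print(reply)
```

Note that, as in the diff, a fractional value like `1.2` fails `int()` and produces a `FAILURE:` reply, which is exactly what `test_child_connection_timeout_bad_float` asserts.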