Merge lp:~mwhudson/lava-scheduler/do-not-drop-monitor-output into lp:lava-scheduler

Proposed by Michael Hudson-Doyle
Status: Merged
Merged at revision: 209
Proposed branch: lp:~mwhudson/lava-scheduler/do-not-drop-monitor-output
Merge into: lp:lava-scheduler
Diff against target: 94 lines (+27/-13)
2 files modified
fake-dispatcher (+1/-8)
lava_scheduler_daemon/board.py (+26/-5)
To merge this branch: bzr merge lp:~mwhudson/lava-scheduler/do-not-drop-monitor-output
Reviewer Review Type Date Requested Status
Andy Doan (community) Approve
Review via email: mp+121982@code.launchpad.net

Description of the change

I'm cautiously optimistic that this branch will help with debugging bug 1043059.

To post a comment you must log in.
Revision history for this message
Andy Doan (doanac) wrote :

This change would have helped me debug a similar type of problem in the past, so I'm +1.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'fake-dispatcher'
2--- fake-dispatcher 2012-03-27 21:30:51 +0000
3+++ fake-dispatcher 2012-08-30 04:00:24 +0000
4@@ -11,15 +11,8 @@
5
6 echo starting processing $1
7 echo error >&2
8-for i in `seq 100`; do
9+for i in `seq 10`; do
10 echo p $i
11-cat $1
12-echo
13-done
14-for i in `seq 300`; do
15 sleep 1
16-echo $i
17-cat $1
18-echo
19 done
20 echo dashboard-put-result: http://disney.com >&3
21
22=== modified file 'lava_scheduler_daemon/board.py'
23--- lava_scheduler_daemon/board.py 2012-07-10 22:45:19 +0000
24+++ lava_scheduler_daemon/board.py 2012-08-30 04:00:24 +0000
25@@ -5,7 +5,7 @@
26 import tempfile
27 import logging
28
29-from twisted.internet.error import ProcessExitedAlready
30+from twisted.internet.error import ProcessDone, ProcessExitedAlready
31 from twisted.internet.protocol import ProcessProtocol
32 from twisted.internet import defer, task
33 from twisted.protocols.basic import LineReceiver
34@@ -64,7 +64,13 @@
35 self.job.cancel("exceeded log size limit")
36 self.log_file.flush()
37
38+ def processExited(self, reason):
39+ self.logger.info("processExited for %s: %s",
40+ self.job.board_name, reason.value)
41+
42 def processEnded(self, reason):
43+ self.logger.info("processEnded for %s: %s",
44+ self.job.board_name, reason.value)
45 self.log_file.close()
46 self.deferred.callback(reason.value.exitCode)
47
48@@ -170,10 +176,23 @@
49 lambda r:exit_code)
50
51
52-class SimplePP(ProcessProtocol):
53- def __init__(self, d):
54+class SchedulerMonitorPP(ProcessProtocol):
55+
56+ def __init__(self, d, board_name):
57 self.d = d
58+ self.board_name = board_name
59+ self.logger = logging.getLogger(__name__ + '.SchedulerMonitorPP')
60+
61+ def childDataReceived(self, childFD, data):
62+ self.logger.warning(
63+ "scheduler monitor for %s produced output: %r on fd %s",
64+ self.board_name, data, childFD)
65+
66 def processEnded(self, reason):
67+ if not reason.check(ProcessDone):
68+ self.logger.error(
69+ "scheduler monitor for %s crashed: %s",
70+ self.board_name, reason)
71 self.d.callback(None)
72
73
74@@ -199,6 +218,7 @@
75 with os.fdopen(fd, 'wb') as f:
76 json.dump(json_data, f)
77
78+ childFDs = {0:0, 1:1, 2:2}
79 if self.use_celery:
80 args = [
81 'setsid', 'lava', 'celery-schedulermonitor',
82@@ -210,10 +230,11 @@
83 '-l', self.daemon_options['LOG_LEVEL']]
84 if self.daemon_options['LOG_FILE_PATH']:
85 args.extend(['-f', self.daemon_options['LOG_FILE_PATH']])
86+ childFDs = None
87 self.logger.info('executing "%s"', ' '.join(args))
88 self.reactor.spawnProcess(
89- SimplePP(d), 'setsid', childFDs={0:0, 1:1, 2:2},
90- env=None, args=args)
91+ SchedulerMonitorPP(d, self.board_name), 'setsid',
92+ childFDs=childFDs, env=None, args=args)
93 d.addBoth(self._exited)
94 return d
95

Subscribers

People subscribed via source and target branches