Merge lp:~codehelp/lava-dispatcher/multinode into lp:lava-dispatcher/multinode
- multinode
- Merge into multinode
Proposed by
Neil Williams
Status: | Merged |
---|---|
Approved by: | Senthil Kumaran S |
Approved revision: | 649 |
Merged at revision: | 644 |
Proposed branch: | lp:~codehelp/lava-dispatcher/multinode |
Merge into: | lp:lava-dispatcher/multinode |
Diff against target: |
348 lines (+52/-35) 7 files modified
lava/dispatcher/group.py (+15/-15) lava/dispatcher/node.py (+11/-13) lava_dispatcher/actions/lava_test_shell.py (+1/-1) lava_dispatcher/job.py (+10/-0) lava_dispatcher/signals/__init__.py (+2/-1) lava_test_shell/multi_node/lava-multi-node.lib (+11/-4) lava_test_shell/multi_node/lava-sync (+2/-1) |
To merge this branch: | bzr merge lp:~codehelp/lava-dispatcher/multinode |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Senthil Kumaran S | Approve | ||
Review via email: mp+171517@code.launchpad.net |
Commit message
Description of the change
Add debug to investigate problems with lava_sync not picking up the message over the serial connection.
To post a comment you must log in.
- 649. By Neil Williams
-
Fix missing handdown of the context to the test shell.
Revision history for this message
Senthil Kumaran S (stylesen) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lava/dispatcher/group.py' |
2 | --- lava/dispatcher/group.py 2013-06-25 17:35:08 +0000 |
3 | +++ lava/dispatcher/group.py 2013-06-26 12:50:53 +0000 |
4 | @@ -51,25 +51,23 @@ |
5 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
6 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) |
7 | try: |
8 | - print "binding" |
9 | + logging.info("binding") |
10 | s.bind(('localhost', self.group_port)) |
11 | break |
12 | except socket.error as e: |
13 | - print "delay=%d msg=%s" % (self.delay, e.message) |
14 | + logging.warn("Unable to bind, trying again with delay=%d msg=%s" % (self.delay, e.message)) |
15 | time.sleep(self.delay) |
16 | self.delay *= 2 |
17 | s.listen(1) |
18 | - print "listening" |
19 | self.running = True |
20 | while self.running: |
21 | - print "trying to accept" |
22 | + logging.info("waiting to accept new connections") |
23 | self.conn, addr = s.accept() |
24 | - print "Connected", addr |
25 | +# logging.info("Connected", addr) |
26 | data = str(self.conn.recv(self.blocksize)) |
27 | try: |
28 | json_data = json.loads(data) |
29 | except ValueError: |
30 | - print "data", data |
31 | logging.warn("JSON error for %s" % data) |
32 | self.conn.close() |
33 | continue |
34 | @@ -144,7 +142,7 @@ |
35 | logging.error("Unable to find messageID %s for client %s" % (messageID, client_name)) |
36 | self._badRequest() |
37 | return |
38 | - logging.info("Sending message %s to %s" % (messageID, client_name)) |
39 | + logging.info("Sending message '%s' to %s in group %s" % (messageID, client_name, self.group['group'])) |
40 | self.conn.send(json.dumps({"response": "ack", "message": self.group['messages'][client_name][messageID]})) |
41 | self.conn.close() |
42 | del self.group['messages'][client_name][messageID] |
43 | @@ -181,13 +179,13 @@ |
44 | Global synchronization primitive. Sends a message and waits for the same |
45 | message from all of the other devices. |
46 | """ |
47 | - logging.info("GroupDispatcher:lavaSync %s from %s" %(json.dumps(json_data), client_name)) |
48 | + logging.debug("GroupDispatcher:lavaSync %s from %s in group %s" %(json.dumps(json_data), client_name, self.group['group'])) |
49 | messageID = self._getMessageID(json_data) |
50 | message = self._getMessage(json_data) |
51 | # FIXME: in _sendMessage, be sure to send the messageID if message is empty |
52 | if not message: |
53 | message = messageID |
54 | - logging.info("LavaSync request for %s at stage %s" % (client_name, messageID)) |
55 | + logging.info("LavaSync request for '%s' at stage '%s' in group '%s'" % (client_name, messageID, self.group['group'])) |
56 | self.group['syncs'].setdefault(messageID, {}) |
57 | self.group['messages'].setdefault(client_name, {}).setdefault(messageID, {}) |
58 | if len(self.group['syncs'][messageID]) >= self.group['count']: |
59 | @@ -196,8 +194,8 @@ |
60 | # mark this client as having picked up the message |
61 | self.group['syncs'][messageID][client_name] = 0 |
62 | else: |
63 | - logging.info("waiting: not all clients seen yet %d < %d" % |
64 | - (len(self.group['syncs'][messageID]), self.group['count'])) |
65 | + logging.info("waiting: not all clients in group '%s' have been seen yet %d < %d" % |
66 | + (self.group['group'], len(self.group['syncs'][messageID]), self.group['count'])) |
67 | self.group['messages'][client_name][messageID] = message |
68 | self.group['syncs'][messageID][client_name] = 1 |
69 | self._waitResponse() |
70 | @@ -209,7 +207,7 @@ |
71 | if self.group['syncs'][messageID][pending]: |
72 | clear_syncs = False |
73 | if clear_syncs: |
74 | - logging.debug("Clearing all sync messages for %s" % messageID) |
75 | + logging.debug("Clearing all sync messages for '%s' in group '%s'" % (messageID, self.group['group'])) |
76 | self.group['syncs'][messageID].clear() |
77 | |
78 | def lavaWaitAll(self, json_data, client_name): |
79 | @@ -250,7 +248,7 @@ |
80 | If lava_wait is called first, the message will be sent when the client reconnects |
81 | """ |
82 | message = self._getMessage(json_data) |
83 | - logging.info("lavaSend handler in GroupDispatcher received a message: %s" % message) |
84 | + logging.info("lavaSend handler in GroupDispatcher received a message '%s' for group '%s'" % (message, self.group['group'])) |
85 | messageID = self._getMessageID(json_data) |
86 | for client in self.group['clients']: |
87 | if messageID not in self.group['messages'][client]: |
88 | @@ -277,7 +275,8 @@ |
89 | if request == 'group_data': |
90 | self._setGroupData(json_data) |
91 | elif request == "lava_sync": |
92 | - logging.info("lava_sync: %s request made by %s" % (json.dumps(json_data), client_name)) |
93 | + logging.debug("lava_sync: %s request made by '%s' in group '%s'" % |
94 | + (json.dumps(json_data), client_name, self.group['group'])) |
95 | self.lavaSync(json_data, client_name) |
96 | elif request == 'lava_wait_all': |
97 | self.lavaWaitAll(json_data, client_name) |
98 | @@ -287,7 +286,8 @@ |
99 | logging.info("lava_send: %s" % json_data) |
100 | self.lavaSend(json_data) |
101 | elif request == "complete": |
102 | - logging.info("dispatcher for '%s' communication complete, closing." % client_name) |
103 | + logging.info("dispatcher communication for '%s' in group '%s' is complete, closing." % |
104 | + (client_name, self.group['group'])) |
105 | self.conn.close() |
106 | else: |
107 | self._badRequest() |
108 | |
109 | === modified file 'lava/dispatcher/node.py' |
110 | --- lava/dispatcher/node.py 2013-06-25 17:35:23 +0000 |
111 | +++ lava/dispatcher/node.py 2013-06-26 12:50:53 +0000 |
112 | @@ -52,11 +52,11 @@ |
113 | blocks = 1024 |
114 | |
115 | def __init__(self, data_str): |
116 | - print "Poller init passed json_data", data_str |
117 | + logging.debug("Poller init passed json_data: %s" % data_str) |
118 | try: |
119 | self.json_data = json.loads(data_str) |
120 | except ValueError: |
121 | - print "bad JSON" |
122 | + logging.error("bad JSON") |
123 | exit(1) |
124 | if 'port' in self.json_data: |
125 | self.port = self.json_data['port'] |
126 | @@ -64,27 +64,27 @@ |
127 | self.blocks = self.json_data['blocksize'] |
128 | |
129 | def poll(self, msg_str): |
130 | - print "polling %s" % json.dumps(self.json_data) |
131 | + logging.debug("polling %s" % json.dumps(self.json_data)) |
132 | self.polling = True |
133 | while self.polling: |
134 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
135 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) |
136 | try: |
137 | - print "host:%s port:%s" % (self.json_data['host'], self.json_data['port']) |
138 | + logging.debug("socket created for host:%s port:%s" % (self.json_data['host'], self.json_data['port'])) |
139 | s.connect(('localhost', self.json_data['port'])) |
140 | self.delay = 1 |
141 | except socket.error as e: |
142 | - print "socket error on connect", e.errno |
143 | + logging.warn("socket error on connect: %d" % e.errno) |
144 | time.sleep(self.delay) |
145 | self.delay += 2 |
146 | s.close() |
147 | continue |
148 | - print "msg %s" % msg_str |
149 | + logging.debug("read message: %s" % msg_str) |
150 | # blocking synchronous call |
151 | try: |
152 | s.send(msg_str) |
153 | except socket.error as e: |
154 | - print "socket error on send", e.errno |
155 | + logging.warn("socket error '%d' on send" % e.errno) |
156 | # self.s.shutdown(socket.SHUT_RDWR) |
157 | s.close() |
158 | continue |
159 | @@ -92,7 +92,7 @@ |
160 | try: |
161 | self.response = s.recv(self.blocks) |
162 | except socket.error as e: |
163 | - print "socket error on response", e.errno |
164 | + logging.warn("socket error '%d' on response" % e.errno) |
165 | # self.s.shutdown(socket.SHUT_RDWR) |
166 | s.close() |
167 | continue |
168 | @@ -105,7 +105,7 @@ |
169 | logging.error("response was not JSON %s" % self.response) |
170 | break |
171 | if json_data['response'] != 'wait': |
172 | - logging.info(json_data['response']) |
173 | + logging.info("Response: %s" % json_data['response']) |
174 | self.polling = False |
175 | break |
176 | else: |
177 | @@ -166,13 +166,13 @@ |
178 | def run(self): |
179 | init_msg = {"request": "group_data", "group_size": self.group_size} |
180 | init_msg.update(self.base_msg) |
181 | - print "NodeDispatcher: init_msg", json.dumps(init_msg) |
182 | + logging.debug("init_msg %s" % json.dumps(init_msg)) |
183 | self.poller.poll(json.dumps(init_msg)) |
184 | self.run_tests(self.json_data) |
185 | |
186 | def __call__(self, args): |
187 | try: |
188 | - logging.info("transport handler for NodeDispatcher %s" % args) |
189 | + logging.debug("transport handler for NodeDispatcher %s" % args) |
190 | self._select(json.loads(args)) |
191 | except KeyError: |
192 | logging.warn("Unable to use callable send in NodeDispatcher") |
193 | @@ -186,7 +186,6 @@ |
194 | return |
195 | if json_data['request'] == "lava_sync": |
196 | logging.info("requesting lava_sync") |
197 | - # FIXME: pointless redirection unles request_sync does more work eventually. |
198 | self.request_sync(json_data['messageID']) |
199 | elif json_data['request'] == 'lava_wait': |
200 | logging.info("requesting lava_wait") |
201 | @@ -276,7 +275,6 @@ |
202 | # pass this NodeDispatcher down so that the lava_test_shell can __call__ nodeTransport to write a message |
203 | # job.run(self.dispatcher) |
204 | job.run(self) |
205 | - logging.info("job.run has returned") |
206 | |
207 | def writeMessage(self): |
208 | """ |
209 | |
210 | === modified file 'lava_dispatcher/actions/lava_test_shell.py' |
211 | --- lava_dispatcher/actions/lava_test_shell.py 2013-06-25 10:27:04 +0000 |
212 | +++ lava_dispatcher/actions/lava_test_shell.py 2013-06-26 12:50:53 +0000 |
213 | @@ -525,7 +525,7 @@ |
214 | logging.debug("Received signal <%s>" % name) |
215 | params = params.split() |
216 | try: |
217 | - signal_director.signal(name, params) |
218 | + signal_director.signal(name, params, self.context) |
219 | except: |
220 | logging.exception("on_signal failed") |
221 | runner._connection.sendline('echo LAVA_ACK') |
222 | |
223 | === modified file 'lava_dispatcher/job.py' |
224 | --- lava_dispatcher/job.py 2013-06-25 17:35:23 +0000 |
225 | +++ lava_dispatcher/job.py 2013-06-26 12:50:53 +0000 |
226 | @@ -248,6 +248,7 @@ |
227 | status = 'fail' |
228 | action.run(**params) |
229 | except ADBConnectError as err: |
230 | + logging.info("ADBConnectError") |
231 | if cmd.get('command') == 'boot_linaro_android_image': |
232 | logging.warning(('[ACTION-E] %s failed to create the' |
233 | ' adb connection') % (cmd['command'])) |
234 | @@ -266,6 +267,7 @@ |
235 | ## mark it as pass if the second boot works |
236 | status = 'pass' |
237 | except TimeoutError as err: |
238 | + logging.info("TimeoutError") |
239 | if cmd.get('command').startswith('lava_android_test'): |
240 | logging.warning("[ACTION-E] %s times out." % ( |
241 | cmd['command'])) |
242 | @@ -285,15 +287,23 @@ |
243 | self.context.client.proc.sendline("") |
244 | time.sleep(5) |
245 | self.context.client.boot_linaro_android_image() |
246 | + else: |
247 | + logging.warn("Unhandled timeout condition") |
248 | + continue |
249 | except CriticalError as err: |
250 | + logging.info("CriticalError") |
251 | raise |
252 | except (pexpect.TIMEOUT, GeneralError) as err: |
253 | + logging.warn("pexpect timed out, pass with status %s" % status) |
254 | pass |
255 | except Exception as err: |
256 | + logging.info("General Exception") |
257 | raise |
258 | else: |
259 | + logging.info("setting status pass") |
260 | status = 'pass' |
261 | finally: |
262 | + logging.info("finally status %s" % status) |
263 | err_msg = "" |
264 | if status == 'fail': |
265 | # XXX mwhudson, 2013-01-17: I have no idea what this |
266 | |
267 | === modified file 'lava_dispatcher/signals/__init__.py' |
268 | --- lava_dispatcher/signals/__init__.py 2013-06-26 11:22:13 +0000 |
269 | +++ lava_dispatcher/signals/__init__.py 2013-06-26 12:50:53 +0000 |
270 | @@ -177,7 +177,8 @@ |
271 | self.context.transport(json.dumps(msg)) |
272 | target = self.client.target_device |
273 | with target.runner() as runner: |
274 | - runner._connection.sendline("<LAVA_SYNC_COMPLETE>") |
275 | + ret = runner._connection.sendline("<LAVA_SYNC_COMPLETE>") |
276 | + logging.info("runner._connection.sendline wrote %d bytes" % ret) |
277 | |
278 | def _on_WAIT(self, message_id, message): |
279 | logging.debug("Handling signal <LAVA_WAIT %s>" % message_id) |
280 | |
281 | === modified file 'lava_test_shell/multi_node/lava-multi-node.lib' |
282 | --- lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 07:50:19 +0000 |
283 | +++ lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 12:50:53 +0000 |
284 | @@ -20,14 +20,15 @@ |
285 | } |
286 | |
287 | _lava_multi_node_send () { |
288 | - |
289 | +echo "<_LAVA_SEND started>" |
290 | result=$(echo $1 | grep "..*=..*") |
291 | +echo "<_LAVA_SEND result $result>" |
292 | if [ -n "$1" -a "${result}x" == "x" ] ; then |
293 | echo ${MESSAGE_HEAD} $@">" |
294 | else |
295 | exit 1 |
296 | fi |
297 | - |
298 | +echo "<_LAVA_SEND finished>" |
299 | } |
300 | |
301 | _lava_multi_node_print_message () { |
302 | @@ -41,9 +42,9 @@ |
303 | } |
304 | |
305 | lava_multi_node_send () { |
306 | - |
307 | +echo "<LAVA_SEND preparing>" |
308 | _lava_multi_node_send $@ |
309 | - |
310 | +echo "<LAVA_SEND waiting for ack>" |
311 | while [ -n "$MESSAGE_NEED_ACK" ] ; do |
312 | read -t $MESSAGE_TIMEOUT line |
313 | result=$(echo $line | grep "${MESSAGE_ACK}") |
314 | @@ -53,9 +54,13 @@ |
315 | fi |
316 | _lava_multi_node_send $@ |
317 | done |
318 | +echo "<LAVA_SEND finished>" |
319 | +date |
320 | } |
321 | |
322 | lava_multi_node_wait_for_signal () { |
323 | +date |
324 | +echo "<LAVA_DEBUG starting to wait>" |
325 | while read line; do |
326 | result=$(echo $line | grep "${MESSAGE_REPLY}>") |
327 | if [ "${result}x" != "x" ] ; then |
328 | @@ -65,6 +70,8 @@ |
329 | break |
330 | fi |
331 | done |
332 | +echo "<LAVA_DEBUG waiting over>" |
333 | +date |
334 | } |
335 | |
336 | lava_multi_node_wait_for_message () { |
337 | |
338 | === modified file 'lava_test_shell/multi_node/lava-sync' |
339 | --- lava_test_shell/multi_node/lava-sync 2013-06-21 13:23:41 +0000 |
340 | +++ lava_test_shell/multi_node/lava-sync 2013-06-26 12:50:53 +0000 |
341 | @@ -16,5 +16,6 @@ |
342 | source $LAVA_TEST_BIN/lava-multi-node.lib |
343 | |
344 | lava_multi_node_send $1 |
345 | - |
346 | +echo "<LAVA_SYNC_DEBUG send complete>" |
347 | lava_multi_node_wait_for_signal |
348 | +echo "<LAVA_SYNC_DEBUG wait complete>" |