Merge lp:~codehelp/lava-dispatcher/multinode into lp:lava-dispatcher/multinode

Proposed by Neil Williams
Status: Merged
Approved by: Senthil Kumaran S
Approved revision: 649
Merged at revision: 644
Proposed branch: lp:~codehelp/lava-dispatcher/multinode
Merge into: lp:lava-dispatcher/multinode
Diff against target: 348 lines (+52/-35)
7 files modified
lava/dispatcher/group.py (+15/-15)
lava/dispatcher/node.py (+11/-13)
lava_dispatcher/actions/lava_test_shell.py (+1/-1)
lava_dispatcher/job.py (+10/-0)
lava_dispatcher/signals/__init__.py (+2/-1)
lava_test_shell/multi_node/lava-multi-node.lib (+11/-4)
lava_test_shell/multi_node/lava-sync (+2/-1)
To merge this branch: bzr merge lp:~codehelp/lava-dispatcher/multinode
Reviewer Review Type Date Requested Status
Senthil Kumaran S Approve
Review via email: mp+171517@code.launchpad.net

Description of the change

Add debug to investigate problems with lava_sync not picking up the message over the serial connection.

To post a comment you must log in.
649. By Neil Williams

Fix missing handdown of the context to the test shell.

Revision history for this message
Senthil Kumaran S (stylesen) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lava/dispatcher/group.py'
2--- lava/dispatcher/group.py 2013-06-25 17:35:08 +0000
3+++ lava/dispatcher/group.py 2013-06-26 12:50:53 +0000
4@@ -51,25 +51,23 @@
5 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
6 s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
7 try:
8- print "binding"
9+ logging.info("binding")
10 s.bind(('localhost', self.group_port))
11 break
12 except socket.error as e:
13- print "delay=%d msg=%s" % (self.delay, e.message)
14+ logging.warn("Unable to bind, trying again with delay=%d msg=%s" % (self.delay, e.message))
15 time.sleep(self.delay)
16 self.delay *= 2
17 s.listen(1)
18- print "listening"
19 self.running = True
20 while self.running:
21- print "trying to accept"
22+ logging.info("waiting to accept new connections")
23 self.conn, addr = s.accept()
24- print "Connected", addr
25+# logging.info("Connected", addr)
26 data = str(self.conn.recv(self.blocksize))
27 try:
28 json_data = json.loads(data)
29 except ValueError:
30- print "data", data
31 logging.warn("JSON error for %s" % data)
32 self.conn.close()
33 continue
34@@ -144,7 +142,7 @@
35 logging.error("Unable to find messageID %s for client %s" % (messageID, client_name))
36 self._badRequest()
37 return
38- logging.info("Sending message %s to %s" % (messageID, client_name))
39+ logging.info("Sending message '%s' to %s in group %s" % (messageID, client_name, self.group['group']))
40 self.conn.send(json.dumps({"response": "ack", "message": self.group['messages'][client_name][messageID]}))
41 self.conn.close()
42 del self.group['messages'][client_name][messageID]
43@@ -181,13 +179,13 @@
44 Global synchronization primitive. Sends a message and waits for the same
45 message from all of the other devices.
46 """
47- logging.info("GroupDispatcher:lavaSync %s from %s" %(json.dumps(json_data), client_name))
48+ logging.debug("GroupDispatcher:lavaSync %s from %s in group %s" %(json.dumps(json_data), client_name, self.group['group']))
49 messageID = self._getMessageID(json_data)
50 message = self._getMessage(json_data)
51 # FIXME: in _sendMessage, be sure to send the messageID if message is empty
52 if not message:
53 message = messageID
54- logging.info("LavaSync request for %s at stage %s" % (client_name, messageID))
55+ logging.info("LavaSync request for '%s' at stage '%s' in group '%s'" % (client_name, messageID, self.group['group']))
56 self.group['syncs'].setdefault(messageID, {})
57 self.group['messages'].setdefault(client_name, {}).setdefault(messageID, {})
58 if len(self.group['syncs'][messageID]) >= self.group['count']:
59@@ -196,8 +194,8 @@
60 # mark this client as having picked up the message
61 self.group['syncs'][messageID][client_name] = 0
62 else:
63- logging.info("waiting: not all clients seen yet %d < %d" %
64- (len(self.group['syncs'][messageID]), self.group['count']))
65+ logging.info("waiting: not all clients in group '%s' have been seen yet %d < %d" %
66+ (self.group['group'], len(self.group['syncs'][messageID]), self.group['count']))
67 self.group['messages'][client_name][messageID] = message
68 self.group['syncs'][messageID][client_name] = 1
69 self._waitResponse()
70@@ -209,7 +207,7 @@
71 if self.group['syncs'][messageID][pending]:
72 clear_syncs = False
73 if clear_syncs:
74- logging.debug("Clearing all sync messages for %s" % messageID)
75+ logging.debug("Clearing all sync messages for '%s' in group '%s'" % (messageID, self.group['group']))
76 self.group['syncs'][messageID].clear()
77
78 def lavaWaitAll(self, json_data, client_name):
79@@ -250,7 +248,7 @@
80 If lava_wait is called first, the message will be sent when the client reconnects
81 """
82 message = self._getMessage(json_data)
83- logging.info("lavaSend handler in GroupDispatcher received a message: %s" % message)
84+ logging.info("lavaSend handler in GroupDispatcher received a message '%s' for group '%s'" % (message, self.group['group']))
85 messageID = self._getMessageID(json_data)
86 for client in self.group['clients']:
87 if messageID not in self.group['messages'][client]:
88@@ -277,7 +275,8 @@
89 if request == 'group_data':
90 self._setGroupData(json_data)
91 elif request == "lava_sync":
92- logging.info("lava_sync: %s request made by %s" % (json.dumps(json_data), client_name))
93+ logging.debug("lava_sync: %s request made by '%s' in group '%s'" %
94+ (json.dumps(json_data), client_name, self.group['group']))
95 self.lavaSync(json_data, client_name)
96 elif request == 'lava_wait_all':
97 self.lavaWaitAll(json_data, client_name)
98@@ -287,7 +286,8 @@
99 logging.info("lava_send: %s" % json_data)
100 self.lavaSend(json_data)
101 elif request == "complete":
102- logging.info("dispatcher for '%s' communication complete, closing." % client_name)
103+ logging.info("dispatcher communication for '%s' in group '%s' is complete, closing." %
104+ (client_name, self.group['group']))
105 self.conn.close()
106 else:
107 self._badRequest()
108
109=== modified file 'lava/dispatcher/node.py'
110--- lava/dispatcher/node.py 2013-06-25 17:35:23 +0000
111+++ lava/dispatcher/node.py 2013-06-26 12:50:53 +0000
112@@ -52,11 +52,11 @@
113 blocks = 1024
114
115 def __init__(self, data_str):
116- print "Poller init passed json_data", data_str
117+ logging.debug("Poller init passed json_data: %s" % data_str)
118 try:
119 self.json_data = json.loads(data_str)
120 except ValueError:
121- print "bad JSON"
122+ logging.error("bad JSON")
123 exit(1)
124 if 'port' in self.json_data:
125 self.port = self.json_data['port']
126@@ -64,27 +64,27 @@
127 self.blocks = self.json_data['blocksize']
128
129 def poll(self, msg_str):
130- print "polling %s" % json.dumps(self.json_data)
131+ logging.debug("polling %s" % json.dumps(self.json_data))
132 self.polling = True
133 while self.polling:
134 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
135 s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
136 try:
137- print "host:%s port:%s" % (self.json_data['host'], self.json_data['port'])
138+ logging.debug("socket created for host:%s port:%s" % (self.json_data['host'], self.json_data['port']))
139 s.connect(('localhost', self.json_data['port']))
140 self.delay = 1
141 except socket.error as e:
142- print "socket error on connect", e.errno
143+ logging.warn("socket error on connect: %d" % e.errno)
144 time.sleep(self.delay)
145 self.delay += 2
146 s.close()
147 continue
148- print "msg %s" % msg_str
149+ logging.debug("read message: %s" % msg_str)
150 # blocking synchronous call
151 try:
152 s.send(msg_str)
153 except socket.error as e:
154- print "socket error on send", e.errno
155+ logging.warn("socket error '%d' on send" % e.errno)
156 # self.s.shutdown(socket.SHUT_RDWR)
157 s.close()
158 continue
159@@ -92,7 +92,7 @@
160 try:
161 self.response = s.recv(self.blocks)
162 except socket.error as e:
163- print "socket error on response", e.errno
164+ logging.warn("socket error '%d' on response" % e.errno)
165 # self.s.shutdown(socket.SHUT_RDWR)
166 s.close()
167 continue
168@@ -105,7 +105,7 @@
169 logging.error("response was not JSON %s" % self.response)
170 break
171 if json_data['response'] != 'wait':
172- logging.info(json_data['response'])
173+ logging.info("Response: %s" % json_data['response'])
174 self.polling = False
175 break
176 else:
177@@ -166,13 +166,13 @@
178 def run(self):
179 init_msg = {"request": "group_data", "group_size": self.group_size}
180 init_msg.update(self.base_msg)
181- print "NodeDispatcher: init_msg", json.dumps(init_msg)
182+ logging.debug("init_msg %s" % json.dumps(init_msg))
183 self.poller.poll(json.dumps(init_msg))
184 self.run_tests(self.json_data)
185
186 def __call__(self, args):
187 try:
188- logging.info("transport handler for NodeDispatcher %s" % args)
189+ logging.debug("transport handler for NodeDispatcher %s" % args)
190 self._select(json.loads(args))
191 except KeyError:
192 logging.warn("Unable to use callable send in NodeDispatcher")
193@@ -186,7 +186,6 @@
194 return
195 if json_data['request'] == "lava_sync":
196 logging.info("requesting lava_sync")
197- # FIXME: pointless redirection unles request_sync does more work eventually.
198 self.request_sync(json_data['messageID'])
199 elif json_data['request'] == 'lava_wait':
200 logging.info("requesting lava_wait")
201@@ -276,7 +275,6 @@
202 # pass this NodeDispatcher down so that the lava_test_shell can __call__ nodeTransport to write a message
203 # job.run(self.dispatcher)
204 job.run(self)
205- logging.info("job.run has returned")
206
207 def writeMessage(self):
208 """
209
210=== modified file 'lava_dispatcher/actions/lava_test_shell.py'
211--- lava_dispatcher/actions/lava_test_shell.py 2013-06-25 10:27:04 +0000
212+++ lava_dispatcher/actions/lava_test_shell.py 2013-06-26 12:50:53 +0000
213@@ -525,7 +525,7 @@
214 logging.debug("Received signal <%s>" % name)
215 params = params.split()
216 try:
217- signal_director.signal(name, params)
218+ signal_director.signal(name, params, self.context)
219 except:
220 logging.exception("on_signal failed")
221 runner._connection.sendline('echo LAVA_ACK')
222
223=== modified file 'lava_dispatcher/job.py'
224--- lava_dispatcher/job.py 2013-06-25 17:35:23 +0000
225+++ lava_dispatcher/job.py 2013-06-26 12:50:53 +0000
226@@ -248,6 +248,7 @@
227 status = 'fail'
228 action.run(**params)
229 except ADBConnectError as err:
230+ logging.info("ADBConnectError")
231 if cmd.get('command') == 'boot_linaro_android_image':
232 logging.warning(('[ACTION-E] %s failed to create the'
233 ' adb connection') % (cmd['command']))
234@@ -266,6 +267,7 @@
235 ## mark it as pass if the second boot works
236 status = 'pass'
237 except TimeoutError as err:
238+ logging.info("TimeoutError")
239 if cmd.get('command').startswith('lava_android_test'):
240 logging.warning("[ACTION-E] %s times out." % (
241 cmd['command']))
242@@ -285,15 +287,23 @@
243 self.context.client.proc.sendline("")
244 time.sleep(5)
245 self.context.client.boot_linaro_android_image()
246+ else:
247+ logging.warn("Unhandled timeout condition")
248+ continue
249 except CriticalError as err:
250+ logging.info("CriticalError")
251 raise
252 except (pexpect.TIMEOUT, GeneralError) as err:
253+ logging.warn("pexpect timed out, pass with status %s" % status)
254 pass
255 except Exception as err:
256+ logging.info("General Exception")
257 raise
258 else:
259+ logging.info("setting status pass")
260 status = 'pass'
261 finally:
262+ logging.info("finally status %s" % status)
263 err_msg = ""
264 if status == 'fail':
265 # XXX mwhudson, 2013-01-17: I have no idea what this
266
267=== modified file 'lava_dispatcher/signals/__init__.py'
268--- lava_dispatcher/signals/__init__.py 2013-06-26 11:22:13 +0000
269+++ lava_dispatcher/signals/__init__.py 2013-06-26 12:50:53 +0000
270@@ -177,7 +177,8 @@
271 self.context.transport(json.dumps(msg))
272 target = self.client.target_device
273 with target.runner() as runner:
274- runner._connection.sendline("<LAVA_SYNC_COMPLETE>")
275+ ret = runner._connection.sendline("<LAVA_SYNC_COMPLETE>")
276+ logging.info("runner._connection.sendline wrote %d bytes" % ret)
277
278 def _on_WAIT(self, message_id, message):
279 logging.debug("Handling signal <LAVA_WAIT %s>" % message_id)
280
281=== modified file 'lava_test_shell/multi_node/lava-multi-node.lib'
282--- lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 07:50:19 +0000
283+++ lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 12:50:53 +0000
284@@ -20,14 +20,15 @@
285 }
286
287 _lava_multi_node_send () {
288-
289+echo "<_LAVA_SEND started>"
290 result=$(echo $1 | grep "..*=..*")
291+echo "<_LAVA_SEND result $result>"
292 if [ -n "$1" -a "${result}x" == "x" ] ; then
293 echo ${MESSAGE_HEAD} $@">"
294 else
295 exit 1
296 fi
297-
298+echo "<_LAVA_SEND finished>"
299 }
300
301 _lava_multi_node_print_message () {
302@@ -41,9 +42,9 @@
303 }
304
305 lava_multi_node_send () {
306-
307+echo "<LAVA_SEND preparing>"
308 _lava_multi_node_send $@
309-
310+echo "<LAVA_SEND waiting for ack>"
311 while [ -n "$MESSAGE_NEED_ACK" ] ; do
312 read -t $MESSAGE_TIMEOUT line
313 result=$(echo $line | grep "${MESSAGE_ACK}")
314@@ -53,9 +54,13 @@
315 fi
316 _lava_multi_node_send $@
317 done
318+echo "<LAVA_SEND finished>"
319+date
320 }
321
322 lava_multi_node_wait_for_signal () {
323+date
324+echo "<LAVA_DEBUG starting to wait>"
325 while read line; do
326 result=$(echo $line | grep "${MESSAGE_REPLY}>")
327 if [ "${result}x" != "x" ] ; then
328@@ -65,6 +70,8 @@
329 break
330 fi
331 done
332+echo "<LAVA_DEBUG waiting over>"
333+date
334 }
335
336 lava_multi_node_wait_for_message () {
337
338=== modified file 'lava_test_shell/multi_node/lava-sync'
339--- lava_test_shell/multi_node/lava-sync 2013-06-21 13:23:41 +0000
340+++ lava_test_shell/multi_node/lava-sync 2013-06-26 12:50:53 +0000
341@@ -16,5 +16,6 @@
342 source $LAVA_TEST_BIN/lava-multi-node.lib
343
344 lava_multi_node_send $1
345-
346+echo "<LAVA_SYNC_DEBUG send complete>"
347 lava_multi_node_wait_for_signal
348+echo "<LAVA_SYNC_DEBUG wait complete>"

Subscribers

People subscribed via source and target branches

to status/vote changes: