LAVA Dispatcher

Merge lp:~codehelp/lava-dispatcher/multinode into lp:lava-dispatcher/multinode

multinode
Merge into multinode

Proposed by Neil Williams on 2013-06-26

Status:	Merged
Approved by:	Senthil Kumaran S on 2013-06-26
Approved revision:	649
Merged at revision:	644
Proposed branch:	lp:~codehelp/lava-dispatcher/multinode
Merge into:	lp:lava-dispatcher/multinode
Diff against target:	348 lines (+52/-35) 7 files modified lava/dispatcher/group.py (+15/-15) lava/dispatcher/node.py (+11/-13) lava_dispatcher/actions/lava_test_shell.py (+1/-1) lava_dispatcher/job.py (+10/-0) lava_dispatcher/signals/__init__.py (+2/-1) lava_test_shell/multi_node/lava-multi-node.lib (+11/-4) lava_test_shell/multi_node/lava-sync (+2/-1)
To merge this branch:	bzr merge lp:~codehelp/lava-dispatcher/multinode
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Senthil Kumaran S		2013-06-26	Approve on 2013-06-26
Review via email: mp+171517@code.launchpad.net

Description of the change

Add debug to investigate problems with lava_sync not picking up the message over the serial connection.

lp:~codehelp/lava-dispatcher/multinode updated on 2013-06-26

649. By Neil Williams on 2013-06-26: Fix missing handdown of the context to the test shell.

Revision history for this message

Senthil Kumaran S (stylesen) on 2013-06-26:

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Antonio Terceiro

Neil Williams

to status/vote changes:

Fu Wei

Senthil Kumaran S

1	=== modified file 'lava/dispatcher/group.py'
2	--- lava/dispatcher/group.py 2013-06-25 17:35:08 +0000
3	+++ lava/dispatcher/group.py 2013-06-26 12:50:53 +0000
4	@@ -51,25 +51,23 @@
5	s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
6	s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
7	try:
8	- print "binding"
9	+ logging.info("binding")
10	s.bind(('localhost', self.group_port))
11	break
12	except socket.error as e:
13	- print "delay=%d msg=%s" % (self.delay, e.message)
14	+ logging.warn("Unable to bind, trying again with delay=%d msg=%s" % (self.delay, e.message))
15	time.sleep(self.delay)
16	self.delay *= 2
17	s.listen(1)
18	- print "listening"
19	self.running = True
20	while self.running:
21	- print "trying to accept"
22	+ logging.info("waiting to accept new connections")
23	self.conn, addr = s.accept()
24	- print "Connected", addr
25	+# logging.info("Connected", addr)
26	data = str(self.conn.recv(self.blocksize))
27	try:
28	json_data = json.loads(data)
29	except ValueError:
30	- print "data", data
31	logging.warn("JSON error for %s" % data)
32	self.conn.close()
33	continue
34	@@ -144,7 +142,7 @@
35	logging.error("Unable to find messageID %s for client %s" % (messageID, client_name))
36	self._badRequest()
37	return
38	- logging.info("Sending message %s to %s" % (messageID, client_name))
39	+ logging.info("Sending message '%s' to %s in group %s" % (messageID, client_name, self.group['group']))
40	self.conn.send(json.dumps({"response": "ack", "message": self.group['messages'][client_name][messageID]}))
41	self.conn.close()
42	del self.group['messages'][client_name][messageID]
43	@@ -181,13 +179,13 @@
44	Global synchronization primitive. Sends a message and waits for the same
45	message from all of the other devices.
46	"""
47	- logging.info("GroupDispatcher:lavaSync %s from %s" %(json.dumps(json_data), client_name))
48	+ logging.debug("GroupDispatcher:lavaSync %s from %s in group %s" %(json.dumps(json_data), client_name, self.group['group']))
49	messageID = self._getMessageID(json_data)
50	message = self._getMessage(json_data)
51	# FIXME: in _sendMessage, be sure to send the messageID if message is empty
52	if not message:
53	message = messageID
54	- logging.info("LavaSync request for %s at stage %s" % (client_name, messageID))
55	+ logging.info("LavaSync request for '%s' at stage '%s' in group '%s'" % (client_name, messageID, self.group['group']))
56	self.group['syncs'].setdefault(messageID, {})
57	self.group['messages'].setdefault(client_name, {}).setdefault(messageID, {})
58	if len(self.group['syncs'][messageID]) >= self.group['count']:
59	@@ -196,8 +194,8 @@
60	# mark this client as having picked up the message
61	self.group['syncs'][messageID][client_name] = 0
62	else:
63	- logging.info("waiting: not all clients seen yet %d < %d" %
64	- (len(self.group['syncs'][messageID]), self.group['count']))
65	+ logging.info("waiting: not all clients in group '%s' have been seen yet %d < %d" %
66	+ (self.group['group'], len(self.group['syncs'][messageID]), self.group['count']))
67	self.group['messages'][client_name][messageID] = message
68	self.group['syncs'][messageID][client_name] = 1
69	self._waitResponse()
70	@@ -209,7 +207,7 @@
71	if self.group['syncs'][messageID][pending]:
72	clear_syncs = False
73	if clear_syncs:
74	- logging.debug("Clearing all sync messages for %s" % messageID)
75	+ logging.debug("Clearing all sync messages for '%s' in group '%s'" % (messageID, self.group['group']))
76	self.group['syncs'][messageID].clear()
77
78	def lavaWaitAll(self, json_data, client_name):
79	@@ -250,7 +248,7 @@
80	If lava_wait is called first, the message will be sent when the client reconnects
81	"""
82	message = self._getMessage(json_data)
83	- logging.info("lavaSend handler in GroupDispatcher received a message: %s" % message)
84	+ logging.info("lavaSend handler in GroupDispatcher received a message '%s' for group '%s'" % (message, self.group['group']))
85	messageID = self._getMessageID(json_data)
86	for client in self.group['clients']:
87	if messageID not in self.group['messages'][client]:
88	@@ -277,7 +275,8 @@
89	if request == 'group_data':
90	self._setGroupData(json_data)
91	elif request == "lava_sync":
92	- logging.info("lava_sync: %s request made by %s" % (json.dumps(json_data), client_name))
93	+ logging.debug("lava_sync: %s request made by '%s' in group '%s'" %
94	+ (json.dumps(json_data), client_name, self.group['group']))
95	self.lavaSync(json_data, client_name)
96	elif request == 'lava_wait_all':
97	self.lavaWaitAll(json_data, client_name)
98	@@ -287,7 +286,8 @@
99	logging.info("lava_send: %s" % json_data)
100	self.lavaSend(json_data)
101	elif request == "complete":
102	- logging.info("dispatcher for '%s' communication complete, closing." % client_name)
103	+ logging.info("dispatcher communication for '%s' in group '%s' is complete, closing." %
104	+ (client_name, self.group['group']))
105	self.conn.close()
106	else:
107	self._badRequest()
108
109	=== modified file 'lava/dispatcher/node.py'
110	--- lava/dispatcher/node.py 2013-06-25 17:35:23 +0000
111	+++ lava/dispatcher/node.py 2013-06-26 12:50:53 +0000
112	@@ -52,11 +52,11 @@
113	blocks = 1024
114
115	def __init__(self, data_str):
116	- print "Poller init passed json_data", data_str
117	+ logging.debug("Poller init passed json_data: %s" % data_str)
118	try:
119	self.json_data = json.loads(data_str)
120	except ValueError:
121	- print "bad JSON"
122	+ logging.error("bad JSON")
123	exit(1)
124	if 'port' in self.json_data:
125	self.port = self.json_data['port']
126	@@ -64,27 +64,27 @@
127	self.blocks = self.json_data['blocksize']
128
129	def poll(self, msg_str):
130	- print "polling %s" % json.dumps(self.json_data)
131	+ logging.debug("polling %s" % json.dumps(self.json_data))
132	self.polling = True
133	while self.polling:
134	s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
135	s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
136	try:
137	- print "host:%s port:%s" % (self.json_data['host'], self.json_data['port'])
138	+ logging.debug("socket created for host:%s port:%s" % (self.json_data['host'], self.json_data['port']))
139	s.connect(('localhost', self.json_data['port']))
140	self.delay = 1
141	except socket.error as e:
142	- print "socket error on connect", e.errno
143	+ logging.warn("socket error on connect: %d" % e.errno)
144	time.sleep(self.delay)
145	self.delay += 2
146	s.close()
147	continue
148	- print "msg %s" % msg_str
149	+ logging.debug("read message: %s" % msg_str)
150	# blocking synchronous call
151	try:
152	s.send(msg_str)
153	except socket.error as e:
154	- print "socket error on send", e.errno
155	+ logging.warn("socket error '%d' on send" % e.errno)
156	# self.s.shutdown(socket.SHUT_RDWR)
157	s.close()
158	continue
159	@@ -92,7 +92,7 @@
160	try:
161	self.response = s.recv(self.blocks)
162	except socket.error as e:
163	- print "socket error on response", e.errno
164	+ logging.warn("socket error '%d' on response" % e.errno)
165	# self.s.shutdown(socket.SHUT_RDWR)
166	s.close()
167	continue
168	@@ -105,7 +105,7 @@
169	logging.error("response was not JSON %s" % self.response)
170	break
171	if json_data['response'] != 'wait':
172	- logging.info(json_data['response'])
173	+ logging.info("Response: %s" % json_data['response'])
174	self.polling = False
175	break
176	else:
177	@@ -166,13 +166,13 @@
178	def run(self):
179	init_msg = {"request": "group_data", "group_size": self.group_size}
180	init_msg.update(self.base_msg)
181	- print "NodeDispatcher: init_msg", json.dumps(init_msg)
182	+ logging.debug("init_msg %s" % json.dumps(init_msg))
183	self.poller.poll(json.dumps(init_msg))
184	self.run_tests(self.json_data)
185
186	def __call__(self, args):
187	try:
188	- logging.info("transport handler for NodeDispatcher %s" % args)
189	+ logging.debug("transport handler for NodeDispatcher %s" % args)
190	self._select(json.loads(args))
191	except KeyError:
192	logging.warn("Unable to use callable send in NodeDispatcher")
193	@@ -186,7 +186,6 @@
194	return
195	if json_data['request'] == "lava_sync":
196	logging.info("requesting lava_sync")
197	- # FIXME: pointless redirection unles request_sync does more work eventually.
198	self.request_sync(json_data['messageID'])
199	elif json_data['request'] == 'lava_wait':
200	logging.info("requesting lava_wait")
201	@@ -276,7 +275,6 @@
202	# pass this NodeDispatcher down so that the lava_test_shell can __call__ nodeTransport to write a message
203	# job.run(self.dispatcher)
204	job.run(self)
205	- logging.info("job.run has returned")
206
207	def writeMessage(self):
208	"""
209
210	=== modified file 'lava_dispatcher/actions/lava_test_shell.py'
211	--- lava_dispatcher/actions/lava_test_shell.py 2013-06-25 10:27:04 +0000
212	+++ lava_dispatcher/actions/lava_test_shell.py 2013-06-26 12:50:53 +0000
213	@@ -525,7 +525,7 @@
214	logging.debug("Received signal <%s>" % name)
215	params = params.split()
216	try:
217	- signal_director.signal(name, params)
218	+ signal_director.signal(name, params, self.context)
219	except:
220	logging.exception("on_signal failed")
221	runner._connection.sendline('echo LAVA_ACK')
222
223	=== modified file 'lava_dispatcher/job.py'
224	--- lava_dispatcher/job.py 2013-06-25 17:35:23 +0000
225	+++ lava_dispatcher/job.py 2013-06-26 12:50:53 +0000
226	@@ -248,6 +248,7 @@
227	status = 'fail'
228	action.run(**params)
229	except ADBConnectError as err:
230	+ logging.info("ADBConnectError")
231	if cmd.get('command') == 'boot_linaro_android_image':
232	logging.warning(('[ACTION-E] %s failed to create the'
233	' adb connection') % (cmd['command']))
234	@@ -266,6 +267,7 @@
235	## mark it as pass if the second boot works
236	status = 'pass'
237	except TimeoutError as err:
238	+ logging.info("TimeoutError")
239	if cmd.get('command').startswith('lava_android_test'):
240	logging.warning("[ACTION-E] %s times out." % (
241	cmd['command']))
242	@@ -285,15 +287,23 @@
243	self.context.client.proc.sendline("")
244	time.sleep(5)
245	self.context.client.boot_linaro_android_image()
246	+ else:
247	+ logging.warn("Unhandled timeout condition")
248	+ continue
249	except CriticalError as err:
250	+ logging.info("CriticalError")
251	raise
252	except (pexpect.TIMEOUT, GeneralError) as err:
253	+ logging.warn("pexpect timed out, pass with status %s" % status)
254	pass
255	except Exception as err:
256	+ logging.info("General Exception")
257	raise
258	else:
259	+ logging.info("setting status pass")
260	status = 'pass'
261	finally:
262	+ logging.info("finally status %s" % status)
263	err_msg = ""
264	if status == 'fail':
265	# XXX mwhudson, 2013-01-17: I have no idea what this
266
267	=== modified file 'lava_dispatcher/signals/__init__.py'
268	--- lava_dispatcher/signals/__init__.py 2013-06-26 11:22:13 +0000
269	+++ lava_dispatcher/signals/__init__.py 2013-06-26 12:50:53 +0000
270	@@ -177,7 +177,8 @@
271	self.context.transport(json.dumps(msg))
272	target = self.client.target_device
273	with target.runner() as runner:
274	- runner._connection.sendline("<LAVA_SYNC_COMPLETE>")
275	+ ret = runner._connection.sendline("<LAVA_SYNC_COMPLETE>")
276	+ logging.info("runner._connection.sendline wrote %d bytes" % ret)
277
278	def _on_WAIT(self, message_id, message):
279	logging.debug("Handling signal <LAVA_WAIT %s>" % message_id)
280
281	=== modified file 'lava_test_shell/multi_node/lava-multi-node.lib'
282	--- lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 07:50:19 +0000
283	+++ lava_test_shell/multi_node/lava-multi-node.lib 2013-06-26 12:50:53 +0000
284	@@ -20,14 +20,15 @@
285	}
286
287	_lava_multi_node_send () {
288	-
289	+echo "<_LAVA_SEND started>"
290	result=$(echo $1 \| grep "..=..")
291	+echo "<_LAVA_SEND result $result>"
292	if [ -n "$1" -a "${result}x" == "x" ] ; then
293	echo ${MESSAGE_HEAD} $@">"
294	else
295	exit 1
296	fi
297	-
298	+echo "<_LAVA_SEND finished>"
299	}
300
301	_lava_multi_node_print_message () {
302	@@ -41,9 +42,9 @@
303	}
304
305	lava_multi_node_send () {
306	-
307	+echo "<LAVA_SEND preparing>"
308	_lava_multi_node_send $@
309	-
310	+echo "<LAVA_SEND waiting for ack>"
311	while [ -n "$MESSAGE_NEED_ACK" ] ; do
312	read -t $MESSAGE_TIMEOUT line
313	result=$(echo $line \| grep "${MESSAGE_ACK}")
314	@@ -53,9 +54,13 @@
315	fi
316	_lava_multi_node_send $@
317	done
318	+echo "<LAVA_SEND finished>"
319	+date
320	}
321
322	lava_multi_node_wait_for_signal () {
323	+date
324	+echo "<LAVA_DEBUG starting to wait>"
325	while read line; do
326	result=$(echo $line \| grep "${MESSAGE_REPLY}>")
327	if [ "${result}x" != "x" ] ; then
328	@@ -65,6 +70,8 @@
329	break
330	fi
331	done
332	+echo "<LAVA_DEBUG waiting over>"
333	+date
334	}
335
336	lava_multi_node_wait_for_message () {
337
338	=== modified file 'lava_test_shell/multi_node/lava-sync'
339	--- lava_test_shell/multi_node/lava-sync 2013-06-21 13:23:41 +0000
340	+++ lava_test_shell/multi_node/lava-sync 2013-06-26 12:50:53 +0000
341	@@ -16,5 +16,6 @@
342	source $LAVA_TEST_BIN/lava-multi-node.lib
343
344	lava_multi_node_send $1
345	-
346	+echo "<LAVA_SYNC_DEBUG send complete>"
347	lava_multi_node_wait_for_signal
348	+echo "<LAVA_SYNC_DEBUG wait complete>"

LAVA Dispatcher

Merge lp:~codehelp/lava-dispatcher/multinode into lp:lava-dispatcher/multinode

Commit message

Description of the change

Preview Diff

Subscribers