Merge lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues into lp:lava-dispatcher

Proposed by Le Chi Thu
Status: Merged
Approved by: Michael Hudson-Doyle
Approved revision: 296
Merged at revision: 297
Proposed branch: lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues
Merge into: lp:lava-dispatcher
Diff against target: 118 lines (+26/-10)
3 files modified
doc/changes.rst (+2/-0)
lava_dispatcher/client/base.py (+2/-1)
lava_dispatcher/client/master.py (+22/-9)
To merge this branch: bzr merge lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle (community) Approve
Zygmunt Krynicki (community) Approve
Review via email: mp+106808@code.launchpad.net

Description of the change

Fixed reboot issues such as:
* Schrodinger's boot. Example: http://validation.linaro.org/lava-server/scheduler/job/20201
* Hard reboot does not clear the pexpect buffer.
* Timeouts were too short for commands issued right after a reboot (now increased).

Tested in staging with 100 jobs; only 6-9 jobs failed, and those failures had other causes (such as home screen and wget issues).

To post a comment you must log in.
Revision history for this message
Zygmunt Krynicki (zyga) wrote :

I guess this could be split into "increase timeouts for everything" and "assorted changes"

This can go in, I guess; no harm if that helps to cure some of the effects we're seeing.

review: Approve
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Only one comment really: sendcontrol('c') is a bit clearer than sendline('\003').

Otherwise seems good, thanks for chasing the issues.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'doc/changes.rst'
2--- doc/changes.rst 2012-05-17 21:25:09 +0000
3+++ doc/changes.rst 2012-05-22 13:22:20 +0000
4@@ -1,6 +1,8 @@
5 Version History
6 ***************
7
8+* Fixed reboot issues
9+
10 .. _version_0_7_1:
11
12 Version 0.7.1
13
14=== modified file 'lava_dispatcher/client/base.py'
15--- lava_dispatcher/client/base.py 2012-05-17 13:02:48 +0000
16+++ lava_dispatcher/client/base.py 2012-05-22 13:22:20 +0000
17@@ -411,7 +411,7 @@
18 # /root/.bashrc, it is
19 # "${debian_chroot:+($debian_chroot)}\u@\h:\w\$ "
20 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')
21- self.proc.expect(self.tester_str, timeout=10)
22+ self.proc.expect(self.tester_str, timeout=120)
23
24 self.setup_proxy(self.tester_str)
25 logging.info("System is in test image now")
26@@ -429,6 +429,7 @@
27 self._boot_linaro_android_image()
28 self.in_test_shell(timeout=900)
29 self.proc.sendline("export PS1=\"root@linaro: \"")
30+ self.proc.expect(self.tester_str, timeout=120)
31 #TODO: set up proxy
32
33 if self.config.get("enable_network_after_boot_android"):
34
35=== modified file 'lava_dispatcher/client/master.py'
36--- lava_dispatcher/client/master.py 2012-05-17 21:02:42 +0000
37+++ lava_dispatcher/client/master.py 2012-05-22 13:22:20 +0000
38@@ -538,7 +538,7 @@
39 logging.info("Downloading the image files")
40
41 proxy = lava_proxy if use_cache else None
42-
43+
44 boot_path = download(boot_url, tarball_dir, proxy)
45 system_path = download(system_url, tarball_dir, proxy)
46 data_path = download(data_url, tarball_dir, proxy)
47@@ -554,17 +554,19 @@
48 reboot the system, and check that we are in a master shell
49 """
50 logging.info("Boot the system master image")
51- self.soft_reboot()
52 try:
53+ self.soft_reboot()
54 image_boot_msg = self.device_option('image_boot_msg')
55- self.proc.expect(image_boot_msg)
56+ self.proc.expect(image_boot_msg, timeout=300)
57 self._in_master_shell(300)
58 except:
59 logging.exception("in_master_shell failed")
60 self.hard_reboot()
61+ image_boot_msg = self.device_option('image_boot_msg')
62+ self.proc.expect(image_boot_msg, timeout=300)
63 self._in_master_shell(300)
64 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')
65- self.proc.expect(self.master_str, timeout=10, lava_no_logging=1)
66+ self.proc.expect(self.master_str, timeout=120, lava_no_logging=1)
67 self.setup_proxy(self.master_str)
68 logging.info("System is in master image now")
69
70@@ -728,16 +730,19 @@
71 def soft_reboot(self):
72 logging.info("Perform soft reboot the system")
73 cmd = self.device_option("soft_boot_cmd")
74+ # make sure in the shell (sometime the earlier command has not exit) by sending CTRL + C
75+ self.proc.sendline("\003")
76 if cmd != "":
77 self.proc.sendline(cmd)
78 else:
79 self.proc.sendline("reboot")
80- # set soft reboot timeout 120s, or do a hard reset
81+ # Looking for reboot messages or if they are missing, the U-Boot message will also indicate the
82+ # reboot is done.
83 id = self.proc.expect(
84 ['Restarting system.', 'The system is going down for reboot NOW',
85- 'Will now restart', pexpect.TIMEOUT], timeout=120)
86- if id not in [0, 1, 2]:
87- self.hard_reboot()
88+ 'Will now restart', 'U-Boot', pexpect.TIMEOUT], timeout=120)
89+ if id not in [0, 1, 2, 3]:
90+ raise Exception("Soft reboot failed")
91
92 def hard_reboot(self):
93 logging.info("Perform hard reset on the system")
94@@ -747,6 +752,14 @@
95 else:
96 self.proc.send("~$")
97 self.proc.sendline("hardreset")
98+ # after hardreset empty the pexpect buffer
99+ self._empty_pexpect_buffer()
100+ def _empty_pexpect_buffer(self):
101+ """Make sure there is nothing in the pexpect buffer."""
102+ index = 0
103+ while index == 0:
104+ index = self.proc.expect(
105+ ['.+', pexpect.EOF, pexpect.TIMEOUT], timeout=1, lava_no_logging=1)
106
107 def _enter_uboot(self):
108 interrupt_boot_prompt = self.device_option('interrupt_boot_prompt')
109@@ -762,8 +775,8 @@
110 self._boot(string_to_list(self.config.get('boot_cmds_android')))
111
112 def _boot(self, boot_cmds):
113- self.soft_reboot()
114 try:
115+ self.soft_reboot()
116 self._enter_uboot()
117 except:
118 logging.exception("_enter_uboot failed")

Subscribers

People subscribed via source and target branches