Merge lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues into lp:lava-dispatcher

Proposed by Le Chi Thu
Status: Merged
Approved by: Michael Hudson-Doyle
Approved revision: 296
Merged at revision: 297
Proposed branch: lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues
Merge into: lp:lava-dispatcher
Diff against target: 118 lines (+26/-10)
3 files modified
doc/changes.rst (+2/-0)
lava_dispatcher/client/base.py (+2/-1)
lava_dispatcher/client/master.py (+22/-9)
To merge this branch: bzr merge lp:~le-chi-thu/lava-dispatcher/fix-reboot-issues
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle (community) Approve
Zygmunt Krynicki (community) Approve
Review via email: mp+106808@code.launchpad.net

Description of the change

Fixed reboot issues such as:
* Schrodinger's boot (example: http://validation.linaro.org/lava-server/scheduler/job/20201).
* Hard reboot does not clear the pexpect buffer.
* Increase the timeouts used when issuing commands right after a reboot.

Tested in staging with 100 jobs; only 6-9 jobs failed, and those failures had other causes (such as home screen and wget issues).

To post a comment you must log in.
Revision history for this message
Zygmunt Krynicki (zyga) wrote :

I guess this could be split into "increase timeouts for everything" and "assorted changes"

This can go in, I guess; no harm if that helps to cure some of the effects we're seeing.

review: Approve
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Only one comment really: sendcontrol('c') is a bit clearer than sendline('\003').

Otherwise seems good, thanks for chasing the issues.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'doc/changes.rst'
--- doc/changes.rst 2012-05-17 21:25:09 +0000
+++ doc/changes.rst 2012-05-22 13:22:20 +0000
@@ -1,6 +1,8 @@
1Version History1Version History
2***************2***************
33
4* Fixed reboot issues
5
4.. _version_0_7_1:6.. _version_0_7_1:
57
6Version 0.7.18Version 0.7.1
79
=== modified file 'lava_dispatcher/client/base.py'
--- lava_dispatcher/client/base.py 2012-05-17 13:02:48 +0000
+++ lava_dispatcher/client/base.py 2012-05-22 13:22:20 +0000
@@ -411,7 +411,7 @@
411 # /root/.bashrc, it is411 # /root/.bashrc, it is
412 # "${debian_chroot:+($debian_chroot)}\u@\h:\w\$ "412 # "${debian_chroot:+($debian_chroot)}\u@\h:\w\$ "
413 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')413 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')
414 self.proc.expect(self.tester_str, timeout=10)414 self.proc.expect(self.tester_str, timeout=120)
415415
416 self.setup_proxy(self.tester_str)416 self.setup_proxy(self.tester_str)
417 logging.info("System is in test image now")417 logging.info("System is in test image now")
@@ -429,6 +429,7 @@
429 self._boot_linaro_android_image()429 self._boot_linaro_android_image()
430 self.in_test_shell(timeout=900)430 self.in_test_shell(timeout=900)
431 self.proc.sendline("export PS1=\"root@linaro: \"")431 self.proc.sendline("export PS1=\"root@linaro: \"")
432 self.proc.expect(self.tester_str, timeout=120)
432 #TODO: set up proxy433 #TODO: set up proxy
433434
434 if self.config.get("enable_network_after_boot_android"):435 if self.config.get("enable_network_after_boot_android"):
435436
=== modified file 'lava_dispatcher/client/master.py'
--- lava_dispatcher/client/master.py 2012-05-17 21:02:42 +0000
+++ lava_dispatcher/client/master.py 2012-05-22 13:22:20 +0000
@@ -538,7 +538,7 @@
538 logging.info("Downloading the image files")538 logging.info("Downloading the image files")
539539
540 proxy = lava_proxy if use_cache else None540 proxy = lava_proxy if use_cache else None
541 541
542 boot_path = download(boot_url, tarball_dir, proxy)542 boot_path = download(boot_url, tarball_dir, proxy)
543 system_path = download(system_url, tarball_dir, proxy)543 system_path = download(system_url, tarball_dir, proxy)
544 data_path = download(data_url, tarball_dir, proxy)544 data_path = download(data_url, tarball_dir, proxy)
@@ -554,17 +554,19 @@
554 reboot the system, and check that we are in a master shell554 reboot the system, and check that we are in a master shell
555 """555 """
556 logging.info("Boot the system master image")556 logging.info("Boot the system master image")
557 self.soft_reboot()
558 try:557 try:
558 self.soft_reboot()
559 image_boot_msg = self.device_option('image_boot_msg')559 image_boot_msg = self.device_option('image_boot_msg')
560 self.proc.expect(image_boot_msg)560 self.proc.expect(image_boot_msg, timeout=300)
561 self._in_master_shell(300)561 self._in_master_shell(300)
562 except:562 except:
563 logging.exception("in_master_shell failed")563 logging.exception("in_master_shell failed")
564 self.hard_reboot()564 self.hard_reboot()
565 image_boot_msg = self.device_option('image_boot_msg')
566 self.proc.expect(image_boot_msg, timeout=300)
565 self._in_master_shell(300)567 self._in_master_shell(300)
566 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')568 self.proc.sendline('export PS1="$PS1 [rc=$(echo \$?)]: "')
567 self.proc.expect(self.master_str, timeout=10, lava_no_logging=1)569 self.proc.expect(self.master_str, timeout=120, lava_no_logging=1)
568 self.setup_proxy(self.master_str)570 self.setup_proxy(self.master_str)
569 logging.info("System is in master image now")571 logging.info("System is in master image now")
570572
@@ -728,16 +730,19 @@
728 def soft_reboot(self):730 def soft_reboot(self):
729 logging.info("Perform soft reboot the system")731 logging.info("Perform soft reboot the system")
730 cmd = self.device_option("soft_boot_cmd")732 cmd = self.device_option("soft_boot_cmd")
733 # make sure in the shell (sometime the earlier command has not exit) by sending CTRL + C
734 self.proc.sendline("\003")
731 if cmd != "":735 if cmd != "":
732 self.proc.sendline(cmd)736 self.proc.sendline(cmd)
733 else:737 else:
734 self.proc.sendline("reboot")738 self.proc.sendline("reboot")
735 # set soft reboot timeout 120s, or do a hard reset739 # Looking for reboot messages or if they are missing, the U-Boot message will also indicate the
740 # reboot is done.
736 id = self.proc.expect(741 id = self.proc.expect(
737 ['Restarting system.', 'The system is going down for reboot NOW',742 ['Restarting system.', 'The system is going down for reboot NOW',
738 'Will now restart', pexpect.TIMEOUT], timeout=120)743 'Will now restart', 'U-Boot', pexpect.TIMEOUT], timeout=120)
739 if id not in [0, 1, 2]:744 if id not in [0, 1, 2, 3]:
740 self.hard_reboot()745 raise Exception("Soft reboot failed")
741746
742 def hard_reboot(self):747 def hard_reboot(self):
743 logging.info("Perform hard reset on the system")748 logging.info("Perform hard reset on the system")
@@ -747,6 +752,14 @@
747 else:752 else:
748 self.proc.send("~$")753 self.proc.send("~$")
749 self.proc.sendline("hardreset")754 self.proc.sendline("hardreset")
755 # after hardreset empty the pexpect buffer
756 self._empty_pexpect_buffer()
757 def _empty_pexpect_buffer(self):
758 """Make sure there is nothing in the pexpect buffer."""
759 index = 0
760 while index == 0:
761 index = self.proc.expect(
762 ['.+', pexpect.EOF, pexpect.TIMEOUT], timeout=1, lava_no_logging=1)
750763
751 def _enter_uboot(self):764 def _enter_uboot(self):
752 interrupt_boot_prompt = self.device_option('interrupt_boot_prompt')765 interrupt_boot_prompt = self.device_option('interrupt_boot_prompt')
@@ -762,8 +775,8 @@
762 self._boot(string_to_list(self.config.get('boot_cmds_android')))775 self._boot(string_to_list(self.config.get('boot_cmds_android')))
763776
764 def _boot(self, boot_cmds):777 def _boot(self, boot_cmds):
765 self.soft_reboot()
766 try:778 try:
779 self.soft_reboot()
767 self._enter_uboot()780 self._enter_uboot()
768 except:781 except:
769 logging.exception("_enter_uboot failed")782 logging.exception("_enter_uboot failed")

Subscribers

People subscribed via source and target branches