Merge lp:~cbjchen/nova/juno-sru-lp1353939 into lp:~ubuntu-server-dev/nova/juno

Proposed by Liang Chen
Status: Needs review
Proposed branch: lp:~cbjchen/nova/juno-sru-lp1353939
Merge into: lp:~ubuntu-server-dev/nova/juno
Diff against target: 163 lines (+141/-0)
3 files modified
debian/changelog (+7/-0)
debian/patches/series (+1/-0)
debian/patches/shutdown-timeout-retry.patch (+133/-0)
To merge this branch: bzr merge lp:~cbjchen/nova/juno-sru-lp1353939
Reviewer: Corey Bryant
Status: Abstain
Review via email: mp+265466@code.launchpad.net
lp:~cbjchen/nova/juno-sru-lp1353939 updated
726. By Liang Chen <email address hidden>

edit changelog

Revision history for this message
Corey Bryant (corey.bryant) wrote :

Looks good, thanks Liang!

review: Approve
Revision history for this message
Corey Bryant (corey.bryant) wrote :

I'm moving my vote to abstain for now until this lands upstream in stable/juno.

review: Abstain
Revision history for this message
Billy Olsen (billy-olsen) wrote :

Proposed change upstream in stable/juno (https://review.openstack.org/#/c/221529/)

Unmerged revisions

726. By Liang Chen <email address hidden>

edit changelog

725. By lchen <<email address hidden>@canonical.com>

SRU LP: #1353939

Preview Diff

=== modified file 'debian/changelog'
--- debian/changelog 2015-07-16 09:42:45 +0000
+++ debian/changelog 2015-07-24 18:56:41 +0000
@@ -1,5 +1,12 @@
-nova (1:2014.2.3-0ubuntu1.2) utopic; urgency=medium
+nova (1:2014.2.3-0ubuntu1.3) utopic; urgency=medium
 
+  * Cherry-pick fix for "Device or resource busy" on instance stop
+    - d/p/shutdown-timeout-retry.patch (LP: #1353939)
+
+ -- Liang Chen <liang.chen@canonical.com>  Tue, 21 Jul 2015 16:33:47 -0400
+
+nova (1:2014.2.3-0ubuntu1.2) utopic; urgency=medium
+
   * Add rsyslog retry support (LP: #1459046)
     - d/p/add-support-for-syslog-connect-retries.patch
 
=== modified file 'debian/patches/series'
--- debian/patches/series 2015-07-15 16:11:19 +0000
+++ debian/patches/series 2015-07-24 18:56:41 +0000
@@ -6,3 +6,4 @@
 disable-websockify-tests.patch
 neutron-floating-ip-list.patch
 add-support-for-syslog-connect-retries.patch
+shutdown-timeout-retry.patch
=== added file 'debian/patches/shutdown-timeout-retry.patch'
--- debian/patches/shutdown-timeout-retry.patch 1970-01-01 00:00:00 +0000
+++ debian/patches/shutdown-timeout-retry.patch 2015-07-24 18:56:41 +0000
@@ -0,0 +1,133 @@
+commit add5b4f751ff27a1e1af82a0799cf75ef6169619
+Author: Matt Riedemann <mriedem@us.ibm.com>
+Date:   Sun May 10 18:46:37 2015 -0700
+
+    libvirt: handle code=38 + sigkill (ebusy) in destroy()
+
+    Handle the libvirt error during destroy when the sigkill fails due to an
+    EBUSY. This is taken from a comment by danpb in the bug report as a
+    potential workaround.
+
+    Co-authored-by: Daniel Berrange <berrange@redhat.com>
+
+    Closes-Bug: #1353939
+
+    Conflicts:
+        nova/tests/unit/virt/libvirt/test_driver.py
+
+    NOTE (kashyapc): 'stable/kilo' branch doesn't have the
+    'libvirt_guest' object, so, adjust the below unit tests accordingly:
+
+        test_private_destroy_ebusy_timeout
+        test_private_destroy_ebusy_multiple_attempt_ok
+
+    Change-Id: I128bf6b939fbbc85df521fd3fe23c3c6f93b1b2c
+    (cherry picked from commit 3907867601d1044eaadebff68a590d176abff6cf)
+
+    Conflicts:
+        nova/tests/unit/virt/libvirt/test_driver.py
+
+--- a/nova/tests/virt/libvirt/test_driver.py
++++ b/nova/tests/virt/libvirt/test_driver.py
+@@ -7905,6 +7905,53 @@ class LibvirtConnTestCase(test.TestCase):
+         # NOTE(vish): verifies destroy doesn't raise if the instance disappears
+         conn._destroy(instance)
+ 
++    def test_private_destroy_ebusy_timeout(self):
++        # Tests that _destroy will retry 3 times to destroy the guest when an
++        # EBUSY is raised, but eventually times out and raises the libvirtError
++        ex = fakelibvirt.make_libvirtError(
++            libvirt.libvirtError,
++            "Failed to terminate process 26425 with SIGKILL: "
++            "Device or resource busy",
++            error_code=libvirt.VIR_ERR_SYSTEM_ERROR,
++            int1=errno.EBUSY)
++
++        instance = self.create_instance_obj(self.context)
++        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
++
++        with mock.patch.object(drvr._conn, 'lookupByName') as mock_get_domain:
++            mock_domain = mock.MagicMock()
++            mock_domain.ID.return_value = 1
++            mock_get_domain.return_value = mock_domain
++            mock_domain.destroy.side_effect = ex
++
++            self.assertRaises(libvirt.libvirtError, drvr._destroy, instance)
++
++        self.assertEqual(3, mock_domain.destroy.call_count)
++
++    def test_private_destroy_ebusy_multiple_attempt_ok(self):
++        # Tests that the _destroy attempt loop is broken when EBUSY is no
++        # longer raised.
++        ex = fakelibvirt.make_libvirtError(
++            libvirt.libvirtError,
++            "Failed to terminate process 26425 with SIGKILL: "
++            "Device or resource busy",
++            error_code=libvirt.VIR_ERR_SYSTEM_ERROR,
++            int1=errno.EBUSY)
++
++        inst_info = {'state': power_state.SHUTDOWN, 'id': 1}
++        instance = self.create_instance_obj(self.context)
++        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
++
++        with mock.patch.object(drvr._conn, 'lookupByName') as mock_get_domain, \
++                mock.patch.object(drvr, 'get_info', return_value=inst_info):
++            mock_domain = mock.MagicMock()
++            mock_domain.ID.return_value = 1
++            mock_get_domain.return_value = mock_domain
++            mock_domain.destroy.side_effect = ex, None
++            drvr._destroy(instance)
++
++        self.assertEqual(2, mock_domain.destroy.call_count)
++
+     def test_undefine_domain_with_not_found_instance(self):
+         def fake_lookup(instance_name):
+             raise libvirt.libvirtError("not found")
+Index: nova-2014.2.3/nova/virt/libvirt/driver.py
+===================================================================
+--- nova-2014.2.3.orig/nova/virt/libvirt/driver.py
++++ nova-2014.2.3/nova/virt/libvirt/driver.py
+@@ -965,7 +965,7 @@ class LibvirtDriver(driver.ComputeDriver
+         rootfs_dev = instance.system_metadata.get('rootfs_device_name')
+         disk.teardown_container(container_dir, rootfs_dev)
+ 
+-    def _destroy(self, instance):
++    def _destroy(self, instance, attempt=1):
+         try:
+             virt_dom = self._lookup_by_name(instance['name'])
+         except exception.InstanceNotFound:
+@@ -1002,6 +1002,34 @@ class LibvirtDriver(driver.ComputeDriver
+                              instance=instance)
+                     reason = _("operation time out")
+                     raise exception.InstancePowerOffFailure(reason=reason)
++                elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR:
++                    if e.get_int1() == errno.EBUSY:
++                        # NOTE(danpb): When libvirt kills a process it sends it
++                        # SIGTERM first and waits 10 seconds. If it hasn't gone
++                        # it sends SIGKILL and waits another 5 seconds. If it
++                        # still hasn't gone then you get this EBUSY error.
++                        # Usually when a QEMU process fails to go away upon
++                        # SIGKILL it is because it is stuck in an
++                        # uninterruptable kernel sleep waiting on I/O from
++                        # some non-responsive server.
++                        # Given the CPU load of the gate tests though, it is
++                        # conceivable that the 15 second timeout is too short,
++                        # particularly if the VM running tempest has a high
++                        # steal time from the cloud host. ie 15 wallclock
++                        # seconds may have passed, but the VM might have only
++                        # have a few seconds of scheduled run time.
++                        LOG.warn(_LW('Error from libvirt during destroy. '
++                                     'Code=%(errcode)s Error=%(e)s; '
++                                     'attempt %(attempt)d of 3'),
++                                 {'errcode': errcode, 'e': e,
++                                  'attempt': attempt},
++                                 instance=instance)
++                        with excutils.save_and_reraise_exception() as ctxt:
++                            # Try up to 3 times before giving up.
++                            if attempt < 3:
++                                ctxt.reraise = False
++                                self._destroy(instance, attempt + 1)
++                                return
+ 
+         if not is_okay:
+             with excutils.save_and_reraise_exception():
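
The substance of the backport is the new VIR_ERR_SYSTEM_ERROR/EBUSY branch in _destroy(): libvirt SIGTERMs the QEMU process, waits 10 seconds, SIGKILLs it, waits another 5, and reports EBUSY if the process still has not exited, so the patched driver re-enters _destroy() with an incremented attempt counter and only re-raises after the third EBUSY. Below is a minimal standalone sketch of that retry pattern, not code from this branch; FakeLibvirtError, FlakyDomain and destroy_with_retry are hypothetical stand-ins for libvirt.libvirtError, a libvirt domain and the patched LibvirtDriver._destroy().

import errno


class FakeLibvirtError(Exception):
    # Hypothetical stand-in for libvirt.libvirtError raised with
    # error_code=VIR_ERR_SYSTEM_ERROR and errno.EBUSY in int1.
    def __init__(self, message, int1):
        super(FakeLibvirtError, self).__init__(message)
        self.int1 = int1

    def get_int1(self):
        return self.int1


def destroy_with_retry(domain, attempt=1):
    # Mirrors the patched _destroy(): retry while SIGKILL fails with
    # EBUSY, re-raise on any other error or after the third attempt.
    try:
        domain.destroy()
    except FakeLibvirtError as e:
        if e.get_int1() == errno.EBUSY and attempt < 3:
            destroy_with_retry(domain, attempt + 1)
        else:
            raise


class FlakyDomain(object):
    # Test double that fails with EBUSY a fixed number of times before
    # destroy finally succeeds (cf. side_effect = ex, None above).
    def __init__(self, failures):
        self.failures = failures
        self.calls = 0

    def destroy(self):
        self.calls += 1
        if self.calls <= self.failures:
            raise FakeLibvirtError(
                "Failed to terminate process with SIGKILL: "
                "Device or resource busy", int1=errno.EBUSY)


transient = FlakyDomain(failures=1)
destroy_with_retry(transient)    # EBUSY once, then succeeds
assert transient.calls == 2

stuck = FlakyDomain(failures=10)
try:
    destroy_with_retry(stuck)
except FakeLibvirtError:
    assert stuck.calls == 3      # gives up after three attempts

The bound of three attempts matches the call counts asserted in the patch's unit tests: test_private_destroy_ebusy_timeout expects destroy() to be called 3 times before the error propagates, and test_private_destroy_ebusy_multiple_attempt_ok expects 2 calls when the second attempt succeeds.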
