Merge ~smoser/cloud-init:fix/1783198-workaround-or-retry-lxd-shutdown into cloud-init:master

Proposed by Scott Moser
Status: Merged
Approved by: Scott Moser
Approved revision: 1fe249a56b0492a14e3aa7dd1f84c4c70115da6d
Merge reported by: Server Team CI bot
Merged at revision: not available
Proposed branch: ~smoser/cloud-init:fix/1783198-workaround-or-retry-lxd-shutdown
Merge into: cloud-init:master
Diff against target: 104 lines (+40/-5)
2 files modified
tests/cloud_tests/platforms/instances.py (+2/-1)
tests/cloud_tests/platforms/lxd/instance.py (+38/-4)
Reviewer Review Type Date Requested Status
Ryan Harper Approve
Server Team CI bot continuous-integration Approve
Review via email: mp+351371@code.launchpad.net

Commit message

tests: improve LXDInstance to try to work around or catch bug.

As described in bug 1783198, we have seen some transient failures when
using the pylxd -> LXD API.
This does:
 * adds a str() representation of LXDInstance
 * checks the value of the pylxd_container object on instantiation
 * sets pylxd_container object to None on deletion.
 * adds retry logic to shutdown()

Description of the change

see commit message

To post a comment you must log in.
Revision history for this message
Server Team CI bot (server-team-bot) wrote :

PASSED: Continuous integration, rev:c447c6441e7d9cf3363cf9ab43032baacbf163fc
https://jenkins.ubuntu.com/server/job/cloud-init-ci/174/
Executed test runs:
    SUCCESS: Checkout
    SUCCESS: Unit & Style Tests
    SUCCESS: Ubuntu LTS: Build
    SUCCESS: Ubuntu LTS: Integration
    SUCCESS: MAAS Compatability Testing
    IN_PROGRESS: Declarative: Post Actions

Click here to trigger a rebuild:
https://jenkins.ubuntu.com/server/job/cloud-init-ci/174/rebuild

review: Approve (continuous-integration)
Revision history for this message
Server Team CI bot (server-team-bot) wrote :

PASSED: Continuous integration, rev:34054fd68ebf52822c64923056809712211a9eae
https://jenkins.ubuntu.com/server/job/cloud-init-ci/175/
Executed test runs:
    SUCCESS: Checkout
    SUCCESS: Unit & Style Tests
    SUCCESS: Ubuntu LTS: Build
    SUCCESS: Ubuntu LTS: Integration
    SUCCESS: MAAS Compatability Testing
    IN_PROGRESS: Declarative: Post Actions

Click here to trigger a rebuild:
https://jenkins.ubuntu.com/server/job/cloud-init-ci/175/rebuild

review: Approve (continuous-integration)
Revision history for this message
Scott Moser (smoser) wrote :

I'm running this on torkoal now, trying to catch a failure.

http://paste.ubuntu.com/p/MnsK2VwS5w/

Revision history for this message
Scott Moser (smoser) wrote :
Revision history for this message
Server Team CI bot (server-team-bot) wrote :

PASSED: Continuous integration, rev:1fe249a56b0492a14e3aa7dd1f84c4c70115da6d
https://jenkins.ubuntu.com/server/job/cloud-init-ci/176/
Executed test runs:
    SUCCESS: Checkout
    SUCCESS: Unit & Style Tests
    SUCCESS: Ubuntu LTS: Build
    SUCCESS: Ubuntu LTS: Integration
    SUCCESS: MAAS Compatability Testing
    IN_PROGRESS: Declarative: Post Actions

Click here to trigger a rebuild:
https://jenkins.ubuntu.com/server/job/cloud-init-ci/176/rebuild

review: Approve (continuous-integration)
Revision history for this message
Ryan Harper (raharper) :
review: Approve

There was an error fetching revisions from git servers. Please try again in a few minutes. If the problem persists, contact Launchpad support.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
2index 95bc3b1..529e79c 100644
3--- a/tests/cloud_tests/platforms/instances.py
4+++ b/tests/cloud_tests/platforms/instances.py
5@@ -97,7 +97,8 @@ class Instance(TargetBase):
6 return self._ssh_client
7
8 if not self.ssh_ip or not self.ssh_port:
9- raise ValueError
10+ raise ValueError("Cannot ssh_connect, ssh_ip=%s ssh_port=%s" %
11+ (self.ssh_ip, self.ssh_port))
12
13 client = paramiko.SSHClient()
14 client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
15diff --git a/tests/cloud_tests/platforms/lxd/instance.py b/tests/cloud_tests/platforms/lxd/instance.py
16index d396519..83c97ab 100644
17--- a/tests/cloud_tests/platforms/lxd/instance.py
18+++ b/tests/cloud_tests/platforms/lxd/instance.py
19@@ -12,6 +12,8 @@ from tests.cloud_tests.util import PlatformError
20
21 from ..instances import Instance
22
23+from pylxd import exceptions as pylxd_exc
24+
25
26 class LXDInstance(Instance):
27 """LXD container backed instance."""
28@@ -30,6 +32,9 @@ class LXDInstance(Instance):
29 @param config: image config
30 @param features: supported feature flags
31 """
32+ if not pylxd_container:
33+ raise ValueError("Invalid value pylxd_container: %s" %
34+ pylxd_container)
35 self._pylxd_container = pylxd_container
36 super(LXDInstance, self).__init__(
37 platform, name, properties, config, features)
38@@ -40,9 +45,19 @@ class LXDInstance(Instance):
39 @property
40 def pylxd_container(self):
41 """Property function."""
42+ if self._pylxd_container is None:
43+ raise RuntimeError(
44+ "%s: Attempted use of pylxd_container after deletion." % self)
45 self._pylxd_container.sync()
46 return self._pylxd_container
47
48+ def __str__(self):
49+ return (
50+ '%s(name=%s) status=%s' %
51+ (self.__class__.__name__, self.name,
52+ ("deleted" if self._pylxd_container is None else
53+ self.pylxd_container.status)))
54+
55 def _execute(self, command, stdin=None, env=None):
56 if env is None:
57 env = {}
58@@ -165,10 +180,27 @@ class LXDInstance(Instance):
59 self.shutdown(wait=wait)
60 self.start(wait=wait)
61
62- def shutdown(self, wait=True):
63+ def shutdown(self, wait=True, retry=1):
64 """Shutdown instance."""
65- if self.pylxd_container.status != 'Stopped':
66+ if self.pylxd_container.status == 'Stopped':
67+ return
68+
69+ try:
70+ LOG.debug("%s: shutting down (wait=%s)", self, wait)
71 self.pylxd_container.stop(wait=wait)
72+ except (pylxd_exc.LXDAPIException, pylxd_exc.NotFound) as e:
73+ # An exception happens here sometimes (LP: #1783198)
74+ # LOG it, and try again.
75+ LOG.warning(
76+ ("%s: shutdown(retry=%d) caught %s in shutdown "
77+ "(response=%s): %s"),
78+ self, retry, e.__class__.__name__, e.response, e)
79+ if isinstance(e, pylxd_exc.NotFound):
80+ LOG.debug("container_exists(%s) == %s",
81+ self.name, self.platform.container_exists(self.name))
82+ if retry == 0:
83+ raise e
84+ return self.shutdown(wait=wait, retry=retry - 1)
85
86 def start(self, wait=True, wait_for_cloud_init=False):
87 """Start instance."""
88@@ -189,12 +221,14 @@ class LXDInstance(Instance):
89
90 def destroy(self):
91 """Clean up instance."""
92+ LOG.debug("%s: deleting container.", self)
93 self.unfreeze()
94 self.shutdown()
95 self.pylxd_container.delete(wait=True)
96+ self._pylxd_container = None
97+
98 if self.platform.container_exists(self.name):
99- raise OSError('container {} was not properly removed'
100- .format(self.name))
101+ raise OSError('%s: container was not properly removed' % self)
102 if self._console_log_file and os.path.exists(self._console_log_file):
103 os.unlink(self._console_log_file)
104 shutil.rmtree(self.tmpd)

Subscribers

People subscribed via source and target branches