Merge lp:~johannes.erdfelt/nova/lp821046 into lp:~hudson-openstack/nova/trunk

Proposed by Johannes Erdfelt
Status: Merged
Approved by: Matt Dietz
Approved revision: 1405
Merged at revision: 1409
Proposed branch: lp:~johannes.erdfelt/nova/lp821046
Merge into: lp:~hudson-openstack/nova/trunk
Diff against target: 187 lines (+46/-44)
1 file modified
nova/virt/xenapi/vmops.py (+46/-44)
To merge this branch: bzr merge lp:~johannes.erdfelt/nova/lp821046
Reviewer Review Type Date Requested Status
Matt Dietz (community) Approve
Rick Harris (community) Approve
Review via email: mp+70936@code.launchpad.net

Description of the change

Be more tolerant of agent failures. It is often the case there is only a problem with the agent, not with the instance, so don't claim it failed to boot so quickly.

To post a comment you must log in.
Revision history for this message
Rick Harris (rconradharris) wrote :

lgtm.

review: Approve
Revision history for this message
Matt Dietz (cerberus) wrote :

ditto

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'nova/virt/xenapi/vmops.py'
2--- nova/virt/xenapi/vmops.py 2011-08-08 20:06:54 +0000
3+++ nova/virt/xenapi/vmops.py 2011-08-09 17:41:24 +0000
4@@ -282,6 +282,7 @@
5 'architecture': instance.architecture})
6
7 def _check_agent_version():
8+ LOG.debug(_("Querying agent version"))
9 if instance.os_type == 'windows':
10 # Windows will generally perform a setup process on first boot
11 # that can take a couple of minutes and then reboot. So we
12@@ -292,7 +293,6 @@
13 else:
14 version = self.get_agent_version(instance)
15 if not version:
16- LOG.info(_('No agent version returned by instance'))
17 return
18
19 LOG.info(_('Instance agent version: %s') % version)
20@@ -327,6 +327,10 @@
21 LOG.debug(_("Setting admin password"))
22 self.set_admin_password(instance, admin_password)
23
24+ def _reset_network():
25+ LOG.debug(_("Resetting network"))
26+ self.reset_network(instance, vm_ref)
27+
28 # NOTE(armando): Do we really need to do this in virt?
29 # NOTE(tr3buchet): not sure but wherever we do it, we need to call
30 # reset_network afterwards
31@@ -341,7 +345,7 @@
32 _check_agent_version()
33 _inject_files()
34 _set_admin_password()
35- self.reset_network(instance, vm_ref)
36+ _reset_network()
37 return True
38 except Exception, exc:
39 LOG.warn(exc)
40@@ -597,13 +601,13 @@
41 transaction_id = str(uuid.uuid4())
42 args = {'id': transaction_id}
43 resp = self._make_agent_call('version', instance, '', args)
44- if resp is None:
45- # No response from the agent
46- return
47- resp_dict = json.loads(resp)
48+ if resp['returncode'] != '0':
49+ LOG.error(_('Failed to query agent version: %(resp)r') %
50+ locals())
51+ return None
52 # Some old versions of the Windows agent have a trailing \\r\\n
53 # (ie CRLF escaped) for some reason. Strip that off.
54- return resp_dict['message'].replace('\\r\\n', '')
55+ return resp['message'].replace('\\r\\n', '')
56
57 if timeout:
58 vm_ref = self._get_vm_opaque_ref(instance)
59@@ -634,13 +638,10 @@
60 transaction_id = str(uuid.uuid4())
61 args = {'id': transaction_id, 'url': url, 'md5sum': md5sum}
62 resp = self._make_agent_call('agentupdate', instance, '', args)
63- if resp is None:
64- # No response from the agent
65- return
66- resp_dict = json.loads(resp)
67- if resp_dict['returncode'] != '0':
68- raise RuntimeError(resp_dict['message'])
69- return resp_dict['message']
70+ if resp['returncode'] != '0':
71+ LOG.error(_('Failed to update agent: %(resp)r') % locals())
72+ return None
73+ return resp['message']
74
75 def set_admin_password(self, instance, new_pass):
76 """Set the root/admin password on the VM instance.
77@@ -659,18 +660,13 @@
78 key_init_args = {'id': key_init_transaction_id,
79 'pub': str(dh.get_public())}
80 resp = self._make_agent_call('key_init', instance, '', key_init_args)
81- if resp is None:
82- # No response from the agent
83- return
84- resp_dict = json.loads(resp)
85 # Successful return code from key_init is 'D0'
86- if resp_dict['returncode'] != 'D0':
87- # There was some sort of error; the message will contain
88- # a description of the error.
89- raise RuntimeError(resp_dict['message'])
90+ if resp['returncode'] != 'D0':
91+ LOG.error(_('Failed to exchange keys: %(resp)r') % locals())
92+ return None
93 # Some old versions of the Windows agent have a trailing \\r\\n
94 # (ie CRLF escaped) for some reason. Strip that off.
95- agent_pub = int(resp_dict['message'].replace('\\r\\n', ''))
96+ agent_pub = int(resp['message'].replace('\\r\\n', ''))
97 dh.compute_shared(agent_pub)
98 # Some old versions of Linux and Windows agent expect trailing \n
99 # on password to work correctly.
100@@ -679,17 +675,14 @@
101 password_transaction_id = str(uuid.uuid4())
102 password_args = {'id': password_transaction_id, 'enc_pass': enc_pass}
103 resp = self._make_agent_call('password', instance, '', password_args)
104- if resp is None:
105- # No response from the agent
106- return
107- resp_dict = json.loads(resp)
108 # Successful return code from password is '0'
109- if resp_dict['returncode'] != '0':
110- raise RuntimeError(resp_dict['message'])
111+ if resp['returncode'] != '0':
112+ LOG.error(_('Failed to update password: %(resp)r') % locals())
113+ return None
114 db.instance_update(nova_context.get_admin_context(),
115 instance['id'],
116 dict(admin_pass=new_pass))
117- return resp_dict['message']
118+ return resp['message']
119
120 def inject_file(self, instance, path, contents):
121 """Write a file to the VM instance.
122@@ -712,12 +705,10 @@
123 # If the agent doesn't support file injection, a NotImplementedError
124 # will be raised with the appropriate message.
125 resp = self._make_agent_call('inject_file', instance, '', args)
126- resp_dict = json.loads(resp)
127- if resp_dict['returncode'] != '0':
128- # There was some other sort of error; the message will contain
129- # a description of the error.
130- raise RuntimeError(resp_dict['message'])
131- return resp_dict['message']
132+ if resp['returncode'] != '0':
133+ LOG.error(_('Failed to inject file: %(resp)r') % locals())
134+ return None
135+ return resp['message']
136
137 def _shutdown(self, instance, vm_ref, hard=True):
138 """Shutdown an instance."""
139@@ -1178,8 +1169,19 @@
140
141 def _make_agent_call(self, method, vm, path, addl_args=None):
142 """Abstracts out the interaction with the agent xenapi plugin."""
143- return self._make_plugin_call('agent', method=method, vm=vm,
144+ ret = self._make_plugin_call('agent', method=method, vm=vm,
145 path=path, addl_args=addl_args)
146+ if isinstance(ret, dict):
147+ return ret
148+ try:
149+ return json.loads(ret)
150+ except TypeError:
151+ instance_id = vm.id
152+ LOG.error(_('The agent call to %(method)s returned an invalid'
153+ ' response: %(ret)r. VM id=%(instance_id)s;'
154+ ' path=%(path)s; args=%(addl_args)r') % locals())
155+ return {'returncode': 'error',
156+ 'message': 'unable to deserialize response'}
157
158 def _make_plugin_call(self, plugin, method, vm, path, addl_args=None,
159 vm_ref=None):
160@@ -1197,20 +1199,20 @@
161 ret = self._session.wait_for_task(task, instance_id)
162 except self.XenAPI.Failure, e:
163 ret = None
164- err_trace = e.details[-1]
165- err_msg = err_trace.splitlines()[-1]
166- strargs = str(args)
167+ err_msg = e.details[-1].splitlines()[-1]
168 if 'TIMEOUT:' in err_msg:
169 LOG.error(_('TIMEOUT: The call to %(method)s timed out. '
170- 'VM id=%(instance_id)s; args=%(strargs)s') % locals())
171+ 'VM id=%(instance_id)s; args=%(args)r') % locals())
172+ return {'returncode': 'timeout', 'message': err_msg}
173 elif 'NOT IMPLEMENTED:' in err_msg:
174 LOG.error(_('NOT IMPLEMENTED: The call to %(method)s is not'
175 ' supported by the agent. VM id=%(instance_id)s;'
176- ' args=%(strargs)s') % locals())
177- raise NotImplementedError(err_msg)
178+ ' args=%(args)r') % locals())
179+ return {'returncode': 'notimplemented', 'message': err_msg}
180 else:
181 LOG.error(_('The call to %(method)s returned an error: %(e)s. '
182- 'VM id=%(instance_id)s; args=%(strargs)s') % locals())
183+ 'VM id=%(instance_id)s; args=%(args)r') % locals())
184+ return {'returncode': 'error', 'message': err_msg}
185 return ret
186
187 def add_to_xenstore(self, vm, path, key, value):