Merge lp:~jk0/nova/xs-rescue into lp:~hudson-openstack/nova/trunk

Proposed by Josh Kearney
Status: Merged
Approved by: Rick Harris
Approved revision: 603
Merged at revision: 754
Proposed branch: lp:~jk0/nova/xs-rescue
Merge into: lp:~hudson-openstack/nova/trunk
Diff against target: 641 lines (+220/-62)
13 files modified
.mailmap (+1/-0)
Authors (+1/-1)
nova/api/openstack/__init__.py (+2/-0)
nova/api/openstack/servers.py (+22/-0)
nova/compute/manager.py (+24/-10)
nova/db/sqlalchemy/models.py (+6/-1)
nova/tests/xenapi/stubs.py (+1/-1)
nova/virt/libvirt_conn.py (+2/-2)
nova/virt/xenapi/fake.py (+1/-1)
nova/virt/xenapi/vm_utils.py (+10/-12)
nova/virt/xenapi/vmops.py (+132/-27)
nova/virt/xenapi/volumeops.py (+1/-1)
nova/virt/xenapi_conn.py (+17/-6)
To merge this branch: bzr merge lp:~jk0/nova/xs-rescue
Reviewer        Review Type    Status
Rick Harris     community      Approve
Matt Dietz      community      Approve
Review via email: mp+51780@code.launchpad.net

Description of the change

Provide the ability to rescue and unrescue a XenServer instance.

Rick Harris (rconradharris) wrote:

Good work jk0, this is a great start.

Some small nits:

> 80 + self.driver.rescue(instance_ref,
> 81 + lambda result: self._update_state_callback(self,
> 82 + context,
> 83 + instance_id,
> 84 + result))

Breaking this into two lines would probably aid readability.

> 145 + def rescue(self, instance, callback=None):

The callback function isn't firing for rescue or unrescue. It looks like the
other methods are using

        self._wait_with_callback(instance.id, task, callback)

    to fire it off.
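
For reference, this is how the suspend/resume path in this branch fires the callback (the pattern is taken verbatim from the preview diff below and is shown only to illustrate the point):

    task = self._session.call_xenapi('Async.VM.resume', vm, False, True)
    self._wait_with_callback(instance.id, task, callback)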

> 342 + def _bootlock(self, vm, unlock=False):
> 343 + """Prevent an instance from booting"""
> 344 + if unlock:
> 345 + self._session.call_xenapi(
> 346 + "VM.remove_from_blocked_operations",
> 347 + vm,
> 348 + "start")
> 349 + else:
> 350 + self._session.call_xenapi(
> 351 + "VM.set_blocked_operations",
> 352 + vm,
> 353 + {"start": ""})
> 354 +

I think this makes more sense as two methods:
_acquire_bootlock and _release_bootlock.

Another thing you could do is assert that we're in the correct state. For
example, that we're not trying to unlock a VM that's already unlocked (that'd
indicate a bug in the code).
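
A rough sketch of that split, with the state check suggested here (the asserts and the VM.get_blocked_operations lookup are illustrative additions only; the merged branch keeps the simpler pair shown in the preview diff below):

def _acquire_bootlock(self, vm):
    """Prevent an instance from booting"""
    # Illustrative sanity check: the lock should not already be held.
    blocked = self._session.call_xenapi("VM.get_blocked_operations", vm)
    assert "start" not in blocked, "bootlock is already held"
    self._session.call_xenapi(
        "VM.set_blocked_operations",
        vm,
        {"start": ""})

def _release_bootlock(self, vm):
    """Allow an instance to boot"""
    # Illustrative sanity check: the lock should be held before releasing it.
    blocked = self._session.call_xenapi("VM.get_blocked_operations", vm)
    assert "start" in blocked, "bootlock is not held"
    self._session.call_xenapi(
        "VM.remove_from_blocked_operations",
        vm,
        "start")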

> 394 + try:
> 395 + task = self._session.call_xenapi("Async.VM.clean_shutdown", vm)
> 396 + self._session.wait_for_task(task, instance.id)
> 397 + except self.XenAPI.Failure:
> 398 + task = self._session.call_xenapi("Async.VM.hard_shutdown", vm)
> 399 + self._session.wait_for_task(task, instance.id)

By default, I think _shutdown should perform either a clean or hard shutdown
depending on what's requested (Dietz added a hard=true kwarg in his
xs-migrations branch).

If at all possible, I'd like to avoid silent fall-backs (like from clean to
hard), because they're usually indicative of a larger problem.

That said, a _force_shutdown that attempts a clean and then falls back to
hard would be okay since it's very explicit when that behavior is being
requested (and will be easier to remove later on if we want to).
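
Sketched out, that distinction might look like the following (the _force_shutdown helper is hypothetical and only illustrates the suggestion; the merged branch just adds the hard kwarg, as the preview diff below shows):

def _shutdown(self, instance, vm, hard=True):
    """Shutdown an instance, hard or clean as requested"""
    if hard:
        task = self._session.call_xenapi("Async.VM.hard_shutdown", vm)
    else:
        task = self._session.call_xenapi("Async.VM.clean_shutdown", vm)
    self._session.wait_for_task(task, instance.id)

def _force_shutdown(self, instance, vm):
    """Attempt a clean shutdown, explicitly falling back to a hard one"""
    try:
        self._shutdown(instance, vm, hard=False)
    except self.XenAPI.Failure:
        self._shutdown(instance, vm, hard=True)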

> 484 + if vbd["userdevice"] == str(1):

str(1) can just be "1".

> 488 + task1 = self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm)
> 489 + self._session.wait_for_task(task1, instance.id)

This could be something like _shutdown(instance, rescue_vm, hard=True).

> 491 + vdis = VMHelper.lookup_vm_vdis(self._session, rescue_vm)
> 492 + for vdi in vdis:
> 493 + try:
> 494 + task = self._session.call_xenapi('Async.VDI.destroy', vdi)
> 495 + self._session.wait_for_task(task, instance.id)
> 496 + except self.XenAPI.Failure:
> 497 + continue

Should be able to use `_destroy_vdis` here.

> 499 + task2 = self._session.call_xenapi('Async.VM.destroy', rescue_vm)
> 500 + self._session.wait_for_task(task2, instance.i...

review: Needs Fixing
Josh Kearney (jk0) wrote:

> Good work jk0, this is a great start.
>
>
> Some small nits:
>
> > 80 + self.driver.rescue(instance_ref,
> > 81 + lambda result: self._update_state_callback(self,
> > 82 + context,
> > 83 + instance_id,
> > 84 + result))
>
> Breaking this into two lines would probably aid readability.

Copied this directly from another call; wanted to maintain consistency. I'll go through them all and clean up.

>
> > 145 + def rescue(self, instance, callback=None):
>
> The callback function isn't firing for rescue or unrescue. It looks like the
> other methods are using
>
> self._wait_with_callback(instance.id, task, callback)
>
> to fire it off.

I had to keep this in place since libvirt uses the callback (passed down to the driver).

>
> > 342 + def _bootlock(self, vm, unlock=False):
> > 343 + """Prevent an instance from booting"""
> > 344 + if unlock:
> > 345 + self._session.call_xenapi(
> > 346 + "VM.remove_from_blocked_operations",
> > 347 + vm,
> > 348 + "start")
> > 349 + else:
> > 350 + self._session.call_xenapi(
> > 351 + "VM.set_blocked_operations",
> > 352 + vm,
> > 353 + {"start": ""})
> > 354 +
>
> I think this makes more sense as two methods:
> _acquire_bootlock and _release_bootlock.
>
> Another thing you could do is assert that we're in the correct state. For
> example, that we're not trying to unlock a VM that's already unlocked (that'd
> indicate a bug in the code).

Good points, I'll break this down into two methods.

>
> > 394 + try:
> > 395 + task =
> self._session.call_xenapi("Async.VM.clean_shutdown", vm)
> > 396 + self._session.wait_for_task(task, instance.id)
> > 397 + except self.XenAPI.Failure:
> > 398 + task =
> self._session.call_xenapi("Async.VM.hard_shutdown", vm)
> > 399 + self._session.wait_for_task(task, instance.id)
>
> By default, I think _shutdown should perform either a clean or hard shutdown
> depending on what's requested (Dietz added a hard=true kwarg in his
> xs-migrations branch).
>
> If at all possible, I'd like to avoid silent fall-backs (like from clean to
> hard), because they're usually indicative of a larger problem.
>
> That said, a _force_shutdown that attempts a clean and then falls back to
> hard would be okay since it's very explicit when that behavior is being
> requested (and will be easier to remove later on if we want to).
>

Sounds like I could get by using Dietz' method. I'll do this to avoid conflicts down the road.

> > 484 + if vbd["userdevice"] == str(1):
>
> str(1) can just be "1".

Nice catch. Not sure why I used str() here.

>
> > 488 + task1 = self._session.call_xenapi("Async.VM.hard_shutdown",
> rescue_vm)
> > 489 + self._session.wait_for_task(task1, instance.id)
>
> This could be som...

lp:~jk0/nova/xs-rescue updated
602. By Josh Kearney

Review feedback

Matt Dietz (cerberus) wrote:

Pinning rescue status on the name of the VM seems pretty messy to me. It seems to me that, since rescue is such a critical state to be aware of, we should maintain it either through a flag on the instance model or through the instance status attribute. Is there a reason you didn't go this route?

review: Needs Information
Josh Kearney (jk0) wrote:

> Pinning rescue status on the name of the VM seems pretty messy to me. It seems
> to me that, since rescue is such a critical state to be aware of, we should
> maintain it either through a flag on the instance model or through the
> instance status attribute. Is there a reason you didn't go this route?

We had to go this route to avoid duplicate name-label exceptions. By doing this, we can spin up a rescue image using the same instance object and easily refer back to the original image when the time comes to unrescue. The name-labels are guaranteed to be unique.
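
A minimal sketch of that mechanism, purely for illustration (context and instance_id stand in for whatever the caller already has; the behavior follows the Instance.name change in the preview diff below):

# The same instance object yields both name-labels:
instance = db.instance_get(context, instance_id)
instance.name             # FLAGS.instance_name_template % instance.id
instance._rescue = True
instance.name             # the same base name with "-rescue" appended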

Matt Dietz (cerberus) wrote:

Sure, I get that. My concern comes in cases where we may want to auto-unrescue instances. If we were to search for anything with '-rescue' on the end, we might try to unrescue stuff that's just unfortunately named.

Matt Dietz (cerberus) wrote:

Never mind, feedback received off list. Forgot that name-labels were not instance descriptions (we should probably rename one or both of those...)

review: Approve
Rick Harris (rconradharris) wrote:

Thanks for the fixes, jk0. Have a couple more potential cleanups, nothing big:

> 136 @property
> 137 def name(self):
> 138 - return FLAGS.instance_name_template % self.id
> 139 + base_name = FLAGS.instance_name_template % self.id
> 140 + if getattr(self, '_rescue', False):
> 141 + base_name += "-rescue"
> 142 + return base_name

I know I suggested this, but I'm having second thoughts about putting this
logic here. At least right now, this is XenAPI hackiness, and I'm not sure it
belongs in the generic instance model.

Instead, I think the XenAPI code should have a method, `get_name_label`, which
it uses instead of instance.name to generate the VM name-labels. That code
would look like:

def get_name_label(instance):
    """Generate name label for the XenServer VM record

    We have two scenarios:

        1. We're in normal mode and the INSTANCE VM record uses the instance name as
           its name label

        2. We're in rescue mode and the RESCUE VM record uses instance name +
           '-rescue' as its name label
    """
    base_name = instance.name
    if getattr(instance, '_rescue', False):
        base_name += "-rescue"
    return base_name

The code is really pretty readable, but it still might be a good idea to
include a big docstring on the XenAPI rescue method that explains, at a
high level, what's involved in a rescue, like:

"""
Rescue consists of a number of steps

1. We shutdown the INSTANCE VM

2. We apply a 'bootlock' to prevent the instance VM from being started while
   in rescue

2. We spawn a RESCUE VM (the vm name-label in this case will be
   instance-0002-rescue)

3. We attach the INSTANCE VM's VBDs to the RESCUE VM

Unrescue consists of:

1. We unplug the INSTANCE VM's from the RESCUE VM

2. We teardown the RESCUE VM

3. Release the bootlock to allow the INSTANCE VM to be started

4. Start the INSTANCE VM
"""

lp:~jk0/nova/xs-rescue updated
603. By Josh Kearney

Updated docstrings

Rick Harris (rconradharris) wrote:

Looks good.

review: Approve

Preview Diff

1=== modified file '.mailmap'
2--- .mailmap 2011-02-18 18:27:30 +0000
3+++ .mailmap 2011-03-02 23:12:16 +0000
4@@ -19,6 +19,7 @@
5 <jmckenty@gmail.com> <jmckenty@joshua-mckentys-macbook-pro.local>
6 <jmckenty@gmail.com> <jmckenty@yyj-dhcp171.corp.flock.com>
7 <jmckenty@gmail.com> <joshua.mckenty@nasa.gov>
8+<josh@jk0.org> <josh.kearney@rackspace.com>
9 <justin@fathomdb.com> <justinsb@justinsb-desktop>
10 <justin@fathomdb.com> <superstack@superstack.org>
11 <masumotok@nttdata.co.jp> Masumoto<masumotok@nttdata.co.jp>
12
13=== modified file 'Authors'
14--- Authors 2011-03-01 00:46:39 +0000
15+++ Authors 2011-03-02 23:12:16 +0000
16@@ -31,7 +31,7 @@
17 Jonathan Bryce <jbryce@jbryce.com>
18 Jordan Rinke <jordan@openstack.org>
19 Josh Durgin <joshd@hq.newdream.net>
20-Josh Kearney <josh.kearney@rackspace.com>
21+Josh Kearney <josh@jk0.org>
22 Joshua McKenty <jmckenty@gmail.com>
23 Justin Santa Barbara <justin@fathomdb.com>
24 Kei Masumoto <masumotok@nttdata.co.jp>
25
26=== modified file 'nova/api/openstack/__init__.py'
27--- nova/api/openstack/__init__.py 2011-02-24 23:35:21 +0000
28+++ nova/api/openstack/__init__.py 2011-03-02 23:12:16 +0000
29@@ -80,6 +80,8 @@
30 server_members["actions"] = "GET"
31 server_members['suspend'] = 'POST'
32 server_members['resume'] = 'POST'
33+ server_members['rescue'] = 'POST'
34+ server_members['unrescue'] = 'POST'
35 server_members['reset_network'] = 'POST'
36 server_members['inject_network_info'] = 'POST'
37
38
39=== modified file 'nova/api/openstack/servers.py'
40--- nova/api/openstack/servers.py 2011-03-01 00:56:46 +0000
41+++ nova/api/openstack/servers.py 2011-03-02 23:12:16 +0000
42@@ -335,6 +335,28 @@
43 return faults.Fault(exc.HTTPUnprocessableEntity())
44 return exc.HTTPAccepted()
45
46+ def rescue(self, req, id):
47+ """Permit users to rescue the server."""
48+ context = req.environ["nova.context"]
49+ try:
50+ self.compute_api.rescue(context, id)
51+ except:
52+ readable = traceback.format_exc()
53+ LOG.exception(_("compute.api::rescue %s"), readable)
54+ return faults.Fault(exc.HTTPUnprocessableEntity())
55+ return exc.HTTPAccepted()
56+
57+ def unrescue(self, req, id):
58+ """Permit users to unrescue the server."""
59+ context = req.environ["nova.context"]
60+ try:
61+ self.compute_api.unrescue(context, id)
62+ except:
63+ readable = traceback.format_exc()
64+ LOG.exception(_("compute.api::unrescue %s"), readable)
65+ return faults.Fault(exc.HTTPUnprocessableEntity())
66+ return exc.HTTPAccepted()
67+
68 def get_ajax_console(self, req, id):
69 """ Returns a url to an instance's ajaxterm console. """
70 try:
71
72=== modified file 'nova/compute/manager.py'
73--- nova/compute/manager.py 2011-02-24 23:35:21 +0000
74+++ nova/compute/manager.py 2011-03-02 23:12:16 +0000
75@@ -370,12 +370,19 @@
76 context = context.elevated()
77 instance_ref = self.db.instance_get(context, instance_id)
78 LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
79- self.db.instance_set_state(context,
80- instance_id,
81- power_state.NOSTATE,
82- 'rescuing')
83+ self.db.instance_set_state(
84+ context,
85+ instance_id,
86+ power_state.NOSTATE,
87+ 'rescuing')
88 self.network_manager.setup_compute_network(context, instance_id)
89- self.driver.rescue(instance_ref)
90+ self.driver.rescue(
91+ instance_ref,
92+ lambda result: self._update_state_callback(
93+ self,
94+ context,
95+ instance_id,
96+ result))
97 self._update_state(context, instance_id)
98
99 @exception.wrap_exception
100@@ -385,11 +392,18 @@
101 context = context.elevated()
102 instance_ref = self.db.instance_get(context, instance_id)
103 LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
104- self.db.instance_set_state(context,
105- instance_id,
106- power_state.NOSTATE,
107- 'unrescuing')
108- self.driver.unrescue(instance_ref)
109+ self.db.instance_set_state(
110+ context,
111+ instance_id,
112+ power_state.NOSTATE,
113+ 'unrescuing')
114+ self.driver.unrescue(
115+ instance_ref,
116+ lambda result: self._update_state_callback(
117+ self,
118+ context,
119+ instance_id,
120+ result))
121 self._update_state(context, instance_id)
122
123 @staticmethod
124
125=== modified file 'nova/db/sqlalchemy/models.py'
126--- nova/db/sqlalchemy/models.py 2011-02-24 05:47:29 +0000
127+++ nova/db/sqlalchemy/models.py 2011-03-02 23:12:16 +0000
128@@ -126,11 +126,16 @@
129 class Instance(BASE, NovaBase):
130 """Represents a guest vm."""
131 __tablename__ = 'instances'
132+ onset_files = []
133+
134 id = Column(Integer, primary_key=True, autoincrement=True)
135
136 @property
137 def name(self):
138- return FLAGS.instance_name_template % self.id
139+ base_name = FLAGS.instance_name_template % self.id
140+ if getattr(self, '_rescue', False):
141+ base_name += "-rescue"
142+ return base_name
143
144 admin_pass = Column(String(255))
145 user_id = Column(String(255))
146
147=== modified file 'nova/tests/xenapi/stubs.py'
148--- nova/tests/xenapi/stubs.py 2011-02-25 16:47:08 +0000
149+++ nova/tests/xenapi/stubs.py 2011-03-02 23:12:16 +0000
150@@ -27,7 +27,7 @@
151 def fake_fetch_image(cls, session, instance_id, image, user, project,
152 type):
153 # Stubout wait_for_task
154- def fake_wait_for_task(self, id, task):
155+ def fake_wait_for_task(self, task, id):
156 class FakeEvent:
157
158 def send(self, value):
159
160=== modified file 'nova/virt/libvirt_conn.py'
161--- nova/virt/libvirt_conn.py 2011-01-27 23:58:22 +0000
162+++ nova/virt/libvirt_conn.py 2011-03-02 23:12:16 +0000
163@@ -362,7 +362,7 @@
164 raise exception.APIError("resume not supported for libvirt")
165
166 @exception.wrap_exception
167- def rescue(self, instance):
168+ def rescue(self, instance, callback=None):
169 self.destroy(instance, False)
170
171 xml = self.to_xml(instance, rescue=True)
172@@ -392,7 +392,7 @@
173 return timer.start(interval=0.5, now=True)
174
175 @exception.wrap_exception
176- def unrescue(self, instance):
177+ def unrescue(self, instance, callback=None):
178 # NOTE(vish): Because reboot destroys and recreates an instance using
179 # the normal xml file, we can just call reboot here
180 self.reboot(instance)
181
182=== modified file 'nova/virt/xenapi/fake.py'
183--- nova/virt/xenapi/fake.py 2011-02-09 10:08:15 +0000
184+++ nova/virt/xenapi/fake.py 2011-03-02 23:12:16 +0000
185@@ -401,7 +401,7 @@
186 field in _db_content[cls][ref]):
187 return _db_content[cls][ref][field]
188
189- LOG.debuug(_('Raising NotImplemented'))
190+ LOG.debug(_('Raising NotImplemented'))
191 raise NotImplementedError(
192 _('xenapi.fake does not have an implementation for %s or it has '
193 'been called with the wrong number of arguments') % name)
194
195=== modified file 'nova/virt/xenapi/vm_utils.py'
196--- nova/virt/xenapi/vm_utils.py 2011-02-25 16:47:08 +0000
197+++ nova/virt/xenapi/vm_utils.py 2011-03-02 23:12:16 +0000
198@@ -205,19 +205,17 @@
199 """Destroy VBD from host database"""
200 try:
201 task = session.call_xenapi('Async.VBD.destroy', vbd_ref)
202- #FIXME(armando): find a solution to missing instance_id
203- #with Josh Kearney
204- session.wait_for_task(0, task)
205+ session.wait_for_task(task)
206 except cls.XenAPI.Failure, exc:
207 LOG.exception(exc)
208 raise StorageError(_('Unable to destroy VBD %s') % vbd_ref)
209
210 @classmethod
211- def create_vif(cls, session, vm_ref, network_ref, mac_address):
212+ def create_vif(cls, session, vm_ref, network_ref, mac_address, dev="0"):
213 """Create a VIF record. Returns a Deferred that gives the new
214 VIF reference."""
215 vif_rec = {}
216- vif_rec['device'] = '0'
217+ vif_rec['device'] = dev
218 vif_rec['network'] = network_ref
219 vif_rec['VM'] = vm_ref
220 vif_rec['MAC'] = mac_address
221@@ -269,7 +267,7 @@
222 original_parent_uuid = get_vhd_parent_uuid(session, vm_vdi_ref)
223
224 task = session.call_xenapi('Async.VM.snapshot', vm_ref, label)
225- template_vm_ref = session.wait_for_task(instance_id, task)
226+ template_vm_ref = session.wait_for_task(task, instance_id)
227 template_vdi_rec = get_vdi_for_vm_safely(session, template_vm_ref)[1]
228 template_vdi_uuid = template_vdi_rec["uuid"]
229
230@@ -302,7 +300,7 @@
231
232 kwargs = {'params': pickle.dumps(params)}
233 task = session.async_call_plugin('glance', 'upload_vhd', kwargs)
234- session.wait_for_task(instance_id, task)
235+ session.wait_for_task(task, instance_id)
236
237 @classmethod
238 def fetch_image(cls, session, instance_id, image, user, project,
239@@ -345,7 +343,7 @@
240
241 kwargs = {'params': pickle.dumps(params)}
242 task = session.async_call_plugin('glance', 'download_vhd', kwargs)
243- vdi_uuid = session.wait_for_task(instance_id, task)
244+ vdi_uuid = session.wait_for_task(task, instance_id)
245
246 scan_sr(session, instance_id, sr_ref)
247
248@@ -401,7 +399,7 @@
249 #let the plugin copy the correct number of bytes
250 args['image-size'] = str(vdi_size)
251 task = session.async_call_plugin('glance', fn, args)
252- filename = session.wait_for_task(instance_id, task)
253+ filename = session.wait_for_task(task, instance_id)
254 #remove the VDI as it is not needed anymore
255 session.get_xenapi().VDI.destroy(vdi)
256 LOG.debug(_("Kernel/Ramdisk VDI %s destroyed"), vdi)
257@@ -493,7 +491,7 @@
258 if image_type == ImageType.DISK_RAW:
259 args['raw'] = 'true'
260 task = session.async_call_plugin('objectstore', fn, args)
261- uuid = session.wait_for_task(instance_id, task)
262+ uuid = session.wait_for_task(task, instance_id)
263 return uuid
264
265 @classmethod
266@@ -513,7 +511,7 @@
267 args = {}
268 args['vdi-ref'] = vdi_ref
269 task = session.async_call_plugin('objectstore', fn, args)
270- pv_str = session.wait_for_task(instance_id, task)
271+ pv_str = session.wait_for_task(task, instance_id)
272 pv = None
273 if pv_str.lower() == 'true':
274 pv = True
275@@ -654,7 +652,7 @@
276 def scan_sr(session, instance_id, sr_ref):
277 LOG.debug(_("Re-scanning SR %s"), sr_ref)
278 task = session.call_xenapi('Async.SR.scan', sr_ref)
279- session.wait_for_task(instance_id, task)
280+ session.wait_for_task(task, instance_id)
281
282
283 def wait_for_vhd_coalesce(session, instance_id, sr_ref, vdi_ref,
284
285=== modified file 'nova/virt/xenapi/vmops.py'
286--- nova/virt/xenapi/vmops.py 2011-02-25 16:47:08 +0000
287+++ nova/virt/xenapi/vmops.py 2011-03-02 23:12:16 +0000
288@@ -49,6 +49,7 @@
289 def __init__(self, session):
290 self.XenAPI = session.get_imported_xenapi()
291 self._session = session
292+
293 VMHelper.XenAPI = self.XenAPI
294
295 def list_instances(self):
296@@ -62,20 +63,20 @@
297
298 def spawn(self, instance):
299 """Create VM instance"""
300- vm = VMHelper.lookup(self._session, instance.name)
301+ instance_name = instance.name
302+ vm = VMHelper.lookup(self._session, instance_name)
303 if vm is not None:
304 raise exception.Duplicate(_('Attempted to create'
305- ' non-unique name %s') % instance.name)
306+ ' non-unique name %s') % instance_name)
307
308 #ensure enough free memory is available
309 if not VMHelper.ensure_free_mem(self._session, instance):
310- name = instance['name']
311- LOG.exception(_('instance %(name)s: not enough free memory')
312- % locals())
313- db.instance_set_state(context.get_admin_context(),
314- instance['id'],
315- power_state.SHUTDOWN)
316- return
317+ LOG.exception(_('instance %(instance_name)s: not enough free '
318+ 'memory') % locals())
319+ db.instance_set_state(context.get_admin_context(),
320+ instance['id'],
321+ power_state.SHUTDOWN)
322+ return
323
324 user = AuthManager().get_user(instance.user_id)
325 project = AuthManager().get_project(instance.project_id)
326@@ -116,10 +117,9 @@
327 self.create_vifs(instance, networks)
328
329 LOG.debug(_('Starting VM %s...'), vm_ref)
330- self._session.call_xenapi('VM.start', vm_ref, False, False)
331- instance_name = instance.name
332+ self._start(instance, vm_ref)
333 LOG.info(_('Spawning VM %(instance_name)s created %(vm_ref)s.')
334- % locals())
335+ % locals())
336
337 def _inject_onset_files():
338 onset_files = instance.onset_files
339@@ -143,18 +143,18 @@
340
341 def _wait_for_boot():
342 try:
343- state = self.get_info(instance['name'])['state']
344+ state = self.get_info(instance_name)['state']
345 db.instance_set_state(context.get_admin_context(),
346 instance['id'], state)
347 if state == power_state.RUNNING:
348- LOG.debug(_('Instance %s: booted'), instance['name'])
349+ LOG.debug(_('Instance %s: booted'), instance_name)
350 timer.stop()
351 _inject_onset_files()
352 return True
353 except Exception, exc:
354 LOG.warn(exc)
355 LOG.exception(_('instance %s: failed to boot'),
356- instance['name'])
357+ instance_name)
358 db.instance_set_state(context.get_admin_context(),
359 instance['id'],
360 power_state.SHUTDOWN)
361@@ -202,6 +202,20 @@
362 _('Instance not present %s') % instance_name)
363 return vm
364
365+ def _acquire_bootlock(self, vm):
366+ """Prevent an instance from booting"""
367+ self._session.call_xenapi(
368+ "VM.set_blocked_operations",
369+ vm,
370+ {"start": ""})
371+
372+ def _release_bootlock(self, vm):
373+ """Allow an instance to boot"""
374+ self._session.call_xenapi(
375+ "VM.remove_from_blocked_operations",
376+ vm,
377+ "start")
378+
379 def snapshot(self, instance, image_id):
380 """ Create snapshot from a running VM instance
381
382@@ -254,7 +268,7 @@
383 """Reboot VM instance"""
384 vm = self._get_vm_opaque_ref(instance)
385 task = self._session.call_xenapi('Async.VM.clean_reboot', vm)
386- self._session.wait_for_task(instance.id, task)
387+ self._session.wait_for_task(task, instance.id)
388
389 def set_admin_password(self, instance, new_pass):
390 """Set the root/admin password on the VM instance. This is done via
391@@ -294,6 +308,11 @@
392 raise RuntimeError(resp_dict['message'])
393 return resp_dict['message']
394
395+ def _start(self, instance, vm):
396+ """Start an instance"""
397+ task = self._session.call_xenapi("Async.VM.start", vm, False, False)
398+ self._session.wait_for_task(task, instance.id)
399+
400 def inject_file(self, instance, b64_path, b64_contents):
401 """Write a file to the VM instance. The path to which it is to be
402 written and the contents of the file need to be supplied; both should
403@@ -320,8 +339,8 @@
404 raise RuntimeError(resp_dict['message'])
405 return resp_dict['message']
406
407- def _shutdown(self, instance, vm):
408- """Shutdown an instance """
409+ def _shutdown(self, instance, vm, hard=True):
410+ """Shutdown an instance"""
411 state = self.get_info(instance['name'])['state']
412 if state == power_state.SHUTDOWN:
413 LOG.warn(_("VM %(vm)s already halted, skipping shutdown...") %
414@@ -332,8 +351,13 @@
415 LOG.debug(_("Shutting down VM for Instance %(instance_id)s")
416 % locals())
417 try:
418- task = self._session.call_xenapi('Async.VM.hard_shutdown', vm)
419- self._session.wait_for_task(instance.id, task)
420+ task = None
421+ if hard:
422+ task = self._session.call_xenapi("Async.VM.hard_shutdown", vm)
423+ else:
424+ task = self._session.call_xenapi("Async.VM.clean_shutdown", vm)
425+
426+ self._session.wait_for_task(task, instance.id)
427 except self.XenAPI.Failure, exc:
428 LOG.exception(exc)
429
430@@ -350,7 +374,7 @@
431 for vdi in vdis:
432 try:
433 task = self._session.call_xenapi('Async.VDI.destroy', vdi)
434- self._session.wait_for_task(instance.id, task)
435+ self._session.wait_for_task(task, instance.id)
436 except self.XenAPI.Failure, exc:
437 LOG.exception(exc)
438
439@@ -389,7 +413,7 @@
440 args = {'kernel-file': kernel, 'ramdisk-file': ramdisk}
441 task = self._session.async_call_plugin(
442 'glance', 'remove_kernel_ramdisk', args)
443- self._session.wait_for_task(instance.id, task)
444+ self._session.wait_for_task(task, instance.id)
445
446 LOG.debug(_("kernel/ramdisk files removed"))
447
448@@ -398,7 +422,7 @@
449 instance_id = instance.id
450 try:
451 task = self._session.call_xenapi('Async.VM.destroy', vm)
452- self._session.wait_for_task(instance_id, task)
453+ self._session.wait_for_task(task, instance_id)
454 except self.XenAPI.Failure, exc:
455 LOG.exception(exc)
456
457@@ -441,7 +465,7 @@
458 def _wait_with_callback(self, instance_id, task, callback):
459 ret = None
460 try:
461- ret = self._session.wait_for_task(instance_id, task)
462+ ret = self._session.wait_for_task(task, instance_id)
463 except self.XenAPI.Failure, exc:
464 LOG.exception(exc)
465 callback(ret)
466@@ -470,6 +494,78 @@
467 task = self._session.call_xenapi('Async.VM.resume', vm, False, True)
468 self._wait_with_callback(instance.id, task, callback)
469
470+ def rescue(self, instance, callback):
471+ """Rescue the specified instance
472+ - shutdown the instance VM
473+ - set 'bootlock' to prevent the instance from starting in rescue
474+ - spawn a rescue VM (the vm name-label will be instance-N-rescue)
475+
476+ """
477+ rescue_vm = VMHelper.lookup(self._session, instance.name + "-rescue")
478+ if rescue_vm:
479+ raise RuntimeError(_(
480+ "Instance is already in Rescue Mode: %s" % instance.name))
481+
482+ vm = self._get_vm_opaque_ref(instance)
483+ self._shutdown(instance, vm)
484+ self._acquire_bootlock(vm)
485+
486+ instance._rescue = True
487+ self.spawn(instance)
488+ rescue_vm = self._get_vm_opaque_ref(instance)
489+
490+ vbd = self._session.get_xenapi().VM.get_VBDs(vm)[0]
491+ vdi_ref = self._session.get_xenapi().VBD.get_record(vbd)["VDI"]
492+ vbd_ref = VMHelper.create_vbd(
493+ self._session,
494+ rescue_vm,
495+ vdi_ref,
496+ 1,
497+ False)
498+
499+ self._session.call_xenapi("Async.VBD.plug", vbd_ref)
500+
501+ def unrescue(self, instance, callback):
502+ """Unrescue the specified instance
503+ - unplug the instance VM's disk from the rescue VM
504+ - teardown the rescue VM
505+ - release the bootlock to allow the instance VM to start
506+
507+ """
508+ rescue_vm = VMHelper.lookup(self._session, instance.name + "-rescue")
509+
510+ if not rescue_vm:
511+ raise exception.NotFound(_(
512+ "Instance is not in Rescue Mode: %s" % instance.name))
513+
514+ original_vm = self._get_vm_opaque_ref(instance)
515+ vbds = self._session.get_xenapi().VM.get_VBDs(rescue_vm)
516+
517+ instance._rescue = False
518+
519+ for vbd_ref in vbds:
520+ vbd = self._session.get_xenapi().VBD.get_record(vbd_ref)
521+ if vbd["userdevice"] == "1":
522+ VMHelper.unplug_vbd(self._session, vbd_ref)
523+ VMHelper.destroy_vbd(self._session, vbd_ref)
524+
525+ task1 = self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm)
526+ self._session.wait_for_task(task1, instance.id)
527+
528+ vdis = VMHelper.lookup_vm_vdis(self._session, rescue_vm)
529+ for vdi in vdis:
530+ try:
531+ task = self._session.call_xenapi('Async.VDI.destroy', vdi)
532+ self._session.wait_for_task(task, instance.id)
533+ except self.XenAPI.Failure:
534+ continue
535+
536+ task2 = self._session.call_xenapi('Async.VM.destroy', rescue_vm)
537+ self._session.wait_for_task(task2, instance.id)
538+
539+ self._release_bootlock(original_vm)
540+ self._start(instance, original_vm)
541+
542 def get_info(self, instance):
543 """Return data about VM instance"""
544 vm = self._get_vm_opaque_ref(instance)
545@@ -556,8 +652,17 @@
546 NetworkHelper.find_network_with_bridge(self._session, bridge)
547
548 if network_ref:
549- VMHelper.create_vif(self._session, vm_opaque_ref,
550- network_ref, instance.mac_address)
551+ try:
552+ device = "1" if instance._rescue else "0"
553+ except AttributeError:
554+ device = "0"
555+
556+ VMHelper.create_vif(
557+ self._session,
558+ vm_opaque_ref,
559+ network_ref,
560+ instance.mac_address,
561+ device)
562
563 def reset_network(self, instance):
564 """
565@@ -627,7 +732,7 @@
566 args.update(addl_args)
567 try:
568 task = self._session.async_call_plugin(plugin, method, args)
569- ret = self._session.wait_for_task(instance_id, task)
570+ ret = self._session.wait_for_task(task, instance_id)
571 except self.XenAPI.Failure, e:
572 ret = None
573 err_trace = e.details[-1]
574
575=== modified file 'nova/virt/xenapi/volumeops.py'
576--- nova/virt/xenapi/volumeops.py 2011-01-19 20:26:09 +0000
577+++ nova/virt/xenapi/volumeops.py 2011-03-02 23:12:16 +0000
578@@ -83,7 +83,7 @@
579 try:
580 task = self._session.call_xenapi('Async.VBD.plug',
581 vbd_ref)
582- self._session.wait_for_task(vol_rec['deviceNumber'], task)
583+ self._session.wait_for_task(task, vol_rec['deviceNumber'])
584 except self.XenAPI.Failure, exc:
585 LOG.exception(exc)
586 VolumeHelper.destroy_iscsi_storage(self._session,
587
588=== modified file 'nova/virt/xenapi_conn.py'
589--- nova/virt/xenapi_conn.py 2011-02-25 16:47:08 +0000
590+++ nova/virt/xenapi_conn.py 2011-03-02 23:12:16 +0000
591@@ -196,6 +196,14 @@
592 """resume the specified instance"""
593 self._vmops.resume(instance, callback)
594
595+ def rescue(self, instance, callback):
596+ """Rescue the specified instance"""
597+ self._vmops.rescue(instance, callback)
598+
599+ def unrescue(self, instance, callback):
600+ """Unrescue the specified instance"""
601+ self._vmops.unrescue(instance, callback)
602+
603 def reset_network(self, instance):
604 """reset networking for specified instance"""
605 self._vmops.reset_network(instance)
606@@ -279,7 +287,7 @@
607 self._session.xenapi.Async.host.call_plugin,
608 self.get_xenapi_host(), plugin, fn, args)
609
610- def wait_for_task(self, id, task):
611+ def wait_for_task(self, task, id=None):
612 """Return the result of the given task. The task is polled
613 until it completes. Not re-entrant."""
614 done = event.Event()
615@@ -306,10 +314,11 @@
616 try:
617 name = self._session.xenapi.task.get_name_label(task)
618 status = self._session.xenapi.task.get_status(task)
619- action = dict(
620- instance_id=int(id),
621- action=name[0:255], # Ensure action is never > 255
622- error=None)
623+ if id:
624+ action = dict(
625+ instance_id=int(id),
626+ action=name[0:255], # Ensure action is never > 255
627+ error=None)
628 if status == "pending":
629 return
630 elif status == "success":
631@@ -323,7 +332,9 @@
632 LOG.warn(_("Task [%(name)s] %(task)s status:"
633 " %(status)s %(error_info)s") % locals())
634 done.send_exception(self.XenAPI.Failure(error_info))
635- db.instance_action_create(context.get_admin_context(), action)
636+
637+ if id:
638+ db.instance_action_create(context.get_admin_context(), action)
639 except self.XenAPI.Failure, exc:
640 LOG.warn(exc)
641 done.send_exception(*sys.exc_info())