Merge lp:~jk0/nova/diagnostics-per-instance into lp:~hudson-openstack/nova/trunk

Proposed by Josh Kearney
Status: Merged
Approved by: Rick Clark
Approved revision: 478
Merged at revision: 475
Proposed branch: lp:~jk0/nova/diagnostics-per-instance
Merge into: lp:~hudson-openstack/nova/trunk
Diff against target: 403 lines (+101/-66)
6 files modified
nova/db/api.py (+5/-0)
nova/db/sqlalchemy/api.py (+12/-0)
nova/db/sqlalchemy/models.py (+0/-1)
nova/tests/virt_unittest.py (+24/-22)
nova/virt/xenapi/vmops.py (+22/-18)
nova/virt/xenapi_conn.py (+38/-25)
To merge this branch: bzr merge lp:~jk0/nova/diagnostics-per-instance
Reviewer Review Type Date Requested Status
Eric Day (community) Needs Information
Rick Clark (community) Approve
Ed Leafe (community) Approve
Review via email: mp+44394@code.launchpad.net

Commit message

Log all XenAPI actions to InstanceActions.

475. By Josh Kearney

Filter templates and dom0 from list_instances()

476. By Josh Kearney

Style correction

477. By Josh Kearney

Merged trunk

478. By Josh Kearney

Merged trunk

Revision history for this message
Ed Leafe (ed-leafe) wrote :

Looks good.

review: Approve
Revision history for this message
Rick Clark (dendrobates) wrote :

lgtm and tests pass

review: Approve
Revision history for this message
Eric Day (eday) wrote :

How is the diagnostics data intended to be pulled out and analyzed? Right now it looks like it's only being stored, and there is no API or tooling to read it.

Part of my concern with this branch and the previous one sticking data into the same core database is that this seems like more of a logging hook: you could push to some plugin interface, and then write implementations that go to a queue, another DB with monitoring, etc. It would be great to see more design details around how this data will be accessed, and the use cases behind them, before we start sticking it all in the DB.
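
For example, a rough sketch of the kind of hook I mean (the ActionLogDriver interface, the driver classes, and their names below are hypothetical, not existing Nova code):

    from nova import db


    class ActionLogDriver(object):
        """Hypothetical interface for instance-action sinks."""

        def record(self, context, action):
            """Persist one action dict (instance_id, action, error)."""
            raise NotImplementedError()


    class DBActionLog(ActionLogDriver):
        """What this branch does today: write to the core database."""

        def record(self, context, action):
            db.instance_action_create(context, action)


    class QueueActionLog(ActionLogDriver):
        """Alternative sink: push actions onto a queue for monitoring."""

        def __init__(self, publish):
            # publish is any callable that enqueues a message
            self._publish = publish

        def record(self, context, action):
            self._publish('instance_actions', action)

A deployment could then select its sink with a flag, and _poll_task would call driver.record() instead of db.instance_action_create() directly.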

review: Needs Information
Revision history for this message
Josh Kearney (jk0) wrote :

> How is the diagnostics data intended to be pulled out and analyzed? Right
> now it looks like it's only being stored, and there is no API or tooling to
> read it.

Currently there is no way to retrieve the data short of querying the database directly in SQL (that is my project today: making this available via the API). My plan was to propose these merges in small steps instead of one big chunk, gathering good feedback along the way.
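
The read side would mirror instance_action_create. Roughly (a hypothetical sketch, not yet in any branch):

    # nova/db/api.py
    def instance_action_get_all_by_instance(context, instance_id):
        """Get all actions recorded for the given instance."""
        return IMPL.instance_action_get_all_by_instance(context, instance_id)


    # nova/db/sqlalchemy/api.py
    @require_context
    def instance_action_get_all_by_instance(context, instance_id):
        """Get all actions recorded for the given instance."""
        session = get_session()
        return session.query(models.InstanceActions).\
                       filter_by(instance_id=instance_id).\
                       all()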

> Part of my concern with this branch and the previous one sticking data into
> the same core database is that this seems like more of a logging hook: you
> could push to some plugin interface, and then write implementations that go
> to a queue, another DB with monitoring, etc. It would be great to see more
> design details around how this data will be accessed, and the use cases
> behind them, before we start sticking it all in the DB.

I'm just going off what the blueprint calls for, for now. I agree that this might not be the best way to store the data, but it's easy enough to change once the distributed data store design is in place. Otherwise I just don't see this getting done in time for Bexar.

Revision history for this message
Sandy Walsh (sandy-walsh) wrote :

I'd like to see tests around _poll_task calling done.send_exception(XenAPI.Failure(error_info)), to ensure the action actually gets logged on failure. This is independent of the pending xenserver tests.
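
Something along these lines is what I have in mind (a sketch only; all the fakes and the monkey-patching are hypothetical test scaffolding, and it assumes xenapi_conn's lazy XenAPI import as in this branch):

    from nova import db
    from nova.virt import xenapi_conn


    class FakeFailure(Exception):
        """Stands in for XenAPI.Failure."""

        def __init__(self, details):
            self.details = details


    class FakeTaskAPI(object):
        """Stands in for session.xenapi.task, always reporting failure."""

        def get_name_label(self, task):
            return 'Async.VM.clean_reboot'

        def get_status(self, task):
            return 'failure'

        def get_error_info(self, task):
            return ['INTERNAL_ERROR', 'boom']


    class FakeDone(object):
        """Captures send_exception() so the test can assert it fired."""

        def __init__(self):
            self.exception = None

        def send_exception(self, *exc_info):
            self.exception = exc_info


    def test_failed_task_logs_action():
        # xenapi_conn imports XenAPI lazily; substitute a stand-in module.
        xenapi_conn.XenAPI = type('FakeXenAPIModule', (object,),
                                  {'Failure': FakeFailure})
        # Build a session object without logging in to a real XenServer.
        session = xenapi_conn.XenAPISession.__new__(xenapi_conn.XenAPISession)
        fake_xenapi = type('FakeXenAPI', (object,), {'task': FakeTaskAPI()})()
        session._session = type('FakeSession', (object,),
                                {'xenapi': fake_xenapi})()

        # Capture the action instead of writing to the database.
        recorded = []
        original = db.instance_action_create
        db.instance_action_create = lambda ctxt, values: recorded.append(values)
        done = FakeDone()
        try:
            session._poll_task(1, 'fake-task-ref', done)
        finally:
            db.instance_action_create = original

        assert done.exception is not None
        assert recorded and recorded[0]['error'] is not None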

If len(action) > 255, will this break the operation? Shouldn't only the logging fail, with the action itself still completing?
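
Something like this around the write would cover it (my suggestion, not what the branch currently does; the 255 matches the action column width):

    # In _poll_task: truncate the name to the column width up front...
    action = dict(
        instance_id=int(instance_id),
        action=name[:255],  # action is Column(String(255))
        error=None)

    # ...and never let a failure to record the action break the task poll.
    try:
        db.instance_action_create(context.get_admin_context(), action)
    except Exception:
        logging.exception(_("Failed to record instance action %s"), name)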

Perhaps more of a general question, but should there be an error code on failures?
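
For example (a hypothetical tweak, not in this branch; XenAPI's error_info is a list whose first element is the error code):

    # nova/db/sqlalchemy/models.py -- add a column alongside 'error'
    action = Column(String(255))
    error_code = Column(String(255))  # e.g. 'HANDLE_INVALID'
    error = Column(Text)

    # nova/virt/xenapi_conn.py -- in _poll_task's failure branch
    action['error_code'] = error_info[0] if error_info else None
    action['error'] = str(error_info)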

Preview Diff

=== modified file 'nova/db/api.py'
--- nova/db/api.py 2010-11-03 22:06:00 +0000
+++ nova/db/api.py 2010-12-22 16:46:27 +0000
@@ -334,6 +334,11 @@
                                                security_group_id)
 
 
+def instance_action_create(context, values):
+    """Create an instance action from the values dictionary."""
+    return IMPL.instance_action_create(context, values)
+
+
 ###################
 
 

=== modified file 'nova/db/sqlalchemy/api.py'
--- nova/db/sqlalchemy/api.py 2010-12-21 15:25:39 +0000
+++ nova/db/sqlalchemy/api.py 2010-12-22 16:46:27 +0000
@@ -749,6 +749,18 @@
         instance_ref.save(session=session)
 
 
+@require_context
+def instance_action_create(context, values):
+    """Create an instance action from the values dictionary."""
+    action_ref = models.InstanceActions()
+    action_ref.update(values)
+
+    session = get_session()
+    with session.begin():
+        action_ref.save(session=session)
+    return action_ref
+
+
 ###################
 
 

=== modified file 'nova/db/sqlalchemy/models.py'
--- nova/db/sqlalchemy/models.py 2010-12-20 20:21:01 +0000
+++ nova/db/sqlalchemy/models.py 2010-12-22 16:46:27 +0000
@@ -248,7 +248,6 @@
     instance_id = Column(Integer, ForeignKey('instances.id'))
 
     action = Column(String(255))
-    result = Column(Boolean)
     error = Column(Text)
 
 

=== modified file 'nova/tests/virt_unittest.py'
--- nova/tests/virt_unittest.py 2010-12-17 12:07:43 +0000
+++ nova/tests/virt_unittest.py 2010-12-22 16:46:27 +0000
@@ -129,43 +129,45 @@
             check_list.append(check)
         else:
             if expect_kernel:
-                check = (lambda t: t.find('./os/kernel').text.split('/'
-                    )[1], 'kernel')
+                check = (lambda t: t.find('./os/kernel').text.split(
+                    '/')[1], 'kernel')
             else:
                 check = (lambda t: t.find('./os/kernel'), None)
             check_list.append(check)
 
             if expect_ramdisk:
-                check = (lambda t: t.find('./os/initrd').text.split('/'
-                    )[1], 'ramdisk')
+                check = (lambda t: t.find('./os/initrd').text.split(
+                    '/')[1], 'ramdisk')
             else:
                 check = (lambda t: t.find('./os/initrd'), None)
             check_list.append(check)
 
         common_checks = [
             (lambda t: t.find('.').tag, 'domain'),
-            (lambda t: t.find('./devices/interface/filterref/parameter'
-                ).get('name'), 'IP'),
-            (lambda t: t.find('./devices/interface/filterref/parameter'
-                ).get('value'), '10.11.12.13'),
-            (lambda t: t.findall('./devices/interface/filterref/parameter'
-                )[1].get('name'), 'DHCPSERVER'),
-            (lambda t: t.findall('./devices/interface/filterref/parameter'
-                )[1].get('value'), '10.0.0.1'),
-            (lambda t: t.find('./devices/serial/source').get('path'
-                ).split('/')[1], 'console.log'),
+            (lambda t: t.find(
+                './devices/interface/filterref/parameter').get('name'), 'IP'),
+            (lambda t: t.find(
+                './devices/interface/filterref/parameter').get(
+                'value'), '10.11.12.13'),
+            (lambda t: t.findall(
+                './devices/interface/filterref/parameter')[1].get(
+                'name'), 'DHCPSERVER'),
+            (lambda t: t.findall(
+                './devices/interface/filterref/parameter')[1].get(
+                'value'), '10.0.0.1'),
+            (lambda t: t.find('./devices/serial/source').get(
+                'path').split('/')[1], 'console.log'),
             (lambda t: t.find('./memory').text, '2097152')]
 
         if rescue:
-            common_checks += [(lambda t: t.findall('./devices/disk/source'
-                )[0].get('file').split('/')[1],
-                'rescue-disk'),
-                (lambda t: t.findall('./devices/disk/source'
-                )[1].get('file').split('/')[1],
-                'disk')]
+            common_checks += [
+                (lambda t: t.findall('./devices/disk/source')[0].get(
+                    'file').split('/')[1], 'rescue-disk'),
+                (lambda t: t.findall('./devices/disk/source')[1].get(
+                    'file').split('/')[1], 'disk')]
         else:
-            common_checks += [(lambda t: t.findall('./devices/disk/source'
-                )[0].get('file').split('/')[1],
+            common_checks += [(lambda t: t.findall(
+                './devices/disk/source')[0].get('file').split('/')[1],
                 'disk')]
 
         for (libvirt_type, (expected_uri, checks)) in type_uri_map.iteritems():

=== modified file 'nova/virt/xenapi/vmops.py'
--- nova/virt/xenapi/vmops.py 2010-12-15 18:19:39 +0000
+++ nova/virt/xenapi/vmops.py 2010-12-22 16:46:27 +0000
@@ -44,12 +44,16 @@
         VMHelper.late_import()
 
     def list_instances(self):
-        """ List VM instances """
-        return [self._session.get_xenapi().VM.get_name_label(vm) \
-                for vm in self._session.get_xenapi().VM.get_all()]
+        """List VM instances"""
+        vms = []
+        for vm in self._session.get_xenapi().VM.get_all():
+            rec = self._session.get_xenapi().VM.get_record(vm)
+            if not rec["is_a_template"] and not rec["is_control_domain"]:
+                vms.append(rec["name_label"])
+        return vms
 
     def spawn(self, instance):
-        """ Create VM instance """
+        """Create VM instance"""
         vm = VMHelper.lookup(self._session, instance.name)
         if vm is not None:
             raise Exception('Attempted to create non-unique name %s' %
@@ -81,16 +85,16 @@
                      vm_ref)
 
     def reboot(self, instance):
-        """ Reboot VM instance """
+        """Reboot VM instance"""
         instance_name = instance.name
         vm = VMHelper.lookup(self._session, instance_name)
         if vm is None:
             raise Exception('instance not present %s' % instance_name)
         task = self._session.call_xenapi('Async.VM.clean_reboot', vm)
-        self._session.wait_for_task(task)
+        self._session.wait_for_task(instance.id, task)
 
     def destroy(self, instance):
-        """ Destroy VM instance """
+        """Destroy VM instance"""
         vm = VMHelper.lookup(self._session, instance.name)
         if vm is None:
             # Don't complain, just return. This lets us clean up instances
@@ -101,7 +105,7 @@
         try:
             task = self._session.call_xenapi('Async.VM.hard_shutdown',
                                              vm)
-            self._session.wait_for_task(task)
+            self._session.wait_for_task(instance.id, task)
         except XenAPI.Failure, exc:
             logging.warn(exc)
         # Disk clean-up
@@ -109,43 +113,43 @@
         for vdi in vdis:
             try:
                 task = self._session.call_xenapi('Async.VDI.destroy', vdi)
-                self._session.wait_for_task(task)
+                self._session.wait_for_task(instance.id, task)
             except XenAPI.Failure, exc:
                 logging.warn(exc)
         try:
             task = self._session.call_xenapi('Async.VM.destroy', vm)
-            self._session.wait_for_task(task)
+            self._session.wait_for_task(instance.id, task)
         except XenAPI.Failure, exc:
             logging.warn(exc)
 
-    def _wait_with_callback(self, task, callback):
+    def _wait_with_callback(self, instance_id, task, callback):
         ret = None
         try:
-            ret = self._session.wait_for_task(task)
+            ret = self._session.wait_for_task(instance_id, task)
         except XenAPI.Failure, exc:
             logging.warn(exc)
         callback(ret)
 
     def pause(self, instance, callback):
-        """ Pause VM instance """
+        """Pause VM instance"""
         instance_name = instance.name
         vm = VMHelper.lookup(self._session, instance_name)
         if vm is None:
             raise Exception('instance not present %s' % instance_name)
         task = self._session.call_xenapi('Async.VM.pause', vm)
-        self._wait_with_callback(task, callback)
+        self._wait_with_callback(instance.id, task, callback)
 
     def unpause(self, instance, callback):
-        """ Unpause VM instance """
+        """Unpause VM instance"""
         instance_name = instance.name
         vm = VMHelper.lookup(self._session, instance_name)
         if vm is None:
             raise Exception('instance not present %s' % instance_name)
         task = self._session.call_xenapi('Async.VM.unpause', vm)
-        self._wait_with_callback(task, callback)
+        self._wait_with_callback(instance.id, task, callback)
 
     def get_info(self, instance_id):
-        """ Return data about VM instance """
+        """Return data about VM instance"""
         vm = VMHelper.lookup_blocking(self._session, instance_id)
         if vm is None:
             raise Exception('instance not present %s' % instance_id)
@@ -161,6 +165,6 @@
         return VMHelper.compile_diagnostics(self._session, rec)
 
     def get_console_output(self, instance):
-        """ Return snapshot of console """
+        """Return snapshot of console"""
         # TODO: implement this to fix pylint!
         return 'FAKE CONSOLE OUTPUT of instance'

=== modified file 'nova/virt/xenapi_conn.py'
--- nova/virt/xenapi_conn.py 2010-12-21 15:25:39 +0000
+++ nova/virt/xenapi_conn.py 2010-12-22 16:46:27 +0000
@@ -54,6 +54,8 @@
 from eventlet import event
 from eventlet import tpool
 
+from nova import context
+from nova import db
 from nova import utils
 from nova import flags
 from nova.virt.xenapi.vmops import VMOps
@@ -101,7 +103,7 @@
 
 
 class XenAPIConnection(object):
-    """ A connection to XenServer or Xen Cloud Platform """
+    """A connection to XenServer or Xen Cloud Platform"""
 
     def __init__(self, url, user, pw):
         session = XenAPISession(url, user, pw)
@@ -109,31 +111,31 @@
         self._volumeops = VolumeOps(session)
 
     def list_instances(self):
-        """ List VM instances """
+        """List VM instances"""
         return self._vmops.list_instances()
 
     def spawn(self, instance):
-        """ Create VM instance """
+        """Create VM instance"""
         self._vmops.spawn(instance)
 
     def reboot(self, instance):
-        """ Reboot VM instance """
+        """Reboot VM instance"""
        self._vmops.reboot(instance)
 
     def destroy(self, instance):
-        """ Destroy VM instance """
+        """Destroy VM instance"""
         self._vmops.destroy(instance)
 
     def pause(self, instance, callback):
-        """ Pause VM instance """
+        """Pause VM instance"""
         self._vmops.pause(instance, callback)
 
     def unpause(self, instance, callback):
-        """ Unpause paused VM instance """
+        """Unpause paused VM instance"""
         self._vmops.unpause(instance, callback)
 
     def get_info(self, instance_id):
-        """ Return data about VM instance """
+        """Return data about VM instance"""
         return self._vmops.get_info(instance_id)
 
     def get_diagnostics(self, instance_id):
@@ -141,33 +143,33 @@
         return self._vmops.get_diagnostics(instance_id)
 
     def get_console_output(self, instance):
-        """ Return snapshot of console """
+        """Return snapshot of console"""
         return self._vmops.get_console_output(instance)
 
     def attach_volume(self, instance_name, device_path, mountpoint):
-        """ Attach volume storage to VM instance """
+        """Attach volume storage to VM instance"""
         return self._volumeops.attach_volume(instance_name,
                                              device_path,
                                              mountpoint)
 
     def detach_volume(self, instance_name, mountpoint):
-        """ Detach volume storage to VM instance """
+        """Detach volume storage to VM instance"""
         return self._volumeops.detach_volume(instance_name, mountpoint)
 
 
 class XenAPISession(object):
-    """ The session to invoke XenAPI SDK calls """
+    """The session to invoke XenAPI SDK calls"""
 
     def __init__(self, url, user, pw):
         self._session = XenAPI.Session(url)
         self._session.login_with_password(user, pw)
 
     def get_xenapi(self):
-        """ Return the xenapi object """
+        """Return the xenapi object"""
         return self._session.xenapi
 
     def get_xenapi_host(self):
-        """ Return the xenapi host """
+        """Return the xenapi host"""
         return self._session.xenapi.session.get_this_host(self._session.handle)
 
     def call_xenapi(self, method, *args):
@@ -183,42 +185,53 @@
             self._session.xenapi.Async.host.call_plugin,
             self.get_xenapi_host(), plugin, fn, args)
 
-    def wait_for_task(self, task):
+    def wait_for_task(self, instance_id, task):
         """Return a Deferred that will give the result of the given task.
         The task is polled until it completes."""
 
         done = event.Event()
-        loop = utils.LoopingCall(self._poll_task, task, done)
+        loop = utils.LoopingCall(self._poll_task, instance_id, task, done)
         loop.start(FLAGS.xenapi_task_poll_interval, now=True)
         rv = done.wait()
         loop.stop()
         return rv
 
-    def _poll_task(self, task, done):
+    def _poll_task(self, instance_id, task, done):
        """Poll the given XenAPI task, and fire the given Deferred if we
         get a result."""
         try:
-            #logging.debug('Polling task %s...', task)
+            name = self._session.xenapi.task.get_name_label(task)
             status = self._session.xenapi.task.get_status(task)
-            if status == 'pending':
+            action = dict(
+                instance_id=int(instance_id),
+                action=name,
+                error=None)
+            if status == "pending":
                 return
-            elif status == 'success':
+            elif status == "success":
                 result = self._session.xenapi.task.get_result(task)
-                logging.info(_('Task %s status: success. %s'), task, result)
+                logging.info(_("Task [%s] %s status: success %s") % (
+                    name,
+                    task,
+                    result))
                 done.send(_parse_xmlrpc_value(result))
             else:
                 error_info = self._session.xenapi.task.get_error_info(task)
-                logging.warn(_('Task %s status: %s. %s'), task, status,
-                             error_info)
+                action["error"] = str(error_info)
+                logging.warn(_("Task [%s] %s status: %s %s") % (
+                    name,
+                    task,
+                    status,
+                    error_info))
                 done.send_exception(XenAPI.Failure(error_info))
-            #logging.debug('Polling task %s done.', task)
+            db.instance_action_create(context.get_admin_context(), action)
         except XenAPI.Failure, exc:
             logging.warn(exc)
             done.send_exception(*sys.exc_info())
 
 
 def _unwrap_plugin_exceptions(func, *args, **kwargs):
-    """ Parse exception details """
+    """Parse exception details"""
     try:
         return func(*args, **kwargs)
     except XenAPI.Failure, exc: