Merge lp:~jk0/nova/diagnostics-per-instance into lp:~hudson-openstack/nova/trunk
- diagnostics-per-instance
- Merge into trunk
Status: | Merged |
---|---|
Approved by: | Rick Clark |
Approved revision: | 478 |
Merged at revision: | 475 |
Proposed branch: | lp:~jk0/nova/diagnostics-per-instance |
Merge into: | lp:~hudson-openstack/nova/trunk |
Diff against target: |
403 lines (+101/-66) 6 files modified
nova/db/api.py (+5/-0) nova/db/sqlalchemy/api.py (+12/-0) nova/db/sqlalchemy/models.py (+0/-1) nova/tests/virt_unittest.py (+24/-22) nova/virt/xenapi/vmops.py (+22/-18) nova/virt/xenapi_conn.py (+38/-25) |
To merge this branch: | bzr merge lp:~jk0/nova/diagnostics-per-instance |
Related bugs: | |
Related blueprints: |
Run Diagnostics on Instance
(Medium)
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Eric Day (community) | Needs Information | ||
Rick Clark (community) | Approve | ||
Ed Leafe (community) | Approve | ||
Review via email: mp+44394@code.launchpad.net |
Commit message
Log all XenAPI actions to InstanceActions.
Description of the change
- 475. By Josh Kearney
-
Filter templates and dom0 from list_instances()
- 476. By Josh Kearney
-
Style correction
- 477. By Josh Kearney
-
Merged trunk
- 478. By Josh Kearney
-
Merged trunk
Rick Clark (dendrobates) wrote : | # |
lgtm and tests pass
Eric Day (eday) wrote : | # |
How is the diagnostics data intended to be pulled out and analyzed? Right now it looks like it's only being stored, but there is no API or tools to read it.
Part of my concern with this and the previous branch sticking it into the same core database is that this seems like more of a logging hook, where you could push to some plugin interface, and then write implementations to a queue, another DB with monitoring, etc. It would be great to see more design details around how this data will be accessed and the use cases behind them before we start sticking it all in the DB.
Josh Kearney (jk0) wrote : | # |
> How is the diagnostics data intended to be pulled out and analyzed? Right now
> it looks like it's only being stored, but there is no API or tools to read it.
Currently there is no way to retrieve the data short of making calls directly in SQL (that is my project today -- making this available via the API). My thoughts were to propose these merges in small steps instead of one big chunk, getting lots of good feedback along the way.
> Part of my concern with this and the previous branch sticking it into the same
> core database is that this seems like more of a logging hook, where you could
> push to some plugin interface, and then write implementations to a queue,
> another DB with monitoring, etc. It would be great to see more design details
> around how this data will be accessed and the use cases behind them before we
> start sticking it all in the DB.
I'm just going off of what the blueprint wants for now. I agree that this might not be the best way to store the data, but it's easy enough to change once the distributed data store design is in place. Otherwise I just don't see this getting done in time for Bexar.
Sandy Walsh (sandy-walsh) wrote : | # |
Would like to see tests around _poll_task calling done.send_exception()
If the len(action) > 255, will this bust the operation? Shouldn't just the logging fail, but the action complete?
Perhaps more of a general question, but should there be an error code on failures?
Preview Diff
1 | === modified file 'nova/db/api.py' |
2 | --- nova/db/api.py 2010-11-03 22:06:00 +0000 |
3 | +++ nova/db/api.py 2010-12-22 16:46:27 +0000 |
4 | @@ -334,6 +334,11 @@ |
5 | security_group_id) |
6 | |
7 | |
8 | +def instance_action_create(context, values): |
9 | + """Create an instance action from the values dictionary.""" |
10 | + return IMPL.instance_action_create(context, values) |
11 | + |
12 | + |
13 | ################### |
14 | |
15 | |
16 | |
17 | === modified file 'nova/db/sqlalchemy/api.py' |
18 | --- nova/db/sqlalchemy/api.py 2010-12-21 15:25:39 +0000 |
19 | +++ nova/db/sqlalchemy/api.py 2010-12-22 16:46:27 +0000 |
20 | @@ -749,6 +749,18 @@ |
21 | instance_ref.save(session=session) |
22 | |
23 | |
24 | +@require_context |
25 | +def instance_action_create(context, values): |
26 | + """Create an instance action from the values dictionary.""" |
27 | + action_ref = models.InstanceActions() |
28 | + action_ref.update(values) |
29 | + |
30 | + session = get_session() |
31 | + with session.begin(): |
32 | + action_ref.save(session=session) |
33 | + return action_ref |
34 | + |
35 | + |
36 | ################### |
37 | |
38 | |
39 | |
40 | === modified file 'nova/db/sqlalchemy/models.py' |
41 | --- nova/db/sqlalchemy/models.py 2010-12-20 20:21:01 +0000 |
42 | +++ nova/db/sqlalchemy/models.py 2010-12-22 16:46:27 +0000 |
43 | @@ -248,7 +248,6 @@ |
44 | instance_id = Column(Integer, ForeignKey('instances.id')) |
45 | |
46 | action = Column(String(255)) |
47 | - result = Column(Boolean) |
48 | error = Column(Text) |
49 | |
50 | |
51 | |
52 | === modified file 'nova/tests/virt_unittest.py' |
53 | --- nova/tests/virt_unittest.py 2010-12-17 12:07:43 +0000 |
54 | +++ nova/tests/virt_unittest.py 2010-12-22 16:46:27 +0000 |
55 | @@ -129,43 +129,45 @@ |
56 | check_list.append(check) |
57 | else: |
58 | if expect_kernel: |
59 | - check = (lambda t: t.find('./os/kernel').text.split('/' |
60 | - )[1], 'kernel') |
61 | + check = (lambda t: t.find('./os/kernel').text.split( |
62 | + '/')[1], 'kernel') |
63 | else: |
64 | check = (lambda t: t.find('./os/kernel'), None) |
65 | check_list.append(check) |
66 | |
67 | if expect_ramdisk: |
68 | - check = (lambda t: t.find('./os/initrd').text.split('/' |
69 | - )[1], 'ramdisk') |
70 | + check = (lambda t: t.find('./os/initrd').text.split( |
71 | + '/')[1], 'ramdisk') |
72 | else: |
73 | check = (lambda t: t.find('./os/initrd'), None) |
74 | check_list.append(check) |
75 | |
76 | common_checks = [ |
77 | (lambda t: t.find('.').tag, 'domain'), |
78 | - (lambda t: t.find('./devices/interface/filterref/parameter' |
79 | - ).get('name'), 'IP'), |
80 | - (lambda t: t.find('./devices/interface/filterref/parameter' |
81 | - ).get('value'), '10.11.12.13'), |
82 | - (lambda t: t.findall('./devices/interface/filterref/parameter' |
83 | - )[1].get('name'), 'DHCPSERVER'), |
84 | - (lambda t: t.findall('./devices/interface/filterref/parameter' |
85 | - )[1].get('value'), '10.0.0.1'), |
86 | - (lambda t: t.find('./devices/serial/source').get('path' |
87 | - ).split('/')[1], 'console.log'), |
88 | + (lambda t: t.find( |
89 | + './devices/interface/filterref/parameter').get('name'), 'IP'), |
90 | + (lambda t: t.find( |
91 | + './devices/interface/filterref/parameter').get( |
92 | + 'value'), '10.11.12.13'), |
93 | + (lambda t: t.findall( |
94 | + './devices/interface/filterref/parameter')[1].get( |
95 | + 'name'), 'DHCPSERVER'), |
96 | + (lambda t: t.findall( |
97 | + './devices/interface/filterref/parameter')[1].get( |
98 | + 'value'), '10.0.0.1'), |
99 | + (lambda t: t.find('./devices/serial/source').get( |
100 | + 'path').split('/')[1], 'console.log'), |
101 | (lambda t: t.find('./memory').text, '2097152')] |
102 | |
103 | if rescue: |
104 | - common_checks += [(lambda t: t.findall('./devices/disk/source' |
105 | - )[0].get('file').split('/')[1], |
106 | - 'rescue-disk'), |
107 | - (lambda t: t.findall('./devices/disk/source' |
108 | - )[1].get('file').split('/')[1], |
109 | - 'disk')] |
110 | + common_checks += [ |
111 | + (lambda t: t.findall('./devices/disk/source')[0].get( |
112 | + 'file').split('/')[1], 'rescue-disk'), |
113 | + (lambda t: t.findall('./devices/disk/source')[1].get( |
114 | + 'file').split('/')[1], 'disk')] |
115 | else: |
116 | - common_checks += [(lambda t: t.findall('./devices/disk/source' |
117 | - )[0].get('file').split('/')[1], |
118 | + common_checks += [(lambda t: t.findall( |
119 | + './devices/disk/source')[0].get('file').split('/')[1], |
120 | 'disk')] |
121 | |
122 | for (libvirt_type, (expected_uri, checks)) in type_uri_map.iteritems(): |
123 | |
124 | === modified file 'nova/virt/xenapi/vmops.py' |
125 | --- nova/virt/xenapi/vmops.py 2010-12-15 18:19:39 +0000 |
126 | +++ nova/virt/xenapi/vmops.py 2010-12-22 16:46:27 +0000 |
127 | @@ -44,12 +44,16 @@ |
128 | VMHelper.late_import() |
129 | |
130 | def list_instances(self): |
131 | - """ List VM instances """ |
132 | - return [self._session.get_xenapi().VM.get_name_label(vm) \ |
133 | - for vm in self._session.get_xenapi().VM.get_all()] |
134 | + """List VM instances""" |
135 | + vms = [] |
136 | + for vm in self._session.get_xenapi().VM.get_all(): |
137 | + rec = self._session.get_xenapi().VM.get_record(vm) |
138 | + if not rec["is_a_template"] and not rec["is_control_domain"]: |
139 | + vms.append(rec["name_label"]) |
140 | + return vms |
141 | |
142 | def spawn(self, instance): |
143 | - """ Create VM instance """ |
144 | + """Create VM instance""" |
145 | vm = VMHelper.lookup(self._session, instance.name) |
146 | if vm is not None: |
147 | raise Exception('Attempted to create non-unique name %s' % |
148 | @@ -81,16 +85,16 @@ |
149 | vm_ref) |
150 | |
151 | def reboot(self, instance): |
152 | - """ Reboot VM instance """ |
153 | + """Reboot VM instance""" |
154 | instance_name = instance.name |
155 | vm = VMHelper.lookup(self._session, instance_name) |
156 | if vm is None: |
157 | raise Exception('instance not present %s' % instance_name) |
158 | task = self._session.call_xenapi('Async.VM.clean_reboot', vm) |
159 | - self._session.wait_for_task(task) |
160 | + self._session.wait_for_task(instance.id, task) |
161 | |
162 | def destroy(self, instance): |
163 | - """ Destroy VM instance """ |
164 | + """Destroy VM instance""" |
165 | vm = VMHelper.lookup(self._session, instance.name) |
166 | if vm is None: |
167 | # Don't complain, just return. This lets us clean up instances |
168 | @@ -101,7 +105,7 @@ |
169 | try: |
170 | task = self._session.call_xenapi('Async.VM.hard_shutdown', |
171 | vm) |
172 | - self._session.wait_for_task(task) |
173 | + self._session.wait_for_task(instance.id, task) |
174 | except XenAPI.Failure, exc: |
175 | logging.warn(exc) |
176 | # Disk clean-up |
177 | @@ -109,43 +113,43 @@ |
178 | for vdi in vdis: |
179 | try: |
180 | task = self._session.call_xenapi('Async.VDI.destroy', vdi) |
181 | - self._session.wait_for_task(task) |
182 | + self._session.wait_for_task(instance.id, task) |
183 | except XenAPI.Failure, exc: |
184 | logging.warn(exc) |
185 | try: |
186 | task = self._session.call_xenapi('Async.VM.destroy', vm) |
187 | - self._session.wait_for_task(task) |
188 | + self._session.wait_for_task(instance.id, task) |
189 | except XenAPI.Failure, exc: |
190 | logging.warn(exc) |
191 | |
192 | - def _wait_with_callback(self, task, callback): |
193 | + def _wait_with_callback(self, instance_id, task, callback): |
194 | ret = None |
195 | try: |
196 | - ret = self._session.wait_for_task(task) |
197 | + ret = self._session.wait_for_task(instance_id, task) |
198 | except XenAPI.Failure, exc: |
199 | logging.warn(exc) |
200 | callback(ret) |
201 | |
202 | def pause(self, instance, callback): |
203 | - """ Pause VM instance """ |
204 | + """Pause VM instance""" |
205 | instance_name = instance.name |
206 | vm = VMHelper.lookup(self._session, instance_name) |
207 | if vm is None: |
208 | raise Exception('instance not present %s' % instance_name) |
209 | task = self._session.call_xenapi('Async.VM.pause', vm) |
210 | - self._wait_with_callback(task, callback) |
211 | + self._wait_with_callback(instance.id, task, callback) |
212 | |
213 | def unpause(self, instance, callback): |
214 | - """ Unpause VM instance """ |
215 | + """Unpause VM instance""" |
216 | instance_name = instance.name |
217 | vm = VMHelper.lookup(self._session, instance_name) |
218 | if vm is None: |
219 | raise Exception('instance not present %s' % instance_name) |
220 | task = self._session.call_xenapi('Async.VM.unpause', vm) |
221 | - self._wait_with_callback(task, callback) |
222 | + self._wait_with_callback(instance.id, task, callback) |
223 | |
224 | def get_info(self, instance_id): |
225 | - """ Return data about VM instance """ |
226 | + """Return data about VM instance""" |
227 | vm = VMHelper.lookup_blocking(self._session, instance_id) |
228 | if vm is None: |
229 | raise Exception('instance not present %s' % instance_id) |
230 | @@ -161,6 +165,6 @@ |
231 | return VMHelper.compile_diagnostics(self._session, rec) |
232 | |
233 | def get_console_output(self, instance): |
234 | - """ Return snapshot of console """ |
235 | + """Return snapshot of console""" |
236 | # TODO: implement this to fix pylint! |
237 | return 'FAKE CONSOLE OUTPUT of instance' |
238 | |
239 | === modified file 'nova/virt/xenapi_conn.py' |
240 | --- nova/virt/xenapi_conn.py 2010-12-21 15:25:39 +0000 |
241 | +++ nova/virt/xenapi_conn.py 2010-12-22 16:46:27 +0000 |
242 | @@ -54,6 +54,8 @@ |
243 | from eventlet import event |
244 | from eventlet import tpool |
245 | |
246 | +from nova import context |
247 | +from nova import db |
248 | from nova import utils |
249 | from nova import flags |
250 | from nova.virt.xenapi.vmops import VMOps |
251 | @@ -101,7 +103,7 @@ |
252 | |
253 | |
254 | class XenAPIConnection(object): |
255 | - """ A connection to XenServer or Xen Cloud Platform """ |
256 | + """A connection to XenServer or Xen Cloud Platform""" |
257 | |
258 | def __init__(self, url, user, pw): |
259 | session = XenAPISession(url, user, pw) |
260 | @@ -109,31 +111,31 @@ |
261 | self._volumeops = VolumeOps(session) |
262 | |
263 | def list_instances(self): |
264 | - """ List VM instances """ |
265 | + """List VM instances""" |
266 | return self._vmops.list_instances() |
267 | |
268 | def spawn(self, instance): |
269 | - """ Create VM instance """ |
270 | + """Create VM instance""" |
271 | self._vmops.spawn(instance) |
272 | |
273 | def reboot(self, instance): |
274 | - """ Reboot VM instance """ |
275 | + """Reboot VM instance""" |
276 | self._vmops.reboot(instance) |
277 | |
278 | def destroy(self, instance): |
279 | - """ Destroy VM instance """ |
280 | + """Destroy VM instance""" |
281 | self._vmops.destroy(instance) |
282 | |
283 | def pause(self, instance, callback): |
284 | - """ Pause VM instance """ |
285 | + """Pause VM instance""" |
286 | self._vmops.pause(instance, callback) |
287 | |
288 | def unpause(self, instance, callback): |
289 | - """ Unpause paused VM instance """ |
290 | + """Unpause paused VM instance""" |
291 | self._vmops.unpause(instance, callback) |
292 | |
293 | def get_info(self, instance_id): |
294 | - """ Return data about VM instance """ |
295 | + """Return data about VM instance""" |
296 | return self._vmops.get_info(instance_id) |
297 | |
298 | def get_diagnostics(self, instance_id): |
299 | @@ -141,33 +143,33 @@ |
300 | return self._vmops.get_diagnostics(instance_id) |
301 | |
302 | def get_console_output(self, instance): |
303 | - """ Return snapshot of console """ |
304 | + """Return snapshot of console""" |
305 | return self._vmops.get_console_output(instance) |
306 | |
307 | def attach_volume(self, instance_name, device_path, mountpoint): |
308 | - """ Attach volume storage to VM instance """ |
309 | + """Attach volume storage to VM instance""" |
310 | return self._volumeops.attach_volume(instance_name, |
311 | device_path, |
312 | mountpoint) |
313 | |
314 | def detach_volume(self, instance_name, mountpoint): |
315 | - """ Detach volume storage to VM instance """ |
316 | + """Detach volume storage to VM instance""" |
317 | return self._volumeops.detach_volume(instance_name, mountpoint) |
318 | |
319 | |
320 | class XenAPISession(object): |
321 | - """ The session to invoke XenAPI SDK calls """ |
322 | + """The session to invoke XenAPI SDK calls""" |
323 | |
324 | def __init__(self, url, user, pw): |
325 | self._session = XenAPI.Session(url) |
326 | self._session.login_with_password(user, pw) |
327 | |
328 | def get_xenapi(self): |
329 | - """ Return the xenapi object """ |
330 | + """Return the xenapi object""" |
331 | return self._session.xenapi |
332 | |
333 | def get_xenapi_host(self): |
334 | - """ Return the xenapi host """ |
335 | + """Return the xenapi host""" |
336 | return self._session.xenapi.session.get_this_host(self._session.handle) |
337 | |
338 | def call_xenapi(self, method, *args): |
339 | @@ -183,42 +185,53 @@ |
340 | self._session.xenapi.Async.host.call_plugin, |
341 | self.get_xenapi_host(), plugin, fn, args) |
342 | |
343 | - def wait_for_task(self, task): |
344 | + def wait_for_task(self, instance_id, task): |
345 | """Return a Deferred that will give the result of the given task. |
346 | The task is polled until it completes.""" |
347 | |
348 | done = event.Event() |
349 | - loop = utils.LoopingCall(self._poll_task, task, done) |
350 | + loop = utils.LoopingCall(self._poll_task, instance_id, task, done) |
351 | loop.start(FLAGS.xenapi_task_poll_interval, now=True) |
352 | rv = done.wait() |
353 | loop.stop() |
354 | return rv |
355 | |
356 | - def _poll_task(self, task, done): |
357 | + def _poll_task(self, instance_id, task, done): |
358 | """Poll the given XenAPI task, and fire the given Deferred if we |
359 | get a result.""" |
360 | try: |
361 | - #logging.debug('Polling task %s...', task) |
362 | + name = self._session.xenapi.task.get_name_label(task) |
363 | status = self._session.xenapi.task.get_status(task) |
364 | - if status == 'pending': |
365 | + action = dict( |
366 | + instance_id=int(instance_id), |
367 | + action=name, |
368 | + error=None) |
369 | + if status == "pending": |
370 | return |
371 | - elif status == 'success': |
372 | + elif status == "success": |
373 | result = self._session.xenapi.task.get_result(task) |
374 | - logging.info(_('Task %s status: success. %s'), task, result) |
375 | + logging.info(_("Task [%s] %s status: success %s") % ( |
376 | + name, |
377 | + task, |
378 | + result)) |
379 | done.send(_parse_xmlrpc_value(result)) |
380 | else: |
381 | error_info = self._session.xenapi.task.get_error_info(task) |
382 | - logging.warn(_('Task %s status: %s. %s'), task, status, |
383 | - error_info) |
384 | + action["error"] = str(error_info) |
385 | + logging.warn(_("Task [%s] %s status: %s %s") % ( |
386 | + name, |
387 | + task, |
388 | + status, |
389 | + error_info)) |
390 | done.send_exception(XenAPI.Failure(error_info)) |
391 | - #logging.debug('Polling task %s done.', task) |
392 | + db.instance_action_create(context.get_admin_context(), action) |
393 | except XenAPI.Failure, exc: |
394 | logging.warn(exc) |
395 | done.send_exception(*sys.exc_info()) |
396 | |
397 | |
398 | def _unwrap_plugin_exceptions(func, *args, **kwargs): |
399 | - """ Parse exception details """ |
400 | + """Parse exception details""" |
401 | try: |
402 | return func(*args, **kwargs) |
403 | except XenAPI.Failure, exc: |
Looks good.