Merge lp:~vishvananda/nova/fix-exceptions into lp:~hudson-openstack/nova/trunk

Proposed by Vish Ishaya
Status: Merged
Merged at revision: 219
Proposed branch: lp:~vishvananda/nova/fix-exceptions
Merge into: lp:~hudson-openstack/nova/trunk
Diff against target: 424 lines (+125/-78)
3 files modified
nova/tests/volume_unittest.py (+59/-18)
nova/virt/libvirt_conn.py (+2/-1)
nova/volume/service.py (+64/-59)
To merge this branch: bzr merge lp:~vishvananda/nova/fix-exceptions
Reviewer: Eric Day (community)
Review status: Approve
Review via email: mp+31877@code.launchpad.net

Description of the change

More changes to the volume service to fix concurrency issues, plus updates to the tests.

Volumes now store a set of available shelf/blade pairs in the datastore instead of depending on a filesystem glob. This avoids a race condition in which two different volumes attempt to export to the same location.
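
The claim itself becomes a single atomic Redis SPOP against the set of free shelf.blade pairs, so two concurrent create requests can never draw the same pair. Below is a minimal sketch of the idea using a redis-py style client; the helper names are hypothetical, and the real logic lives in Volume.save, Volume._setup_export and Volume.destroy in nova/volume/service.py.

import redis

# Sketch only: the pool of free shelf.blade pairs lives in a Redis set.
client = redis.Redis()

def seed_devices(node_name, first_shelf, last_shelf, blades_per_shelf):
    """Populate the free pool for this node once (hypothetical helper)."""
    key = 'volume_devices:%s' % node_name
    for shelf_id in range(first_shelf, last_shelf + 1):
        for blade_id in range(blades_per_shelf):
            client.sadd(key, '%s.%s' % (shelf_id, blade_id))

def claim_device(node_name):
    """Atomically remove and return one free pair. Because SPOP is atomic,
    two concurrent callers can never be handed the same shelf.blade."""
    device = client.spop('volume_devices:%s' % node_name)
    if not device:
        raise Exception("NoMoreBlades")
    shelf_id, blade_id = device.split('.')
    return shelf_id, blade_id

def release_device(node_name, shelf_id, blade_id):
    """Return the pair to the pool when the volume is destroyed."""
    client.sadd('volume_devices:%s' % node_name,
                '%s.%s' % (shelf_id, blade_id))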

The general idea of pooled resources needs to be abstracted out into the datamodel. It is used for vpn ports and volumes now, and should be used for ip addresses as well.
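
Nothing like that abstraction exists in this branch yet; as a rough illustration only (all names hypothetical), a shared pool helper could look roughly like this:

from nova import exception

class ResourcePool(object):
    """Hypothetical datamodel helper for pooled resources (AoE blades,
    vpn ports, ip addresses): a named Redis set with atomic claim/release."""

    def __init__(self, name, redis_client):
        self.key = 'pool:%s' % name
        self.redis = redis_client

    def seed(self, items):
        """Add free items to the pool (idempotent, since it is a set)."""
        for item in items:
            self.redis.sadd(self.key, item)

    def claim(self):
        """Atomically take one free item or raise if the pool is empty."""
        item = self.redis.spop(self.key)
        if not item:
            raise exception.Error("pool %s is exhausted" % self.key)
        return item

    def release(self, item):
        """Return a previously claimed item to the pool."""
        self.redis.sadd(self.key, item)

Usage would look something like pool = ResourcePool('volume_devices:%s' % FLAGS.node_name, datastore.Redis.instance()), with pool.claim() during export setup and pool.release() on destroy.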

Revision history for this message
Vish Ishaya (vishvananda) wrote:

Hmm, looks like there were some weird merge errors. Merged in trunk to fix.

Vish

On Thu, Aug 5, 2010 at 12:23 PM, vishvananda <email address hidden> wrote:

> vishvananda has proposed merging lp:~vishvananda/nova/fix-exceptions into
> lp:nova.
>
> Requested reviews:
> Nova Core (nova-core)
>
>
> More changes to volume to fix concurrency issues. Also testing updates.
>
> volumes now store a set of available shelf/blades in the datastore, instead
> of depending on a filesystem glob. This avoids a race condition that occurs
> where two different volumes are attempting to export to the same location.
>
> The general idea of pooled resources needs to be abstracted out into the
> datamodel. It is used for vpn ports and volumes now, and should be used for
> ip addresses as well.
>
>
> --
> https://code.launchpad.net/~vishvananda/nova/fix-exceptions/+merge/31877
> You are the owner of lp:~vishvananda/nova/fix-exceptions.
>
> === modified file 'nova/tests/volume_unittest.py'
> --- nova/tests/volume_unittest.py 2010-07-31 02:33:07 +0000
> +++ nova/tests/volume_unittest.py 2010-08-05 19:23:40 +0000
> @@ -17,6 +17,10 @@
> # under the License.
>
> import logging
> +import shutil
> +import tempfile
> +
> +from twisted.internet import defer
>
> from nova import compute
> from nova import exception
> @@ -34,10 +38,16 @@
> super(VolumeTestCase, self).setUp()
> self.compute = compute.service.ComputeService()
> self.volume = None
> + self.tempdir = tempfile.mkdtemp()
> self.flags(connection_type='fake',
> - fake_storage=True)
> + fake_storage=True,
> + aoe_export_dir=self.tempdir)
> self.volume = volume_service.VolumeService()
>
> + def tearDown(self):
> + shutil.rmtree(self.tempdir)
> +
> + @defer.inlineCallbacks
> def test_run_create_volume(self):
> vol_size = '0'
> user_id = 'fake'
> @@ -48,34 +58,52 @@
> volume_service.get_volume(volume_id)['volume_id'])
>
> rv = self.volume.delete_volume(volume_id)
> +<<<<<<< TREE
> self.assertFailure(volume_service.get_volume(volume_id),
> exception.Error)
> +=======
> + self.assertRaises(exception.Error, volume_service.get_volume,
> volume_id)
> +>>>>>>> MERGE-SOURCE
>
> + @defer.inlineCallbacks
> def test_too_big_volume(self):
> vol_size = '1001'
> user_id = 'fake'
> project_id = 'fake'
> - self.assertRaises(TypeError,
> - self.volume.create_volume,
> - vol_size, user_id, project_id)
> + try:
> + yield self.volume.create_volume(vol_size, user_id, project_id)
> + self.fail("Should have thrown TypeError")
> + except TypeError:
> + pass
>
> + @defer.inlineCallbacks
> def test_too_many_volumes(self):
> vol_size = '1'
> user_id = 'fake'
> project_id = 'fake'
> num_shelves = FLAGS.last_shelf_id - FLAGS.first_shelf_id + 1
> - total_slots = FLAGS.slots_per_shelf * num_shelves
> + total_slots = FLAGS.blades_per_sh...

Revision history for this message
Eric Day (eday) wrote:

lgtm!

review: Approve
Revision history for this message
OpenStack Infra (hudson-openstack) wrote:

There is no resulting diff between lp:~vishvananda/nova/fix-exceptions and lp:nova.

Preview Diff

=== modified file 'nova/tests/volume_unittest.py'
--- nova/tests/volume_unittest.py 2010-07-31 02:33:07 +0000
+++ nova/tests/volume_unittest.py 2010-08-06 22:56:00 +0000
@@ -17,6 +17,10 @@
 # under the License.

 import logging
+import shutil
+import tempfile
+
+from twisted.internet import defer

 from nova import compute
 from nova import exception
@@ -34,10 +38,16 @@
 super(VolumeTestCase, self).setUp()
 self.compute = compute.service.ComputeService()
 self.volume = None
+ self.tempdir = tempfile.mkdtemp()
 self.flags(connection_type='fake',
- fake_storage=True)
+ fake_storage=True,
+ aoe_export_dir=self.tempdir)
 self.volume = volume_service.VolumeService()

+ def tearDown(self):
+ shutil.rmtree(self.tempdir)
+
+ @defer.inlineCallbacks
 def test_run_create_volume(self):
 vol_size = '0'
 user_id = 'fake'
@@ -48,34 +58,40 @@
 volume_service.get_volume(volume_id)['volume_id'])

 rv = self.volume.delete_volume(volume_id)
- self.assertFailure(volume_service.get_volume(volume_id),
- exception.Error)
+ self.assertRaises(exception.Error, volume_service.get_volume, volume_id)

+ @defer.inlineCallbacks
 def test_too_big_volume(self):
 vol_size = '1001'
 user_id = 'fake'
 project_id = 'fake'
- self.assertRaises(TypeError,
- self.volume.create_volume,
- vol_size, user_id, project_id)
+ try:
+ yield self.volume.create_volume(vol_size, user_id, project_id)
+ self.fail("Should have thrown TypeError")
+ except TypeError:
+ pass

+ @defer.inlineCallbacks
 def test_too_many_volumes(self):
 vol_size = '1'
 user_id = 'fake'
 project_id = 'fake'
 num_shelves = FLAGS.last_shelf_id - FLAGS.first_shelf_id + 1
- total_slots = FLAGS.slots_per_shelf * num_shelves
+ total_slots = FLAGS.blades_per_shelf * num_shelves
 vols = []
+ from nova import datastore
+ redis = datastore.Redis.instance()
 for i in xrange(total_slots):
 vid = yield self.volume.create_volume(vol_size, user_id, project_id)
 vols.append(vid)
 self.assertFailure(self.volume.create_volume(vol_size,
 user_id,
 project_id),
- volume_service.NoMoreVolumes)
+ volume_service.NoMoreBlades)
 for id in vols:
 yield self.volume.delete_volume(id)

+ @defer.inlineCallbacks
 def test_run_attach_detach_volume(self):
 # Create one volume and one compute to test with
 instance_id = "storage-test"
@@ -84,22 +100,26 @@
 project_id = 'fake'
 mountpoint = "/dev/sdf"
 volume_id = yield self.volume.create_volume(vol_size, user_id, project_id)
-
 volume_obj = volume_service.get_volume(volume_id)
 volume_obj.start_attach(instance_id, mountpoint)
- rv = yield self.compute.attach_volume(volume_id,
- instance_id,
- mountpoint)
+ if FLAGS.fake_tests:
+ volume_obj.finish_attach()
+ else:
+ rv = yield self.compute.attach_volume(instance_id,
+ volume_id,
+ mountpoint)
 self.assertEqual(volume_obj['status'], "in-use")
- self.assertEqual(volume_obj['attachStatus'], "attached")
+ self.assertEqual(volume_obj['attach_status'], "attached")
 self.assertEqual(volume_obj['instance_id'], instance_id)
 self.assertEqual(volume_obj['mountpoint'], mountpoint)

- self.assertRaises(exception.Error,
- self.volume.delete_volume,
- volume_id)
-
- rv = yield self.volume.detach_volume(volume_id)
+ self.assertFailure(self.volume.delete_volume(volume_id), exception.Error)
+ volume_obj.start_detach()
+ if FLAGS.fake_tests:
+ volume_obj.finish_detach()
+ else:
+ rv = yield self.volume.detach_volume(instance_id,
+ volume_id)
 volume_obj = volume_service.get_volume(volume_id)
 self.assertEqual(volume_obj['status'], "available")

@@ -108,6 +128,27 @@
 volume_service.get_volume,
 volume_id)

+ @defer.inlineCallbacks
+ def test_multiple_volume_race_condition(self):
+ vol_size = "5"
+ user_id = "fake"
+ project_id = 'fake'
+ shelf_blades = []
+ def _check(volume_id):
+ vol = volume_service.get_volume(volume_id)
+ shelf_blade = '%s.%s' % (vol['shelf_id'], vol['blade_id'])
+ self.assert_(shelf_blade not in shelf_blades)
+ shelf_blades.append(shelf_blade)
+ logging.debug("got %s" % shelf_blade)
+ vol.destroy()
+ deferreds = []
+ for i in range(5):
+ d = self.volume.create_volume(vol_size, user_id, project_id)
+ d.addCallback(_check)
+ d.addErrback(self.fail)
+ deferreds.append(d)
+ yield defer.DeferredList(deferreds)
+
 def test_multi_node(self):
 # TODO(termie): Figure out how to test with two nodes,
 # each of them having a different FLAG for storage_node

=== modified file 'nova/virt/libvirt_conn.py'
--- nova/virt/libvirt_conn.py 2010-08-06 21:27:48 +0000
+++ nova/virt/libvirt_conn.py 2010-08-06 22:56:00 +0000
@@ -114,7 +114,8 @@
 def _cleanup(self, instance):
 target = os.path.abspath(instance.datamodel['basepath'])
 logging.info("Deleting instance files at %s", target)
- shutil.rmtree(target)
+ if os.path.exists(target):
+ shutil.rmtree(target)


 @defer.inlineCallbacks

=== modified file 'nova/volume/service.py'
--- nova/volume/service.py 2010-07-31 02:33:07 +0000
+++ nova/volume/service.py 2010-08-06 22:56:00 +0000
@@ -22,12 +22,8 @@
 Currently uses Ata-over-Ethernet.
 """

-import glob
 import logging
 import os
-import shutil
-import socket
-import tempfile

 from twisted.internet import defer

@@ -47,9 +43,6 @@
 'Name for the VG that will contain exported volumes')
 flags.DEFINE_string('aoe_eth_dev', 'eth0',
 'Which device to export the volumes on')
-flags.DEFINE_string('storage_name',
- socket.gethostname(),
- 'name of this service')
 flags.DEFINE_integer('first_shelf_id',
 utils.last_octet(utils.get_my_ip()) * 10,
 'AoE starting shelf_id for this service')
@@ -59,9 +52,9 @@
 flags.DEFINE_string('aoe_export_dir',
 '/var/lib/vblade-persist/vblades',
 'AoE directory where exports are created')
-flags.DEFINE_integer('slots_per_shelf',
+flags.DEFINE_integer('blades_per_shelf',
 16,
- 'Number of AoE slots per shelf')
+ 'Number of AoE blades per shelf')
 flags.DEFINE_string('storage_availability_zone',
 'nova',
 'availability zone of this service')
@@ -69,7 +62,7 @@
 'Should we make real storage volumes to attach?')


-class NoMoreVolumes(exception.Error):
+class NoMoreBlades(exception.Error):
 pass

 def get_volume(volume_id):
@@ -77,8 +70,9 @@
 volume_class = Volume
 if FLAGS.fake_storage:
 volume_class = FakeVolume
- if datastore.Redis.instance().sismember('volumes', volume_id):
- return volume_class(volume_id=volume_id)
+ vol = volume_class.lookup(volume_id)
+ if vol:
+ return vol
 raise exception.Error("Volume does not exist")

 class VolumeService(service.Service):
@@ -91,18 +85,9 @@
 super(VolumeService, self).__init__()
 self.volume_class = Volume
 if FLAGS.fake_storage:
- FLAGS.aoe_export_dir = tempfile.mkdtemp()
 self.volume_class = FakeVolume
 self._init_volume_group()

- def __del__(self):
- # TODO(josh): Get rid of this destructor, volumes destroy themselves
- if FLAGS.fake_storage:
- try:
- shutil.rmtree(FLAGS.aoe_export_dir)
- except Exception, err:
- pass
-
 @defer.inlineCallbacks
 @validate.rangetest(size=(0, 1000))
 def create_volume(self, size, user_id, project_id):
@@ -113,8 +98,6 @@
 """
 logging.debug("Creating volume of size: %s" % (size))
 vol = yield self.volume_class.create(size, user_id, project_id)
- datastore.Redis.instance().sadd('volumes', vol['volume_id'])
- datastore.Redis.instance().sadd('volumes:%s' % (FLAGS.storage_name), vol['volume_id'])
 logging.debug("restarting exports")
 yield self._restart_exports()
 defer.returnValue(vol['volume_id'])
@@ -134,21 +117,19 @@
 def delete_volume(self, volume_id):
 logging.debug("Deleting volume with id of: %s" % (volume_id))
 vol = get_volume(volume_id)
- if vol['status'] == "attached":
+ if vol['attach_status'] == "attached":
 raise exception.Error("Volume is still attached")
- if vol['node_name'] != FLAGS.storage_name:
+ if vol['node_name'] != FLAGS.node_name:
 raise exception.Error("Volume is not local to this node")
 yield vol.destroy()
- datastore.Redis.instance().srem('volumes', vol['volume_id'])
- datastore.Redis.instance().srem('volumes:%s' % (FLAGS.storage_name), vol['volume_id'])
 defer.returnValue(True)

 @defer.inlineCallbacks
 def _restart_exports(self):
 if FLAGS.fake_storage:
 return
- yield process.simple_execute("sudo vblade-persist auto all")
- # NOTE(vish): this command sometimes sends output to stderr for warnings
+ # NOTE(vish): these commands sometimes sends output to stderr for warnings
+ yield process.simple_execute("sudo vblade-persist auto all", error_ok=1)
 yield process.simple_execute("sudo vblade-persist start all", error_ok=1)

 @defer.inlineCallbacks
@@ -172,14 +153,15 @@
 return self.volume_id

 def default_state(self):
- return {"volume_id": self.volume_id}
+ return {"volume_id": self.volume_id,
+ "node_name": "unassigned"}

 @classmethod
 @defer.inlineCallbacks
 def create(cls, size, user_id, project_id):
 volume_id = utils.generate_uid('vol')
 vol = cls(volume_id)
- vol['node_name'] = FLAGS.storage_name
+ vol['node_name'] = FLAGS.node_name
 vol['size'] = size
 vol['user_id'] = user_id
 vol['project_id'] = project_id
@@ -225,14 +207,31 @@
 self['attach_status'] = "detached"
 self.save()

+ def save(self):
+ is_new = self.is_new_record()
+ super(Volume, self).save()
+ if is_new:
+ redis = datastore.Redis.instance()
+ key = self.__devices_key
+ # TODO(vish): these should be added by admin commands
+ more = redis.scard(self._redis_association_name("node",
+ self['node_name']))
+ if (not redis.exists(key) and not more):
+ for shelf_id in range(FLAGS.first_shelf_id,
+ FLAGS.last_shelf_id + 1):
+ for blade_id in range(FLAGS.blades_per_shelf):
+ redis.sadd(key, "%s.%s" % (shelf_id, blade_id))
+ self.associate_with("node", self['node_name'])
+
 @defer.inlineCallbacks
 def destroy(self):
- try:
- yield self._remove_export()
- except Exception as ex:
- logging.debug("Ingnoring failure to remove export %s" % ex)
- pass
+ yield self._remove_export()
 yield self._delete_lv()
+ self.unassociate_with("node", self['node_name'])
+ if self.get('shelf_id', None) and self.get('blade_id', None):
+ redis = datastore.Redis.instance()
+ key = self.__devices_key
+ redis.sadd(key, "%s.%s" % (self['shelf_id'], self['blade_id']))
 super(Volume, self).destroy()

 @defer.inlineCallbacks
@@ -244,66 +243,72 @@
 yield process.simple_execute(
 "sudo lvcreate -L %s -n %s %s" % (sizestr,
 self['volume_id'],
- FLAGS.volume_group))
+ FLAGS.volume_group),
+ error_ok=1)

 @defer.inlineCallbacks
 def _delete_lv(self):
 yield process.simple_execute(
 "sudo lvremove -f %s/%s" % (FLAGS.volume_group,
- self['volume_id']))
+ self['volume_id']), error_ok=1)
+
+ @property
+ def __devices_key(self):
+ return 'volume_devices:%s' % FLAGS.node_name

 @defer.inlineCallbacks
 def _setup_export(self):
- (shelf_id, blade_id) = get_next_aoe_numbers()
+ redis = datastore.Redis.instance()
+ key = self.__devices_key
+ device = redis.spop(key)
+ if not device:
+ raise NoMoreBlades()
+ (shelf_id, blade_id) = device.split('.')
 self['aoe_device'] = "e%s.%s" % (shelf_id, blade_id)
 self['shelf_id'] = shelf_id
 self['blade_id'] = blade_id
 self.save()
- yield self._exec_export()
+ yield self._exec_setup_export()

 @defer.inlineCallbacks
- def _exec_export(self):
+ def _exec_setup_export(self):
 yield process.simple_execute(
 "sudo vblade-persist setup %s %s %s /dev/%s/%s" %
 (self['shelf_id'],
 self['blade_id'],
 FLAGS.aoe_eth_dev,
 FLAGS.volume_group,
- self['volume_id']))
+ self['volume_id']), error_ok=1)

 @defer.inlineCallbacks
 def _remove_export(self):
+ if not self.get('shelf_id', None) or not self.get('blade_id', None):
+ defer.returnValue(False)
+ yield self._exec_remove_export()
+ defer.returnValue(True)
+
+ @defer.inlineCallbacks
+ def _exec_remove_export(self):
 yield process.simple_execute(
 "sudo vblade-persist stop %s %s" % (self['shelf_id'],
- self['blade_id']))
+ self['blade_id']), error_ok=1)
 yield process.simple_execute(
 "sudo vblade-persist destroy %s %s" % (self['shelf_id'],
- self['blade_id']))
+ self['blade_id']), error_ok=1)
+


 class FakeVolume(Volume):
 def _create_lv(self):
 pass

- def _exec_export(self):
+ def _exec_setup_export(self):
 fname = os.path.join(FLAGS.aoe_export_dir, self['aoe_device'])
 f = file(fname, "w")
 f.close()

- def _remove_export(self):
- pass
+ def _exec_remove_export(self):
+ os.unlink(os.path.join(FLAGS.aoe_export_dir, self['aoe_device']))

 def _delete_lv(self):
 pass
-
-def get_next_aoe_numbers():
- for shelf_id in xrange(FLAGS.first_shelf_id, FLAGS.last_shelf_id + 1):
- aoes = glob.glob("%s/e%s.*" % (FLAGS.aoe_export_dir, shelf_id))
- if not aoes:
- blade_id = 0
- else:
- blade_id = int(max([int(a.rpartition('.')[2]) for a in aoes])) + 1
- if blade_id < FLAGS.slots_per_shelf:
- logging.debug("Next shelf.blade is %s.%s", shelf_id, blade_id)
- return (shelf_id, blade_id)
- raise NoMoreVolumes()