Merge lp:~julian-edwards/maas/race-for-staticip-bug-1387262 into lp:~maas-committers/maas/trunk

Proposed by Julian Edwards
Status: Merged
Approved by: Julian Edwards
Approved revision: no longer in the source branch.
Merged at revision: 3337
Proposed branch: lp:~julian-edwards/maas/race-for-staticip-bug-1387262
Merge into: lp:~maas-committers/maas/trunk
Diff against target: 128 lines (+50/-11)
3 files modified
src/maasserver/locks.py (+3/-0)
src/maasserver/models/staticipaddress.py (+22/-11)
src/maasserver/models/tests/test_staticipaddress.py (+25/-0)
To merge this branch: bzr merge lp:~julian-edwards/maas/race-for-staticip-bug-1387262
Reviewer Review Type Date Requested Status
Graham Binns (community) Approve
Newell Jensen (community) Approve
Review via email: mp+240669@code.launchpad.net

Commit message

Use a lock in the critical section of the code that finds and records a new static IP address in the database. Some users have reported errors in the PG log file complaining about inserting duplicates, so there must be a race.

Description of the change

With reference to the attached bug, it looks like we need a lock around the section of code that acquires static IP addresses. At least, I can't see any other reason for errors like this in the PG log:

2014-10-28 15:40:35 UTC ERROR: duplicate key value violates unique constraint "maasserver_staticipaddress_ip_key"
2014-10-28 15:40:35 UTC DETAIL: Key (ip)=(10.245.0.201) already exists.
2014-10-28 15:40:35 UTC STATEMENT: INSERT INTO "maasserver_staticipaddress" ("created", "updated", "ip", "alloc_type", "user_id") VALUES ('2014-10-28 15:40:32.431728', '2014-10-28 15:40:32.431728', '10.245.0.201', 0, NULL) RETURNING "maasserver_staticipaddress"."id"

However the error that bubbles up to the appserver is a little odd (see the bug), you'd think it would be an IntegrityError. Maybe it's a consequence of the atomic context manager used in Node.claim_static_ip_addresses() ?

To post a comment you must log in.
Revision history for this message
Newell Jensen (newell-jensen) wrote :

Approve. One question inline.

review: Approve
Revision history for this message
Julian Edwards (julian-edwards) wrote :

On Wednesday 05 Nov 2014 04:22:23 you wrote:
> I assume you are using two calls to assertThat instead of expectThat because
> if the lock was not acquired then you know it wouldn't have been released,
> correct? I.e., you can't get the second test to error unless the first one
> passes. This stood out to me and I had to ask because I know you love
> expectThat!

Correct :)

Revision history for this message
Graham Binns (gmb) wrote :

LGTM. I wondered for a second whether we needed a lock in _attempt_allocation(), too, but I see upon looking at the code that we already handle IntegrityErrors there.

Nice work that man.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/maasserver/locks.py'
2--- src/maasserver/locks.py 2014-10-30 00:02:48 +0000
3+++ src/maasserver/locks.py 2014-11-05 03:59:43 +0000
4@@ -43,3 +43,6 @@
5
6 # Lock to prevent concurrent acquisition of nodes.
7 node_acquire = DatabaseXactLock(7)
8+
9+# Lock to prevent concurrent allocation of StaticIPAddress
10+staticip_acquire = DatabaseXactLock(8)
11
12=== modified file 'src/maasserver/models/staticipaddress.py'
13--- src/maasserver/models/staticipaddress.py 2014-10-21 11:35:32 +0000
14+++ src/maasserver/models/staticipaddress.py 2014-11-05 03:59:43 +0000
15@@ -35,7 +35,10 @@
16 IntegerField,
17 Manager,
18 )
19-from maasserver import DefaultMeta
20+from maasserver import (
21+ DefaultMeta,
22+ locks,
23+ )
24 from maasserver.enum import IPADDRESS_TYPE
25 from maasserver.exceptions import (
26 StaticIPAddressExhaustion,
27@@ -146,6 +149,22 @@
28 static_range = IPRange(range_low, range_high)
29
30 if requested_address is None:
31+ return self._find_free_ip(
32+ range_low, range_high, static_range, alloc_type, user)
33+ else:
34+ requested_address = IPAddress(requested_address)
35+ if requested_address not in static_range:
36+ raise StaticIPAddressOutOfRange(
37+ "%s is not inside the range %s to %s" % (
38+ requested_address.format(), range_low.format(),
39+ range_high.format()))
40+ return self._attempt_allocation(
41+ requested_address, alloc_type, user)
42+
43+ def _find_free_ip(self, range_low, range_high, static_range, alloc_type,
44+ user):
45+ """Helper function that finds a free IP address using a lock."""
46+ with locks.staticip_acquire:
47 # The set of _allocated_ addresses in the range is going to be
48 # smaller or at least no bigger than the set of addresses in the
49 # whole range, so we materialise a Python set of only allocated
50@@ -172,19 +191,11 @@
51 except StaticIPAddressUnavailable:
52 # That address has been taken since we obtained the
53 # list of existing addresses from the database. This
54- # is a race!
55+ # is a race! It also ought to be impossible as
56+ # this critical section is in a lock...
57 continue
58 else:
59 raise StaticIPAddressExhaustion()
60- else:
61- requested_address = IPAddress(requested_address)
62- if requested_address not in static_range:
63- raise StaticIPAddressOutOfRange(
64- "%s is not inside the range %s to %s" % (
65- requested_address.format(), range_low.format(),
66- range_high.format()))
67- return self._attempt_allocation(
68- requested_address, alloc_type, user)
69
70 def _deallocate(self, filter):
71 """Helper func to deallocate the records in the supplied queryset
72
73=== modified file 'src/maasserver/models/tests/test_staticipaddress.py'
74--- src/maasserver/models/tests/test_staticipaddress.py 2014-09-19 03:12:47 +0000
75+++ src/maasserver/models/tests/test_staticipaddress.py 2014-11-05 03:59:43 +0000
76@@ -16,6 +16,7 @@
77
78
79 from django.core.exceptions import ValidationError
80+from maasserver import locks
81 from maasserver.enum import IPADDRESS_TYPE
82 from maasserver.exceptions import (
83 StaticIPAddressExhaustion,
84@@ -25,6 +26,10 @@
85 from maasserver.models.staticipaddress import StaticIPAddress
86 from maasserver.testing.factory import factory
87 from maasserver.testing.testcase import MAASServerTestCase
88+from maastesting.matchers import (
89+ MockCalledOnceWith,
90+ MockNotCalled,
91+ )
92 from mock import sentinel
93 from netaddr import (
94 IPAddress,
95@@ -105,6 +110,17 @@
96 StaticIPAddressUnavailable, StaticIPAddress.objects.allocate_new,
97 low, high, requested_address=requested_address)
98
99+ def test_allocate_new_does_not_use_lock_for_requested_ip(self):
100+ # When requesting a specific IP address, there's no need to
101+ # acquire the lock.
102+ lock = self.patch(locks, 'staticip_acquire')
103+ low, high = factory.make_ip_range()
104+ requested_address = low + 1
105+ ipaddress = StaticIPAddress.objects.allocate_new(
106+ low, high, requested_address=requested_address)
107+ self.assertIsInstance(ipaddress, StaticIPAddress)
108+ self.assertThat(lock.__enter__, MockNotCalled())
109+
110 def test_allocate_new_raises_when_requested_IP_out_of_range(self):
111 low, high = factory.make_ip_range()
112 requested_address = low - 1
113@@ -134,6 +150,15 @@
114 "IP address type 12345 is not a member of IPADDRESS_TYPE.",
115 unicode(error))
116
117+ def test_allocate_new_uses_staticip_acquire_lock(self):
118+ lock = self.patch(locks, 'staticip_acquire')
119+ low, high = factory.make_ip_range()
120+ ipaddress = StaticIPAddress.objects.allocate_new(low, high)
121+ self.assertIsInstance(ipaddress, StaticIPAddress)
122+ self.assertThat(lock.__enter__, MockCalledOnceWith())
123+ self.assertThat(
124+ lock.__exit__, MockCalledOnceWith(None, None, None))
125+
126 def test_deallocate_by_node_removes_addresses(self):
127 node = factory.make_Node()
128 [mac1, mac2] = [