Merge lp:~blake-rouse/maas/fix-1558785 into lp:~maas-committers/maas/trunk

Proposed by Blake Rouse
Status: Merged
Approved by: Blake Rouse
Approved revision: no longer in the source branch.
Merged at revision: 4805
Proposed branch: lp:~blake-rouse/maas/fix-1558785
Merge into: lp:~maas-committers/maas/trunk
Diff against target: 543 lines (+252/-30)
6 files modified
src/maasserver/middleware.py (+3/-3)
src/maasserver/tests/test_middleware.py (+11/-1)
src/maasserver/utils/orm.py (+78/-10)
src/maasserver/utils/tests/test_orm.py (+122/-9)
src/maasserver/utils/tests/test_views.py (+31/-0)
src/maasserver/utils/views.py (+7/-7)
To merge this branch: bzr merge lp:~blake-rouse/maas/fix-1558785
Reviewer Review Type Date Requested Status
Gavin Panella (community) Approve
Review via email: mp+289508@code.launchpad.net

Commit message

Retry deadlock detection failures just like serialization failures.

To post a comment you must log in.
Revision history for this message
Gavin Panella (allenap) wrote :

Cool!

review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

The attempt to merge lp:~blake-rouse/maas/fix-1558785 into lp:maas failed. Below is the output from the failed tests.

Hit:1 http://security.ubuntu.com/ubuntu xenial-security InRelease
Get:2 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial InRelease [116 kB]
Hit:3 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial-updates InRelease
Hit:4 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial-backports InRelease
Get:5 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/main Sources [1,104 kB]
Get:6 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/universe Sources [7,510 kB]
Get:7 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/main amd64 Packages [1,435 kB]
Get:8 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/universe amd64 Packages [7,242 kB]
Get:9 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/universe Translation-en [4,185 kB]
Err:9 http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu xenial/universe Translation-en
  Hash Sum mismatch
Fetched 21.6 MB in 1s (11.3 MB/s)
Reading package lists...

sudo: unable to resolve host juju-prod-cdo-maas-machine-3
W: Failed to fetch http://prodstack-zone-2.clouds.archive.ubuntu.com/ubuntu/dists/xenial/universe/i18n/Translation-en.xz Hash Sum mismatch
E: Some index files failed to download. They have been ignored, or old ones used instead.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/maasserver/middleware.py'
2--- src/maasserver/middleware.py 2016-03-15 20:25:31 +0000
3+++ src/maasserver/middleware.py 2016-03-18 14:47:09 +0000
4@@ -46,7 +46,7 @@
5 from maasserver.exceptions import MAASAPIException
6 from maasserver.models.node import RackController
7 from maasserver.rpc import getAllClients
8-from maasserver.utils.orm import is_serialization_failure
9+from maasserver.utils.orm import is_retryable_failure
10 from maasserver.views.combo import MERGE_VIEWS
11 from provisioningserver.rpc.exceptions import (
12 NoConnectionsAvailable,
13@@ -184,8 +184,8 @@
14 # Not a path we're handling exceptions for.
15 return None
16
17- if is_serialization_failure(exception):
18- # We never handle serialization failures.
19+ if is_retryable_failure(exception):
20+ # We never handle retryable failures.
21 return None
22
23 encoding = 'utf-8'
24
25=== modified file 'src/maasserver/tests/test_middleware.py'
26--- src/maasserver/tests/test_middleware.py 2016-02-02 14:20:45 +0000
27+++ src/maasserver/tests/test_middleware.py 2016-03-18 14:47:09 +0000
28@@ -42,7 +42,10 @@
29 from maasserver.testing import extract_redirect
30 from maasserver.testing.factory import factory
31 from maasserver.testing.testcase import MAASServerTestCase
32-from maasserver.utils.orm import make_serialization_failure
33+from maasserver.utils.orm import (
34+ make_deadlock_failure,
35+ make_serialization_failure,
36+)
37 from maastesting.matchers import MockCalledOnceWith
38 from maastesting.utils import sample_binary_data
39 from mock import Mock
40@@ -97,6 +100,13 @@
41 exception = make_serialization_failure()
42 self.assertIsNone(middleware.process_exception(request, exception))
43
44+ def test_ignores_deadlock_failures(self):
45+ base_path = self.make_base_path()
46+ middleware = self.make_middleware(base_path)
47+ request = factory.make_fake_request(base_path)
48+ exception = make_deadlock_failure()
49+ self.assertIsNone(middleware.process_exception(request, exception))
50+
51 def test_unknown_exception_generates_internal_server_error(self):
52 # An unknown exception generates an internal server error with the
53 # exception message.
54
55=== modified file 'src/maasserver/utils/orm.py'
56--- src/maasserver/utils/orm.py 2016-02-26 18:48:26 +0000
57+++ src/maasserver/utils/orm.py 2016-03-18 14:47:09 +0000
58@@ -13,13 +13,16 @@
59 'get_first',
60 'get_one',
61 'in_transaction',
62+ 'is_deadlock_failure',
63+ 'is_retryable_failure',
64 'is_serialization_failure',
65+ 'make_deadlock_failure',
66 'make_serialization_failure',
67 'post_commit',
68 'post_commit_do',
69 'psql_array',
70 'request_transaction_retry',
71- 'retry_on_serialization_failure',
72+ 'retry_on_retryable_failure',
73 'savepoint',
74 'TotallyDisconnected',
75 'transactional',
76@@ -66,7 +69,10 @@
77 from provisioningserver.utils.network import parse_integer
78 from provisioningserver.utils.twisted import callOut
79 import psycopg2
80-from psycopg2.errorcodes import SERIALIZATION_FAILURE
81+from psycopg2.errorcodes import (
82+ DEADLOCK_DETECTED,
83+ SERIALIZATION_FAILURE,
84+)
85 from twisted.internet.defer import Deferred
86
87
88@@ -211,6 +217,62 @@
89 return exception
90
91
92+class DeadlockFailure(psycopg2.OperationalError):
93+ """Explicit deadlock failure.
94+
95+ A real deadlock failure, arising out of psycopg2 (and thus signalled
96+ from the database) would *NOT* be an instance of this class. However, it
97+ is not obvious how to create a `psycopg2.OperationalError` with ``pgcode``
98+ set to `DEADLOCK_DETECTED` without subclassing. I suspect only the C
99+ interface can do that.
100+ """
101+ pgcode = DEADLOCK_DETECTED
102+
103+
104+def make_deadlock_failure():
105+ """Make a deadlock exception.
106+
107+ Artificially construct an exception that resembles what Django's ORM would
108+ raise when PostgreSQL fails a transaction because of a deadlock
109+ failure.
110+
111+ :returns: an instance of :py:class:`OperationalError` that will pass the
112+ `is_deadlock_failure` predicate.
113+ """
114+ exception = OperationalError()
115+ exception.__cause__ = DeadlockFailure()
116+ assert is_deadlock_failure(exception)
117+ return exception
118+
119+
120+def get_psycopg2_deadlock_exception(exception):
121+ """Return the root-cause if `exception` is a deadlock failure.
122+
123+ PostgreSQL sets a specific error code, "40P01", when a transaction breaks
124+ because of a deadlock failure.
125+
126+ :return: The underlying `psycopg2.Error` if it's a deadlock failure,
127+ or `None` if there isn't one.
128+ """
129+ exception = get_psycopg2_exception(exception)
130+ if exception is None:
131+ return None
132+ elif exception.pgcode == DEADLOCK_DETECTED:
133+ return exception
134+ else:
135+ return None
136+
137+
138+def is_deadlock_failure(exception):
139+ """Does `exception` represent a deadlock failure?
140+
141+ PostgreSQL sets a specific error code, "40P01", when a transaction breaks
142+ because of a deadlock failure. This is normally about the right time
143+ to try again.
144+ """
145+ return get_psycopg2_deadlock_exception(exception) is not None
146+
147+
148 def request_transaction_retry():
149 """Raise a serialization exception.
150
151@@ -223,6 +285,12 @@
152 raise make_serialization_failure()
153
154
155+def is_retryable_failure(exception):
156+ """Does `exception` represent a serialization or deadlock failure?"""
157+ return (
158+ is_serialization_failure(exception) or is_deadlock_failure(exception))
159+
160+
161 def gen_retry_intervals(base=0.01, rate=2.5, maximum=10.0):
162 """Generate retry intervals based on an exponential series.
163
164@@ -248,11 +316,11 @@
165 """Do nothing."""
166
167
168-def retry_on_serialization_failure(func, reset=noop):
169- """Retry the wrapped function when it raises a serialization failure.
170+def retry_on_retryable_failure(func, reset=noop):
171+ """Retry the wrapped function when it raises a retryable failure.
172
173 It will call `func` a maximum of ten times, and will only retry if a
174- serialization failure is detected.
175+ retryable failure is detected.
176
177 BE CAREFUL WHERE YOU USE THIS.
178
179@@ -263,8 +331,8 @@
180
181 :param reset: An optional callable that will be called between attempts.
182 It is *not* called before the first attempt. If the last attempt fails
183- with a serialization failure it will *not* be called. If an attempt
184- fails with a non-serialization failure, it will *not* be called.
185+ with a retryable failure it will *not* be called. If an attempt
186+ fails with a non-retryable failure, it will *not* be called.
187
188 """
189 @wraps(func)
190@@ -274,7 +342,7 @@
191 try:
192 return func(*args, **kwargs)
193 except OperationalError as error:
194- if is_serialization_failure(error):
195+ if is_retryable_failure(error):
196 reset() # Which may do nothing.
197 sleep(next(intervals))
198 else:
199@@ -417,10 +485,10 @@
200 happy, especially in the test suite.
201
202 In addition, if `func` is being invoked from outside of a transaction,
203- this will retry if it fails with a serialization failure.
204+ this will retry if it fails with a retryable failure.
205 """
206 func_within_txn = transaction.atomic(func) # For savepoints.
207- func_outside_txn = retry_on_serialization_failure(
208+ func_outside_txn = retry_on_retryable_failure(
209 func_within_txn, reset=post_commit_hooks.reset)
210
211 @wraps(func)
212
213=== modified file 'src/maasserver/utils/tests/test_orm.py'
214--- src/maasserver/utils/tests/test_orm.py 2016-02-10 20:24:35 +0000
215+++ src/maasserver/utils/tests/test_orm.py 2016-03-18 14:47:09 +0000
216@@ -37,9 +37,12 @@
217 get_first,
218 get_model_object_name,
219 get_one,
220+ get_psycopg2_deadlock_exception,
221 get_psycopg2_exception,
222 get_psycopg2_serialization_exception,
223 in_transaction,
224+ is_deadlock_failure,
225+ is_retryable_failure,
226 is_serialization_failure,
227 make_serialization_failure,
228 post_commit,
229@@ -47,7 +50,7 @@
230 post_commit_hooks,
231 psql_array,
232 request_transaction_retry,
233- retry_on_serialization_failure,
234+ retry_on_retryable_failure,
235 savepoint,
236 TotallyDisconnected,
237 validate_in_transaction,
238@@ -74,7 +77,10 @@
239 DeferredValue,
240 )
241 import psycopg2
242-from psycopg2.errorcodes import SERIALIZATION_FAILURE
243+from psycopg2.errorcodes import (
244+ DEADLOCK_DETECTED,
245+ SERIALIZATION_FAILURE,
246+)
247 from testtools import ExpectedException
248 from testtools.deferredruntest import extract_result
249 from testtools.matchers import (
250@@ -226,11 +232,16 @@
251 exception = factory.make_exception()
252 self.assertIsNone(get_psycopg2_serialization_exception(exception))
253
254- def test__returns_psycopg2_error_root_cause(self):
255+ def test__returns_psycopg2_error_root_cause_for_serialization(self):
256 exception = Exception()
257 exception.__cause__ = orm.SerializationFailure()
258 self.assertIs(exception.__cause__, get_psycopg2_exception(exception))
259
260+ def test__returns_psycopg2_error_root_cause_for_deadlock(self):
261+ exception = Exception()
262+ exception.__cause__ = orm.DeadlockFailure()
263+ self.assertIs(exception.__cause__, get_psycopg2_exception(exception))
264+
265
266 class TestGetPsycopg2SerializationException(MAASTestCase):
267 """Tests for `get_psycopg2_serialization_exception`."""
268@@ -251,6 +262,25 @@
269 get_psycopg2_serialization_exception(exception))
270
271
272+class TestGetPsycopg2DeadlockException(MAASTestCase):
273+ """Tests for `get_psycopg2_deadlock_exception`."""
274+
275+ def test__returns_None_for_plain_psycopg2_error(self):
276+ exception = psycopg2.Error()
277+ self.assertIsNone(get_psycopg2_deadlock_exception(exception))
278+
279+ def test__returns_None_for_other_error(self):
280+ exception = factory.make_exception()
281+ self.assertIsNone(get_psycopg2_deadlock_exception(exception))
282+
283+ def test__returns_psycopg2_error_root_cause(self):
284+ exception = Exception()
285+ exception.__cause__ = orm.DeadlockFailure()
286+ self.assertIs(
287+ exception.__cause__,
288+ get_psycopg2_deadlock_exception(exception))
289+
290+
291 class TestIsSerializationFailure(SerializationFailureTestCase):
292 """Tests relating to MAAS's use of SERIALIZABLE isolation."""
293
294@@ -281,7 +311,75 @@
295 self.assertFalse(is_serialization_failure(error))
296
297
298-class TestRetryOnSerializationFailure(SerializationFailureTestCase):
299+class TestIsDeadlockFailure(MAASTestCase):
300+ """Tests relating to MAAS's use of catching deadlock failures."""
301+
302+ def test_detects_operational_error_with_matching_cause(self):
303+ error = orm.make_deadlock_failure()
304+ self.assertTrue(is_deadlock_failure(error))
305+
306+ def test_rejects_operational_error_without_matching_cause(self):
307+ error = OperationalError()
308+ cause = self.patch(error, "__cause__", Exception())
309+ cause.pgcode = factory.make_name("pgcode")
310+ self.assertFalse(is_deadlock_failure(error))
311+
312+ def test_rejects_operational_error_with_unrelated_cause(self):
313+ error = OperationalError()
314+ error.__cause__ = Exception()
315+ self.assertFalse(is_deadlock_failure(error))
316+
317+ def test_rejects_operational_error_without_cause(self):
318+ error = OperationalError()
319+ self.assertFalse(is_deadlock_failure(error))
320+
321+ def test_rejects_non_operational_error_with_matching_cause(self):
322+ error = factory.make_exception()
323+ cause = self.patch(error, "__cause__", Exception())
324+ cause.pgcode = DEADLOCK_DETECTED
325+ self.assertFalse(is_deadlock_failure(error))
326+
327+
328+class TestIsRetryableFailure(MAASTestCase):
329+ """Tests relating to MAAS's use of catching retryable failures."""
330+
331+ def test_detects_serialization_failure(self):
332+ error = orm.make_serialization_failure()
333+ self.assertTrue(is_retryable_failure(error))
334+
335+ def test_detects_deadlock_failure(self):
336+ error = orm.make_deadlock_failure()
337+ self.assertTrue(is_retryable_failure(error))
338+
339+ def test_rejects_operational_error_without_matching_cause(self):
340+ error = OperationalError()
341+ cause = self.patch(error, "__cause__", Exception())
342+ cause.pgcode = factory.make_name("pgcode")
343+ self.assertFalse(is_retryable_failure(error))
344+
345+ def test_rejects_operational_error_with_unrelated_cause(self):
346+ error = OperationalError()
347+ error.__cause__ = Exception()
348+ self.assertFalse(is_retryable_failure(error))
349+
350+ def test_rejects_operational_error_without_cause(self):
351+ error = OperationalError()
352+ self.assertFalse(is_retryable_failure(error))
353+
354+ def test_rejects_non_operational_error_with_cause_serialization(self):
355+ error = factory.make_exception()
356+ cause = self.patch(error, "__cause__", Exception())
357+ cause.pgcode = SERIALIZATION_FAILURE
358+ self.assertFalse(is_retryable_failure(error))
359+
360+ def test_rejects_non_operational_error_with_cause_deadlock(self):
361+ error = factory.make_exception()
362+ cause = self.patch(error, "__cause__", Exception())
363+ cause.pgcode = DEADLOCK_DETECTED
364+ self.assertFalse(is_retryable_failure(error))
365+
366+
367+class TestRetryOnRetryableFailure(SerializationFailureTestCase):
368
369 def make_mock_function(self):
370 function_name = factory.make_name("function")
371@@ -291,7 +389,7 @@
372 def test_retries_on_serialization_failure(self):
373 function = self.make_mock_function()
374 function.side_effect = self.cause_serialization_failure
375- function_wrapped = retry_on_serialization_failure(function)
376+ function_wrapped = retry_on_retryable_failure(function)
377 self.assertRaises(OperationalError, function_wrapped)
378 expected_calls = [call()] * 10
379 self.assertThat(function, MockCallsMatch(*expected_calls))
380@@ -301,13 +399,28 @@
381 OperationalError, self.cause_serialization_failure)
382 function = self.make_mock_function()
383 function.side_effect = [serialization_error, sentinel.result]
384- function_wrapped = retry_on_serialization_failure(function)
385+ function_wrapped = retry_on_retryable_failure(function)
386+ self.assertEqual(sentinel.result, function_wrapped())
387+ self.assertThat(function, MockCallsMatch(call(), call()))
388+
389+ def test_retries_on_deadlock_failure(self):
390+ function = self.make_mock_function()
391+ function.side_effect = orm.make_deadlock_failure()
392+ function_wrapped = retry_on_retryable_failure(function)
393+ self.assertRaises(OperationalError, function_wrapped)
394+ expected_calls = [call()] * 10
395+ self.assertThat(function, MockCallsMatch(*expected_calls))
396+
397+ def test_retries_on_deadlock_failure_until_successful(self):
398+ function = self.make_mock_function()
399+ function.side_effect = [orm.make_deadlock_failure(), sentinel.result]
400+ function_wrapped = retry_on_retryable_failure(function)
401 self.assertEqual(sentinel.result, function_wrapped())
402 self.assertThat(function, MockCallsMatch(call(), call()))
403
404 def test_passes_args_to_wrapped_function(self):
405 function = lambda a, b: (a, b)
406- function_wrapped = retry_on_serialization_failure(function)
407+ function_wrapped = retry_on_retryable_failure(function)
408 self.assertEqual(
409 (sentinel.a, sentinel.b),
410 function_wrapped(sentinel.a, b=sentinel.b))
411@@ -316,7 +429,7 @@
412 reset = Mock()
413 function = self.make_mock_function()
414 function.side_effect = self.cause_serialization_failure
415- function_wrapped = retry_on_serialization_failure(function, reset)
416+ function_wrapped = retry_on_retryable_failure(function, reset)
417 self.assertRaises(OperationalError, function_wrapped)
418 expected_function_calls = [call()] * 10
419 self.expectThat(function, MockCallsMatch(*expected_function_calls))
420@@ -328,7 +441,7 @@
421 reset = Mock()
422 function = self.make_mock_function()
423 function.return_value = sentinel.all_is_okay
424- function_wrapped = retry_on_serialization_failure(function, reset)
425+ function_wrapped = retry_on_retryable_failure(function, reset)
426 function_wrapped()
427 self.assertThat(reset, MockNotCalled())
428
429
430=== modified file 'src/maasserver/utils/tests/test_views.py'
431--- src/maasserver/utils/tests/test_views.py 2016-02-24 15:42:58 +0000
432+++ src/maasserver/utils/tests/test_views.py 2016-03-18 14:47:09 +0000
433@@ -32,6 +32,7 @@
434 )
435 from maasserver.utils import views
436 from maasserver.utils.orm import (
437+ make_deadlock_failure,
438 post_commit_hooks,
439 validate_in_transaction,
440 )
441@@ -283,6 +284,20 @@
442 self.assertThat(
443 response, IsInstance(HttpResponseConflict))
444
445+ def test__get_response_catches_deadlock_failures(self):
446+ get_response = self.patch(WSGIHandler, "get_response")
447+ get_response.side_effect = make_deadlock_failure()
448+
449+ handler = views.WebApplicationHandler(1)
450+ request = make_request()
451+ request.path = factory.make_name("path")
452+ response = handler.get_response(request)
453+
454+ self.assertThat(
455+ get_response, MockCalledOnceWith(request))
456+ self.assertThat(
457+ response, IsInstance(HttpResponseConflict))
458+
459 def test__get_response_sends_signal_on_serialization_failures(self):
460 get_response = self.patch(WSGIHandler, "get_response")
461 get_response.side_effect = (
462@@ -300,6 +315,22 @@
463 send_request_exception, MockCalledOnceWith(
464 sender=views.WebApplicationHandler, request=request))
465
466+ def test__get_response_sends_signal_on_deadlock_failures(self):
467+ get_response = self.patch(WSGIHandler, "get_response")
468+ get_response.side_effect = make_deadlock_failure()
469+
470+ send_request_exception = self.patch_autospec(
471+ signals.got_request_exception, "send")
472+
473+ handler = views.WebApplicationHandler(1)
474+ request = make_request()
475+ request.path = factory.make_name("path")
476+ handler.get_response(request)
477+
478+ self.assertThat(
479+ send_request_exception, MockCalledOnceWith(
480+ sender=views.WebApplicationHandler, request=request))
481+
482 def test__get_response_tries_only_once(self):
483 get_response = self.patch(WSGIHandler, "get_response")
484 get_response.return_value = sentinel.response
485
486=== modified file 'src/maasserver/utils/views.py'
487--- src/maasserver/utils/views.py 2016-02-24 15:42:58 +0000
488+++ src/maasserver/utils/views.py 2016-03-18 14:47:09 +0000
489@@ -21,7 +21,7 @@
490 from django.template.response import SimpleTemplateResponse
491 from maasserver.utils.orm import (
492 gen_retry_intervals,
493- is_serialization_failure,
494+ is_retryable_failure,
495 post_commit_hooks,
496 )
497 from piston3.authentication import initialize_server_request
498@@ -126,7 +126,7 @@
499 :ivar __retry_timeout: The number of seconds after which this request will
500 no longer be considered for a retry.
501 :ivar __retry: A weak set containing responses that have been generated as
502- a result of a serialization failure.
503+ a result of a retryable failure.
504 """
505
506 def __init__(self, attempts=10, timeout=90.0):
507@@ -139,15 +139,15 @@
508 def handle_uncaught_exception(self, request, resolver, exc_info):
509 """Override `BaseHandler.handle_uncaught_exception`.
510
511- If a serialization failure is detected, a retry is requested. It's up
512+ If a retryable failure is detected, a retry is requested. It's up
513 to ``get_response`` to actually do the retry.
514 """
515 upcall = super(WebApplicationHandler, self).handle_uncaught_exception
516 response = upcall(request, resolver, exc_info)
517 # Add it to the retry set if this response was caused by a
518- # serialization failure.
519+ # retryable failure.
520 exc_type, exc_value, exc_traceback = exc_info
521- if is_serialization_failure(exc_value):
522+ if is_retryable_failure(exc_value):
523 self.__retry.add(response)
524 else:
525 # Log the error to the regiond.log.
526@@ -155,7 +155,7 @@
527 exc_value=exc_value, exc_type=exc_type, exc_tb=exc_traceback)
528 log.err(failure, _why="500 Error - %s" % request.path)
529 # Return the response regardless. This means that we'll get Django's
530- # error page when there's a persistent serialization failure.
531+ # error page when there's a persistent retryable failure.
532 return response
533
534 def make_view_atomic(self, view):
535@@ -193,7 +193,7 @@
536
537 def get_response(request):
538 # Up-call to Django's get_response() in a transaction. This
539- # transaction may fail because of a serialization conflict, so
540+ # transaction may fail because of a retryable conflict, so
541 # pass errors to handle_uncaught_exception().
542 try:
543 with post_commit_hooks: