Merge lp:~allenap/maas/rpc-resilience-when-advertising into lp:~maas-committers/maas/trunk

Proposed by Gavin Panella
Status: Merged
Approved by: Gavin Panella
Approved revision: no longer in the source branch.
Merged at revision: 3161
Proposed branch: lp:~allenap/maas/rpc-resilience-when-advertising
Merge into: lp:~maas-committers/maas/trunk
Diff against target: 104 lines (+47/-18)
2 files modified
src/maasserver/rpc/regionservice.py (+32/-3)
src/maasserver/rpc/tests/test_regionservice.py (+15/-15)
To merge this branch: bzr merge lp:~allenap/maas/rpc-resilience-when-advertising
Reviewer Review Type Date Requested Status
Graham Binns (community) Approve
Review via email: mp+236749@code.launchpad.net

Commit message

Do not be swayed from starting RegionAdvertisingService by mere errors. Fight the good fight until it works.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/maasserver/rpc/regionservice.py'
2--- src/maasserver/rpc/regionservice.py 2014-09-30 13:32:27 +0000
3+++ src/maasserver/rpc/regionservice.py 2014-10-01 16:49:21 +0000
4@@ -62,6 +62,7 @@
5 asynchronous,
6 callOut,
7 deferWithTimeout,
8+ pause,
9 synchronous,
10 )
11 from twisted.application import service
12@@ -560,9 +561,11 @@
13
14 @asynchronous
15 def startService(self):
16- self.starting = deferToThread(self.prepare)
17- self.starting.addCallback(lambda ignore: (
18- super(RegionAdvertisingService, self).startService()))
19+ self.starting = self._prepareService()
20+
21+ def prepared(_):
22+ return super(RegionAdvertisingService, self).startService()
23+ self.starting.addCallback(prepared)
24
25 def ignore_cancellation(failure):
26 failure.trap(defer.CancelledError)
27@@ -585,6 +588,32 @@
28 self.starting.cancel()
29 return self.starting
30
31+ @inlineCallbacks
32+ def _prepareService(self):
33+ """Keep calling `prepare` until it works.
34+
35+ The call to `prepare` can sometimes fail, particularly after starting
36+ the region for the first time on a fresh MAAS installation, but the
37+ mechanism is not yet understood. We take a pragmatic stance and just
38+ keep trying until it works.
39+
40+ Each failure will be logged, and there will be a pause of 5 seconds
41+ between each attempt.
42+
43+ """
44+ while True:
45+ try:
46+ yield deferToThread(self.prepare)
47+ except defer.CancelledError:
48+ raise
49+ except Exception as e:
50+ log.err(e, (
51+ "Preparation of %s failed; will try again in "
52+ "5 seconds." % self.__class__.__name__))
53+ yield pause(5)
54+ else:
55+ break
56+
57 @synchronous
58 @synchronised(lock)
59 @transactional
60
61=== modified file 'src/maasserver/rpc/tests/test_regionservice.py'
62--- src/maasserver/rpc/tests/test_regionservice.py 2014-10-01 12:16:33 +0000
63+++ src/maasserver/rpc/tests/test_regionservice.py 2014-10-01 16:49:21 +0000
64@@ -1417,25 +1417,25 @@
65 @wait_for_reactor
66 def test_start_up_errors_are_logged(self):
67 service = RegionAdvertisingService()
68-
69+ # Prevent real pauses.
70+ self.patch_autospec(regionservice, "pause").return_value = None
71 # Ensure that service.prepare fails with a obvious error.
72 exception = ValueError("You don't vote for kings!")
73- self.patch(service, "prepare").side_effect = exception
74-
75- err_calls = []
76- self.patch(log, "err", err_calls.append)
77-
78- err_calls_expected = [
79- AfterPreprocessing(
80- (lambda failure: failure.value),
81- Is(exception)),
82- ]
83-
84- def check(ignore):
85- self.assertThat(err_calls, MatchesListwise(err_calls_expected))
86+ self.patch(service, "prepare").side_effect = [exception, None]
87+ # Capture all Twisted logs.
88+ logger = self.useFixture(TwistedLoggerFixture())
89+
90+ def check_logs(ignore):
91+ self.assertDocTestMatches(
92+ """\
93+ Preparation of ... failed; will try again in 5 seconds.
94+ Traceback (most recent call last):...
95+ Failure: exceptions.ValueError: You don't vote for kings!
96+ """,
97+ logger.dump())
98
99 service.startService()
100- service.starting.addCallback(check)
101+ service.starting.addCallback(check_logs)
102 return service.starting
103
104 @wait_for_reactor