Merge lp:~allenap/maas/rpc-rapid-connections--1.5 into lp:maas/1.5

Proposed by Gavin Panella on 2014-05-15
Status: Merged
Approved by: Gavin Panella on 2014-05-17
Approved revision: 2271
Merged at revision: 2272
Proposed branch: lp:~allenap/maas/rpc-rapid-connections--1.5
Merge into: lp:maas/1.5
Diff against target: 189 lines (+89/-46)
2 files modified
src/provisioningserver/rpc/clusterservice.py (+42/-12)
src/provisioningserver/rpc/tests/test_clusterservice.py (+47/-34)
To merge this branch: bzr merge lp:~allenap/maas/rpc-rapid-connections--1.5
Reviewer Review Type Date Requested Status
Gavin Panella (community) Approve on 2014-05-17
Review via email: mp+219676@code.launchpad.net

Commit message

Backport trunk r2309: Shorten the time taken for a cluster to initially connect to the region via RPC to around 2 seconds.

Previously it could take between 30 and 90 seconds.

To post a comment you must log in.
Gavin Panella (allenap) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/provisioningserver/rpc/clusterservice.py'
--- src/provisioningserver/rpc/clusterservice.py 2014-03-28 15:17:34 +0000
+++ src/provisioningserver/rpc/clusterservice.py 2014-05-15 10:42:28 +0000
@@ -221,9 +221,13 @@
221 instances connected to it.221 instances connected to it.
222 """222 """
223223
224 INTERVAL_LOW = 2 # seconds.
225 INTERVAL_MID = 10 # seconds.
226 INTERVAL_HIGH = 30 # seconds.
227
224 def __init__(self, reactor):228 def __init__(self, reactor):
225 super(ClusterClientService, self).__init__(229 super(ClusterClientService, self).__init__(
226 self._get_random_interval(), self.update)230 self._calculate_interval(None, None), self.update)
227 self.connections = {}231 self.connections = {}
228 self.clock = reactor232 self.clock = reactor
229233
@@ -248,9 +252,6 @@
248 This obtains a list of endpoints from the region then connects252 This obtains a list of endpoints from the region then connects
249 to new ones and drops connections to those no longer used.253 to new ones and drops connections to those no longer used.
250 """254 """
251 # 0. Update interval.
252 self._update_interval()
253 # 1. Obtain RPC endpoints.
254 try:255 try:
255 info_url = self._get_rpc_info_url()256 info_url = self._get_rpc_info_url()
256 info_page = yield getPage(info_url)257 info_page = yield getPage(info_url)
@@ -258,9 +259,13 @@
258 eventloops = info["eventloops"]259 eventloops = info["eventloops"]
259 yield self._update_connections(eventloops)260 yield self._update_connections(eventloops)
260 except ConnectError as error:261 except ConnectError as error:
262 self._update_interval(None, len(self.connections))
261 log.msg("Region not available: %s" % (error,))263 log.msg("Region not available: %s" % (error,))
262 except:264 except:
265 self._update_interval(None, len(self.connections))
263 log.err()266 log.err()
267 else:
268 self._update_interval(len(eventloops), len(self.connections))
264269
265 @staticmethod270 @staticmethod
266 def _get_rpc_info_url():271 def _get_rpc_info_url():
@@ -270,14 +275,39 @@
270 url = url.geturl()275 url = url.geturl()
271 return ascii_url(url)276 return ascii_url(url)
272277
273 @staticmethod278 def _calculate_interval(self, num_eventloops, num_connections):
274 def _get_random_interval():279 """Calculate the update interval.
275 """Return a random interval between 30 and 90 seconds."""280
276 return random.randint(30, 90)281 The interval is `INTERVAL_LOW` seconds when there are no
277282 connections, so that this can quickly obtain its first
278 def _update_interval(self):283 connection.
279 """Change the interval randomly to avoid stampedes of clusters."""284
280 self._loop.interval = self.step = self._get_random_interval()285 The interval changes to `INTERVAL_MID` seconds when there are
286 some connections, but fewer than there are event-loops.
287
288 After that it drops back to `INTERVAL_HIGH` seconds.
289 """
290 if num_eventloops is None:
291 # The region is not available; keep trying regularly.
292 return self.INTERVAL_LOW
293 elif num_eventloops == 0:
294 # The region is coming up; keep trying regularly.
295 return self.INTERVAL_LOW
296 elif num_connections == 0:
297 # No connections to the region; keep trying regularly.
298 return self.INTERVAL_LOW
299 elif num_connections < num_eventloops:
300 # Some connections to the region, but not to all event
301 # loops; keep updating reasonably frequently.
302 return self.INTERVAL_MID
303 else:
304 # Fully connected to the region; update every so often.
305 return self.INTERVAL_HIGH
306
307 def _update_interval(self, num_eventloops, num_connections):
308 """Change the update interval."""
309 self._loop.interval = self.step = self._calculate_interval(
310 num_eventloops, num_connections)
281311
282 @inlineCallbacks312 @inlineCallbacks
283 def _update_connections(self, eventloops):313 def _update_connections(self, eventloops):
284314
=== modified file 'src/provisioningserver/rpc/tests/test_clusterservice.py'
--- src/provisioningserver/rpc/tests/test_clusterservice.py 2014-03-28 04:31:32 +0000
+++ src/provisioningserver/rpc/tests/test_clusterservice.py 2014-05-15 10:42:28 +0000
@@ -254,40 +254,6 @@
254 observed_rpc_info_url = ClusterClientService._get_rpc_info_url()254 observed_rpc_info_url = ClusterClientService._get_rpc_info_url()
255 self.assertThat(observed_rpc_info_url, Equals(expected_rpc_info_url))255 self.assertThat(observed_rpc_info_url, Equals(expected_rpc_info_url))
256256
257 def test__get_random_interval(self):
258 # _get_random_interval() returns a random number between 30 and
259 # 90 inclusive.
260 is_between_30_and_90_inclusive = MatchesAll(
261 MatchesAny(GreaterThan(30), Equals(30)),
262 MatchesAny(LessThan(90), Equals(90)))
263 for _ in range(100):
264 self.assertThat(
265 ClusterClientService._get_random_interval(),
266 is_between_30_and_90_inclusive)
267
268 def test__get_random_interval_calls_into_standard_library(self):
269 # _get_random_interval() depends entirely on the standard library.
270 random = self.patch(clusterservice, "random")
271 random.randint.return_value = sentinel.randint
272 self.assertIs(
273 sentinel.randint,
274 ClusterClientService._get_random_interval())
275 self.assertThat(random.randint, MockCalledOnceWith(30, 90))
276
277 def test__update_interval(self):
278 service = ClusterClientService(Clock())
279 # ClusterClientService's superclass, TimerService, creates a
280 # LoopingCall with now=True. We neuter it here because we only
281 # want to observe the behaviour of _update_interval().
282 service.call = (lambda: None, (), {})
283 service.startService()
284 self.assertThat(service.step, MatchesAll(
285 Equals(service._loop.interval), IsInstance(int)))
286 service.step = service._loop.interval = sentinel.undefined
287 service._update_interval()
288 self.assertThat(service.step, MatchesAll(
289 Equals(service._loop.interval), IsInstance(int)))
290
291 def test_update_connect_error_is_logged_tersely(self):257 def test_update_connect_error_is_logged_tersely(self):
292 getPage = self.patch(clusterservice, "getPage")258 getPage = self.patch(clusterservice, "getPage")
293 getPage.side_effect = error.ConnectionRefusedError()259 getPage.side_effect = error.ConnectionRefusedError()
@@ -512,6 +478,53 @@
512 service.getClient)478 service.getClient)
513479
514480
481class TestClusterClientServiceIntervals(MAASTestCase):
482
483 scenarios = (
484 ("initial", {
485 "num_eventloops": None,
486 "num_connections": None,
487 "expected": ClusterClientService.INTERVAL_LOW,
488 }),
489 ("no-event-loops", {
490 "num_eventloops": 0,
491 "num_connections": sentinel.undefined,
492 "expected": ClusterClientService.INTERVAL_LOW,
493 }),
494 ("no-connections", {
495 "num_eventloops": 1, # anything > 1.
496 "num_connections": 0,
497 "expected": ClusterClientService.INTERVAL_LOW,
498 }),
499 ("fewer-connections-than-event-loops", {
500 "num_eventloops": 2, # anything > num_connections.
501 "num_connections": 1, # anything > 0.
502 "expected": ClusterClientService.INTERVAL_MID,
503 }),
504 ("default", {
505 "num_eventloops": 3, # same as num_connections.
506 "num_connections": 3, # same as num_eventloops.
507 "expected": ClusterClientService.INTERVAL_HIGH,
508 }),
509 )
510
511 def make_inert_client_service(self):
512 service = ClusterClientService(Clock())
513 # ClusterClientService's superclass, TimerService, creates a
514 # LoopingCall with now=True. We neuter it here to allow
515 # observation of the behaviour of _update_interval() for
516 # example.
517 service.call = (lambda: None, (), {})
518 return service
519
520 def test__calculate_interval(self):
521 service = self.make_inert_client_service()
522 service.startService()
523 self.assertEqual(
524 self.expected, service._calculate_interval(
525 self.num_eventloops, self.num_connections))
526
527
515class TestClusterClient(MAASTestCase):528class TestClusterClient(MAASTestCase):
516529
517 run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=5)530 run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=5)

Subscribers

People subscribed via source and target branches

to all changes: