Merge lp:~blake-rouse/maas/power-query-multiple into lp:~maas-committers/maas/trunk
- power-query-multiple
- Merge into trunk
Status: | Merged |
---|---|
Approved by: | Blake Rouse |
Approved revision: | no longer in the source branch. |
Merged at revision: | 3274 |
Proposed branch: | lp:~blake-rouse/maas/power-query-multiple |
Merge into: | lp:~maas-committers/maas/trunk |
Diff against target: |
232 lines (+98/-30) 4 files modified
src/provisioningserver/pserv_services/node_power_monitor_service.py (+4/-1) src/provisioningserver/pserv_services/tests/test_node_power_monitor_service.py (+9/-2) src/provisioningserver/rpc/power.py (+47/-24) src/provisioningserver/rpc/tests/test_power.py (+38/-3) |
To merge this branch: | bzr merge lp:~blake-rouse/maas/power-query-multiple |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Gavin Panella (community) | Approve | ||
Blake Rouse (community) | Abstain | ||
Review via email: mp+237499@code.launchpad.net |
Commit message
Run 5 queries in parallel, when querying the power status of nodes.
Description of the change
Julian Edwards (julian-edwards) wrote : | # |
Blake Rouse (blake-rouse) wrote : | # |
I have updated the branch to stop blocking on just the oldest Deferred; if you could give it another test that would be great.
Blake Rouse (blake-rouse) wrote : | # |
Haha. Sorry about the "Approve" I must press that too much. ;-)
Blake Rouse (blake-rouse) : | # |
Julian Edwards (julian-edwards) wrote : | # |
On Friday 10 Oct 2014 12:34:47 you wrote:
> Haha. Sorry about the "Approve" I must press that too much. ;-)
I laughed :)
Julian Edwards (julian-edwards) wrote : | # |
On Friday 10 Oct 2014 12:34:07 you wrote:
> I think I will keep it the way it is, because it allows me to make the log
> message type different per exception and the message different for each. I
You could still do that with the single callback, FWIW.
> choose not to catch general exceptions, do you think this is a good idea?
> Or should I just catch all exceptions and maaslog as an unknown error
> printing the exception message.
I think the service has a general catch-all at the top doesn't it? Remember
that the service stops completely if there's any exception, so make sure
that's the case. I think it's fine to omit the catch-all here if there's one
at the top of the service.
Right, I'll go and test this now, the code looks much nicer now with the
semaphore.
Julian Edwards (julian-edwards) : | # |
Julian Edwards (julian-edwards) wrote : | # |
Ok tested and it doesn't work. I manually powered up a node and it was not reflected in the UI.
Julian Edwards (julian-edwards) wrote : | # |
(And I have a good test rig here, I intentionally left power disconnected from some of the BMCs so I have a mix of Microservers with power, some without and some NUCs)
Blake Rouse (blake-rouse) wrote : | # |
There is an issue using semaphore.run so I went back to using acquire and release.
I was able to test this, and it is now working correctly.
Gavin Panella (allenap) wrote : | # |
Not a full review yet, but some questions.
Blake Rouse (blake-rouse) : | # |
Gavin Panella (allenap) wrote : | # |
Comment about handling failures.
Gavin Panella (allenap) wrote : | # |
Info about cooperate/
Gavin Panella (allenap) wrote : | # |
You may also need http://
Gavin Panella (allenap) wrote : | # |
Applying http://
Preview Diff
1 | === modified file 'src/provisioningserver/pserv_services/node_power_monitor_service.py' | |||
2 | --- src/provisioningserver/pserv_services/node_power_monitor_service.py 2014-08-27 09:41:37 +0000 | |||
3 | +++ src/provisioningserver/pserv_services/node_power_monitor_service.py 2014-10-20 21:07:30 +0000 | |||
4 | @@ -43,6 +43,7 @@ | |||
5 | 43 | """Service to monitor the power status of all nodes in this cluster.""" | 43 | """Service to monitor the power status of all nodes in this cluster.""" |
6 | 44 | 44 | ||
7 | 45 | check_interval = timedelta(minutes=5).total_seconds() | 45 | check_interval = timedelta(minutes=5).total_seconds() |
8 | 46 | max_nodes_at_once = 5 | ||
9 | 46 | 47 | ||
10 | 47 | def __init__(self, cluster_uuid, clock=None): | 48 | def __init__(self, cluster_uuid, clock=None): |
11 | 48 | # Call self.query_nodes() every self.check_interval. | 49 | # Call self.query_nodes() every self.check_interval. |
12 | @@ -92,4 +93,6 @@ | |||
13 | 92 | uuid) | 93 | uuid) |
14 | 93 | else: | 94 | else: |
15 | 94 | node_power_parameters = response['nodes'] | 95 | node_power_parameters = response['nodes'] |
17 | 95 | yield query_all_nodes(node_power_parameters, clock=self.clock) | 96 | yield query_all_nodes( |
18 | 97 | node_power_parameters, | ||
19 | 98 | max_concurrency=self.max_nodes_at_once, clock=self.clock) | ||
20 | 96 | 99 | ||
21 | === modified file 'src/provisioningserver/pserv_services/tests/test_node_power_monitor_service.py' | |||
22 | --- src/provisioningserver/pserv_services/tests/test_node_power_monitor_service.py 2014-08-27 09:41:37 +0000 | |||
23 | +++ src/provisioningserver/pserv_services/tests/test_node_power_monitor_service.py 2014-10-20 21:07:30 +0000 | |||
24 | @@ -21,7 +21,10 @@ | |||
25 | 21 | MockCalledOnceWith, | 21 | MockCalledOnceWith, |
26 | 22 | MockCallsMatch, | 22 | MockCallsMatch, |
27 | 23 | ) | 23 | ) |
29 | 24 | from maastesting.testcase import MAASTestCase | 24 | from maastesting.testcase import ( |
30 | 25 | MAASTestCase, | ||
31 | 26 | MAASTwistedRunTest, | ||
32 | 27 | ) | ||
33 | 25 | from mock import ( | 28 | from mock import ( |
34 | 26 | ANY, | 29 | ANY, |
35 | 27 | call, | 30 | call, |
36 | @@ -48,6 +51,8 @@ | |||
37 | 48 | 51 | ||
38 | 49 | class TestNodePowerMonitorService(MAASTestCase): | 52 | class TestNodePowerMonitorService(MAASTestCase): |
39 | 50 | 53 | ||
40 | 54 | run_tests_with = MAASTwistedRunTest.make_factory(timeout=5) | ||
41 | 55 | |||
42 | 51 | def test_init_sets_up_timer_correctly(self): | 56 | def test_init_sets_up_timer_correctly(self): |
43 | 52 | cluster_uuid = factory.make_UUID() | 57 | cluster_uuid = factory.make_UUID() |
44 | 53 | service = npms.NodePowerMonitorService(cluster_uuid) | 58 | service = npms.NodePowerMonitorService(cluster_uuid) |
45 | @@ -118,7 +123,9 @@ | |||
46 | 118 | self.assertEqual(None, extract_result(d)) | 123 | self.assertEqual(None, extract_result(d)) |
47 | 119 | self.assertThat( | 124 | self.assertThat( |
48 | 120 | query_all_nodes, | 125 | query_all_nodes, |
50 | 121 | MockCalledOnceWith([], clock=service.clock)) | 126 | MockCalledOnceWith( |
51 | 127 | [], max_concurrency=service.max_nodes_at_once, | ||
52 | 128 | clock=service.clock)) | ||
53 | 122 | 129 | ||
54 | 123 | def test_query_nodes_copes_with_NoSuchCluster(self): | 130 | def test_query_nodes_copes_with_NoSuchCluster(self): |
55 | 124 | cluster_uuid, service = self.make_monitor_service() | 131 | cluster_uuid, service = self.make_monitor_service() |
56 | 125 | 132 | ||
57 | === modified file 'src/provisioningserver/rpc/power.py' | |||
58 | --- src/provisioningserver/rpc/power.py 2014-10-06 11:02:29 +0000 | |||
59 | +++ src/provisioningserver/rpc/power.py 2014-10-20 21:07:30 +0000 | |||
60 | @@ -18,6 +18,7 @@ | |||
61 | 18 | ] | 18 | ] |
62 | 19 | 19 | ||
63 | 20 | from datetime import timedelta | 20 | from datetime import timedelta |
64 | 21 | from functools import partial | ||
65 | 21 | 22 | ||
66 | 22 | from provisioningserver.events import ( | 23 | from provisioningserver.events import ( |
67 | 23 | EVENT_TYPES, | 24 | EVENT_TYPES, |
68 | @@ -46,6 +47,8 @@ | |||
69 | 46 | ) | 47 | ) |
70 | 47 | from twisted.internet import reactor | 48 | from twisted.internet import reactor |
71 | 48 | from twisted.internet.defer import ( | 49 | from twisted.internet.defer import ( |
72 | 50 | DeferredList, | ||
73 | 51 | DeferredSemaphore, | ||
74 | 49 | inlineCallbacks, | 52 | inlineCallbacks, |
75 | 50 | returnValue, | 53 | returnValue, |
76 | 51 | ) | 54 | ) |
77 | @@ -314,29 +317,49 @@ | |||
78 | 314 | raise PowerActionFail(error) | 317 | raise PowerActionFail(error) |
79 | 315 | 318 | ||
80 | 316 | 319 | ||
84 | 317 | @asynchronous | 320 | def maaslog_query_failure(node, failure): |
85 | 318 | @inlineCallbacks | 321 | """Log failure to query node.""" |
86 | 319 | def query_all_nodes(nodes, clock=reactor): | 322 | if failure.check(PowerActionFail): |
87 | 323 | maaslog.error( | ||
88 | 324 | "%s: Failed to query power state: %s.", | ||
89 | 325 | node['hostname'], failure.getErrorMessage()) | ||
90 | 326 | elif failure.check(NoSuchNode): | ||
91 | 327 | maaslog.debug( | ||
92 | 328 | "%s: Could not update power status; " | ||
93 | 329 | "no such node.", node['hostname']) | ||
94 | 330 | else: | ||
95 | 331 | maaslog.error( | ||
96 | 332 | "%s: Failed to query power state, unknown error: %s", | ||
97 | 333 | node['hostname'], failure.getErrorMessage()) | ||
98 | 334 | |||
99 | 335 | |||
100 | 336 | def maaslog_query(node, power_state): | ||
101 | 337 | """Log change in power state for node.""" | ||
102 | 338 | if node['power_state'] != power_state: | ||
103 | 339 | maaslog.info( | ||
104 | 340 | "%s: Power state has changed from %s to %s.", node['hostname'], | ||
105 | 341 | node['power_state'], power_state) | ||
106 | 342 | return power_state | ||
107 | 343 | |||
108 | 344 | |||
109 | 345 | def _query_node(node, clock): | ||
110 | 346 | """Performs `power_query` on the given node.""" | ||
111 | 347 | # Start the query of the power state for the given node, logging to | ||
112 | 348 | # maaslog as errors and power states change. | ||
113 | 349 | d = get_power_state( | ||
114 | 350 | node['system_id'], node['hostname'], | ||
115 | 351 | node['power_type'], node['context'], | ||
116 | 352 | clock=clock) | ||
117 | 353 | d.addCallbacks( | ||
118 | 354 | partial(maaslog_query, node), | ||
119 | 355 | partial(maaslog_query_failure, node)) | ||
120 | 356 | return d | ||
121 | 357 | |||
122 | 358 | |||
123 | 359 | def query_all_nodes(nodes, max_concurrency=5, clock=reactor): | ||
124 | 320 | """Performs `power_query` on all nodes. If the nodes state has changed, | 360 | """Performs `power_query` on all nodes. If the nodes state has changed, |
125 | 321 | then that is sent back to the region.""" | 361 | then that is sent back to the region.""" |
147 | 322 | for node in nodes: | 362 | semaphore = DeferredSemaphore(tokens=max_concurrency) |
148 | 323 | system_id = node['system_id'] | 363 | return DeferredList( |
149 | 324 | hostname = node['hostname'] | 364 | semaphore.run(_query_node, node, clock) |
150 | 325 | power_state_recorded = node['power_state'] | 365 | for node in nodes) |
130 | 326 | try: | ||
131 | 327 | power_state_observed = yield get_power_state( | ||
132 | 328 | system_id, hostname, node['power_type'], node['context'], | ||
133 | 329 | clock=clock) | ||
134 | 330 | except PowerActionFail as e: | ||
135 | 331 | maaslog.error( | ||
136 | 332 | "%s: Failed to query power state: %s.", hostname, e) | ||
137 | 333 | continue | ||
138 | 334 | except NoSuchNode: | ||
139 | 335 | maaslog.debug( | ||
140 | 336 | "%s: Could not update power status; " | ||
141 | 337 | "no such node.", hostname) | ||
142 | 338 | continue | ||
143 | 339 | if power_state_observed != power_state_recorded: | ||
144 | 340 | maaslog.info( | ||
145 | 341 | "%s: Power state has changed from %s to %s.", hostname, | ||
146 | 342 | power_state_recorded, power_state_observed) | ||
151 | 343 | 366 | ||
152 | === modified file 'src/provisioningserver/rpc/tests/test_power.py' | |||
153 | --- src/provisioningserver/rpc/tests/test_power.py 2014-10-06 11:02:29 +0000 | |||
154 | +++ src/provisioningserver/rpc/tests/test_power.py 2014-10-20 21:07:30 +0000 | |||
155 | @@ -58,9 +58,11 @@ | |||
156 | 58 | from twisted.internet import reactor | 58 | from twisted.internet import reactor |
157 | 59 | from twisted.internet.defer import ( | 59 | from twisted.internet.defer import ( |
158 | 60 | Deferred, | 60 | Deferred, |
159 | 61 | fail, | ||
160 | 61 | inlineCallbacks, | 62 | inlineCallbacks, |
161 | 62 | maybeDeferred, | 63 | maybeDeferred, |
162 | 63 | returnValue, | 64 | returnValue, |
163 | 65 | succeed, | ||
164 | 64 | ) | 66 | ) |
165 | 65 | from twisted.internet.task import Clock | 67 | from twisted.internet.task import Clock |
166 | 66 | 68 | ||
167 | @@ -620,7 +622,10 @@ | |||
168 | 620 | # Report back that all nodes' power states are as recorded. | 622 | # Report back that all nodes' power states are as recorded. |
169 | 621 | power_states = [node['power_state'] for node in nodes] | 623 | power_states = [node['power_state'] for node in nodes] |
170 | 622 | get_power_state = self.patch(power, 'get_power_state') | 624 | get_power_state = self.patch(power, 'get_power_state') |
172 | 623 | get_power_state.side_effect = power_states | 625 | get_power_state.side_effect = [ |
173 | 626 | succeed(power_state) | ||
174 | 627 | for power_state in power_states | ||
175 | 628 | ] | ||
176 | 624 | 629 | ||
177 | 625 | yield power.query_all_nodes(nodes) | 630 | yield power.query_all_nodes(nodes) |
178 | 626 | self.assertThat(get_power_state, MockCallsMatch(*( | 631 | self.assertThat(get_power_state, MockCallsMatch(*( |
179 | @@ -638,7 +643,7 @@ | |||
180 | 638 | get_power_state = self.patch(power, 'get_power_state') | 643 | get_power_state = self.patch(power, 'get_power_state') |
181 | 639 | error_msg = factory.make_name("error") | 644 | error_msg = factory.make_name("error") |
182 | 640 | get_power_state.side_effect = [ | 645 | get_power_state.side_effect = [ |
184 | 641 | PowerActionFail(error_msg), new_state_2] | 646 | fail(PowerActionFail(error_msg)), succeed(new_state_2)] |
185 | 642 | 647 | ||
186 | 643 | with FakeLogger("maas.power", level=logging.DEBUG) as maaslog: | 648 | with FakeLogger("maas.power", level=logging.DEBUG) as maaslog: |
187 | 644 | yield power.query_all_nodes([node1, node2]) | 649 | yield power.query_all_nodes([node1, node2]) |
188 | @@ -656,7 +661,7 @@ | |||
189 | 656 | new_state_2 = self.pick_alternate_state(node2['power_state']) | 661 | new_state_2 = self.pick_alternate_state(node2['power_state']) |
190 | 657 | get_power_state = self.patch(power, 'get_power_state') | 662 | get_power_state = self.patch(power, 'get_power_state') |
191 | 658 | get_power_state.side_effect = [ | 663 | get_power_state.side_effect = [ |
193 | 659 | exceptions.NoSuchNode(), new_state_2] | 664 | fail(exceptions.NoSuchNode()), succeed(new_state_2)] |
194 | 660 | 665 | ||
195 | 661 | with FakeLogger("maas.power", level=logging.DEBUG) as maaslog: | 666 | with FakeLogger("maas.power", level=logging.DEBUG) as maaslog: |
196 | 662 | yield power.query_all_nodes([node1, node2]) | 667 | yield power.query_all_nodes([node1, node2]) |
197 | @@ -668,6 +673,36 @@ | |||
198 | 668 | """, | 673 | """, |
199 | 669 | maaslog.output) | 674 | maaslog.output) |
200 | 670 | 675 | ||
201 | 676 | @inlineCallbacks | ||
202 | 677 | def test_query_all_nodes_swallows_Exception(self): | ||
203 | 678 | node1, node2 = self.make_nodes(2) | ||
204 | 679 | new_state_2 = self.pick_alternate_state(node2['power_state']) | ||
205 | 680 | get_power_state = self.patch(power, 'get_power_state') | ||
206 | 681 | get_power_state.side_effect = [ | ||
207 | 682 | fail(Exception('unknown')), succeed(new_state_2)] | ||
208 | 683 | |||
209 | 684 | with FakeLogger("maas.power", level=logging.DEBUG) as maaslog: | ||
210 | 685 | yield power.query_all_nodes([node1, node2]) | ||
211 | 686 | |||
212 | 687 | self.assertDocTestMatches( | ||
213 | 688 | """\ | ||
214 | 689 | hostname-...: Failed to query power state, unknown error: unknown | ||
215 | 690 | hostname-...: Power state has changed from ... to ... | ||
216 | 691 | """, | ||
217 | 692 | maaslog.output) | ||
218 | 693 | |||
219 | 694 | @inlineCallbacks | ||
220 | 695 | def test_query_all_nodes_returns_deferredlist_of_number_of_nodes(self): | ||
221 | 696 | node1, node2 = self.make_nodes(2) | ||
222 | 697 | get_power_state = self.patch(power, 'get_power_state') | ||
223 | 698 | get_power_state.side_effect = [ | ||
224 | 699 | succeed(node1['power_state']), succeed(node2['power_state'])] | ||
225 | 700 | |||
226 | 701 | results = yield power.query_all_nodes([node1, node2]) | ||
227 | 702 | self.assertEqual( | ||
228 | 703 | [(True, node1['power_state']), (True, node2['power_state'])], | ||
229 | 704 | results) | ||
230 | 705 | |||
231 | 671 | 706 | ||
232 | 672 | class TestMaybeChangePowerState(MAASTestCase): | 707 | class TestMaybeChangePowerState(MAASTestCase): |
233 | 673 | 708 |
Looking good so far. I have a suggestion to make the parallelism better since you're blocking on arbitrary Deferreds rather than the ones that might have fired.