Merge lp:~allenap/maas/power-change-merge-and-log into lp:~maas-committers/maas/trunk

Proposed by Gavin Panella
Status: Merged
Approved by: Gavin Panella
Approved revision: no longer in the source branch.
Merged at revision: 3735
Proposed branch: lp:~allenap/maas/power-change-merge-and-log
Merge into: lp:~maas-committers/maas/trunk
Diff against target: 306 lines (+115/-41)
2 files modified
src/provisioningserver/rpc/power.py (+46/-22)
src/provisioningserver/rpc/tests/test_power.py (+69/-19)
To merge this branch: bzr merge lp:~allenap/maas/power-change-merge-and-log
Reviewer Review Type Date Requested Status
Raphaël Badin (community) Approve
Review via email: mp+254332@code.launchpad.net

Commit message

Improve logging for power changes in the cluster, and allow non-conflicting power changes.

To post a comment you must log in.
Revision history for this message
Raphaël Badin (rvb) wrote :

Looks good… will this improve the situation for one of the bugs I filed yesterday about the mysterious unhandled twisted errors? (1436885 & co)

review: Approve
Revision history for this message
Gavin Panella (allenap) wrote :

> Looks good… will this improve the situation for one of the bugs I
> filed yesterday about the mysterious unhandled twisted errors?
> (1436885 & co)

I hope so. I wasn't sure so I haven't linked them to the bug/bugs, but I
think I will, but not assign the bug to myself unless they do end up
fixed.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/provisioningserver/rpc/power.py'
2--- src/provisioningserver/rpc/power.py 2015-03-25 15:33:23 +0000
3+++ src/provisioningserver/rpc/power.py 2015-03-27 00:50:35 +0000
4@@ -40,6 +40,7 @@
5 )
6 from provisioningserver.utils.twisted import (
7 asynchronous,
8+ callOut,
9 deferred,
10 deferWithTimeout,
11 pause,
12@@ -47,6 +48,7 @@
13 )
14 from twisted.internet import reactor
15 from twisted.internet.defer import (
16+ CancelledError,
17 DeferredList,
18 DeferredSemaphore,
19 inlineCallbacks,
20@@ -183,33 +185,55 @@
21 """
22 assert power_change in ('on', 'off'), (
23 "Unknown power change: %s" % power_change)
24+
25 # There should be one and only one power change for each system ID.
26- # If there's one already, raise an error.
27- registered_power_action = power_action_registry.get(system_id, None)
28- if registered_power_action is not None:
29+ if system_id in power_action_registry:
30+ current_power_change, d = power_action_registry[system_id]
31+ else:
32+ current_power_change, d = None, None
33+
34+ if current_power_change is None:
35+ # Arrange for the power change to happen later; do not make the caller
36+ # wait, because it might take a long time. We set a timeout of two
37+ # minutes so that if the power action doesn't return in a timely
38+ # fashion (or fails silently or some such) it doesn't block other
39+ # actions on the node.
40+ d = deferLater(
41+ clock, 0, deferWithTimeout, CHANGE_POWER_STATE_TIMEOUT,
42+ change_power_state, system_id, hostname, power_type, power_change,
43+ context, clock)
44+
45+ power_action_registry[system_id] = power_change, d
46+
47+ # Whether we succeed or fail, we need to remove the action from the
48+ # registry of actions, otherwise subsequent actions will fail.
49+ d.addBoth(callOut, power_action_registry.pop, system_id, None)
50+
51+ # Log cancellations distinctly from other errors.
52+ def eb_cancelled(failure):
53+ failure.trap(CancelledError)
54+ log.msg(
55+ "%s: Power could not be turned %s; timed out."
56+ % (hostname, power_change))
57+ d.addErrback(eb_cancelled)
58+
59+ # Catch-all log.
60+ d.addErrback(
61+ log.err, "%s: Power could not be turned %s." % (
62+ hostname, power_change))
63+
64+ elif current_power_change == power_change:
65+ # What we want is already happening; let it continue.
66+ pass
67+
68+ else:
69+ # Right now we reject conflicting power changes. However, we have the
70+ # Deferred (in `d`) along which the current power change is occurring,
71+ # so the option to cancel is available if we want it.
72 raise PowerActionAlreadyInProgress(
73 "Unable to change power state to '%s' for node %s: another "
74 "action is already in progress for that node." %
75 (power_change, hostname))
76- power_action_registry[system_id] = power_change
77-
78- def clean_up(*args, **kwargs):
79- power_action_registry.pop(system_id, None)
80-
81- # Arrange for the power change to happen later; do not make the caller
82- # wait, because it might take a long time.
83- # We set a timeout of two minutes so that if the power action
84- # doesn't return in a timely fashion (or fails silently or
85- # some such) it doesn't block other actions on the node.
86- d = deferLater(
87- clock, 0, deferWithTimeout, CHANGE_POWER_STATE_TIMEOUT,
88- change_power_state, system_id, hostname, power_type,
89- power_change, context, clock)
90- d.addErrback(log.err)
91- # Whether we succeeded or failed, we need to remove the action
92- # from the registry of actions, otherwise every subsequent
93- # action will fail.
94- d.addBoth(clean_up)
95
96
97 @asynchronous
98
99=== modified file 'src/provisioningserver/rpc/tests/test_power.py'
100--- src/provisioningserver/rpc/tests/test_power.py 2015-03-25 15:33:23 +0000
101+++ src/provisioningserver/rpc/tests/test_power.py 2015-03-27 00:50:35 +0000
102@@ -243,7 +243,7 @@
103 factory.make_name('context-key'): factory.make_name('context-val')
104 }
105 self.patch(power, 'pause')
106- power.power_action_registry[system_id] = power_change
107+ power.power_action_registry[system_id] = power_change, sentinel.d
108 # Patch the power action utility so that it says the node is
109 # in the required power state.
110 power_action, execute = patch_power_action(
111@@ -274,7 +274,7 @@
112 context = {
113 factory.make_name('context-key'): factory.make_name('context-val')
114 }
115- power.power_action_registry[system_id] = power_change
116+ power.power_action_registry[system_id] = power_change, sentinel.d
117 self.patch(power, 'pause')
118 power_action, execute = patch_power_action(
119 self, return_value=random.choice(['on', 'off']))
120@@ -302,7 +302,7 @@
121 factory.make_name('context-key'): factory.make_name('context-val')
122 }
123 self.patch(power, 'pause')
124- power.power_action_registry[system_id] = power_change
125+ power.power_action_registry[system_id] = power_change, sentinel.d
126 # Simulate a failure to power up the node, then a success.
127 power_action, execute = patch_power_action(
128 self, side_effect=[None, 'off', None, 'on'])
129@@ -332,7 +332,7 @@
130 factory.make_name('context-key'): factory.make_name('context-val')
131 }
132 self.patch(power, 'pause')
133- power.power_action_registry[system_id] = power_change
134+ power.power_action_registry[system_id] = power_change, sentinel.d
135 # Patch the power action utility so that it says the node is
136 # in the required power state.
137 power_action, execute = patch_power_action(
138@@ -363,7 +363,7 @@
139 factory.make_name('context-key'): factory.make_name('context-val')
140 }
141 self.patch(power, 'pause')
142- power.power_action_registry[system_id] = power_change
143+ power.power_action_registry[system_id] = power_change, sentinel.d
144 # Simulate a persistent failure.
145 power_action, execute = patch_power_action(
146 self, return_value='off')
147@@ -393,7 +393,7 @@
148 factory.make_name('context-key'): factory.make_name('context-val')
149 }
150 self.patch(power, 'pause')
151- power.power_action_registry[system_id] = power_change
152+ power.power_action_registry[system_id] = power_change, sentinel.d
153 # Simulate an exception.
154 exception_message = factory.make_name('exception')
155 power_action, execute = patch_power_action(
156@@ -418,7 +418,7 @@
157 context = {
158 factory.make_name('context-key'): factory.make_name('context-val')
159 }
160- power.power_action_registry[system_id] = power_change
161+ power.power_action_registry[system_id] = power_change, sentinel.d
162 # Simulate two failures to power up the node, then a success.
163 power_action, execute = patch_power_action(
164 self, side_effect=[None, 'off', None, 'off', None, 'on'])
165@@ -789,7 +789,8 @@
166
167 run_tests_with = MAASTwistedRunTest.make_factory(timeout=5)
168
169- def patch_power_action_registry(self):
170+ def setUp(self):
171+ super(TestMaybeChangePowerState, self).setUp()
172 self.patch(power, 'power_action_registry', {})
173
174 def patch_methods_using_rpc(self):
175@@ -809,7 +810,6 @@
176 @inlineCallbacks
177 def test_adds_action_to_registry(self):
178 self.patch_methods_using_rpc()
179- self.patch_power_action_registry()
180
181 system_id = factory.make_name('system_id')
182 hostname = factory.make_name('hostname')
183@@ -822,27 +822,45 @@
184 yield power.maybe_change_power_state(
185 system_id, hostname, power_type, power_change, context)
186 self.assertEqual(
187- {system_id: power_change},
188+ {system_id: (power_change, ANY)},
189 power.power_action_registry)
190 reactor.runUntilCurrent() # Run all delayed calls.
191 self.assertEqual({}, power.power_action_registry)
192
193 @inlineCallbacks
194- def test_errors_when_change_already_registered(self):
195+ def test_errors_when_change_conflicts_with_in_progress_change(self):
196 system_id = factory.make_name('system_id')
197 hostname = factory.make_name('hostname')
198 power_type = random.choice(power.QUERY_POWER_TYPES)
199- power_change = random.choice(['on', 'off'])
200+ power_changes = ['on', 'off']
201+ random.shuffle(power_changes)
202+ current_power_change, power_change = power_changes
203 context = {
204 factory.make_name('context-key'): factory.make_name('context-val')
205 }
206-
207- power.power_action_registry[system_id] = power_change
208+ power.power_action_registry[system_id] = (
209+ current_power_change, sentinel.d)
210 with ExpectedException(exceptions.PowerActionAlreadyInProgress):
211 yield power.maybe_change_power_state(
212 system_id, hostname, power_type, power_change, context)
213
214 @inlineCallbacks
215+ def test_does_nothing_when_change_matches_in_progress_change(self):
216+ system_id = factory.make_name('system_id')
217+ hostname = factory.make_name('hostname')
218+ power_type = random.choice(power.QUERY_POWER_TYPES)
219+ current_power_change = power_change = random.choice(['on', 'off'])
220+ context = {
221+ factory.make_name('context-key'): factory.make_name('context-val')
222+ }
223+ power.power_action_registry[system_id] = (
224+ current_power_change, sentinel.d)
225+ yield power.maybe_change_power_state(
226+ system_id, hostname, power_type, power_change, context)
227+ self.assertThat(power.power_action_registry, Equals(
228+ {system_id: (power_change, sentinel.d)}))
229+
230+ @inlineCallbacks
231 def test_calls_change_power_state_later(self):
232 self.patch_methods_using_rpc()
233
234@@ -865,7 +883,6 @@
235
236 @inlineCallbacks
237 def test_clears_lock_if_change_power_state_success(self):
238- self.patch_power_action_registry()
239 self.patch_methods_using_rpc()
240
241 system_id = factory.make_name('system_id')
242@@ -883,7 +900,6 @@
243
244 @inlineCallbacks
245 def test_clears_lock_if_change_power_state_fails(self):
246- self.patch_power_action_registry()
247
248 class TestException(Exception):
249 pass
250@@ -892,7 +908,7 @@
251 power.power_change_starting.side_effect = TestException('boom')
252
253 system_id = factory.make_name('system_id')
254- hostname = sentinel.hostname
255+ hostname = factory.make_hostname()
256 power_type = sentinel.power_type
257 power_change = random.choice(['on', 'off'])
258 context = sentinel.context
259@@ -904,12 +920,46 @@
260 reactor.runUntilCurrent() # Run all delayed calls.
261 self.assertNotIn(system_id, power.power_action_registry)
262 self.assertDocTestMatches(
263- "Unhandled Error...TestException: boom",
264+ """\
265+ %s: Power could not be turned %s.
266+ Traceback (most recent call last):
267+ ...
268+ %s.TestException: boom
269+ """ % (hostname, power_change, __name__),
270+ logger.dump())
271+
272+ @inlineCallbacks
273+ def test_clears_lock_if_change_power_state_is_cancelled(self):
274+ # Patch in an unfired Deferred here. This will pause the call so that
275+ # we can grab the delayed call from the registry in time to cancel it.
276+ self.patch_autospec(power, 'change_power_state')
277+ power.change_power_state.return_value = Deferred()
278+
279+ system_id = factory.make_name('system_id')
280+ hostname = factory.make_hostname()
281+ power_type = sentinel.power_type
282+ power_change = random.choice(['on', 'off'])
283+ context = sentinel.context
284+
285+ logger = self.useFixture(TwistedLoggerFixture())
286+
287+ yield power.maybe_change_power_state(
288+ system_id, hostname, power_type, power_change, context)
289+
290+ # Get the Deferred from the registry and cancel it.
291+ _, d = power.power_action_registry[system_id]
292+ d.cancel()
293+ yield d
294+
295+ self.assertNotIn(system_id, power.power_action_registry)
296+ self.assertDocTestMatches(
297+ """\
298+ %s: Power could not be turned %s; timed out.
299+ """ % (hostname, power_change),
300 logger.dump())
301
302 @inlineCallbacks
303 def test__calls_change_power_state_with_timeout(self):
304- self.patch_power_action_registry()
305 self.patch_methods_using_rpc()
306 defer_with_timeout = self.patch(power, 'deferWithTimeout')
307