Merge ~ltrager/maas:hmcz_transitional_states into maas:master

Proposed by Lee Trager
Status: Merged
Approved by: Lee Trager
Approved revision: a3f418656a3810cd78ba3ad2a2f8f5ff484d97ee
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~ltrager/maas:hmcz_transitional_states
Merge into: maas:master
Diff against target: 206 lines (+125/-3)
2 files modified
src/provisioningserver/drivers/power/hmcz.py (+44/-2)
src/provisioningserver/drivers/power/tests/test_hmcz.py (+81/-1)
Reviewer Review Type Date Requested Status
Adam Collard (community) Approve
MAAS Lander Approve
Review via email: mp+400156@code.launchpad.net

Commit message

HMCZ - Wait for transitional states

IBM Z has transitional states starting and stopping. When these operations
are occurring the HMC will reject any call to change states or properties.
If detected wait for the transition to complete before making a call to
the HMC.

To post a comment you must log in.
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b hmcz_transitional_states lp:~ltrager/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: SUCCESS
COMMIT: 1fe22b9f21ecd0fa38ec5832a4bf0f31aab30f87

review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b hmcz_transitional_states lp:~ltrager/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: SUCCESS
COMMIT: 0fb0f0eaefb085fc8e19037daf24c390c66282e9

review: Approve
Revision history for this message
Adam Collard (adam-collard) wrote :

basically looks good - some nit picks

review: Approve
c27b242... by Lee Trager

Merge branch 'master' into hmcz_transitional_states

a3f4186... by Lee Trager

adam-collard fixes

Revision history for this message
Lee Trager (ltrager) wrote :

Thanks for the review. Updated as suggested!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/src/provisioningserver/drivers/power/hmcz.py b/src/provisioningserver/drivers/power/hmcz.py
2index 5ccef9a..a4d7a56 100644
3--- a/src/provisioningserver/drivers/power/hmcz.py
4+++ b/src/provisioningserver/drivers/power/hmcz.py
5@@ -18,14 +18,18 @@ from provisioningserver.drivers import (
6 make_setting_field,
7 SETTING_SCOPE,
8 )
9-from provisioningserver.drivers.power import PowerActionError, PowerDriver
10+from provisioningserver.drivers.power import (
11+ PowerActionError,
12+ PowerDriver,
13+ PowerError,
14+)
15 from provisioningserver.logger import get_maas_logger
16 from provisioningserver.rpc.utils import commission_node, create_node
17 from provisioningserver.utils import typed
18 from provisioningserver.utils.twisted import asynchronous, threadDeferred
19
20 try:
21- from zhmcclient import Client, NotFound, Session
22+ from zhmcclient import Client, NotFound, Session, StatusTimeout
23 except ImportError:
24 no_zhmcclient = True
25 else:
26@@ -101,6 +105,21 @@ class HMCZPowerDriver(PowerDriver):
27 # IBM is aware this isn't optimal for us so they are looking into
28 # modifying IBM Z to go into a stopped state.
29 partition.stop(wait_for_completion=True)
30+ elif status == "stopping":
31+ # The HMC does not allow a machine to be powered on if its
32+ # currently stopping. Wait 120s for it which should be more
33+ # than enough time.
34+ try:
35+ partition.wait_for_status("stopped", 120)
36+ except StatusTimeout:
37+ # If 120s isn't enough time raise a PowerError() which will
38+ # trigger the builtin retry code in the base PowerDriver()
39+ # class.
40+ raise PowerError(
41+ "Partition is stuck in a "
42+ f"{partition.get_property('status')} state!"
43+ )
44+
45 partition.start(wait_for_completion=False)
46
47 @typed
48@@ -109,6 +128,21 @@ class HMCZPowerDriver(PowerDriver):
49 def power_off(self, system_id: str, context: dict):
50 """Power off IBM Z DPM."""
51 partition = self._get_partition(context)
52+ status = partition.get_property("status")
53+ if status == "starting":
54+ # The HMC does not allow a machine to be powered off if its
55+ # currently starting. Wait 120s for it which should be more
56+ # than enough time.
57+ try:
58+ partition.wait_for_status("active", 120)
59+ except StatusTimeout:
60+ # If 120s isn't enough time raise a PowerError() which will
61+ # trigger the builtin retry code in the base PowerDriver()
62+ # class.
63+ raise PowerError(
64+ "Partition is stuck in a "
65+ f"{partition.get_property('status')} state!"
66+ )
67 partition.stop(wait_for_completion=False)
68
69 @typed
70@@ -147,6 +181,14 @@ class HMCZPowerDriver(PowerDriver):
71 :param order: An ordered list of network or storage devices.
72 """
73 partition = self._get_partition(context)
74+ status = partition.get_property("status")
75+
76+ if status in {"starting", "stopping"}:
77+ # The HMC does not allow a machine's boot order to be reconfigured
78+ # while in a transitional state. Wait for it to complete. If this
79+ # times out allow it to be raised so the region can log it.
80+ partition.wait_for_status(["stopped", "active"], 120)
81+
82 # You can only specify one boot device on IBM Z
83 boot_device = order[0]
84 if boot_device.get("mac_address"):
85diff --git a/src/provisioningserver/drivers/power/tests/test_hmcz.py b/src/provisioningserver/drivers/power/tests/test_hmcz.py
86index 20d4f76..d76b347 100644
87--- a/src/provisioningserver/drivers/power/tests/test_hmcz.py
88+++ b/src/provisioningserver/drivers/power/tests/test_hmcz.py
89@@ -9,7 +9,9 @@ import random
90 import typing
91 from unittest.mock import Mock
92
93+import pytest
94 from twisted.internet.defer import inlineCallbacks, returnValue, succeed
95+from zhmcclient import StatusTimeout
96 from zhmcclient_mock import FakedSession
97
98 from maastesting.factory import factory
99@@ -20,7 +22,7 @@ from maastesting.matchers import (
100 )
101 from maastesting.testcase import MAASTestCase, MAASTwistedRunTest
102 from provisioningserver.drivers.power import hmcz as hmcz_module
103-from provisioningserver.drivers.power import PowerActionError
104+from provisioningserver.drivers.power import PowerActionError, PowerError
105 from provisioningserver.rpc import clusterservice, region
106 from provisioningserver.rpc.testing import MockLiveClusterToRegionRPCFixture
107
108@@ -160,6 +162,28 @@ class TestHMCZPowerDriver(MAASTestCase):
109 )
110
111 @inlineCallbacks
112+ def test_power_on_waits_for_stopping_state(self):
113+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
114+ mock_get_partition.return_value.get_property.return_value = "stopping"
115+ yield self.hmcz.power_on(None, self.make_context())
116+ mock_get_partition.return_value.wait_for_status.assert_called_once_with(
117+ "stopped", 120
118+ )
119+ mock_get_partition.return_value.start.assert_called_once_with(
120+ wait_for_completion=False
121+ )
122+
123+ @inlineCallbacks
124+ def test_power_on_times_out_waiting_for_stopping_state(self):
125+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
126+ mock_get_partition.return_value.get_property.return_value = "stopping"
127+ mock_get_partition.return_value.wait_for_status.side_effect = (
128+ StatusTimeout(None, None, None, None)
129+ )
130+ with pytest.raises(PowerError):
131+ yield self.hmcz.power_on(None, self.make_context())
132+
133+ @inlineCallbacks
134 def test_power_off(self):
135 mock_get_partition = self.patch(self.hmcz, "_get_partition")
136 yield self.hmcz.power_off(None, self.make_context())
137@@ -169,6 +193,28 @@ class TestHMCZPowerDriver(MAASTestCase):
138 )
139
140 @inlineCallbacks
141+ def test_power_off_waits_for_starting_state(self):
142+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
143+ mock_get_partition.return_value.get_property.return_value = "starting"
144+ yield self.hmcz.power_off(None, self.make_context())
145+ mock_get_partition.return_value.wait_for_status.assert_called_once_with(
146+ "active", 120
147+ )
148+ mock_get_partition.return_value.stop.assert_called_once_with(
149+ wait_for_completion=False
150+ )
151+
152+ @inlineCallbacks
153+ def test_power_off_timesout_waiting_for_stopping_state(self):
154+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
155+ mock_get_partition.return_value.get_property.return_value = "starting"
156+ mock_get_partition.return_value.wait_for_status.side_effect = (
157+ StatusTimeout(None, None, None, None)
158+ )
159+ with pytest.raises(PowerError):
160+ yield self.hmcz.power_off(None, self.make_context())
161+
162+ @inlineCallbacks
163 def test_power_query_starting(self):
164 power_partition_name = factory.make_name("power_partition_name")
165 cpc = self.fake_session.hmc.cpcs.add(
166@@ -430,6 +476,40 @@ class TestHMCZPowerDriver(MAASTestCase):
167 }
168 )
169
170+ @inlineCallbacks
171+ def test_set_boot_order_waits_for_starting_state(self):
172+ # Mock must be used as partition.wait_for_status() hangs when not
173+ # connected to a real HMC
174+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
175+ mock_get_partition.return_value.get_property.return_value = "starting"
176+
177+ yield self.hmcz.set_boot_order(
178+ None,
179+ self.make_context(),
180+ [{"mac_address": factory.make_mac_address()}],
181+ )
182+
183+ mock_get_partition.return_value.wait_for_status.assert_called_once_with(
184+ ["stopped", "active"], 120
185+ )
186+
187+ @inlineCallbacks
188+ def test_set_boot_order_waits_for_stopping_state(self):
189+ # Mock must be used as partition.wait_for_status() hangs when not
190+ # connected to a real HMC
191+ mock_get_partition = self.patch(self.hmcz, "_get_partition")
192+ mock_get_partition.return_value.get_property.return_value = "stopping"
193+
194+ yield self.hmcz.set_boot_order(
195+ None,
196+ self.make_context(),
197+ [{"mac_address": factory.make_mac_address()}],
198+ )
199+
200+ mock_get_partition.return_value.wait_for_status.assert_called_once_with(
201+ ["stopped", "active"], 120
202+ )
203+
204
205 @dataclass
206 class FakeNode:

Subscribers

People subscribed via source and target branches