Merge lp:~andreserl/maas/lp1604962_lp1604987_2.0 into lp:maas/2.0

Proposed by Andres Rodriguez
Status: Merged
Approved by: Andres Rodriguez
Approved revision: 5174
Merged at revision: 5174
Proposed branch: lp:~andreserl/maas/lp1604962_lp1604987_2.0
Merge into: lp:maas/2.0
Diff against target: 306 lines (+89/-40)
8 files modified
docs/changelog.rst (+4/-0)
src/maasserver/models/node.py (+33/-20)
src/maasserver/models/tests/test_node.py (+12/-3)
src/maasserver/rpc/nodes.py (+1/-1)
src/maasserver/status_monitor.py (+1/-1)
src/metadataserver/api.py (+18/-12)
src/metadataserver/tests/test_api_status.py (+5/-3)
src/provisioningserver/events.py (+15/-0)
To merge this branch: bzr merge lp:~andreserl/maas/lp1604962_lp1604987_2.0
Reviewer Review Type Date Requested Status
Andres Rodriguez (community) Approve
Review via email: mp+301122@code.launchpad.net

Commit message

Fixes to correctly log cloud-init/curtin FAIL events in the node event log.

Fixes to correctly log "mark_failed" events. We did this by combining the mark_failed with _mark_failed functions, provided that the latter wasn't really storing anything in the event log and it should. It now differentiates between a user driven "Mark Failed" vs a system driven.

Drive-by fix to improve mark_failed function.

To post a comment you must log in.
Revision history for this message
Andres Rodriguez (andreserl) wrote :

self approving as this has landed in trunk.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'docs/changelog.rst'
2--- docs/changelog.rst 2016-07-20 13:51:17 +0000
3+++ docs/changelog.rst 2016-07-25 23:28:12 +0000
4@@ -25,6 +25,10 @@
5
6 LP: #1604169 [2.0] maas login yields "ImportError: No module named 'maasserver'"
7
8+LP: #1604962 Fixes to correctly log cloud-init/curtin FAIL events in the node event log.
9+
10+LP: #1604987 Fixes to correctly log "mark_failed" events.
11+
12
13 2.0.0 (rc2)
14 ===========
15
16=== modified file 'src/maasserver/models/node.py'
17--- src/maasserver/models/node.py 2016-07-22 16:11:04 +0000
18+++ src/maasserver/models/node.py 2016-07-25 23:28:12 +0000
19@@ -1009,20 +1009,29 @@
20
21 def _register_request_event(
22 self, user, type_name, action='', comment=None):
23- """Register a node user request event."""
24- # don't register system generated non-user requests
25+ """Register a node request event.
26+
27+ It registers events like start_commission (started by a user),
28+ or mark_failed (started by the system)"""
29+
30+ # the description will be the comment, if any.
31+ description = comment if comment else ''
32+ # if the user exists, we need to construct the description with
33+ # the user. as it would be a user-driven request.
34 if user is not None:
35- event_details = EVENT_DETAILS[type_name]
36- description = "(%s)" % user.username
37- if comment:
38- description = "%s - %s" % (description, comment)
39- # Avoid circular imports.
40- from maasserver.models.event import Event
41- Event.objects.register_event_and_event_type(
42- self.system_id, type_name, type_level=event_details.level,
43- type_description=event_details.description,
44- event_action=action,
45- event_description=description)
46+ if len(description) == 0:
47+ description = "(%s)" % user
48+ else:
49+ description = "(%s) - %s" % (user, description)
50+ event_details = EVENT_DETAILS[type_name]
51+
52+ # Avoid circular imports.
53+ from maasserver.models.event import Event
54+ Event.objects.register_event_and_event_type(
55+ self.system_id, type_name, type_level=event_details.level,
56+ type_description=event_details.description,
57+ event_action=action,
58+ event_description=description)
59
60 def storage_layout_issues(self):
61 """Return any errors with the storage layout.
62@@ -2271,18 +2280,22 @@
63 arch, subarch = self.architecture.split('/')
64 return (arch, subarch)
65
66- def mark_failed(self, user, comment=None):
67- self._register_request_event(
68- user, EVENT_TYPES.REQUEST_NODE_MARK_FAILED, action='mark_failed',
69- comment=comment)
70- self._mark_failed(user, comment)
71-
72- def _mark_failed(self, user, comment=None, commit=True):
73+ def mark_failed(self, user=None, comment=None, commit=True):
74 """Mark this node as failed.
75
76 The actual 'failed' state depends on the current status of the
77 node.
78 """
79+ if user is None:
80+ # This is a system-driven event. Log level is ERROR.
81+ event_type = EVENT_TYPES.REQUEST_NODE_MARK_FAILED_SYSTEM
82+ else:
83+ # This is a user-driven event. Log level is INFO.
84+ event_type = EVENT_TYPES.REQUEST_NODE_MARK_FAILED
85+ self._register_request_event(
86+ user, event_type, action='mark_failed',
87+ comment=comment)
88+
89 new_status = get_failed_status(self.status)
90 if new_status is not None:
91 self.status = new_status
92
93=== modified file 'src/maasserver/models/tests/test_node.py'
94--- src/maasserver/models/tests/test_node.py 2016-07-15 02:18:08 +0000
95+++ src/maasserver/models/tests/test_node.py 2016-07-25 23:28:12 +0000
96@@ -2916,14 +2916,23 @@
97 event_action=event_action,
98 event_description=event_description))
99
100- def test__register_request_event_with_none_user_saves_no_event(self):
101+ def test__register_request_event_none_user_saves_comment_not_user(self):
102 node = factory.make_Node()
103 log_mock = self.patch_autospec(
104 Event.objects, 'register_event_and_event_type')
105 event_name = EVENT_TYPES.REQUEST_NODE_START
106+ event_action = factory.make_name('action')
107+ event_details = EVENT_DETAILS[event_name]
108 comment = factory.make_name("comment")
109- node._register_request_event(None, event_name, comment)
110- self.assertThat(log_mock, MockNotCalled())
111+ event_description = "%s" % (comment)
112+ node._register_request_event(None, event_name, event_action, comment)
113+ self.assertThat(log_mock, MockCalledOnceWith(
114+ node.system_id,
115+ EVENT_TYPES.REQUEST_NODE_START,
116+ type_level=event_details.level,
117+ type_description=event_details.description,
118+ event_action=event_action,
119+ event_description=event_description))
120
121 def test__status_message_returns_most_recent_event(self):
122 # The first event won't be returned.
123
124=== modified file 'src/maasserver/rpc/nodes.py'
125--- src/maasserver/rpc/nodes.py 2016-05-11 00:25:14 +0000
126+++ src/maasserver/rpc/nodes.py 2016-07-25 23:28:12 +0000
127@@ -49,7 +49,7 @@
128 except Node.DoesNotExist:
129 raise NoSuchNode.from_system_id(system_id)
130 try:
131- node.mark_failed(None, error_description)
132+ node.mark_failed(comment=error_description)
133 except exceptions.NodeStateViolation as e:
134 raise NodeStateViolation(e)
135
136
137=== modified file 'src/maasserver/status_monitor.py'
138--- src/maasserver/status_monitor.py 2016-07-22 16:11:04 +0000
139+++ src/maasserver/status_monitor.py 2016-07-25 23:28:12 +0000
140@@ -43,7 +43,7 @@
141 NODE_STATUS_CHOICES_DICT[node.status],
142 NODE_FAILURE_MONITORED_STATUS_TIMEOUTS[node.status],
143 )
144- node._mark_failed(None, commit=False, comment=comment)
145+ node.mark_failed(commit=False, comment=comment)
146 node.status_expires = None
147 node.save()
148
149
150=== modified file 'src/metadataserver/api.py'
151--- src/metadataserver/api.py 2016-07-22 16:11:04 +0000
152+++ src/metadataserver/api.py 2016-07-25 23:28:12 +0000
153@@ -157,12 +157,19 @@
154 raise UnknownMetadataVersion("Unknown metadata version: %s" % version)
155
156
157-def add_event_to_node_event_log(node, origin, action, description):
158+def add_event_to_node_event_log(
159+ node, origin, action, description, result=None):
160 """Add an entry to the node's event log."""
161 if node.status == NODE_STATUS.COMMISSIONING:
162- type_name = EVENT_TYPES.NODE_COMMISSIONING_EVENT
163+ if result in ['SUCCESS', None]:
164+ type_name = EVENT_TYPES.NODE_COMMISSIONING_EVENT
165+ else:
166+ type_name = EVENT_TYPES.NODE_COMMISSIONING_EVENT_FAILED
167 elif node.status == NODE_STATUS.DEPLOYING:
168- type_name = EVENT_TYPES.NODE_INSTALL_EVENT
169+ if result in ['SUCCESS', None]:
170+ type_name = EVENT_TYPES.NODE_INSTALL_EVENT
171+ else:
172+ type_name = EVENT_TYPES.NODE_INSTALL_EVENT_FAILED
173 elif node.node_type in [
174 NODE_TYPE.RACK_CONTROLLER,
175 NODE_TYPE.REGION_AND_RACK_CONTROLLER]:
176@@ -327,7 +334,8 @@
177 result = message.get('result')
178
179 # Add this event to the node event log.
180- add_event_to_node_event_log(node, origin, activity_name, description)
181+ add_event_to_node_event_log(
182+ node, origin, activity_name, description, result)
183
184 # Save attached files, if any.
185 for sent_file in message.get('files', []):
186@@ -358,12 +366,11 @@
187 elif node.status == NODE_STATUS.DEPLOYING:
188 if result in ['FAIL', 'FAILURE']:
189 node.mark_failed(
190- None,
191- "Installation failed (refer to the "
192- "installation log for more information).")
193+ comment="Installation failed (refer to the "
194+ "installation log for more information).")
195 elif node.status == NODE_STATUS.DISK_ERASING:
196 if result in ['FAIL', 'FAILURE']:
197- node.mark_failed(None, "Failed to erase disks.")
198+ node.mark_failed(comment="Failed to erase disks.")
199
200 # Deallocate the node if we enter any terminal state.
201 if node.node_type == NODE_TYPE.MACHINE and node.status in [
202@@ -530,16 +537,15 @@
203 self._store_installation_results(node, request)
204 if status == SIGNAL_STATUS.FAILED:
205 node.mark_failed(
206- None,
207- "Installation failed (refer to the "
208- "installation log for more information).")
209+ comment="Installation failed (refer to the "
210+ "installation log for more information).")
211 target_status = None
212 elif node.status == NODE_STATUS.DISK_ERASING:
213 if status == SIGNAL_STATUS.OK:
214 # disk erasing complete, release node
215 node.release()
216 elif status == SIGNAL_STATUS.FAILED:
217- node.mark_failed(None, "Failed to erase disks.")
218+ node.mark_failed(comment="Failed to erase disks.")
219 target_status = None
220
221 if target_status in (None, node.status):
222
223=== modified file 'src/metadataserver/tests/test_api_status.py'
224--- src/metadataserver/tests/test_api_status.py 2016-06-11 23:58:38 +0000
225+++ src/metadataserver/tests/test_api_status.py 2016-07-25 23:28:12 +0000
226@@ -193,7 +193,8 @@
227 NODE_STATUS.FAILED_DEPLOYMENT, reload_object(node).status)
228 # Check last node event.
229 self.assertEqual(
230- "'curtin' Command Install",
231+ "Installation failed (refer to the installation"
232+ " log for more information).",
233 Event.objects.filter(node=node).last().description)
234
235 def test_status_installation_fail_leaves_node_failed(self):
236@@ -212,7 +213,8 @@
237 NODE_STATUS.FAILED_DEPLOYMENT, reload_object(node).status)
238 # Check last node event.
239 self.assertEqual(
240- "'curtin' Command Install",
241+ "Installation failed (refer to the installation"
242+ " log for more information).",
243 Event.objects.filter(node=node).last().description)
244
245 def test_status_installation_failure_doesnt_clear_owner(self):
246@@ -270,7 +272,7 @@
247 NODE_STATUS.FAILED_DISK_ERASING, reload_object(node).status)
248 # Check last node event.
249 self.assertEqual(
250- "'curtin' Erasing disk",
251+ "Failed to erase disks.",
252 Event.objects.filter(node=node).last().description)
253
254 def test_status_erasure_failure_does_not_populate_tags(self):
255
256=== modified file 'src/provisioningserver/events.py'
257--- src/provisioningserver/events.py 2016-07-13 19:06:35 +0000
258+++ src/provisioningserver/events.py 2016-07-25 23:28:12 +0000
259@@ -66,7 +66,9 @@
260 # Node status events
261 NODE_STATUS_EVENT = "NODE_STATUS_EVENT"
262 NODE_COMMISSIONING_EVENT = "NODE_COMMISSIONING_EVENT"
263+ NODE_COMMISSIONING_EVENT_FAILED = "NODE_COMMISSIONING_EVENT_FAILED"
264 NODE_INSTALL_EVENT = "NODE_INSTALL_EVENT"
265+ NODE_INSTALL_EVENT_FAILED = "NODE_INSTALL_EVENT_FAILED"
266 # Node user request events
267 REQUEST_NODE_START_COMMISSIONING = "REQUEST_NODE_START_COMMISSIONING"
268 REQUEST_NODE_ABORT_COMMISSIONING = "REQUEST_NODE_ABORT_COMMISSIONING"
269@@ -76,6 +78,7 @@
270 REQUEST_NODE_ABORT_ERASE_DISK = "REQUEST_NODE_ABORT_ERASE_DISK"
271 REQUEST_NODE_RELEASE = "REQUEST_NODE_RELEASE"
272 REQUEST_NODE_MARK_FAILED = "REQUEST_NODE_MARK_FAILED"
273+ REQUEST_NODE_MARK_FAILED_SYSTEM = "REQUEST_NODE_MARK_FAILED_SYSTEM"
274 REQUEST_NODE_MARK_BROKEN = "REQUEST_NODE_MARK_BROKEN"
275 REQUEST_NODE_MARK_FIXED = "REQUEST_NODE_MARK_FIXED"
276 REQUEST_NODE_START_DEPLOYMENT = "REQUEST_NODE_START_DEPLOYMENT"
277@@ -151,10 +154,18 @@
278 description="Node commissioning",
279 level=DEBUG,
280 ),
281+ EVENT_TYPES.NODE_COMMISSIONING_EVENT_FAILED: EventDetail(
282+ description="Node commissioning failure",
283+ level=ERROR,
284+ ),
285 EVENT_TYPES.NODE_INSTALL_EVENT: EventDetail(
286 description="Node installation",
287 level=DEBUG,
288 ),
289+ EVENT_TYPES.NODE_INSTALL_EVENT_FAILED: EventDetail(
290+ description="Node installation failure",
291+ level=ERROR,
292+ ),
293 EVENT_TYPES.REQUEST_NODE_START_COMMISSIONING: EventDetail(
294 description="User starting node commissioning",
295 level=INFO,
296@@ -187,6 +198,10 @@
297 description="User marking node failed",
298 level=INFO,
299 ),
300+ EVENT_TYPES.REQUEST_NODE_MARK_FAILED_SYSTEM: EventDetail(
301+ description="Marking node failed",
302+ level=ERROR,
303+ ),
304 EVENT_TYPES.REQUEST_NODE_MARK_BROKEN: EventDetail(
305 description="User marking node broken",
306 level=INFO,

Subscribers

People subscribed via source and target branches