Merge ~cgrabowski/maas:fix_lp2051988_hardware_sync_updates_without_change into maas:master

Proposed by Christian Grabowski
Status: Merged
Approved by: Christian Grabowski
Approved revision: 2ba74b7167939783af35235c24050d598f31c473
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~cgrabowski/maas:fix_lp2051988_hardware_sync_updates_without_change
Merge into: maas:master
Diff against target: 408 lines (+286/-21)
3 files modified
src/metadataserver/builtin_scripts/hooks.py (+56/-14)
src/metadataserver/builtin_scripts/tests/test_hooks.py (+223/-0)
src/provisioningserver/events.py (+7/-7)
Reviewer Review Type Date Requested Status
Jack Lloyd-Walters Approve
MAAS Lander Approve
Review via email: mp+465307@code.launchpad.net

Commit message

fix: only create HW sync events when hardware has changed

To post a comment you must log in.
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b fix_lp2051988_hardware_sync_updates_without_change lp:~cgrabowski/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: FAILED
LOG: http://maas-ci.internal:8080/job/maas-tester/5429/console
COMMIT: adacf7ed450000ebf4c25c309ccd9cb686ca167f

review: Needs Fixing
Revision history for this message
MAAS Lander (maas-lander) wrote :

UNIT TESTS
-b fix_lp2051988_hardware_sync_updates_without_change lp:~cgrabowski/maas/+git/maas into -b master lp:~maas-committers/maas

STATUS: SUCCESS
COMMIT: a221000b4208f3c00a6c96cb0f614a82a9826c68

review: Approve
Revision history for this message
Jack Lloyd-Walters (lloydwaltersj) wrote :

Looks good! +1

Revision history for this message
Jack Lloyd-Walters (lloydwaltersj) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/src/metadataserver/builtin_scripts/hooks.py b/src/metadataserver/builtin_scripts/hooks.py
2index 80b7f8e..431b2c0 100644
3--- a/src/metadataserver/builtin_scripts/hooks.py
4+++ b/src/metadataserver/builtin_scripts/hooks.py
5@@ -14,6 +14,7 @@ import logging
6 import operator
7 from operator import itemgetter
8 import re
9+from typing import Any
10
11 from django.core.exceptions import ValidationError
12 from django.db import IntegrityError, transaction
13@@ -368,6 +369,34 @@ def _process_system_information(node, system_data):
14 node.tags.add(tag)
15
16
17+def _device_diff(
18+ old: NodeDevice,
19+ new: dict[str, Any],
20+ new_hardware_type: HARDWARE_TYPE,
21+ numa_node: int,
22+ physical_blockdevice: PhysicalBlockDevice,
23+ physical_interface: PhysicalInterface,
24+ commissioning_driver: str,
25+ device_vpd: dict[str, Any],
26+) -> bool:
27+ if not old:
28+ return True
29+
30+ diff = not (
31+ old.hardware_type == new_hardware_type
32+ and old.numa_node == numa_node
33+ and old.physical_blockdevice == physical_blockdevice
34+ and old.physical_interface == physical_interface
35+ and old.vendor_name == new.get("vendor")
36+ and old.product_name == new.get("product")
37+ and old.commissioning_driver == commissioning_driver
38+ )
39+
40+ for vpd in NodeDeviceVPD.objects.filter(node_device=old):
41+ diff = diff or (device_vpd.get(vpd.key) != vpd.value)
42+ return diff
43+
44+
45 def _add_or_update_node_device(
46 node,
47 numa_nodes,
48@@ -404,18 +433,29 @@ def _add_or_update_node_device(
49
50 if key in old_devices:
51 node_device = old_devices.pop(key)
52- node_device.hardware_type = hardware_type
53- node_device.numa_node = numa_node
54- node_device.physical_block_device = storage_device
55- node_device.physical_interface = network_device
56- node_device.vendor_name = device.get("vendor")
57- node_device.product_name = device.get("product")
58- node_device.commissioning_driver = commissioning_driver
59- node_device.save()
60- _add_node_device_vpd(node_device, device_vpd)
61- _hardware_sync_node_device_notify(
62- node, node_device, HARDWARE_SYNC_ACTIONS.UPDATED
63- )
64+
65+ if _device_diff(
66+ node_device,
67+ device,
68+ hardware_type,
69+ numa_node,
70+ storage_device,
71+ network_device,
72+ commissioning_driver,
73+ device_vpd,
74+ ):
75+ node_device.hardware_type = hardware_type
76+ node_device.numa_node = numa_node
77+ node_device.physical_block_device = storage_device
78+ node_device.physical_interface = network_device
79+ node_device.vendor_name = device.get("vendor")
80+ node_device.product_name = device.get("product")
81+ node_device.commissioning_driver = commissioning_driver
82+ node_device.save()
83+ _add_node_device_vpd(node_device, device_vpd)
84+ _hardware_sync_node_device_notify(
85+ node, node_device, HARDWARE_SYNC_ACTIONS.UPDATED
86+ )
87 else:
88 pci_address = device.get("pci_address")
89 create_args = {
90@@ -609,6 +649,7 @@ def _process_lxd_resources(node, data):
91 resources = data["resources"]
92 update_deployment_resources = node.status == NODE_STATUS.DEPLOYED
93 # Update CPU details.
94+ old_cpu_count = node.cpu_count
95 node.cpu_count, node.cpu_speed, cpu_model, numa_nodes = parse_lxd_cpuinfo(
96 resources
97 )
98@@ -619,7 +660,8 @@ def _process_lxd_resources(node, data):
99 resources.get("memory", {}), numa_nodes
100 )
101
102- _hardware_sync_memory_notify(node, old_memory)
103+ if old_memory != node.memory:
104+ _hardware_sync_memory_notify(node, old_memory)
105
106 # Create or update NUMA nodes. This must be kept as a dictionary as not all
107 # systems maintain linear continuity. e.g the PPC64 machine in our CI uses
108@@ -657,7 +699,7 @@ def _process_lxd_resources(node, data):
109 )
110
111 # in the case of hardware sync for a cpu, a new one is added if this value has been updated
112- if not created:
113+ if created and old_cpu_count > 0:
114 _hardware_sync_cpu_notify(
115 node, cpu_model, HARDWARE_SYNC_ACTIONS.ADDED
116 )
117diff --git a/src/metadataserver/builtin_scripts/tests/test_hooks.py b/src/metadataserver/builtin_scripts/tests/test_hooks.py
118index 406c179..6393a15 100644
119--- a/src/metadataserver/builtin_scripts/tests/test_hooks.py
120+++ b/src/metadataserver/builtin_scripts/tests/test_hooks.py
121@@ -44,6 +44,7 @@ from maasserver.utils.orm import reload_object
122 from maastesting.testcase import MAASTestCase
123 import metadataserver.builtin_scripts.hooks as hooks_module
124 from metadataserver.builtin_scripts.hooks import (
125+ _device_diff,
126 _hardware_sync_block_device_notify,
127 _hardware_sync_cpu_notify,
128 _hardware_sync_memory_notify,
129@@ -60,6 +61,7 @@ from metadataserver.builtin_scripts.hooks import (
130 parse_interfaces,
131 process_lxd_results,
132 retag_node_for_hardware_by_modalias,
133+ update_node_devices,
134 update_node_fruid_metadata,
135 update_node_network_information,
136 )
137@@ -4086,6 +4088,42 @@ class TestUpdateNodePhysicalBlockDevices(MAASServerTestCase):
138 )
139
140
141+class TestDeviceDiff(MAASServerTestCase):
142+ def test_device_has_not_changed(self):
143+ old = factory.make_NodeDevice()
144+ new = {"vendor": old.vendor_name, "product": old.product_name}
145+ result = _device_diff(
146+ old,
147+ new,
148+ old.hardware_type,
149+ old.numa_node,
150+ old.physical_blockdevice,
151+ old.physical_interface,
152+ old.commissioning_driver,
153+ {},
154+ )
155+ self.assertFalse(result)
156+
157+ def test_device_has_changed(self):
158+ old = factory.make_NodeDevice()
159+ factory.make_NodeDeviceVPD(node_device=old)
160+ new = {"vendor": factory.make_name(), "product": factory.make_name()}
161+ new_numa_node = factory.make_NUMANode()
162+ new_block_device = factory.make_BlockDevice()
163+ new_interface = factory.make_Interface()
164+ result = _device_diff(
165+ old,
166+ new,
167+ HARDWARE_TYPE.NODE,
168+ new_numa_node,
169+ new_block_device,
170+ new_interface,
171+ old.commissioning_driver,
172+ {factory.make_name(): factory.make_name()},
173+ )
174+ self.assertTrue(result)
175+
176+
177 class TestUpdateNodeNetworkInformation(MAASServerTestCase):
178 """Tests the update_node_network_information function using data from LXD.
179
180@@ -4968,6 +5006,191 @@ class TestUpdateBootInterface(MAASServerTestCase):
181 self.assertIn("kernel-cmdline failed with status: 1", logger.output)
182
183
184+class TestUpdateNodeDevices(MAASServerTestCase):
185+ def test_update_node_devices_does_not_create_hwsync_event_if_nothing_changes(
186+ self,
187+ ):
188+ node = factory.make_Node()
189+ numa = factory.make_NUMANode(node=node)
190+ block_device = factory.make_PhysicalBlockDevice(node=node)
191+ node_device = factory.make_NodeDevice(
192+ node=node,
193+ physical_blockdevice=block_device,
194+ hardware_type=HARDWARE_TYPE.STORAGE,
195+ numa_node=numa,
196+ bus=NODE_DEVICE_BUS.PCIE,
197+ )
198+
199+ mock_hardware_sync_notify = self.patch(
200+ hooks_module, "_hardware_sync_notify"
201+ )
202+
203+ data = {
204+ "storage": {
205+ "disks": [
206+ {
207+ "id": block_device.name,
208+ "pci_address": node_device.pci_address,
209+ }
210+ ]
211+ },
212+ "pci": {
213+ "devices": [
214+ {
215+ "vendor_id": node_device.vendor_id,
216+ "product_id": node_device.product_id,
217+ "vendor": node_device.vendor_name,
218+ "product": node_device.product_name,
219+ "pci_address": node_device.pci_address,
220+ "driver": node_device.commissioning_driver,
221+ }
222+ ]
223+ },
224+ }
225+
226+ update_node_devices(
227+ node,
228+ data,
229+ [numa],
230+ storage_devices={node_device.pci_address: block_device},
231+ )
232+
233+ mock_hardware_sync_notify.assert_not_called()
234+
235+ def test_update_node_devices_creates_hwsync_event_when_device_added(self):
236+ node = factory.make_Node()
237+ numa = factory.make_NUMANode(node=node)
238+ block_device = factory.make_PhysicalBlockDevice(node=node)
239+
240+ mock_hardware_sync_notify = self.patch(
241+ hooks_module, "_hardware_sync_notify"
242+ )
243+
244+ pci_address = "0000:23:00.0"
245+ data = {
246+ "storage": {
247+ "disks": [
248+ {
249+ "id": block_device.name,
250+ "pci_address": pci_address,
251+ }
252+ ]
253+ },
254+ "pci": {
255+ "devices": [
256+ {
257+ "vendor_id": 8086,
258+ "product_id": 8086,
259+ "vendor": factory.make_name(),
260+ "product": factory.make_name(),
261+ "pci_address": pci_address,
262+ "driver": factory.make_name(),
263+ }
264+ ]
265+ },
266+ }
267+
268+ update_node_devices(
269+ node, data, [numa], storage_devices={pci_address: block_device}
270+ )
271+ mock_hardware_sync_notify.assert_called_once_with(
272+ "NODE_HARDWARE_SYNC_PCI_DEVICE",
273+ node,
274+ 0,
275+ "added",
276+ device_type="pci device",
277+ )
278+
279+ def test_update_node_devices_creates_hwsync_event_when_device_updated(
280+ self,
281+ ):
282+ node = factory.make_Node()
283+ numa = factory.make_NUMANode(node=node)
284+ block_device = factory.make_PhysicalBlockDevice(node=node)
285+ node_device = factory.make_NodeDevice(
286+ node=node,
287+ physical_blockdevice=block_device,
288+ hardware_type=HARDWARE_TYPE.STORAGE,
289+ numa_node=numa,
290+ bus=NODE_DEVICE_BUS.PCIE,
291+ )
292+ data = {
293+ "storage": {
294+ "disks": [
295+ {
296+ "id": block_device.name,
297+ "pci_address": node_device.pci_address,
298+ }
299+ ]
300+ },
301+ "pci": {
302+ "devices": [
303+ {
304+ "vendor_id": node_device.vendor_id,
305+ "product_id": node_device.product_id,
306+ "vendor": factory.make_name(),
307+ "product": node_device.product_name,
308+ "pci_address": node_device.pci_address,
309+ "driver": node_device.commissioning_driver,
310+ }
311+ ]
312+ },
313+ }
314+
315+ mock_hardware_sync_notify = self.patch(
316+ hooks_module, "_hardware_sync_notify"
317+ )
318+
319+ update_node_devices(
320+ node,
321+ data,
322+ [numa],
323+ storage_devices={node_device.pci_address: block_device},
324+ )
325+ mock_hardware_sync_notify.assert_called_once_with(
326+ "NODE_HARDWARE_SYNC_PCI_DEVICE",
327+ node,
328+ node_device.device_number,
329+ "updated",
330+ device_type="pci device",
331+ )
332+
333+ def test_update_node_devices_creates_hwsync_event_when_device_removed(
334+ self,
335+ ):
336+ node = factory.make_Node()
337+ numa = factory.make_NUMANode(node=node)
338+ block_device = factory.make_PhysicalBlockDevice(node=node)
339+ node_device = factory.make_NodeDevice(
340+ node=node,
341+ physical_blockdevice=block_device,
342+ hardware_type=HARDWARE_TYPE.STORAGE,
343+ numa_node=numa,
344+ bus=NODE_DEVICE_BUS.PCIE,
345+ )
346+
347+ mock_hardware_sync_notify = self.patch(
348+ hooks_module, "_hardware_sync_notify"
349+ )
350+
351+ data = {}
352+
353+ update_node_devices(
354+ node,
355+ data,
356+ [numa],
357+ storage_devices={node_device.pci_address: block_device},
358+ )
359+
360+ mock_hardware_sync_notify.assert_called_once_with(
361+ "NODE_HARDWARE_SYNC_PCI_DEVICE",
362+ node,
363+ node_device.device_number,
364+ "removed",
365+ device_type="pci device",
366+ )
367+
368+
369 class TestHardwareSyncNotify(MAASServerTestCase):
370 def setup(self):
371 details = EVENT_DETAILS[EVENT_TYPES.NODE_HARDWARE_SYNC_BLOCK_DEVICE]
372diff --git a/src/provisioningserver/events.py b/src/provisioningserver/events.py
373index bffd6ae..04b8605 100644
374--- a/src/provisioningserver/events.py
375+++ b/src/provisioningserver/events.py
376@@ -252,25 +252,25 @@ EVENT_DETAILS = {
377 description="Node exiting rescue mode failure", level=ERROR
378 ),
379 EVENT_TYPES.NODE_HARDWARE_SYNC_BMC: EventDetail(
380- description="Node BMC hardware sync state change", level=AUDIT
381+ description="Node BMC hardware sync state change", level=INFO
382 ),
383 EVENT_TYPES.NODE_HARDWARE_SYNC_BLOCK_DEVICE: EventDetail(
384- description="Node Block Device hardware sync state change", level=AUDIT
385+ description="Node Block Device hardware sync state change", level=INFO
386 ),
387 EVENT_TYPES.NODE_HARDWARE_SYNC_CPU: EventDetail(
388- description="Node CPU hardware sync state change", level=AUDIT
389+ description="Node CPU hardware sync state change", level=INFO
390 ),
391 EVENT_TYPES.NODE_HARDWARE_SYNC_INTERFACE: EventDetail(
392- description="Node Interface hardware sync state change", level=AUDIT
393+ description="Node Interface hardware sync state change", level=INFO
394 ),
395 EVENT_TYPES.NODE_HARDWARE_SYNC_MEMORY: EventDetail(
396- description="Node Memory hardware sync state change", level=AUDIT
397+ description="Node Memory hardware sync state change", level=INFO
398 ),
399 EVENT_TYPES.NODE_HARDWARE_SYNC_PCI_DEVICE: EventDetail(
400- description="Node PCI Device hardware sync state change", level=AUDIT
401+ description="Node PCI Device hardware sync state change", level=INFO
402 ),
403 EVENT_TYPES.NODE_HARDWARE_SYNC_USB_DEVICE: EventDetail(
404- description="Node USB Device hardware sync state chage", level=AUDIT
405+ description="Node USB Device hardware sync state chage", level=INFO
406 ),
407 EVENT_TYPES.REQUEST_NODE_START_COMMISSIONING: EventDetail(
408 description="User starting node commissioning", level=DEBUG

Subscribers

People subscribed via source and target branches