Merge ~cjwatson/launchpad-buildd:fix-nvidia-device-handling into launchpad-buildd:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 1dcbb8f658c753c99b7ac7b9adc295622e9abee4
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad-buildd:fix-nvidia-device-handling
Merge into: launchpad-buildd:master
Diff against target: 119 lines (+45/-31)
2 files modified
lpbuildd/target/lxd.py (+1/-1)
lpbuildd/target/tests/test_lxd.py (+44/-30)
Reviewer Review Type Date Requested Status
Jürgen Gmach Approve
Steve Langasek (community) Approve
Review via email: mp+443287@code.launchpad.net

Commit message

Fix nvidia device node handling for devtmpfs

Description of the change

https://code.launchpad.net/~vorlon/launchpad-buildd/+git/launchpad-buildd/+merge/442776 turned out to break the NVIDIA GPU case on dogfood: mounting devtmpfs already creates the device nodes (although the kernel team thinks this shouldn't be relied upon), so let's guard against mknod failing due to that. (`path_exists` is the equivalent of `test -e` within the container.)

To post a comment you must log in.
Revision history for this message
Steve Langasek (vorlon) :
review: Approve
Revision history for this message
Jürgen Gmach (jugmac00) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lpbuildd/target/lxd.py b/lpbuildd/target/lxd.py
2index 583b9ca..7b71a88 100644
3--- a/lpbuildd/target/lxd.py
4+++ b/lpbuildd/target/lxd.py
5@@ -615,7 +615,7 @@ class LXD(Backend):
6 for path in self._nvidia_container_paths:
7 if path.startswith("/dev/"):
8 st = os.stat(path)
9- if stat.S_ISCHR(st.st_mode):
10+ if stat.S_ISCHR(st.st_mode) and not self.path_exists(path):
11 self.run(
12 [
13 "mknod",
14diff --git a/lpbuildd/target/tests/test_lxd.py b/lpbuildd/target/tests/test_lxd.py
15index 22e9eb4..2cbe972 100644
16--- a/lpbuildd/target/tests/test_lxd.py
17+++ b/lpbuildd/target/tests/test_lxd.py
18@@ -449,7 +449,11 @@ class TestLXD(TestCase):
19 # XXX cjwatson 2022-08-25: Refactor this to use some more sensible kind
20 # of test parameterization.
21 def test_start(
22- self, arch="amd64", unmounts_cpuinfo=False, gpu_nvidia=False
23+ self,
24+ arch="amd64",
25+ unmounts_cpuinfo=False,
26+ gpu_nvidia=False,
27+ gpu_nvidia_device_nodes_exist=False,
28 ):
29 self.fakeFS()
30 DM_BLOCK_MAJOR = random.randrange(128, 255)
31@@ -482,6 +486,9 @@ class TestLXD(TestCase):
32 os.mknod(
33 "/dev/nvidiactl", stat.S_IFCHR | 0o666, os.makedev(195, 255)
34 )
35+ if gpu_nvidia_device_nodes_exist:
36+ existing_files["/dev/nvidia0"] = []
37+ existing_files["/dev/nvidiactl"] = []
38 gpu_nvidia_paths = [
39 "/dev/nvidia0",
40 "/dev/nvidiactl",
41@@ -612,35 +619,36 @@ class TestLXD(TestCase):
42 )
43 )
44 if gpu_nvidia:
45- expected_args.extend(
46- [
47- Equals(
48- lxc
49- + [
50- "mknod",
51- "-m",
52- "0666",
53- "/dev/nvidia0",
54- "c",
55- "195",
56- "0",
57- ]
58- ),
59- Equals(
60- lxc
61- + [
62- "mknod",
63- "-m",
64- "0666",
65- "/dev/nvidiactl",
66- "c",
67- "195",
68- "255",
69- ]
70- ),
71- Equals(lxc + ["/sbin/ldconfig"]),
72- ]
73- )
74+ if not gpu_nvidia_device_nodes_exist:
75+ expected_args.extend(
76+ [
77+ Equals(
78+ lxc
79+ + [
80+ "mknod",
81+ "-m",
82+ "0666",
83+ "/dev/nvidia0",
84+ "c",
85+ "195",
86+ "0",
87+ ]
88+ ),
89+ Equals(
90+ lxc
91+ + [
92+ "mknod",
93+ "-m",
94+ "0666",
95+ "/dev/nvidiactl",
96+ "c",
97+ "195",
98+ "255",
99+ ]
100+ ),
101+ ]
102+ )
103+ expected_args.append(Equals(lxc + ["/sbin/ldconfig"]))
104 expected_args.extend(
105 [
106 Equals(
107@@ -809,6 +817,12 @@ class TestLXD(TestCase):
108 def test_start_gpu_nvidia(self):
109 self.test_start(gpu_nvidia=True)
110
111+ def test_start_gpu_nvidia_device_nodes_exist(self):
112+ # Starting a container with NVIDIA GPU support works even if
113+ # mounting devtmpfs inside the container causes the device nodes to
114+ # exist.
115+ self.test_start(gpu_nvidia=True, gpu_nvidia_device_nodes_exist=True)
116+
117 def test_run(self):
118 processes_fixture = self.useFixture(FakeProcesses())
119 processes_fixture.add(lambda _: {}, name="lxc")

Subscribers

People subscribed via source and target branches