Merge ~dbungert/curtin:flock-ex into curtin:master

Proposed by Dan Bungert
Status: Merged
Merged at revision: 204562c277b096229ca20ba546149deef1d85e38
Proposed branch: ~dbungert/curtin:flock-ex
Merge into: curtin:master
Diff against target: 328 lines (+132/-47)
6 files modified
curtin/block/zfs.py (+29/-20)
curtin/commands/block_meta.py (+23/-20)
curtin/util.py (+37/-0)
tests/unittests/test_block_zfs.py (+7/-7)
tests/unittests/test_commands_block_meta.py (+3/-0)
tests/unittests/test_util.py (+33/-0)
Reviewer Review Type Date Requested Status
Server Team CI bot continuous-integration Approve
Michael Hudson-Doyle Approve
Review via email: mp+462753@code.launchpad.net

Commit message

do not squash

Description of the change

Add exclusive flocks to known problem areas:

* ZFS encryption LP: #2057661
* wipefs LP: #2016860

To post a comment you must log in.
Revision history for this message
Dan Bungert (dbungert) wrote :

poorly vm tested at this point, opening the MP for discussion purposes

Revision history for this message
Server Team CI bot (server-team-bot) wrote :
review: Approve (continuous-integration)
Revision history for this message
Server Team CI bot (server-team-bot) wrote :
review: Approve (continuous-integration)
Revision history for this message
Dan Bungert (dbungert) wrote :

I believe this is ready for review.

* Frank has helped test, as listed at https://bugs.launchpad.net/ubuntu-z-systems/+bug/2016860 - on the systems most likely to encounter errors when attempting to `wipefs`
* I've done testing as well with several VM iterations each on lvm, raid1, and zfs and all appear to be ok

Surely we have not covered all cases but I think these will help.

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

This looks fine as far as it goes -- I suspect that there will be other places that need locking too (like disk_handler_v2? but we don't use v2 on server yet...) but that shouldn't be an obstacle to getting this landed.

review: Approve
Revision history for this message
Server Team CI bot (server-team-bot) wrote :
review: Approve (continuous-integration)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/curtin/block/zfs.py b/curtin/block/zfs.py
2index 767ad30..ccdddbb 100644
3--- a/curtin/block/zfs.py
4+++ b/curtin/block/zfs.py
5@@ -8,6 +8,7 @@ import os
6 import tempfile
7 import secrets
8 import shutil
9+from contextlib import ExitStack
10 from pathlib import Path
11
12 from curtin.config import merge_config
13@@ -31,11 +32,12 @@ ZFS_UNSUPPORTED_RELEASES = ['precise', 'trusty']
14
15
16 class ZPoolEncryption:
17- def __init__(self, poolname, style, keyfile):
18+ def __init__(self, vdevs, poolname, style, keyfile):
19 self.poolname = poolname
20 self.style = style
21 self.keyfile = keyfile
22 self.system_key = None
23+ self.vdevs = vdevs
24
25 def get_system_key(self):
26 if self.system_key is None:
27@@ -88,24 +90,31 @@ class ZPoolEncryption:
28 self.poolname, "keystore", {"encryption": "off"}, keystore_size,
29 )
30
31- # cryptsetup format and open this keystore
32- keystore_volume = f"/dev/zvol/{self.poolname}/keystore"
33- cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile]
34- util.subp(cmd)
35- dm_name = f"keystore-{self.poolname}"
36- cmd = [
37- "cryptsetup", "open", "--type", "luks", keystore_volume, dm_name,
38- "--key-file", self.keyfile,
39- ]
40- util.subp(cmd)
41-
42- # format as ext4, mount it, move the previously-generated system key
43- dmpath = f"/dev/mapper/{dm_name}"
44- cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name]
45- util.subp(cmd, capture=True)
46-
47- keystore_root = f"/run/keystore/{self.poolname}"
48- with util.mount(dmpath, keystore_root):
49+ with ExitStack() as es:
50+ for vdev in self.vdevs:
51+ es.enter_context(util.FlockEx(vdev))
52+
53+ # cryptsetup format and open this keystore
54+ keystore_volume = f"/dev/zvol/{self.poolname}/keystore"
55+ cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile]
56+ util.subp(cmd)
57+ dm_name = f"keystore-{self.poolname}"
58+ cmd = [
59+ "cryptsetup", "open", "--type", "luks", keystore_volume,
60+ dm_name, "--key-file", self.keyfile,
61+ ]
62+ util.subp(cmd)
63+
64+ with ExitStack() as es:
65+ # format as ext4, mount it, move the previously-generated systemkey
66+ dmpath = f"/dev/mapper/{dm_name}"
67+ es.enter_context(util.FlockEx(dmpath))
68+ cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name]
69+ util.subp(cmd, capture=True)
70+
71+ keystore_root = f"/run/keystore/{self.poolname}"
72+
73+ es.enter_context(util.mount(dmpath, keystore_root))
74 ks_system_key = f"{keystore_root}/system.key"
75 shutil.move(self.system_key, ks_system_key)
76 Path(ks_system_key).chmod(0o400)
77@@ -256,7 +265,7 @@ def zpool_create(poolname, vdevs, storage_config=None, context=None,
78 if zfs_properties:
79 merge_config(zfs_cfg, zfs_properties)
80
81- encryption = ZPoolEncryption(poolname, encryption_style, keyfile)
82+ encryption = ZPoolEncryption(vdevs, poolname, encryption_style, keyfile)
83 encryption.validate()
84 if encryption.in_use():
85 merge_config(zfs_cfg, encryption.dataset_properties())
86diff --git a/curtin/commands/block_meta.py b/curtin/commands/block_meta.py
87index 58889b9..dc26a8c 100644
88--- a/curtin/commands/block_meta.py
89+++ b/curtin/commands/block_meta.py
90@@ -795,26 +795,29 @@ def disk_handler(info, storage_config, context):
91 "table" % disk)
92 else:
93 # wipe the disk and create the partition table if instructed to do so
94- if config.value_as_boolean(info.get('wipe')):
95- block.wipe_volume(disk, mode=info.get('wipe'))
96- if config.value_as_boolean(ptable):
97- LOG.info("labeling device: '%s' with '%s' partition table", disk,
98- ptable)
99- if ptable == "gpt":
100- # Wipe both MBR and GPT that may be present on the disk.
101- # N.B.: wipe_volume wipes 1M at front and end of the disk.
102- # This could destroy disk data in filesystems that lived
103- # there.
104- block.wipe_volume(disk, mode='superblock')
105- elif ptable in _dos_names:
106- util.subp(["parted", disk, "--script", "mklabel", "msdos"])
107- elif ptable == "vtoc":
108- util.subp(["fdasd", "-c", "/dev/null", disk])
109- holders = clear_holders.get_holders(disk)
110- if len(holders) > 0:
111- LOG.info('Detected block holders on disk %s: %s', disk, holders)
112- clear_holders.clear_holders(disk)
113- clear_holders.assert_clear(disk)
114+ with util.FlockEx(disk):
115+ if config.value_as_boolean(info.get('wipe')):
116+ block.wipe_volume(disk, mode=info.get('wipe'))
117+ if config.value_as_boolean(ptable):
118+ LOG.info("labeling device: '%s' with '%s' partition table",
119+ disk, ptable)
120+ if ptable == "gpt":
121+ # Wipe both MBR and GPT that may be present on the disk.
122+ # N.B.: wipe_volume wipes 1M at front and end of the disk.
123+ # This could destroy disk data in filesystems that lived
124+ # there.
125+ block.wipe_volume(disk, mode='superblock')
126+ elif ptable in _dos_names:
127+ util.subp(["parted", disk, "--script", "mklabel", "msdos"])
128+ elif ptable == "vtoc":
129+ util.subp(["fdasd", "-c", "/dev/null", disk])
130+ holders = clear_holders.get_holders(disk)
131+ if len(holders) > 0:
132+ LOG.info(
133+ 'Detected block holders on disk %s: %s', disk, holders
134+ )
135+ clear_holders.clear_holders(disk)
136+ clear_holders.assert_clear(disk)
137
138 # Make the name if needed
139 if info.get('name'):
140diff --git a/curtin/util.py b/curtin/util.py
141index dcf9422..66735e8 100644
142--- a/curtin/util.py
143+++ b/curtin/util.py
144@@ -4,6 +4,7 @@ import argparse
145 import collections
146 from contextlib import contextmanager, suppress
147 import errno
148+import fcntl
149 import json
150 import os
151 import platform
152@@ -1420,4 +1421,40 @@ def not_exclusive_retry(fun, *args, **kwargs):
153 time.sleep(1)
154 return fun(*args, **kwargs)
155
156+
157+class FlockEx:
158+ """Acquire an exclusive lock on device.
159+ See https://systemd.io/BLOCK_DEVICE_LOCKING/ for details and
160+ motivation, which has the summary:
161+ Use BSD file locks (flock(2)) on block device nodes to synchronize
162+ access for partitioning and file system formatting tools.
163+
164+ params: device: block device node to exclusively lock
165+ """
166+ def __init__(self, device, timeout=60):
167+ self.device = device
168+ self.timeout = timeout
169+ self.retries = 60
170+
171+ def __enter__(self):
172+ self.lock_fd = os.open(self.device, os.O_RDONLY)
173+
174+ LOG.debug(f"Acquiring fcntl LOCK_EX on {self.device}")
175+
176+ for i in range(self.retries):
177+ try:
178+ fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
179+ return
180+ except OSError as ex:
181+ LOG.debug(f"Try {i}: lock acquisition failed with {ex}")
182+ time.sleep(self.timeout / self.retries)
183+ else:
184+ raise TimeoutError("Failed to acquire LOCK_EX on {self.device}")
185+
186+ def __exit__(self, *args):
187+ with suppress(Exception):
188+ fcntl.flock(self.lock_fd, fcntl.LOCK_UN)
189+ with suppress(Exception):
190+ os.close(self.lock_fd)
191+
192 # vi: ts=4 expandtab syntax=python
193diff --git a/tests/unittests/test_block_zfs.py b/tests/unittests/test_block_zfs.py
194index ef24b5d..23dba50 100644
195--- a/tests/unittests/test_block_zfs.py
196+++ b/tests/unittests/test_block_zfs.py
197@@ -614,7 +614,7 @@ class TestZfsKeystore(CiTestCase):
198 def test_create_system_key(self, m_token_bytes):
199 m_token_bytes.return_value = key = self.random_string()
200 m_open = mock.mock_open()
201- zpe = zfs.ZPoolEncryption(None, None, None)
202+ zpe = zfs.ZPoolEncryption(None, None, None, None)
203 with mock.patch("builtins.open", m_open):
204 system_key = zpe.get_system_key()
205 self.assertIsNotNone(system_key)
206@@ -622,7 +622,7 @@ class TestZfsKeystore(CiTestCase):
207 m_open.return_value.write.assert_called_with(key)
208
209 def test_validate_good(self):
210- zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile)
211+ zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile)
212 try:
213 zpe.validate()
214 except Exception:
215@@ -630,22 +630,22 @@ class TestZfsKeystore(CiTestCase):
216 self.assertTrue(zpe.in_use())
217
218 def test_validate_missing_pool(self):
219- zpe = zfs.ZPoolEncryption(None, "luks_keystore", self.keyfile)
220+ zpe = zfs.ZPoolEncryption(None, None, "luks_keystore", self.keyfile)
221 with self.assertRaises(ValueError):
222 zpe.validate()
223
224 def test_validate_missing_key(self):
225- zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", None)
226+ zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", None)
227 with self.assertRaises(ValueError):
228 zpe.validate()
229
230 def test_validate_missing_key_file(self):
231- zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", "not-exist")
232+ zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", "not-exist")
233 with self.assertRaises(ValueError):
234 zpe.validate()
235
236 def test_validate_unencrypted_ok(self):
237- zpe = zfs.ZPoolEncryption("pool1", None, None)
238+ zpe = zfs.ZPoolEncryption(None, "pool1", None, None)
239 try:
240 zpe.validate()
241 except Exception:
242@@ -653,7 +653,7 @@ class TestZfsKeystore(CiTestCase):
243 self.assertFalse(zpe.in_use())
244
245 def test_dataset_properties(self):
246- zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile)
247+ zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile)
248 keyloc = self.random_string()
249 with mock.patch.object(zpe, "get_system_key", return_value=keyloc):
250 props = zpe.dataset_properties()
251diff --git a/tests/unittests/test_commands_block_meta.py b/tests/unittests/test_commands_block_meta.py
252index 78cba06..7320fc1 100644
253--- a/tests/unittests/test_commands_block_meta.py
254+++ b/tests/unittests/test_commands_block_meta.py
255@@ -5,6 +5,7 @@ from collections import OrderedDict
256 import copy
257 from unittest.mock import (
258 call,
259+ MagicMock,
260 Mock,
261 patch,
262 )
263@@ -685,6 +686,7 @@ class TestBlockMeta(CiTestCase):
264 self.m_mp.is_mpath_device.return_value = False
265 self.m_mp.is_mpath_member.return_value = False
266
267+ @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock())
268 def test_disk_handler_calls_clear_holder(self):
269 info = self.storage_config.get('sda')
270 disk = info.get('path')
271@@ -1789,6 +1791,7 @@ class TestDiskHandler(CiTestCase):
272 m_getpath.assert_called_with(info['id'], storage_config)
273 m_block.get_part_table_type.assert_called_with(disk_path)
274
275+ @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock())
276 @patch('curtin.commands.block_meta.util.subp')
277 @patch('curtin.commands.block_meta.clear_holders.get_holders')
278 @patch('curtin.commands.block_meta.get_path_to_storage_volume')
279diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
280index 93dde91..90303e9 100644
281--- a/tests/unittests/test_util.py
282+++ b/tests/unittests/test_util.py
283@@ -1,7 +1,9 @@
284 # This file is part of curtin. See LICENSE file for copyright and license info.
285
286+from pathlib import Path
287 from unittest import mock
288 import errno
289+import fcntl
290 import os
291 import stat
292 from textwrap import dedent
293@@ -1406,4 +1408,35 @@ class TestEFIVarFSBug(CiTestCase):
294 util.EFIVarFSBug.apply_workaround_if_affected()
295 apply_wa.assert_not_called()
296
297+
298+class TestFlockEx(CiTestCase):
299+ def setUp(self):
300+ self.tmpf = self.tmp_path('testfile')
301+ Path(self.tmpf).touch()
302+
303+ def test_acquires_lock(self):
304+ with util.FlockEx(self.tmpf):
305+ with open(self.tmpf) as fp:
306+ with self.assertRaises(BlockingIOError):
307+ fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
308+
309+ def test_releases_lock(self):
310+ with util.FlockEx(self.tmpf):
311+ pass
312+
313+ with open(self.tmpf) as fp:
314+ fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
315+ fcntl.flock(fp, fcntl.LOCK_UN)
316+
317+ def test_timeout(self):
318+ with open(self.tmpf) as fp:
319+ fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
320+
321+ with self.assertRaises(TimeoutError):
322+ with util.FlockEx(self.tmpf, timeout=.01):
323+ pass
324+
325+ fcntl.flock(fp, fcntl.LOCK_UN)
326+
327+
328 # vi: ts=4 expandtab syntax=python

Subscribers

People subscribed via source and target branches