Merge ~smoser/cloud-init:feature/ds-init into cloud-init:master
- Git
- lp:~smoser/cloud-init
- feature/ds-init
- Merge into master
Proposed by
Scott Moser
Status: | Merged |
---|---|
Merged at revision: | 9e904bbc3336b96475bfd00fb3bf1262ae4de49f |
Proposed branch: | ~smoser/cloud-init:feature/ds-init |
Merge into: | cloud-init:master |
Diff against target: |
381 lines (+136/-107) 6 files modified
cloudinit/cmd/main.py (+3/-0) cloudinit/config/cc_mounts.py (+9/-3) cloudinit/sources/DataSourceAzure.py (+104/-95) cloudinit/sources/__init__.py (+12/-0) cloudinit/stages.py (+7/-0) tests/unittests/test_datasource/test_azure.py (+1/-9) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
cloud-init Commiters | Pending | ||
Review via email: mp+311205@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Ryan Harper (raharper) : | # |
There was an error fetching revisions from git servers. Please try again in a few minutes. If the problem persists, contact Launchpad support.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py |
2 | index 83eb02c..fe37075 100644 |
3 | --- a/cloudinit/cmd/main.py |
4 | +++ b/cloudinit/cmd/main.py |
5 | @@ -326,6 +326,9 @@ def main_init(name, args): |
6 | util.logexc(LOG, "Failed to re-adjust output redirection!") |
7 | logging.setupLogging(mods.cfg) |
8 | |
9 | + # give the activated datasource a chance to adjust |
10 | + init.activate_datasource() |
11 | + |
12 | # Stage 10 |
13 | return (init.datasource, run_module_section(mods, name, name)) |
14 | |
15 | diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py |
16 | index dfc4b59..452c9e8 100644 |
17 | --- a/cloudinit/config/cc_mounts.py |
18 | +++ b/cloudinit/config/cc_mounts.py |
19 | @@ -312,7 +312,8 @@ def handle_swapcfg(swapcfg): |
20 | def handle(_name, cfg, cloud, log, _args): |
21 | # fs_spec, fs_file, fs_vfstype, fs_mntops, fs-freq, fs_passno |
22 | def_mnt_opts = "defaults,nobootwait" |
23 | - if cloud.distro.uses_systemd(): |
24 | + uses_systemd = cloud.distro.uses_systemd() |
25 | + if uses_systemd: |
26 | def_mnt_opts = "defaults,nofail,x-systemd.requires=cloud-init.service" |
27 | |
28 | defvals = [None, None, "auto", def_mnt_opts, "0", "2"] |
29 | @@ -447,7 +448,12 @@ def handle(_name, cfg, cloud, log, _args): |
30 | except Exception: |
31 | util.logexc(log, "Failed to make '%s' config-mount", d) |
32 | |
33 | + activate_cmd = ["mount", "-a"] |
34 | + if uses_systemd: |
35 | + activate_cmd = ["systemctl", "daemon-reload"] |
36 | + fmt = "Activate mounts: %s:" + ' '.join(activate_cmd) |
37 | try: |
38 | - util.subp(("mount", "-a")) |
39 | + util.subp(activate_cmd) |
40 | + LOG.debug(fmt, "PASS") |
41 | except util.ProcessExecutionError: |
42 | - util.logexc(log, "Activating mounts via 'mount -a' failed") |
43 | + util.logexc(log, fmt, "FAIL") |
44 | diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py |
45 | index b802b03..22f9004 100644 |
46 | --- a/cloudinit/sources/DataSourceAzure.py |
47 | +++ b/cloudinit/sources/DataSourceAzure.py |
48 | @@ -19,7 +19,6 @@ |
49 | import base64 |
50 | import contextlib |
51 | import crypt |
52 | -import fnmatch |
53 | from functools import partial |
54 | import os |
55 | import os.path |
56 | @@ -28,7 +27,6 @@ from xml.dom import minidom |
57 | import xml.etree.ElementTree as ET |
58 | |
59 | from cloudinit import log as logging |
60 | -from cloudinit.settings import PER_ALWAYS |
61 | from cloudinit import sources |
62 | from cloudinit.sources.helpers.azure import get_metadata_from_fabric |
63 | from cloudinit import util |
64 | @@ -42,6 +40,9 @@ BOUNCE_COMMAND = [ |
65 | 'sh', '-xc', |
66 | "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x" |
67 | ] |
68 | +# azure systems will always have a resource disk, and 66-azure-ephemeral.rules |
69 | +# ensures that it gets linked to this path. |
70 | +RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource' |
71 | |
72 | BUILTIN_DS_CONFIG = { |
73 | 'agent_command': AGENT_START, |
74 | @@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = { |
75 | 'command': BOUNCE_COMMAND, |
76 | 'hostname_command': 'hostname', |
77 | }, |
78 | - 'disk_aliases': {'ephemeral0': '/dev/sdb'}, |
79 | + 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH}, |
80 | 'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases', |
81 | } |
82 | |
83 | @@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource): |
84 | self.metadata['instance-id'] = util.read_dmi_data('system-uuid') |
85 | self.metadata.update(fabric_data) |
86 | |
87 | - found_ephemeral = find_fabric_formatted_ephemeral_disk() |
88 | - if found_ephemeral: |
89 | - self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral |
90 | - LOG.debug("using detected ephemeral0 of %s", found_ephemeral) |
91 | - |
92 | - cc_modules_override = support_new_ephemeral(self.sys_cfg) |
93 | - if cc_modules_override: |
94 | - self.cfg['cloud_init_modules'] = cc_modules_override |
95 | - |
96 | return True |
97 | |
98 | def device_name_to_device(self, name): |
99 | @@ -266,97 +258,104 @@ class DataSourceAzureNet(sources.DataSource): |
100 | # quickly (local check only) if self.instance_id is still valid |
101 | return sources.instance_id_matches_system_uuid(self.get_instance_id()) |
102 | |
103 | - |
104 | -def count_files(mp): |
105 | - return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*')) |
106 | + def activate(self, cfg, is_new_instance): |
107 | + address_ephemeral_resize(is_new_instance=is_new_instance) |
108 | + return |
109 | |
110 | |
111 | -def find_fabric_formatted_ephemeral_part(): |
112 | - """ |
113 | - Locate the first fabric formatted ephemeral device. |
114 | - """ |
115 | - potential_locations = ['/dev/disk/cloud/azure_resource-part1', |
116 | - '/dev/disk/azure/resource-part1'] |
117 | - device_location = None |
118 | - for potential_location in potential_locations: |
119 | - if os.path.exists(potential_location): |
120 | - device_location = potential_location |
121 | +def can_dev_be_reformatted(devpath): |
122 | + # determine if the ephemeral block device path devpath |
123 | + # is newly formatted after a resize. |
124 | + if not os.path.exists(devpath): |
125 | + return False, 'device %s does not exist' % devpath |
126 | + |
127 | + realpath = os.path.realpath(devpath) |
128 | + LOG.debug('Resolving realpath of %s -> %s', devpath, realpath) |
129 | + |
130 | + # it is possible that the block device exists, but the kernel |
131 | + # has not yet read the partition table and sent events. we run |
132 | + # 'udevadm settle' in the hope of resolving that. Better here would probably be to test and see, |
133 | + # and then settle if we didn't find anything and try again. |
134 | + if util.which("udevadm"): |
135 | + util.subp(["udevadm", "settle"]) |
136 | + |
137 | + # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource |
138 | + # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1" |
139 | + part1path = None |
140 | + for suff in ("-part", "p", ""): |
141 | + cand = devpath + suff + "1" |
142 | + if os.path.exists(cand): |
143 | + if os.path.exists(devpath + suff + "2"): |
144 | + msg = ('device %s had more than 1 partition: %s, %s' % |
145 | + devpath, cand, devpath + suff + "2") |
146 | + return False, msg |
147 | + part1path = cand |
148 | break |
149 | - if device_location is None: |
150 | - LOG.debug("no azure resource disk partition path found") |
151 | - return None |
152 | - ntfs_devices = util.find_devs_with("TYPE=ntfs") |
153 | - real_device = os.path.realpath(device_location) |
154 | - if real_device in ntfs_devices: |
155 | - return device_location |
156 | - LOG.debug("'%s' existed (%s) but was not ntfs formated", |
157 | - device_location, real_device) |
158 | - return None |
159 | - |
160 | - |
161 | -def find_fabric_formatted_ephemeral_disk(): |
162 | - """ |
163 | - Get the ephemeral disk. |
164 | - """ |
165 | - part_dev = find_fabric_formatted_ephemeral_part() |
166 | - if part_dev: |
167 | - return part_dev.split('-')[0] |
168 | - return None |
169 | |
170 | + if part1path is None: |
171 | + return False, 'device %s was not partitioned' % devpath |
172 | |
173 | -def support_new_ephemeral(cfg): |
174 | - """ |
175 | - Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device |
176 | - may be presented as a fresh device, or not. |
177 | + real_part1path = os.path.realpath(part1path) |
178 | + ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True) |
179 | + LOG.debug('ntfs_devices found = %s', ntfs_devices) |
180 | + if real_part1path not in ntfs_devices: |
181 | + msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' % |
182 | + (part1path, real_part1path, devpath)) |
183 | + return False, msg |
184 | |
185 | - Since the knowledge of when a disk is supposed to be plowed under is |
186 | - specific to Windows Azure, the logic resides here in the datasource. When a |
187 | - new ephemeral device is detected, cloud-init overrides the default |
188 | - frequency for both disk-setup and mounts for the current boot only. |
189 | - """ |
190 | - device = find_fabric_formatted_ephemeral_part() |
191 | - if not device: |
192 | - LOG.debug("no default fabric formated ephemeral0.1 found") |
193 | - return None |
194 | - LOG.debug("fabric formated ephemeral0.1 device at %s", device) |
195 | + def count_files(mp): |
196 | + ignored = {'dataloss_warning_readme.txt'} |
197 | + return len([f for f in os.listdir(mp) if f.lower() not in ignored]) |
198 | |
199 | - file_count = 0 |
200 | + bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' % |
201 | + (part1path, real_part1path, devpath)) |
202 | try: |
203 | - file_count = util.mount_cb(device, count_files) |
204 | - except Exception: |
205 | - return None |
206 | - LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count) |
207 | - |
208 | - if file_count >= 1: |
209 | - LOG.debug("fabric prepared ephemeral0.1 will be preserved") |
210 | - return None |
211 | + file_count = util.mount_cb(part1path, count_files) |
212 | + except util.MountFailedError as e: |
213 | + return False, bmsg + ' but mount of %s failed: %s' % (part1path, e) |
214 | + |
215 | + if file_count != 0: |
216 | + return False, bmsg + ' but had %d files on it.' % file_count |
217 | + |
218 | + return True, bmsg + ' and had no important files. Safe for reformatting.' |
219 | + |
220 | + |
221 | +def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, |
222 | + is_new_instance=False): |
223 | + # wait for ephemeral disk to come up |
224 | + naplen = .2 |
225 | + missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen, |
226 | + log_pre="Azure ephemeral disk: ") |
227 | + |
228 | + if missing: |
229 | + LOG.warn("ephemeral device '%s' did not appear after %d seconds.", |
230 | + devpath, maxwait) |
231 | + return |
232 | + |
233 | + result = False |
234 | + msg = None |
235 | + if is_new_instance: |
236 | + result, msg = (True, "First instance boot.") |
237 | else: |
238 | - # if device was already mounted, then we need to unmount it |
239 | - # race conditions could allow for a check-then-unmount |
240 | - # to have a false positive. so just unmount and then check. |
241 | - try: |
242 | - util.subp(['umount', device]) |
243 | - except util.ProcessExecutionError as e: |
244 | - if device in util.mounts(): |
245 | - LOG.warn("Failed to unmount %s, will not reformat.", device) |
246 | - LOG.debug("Failed umount: %s", e) |
247 | - return None |
248 | - |
249 | - LOG.debug("cloud-init will format ephemeral0.1 this boot.") |
250 | - LOG.debug("setting disk_setup and mounts modules 'always' for this boot") |
251 | - |
252 | - cc_modules = cfg.get('cloud_init_modules') |
253 | - if not cc_modules: |
254 | - return None |
255 | - |
256 | - mod_list = [] |
257 | - for mod in cc_modules: |
258 | - if mod in ("disk_setup", "mounts"): |
259 | - mod_list.append([mod, PER_ALWAYS]) |
260 | - LOG.debug("set module '%s' to 'always' for this boot", mod) |
261 | + result, msg = can_dev_be_reformatted(devpath) |
262 | + |
263 | + LOG.debug("reformattable=%s: %s" % (result, msg)) |
264 | + if not result: |
265 | + return |
266 | + |
267 | + for mod in ['disk_setup', 'mounts']: |
268 | + sempath = '/var/lib/cloud/instance/sem/config_' + mod |
269 | + bmsg = 'Marker "%s" for module "%s"' % (sempath, mod) |
270 | + if os.path.exists(sempath): |
271 | + try: |
272 | + os.unlink(sempath) |
273 | + LOG.debug(bmsg + " removed.") |
274 | + except Exception as e: |
275 | + # python3 throws FileNotFoundError, python2 throws OSError |
276 | + LOG.warn(bmsg + ": remove failed! (%s)" % e) |
277 | else: |
278 | - mod_list.append(mod) |
279 | - return mod_list |
280 | + LOG.debug(bmsg + " did not exist.") |
281 | + return |
282 | |
283 | |
284 | def perform_hostname_bounce(hostname, cfg, prev_hostname): |
285 | @@ -408,15 +407,25 @@ def pubkeys_from_crt_files(flist): |
286 | return pubkeys |
287 | |
288 | |
289 | -def wait_for_files(flist, maxwait=60, naplen=.5): |
290 | +def wait_for_files(flist, maxwait=60, naplen=.5, log_pre=""): |
291 | need = set(flist) |
292 | waited = 0 |
293 | - while waited < maxwait: |
294 | + while True: |
295 | need -= set([f for f in need if os.path.exists(f)]) |
296 | if len(need) == 0: |
297 | + LOG.debug("%sAll files appeared after %s seconds: %s", |
298 | + log_pre, waited, flist) |
299 | return [] |
300 | + if waited == 0: |
301 | + LOG.info("%sWaiting up to %s seconds for the following files: %s", |
302 | + log_pre, maxwait, flist) |
303 | + if waited + naplen > maxwait: |
304 | + break |
305 | time.sleep(naplen) |
306 | waited += naplen |
307 | + |
308 | + LOG.warn("%sStill missing files after %s seconds: %s", |
309 | + log_pre, maxwait, need) |
310 | return need |
311 | |
312 | |
313 | diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py |
314 | index d139527..13fb7c6 100644 |
315 | --- a/cloudinit/sources/__init__.py |
316 | +++ b/cloudinit/sources/__init__.py |
317 | @@ -261,6 +261,18 @@ class DataSource(object): |
318 | def first_instance_boot(self): |
319 | return |
320 | |
321 | + def activate(self, cfg, is_new_instance): |
322 | + """activate(cfg, is_new_instance) |
323 | + |
324 | + This is called before the init_modules will be called. |
325 | + The cfg is the fully up-to-date config; it contains a merged view of |
326 | + system config, datasource config, user config, vendor config. |
327 | + It should be used rather than the sys_cfg passed to __init__. |
328 | + |
329 | + is_new_instance is a boolean indicating if this is a new instance. |
330 | + """ |
331 | + return |
332 | + |
333 | |
334 | def normalize_pubkey_data(pubkey_data): |
335 | keys = [] |
336 | diff --git a/cloudinit/stages.py b/cloudinit/stages.py |
337 | index 47deac6..86a1378 100644 |
338 | --- a/cloudinit/stages.py |
339 | +++ b/cloudinit/stages.py |
340 | @@ -371,6 +371,13 @@ class Init(object): |
341 | self._store_userdata() |
342 | self._store_vendordata() |
343 | |
344 | + def activate_datasource(self): |
345 | + if self.datasource is None: |
346 | + raise RuntimeError("Datasource is None, cannot activate.") |
347 | + self.datasource.activate(cfg=self.cfg, |
348 | + is_new_instance=self.is_new_instance()) |
349 | + self._write_to_cache() |
350 | + |
351 | def _store_userdata(self): |
352 | raw_ud = self.datasource.get_userdata_raw() |
353 | if raw_ud is None: |
354 | diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py |
355 | index e90e903..0712700 100644 |
356 | --- a/tests/unittests/test_datasource/test_azure.py |
357 | +++ b/tests/unittests/test_datasource/test_azure.py |
358 | @@ -349,7 +349,7 @@ class TestAzureDataSource(TestCase): |
359 | cfg = dsrc.get_config_obj() |
360 | |
361 | self.assertEqual(dsrc.device_name_to_device("ephemeral0"), |
362 | - "/dev/sdb") |
363 | + DataSourceAzure.RESOURCE_DISK_PATH) |
364 | assert 'disk_setup' in cfg |
365 | assert 'fs_setup' in cfg |
366 | self.assertIsInstance(cfg['disk_setup'], dict) |
367 | @@ -462,14 +462,6 @@ class TestAzureBounce(TestCase): |
368 | mock.patch.object(DataSourceAzure, 'list_possible_azure_ds_devs', |
369 | mock.MagicMock(return_value=[]))) |
370 | self.patches.enter_context( |
371 | - mock.patch.object(DataSourceAzure, |
372 | - 'find_fabric_formatted_ephemeral_disk', |
373 | - mock.MagicMock(return_value=None))) |
374 | - self.patches.enter_context( |
375 | - mock.patch.object(DataSourceAzure, |
376 | - 'find_fabric_formatted_ephemeral_part', |
377 | - mock.MagicMock(return_value=None))) |
378 | - self.patches.enter_context( |
379 | mock.patch.object(DataSourceAzure, 'get_metadata_from_fabric', |
380 | mock.MagicMock(return_value={}))) |
381 | self.patches.enter_context( |