Merge lp:~mskalka/juju-ci-tools/add-more-network-tests into lp:juju-ci-tools
- add-more-network-tests
- Merge into trunk
Proposed by
Michael Skalka
Status: | Merged |
---|---|
Merged at revision: | 1909 |
Proposed branch: | lp:~mskalka/juju-ci-tools/add-more-network-tests |
Merge into: | lp:juju-ci-tools |
Diff against target: |
516 lines (+165/-81) 4 files modified
.bzrignore (+0/-1) assess_container_networking.py (+0/-1) assess_network_health.py (+122/-44) tests/test_assess_network_health.py (+43/-35) |
To merge this branch: | bzr merge lp:~mskalka/juju-ci-tools/add-more-network-tests |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Curtis Hovey (community) | code | Approve | |
Review via email: mp+318248@code.launchpad.net |
Commit message
Description of the change
Adds an additional network test and changes the way the agnostic visibility test is run: instead of the script itself pinging the units, the Juju controller now does it. Ideally this gives us a better idea of whether an agent is 'lost' or has just dropped off the network. The additional network test is borrowed from assess_
To post a comment you must log in.
- 1908. By Michael Skalka
-
updated bzr ignore
- 1909. By Michael Skalka
-
revert makefile change
- 1910. By Michael Skalka
-
cleanup, fixed series
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file '.bzrignore' |
2 | --- .bzrignore 2016-12-14 12:40:18 +0000 |
3 | +++ .bzrignore 2017-02-24 18:28:22 +0000 |
4 | @@ -4,4 +4,3 @@ |
5 | /logs |
6 | /repository |
7 | ./.coverage |
8 | - |
9 | |
10 | === modified file 'assess_container_networking.py' |
11 | --- assess_container_networking.py 2017-01-20 20:58:41 +0000 |
12 | +++ assess_container_networking.py 2017-02-24 18:28:22 +0000 |
13 | @@ -127,7 +127,6 @@ |
14 | host, type, id = c[0].split('/') |
15 | if type in containers and host in containers[type]: |
16 | containers[type][host].append(c[0]) |
17 | - |
18 | return hosts, containers |
19 | |
20 | |
21 | |
22 | === modified file 'assess_network_health.py' |
23 | --- assess_network_health.py 2017-02-22 19:05:46 +0000 |
24 | +++ assess_network_health.py 2017-02-24 18:28:22 +0000 |
25 | @@ -9,14 +9,14 @@ |
26 | import yaml |
27 | import ast |
28 | import subprocess |
29 | +import re |
30 | +import time |
31 | |
32 | from jujupy import client_from_config |
33 | from deploy_stack import ( |
34 | BootstrapManager, |
35 | ) |
36 | -from jujucharm import ( |
37 | - local_charm_path, |
38 | - ) |
39 | + |
40 | from utility import ( |
41 | add_basic_testing_arguments, |
42 | configure_logging, |
43 | @@ -36,7 +36,8 @@ |
44 | self.message = message |
45 | |
46 | |
47 | -def assess_network_health(client, bundle=None, target_model=None, series=None): |
48 | +def assess_network_health(client, bundle=None, target_model=None, reboot=False, |
49 | + series=None): |
50 | """Assesses network health for a given deployment or bundle. |
51 | |
52 | :param client: The juju client in use |
53 | @@ -44,22 +45,50 @@ |
54 | :param model: Optional existing model to test under |
55 | """ |
56 | setup_testing_environment(client, bundle, target_model, series) |
57 | - log.info("Starting network tests") |
58 | - agnostic_result = ensure_juju_agnostic_visibility(client) |
59 | - log.info('Agnostic result:\n {}'.format(json.dumps(agnostic_result, |
60 | - indent=4, |
61 | - sort_keys=True))) |
62 | - visibility_result = neighbor_visibility(client) |
63 | - log.info('Visibility result:\n {}'.format(json.dumps(visibility_result, |
64 | + log.info('Starting network tests') |
65 | + testing_iterations(client, series, '') |
66 | + if not reboot: |
67 | + return |
68 | + log.info('Units passed pre-reboot tests, rebooting machines') |
69 | + # TODO: Need to use itermachines and need to divide containers |
70 | + # from reboot cycle |
71 | + machines = get_juju_status(client)['machines'] |
72 | + client.run('sudo reboot', machines=machines) |
73 | + log.info('Waiting for units to restart') |
74 | + client.wait_for_started() |
75 | + client.wait_for_workloads() |
76 | + log.info('Starting post-reboot network tests') |
77 | + testing_iterations(client, series, 'Post-reboot ') |
78 | + |
79 | + |
80 | +def testing_iterations(client, series, reboot_msg): |
81 | + """Runs through each test given for a given client and series |
82 | + |
83 | + :param client: Client |
84 | + """ |
85 | + con_result = juju_controller_visibility(client) |
86 | + log.info('{}Controller Visibility ' |
87 | + 'result:\n {}'.format(reboot_msg, json.dumps(con_result, |
88 | + indent=4, |
89 | + sort_keys=True))) |
90 | + int_result = internet_connection(client) |
91 | + log.info('{}Internet Test result:\n {}'.format(reboot_msg, |
92 | + json.dumps(int_result, |
93 | + indent=4, |
94 | + sort_keys=True))) |
95 | + vis_result = neighbor_visibility(client) |
96 | + log.info('{}Visibility result:\n {}'.format(reboot_msg, |
97 | + json.dumps(vis_result, |
98 | + indent=4, |
99 | + sort_keys=True))) |
100 | + exp_result = ensure_exposed(client, series) |
101 | + log.info('{}Exposure result:\n {}'.format(reboot_msg, |
102 | + json.dumps(exp_result, |
103 | indent=4, |
104 | - sort_keys=True))) |
105 | - exposed_result = ensure_exposed(client, series) |
106 | - log.info('Exposure result:\n {}'.format(json.dumps(exposed_result, |
107 | - indent=4, |
108 | - sort_keys=True)) or |
109 | - NO_EXPOSED_UNITS) |
110 | - log.info('Network tests complete, parsing results.') |
111 | - parse_final_results(agnostic_result, visibility_result, exposed_result) |
112 | + sort_keys=True)) or |
113 | + NO_EXPOSED_UNITS) |
114 | + log.info('Tests complete.') |
115 | + parse_final_results(con_result, vis_result, int_result, exp_result) |
116 | |
117 | |
118 | def setup_testing_environment(client, bundle, target_model, series=None): |
119 | @@ -77,9 +106,7 @@ |
120 | elif bundle is None and target_model is None: |
121 | setup_dummy_deployment(client, series) |
122 | |
123 | - charm_path = local_charm_path(charm='network-health', series=series, |
124 | - juju_ver=client.version) |
125 | - client.deploy(charm_path) |
126 | + client.deploy('~juju-qa/network-health', series=series) |
127 | client.wait_for_started() |
128 | client.wait_for_workloads() |
129 | applications = get_juju_status(client)['applications'].keys() |
130 | @@ -88,7 +115,7 @@ |
131 | for app in applications: |
132 | try: |
133 | client.juju('add-relation', (app, 'network-health')) |
134 | - except subprocess.CalledProcessError: |
135 | + except subprocess.cessError: |
136 | log.error('Could not relate {} & network-health'.format(app)) |
137 | |
138 | client.wait_for_workloads() |
139 | @@ -113,9 +140,7 @@ |
140 | :param client: Bootstrapped juju client |
141 | """ |
142 | log.info("Deploying dummy charm for basic testing") |
143 | - dummy_path = local_charm_path(charm='ubuntu', series=series, |
144 | - juju_ver=client.version) |
145 | - client.deploy(dummy_path, num=2) |
146 | + client.deploy('ubuntu', num=2, series=series) |
147 | client.juju('expose', ('ubuntu',)) |
148 | |
149 | |
150 | @@ -136,12 +161,13 @@ |
151 | return client.get_status().status |
152 | |
153 | |
154 | -def ensure_juju_agnostic_visibility(client): |
155 | - """Determine if known juju machines are visible. |
156 | +def juju_controller_visibility(client): |
157 | + """Determine if known juju machines are visible from controller. |
158 | |
159 | :param machine: List of machine IPs to test |
160 | :return: Connection attempt results |
161 | """ |
162 | + controller_client = client.get_controller_client() |
163 | log.info('Starting agnostic visibility test') |
164 | machines = get_juju_status(client)['machines'] |
165 | result = {} |
166 | @@ -149,14 +175,40 @@ |
167 | result[machine] = {} |
168 | for ip in info['ip-addresses']: |
169 | try: |
170 | - output = subprocess.check_output("ping -c 1 " + ip, shell=True) |
171 | - except subprocess.CalledProcessError, e: |
172 | + ssh(controller_client, '0', "ping -c 1 " + ip) |
173 | + except subprocess.CalledProcessError as e: |
174 | log.error('Error with ping attempt to {}: {}'.format(ip, e)) |
175 | result[machine][ip] = False |
176 | result[machine][ip] = True |
177 | return result |
178 | |
179 | |
180 | +def internet_connection(client): |
181 | + """Test that targets can ping their default route. |
182 | + |
183 | + :param client: Juju client |
184 | + :return: Dict of results by machine |
185 | + """ |
186 | + log.info('Assessing internet connection.') |
187 | + results = {} |
188 | + units = get_juju_status(client)['machines'] |
189 | + for unit in units: |
190 | + log.info("Assessing internet connection for {}".format(unit)) |
191 | + routes = ssh(client, unit, 'ip route show') |
192 | + d = re.search(r'^default\s+via\s+([\d\.]+)\s+', routes, re.MULTILINE) |
193 | + if d: |
194 | + rc = client.juju('ssh', ('--proxy', unit, |
195 | + 'ping -c1 -q ' + d.group(1)), check=False) |
196 | + if rc != 0: |
197 | + log.error('{0} unable to ping default route'.format(unit)) |
198 | + results[unit] = False |
199 | + else: |
200 | + log.error("Default route not found") |
201 | + results[unit] = False |
202 | + results[unit] = True |
203 | + return results |
204 | + |
205 | + |
206 | def neighbor_visibility(client): |
207 | """Check if each application's units are visible, including our own. |
208 | |
209 | @@ -208,12 +260,8 @@ |
210 | :return: New juju client object |
211 | """ |
212 | new_client = client.add_model('exposetest') |
213 | - dummy_path = local_charm_path(charm='ubuntu', series=series, |
214 | - juju_ver=client.version) |
215 | - new_client.deploy(dummy_path) |
216 | - charm_path = local_charm_path(charm='network-health', series=series, |
217 | - juju_ver=client.version) |
218 | - new_client.deploy(charm_path) |
219 | + new_client.deploy('ubuntu', series=series) |
220 | + new_client.deploy('~juju-qa/network-health', series=series) |
221 | new_client.wait_for_started() |
222 | new_client.wait_for_workloads() |
223 | new_client.juju('add-relation', ('ubuntu', 'network-health')) |
224 | @@ -244,18 +292,19 @@ |
225 | return result |
226 | |
227 | |
228 | -def parse_final_results(agnostic, visibility, exposed=None): |
229 | +def parse_final_results(controller, visibility, internet, exposed=None): |
230 | """Parses test results and raises an error if any failed. |
231 | |
232 | - :param agnostic: Agnostic test result |
233 | + :param controller: Controller test result |
234 | :param visibility: Visibility test result |
235 | :param exposed: Exposure test result |
236 | """ |
237 | + log.info('Parsing final results.') |
238 | error_string = [] |
239 | - for machine, machine_result in agnostic.items(): |
240 | + for machine, machine_result in controller.items(): |
241 | for ip, res in machine_result.items(): |
242 | if res is False: |
243 | - error = ('Failed to ping machine {0} ' |
244 | + error = ('Failed to contact controller from machine {0} ' |
245 | 'at address {1}\n'.format(machine, ip)) |
246 | error_string.append(error) |
247 | for nh_source, service_result in visibility.items(): |
248 | @@ -265,18 +314,44 @@ |
249 | error = ('NH-Unit {0} failed to contact ' |
250 | 'unit(s): {1}\n'.format(nh_source, failed)) |
251 | error_string.append(error) |
252 | - |
253 | + for machine, res in internet.items(): |
254 | + if not res: |
255 | + error = 'Machine {} failed internet connection.'.format(machine) |
256 | + error_string.append(error) |
257 | if exposed and exposed['fail'] is not (): |
258 | error = ('Application(s) {0} failed expose ' |
259 | 'test\n'.format(exposed['fail'])) |
260 | error_string.append(error) |
261 | - |
262 | if error_string: |
263 | raise ConnectionError('\n'.join(error_string)) |
264 | |
265 | return |
266 | |
267 | |
268 | +def ssh(client, machine, cmd): |
269 | + """Convenience function: run a juju ssh command and get back the output |
270 | + :param client: A Juju client |
271 | + :param machine: ID of the machine on which to run a command |
272 | + :param cmd: the command to run |
273 | + :return: text output of the command |
274 | + """ |
275 | + back_off = 2 |
276 | + attempts = 4 |
277 | + for attempt in range(attempts): |
278 | + try: |
279 | + return client.get_juju_output('ssh', '--proxy', machine, cmd) |
280 | + except subprocess.CalledProcessError as e: |
281 | + # If the connection to the host failed, try again in a couple of |
282 | + # seconds. This is usually due to heavy load. |
283 | + if(attempt < attempts - 1 and |
284 | + re.search('ssh_exchange_identification: ' |
285 | + 'Connection closed by remote host', e.stderr)): |
286 | + time.sleep(back_off) |
287 | + back_off *= 2 |
288 | + else: |
289 | + raise |
290 | + |
291 | + |
292 | def ping_units(client, source, units): |
293 | """Calls out to our subordinate network-health charm to ping targets. |
294 | |
295 | @@ -286,8 +361,8 @@ |
296 | """ |
297 | units = to_json(units) |
298 | args = "targets='{}'".format(units) |
299 | - retval = client.action_do(source, 'ping', args) |
300 | - result = client.action_fetch(retval) |
301 | + action_id = client.action_do(source, 'ping', args) |
302 | + result = client.action_fetch(action_id) |
303 | result = yaml.safe_load(result)['results']['results'] |
304 | return result |
305 | |
306 | @@ -326,6 +401,9 @@ |
307 | add_basic_testing_arguments(parser) |
308 | parser.add_argument('--bundle', help='Bundle to test network against') |
309 | parser.add_argument('--model', help='Existing Juju model to test under') |
310 | + parser.add_argument('--reboot', help='Reboot machines and re-run tests' |
311 | + 'default=False') |
312 | + parser.set_defaults(reboot=False) |
313 | parser.set_defaults(series='trusty') |
314 | return parser.parse_args(argv) |
315 | |
316 | |
317 | === modified file 'tests/test_assess_network_health.py' |
318 | --- tests/test_assess_network_health.py 2017-02-22 20:34:37 +0000 |
319 | +++ tests/test_assess_network_health.py 2017-02-24 18:28:22 +0000 |
320 | @@ -1,4 +1,3 @@ |
321 | -import json |
322 | import yaml |
323 | import StringIO |
324 | import logging |
325 | @@ -24,13 +23,12 @@ |
326 | ) |
327 | from assess_network_health import ( |
328 | main, |
329 | - assess_network_health, |
330 | setup_testing_environment, |
331 | - connect_to_existing_model, |
332 | setup_dummy_deployment, |
333 | setup_bundle_deployment, |
334 | get_juju_status, |
335 | - ensure_juju_agnostic_visibility, |
336 | + juju_controller_visibility, |
337 | + internet_connection, |
338 | neighbor_visibility, |
339 | ensure_exposed, |
340 | setup_expose_test, |
341 | @@ -118,14 +116,9 @@ |
342 | since: 01 Jan 2017 00:00:00-00:00 |
343 | subordinate-to: |
344 | - ubuntu |
345 | - |
346 | - relations: |
347 | - juju-info: |
348 | - - network-health |
349 | - network-health: |
350 | - exposed: false |
351 | - application-status: |
352 | - current: unknown |
353 | + relations: |
354 | + juju-info: |
355 | + - network-health |
356 | """) |
357 | status = Status(yaml.safe_load(status_value), status_value) |
358 | |
359 | @@ -162,12 +155,11 @@ |
360 | series) |
361 | return mock_client |
362 | |
363 | - client = setup_iteration(bundle=None, |
364 | - target_model=None, series=series) |
365 | + client = setup_iteration(bundle=None, target_model=None, series=series) |
366 | self.assertEqual( |
367 | - [call.deploy('ubuntu', num=2), |
368 | + [call.deploy('ubuntu', num=2, series='trusty'), |
369 | call.juju('expose', ('ubuntu',)), |
370 | - call.deploy('network-health'), |
371 | + call.deploy('~juju-qa/network-health', series='trusty'), |
372 | call.wait_for_started(), |
373 | call.wait_for_workloads(), |
374 | call.get_status(), |
375 | @@ -175,8 +167,8 @@ |
376 | call.wait_for_workloads(), |
377 | call.wait_for_subordinate_units('ubuntu', 'network-health')], |
378 | client.mock_calls) |
379 | - client = setup_iteration(bundle=bundle_string, |
380 | - target_model=None, series=series) |
381 | + client = setup_iteration(bundle=bundle_string, target_model=None, |
382 | + series=series) |
383 | self.assertEqual( |
384 | [call.deploy_bundle('services:\n foo:\n ' |
385 | 'charm: local:trusty/foo\n ' |
386 | @@ -184,7 +176,7 @@ |
387 | 'bar:\n charm: local:trusty/bar\n ' |
388 | 'num_units: 1\nseries: trusty\nrelations:\n' |
389 | '- - foo:baz\n - bar:baz\n'), |
390 | - call.deploy('network-health'), |
391 | + call.deploy('~juju-qa/network-health', series='trusty'), |
392 | call.wait_for_started(), |
393 | call.wait_for_workloads(), |
394 | call.get_status(), |
395 | @@ -193,16 +185,15 @@ |
396 | call.wait_for_subordinate_units('ubuntu', 'network-health')], |
397 | client.mock_calls) |
398 | |
399 | - def test_ensure_juju_agnostic_visibility(self): |
400 | + def test_juju_controller_visibility(self): |
401 | client = fake_juju_client() |
402 | client.bootstrap() |
403 | - ag_return = True |
404 | now = datetime.now() + timedelta(days=1) |
405 | with patch('utility.until_timeout.now', return_value=now): |
406 | with patch.object(client, 'get_status', return_value=status): |
407 | with patch('subprocess.check_output', |
408 | return_value=0): |
409 | - out = ensure_juju_agnostic_visibility(client) |
410 | + out = juju_controller_visibility(client) |
411 | expected = {'1': {'1.1.1.2': True}, '0': {'1.1.1.1': True}} |
412 | self.assertEqual(expected, out) |
413 | |
414 | @@ -220,8 +211,8 @@ |
415 | now = datetime.now() + timedelta(days=1) |
416 | with patch('utility.until_timeout.now', return_value=now): |
417 | with patch.object(client, 'get_status', return_value=status): |
418 | - client.deploy('ubuntu', num=2) |
419 | - client.deploy('network-health') |
420 | + client.deploy('ubuntu', num=2, series='trusty') |
421 | + client.deploy('network-health', series='trusty') |
422 | out = neighbor_visibility(client) |
423 | expected = {'network-health/0': {'ubuntu': {u'ubuntu/0': True, |
424 | u'ubuntu/1': True}}, |
425 | @@ -229,6 +220,18 @@ |
426 | u'ubuntu/1': True}}} |
427 | self.assertEqual(expected, out) |
428 | |
429 | + def test_internet_connection(self): |
430 | + client = fake_juju_client() |
431 | + client.bootstrap() |
432 | + now = datetime.now() + timedelta(days=1) |
433 | + with patch('utility.until_timeout.now', return_value=now): |
434 | + with patch.object(client, 'get_status', return_value=status): |
435 | + with patch('subprocess.check_output', |
436 | + return_value=0): |
437 | + out = internet_connection(client) |
438 | + expected = {'1': True, '0': True} |
439 | + self.assertEqual(expected, out) |
440 | + |
441 | def test_ensure_exposed(self): |
442 | client = Mock(wraps=fake_juju_client()) |
443 | client.bootstrap() |
444 | @@ -236,8 +239,8 @@ |
445 | new_client.bootstrap() |
446 | new_client._backend.set_action_result('network-health/0', 'ping', |
447 | ping_result) |
448 | - new_client.deploy('ubuntu', num=2) |
449 | - new_client.deploy('network-health') |
450 | + new_client.deploy('ubuntu', num=2, series='trusty') |
451 | + new_client.deploy('network-health', series='trusty') |
452 | now = datetime.now() + timedelta(days=1) |
453 | with patch('utility.until_timeout.now', return_value=now): |
454 | with patch.object(client, 'get_status', return_value=status): |
455 | @@ -255,7 +258,7 @@ |
456 | client = Mock(wraps=fake_juju_client()) |
457 | client.bootstrap() |
458 | setup_dummy_deployment(client, series) |
459 | - client.deploy.assert_called_once_with('ubuntu', num=2) |
460 | + client.deploy.assert_called_once_with('ubuntu', num=2, series='trusty') |
461 | |
462 | def test_bundle_deployment(self): |
463 | client = Mock(wraps=fake_juju_client()) |
464 | @@ -275,8 +278,9 @@ |
465 | setup_expose_test(mock_client, series) |
466 | self.assertEqual( |
467 | [call.add_model('exposetest'), |
468 | - call.add_model().deploy('ubuntu'), |
469 | - call.add_model().deploy('network-health'), |
470 | + call.add_model().deploy('ubuntu', series='trusty'), |
471 | + call.add_model().deploy('~juju-qa/network-health', |
472 | + series='trusty'), |
473 | call.add_model().wait_for_started(), |
474 | call.add_model().wait_for_workloads(), |
475 | call.add_model().juju('add-relation', ('ubuntu', |
476 | @@ -302,22 +306,27 @@ |
477 | self.assertEqual(expected, result) |
478 | |
479 | def test_parse_final_results_with_fail(self): |
480 | - agnostic = {"0": {"1.1.1.1": False}} |
481 | + controller = {"0": {"1.1.1.1": False}, |
482 | + "1": {"1.1.1.2": True}} |
483 | visible = {"bar/0": {"foo": {"foo/0": False, "foo/1": True}}} |
484 | + internet = {"0": False, "1": True} |
485 | exposed = {"fail": ("foo"), "pass": ("bar", "baz")} |
486 | with self.assertRaises(ConnectionError) as context: |
487 | - parse_final_results(agnostic, visible, exposed) |
488 | - error_strings = ["Failed to ping machine 0 at address 1.1.1.1", |
489 | + parse_final_results(controller, visible, internet, exposed) |
490 | + error_strings = ["Failed to contact controller from machine 0 " |
491 | + "at address 1.1.1.1", |
492 | + "Machine 0 failed internet connection.", |
493 | "NH-Unit bar/0 failed to contact unit(s): ['foo/0']", |
494 | "Application(s) foo failed expose test"] |
495 | for line in error_strings: |
496 | self.assertTrue(line in context.exception.message) |
497 | |
498 | def test_parse_final_results_without_fail(self): |
499 | - agnostic = {"0": {"1.1.1.1": True}} |
500 | + controller = {"0": {"1.1.1.1": True}} |
501 | visible = {"bar/0": {"foo": {"foo/0": True, "foo/1": True}}} |
502 | + internet = {"0": True, "1": True} |
503 | exposed = {"fail": (), "pass": ("foo", "bar", "baz")} |
504 | - parse_final_results(agnostic, visible, exposed) |
505 | + parse_final_results(controller, visible, internet, exposed) |
506 | |
507 | def test_ping_units(self): |
508 | client = fake_juju_client() |
509 | @@ -344,7 +353,6 @@ |
510 | |
511 | def test_main(self): |
512 | argv = ["an-env", "/bin/juju", "/tmp/logs", "an-env-mod", "--verbose"] |
513 | - env = object() |
514 | client = Mock(spec=["is_jes_enabled"]) |
515 | with patch("assess_network_health.configure_logging", |
516 | autospec=True) as mock_cl: |
We agreed to make some series fixes and that the machine handling of reboot won't work. We can fix the reboot feature in another branch.