Merge lp:~mskalka/juju-ci-tools/add-more-network-tests into lp:juju-ci-tools

Proposed by Michael Skalka
Status: Merged
Merged at revision: 1909
Proposed branch: lp:~mskalka/juju-ci-tools/add-more-network-tests
Merge into: lp:juju-ci-tools
Diff against target: 516 lines (+165/-81)
4 files modified
.bzrignore (+0/-1)
assess_container_networking.py (+0/-1)
assess_network_health.py (+122/-44)
tests/test_assess_network_health.py (+43/-35)
To merge this branch: bzr merge lp:~mskalka/juju-ci-tools/add-more-network-tests
Reviewer Review Type Date Requested Status
Curtis Hovey (community) code Approve
Review via email: mp+318248@code.launchpad.net

Description of the change

Adds an additional network test and changes the way the agnostic visibility test is run. Instead of the script itself pinging the units, the Juju controller now does it. Ideally this will give us a better idea of whether an agent is 'lost' or has just dropped off the network. The additional network test is borrowed from assess_container_networking and ensures the unit can ping its default gateway.

To post a comment you must log in.
1908. By Michael Skalka

updated bzr ignore

1909. By Michael Skalka

revert makefile change

Revision history for this message
Curtis Hovey (sinzui) wrote :

We agreed to make some series fixes and that the machine handling of reboot won't work. We can fix the reboot feature in another branch.

review: Approve (code)
1910. By Michael Skalka

cleanup, fixed series

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file '.bzrignore'
2--- .bzrignore 2016-12-14 12:40:18 +0000
3+++ .bzrignore 2017-02-24 18:28:22 +0000
4@@ -4,4 +4,3 @@
5 /logs
6 /repository
7 ./.coverage
8-
9
10=== modified file 'assess_container_networking.py'
11--- assess_container_networking.py 2017-01-20 20:58:41 +0000
12+++ assess_container_networking.py 2017-02-24 18:28:22 +0000
13@@ -127,7 +127,6 @@
14 host, type, id = c[0].split('/')
15 if type in containers and host in containers[type]:
16 containers[type][host].append(c[0])
17-
18 return hosts, containers
19
20
21
22=== modified file 'assess_network_health.py'
23--- assess_network_health.py 2017-02-22 19:05:46 +0000
24+++ assess_network_health.py 2017-02-24 18:28:22 +0000
25@@ -9,14 +9,14 @@
26 import yaml
27 import ast
28 import subprocess
29+import re
30+import time
31
32 from jujupy import client_from_config
33 from deploy_stack import (
34 BootstrapManager,
35 )
36-from jujucharm import (
37- local_charm_path,
38- )
39+
40 from utility import (
41 add_basic_testing_arguments,
42 configure_logging,
43@@ -36,7 +36,8 @@
44 self.message = message
45
46
47-def assess_network_health(client, bundle=None, target_model=None, series=None):
48+def assess_network_health(client, bundle=None, target_model=None, reboot=False,
49+ series=None):
50 """Assesses network health for a given deployment or bundle.
51
52 :param client: The juju client in use
53@@ -44,22 +45,50 @@
54 :param model: Optional existing model to test under
55 """
56 setup_testing_environment(client, bundle, target_model, series)
57- log.info("Starting network tests")
58- agnostic_result = ensure_juju_agnostic_visibility(client)
59- log.info('Agnostic result:\n {}'.format(json.dumps(agnostic_result,
60- indent=4,
61- sort_keys=True)))
62- visibility_result = neighbor_visibility(client)
63- log.info('Visibility result:\n {}'.format(json.dumps(visibility_result,
64+ log.info('Starting network tests')
65+ testing_iterations(client, series, '')
66+ if not reboot:
67+ return
68+ log.info('Units passed pre-reboot tests, rebooting machines')
69+ # TODO: Need to use itermachines and need to divide containers
70+ # from reboot cycle
71+ machines = get_juju_status(client)['machines']
72+ client.run('sudo reboot', machines=machines)
73+ log.info('Waiting for units to restart')
74+ client.wait_for_started()
75+ client.wait_for_workloads()
76+ log.info('Starting post-reboot network tests')
77+ testing_iterations(client, series, 'Post-reboot ')
78+
79+
80+def testing_iterations(client, series, reboot_msg):
81+ """Runs through each test given for a given client and series
82+
83+ :param client: Client
84+ """
85+ con_result = juju_controller_visibility(client)
86+ log.info('{}Controller Visibility '
87+ 'result:\n {}'.format(reboot_msg, json.dumps(con_result,
88+ indent=4,
89+ sort_keys=True)))
90+ int_result = internet_connection(client)
91+ log.info('{}Internet Test result:\n {}'.format(reboot_msg,
92+ json.dumps(int_result,
93+ indent=4,
94+ sort_keys=True)))
95+ vis_result = neighbor_visibility(client)
96+ log.info('{}Visibility result:\n {}'.format(reboot_msg,
97+ json.dumps(vis_result,
98+ indent=4,
99+ sort_keys=True)))
100+ exp_result = ensure_exposed(client, series)
101+ log.info('{}Exposure result:\n {}'.format(reboot_msg,
102+ json.dumps(exp_result,
103 indent=4,
104- sort_keys=True)))
105- exposed_result = ensure_exposed(client, series)
106- log.info('Exposure result:\n {}'.format(json.dumps(exposed_result,
107- indent=4,
108- sort_keys=True)) or
109- NO_EXPOSED_UNITS)
110- log.info('Network tests complete, parsing results.')
111- parse_final_results(agnostic_result, visibility_result, exposed_result)
112+ sort_keys=True)) or
113+ NO_EXPOSED_UNITS)
114+ log.info('Tests complete.')
115+ parse_final_results(con_result, vis_result, int_result, exp_result)
116
117
118 def setup_testing_environment(client, bundle, target_model, series=None):
119@@ -77,9 +106,7 @@
120 elif bundle is None and target_model is None:
121 setup_dummy_deployment(client, series)
122
123- charm_path = local_charm_path(charm='network-health', series=series,
124- juju_ver=client.version)
125- client.deploy(charm_path)
126+ client.deploy('~juju-qa/network-health', series=series)
127 client.wait_for_started()
128 client.wait_for_workloads()
129 applications = get_juju_status(client)['applications'].keys()
130@@ -88,7 +115,7 @@
131 for app in applications:
132 try:
133 client.juju('add-relation', (app, 'network-health'))
134- except subprocess.CalledProcessError:
135+ except subprocess.cessError:
136 log.error('Could not relate {} & network-health'.format(app))
137
138 client.wait_for_workloads()
139@@ -113,9 +140,7 @@
140 :param client: Bootstrapped juju client
141 """
142 log.info("Deploying dummy charm for basic testing")
143- dummy_path = local_charm_path(charm='ubuntu', series=series,
144- juju_ver=client.version)
145- client.deploy(dummy_path, num=2)
146+ client.deploy('ubuntu', num=2, series=series)
147 client.juju('expose', ('ubuntu',))
148
149
150@@ -136,12 +161,13 @@
151 return client.get_status().status
152
153
154-def ensure_juju_agnostic_visibility(client):
155- """Determine if known juju machines are visible.
156+def juju_controller_visibility(client):
157+ """Determine if known juju machines are visible from controller.
158
159 :param machine: List of machine IPs to test
160 :return: Connection attempt results
161 """
162+ controller_client = client.get_controller_client()
163 log.info('Starting agnostic visibility test')
164 machines = get_juju_status(client)['machines']
165 result = {}
166@@ -149,14 +175,40 @@
167 result[machine] = {}
168 for ip in info['ip-addresses']:
169 try:
170- output = subprocess.check_output("ping -c 1 " + ip, shell=True)
171- except subprocess.CalledProcessError, e:
172+ ssh(controller_client, '0', "ping -c 1 " + ip)
173+ except subprocess.CalledProcessError as e:
174 log.error('Error with ping attempt to {}: {}'.format(ip, e))
175 result[machine][ip] = False
176 result[machine][ip] = True
177 return result
178
179
180+def internet_connection(client):
181+ """Test that targets can ping their default route.
182+
183+ :param client: Juju client
184+ :return: Dict of results by machine
185+ """
186+ log.info('Assessing internet connection.')
187+ results = {}
188+ units = get_juju_status(client)['machines']
189+ for unit in units:
190+ log.info("Assessing internet connection for {}".format(unit))
191+ routes = ssh(client, unit, 'ip route show')
192+ d = re.search(r'^default\s+via\s+([\d\.]+)\s+', routes, re.MULTILINE)
193+ if d:
194+ rc = client.juju('ssh', ('--proxy', unit,
195+ 'ping -c1 -q ' + d.group(1)), check=False)
196+ if rc != 0:
197+ log.error('{0} unable to ping default route'.format(unit))
198+ results[unit] = False
199+ else:
200+ log.error("Default route not found")
201+ results[unit] = False
202+ results[unit] = True
203+ return results
204+
205+
206 def neighbor_visibility(client):
207 """Check if each application's units are visible, including our own.
208
209@@ -208,12 +260,8 @@
210 :return: New juju client object
211 """
212 new_client = client.add_model('exposetest')
213- dummy_path = local_charm_path(charm='ubuntu', series=series,
214- juju_ver=client.version)
215- new_client.deploy(dummy_path)
216- charm_path = local_charm_path(charm='network-health', series=series,
217- juju_ver=client.version)
218- new_client.deploy(charm_path)
219+ new_client.deploy('ubuntu', series=series)
220+ new_client.deploy('~juju-qa/network-health', series=series)
221 new_client.wait_for_started()
222 new_client.wait_for_workloads()
223 new_client.juju('add-relation', ('ubuntu', 'network-health'))
224@@ -244,18 +292,19 @@
225 return result
226
227
228-def parse_final_results(agnostic, visibility, exposed=None):
229+def parse_final_results(controller, visibility, internet, exposed=None):
230 """Parses test results and raises an error if any failed.
231
232- :param agnostic: Agnostic test result
233+ :param controller: Controller test result
234 :param visibility: Visibility test result
235 :param exposed: Exposure test result
236 """
237+ log.info('Parsing final results.')
238 error_string = []
239- for machine, machine_result in agnostic.items():
240+ for machine, machine_result in controller.items():
241 for ip, res in machine_result.items():
242 if res is False:
243- error = ('Failed to ping machine {0} '
244+ error = ('Failed to contact controller from machine {0} '
245 'at address {1}\n'.format(machine, ip))
246 error_string.append(error)
247 for nh_source, service_result in visibility.items():
248@@ -265,18 +314,44 @@
249 error = ('NH-Unit {0} failed to contact '
250 'unit(s): {1}\n'.format(nh_source, failed))
251 error_string.append(error)
252-
253+ for machine, res in internet.items():
254+ if not res:
255+ error = 'Machine {} failed internet connection.'.format(machine)
256+ error_string.append(error)
257 if exposed and exposed['fail'] is not ():
258 error = ('Application(s) {0} failed expose '
259 'test\n'.format(exposed['fail']))
260 error_string.append(error)
261-
262 if error_string:
263 raise ConnectionError('\n'.join(error_string))
264
265 return
266
267
268+def ssh(client, machine, cmd):
269+ """Convenience function: run a juju ssh command and get back the output
270+ :param client: A Juju client
271+ :param machine: ID of the machine on which to run a command
272+ :param cmd: the command to run
273+ :return: text output of the command
274+ """
275+ back_off = 2
276+ attempts = 4
277+ for attempt in range(attempts):
278+ try:
279+ return client.get_juju_output('ssh', '--proxy', machine, cmd)
280+ except subprocess.CalledProcessError as e:
281+ # If the connection to the host failed, try again in a couple of
282+ # seconds. This is usually due to heavy load.
283+ if(attempt < attempts - 1 and
284+ re.search('ssh_exchange_identification: '
285+ 'Connection closed by remote host', e.stderr)):
286+ time.sleep(back_off)
287+ back_off *= 2
288+ else:
289+ raise
290+
291+
292 def ping_units(client, source, units):
293 """Calls out to our subordinate network-health charm to ping targets.
294
295@@ -286,8 +361,8 @@
296 """
297 units = to_json(units)
298 args = "targets='{}'".format(units)
299- retval = client.action_do(source, 'ping', args)
300- result = client.action_fetch(retval)
301+ action_id = client.action_do(source, 'ping', args)
302+ result = client.action_fetch(action_id)
303 result = yaml.safe_load(result)['results']['results']
304 return result
305
306@@ -326,6 +401,9 @@
307 add_basic_testing_arguments(parser)
308 parser.add_argument('--bundle', help='Bundle to test network against')
309 parser.add_argument('--model', help='Existing Juju model to test under')
310+ parser.add_argument('--reboot', help='Reboot machines and re-run tests'
311+ 'default=False')
312+ parser.set_defaults(reboot=False)
313 parser.set_defaults(series='trusty')
314 return parser.parse_args(argv)
315
316
317=== modified file 'tests/test_assess_network_health.py'
318--- tests/test_assess_network_health.py 2017-02-22 20:34:37 +0000
319+++ tests/test_assess_network_health.py 2017-02-24 18:28:22 +0000
320@@ -1,4 +1,3 @@
321-import json
322 import yaml
323 import StringIO
324 import logging
325@@ -24,13 +23,12 @@
326 )
327 from assess_network_health import (
328 main,
329- assess_network_health,
330 setup_testing_environment,
331- connect_to_existing_model,
332 setup_dummy_deployment,
333 setup_bundle_deployment,
334 get_juju_status,
335- ensure_juju_agnostic_visibility,
336+ juju_controller_visibility,
337+ internet_connection,
338 neighbor_visibility,
339 ensure_exposed,
340 setup_expose_test,
341@@ -118,14 +116,9 @@
342 since: 01 Jan 2017 00:00:00-00:00
343 subordinate-to:
344 - ubuntu
345-
346- relations:
347- juju-info:
348- - network-health
349- network-health:
350- exposed: false
351- application-status:
352- current: unknown
353+ relations:
354+ juju-info:
355+ - network-health
356 """)
357 status = Status(yaml.safe_load(status_value), status_value)
358
359@@ -162,12 +155,11 @@
360 series)
361 return mock_client
362
363- client = setup_iteration(bundle=None,
364- target_model=None, series=series)
365+ client = setup_iteration(bundle=None, target_model=None, series=series)
366 self.assertEqual(
367- [call.deploy('ubuntu', num=2),
368+ [call.deploy('ubuntu', num=2, series='trusty'),
369 call.juju('expose', ('ubuntu',)),
370- call.deploy('network-health'),
371+ call.deploy('~juju-qa/network-health', series='trusty'),
372 call.wait_for_started(),
373 call.wait_for_workloads(),
374 call.get_status(),
375@@ -175,8 +167,8 @@
376 call.wait_for_workloads(),
377 call.wait_for_subordinate_units('ubuntu', 'network-health')],
378 client.mock_calls)
379- client = setup_iteration(bundle=bundle_string,
380- target_model=None, series=series)
381+ client = setup_iteration(bundle=bundle_string, target_model=None,
382+ series=series)
383 self.assertEqual(
384 [call.deploy_bundle('services:\n foo:\n '
385 'charm: local:trusty/foo\n '
386@@ -184,7 +176,7 @@
387 'bar:\n charm: local:trusty/bar\n '
388 'num_units: 1\nseries: trusty\nrelations:\n'
389 '- - foo:baz\n - bar:baz\n'),
390- call.deploy('network-health'),
391+ call.deploy('~juju-qa/network-health', series='trusty'),
392 call.wait_for_started(),
393 call.wait_for_workloads(),
394 call.get_status(),
395@@ -193,16 +185,15 @@
396 call.wait_for_subordinate_units('ubuntu', 'network-health')],
397 client.mock_calls)
398
399- def test_ensure_juju_agnostic_visibility(self):
400+ def test_juju_controller_visibility(self):
401 client = fake_juju_client()
402 client.bootstrap()
403- ag_return = True
404 now = datetime.now() + timedelta(days=1)
405 with patch('utility.until_timeout.now', return_value=now):
406 with patch.object(client, 'get_status', return_value=status):
407 with patch('subprocess.check_output',
408 return_value=0):
409- out = ensure_juju_agnostic_visibility(client)
410+ out = juju_controller_visibility(client)
411 expected = {'1': {'1.1.1.2': True}, '0': {'1.1.1.1': True}}
412 self.assertEqual(expected, out)
413
414@@ -220,8 +211,8 @@
415 now = datetime.now() + timedelta(days=1)
416 with patch('utility.until_timeout.now', return_value=now):
417 with patch.object(client, 'get_status', return_value=status):
418- client.deploy('ubuntu', num=2)
419- client.deploy('network-health')
420+ client.deploy('ubuntu', num=2, series='trusty')
421+ client.deploy('network-health', series='trusty')
422 out = neighbor_visibility(client)
423 expected = {'network-health/0': {'ubuntu': {u'ubuntu/0': True,
424 u'ubuntu/1': True}},
425@@ -229,6 +220,18 @@
426 u'ubuntu/1': True}}}
427 self.assertEqual(expected, out)
428
429+ def test_internet_connection(self):
430+ client = fake_juju_client()
431+ client.bootstrap()
432+ now = datetime.now() + timedelta(days=1)
433+ with patch('utility.until_timeout.now', return_value=now):
434+ with patch.object(client, 'get_status', return_value=status):
435+ with patch('subprocess.check_output',
436+ return_value=0):
437+ out = internet_connection(client)
438+ expected = {'1': True, '0': True}
439+ self.assertEqual(expected, out)
440+
441 def test_ensure_exposed(self):
442 client = Mock(wraps=fake_juju_client())
443 client.bootstrap()
444@@ -236,8 +239,8 @@
445 new_client.bootstrap()
446 new_client._backend.set_action_result('network-health/0', 'ping',
447 ping_result)
448- new_client.deploy('ubuntu', num=2)
449- new_client.deploy('network-health')
450+ new_client.deploy('ubuntu', num=2, series='trusty')
451+ new_client.deploy('network-health', series='trusty')
452 now = datetime.now() + timedelta(days=1)
453 with patch('utility.until_timeout.now', return_value=now):
454 with patch.object(client, 'get_status', return_value=status):
455@@ -255,7 +258,7 @@
456 client = Mock(wraps=fake_juju_client())
457 client.bootstrap()
458 setup_dummy_deployment(client, series)
459- client.deploy.assert_called_once_with('ubuntu', num=2)
460+ client.deploy.assert_called_once_with('ubuntu', num=2, series='trusty')
461
462 def test_bundle_deployment(self):
463 client = Mock(wraps=fake_juju_client())
464@@ -275,8 +278,9 @@
465 setup_expose_test(mock_client, series)
466 self.assertEqual(
467 [call.add_model('exposetest'),
468- call.add_model().deploy('ubuntu'),
469- call.add_model().deploy('network-health'),
470+ call.add_model().deploy('ubuntu', series='trusty'),
471+ call.add_model().deploy('~juju-qa/network-health',
472+ series='trusty'),
473 call.add_model().wait_for_started(),
474 call.add_model().wait_for_workloads(),
475 call.add_model().juju('add-relation', ('ubuntu',
476@@ -302,22 +306,27 @@
477 self.assertEqual(expected, result)
478
479 def test_parse_final_results_with_fail(self):
480- agnostic = {"0": {"1.1.1.1": False}}
481+ controller = {"0": {"1.1.1.1": False},
482+ "1": {"1.1.1.2": True}}
483 visible = {"bar/0": {"foo": {"foo/0": False, "foo/1": True}}}
484+ internet = {"0": False, "1": True}
485 exposed = {"fail": ("foo"), "pass": ("bar", "baz")}
486 with self.assertRaises(ConnectionError) as context:
487- parse_final_results(agnostic, visible, exposed)
488- error_strings = ["Failed to ping machine 0 at address 1.1.1.1",
489+ parse_final_results(controller, visible, internet, exposed)
490+ error_strings = ["Failed to contact controller from machine 0 "
491+ "at address 1.1.1.1",
492+ "Machine 0 failed internet connection.",
493 "NH-Unit bar/0 failed to contact unit(s): ['foo/0']",
494 "Application(s) foo failed expose test"]
495 for line in error_strings:
496 self.assertTrue(line in context.exception.message)
497
498 def test_parse_final_results_without_fail(self):
499- agnostic = {"0": {"1.1.1.1": True}}
500+ controller = {"0": {"1.1.1.1": True}}
501 visible = {"bar/0": {"foo": {"foo/0": True, "foo/1": True}}}
502+ internet = {"0": True, "1": True}
503 exposed = {"fail": (), "pass": ("foo", "bar", "baz")}
504- parse_final_results(agnostic, visible, exposed)
505+ parse_final_results(controller, visible, internet, exposed)
506
507 def test_ping_units(self):
508 client = fake_juju_client()
509@@ -344,7 +353,6 @@
510
511 def test_main(self):
512 argv = ["an-env", "/bin/juju", "/tmp/logs", "an-env-mod", "--verbose"]
513- env = object()
514 client = Mock(spec=["is_jes_enabled"])
515 with patch("assess_network_health.configure_logging",
516 autospec=True) as mock_cl:

Subscribers

People subscribed via source and target branches