Merge lp:~sseman/juju-chaos-monkey/replay-commands into lp:juju-chaos-monkey

Proposed by Seman
Status: Merged
Merged at revision: 22
Proposed branch: lp:~sseman/juju-chaos-monkey/replay-commands
Merge into: lp:juju-chaos-monkey
Diff against target: 324 lines (+164/-29)
3 files modified
chaos_monkey.py (+3/-8)
runner.py (+60/-9)
tests/test_runner.py (+101/-12)
To merge this branch: bzr merge lp:~sseman/juju-chaos-monkey/replay-commands
Reviewer Review Type Date Requested Status
John George (community) Approve
Review via email: mp+261583@code.launchpad.net

Description of the change

This branch adds support for replaying Chaos Monkey commands from a structured (YAML) data file. It handles a reboot request as follows:
  - Write the remaining Chaos Monkey commands to a temporary file.
  - After a reboot, read the temporary file and continue the run.
  - Delete the temporary file.

Here is output from using the --replay option:

$ cat /home/ubuntu/cmd_list.log
- [deny-incoming, 2]
- [restart-unit, 2]
- [deny-all, 2]

$ runner.py --replay /home/ubuntu/cmd_list.log /home/ubuntu/cm

$ cat results.log
2015-06-10 01:02:41 INFO Chaos Monkey started in /home/ubuntu/cm
2015-06-10 01:02:41 INFO Replaying commands from /home/ubuntu/cm/cmd_list.log
2015-06-10 01:02:41 INFO Deny all incoming network traffic except ssh.
2015-06-10 01:02:45 INFO Restart the unit.
2015-06-10 01:02:45 INFO Init script generated:
 cmd: --replay /home/ubuntu/cm/cmd_list.log /home/ubuntu/cm
 expire_time: 1433898167.64
 runner_path: /home/ubuntu/cm/runner.py
2015-06-10 01:02:45 INFO Chaos Monkey stopped.

2015-06-10 01:03:08 INFO Chaos Monkey restarted after a reboot in /home/ubuntu/cm
2015-06-10 01:03:08 INFO Init script removed from /etc/init/chaos-monkey-restart.conf
2015-06-10 01:03:08 INFO Replaying commands from /home/ubuntu/cm/cmd_list.log
2015-06-10 01:03:08 INFO Deny all incoming and outgoing network traffic except ssh.
2015-06-10 01:03:15 INFO Chaos Monkey stopped.

To post a comment you must log in.
Revision history for this message
John George (jog) wrote :

This looks good. Thanks for the example output in the description; it helps a lot while reviewing.

If you passed the args returned from parse_args() to Runner.factory and set up the constructor to accept each value, with some minimal default, then all the instance data could be stored in one place. The function definitions could then be smaller: rather than passing args in to each function, they could reference self. Changing the data storage location is not required to land this branch, but we've talked about a later branch that will change argument passing for the random_chaos() function. Please strongly consider this proposed change for that branch.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'chaos_monkey.py'
2--- chaos_monkey.py 2015-06-04 17:49:00 +0000
3+++ chaos_monkey.py 2015-06-10 01:20:20 +0000
4@@ -18,14 +18,6 @@
5 all_chaos, factory_obj = ChaosMonkey.get_all_chaos()
6 return cls([], factory_obj)
7
8- @property
9- def command_tag(self):
10- return ":CHAOS_CMD:"
11-
12- @property
13- def description_tag(self):
14- return ":CHAOS_DSCR:"
15-
16 @staticmethod
17 def get_all_chaos():
18 all_chaos = []
19@@ -85,3 +77,6 @@
20 if item.command_str == command_str:
21 return item
22 return None
23+
24+ def reset_command_selection(self):
25+ self.chaos = []
26
27=== modified file 'runner.py'
28--- runner.py 2015-06-08 23:51:53 +0000
29+++ runner.py 2015-06-10 01:20:20 +0000
30@@ -10,6 +10,8 @@
31 sleep
32 )
33
34+import yaml
35+
36 from chaos.kill import Kill
37 from chaos_monkey import ChaosMonkey
38 from utility import (
39@@ -35,6 +37,7 @@
40 self.chaos_monkey = chaos_monkey
41 self.expire_time = None
42 self.cmd_log_name = cmd_log_name
43+ self.replay_filename_ext = '.part'
44
45 @classmethod
46 def factory(cls, workspace, log_count=1, dry_run=False):
47@@ -131,6 +134,7 @@
48 include_command=None, exclude_command=None):
49 all_groups = ChaosMonkey.get_all_groups()
50 all_commands = ChaosMonkey.get_all_commands()
51+ self.chaos_monkey.reset_command_selection()
52
53 # If any groups and any commands are not included, assume the intent
54 # is to include all groups and all commands.
55@@ -151,6 +155,42 @@
56 exclude_command, all_commands)
57 self.chaos_monkey.exclude_command(exclude_command)
58
59+ def replay_commands(self, args):
60+ """Replay Chaos Monkey commands from a file."""
61+ commands = self.get_command_list(args)
62+ while commands:
63+ command = commands.pop()
64+ command_str = command[0]
65+ enablement_timeout = command[1]
66+ if command_str == Kill.restart_cmd and commands:
67+ # Save the commands to a temporary file before a reboot.
68+ self.save_command_list(commands, args)
69+ self.random_chaos(
70+ run_timeout=enablement_timeout,
71+ enablement_timeout=enablement_timeout,
72+ include_command=command_str)
73+ if command_str == Kill.restart_cmd:
74+ break
75+
76+ def get_command_list(self, args):
77+ """Get the command list from a file."""
78+ file_path = (args.replay + self.replay_filename_ext
79+ if args.restart else args.replay)
80+ with open(file_path) as f:
81+ commands = yaml.load(f.read())
82+ commands.reverse()
83+ # If it is a restart, remove the temporary file.
84+ if args.restart:
85+ os.remove(file_path)
86+ return commands
87+
88+ def save_command_list(self, commands, args):
89+ """Before a shutdown and restart request, this method is called
90+ to save the command list to a temporary file."""
91+ file_path = args.replay + self.replay_filename_ext
92+ with open(file_path, 'w') as f:
93+ f.write(yaml.dump(commands))
94+
95 @staticmethod
96 def _validate(sub_string, all_list):
97 sub_list = split_arg_string(sub_string)
98@@ -245,6 +285,9 @@
99 parser.add_argument(
100 '-ep', '--expire-time', type=float,
101 help='Chaos Monkey expire time (UNIX timestamp).', default=None)
102+ parser.add_argument(
103+ '-rp', '--replay', metavar='FULL-FILE-PATH',
104+ help='Replay Chaos Monkey commands from a file.', default=None)
105 args = parser.parse_args(argv)
106
107 if args.run_once and args.total_timeout:
108@@ -262,8 +305,12 @@
109 if args.enablement_timeout < 0:
110 parser.error("Invalid enablement-timeout value: timeout must be "
111 "zero or greater.")
112+ if args.replay and not os.path.isabs(args.replay):
113+ parser.error("Please provide an absolute file path to the replay "
114+ "argument: {}".format(args.replay))
115 return args
116
117+
118 if __name__ == '__main__':
119 args = parse_args()
120 runner = Runner.factory(workspace=args.path, log_count=args.log_count,
121@@ -278,15 +325,19 @@
122
123 runner.acquire_lock(restart=args.restart)
124 try:
125- runner.random_chaos(
126- run_timeout=args.total_timeout,
127- enablement_timeout=args.enablement_timeout,
128- include_group=args.include_group,
129- exclude_group=args.exclude_group,
130- include_command=args.include_command,
131- exclude_command=args.exclude_command,
132- run_once=args.run_once,
133- expire_time=args.expire_time)
134+ if args.replay:
135+ logging.info('Replaying commands from {}'.format(args.replay))
136+ runner.replay_commands(args=args)
137+ else:
138+ runner.random_chaos(
139+ run_timeout=args.total_timeout,
140+ enablement_timeout=args.enablement_timeout,
141+ include_group=args.include_group,
142+ exclude_group=args.exclude_group,
143+ include_command=args.include_command,
144+ exclude_command=args.exclude_command,
145+ run_once=args.run_once,
146+ expire_time=args.expire_time)
147 except Exception as e:
148 logging.error('{} ({})'.format(e, type(e).__name__))
149 sys.exit(1)
150
151=== modified file 'tests/test_runner.py'
152--- tests/test_runner.py 2015-06-08 23:51:53 +0000
153+++ tests/test_runner.py 2015-06-10 01:20:20 +0000
154@@ -4,9 +4,11 @@
155 import signal
156 import subprocess
157 from StringIO import StringIO
158+from tempfile import NamedTemporaryFile
159 from time import time
160
161 from mock import patch, call
162+import yaml
163
164 from chaos.kill import Kill
165 from chaos_monkey import ChaosMonkey
166@@ -530,7 +532,8 @@
167 total_timeout=10, log_count=2, include_group=None,
168 exclude_group=None, include_command=None,
169 exclude_command=None, dry_run=False,
170- run_once=False, restart=False, expire_time=None))
171+ run_once=False, restart=False, expire_time=None,
172+ replay=None))
173
174 def test_parse_args_non_default_values(self):
175 args = parse_args(['path',
176@@ -543,14 +546,16 @@
177 '--exclude-command', 'deny-incoming',
178 '--dry-run',
179 '--restart',
180- '--expire-time', '111.11'])
181+ '--expire-time', '111.11',
182+ '--replay', '/path/to/foo'])
183 self.assertEqual(
184 args, Namespace(path='path', enablement_timeout=30,
185 total_timeout=600, log_count=4,
186 include_group='net', exclude_group=Kill.group,
187 include_command='deny-all',
188 exclude_command='deny-incoming', dry_run=True,
189- run_once=False, restart=True, expire_time=111.11))
190+ run_once=False, restart=True, expire_time=111.11,
191+ replay='/path/to/foo'))
192
193 def test_parse_args_non_default_values_set_run_once(self):
194 args = parse_args(['path',
195@@ -568,7 +573,8 @@
196 include_group='net', exclude_group=Kill.group,
197 include_command='deny-all',
198 exclude_command='deny-incoming', dry_run=True,
199- run_once=True, restart=False, expire_time=None))
200+ run_once=True, restart=False, expire_time=None,
201+ replay=None))
202
203 def test_parse_args_error_enablement_greater_than_total_timeout(self):
204 with parse_error(self) as stderr:
205@@ -636,14 +642,7 @@
206 with temp_dir() as directory:
207 runner = Runner(directory, ChaosMonkey.factory())
208 runner._run_command(enablement_timeout=0)
209- self.assertEqual(mock.mock_calls, [
210- call(['ufw', 'deny', '37017']),
211- call(['ufw', 'allow', 'in', 'to', 'any']),
212- call(['ufw', '--force', 'enable']),
213- call(['ufw', 'disable']),
214- call(['ufw', 'delete', 'allow', 'in', 'to', 'any']),
215- call(['ufw', 'delete', 'deny', '37017']),
216- ])
217+ self.assertEqual(mock.mock_calls, self._deny_port_call_list())
218
219 def test_run_command_select_restart_unit(self):
220 chaos = self._get_chaos_object(Kill(), Kill.restart_cmd)
221@@ -657,6 +656,80 @@
222 self.assertEqual(mock.mock_calls, [call(['shutdown', '-r', 'now'])])
223 ri_mock.upstart.assert_called_once_with()
224
225+ def test_replay_commands(self):
226+ with patch('utility.check_output', autospec=True) as mock:
227+ with temp_dir() as directory:
228+ runner = Runner(directory, ChaosMonkey.factory())
229+ with NamedTemporaryFile() as temp_file:
230+ self._write_command_list_to_file(temp_file)
231+ args = Namespace(replay=temp_file.name, restart=False)
232+ runner.replay_commands(args)
233+ expected = self._deny_port_call_list()
234+ expected.extend(self._deny_port_call_list('17017'))
235+ self.assertEqual(mock.mock_calls, expected)
236+
237+ def test_replay_commands_with_restart_command(self):
238+ commands = "- [restart-unit, 1]\n- [deny-api-server, 1]\n"
239+ with patch('utility.check_output', autospec=True) as mock:
240+ with patch('runner.Init.install'):
241+ with temp_dir() as directory:
242+ runner = Runner(directory, ChaosMonkey.factory())
243+ with NamedTemporaryFile() as temp_file:
244+ self._write_command_list_to_file(
245+ temp_file, data=commands)
246+ args = Namespace(replay=temp_file.name, restart=False)
247+ runner.replay_commands(args)
248+ # Verify a temporary file is created because there
249+ # is a restart-unit command in the list.
250+ self.assertIs(os.path.isfile(
251+ temp_file.name + runner.replay_filename_ext), True)
252+ args = Namespace(replay=temp_file.name, restart=True)
253+ file_content = runner.get_command_list(args)
254+ # Verify the temporary files is deleted.
255+ self.assertIsNot(os.path.isfile(
256+ temp_file.name + runner.replay_filename_ext), True)
257+ self.assertEqual(mock.mock_calls, [call(['shutdown', '-r', 'now'])])
258+ self.assertEqual(file_content, [yaml.load(commands)[1]])
259+
260+ def test_replay_commands_after_reboot(self):
261+ with patch('utility.check_output', autospec=True) as mock:
262+ with temp_dir() as directory:
263+ runner = Runner(directory, ChaosMonkey.factory())
264+ temp_file = NamedTemporaryFile(
265+ suffix=runner.replay_filename_ext, delete=False)
266+ self._write_command_list_to_file(temp_file)
267+ args = Namespace(
268+ replay=temp_file.name.split('.')[0], restart=True)
269+ runner.replay_commands(args)
270+ # Verify replay_commands() has deleted the temp file.
271+ self.assertIsNot(os.path.isfile(temp_file.name), True)
272+ expected = self._deny_port_call_list()
273+ expected.extend(self._deny_port_call_list('17017'))
274+ self.assertEqual(mock.mock_calls, expected)
275+
276+ def test_get_command_list(self):
277+ with temp_dir() as directory:
278+ runner = Runner(directory, ChaosMonkey.factory())
279+ with NamedTemporaryFile() as temp_file:
280+ self._write_command_list_to_file(temp_file)
281+ args = Namespace(replay=temp_file.name, restart=False)
282+ commands = runner.get_command_list(args)
283+ expected = [['deny-state-server', 1], ['deny-api-server', 1]]
284+ self.assertItemsEqual(commands, expected)
285+
286+ def test_save_replay_command_list(self):
287+ commands = [['deny-state-server', 1], ['deny-api-server', 1]]
288+ with temp_dir() as directory:
289+ runner = Runner(directory, ChaosMonkey.factory())
290+ with NamedTemporaryFile(
291+ suffix=runner.replay_filename_ext) as temp_file:
292+ args = Namespace(replay=temp_file.name.split('.')[0],
293+ restart=False)
294+ runner.save_command_list(commands, args)
295+ file_content = temp_file.read()
296+ expected = yaml.dump(commands)
297+ self.assertItemsEqual(file_content, expected)
298+
299 def _get_chaos_object(self, obj, command_str):
300 for chaos in obj.get_chaos():
301 if chaos.command_str == command_str:
302@@ -665,6 +738,22 @@
303 self.fail("'{}' chaos not found".format(command_str))
304 return chaos
305
306+ def _write_command_list_to_file(self, fd, data=None):
307+ data = ("- [deny-state-server, 1]\n- [deny-api-server, 1]\n"
308+ if not data else data)
309+ fd.write(data)
310+ fd.flush()
311+ return data
312+
313+ def _deny_port_call_list(self, port='37017'):
314+ return [
315+ call(['ufw', 'deny', port]),
316+ call(['ufw', 'allow', 'in', 'to', 'any']),
317+ call(['ufw', '--force', 'enable']),
318+ call(['ufw', 'disable']),
319+ call(['ufw', 'delete', 'allow', 'in', 'to', 'any']),
320+ call(['ufw', 'delete', 'deny', port])]
321+
322
323 def add_fake_group(chaos_monkey):
324 chaos = Chaos(None, None, 'fake_group', 'fake_command_str', 'description')

Subscribers

People subscribed via source and target branches