Merge lp:~jtv/maas/bug-1059453 into lp:maas/trunk

Proposed by Jeroen T. Vermeulen on 2012-10-03
Status: Merged
Approved by: Gavin Panella on 2012-10-03
Approved revision: 1142
Merged at revision: 1147
Proposed branch: lp:~jtv/maas/bug-1059453
Merge into: lp:maas/trunk
Diff against target: 145 lines (+27/-40)
3 files modified
src/provisioningserver/start_cluster_controller.py (+11/-8)
src/provisioningserver/tests/test_start_cluster_controller.py (+15/-31)
src/provisioningserver/utils.py (+1/-1)
To merge this branch: bzr merge lp:~jtv/maas/bug-1059453
Reviewer Review Type Date Requested Status
Gavin Panella (community) 2012-10-03 Approve on 2012-10-03
Review via email: mp+127680@code.launchpad.net

Commit Message

Run celeryd synchronously, so Upstart doesn't need to track fork()s (many of which precede the operative one as maas-provision starts up).

Description of the Change

Discussed with Julian and many others. The "maas-provision start-cluster-controller" command will now keep running until either the cluster controller is rejected by an administrator, or celeryd exits for whatever reason.

Jeroen

To post a comment you must log in.
Gavin Panella (allenap) wrote :

[1]

+    return_code = check_call(command, env=env, preexec_fn=Become(uid, gid))
+    if return_code != 0:
+        raise SystemExit(return_code)

So, we've gone full-circle :) Which means we also get back to my
original comment: just exec here. It's absolutely pointless leaving an
idle Python interpreter around just to wait for a return code.

Also, check_call will raise a CalledProcessError if the process exits
with a non-zero code, so the conditional branch above will never be
taken.

review: Needs Fixing
Jeroen T. Vermeulen (jtv) wrote :

Changes made. I've got a final test run going that I fully expect to pass; I've set things up to commit and push after a successful run.

lp:~jtv/maas/bug-1059453 updated on 2012-10-03
1142. By Jeroen T. Vermeulen on 2012-10-03

Review changes: don't fork, just exec.

Gavin Panella (allenap) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/provisioningserver/start_cluster_controller.py'
2--- src/provisioningserver/start_cluster_controller.py 2012-10-02 10:20:17 +0000
3+++ src/provisioningserver/start_cluster_controller.py 2012-10-03 11:41:38 +0000
4@@ -137,12 +137,12 @@
5 '-Q', get_cluster_uuid(),
6 ]
7
8- pid = os.fork()
9- if pid == 0:
10- # Child process. Become the right user, and start celery.
11- os.setuid(uid)
12- os.setgid(gid)
13- os.execvpe(command[0], command, env)
14+ # Change gid first, just in case changing the uid might deprive
15+ # us of the privileges required to setgid.
16+ os.setgid(gid)
17+ os.setuid(uid)
18+
19+ os.execvpe(command[0], command, env=env)
20
21
22 def request_refresh(server_url):
23@@ -161,9 +161,12 @@
24 This starts up celeryd, listening to the broker that the region
25 controller pointed us to, and on the appropriate queue.
26 """
27+ # Get the region controller to send out credentials. If it arrives
28+ # before celeryd has started up, we should find the message waiting
29+ # in our queue. Even if we're new and the queue did not exist yet,
30+ # the arriving task will create the queue.
31+ request_refresh(server_url)
32 start_celery(connection_details, user=user, group=group)
33- sleep(10)
34- request_refresh(server_url)
35
36
37 def run(args):
38
39=== modified file 'src/provisioningserver/tests/test_start_cluster_controller.py'
40--- src/provisioningserver/tests/test_start_cluster_controller.py 2012-10-02 10:20:17 +0000
41+++ src/provisioningserver/tests/test_start_cluster_controller.py 2012-10-03 11:41:38 +0000
42@@ -87,10 +87,9 @@
43 self.patch(start_cluster_controller, 'getpwnam')
44 start_cluster_controller.getpwnam.pw_uid = randint(3000, 4000)
45 start_cluster_controller.getpwnam.pw_gid = randint(3000, 4000)
46- self.patch(os, 'fork').side_effect = Executing()
47- self.patch(os, 'execvpe').side_effect = Executing()
48 self.patch(os, 'setuid')
49 self.patch(os, 'setgid')
50+ self.patch(os, 'execvpe').side_effect = Executing()
51 get_uuid = self.patch(start_cluster_controller, 'get_cluster_uuid')
52 get_uuid.return_value = factory.getRandomUUID()
53
54@@ -133,36 +132,19 @@
55 """Prepare to return "request pending" from API request."""
56 self.prepare_response(httplib.ACCEPTED)
57
58- def pretend_to_fork_into_child(self):
59- """Make `fork` act as if it's returning into the child process.
60-
61- The start_cluster_controller child process then executes celeryd,
62- so this call also patches up the call that does that so it pretends
63- to be successful.
64- """
65- self.patch(os, 'fork').return_value = 0
66- self.patch(os, 'execvpe')
67-
68- def pretend_to_fork_into_parent(self):
69- """Make `fork` act as if it's returning into the parent process."""
70- self.patch(os, 'fork').return_value = randint(2, 65535)
71-
72 def test_run_command(self):
73 # We can't really run the script, but we can verify that (with
74 # the right system functions patched out) we can run it
75 # directly.
76- self.pretend_to_fork_into_child()
77- self.patch(start_cluster_controller, 'sleep')
78+ start_cluster_controller.sleep.side_effect = None
79 self.prepare_success_response()
80 parser = ArgumentParser()
81 start_cluster_controller.add_arguments(parser)
82- start_cluster_controller.run(parser.parse_args((make_url(),)))
83- self.assertEqual(1, os.fork.call_count)
84+ self.assertRaises(
85+ Executing,
86+ start_cluster_controller.run,
87+ parser.parse_args((make_url(),)))
88 self.assertEqual(1, os.execvpe.call_count)
89- os.setuid.assert_called_once_with(
90- start_cluster_controller.getpwnam.return_value.pw_uid)
91- os.setgid.assert_called_once_with(
92- start_cluster_controller.getpwnam.return_value.pw_gid)
93
94 def test_uses_given_url(self):
95 url = make_url('region')
96@@ -227,13 +209,14 @@
97 (server_url, connection_details))
98
99 def test_start_up_calls_refresh_secrets(self):
100+ start_cluster_controller.sleep.side_effect = None
101 url = make_url('region')
102 connection_details = self.make_connection_details()
103- self.pretend_to_fork_into_parent()
104- self.patch(start_cluster_controller, 'sleep')
105 self.prepare_success_response()
106
107- start_cluster_controller.start_up(
108+ self.assertRaises(
109+ Executing,
110+ start_cluster_controller.start_up,
111 url, connection_details,
112 factory.make_name('user'), factory.make_name('group'))
113
114@@ -246,13 +229,14 @@
115 self.assertEqual("refresh_workers", post["op"])
116
117 def test_start_up_ignores_failure_on_refresh_secrets(self):
118- self.pretend_to_fork_into_parent()
119- self.patch(start_cluster_controller, 'sleep')
120+ start_cluster_controller.sleep.side_effect = None
121 self.patch(MAASDispatcher, 'dispatch_query').side_effect = URLError(
122 "Simulated HTTP failure.")
123
124- start_cluster_controller.start_up(
125+ self.assertRaises(
126+ Executing,
127+ start_cluster_controller.start_up,
128 make_url(), self.make_connection_details(),
129 factory.make_name('user'), factory.make_name('group'))
130
131- self.assertEqual(1, os.fork.call_count)
132+ self.assertEqual(1, os.execvpe.call_count)
133
134=== modified file 'src/provisioningserver/utils.py'
135--- src/provisioningserver/utils.py 2012-09-25 05:21:24 +0000
136+++ src/provisioningserver/utils.py 2012-10-03 11:41:38 +0000
137@@ -376,7 +376,7 @@
138 try:
139 self.setup()
140 self.execute(argv)
141- except CalledProcessError, error:
142+ except CalledProcessError as error:
143 # Print error.cmd and error.output too?
144 raise SystemExit(error.returncode)
145 except KeyboardInterrupt: