Merge lp:~akopytov/percona-xtradb-cluster/bug1382797-5.5 into lp:percona-xtradb-cluster/5.5

Proposed by Alexey Kopytov
Status: Merged
Approved by: Raghavendra D Prabhu
Approved revision: no longer in the source branch.
Merged at revision: 835
Proposed branch: lp:~akopytov/percona-xtradb-cluster/bug1382797-5.5
Merge into: lp:percona-xtradb-cluster/5.5
Diff against target: 265 lines (+110/-100)
2 files modified
sql/wsrep_utils.cc (+96/-100)
storage/innobase/os/os0proc.c (+14/-0)
To merge this branch: bzr merge lp:~akopytov/percona-xtradb-cluster/bug1382797-5.5
Reviewer Review Type Date Requested Status
Alexey Kopytov (community) Needs Fixing
Raghavendra D Prabhu Pending
Review via email: mp+243696@code.launchpad.net

Description of the change

Bug #1382797: Modify SST code to use fork()/exec() to allow cleanup on
              fatal signals
Bug #1399175: Incorrect POSIX_SPAWN_SETSIGDEF usage in wsrep_utils.cc

Modified wsp::process() to use fork()/exec() instead of posix_spawn() to
have more control on the child process. More specifically:

- the child process now calls prctl() with appropriate arguments (if
  available) before calling exec() so that the SIGTERM signal is
  received by it in case the parent process (i.e. mysqld) is terminated
  either gracefully or ungracefully.

- the child process also call setsid() to create a new session and a
  process group. This is to simplify killing all spawned processes at
  once for child processes. Instead of keeping track of all processes
  started by an SST script and sending termination signals to the
  individually, an SST script can simply kill the entire process group.

Additionally, an optimization to speed up fork() has been implemented to
reduce page table copying overhead as compared to vfork() /
posix_spawn(). Since the InnoDB buffer pool is the major part of mysqld
RSS in most installations, use madvise(..., MADV_DONTFORK) to exclude
buffer pool from memory available to the child on fork().

This revision also fixes bug #1399175 as a side effect, since all code
related to the posix_spawn*() family of functions is replaced.

To post a comment you must log in.
Revision history for this message
Alexey Kopytov (akopytov) wrote :

Forgot to add signal signal-related code. Will repush.

review: Needs Fixing
Revision history for this message
Alexey Kopytov (akopytov) wrote :

Passes Jenkins now.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'sql/wsrep_utils.cc'
--- sql/wsrep_utils.cc 2013-11-19 07:24:09 +0000
+++ sql/wsrep_utils.cc 2014-12-08 16:17:50 +0000
@@ -25,7 +25,6 @@
2525
26#include <sql_class.h>26#include <sql_class.h>
2727
28#include <spawn.h> // posix_spawn()
29#include <unistd.h> // pipe()28#include <unistd.h> // pipe()
30#include <errno.h> // errno29#include <errno.h> // errno
31#include <string.h> // strerror()30#include <string.h> // strerror()
@@ -34,6 +33,10 @@
34#include <sys/socket.h>33#include <sys/socket.h>
35#include <netdb.h> // getaddrinfo()34#include <netdb.h> // getaddrinfo()
3635
36#ifdef HAVE_SYS_PRCTL_H
37#include <sys/prctl.h>
38#endif
39
37extern char** environ; // environment variables40extern char** environ; // environment variables
3841
39static wsp::string wsrep_PATH;42static wsp::string wsrep_PATH;
@@ -89,13 +92,12 @@
89#define STDIN_FD 092#define STDIN_FD 0
90#define STDOUT_FD 193#define STDOUT_FD 1
9194
92#ifndef POSIX_SPAWN_USEVFORK
93# define POSIX_SPAWN_USEVFORK 0
94#endif
95
96process::process (const char* cmd, const char* type)95process::process (const char* cmd, const char* type)
97 : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0)96 : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(0), pid_(0)
98{97{
98 int sig;
99 struct sigaction sa;
100
99 if (0 == str_)101 if (0 == str_)
100 {102 {
101 WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd));103 WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd));
@@ -128,105 +130,99 @@
128 int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ);130 int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ);
129 int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD);131 int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD);
130132
131 char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL };133 pid_ = fork();
132 if (!(pargv[0] && pargv[1] && pargv[2]))134
133 {135 if (pid_ == -1)
134 err_ = ENOMEM;136 {
135 WSREP_ERROR ("Failed to allocate pargv[] array.");137 err_= errno;
136 goto cleanup_pipe;138 WSREP_ERROR ("fork() failed: %d (%s)", err_, strerror(err_));
137 }139 pid_ = 0;
138140 goto cleanup;
139 posix_spawnattr_t attr;141 }
140 err_ = posix_spawnattr_init (&attr);142 else if (pid_ > 0)
141 if (err_)143 {
142 {144 /* Parent */
143 WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)",145
144 err_, strerror(err_));146 io_ = fdopen (pipe_fds[parent_end], type);
145 goto cleanup_pipe;147
146 }148 if (io_)
147149 {
148 err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF |
149 POSIX_SPAWN_USEVFORK);
150 if (err_)
151 {
152 WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)",
153 err_, strerror(err_));
154 goto cleanup_attr;
155 }
156
157 posix_spawn_file_actions_t fact;
158 err_ = posix_spawn_file_actions_init (&fact);
159 if (err_)
160 {
161 WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)",
162 err_, strerror(err_));
163 goto cleanup_attr;
164 }
165
166 // close child's stdout|stdin depending on what we returning
167 err_ = posix_spawn_file_actions_addclose (&fact, close_fd);
168 if (err_)
169 {
170 WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)",
171 err_, strerror(err_));
172 goto cleanup_fact;
173 }
174
175 // substitute our pipe descriptor in place of the closed one
176 err_ = posix_spawn_file_actions_adddup2 (&fact,
177 pipe_fds[child_end], close_fd);
178 if (err_)
179 {
180 WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)",
181 err_, strerror(err_));
182 goto cleanup_fact;
183 }
184
185 err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, environ);
186 if (err_)
187 {
188 WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)",
189 pargv[2], err_, strerror(err_));
190 pid_ = 0; // just to make sure it was not messed up in the call
191 goto cleanup_fact;
192 }
193
194 io_ = fdopen (pipe_fds[parent_end], type);
195
196 if (io_)
197 {
198 pipe_fds[parent_end] = -1; // skip close on cleanup150 pipe_fds[parent_end] = -1; // skip close on cleanup
199 }151 }
200 else152 else
201 {153 {
202 err_ = errno;154 err_ = errno;
203 WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_));155 WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_));
204 }156 }
205157
206cleanup_fact:158 goto cleanup;
207 int err; // to preserve err_ code159 }
208 err = posix_spawn_file_actions_destroy (&fact);160
209 if (err)161 /* Child */
210 {162
211 WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n",163#ifdef PR_SET_PDEATHSIG
212 err, strerror(err));164 /*
213 }165 Ensure this process gets SIGTERM when the server is terminated for
214166 whatever reasons.
215cleanup_attr:167 */
216 err = posix_spawnattr_destroy (&attr);168
217 if (err)169 if (prctl(PR_SET_PDEATHSIG, SIGTERM))
218 {170 {
219 WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)",171 sql_perror("prctl() failed");
220 err, strerror(err));172 _exit(EXIT_FAILURE);
221 }173 }
222174#endif
223cleanup_pipe:175
176 /* Reset all ignored signals to SIG_DFL */
177
178 memset(&sa, 0, sizeof(sa));
179 sa.sa_handler = SIG_DFL;
180
181 for (sig = 1; sig < NSIG; sig++)
182 {
183 /*
184 For some signals sigaction() even when SIG_DFL handler is set, so don't
185 check for return value here.
186 */
187 sigaction(sig, &sa, NULL);
188 }
189
190 /*
191 Make the child process a session/group leader. This is required to
192 simplify cleanup procedure for SST process, so that all processes spawned
193 by the SST script could be killed by killing the entire process group.
194 */
195
196 if (setsid() < 0)
197 {
198 sql_perror("setsid() failed");
199 _exit(EXIT_FAILURE);
200 }
201
202 /* close child's stdout|stdin depending on what we returning */
203
204 if (close(close_fd) < 0)
205 {
206 sql_perror("close() failed");
207 _exit(EXIT_FAILURE);
208 }
209
210 /* substitute our pipe descriptor in place of the closed one */
211
212 if (dup2(pipe_fds[child_end], close_fd) < 0)
213 {
214 sql_perror("dup2() failed");
215 _exit(EXIT_FAILURE);
216 }
217
218 execlp("sh", "sh", "-c", str_, NULL);
219
220 sql_perror("execlp() failed");
221 _exit(EXIT_FAILURE);
222
223cleanup:
224 if (pipe_fds[0] >= 0) close (pipe_fds[0]);224 if (pipe_fds[0] >= 0) close (pipe_fds[0]);
225 if (pipe_fds[1] >= 0) close (pipe_fds[1]);225 if (pipe_fds[1] >= 0) close (pipe_fds[1]);
226
227 free (pargv[0]);
228 free (pargv[1]);
229 free (pargv[2]);
230}226}
231227
232process::~process ()228process::~process ()
233229
=== modified file 'storage/innobase/os/os0proc.c'
--- storage/innobase/os/os0proc.c 2013-08-13 11:26:01 +0000
+++ storage/innobase/os/os0proc.c 2014-12-08 16:17:50 +0000
@@ -38,6 +38,10 @@
38#include <sys/utsname.h> /* uname() */38#include <sys/utsname.h> /* uname() */
39#endif39#endif
4040
41#if defined(WITH_WSREP) && defined(UNIV_LINUX)
42#include <sys/mman.h> /* madvise() */
43#endif
44
41/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and45/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
42MAP_ANON but MAP_ANON is marked as deprecated */46MAP_ANON but MAP_ANON is marked as deprecated */
43#if defined(MAP_ANONYMOUS)47#if defined(MAP_ANONYMOUS)
@@ -229,6 +233,16 @@
229 memset(ptr, '\0', size);233 memset(ptr, '\0', size);
230 }234 }
231235
236#if defined(WITH_WSREP) && defined(UNIV_LINUX)
237 /* Do not make the pages from this block available to the child after a
238 fork(). This is required to speed up process spawning for Galera SST. */
239
240 if (madvise(ptr, size, MADV_DONTFORK)) {
241 fprintf(stderr, "InnoDB: Warning: madvise(MADV_DONTFORK) is "
242 "not supported by the kernel. Spawning SST processes "
243 "can be slow.\n");
244 }
245#endif
232 return(ptr);246 return(ptr);
233}247}
234248

Subscribers

People subscribed via source and target branches