Merge lp:~laurynas-biveinis/percona-server/bug1012715-5.1 into lp:percona-server/5.1

Proposed by Laurynas Biveinis
Status: Superseded
Proposed branch: lp:~laurynas-biveinis/percona-server/bug1012715-5.1
Merge into: lp:percona-server/5.1
Diff against target: 281 lines (+186/-38)
5 files modified
Percona-Server/mysql-test/suite/rpl/r/rpl_percona_crash_resistant_rpl.result (+42/-0)
Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl-slave.opt (+1/-0)
Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl.test (+95/-0)
Percona-Server/storage/innodb_plugin/handler/ha_innodb.cc (+31/-20)
Percona-Server/storage/innodb_plugin/trx/trx0trx.c (+17/-18)
To merge this branch: bzr merge lp:~laurynas-biveinis/percona-server/bug1012715-5.1
Reviewer Review Type Date Requested Status
Laurynas Biveinis (community) Needs Fixing
Review via email: mp+112346@code.launchpad.net

This proposal has been superseded by a proposal from 2012-06-29.

Description of the change

Fix bug 1012715 (Crash resistant replication breaks with binlog XA
transaction recovery). The cause is that the slave position is recorded
in the InnoDB transaction system header at the XA COMMIT stage. If a
crash happens between the XA PREPARE and XA COMMIT stages, the prepared
InnoDB transaction will not have the slave position recorded and thus
will fail to update it once it is replayed during binlog crash
recovery. The slave log position update is an inherent part of the
changes made by the transaction, so the correct place to record it is
the XA PREPARE stage.

- Store the slave log positions in the current InnoDB transaction
  object in innobase_xa_prepare() instead of innobase_commit_low().
- Write the slave log positions held by the current InnoDB transaction
  to the transaction system header page in trx_prepare_off_kernel()
  instead of trx_commit_off_kernel().
- New crash injection site "crash_innodb_before_commit" at
  innobase_commit() that triggers just before the "real" transaction
  commits.
- New test case rpl_percona_crash_resistant_rpl to test both this bug
  fix and the feature in general.

Jenkins: http://jenkins.percona.com/job/percona-server-5.1-param/333/

Issue #22478

To post a comment you must log in.
Revision history for this message
Laurynas Biveinis (laurynas-biveinis) wrote :

Found one issue myself.

The test case has one sync bug: lines 88--91 do not sync the slave with the master first, which might result in the slave being shut down while the slave SQL thread is still executing.

review: Needs Fixing

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'Percona-Server/mysql-test/suite/rpl/r/rpl_percona_crash_resistant_rpl.result'
2--- Percona-Server/mysql-test/suite/rpl/r/rpl_percona_crash_resistant_rpl.result 1970-01-01 00:00:00 +0000
3+++ Percona-Server/mysql-test/suite/rpl/r/rpl_percona_crash_resistant_rpl.result 2012-06-28 14:03:20 +0000
4@@ -0,0 +1,42 @@
5+include/master-slave.inc
6+[connection master]
7+DROP TABLE IF EXISTS t1;
8+CREATE TABLE t1 (id INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB;
9+INSERT INTO t1 VALUES ();
10+SELECT COUNT(*) FROM t1;
11+COUNT(*)
12+1
13+include/rpl_restart_server.inc [server_number=2]
14+include/start_slave.inc
15+SELECT COUNT(*) FROM t1;
16+COUNT(*)
17+1
18+STOP SLAVE;
19+include/wait_for_slave_to_stop.inc
20+INSERT INTO t1 VALUES();
21+SELECT COUNT(*) FROM t1;
22+COUNT(*)
23+2
24+SET GLOBAL debug="d,crash_commit_before";
25+START SLAVE;
26+include/rpl_start_server.inc [server_number=2]
27+include/start_slave.inc
28+SELECT COUNT(*) FROM t1;
29+COUNT(*)
30+2
31+STOP SLAVE;
32+include/wait_for_slave_to_stop.inc
33+INSERT INTO t1 VALUES();
34+SELECT COUNT(*) FROM t1;
35+COUNT(*)
36+3
37+SET GLOBAL debug="d,crash_innodb_before_commit";
38+START SLAVE;
39+include/rpl_start_server.inc [server_number=2]
40+include/start_slave.inc
41+SELECT COUNT(*) FROM t1;
42+COUNT(*)
43+3
44+DROP TABLE t1;
45+STOP SLAVE;
46+include/wait_for_slave_to_stop.inc
47
48=== added file 'Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl-slave.opt'
49--- Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl-slave.opt 1970-01-01 00:00:00 +0000
50+++ Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl-slave.opt 2012-06-28 14:03:20 +0000
51@@ -0,0 +1,1 @@
52+--innodb-overwrite-relay-log-info=TRUE --skip-core-file --skip-stack-trace
53
54=== added file 'Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl.test'
55--- Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl.test 1970-01-01 00:00:00 +0000
56+++ Percona-Server/mysql-test/suite/rpl/t/rpl_percona_crash_resistant_rpl.test 2012-06-28 14:03:20 +0000
57@@ -0,0 +1,95 @@
58+# Tests for Percona crash-resistant replication feature
59+--source include/have_innodb_plugin.inc
60+--source include/master-slave.inc
61+--source include/not_valgrind.inc
62+--source include/not_crashrep.inc
63+--source include/have_debug.inc
64+
65+#
66+# Setup
67+#
68+
69+--disable_query_log
70+call mtr.add_suppression("InnoDB: Warning: innodb_overwrite_relay_log_info is enabled.");
71+--enable_query_log
72+
73+connection master;
74+
75+--disable_warnings
76+DROP TABLE IF EXISTS t1;
77+--enable_warnings
78+
79+CREATE TABLE t1 (id INT(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB;
80+
81+#
82+# Test the non-crashing case
83+#
84+
85+INSERT INTO t1 VALUES ();
86+SELECT COUNT(*) FROM t1;
87+
88+sync_slave_with_master;
89+--let $rpl_server_number= 2
90+--source include/rpl_restart_server.inc
91+--source include/start_slave.inc
92+SELECT COUNT(*) FROM t1;
93+
94+#
95+# Test the crashing case where relay-log.info needs not to be overwritten
96+#
97+
98+STOP SLAVE;
99+--source include/wait_for_slave_to_stop.inc
100+
101+connection master;
102+INSERT INTO t1 VALUES();
103+SELECT COUNT(*) FROM t1;
104+
105+connection slave;
106+SET GLOBAL debug="d,crash_commit_before";
107+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
108+--error 0,2013
109+START SLAVE;
110+--source include/wait_until_disconnected.inc
111+--enable_reconnect
112+
113+--let $rpl_server_number= 2
114+--source include/rpl_start_server.inc
115+--source include/start_slave.inc
116+connection master;
117+sync_slave_with_master;
118+SELECT COUNT(*) FROM t1;
119+
120+#
121+# Test crash with XA transaction recovery (bug 1012715)
122+#
123+STOP SLAVE;
124+--source include/wait_for_slave_to_stop.inc
125+connection master;
126+INSERT INTO t1 VALUES();
127+SELECT COUNT(*) FROM t1;
128+
129+connection slave;
130+SET GLOBAL debug="d,crash_innodb_before_commit";
131+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
132+--error 0,2013
133+START SLAVE;
134+--source include/wait_until_disconnected.inc
135+--enable_reconnect
136+
137+--let $rpl_server_number= 2
138+--source include/rpl_start_server.inc
139+--source include/start_slave.inc
140+SELECT COUNT(*) FROM t1;
141+
142+#
143+# Cleanup
144+#
145+
146+connection master;
147+DROP TABLE t1;
148+
149+sync_slave_with_master;
150+
151+STOP SLAVE;
152+--source include/wait_for_slave_to_stop.inc
153
154=== modified file 'Percona-Server/storage/innodb_plugin/handler/ha_innodb.cc'
155--- Percona-Server/storage/innodb_plugin/handler/ha_innodb.cc 2012-05-09 04:14:12 +0000
156+++ Percona-Server/storage/innodb_plugin/handler/ha_innodb.cc 2012-06-28 14:03:20 +0000
157@@ -2770,26 +2770,6 @@
158 return;
159 }
160
161-#ifdef HAVE_REPLICATION
162-#ifdef MYSQL_SERVER
163- THD *thd=current_thd;
164-
165- if (thd && thd->slave_thread) {
166- /* Update the replication position info inside InnoDB */
167- trx->mysql_master_log_file_name
168- = active_mi->rli.group_master_log_name;
169- trx->mysql_master_log_pos
170- = ((ib_int64_t)active_mi->rli.group_master_log_pos +
171- ((ib_int64_t)active_mi->rli.future_event_relay_log_pos -
172- (ib_int64_t)active_mi->rli.group_relay_log_pos));
173- trx->mysql_relay_log_file_name
174- = active_mi->rli.group_relay_log_name;
175- trx->mysql_relay_log_pos
176- = (ib_int64_t)active_mi->rli.future_event_relay_log_pos;
177- }
178-#endif /* MYSQL_SERVER */
179-#endif /* HAVE_REPLICATION */
180-
181 trx_commit_for_mysql(trx);
182 }
183
184@@ -2898,6 +2878,9 @@
185 if (all
186 || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
187
188+ DBUG_EXECUTE_IF("crash_innodb_before_commit",
189+ DBUG_SUICIDE(););
190+
191 /* We were instructed to commit the whole transaction, or
192 this is an SQL statement end and autocommit is on */
193
194@@ -10657,6 +10640,34 @@
195
196 ut_ad(trx->active_trans);
197
198+ /* Update the replication position info inside InnoDB. This is
199+ different from the binlog position update that happens during
200+ XA COMMIT. In contrast to that, the slave position is an
201+ actual part of the changes made by this transaction and thus
202+ must be updated in the XA PREPARE stage. */
203+
204+ /* Since currently there might be only one slave SQL thread, we
205+ don't need to take any precautions (e.g. prepare_commit_mutex)
206+ to ensure position ordering. Revisit this in 5.6 which has
207+ both the multi-threaded replication to cause us problems and
208+ the group commit to solve them. */
209+
210+ if (thd->slave_thread) {
211+ const Relay_log_info* rli = &active_mi->rli;
212+
213+ trx->mysql_master_log_file_name
214+ = rli->group_master_log_name;
215+ trx->mysql_master_log_pos
216+ = ((ib_int64_t)rli->group_master_log_pos
217+ + ((ib_int64_t)
218+ rli->future_event_relay_log_pos
219+ - (ib_int64_t)rli->group_relay_log_pos));
220+ trx->mysql_relay_log_file_name
221+ = rli->group_relay_log_name;
222+ trx->mysql_relay_log_pos
223+ = (ib_int64_t)rli->future_event_relay_log_pos;
224+ }
225+
226 error = (int) trx_prepare_for_mysql(trx);
227 } else {
228 /* We just mark the SQL statement ended and do not do a
229
230=== modified file 'Percona-Server/storage/innodb_plugin/trx/trx0trx.c'
231--- Percona-Server/storage/innodb_plugin/trx/trx0trx.c 2012-04-02 02:09:15 +0000
232+++ Percona-Server/storage/innodb_plugin/trx/trx0trx.c 2012-06-28 14:03:20 +0000
233@@ -895,24 +895,6 @@
234 trx->mysql_log_file_name = NULL;
235 }
236
237- if (trx->mysql_master_log_file_name[0] != '\0') {
238- /* This database server is a MySQL replication slave */
239- if (!sys_header) {
240- sys_header = trx_sysf_get(&mtr);
241- }
242- trx_sys_update_mysql_binlog_offset(
243- sys_header,
244- trx->mysql_relay_log_file_name,
245- trx->mysql_relay_log_pos,
246- TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
247- trx_sys_update_mysql_binlog_offset(
248- sys_header,
249- trx->mysql_master_log_file_name,
250- trx->mysql_master_log_pos,
251- TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
252- trx->mysql_master_log_file_name = "";
253- }
254-
255 /* The following call commits the mini-transaction, making the
256 whole transaction committed in the file-based world, at this
257 log sequence number. The transaction becomes 'durable' when
258@@ -2002,6 +1984,23 @@
259
260 mutex_exit(&(rseg->mutex));
261
262+ if (trx->mysql_master_log_file_name[0] != '\0') {
263+ /* This database server is a MySQL replication slave */
264+ trx_sysf_t* sys_header = trx_sysf_get(&mtr);
265+
266+ trx_sys_update_mysql_binlog_offset(
267+ sys_header,
268+ trx->mysql_relay_log_file_name,
269+ trx->mysql_relay_log_pos,
270+ TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
271+ trx_sys_update_mysql_binlog_offset(
272+ sys_header,
273+ trx->mysql_master_log_file_name,
274+ trx->mysql_master_log_pos,
275+ TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
276+ trx->mysql_master_log_file_name = "";
277+ }
278+
279 /*--------------*/
280 mtr_commit(&mtr); /* This mtr commit makes the
281 transaction prepared in the file-based

Subscribers

People subscribed via source and target branches