Merge lp:~percona-toolkit-dev/percona-toolkit/pt-osc-retry-timeouts into lp:percona-toolkit/2.2

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 547
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-osc-retry-timeouts
Merge into: lp:percona-toolkit/2.2
Diff against target: 560 lines (+190/-80)
6 files modified
bin/pt-online-schema-change (+166/-78)
lib/Retry.pm (+2/-0)
t/pt-online-schema-change/metadata_locks.t (+4/-2)
t/pt-online-schema-change/samples/stats-dry-run.txt (+6/-0)
t/pt-online-schema-change/samples/stats-execute-5.5.txt (+6/-0)
t/pt-online-schema-change/samples/stats-execute.txt (+6/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-osc-retry-timeouts
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+151854@code.launchpad.net
To post a comment you must log in.
550. By Daniel Nichter

Update sample file with --tries output.

Revision history for this message
Daniel Nichter (daniel-nichter) wrote :

Tool's tests pass in all envs.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-online-schema-change'
2--- bin/pt-online-schema-change 2013-03-04 18:08:22 +0000
3+++ bin/pt-online-schema-change 2013-03-05 23:44:21 +0000
4@@ -3578,6 +3578,8 @@
5 use English qw(-no_match_vars);
6 use constant PTDEBUG => $ENV{PTDEBUG} || 0;
7
8+use Time::HiRes qw(sleep);
9+
10 sub new {
11 my ( $class, %args ) = @_;
12 my $self = {
13@@ -7617,6 +7619,13 @@
14 . $n_chunk_index_cols);
15 }
16
17+ my $tries = eval {
18+ validate_tries($o);
19+ };
20+ if ( $EVAL_ERROR ) {
21+ $o->save_error($EVAL_ERROR);
22+ }
23+
24 if ( !$o->get('help') ) {
25 if ( @ARGV ) {
26 $o->save_error('Specify only one DSN on the command line');
27@@ -7974,6 +7983,17 @@
28 );
29
30 # ########################################################################
31+ # Print --tries.
32+ # ########################################################################
33+ print "Operation, tries, wait:\n";
34+ {
35+ my $fmt = " %s, %s, %s\n";
36+ foreach my $op ( sort keys %$tries ) {
37+ printf $fmt, $op, $tries->{$op}->{tries}, $tries->{$op}->{wait};
38+ }
39+ }
40+
41+ # ########################################################################
42 # Get child tables of the original table, if necessary.
43 # ########################################################################
44 my $child_tables;
45@@ -8427,7 +8447,7 @@
46 Quoter => $q,
47 OptionParser => $o,
48 Retry => $retry,
49- retries => $o->get('retries'),
50+ tries => $tries,
51 stats => \%stats,
52 );
53 }
54@@ -8453,7 +8473,7 @@
55 Quoter => $q,
56 OptionParser => $o,
57 Retry => $retry,
58- retries => $o->get('retries'),
59+ tries => $tries,
60 stats => \%stats,
61 );
62 };
63@@ -8642,7 +8662,7 @@
64 # Exec and time the chunk checksum query.
65 $tbl->{nibble_time} = exec_nibble(
66 %args,
67- retries => $o->get('retries'),
68+ tries => $tries,
69 Retry => $retry,
70 Quoter => $q,
71 stats => \%stats,
72@@ -8852,7 +8872,7 @@
73 Quoter => $q,
74 OptionParser => $o,
75 Retry => $retry,
76- retries => $o->get('retries'),
77+ tries => $tries,
78 stats => \%stats,
79 );
80 };
81@@ -8900,7 +8920,7 @@
82 TableParser => $tp,
83 stats => \%stats,
84 Retry => $retry,
85- retries => $o->get('retries'),
86+ tries => $tries,
87 );
88 }
89 elsif ( $alter_fk_method eq 'drop_swap' ) {
90@@ -8911,7 +8931,7 @@
91 OptionParser => $o,
92 stats => \%stats,
93 Retry => $retry,
94- retries => $o->get('retries'),
95+ tries => $tries,
96 );
97 }
98 elsif ( !$alter_fk_method
99@@ -9003,6 +9023,56 @@
100 # Subroutines.
101 # ############################################################################
102
103+sub validate_tries {
104+ my ($o) = @_;
105+ my @ops = qw(
106+ create_triggers
107+ drop_triggers
108+ copy_rows
109+ swap_tables
110+ update_foreign_keys
111+ );
112+ my %user_tries;
113+ my $user_tries = $o->get('tries');
114+ if ( $user_tries ) {
115+ foreach my $var_val ( @$user_tries ) {
116+ my ($op, $tries, $wait) = split(':', $var_val);
117+ die "Invalid --tries value: $var_val\n" unless $op && $tries && $wait;
118+ die "Invalid --tries operation: $op\n" unless grep { $op eq $_ } @ops;
119+ die "Invalid --tries tries: $tries\n" unless $tries > 0;
120+ die "Invalid --tries wait: $wait\n" unless $wait > 0;
121+ $user_tries{$op} = {
122+ tries => $tries,
123+ wait => $wait,
124+ };
125+ }
126+ }
127+
128+ my %default_tries;
129+ my $default_tries = $o->read_para_after(__FILE__, qr/MAGIC_tries/);
130+ if ( $default_tries ) {
131+ %default_tries = map {
132+ my $var_val = $_;
133+ my ($op, $tries, $wait) = $var_val =~ m/(\S+)/g;
134+ die "Invalid --tries value: $var_val\n" unless $op && $tries && $wait;
135+ die "Invalid --tries operation: $op\n" unless grep { $op eq $_ } @ops;
136+ die "Invalid --tries tries: $tries\n" unless $tries > 0;
137+ die "Invalid --tries wait: $wait\n" unless $wait > 0;
138+ $op => {
139+ tries => $tries,
140+ wait => $wait,
141+ };
142+ } grep { m/^\s+\w+\s+\d+\s+[\d\.]+/ } split("\n", $default_tries);
143+ }
144+
145+ my %tries = (
146+ %default_tries, # first the tool's defaults
147+ %user_tries, # then the user's which overwrite the defaults
148+ );
149+ PTDEBUG && _d('--tries:', Dumper(\%tries));
150+ return \%tries;
151+}
152+
153 sub check_alter {
154 my (%args) = @_;
155 my @required_args = qw(alter tbl dry_run Cxn TableParser);
156@@ -9300,15 +9370,15 @@
157
158 sub swap_tables {
159 my (%args) = @_;
160- my @required_args = qw(orig_tbl new_tbl Cxn Quoter OptionParser Retry retries stats);
161+ my @required_args = qw(orig_tbl new_tbl Cxn Quoter OptionParser Retry tries stats);
162 foreach my $arg ( @required_args ) {
163 die "I need a $arg argument" unless $args{$arg};
164 }
165- my ($orig_tbl, $new_tbl, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args};
166+ my ($orig_tbl, $new_tbl, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args};
167
168 my $prefix = '_';
169 my $table_name = $orig_tbl->{tbl} . ($args{suffix} || '');
170- my $tries = 10; # don't try forever
171+ my $name_tries = 10; # don't try forever
172 my $table_exists = qr/table.+?already exists/i;
173
174 # This sub only works for --execute. Since the options are
175@@ -9326,7 +9396,7 @@
176 elsif ( $o->get('execute') ) {
177 print "Swapping tables...\n";
178
179- while ( $tries-- ) {
180+ while ( $name_tries-- ) {
181 $table_name = $prefix . $table_name;
182
183 if ( length($table_name) > 64 ) {
184@@ -9344,7 +9414,7 @@
185 osc_retry(
186 Cxn => $cxn,
187 Retry => $retry,
188- retries => $retries,
189+ tries => $tries->{swap_tables},
190 stats => $stats,
191 code => sub {
192 PTDEBUG && _d($sql);
193@@ -9380,7 +9450,7 @@
194
195 # This shouldn't happen.
196 # Here and in the attempt to find a new table name we probably ought to
197- # use --retries (and maybe a Retry object?)
198+ # use --tries (and maybe a Retry object?)
199 die "Failed to find a unique old table name after serveral attempts.\n";
200 }
201 }
202@@ -9530,11 +9600,11 @@
203 my ( %args ) = @_;
204 my @required_args = qw(orig_tbl old_tbl child_tables stats
205 Cxn Quoter OptionParser TableParser
206- Retry retries);
207+ Retry tries);
208 foreach my $arg ( @required_args ) {
209 die "I need a $arg argument" unless $args{$arg};
210 }
211- my ($orig_tbl, $old_tbl, $child_tables, $stats, $cxn, $q, $o, $tp, $retry, $retries)
212+ my ($orig_tbl, $old_tbl, $child_tables, $stats, $cxn, $q, $o, $tp, $retry, $tries)
213 = @args{@required_args};
214
215 # MySQL has a "feature" where if the parent tbl is in the same db,
216@@ -9612,7 +9682,7 @@
217 osc_retry(
218 Cxn => $cxn,
219 Retry => $retry,
220- retries => $retries,
221+ tries => $tries->{update_foreign_keys},
222 stats => $stats,
223 code => sub {
224 PTDEBUG && _d($sql);
225@@ -9632,11 +9702,11 @@
226
227 sub drop_swap {
228 my ( %args ) = @_;
229- my @required_args = qw(orig_tbl new_tbl Cxn OptionParser stats Retry retries);
230+ my @required_args = qw(orig_tbl new_tbl Cxn OptionParser stats Retry tries);
231 foreach my $arg ( @required_args ) {
232 die "I need a $arg argument" unless $args{$arg};
233 }
234- my ($orig_tbl, $new_tbl, $cxn, $o, $stats, $retry, $retries) = @args{@required_args};
235+ my ($orig_tbl, $new_tbl, $cxn, $o, $stats, $retry, $tries) = @args{@required_args};
236
237 if ( $o->get('dry-run') ) {
238 print "Not drop-swapping tables because this is a dry run.\n";
239@@ -9658,7 +9728,7 @@
240 osc_retry(
241 Cxn => $cxn,
242 Retry => $retry,
243- retries => $retries,
244+ tries => $tries->{update_foreign_keys},
245 stats => $stats,
246 code => sub {
247 PTDEBUG && _d($sql);
248@@ -9677,11 +9747,11 @@
249
250 sub create_triggers {
251 my ( %args ) = @_;
252- my @required_args = qw(orig_tbl new_tbl del_tbl columns Cxn Quoter OptionParser Retry retries stats);
253+ my @required_args = qw(orig_tbl new_tbl del_tbl columns Cxn Quoter OptionParser Retry tries stats);
254 foreach my $arg ( @required_args ) {
255 die "I need a $arg argument" unless $args{$arg};
256 }
257- my ($orig_tbl, $new_tbl, $del_tbl, $cols, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args};
258+ my ($orig_tbl, $new_tbl, $del_tbl, $cols, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args};
259
260 # This sub works for --dry-run and --execute. With --dry-run it's
261 # only interesting if --print is specified, too; then the user can
262@@ -9752,7 +9822,7 @@
263 osc_retry(
264 Cxn => $cxn,
265 Retry => $retry,
266- retries => $retries,
267+ tries => $tries->{create_triggers},
268 stats => $stats,
269 code => sub {
270 PTDEBUG && _d($sql);
271@@ -9778,11 +9848,11 @@
272
273 sub drop_triggers {
274 my ( %args ) = @_;
275- my @required_args = qw(tbl Cxn Quoter OptionParser Retry retries stats);
276+ my @required_args = qw(tbl Cxn Quoter OptionParser Retry tries stats);
277 foreach my $arg ( @required_args ) {
278 die "I need a $arg argument" unless $args{$arg};
279 }
280- my ($tbl, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args};
281+ my ($tbl, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args};
282
283 # This sub works for --dry-run and --execute, although --dry-run is
284 # only interesting with --print so the user can see the drop trigger
285@@ -9802,7 +9872,7 @@
286 osc_retry(
287 Cxn => $cxn,
288 Retry => $retry,
289- retries => $retries,
290+ tries => $tries->{drop_triggers},
291 stats => $stats,
292 code => sub {
293 PTDEBUG && _d($sql);
294@@ -9846,20 +9916,20 @@
295
296 sub osc_retry {
297 my (%args) = @_;
298- my @required_args = qw(Cxn Retry retries code stats);
299+ my @required_args = qw(Cxn Retry tries code stats);
300 foreach my $arg ( @required_args ) {
301 die "I need a $arg argument" unless $args{$arg};
302 }
303 my $cxn = $args{Cxn};
304 my $retry = $args{Retry};
305- my $retries = $args{retries};
306+ my $tries = $args{tries};
307 my $code = $args{code};
308 my $stats = $args{stats};
309 my $ignore_errors = $args{ignore_errors};
310
311 return $retry->retry(
312- tries => $retries,
313- wait => sub { sleep 0.25; return; },
314+ tries => $tries->{tries},
315+ wait => sub { sleep ($tries->{wait} || 0.25) },
316 try => $code,
317 fail => sub {
318 my (%args) = @_;
319@@ -9911,11 +9981,11 @@
320
321 sub exec_nibble {
322 my (%args) = @_;
323- my @required_args = qw(Cxn tbl stats retries Retry NibbleIterator Quoter);
324+ my @required_args = qw(Cxn tbl stats tries Retry NibbleIterator Quoter);
325 foreach my $arg ( @required_args ) {
326 die "I need a $arg argument" unless $args{$arg};
327 }
328- my ($cxn, $tbl, $stats, $retries, $retry, $nibble_iter, $q)
329+ my ($cxn, $tbl, $stats, $tries, $retry, $nibble_iter, $q)
330 = @args{@required_args};
331
332 my $sth = $nibble_iter->statements();
333@@ -9952,7 +10022,7 @@
334 return osc_retry(
335 Cxn => $cxn,
336 Retry => $retry,
337- retries => $retries,
338+ tries => $tries->{copy_rows},
339 stats => $stats,
340 code => sub {
341 # ###################################################################
342@@ -10396,13 +10466,6 @@
343 NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT
344 without the utf8 layer, and runs SET NAMES after connecting to MySQL.
345
346-=item --check-interval
347-
348-type: time; default: 1
349-
350-Sleep time between checks for L<"--max-lag">.
351-
352-
353 =item --[no]check-alter
354
355 default: yes
356@@ -10434,6 +10497,12 @@
357
358 =back
359
360+=item --check-interval
361+
362+type: time; default: 1
363+
364+Sleep time between checks for L<"--max-lag">.
365+
366 =item --[no]check-plan
367
368 default: yes
369@@ -10814,46 +10883,6 @@
370 table. Currently, the DSNs are ordered by id, but id and parent_id are otherwise
371 ignored.
372
373-=item --retries
374-
375-type: int; default: 10
376-
377-Retry critical operations and recover from non-fatal errors. The tool
378-retries these operations:
379-
380- Creating triggers
381- Dropping triggers
382- Copying chunks
383- Swapping tables
384- Rebuilding foreign key constraints
385-
386-For creating and dropping triggers, the number of retries applies to each
387-C<CREATE TRIGGER> and C<DROP TRIGGER> statement for each trigger.
388-For copying chunks, the number of retries applies to each chunk, not the
389-entire table. For swapping tables, the number of retries usually applies
390-once because there is usually only one C<RENAME TABLE> statement.
391-For rebuilding foreign key constraints, the number of retries applies to
392-each statement (C<ALTER> statements for the C<rebuild_constraints>
393-L<"--alter-foreign-keys-method">; other statements for the C<drop_swap>
394-method).
395-
396-The tool retries each operation if these errors occur:
397-
398- Lock wait timeout (innodb_lock_wait_timeout and lock_wait_timeout)
399- Deadlock found
400- Query is killed (KILL QUERY <thread_id>)
401- Connection is killed (KILL CONNECTION <thread_id>)
402- Lost connection to MySQL
403-
404-In the case of lost and killed connections, the tool will automatically
405-reconnect.
406-
407-To alter extremely busy tables, it may be necessary to increase L<"--retries">,
408-and also C<innodb_lock_wait_timeout> and (for MySQL 5.5 and newer)
409-C<lock_wait_timeout> by specifying higher values with L<"--set-vars">.
410-
411-Failures and retries are recorded in the L<"--statistics">.
412-
413 =item --set-vars
414
415 type: Array
416@@ -10895,6 +10924,65 @@
417 place of the original table. The original table becomes the "old table," and
418 the tool drops it unless you disable L<"--[no]drop-old-table">.
419
420+=item --tries
421+
422+type: array
423+
424+How many times to try critical operations. If certain operations fail due
425+to non-fatal, recoverable errors, the tool waits and tries the operation
426+again. These are the operations that are retried, with their default number
427+of tries and wait time between tries (in seconds):
428+
429+=for comment ignore-pt-internal-value
430+MAGIC_tries
431+
432+ OPERATION TRIES WAIT
433+ =================== ===== ====
434+ create_triggers 10 1
435+ drop_triggers 10 1
436+ copy_rows 10 0.25
437+ swap_tables 10 1
438+ update_foreign_keys 10 1
439+
440+To change the defaults, specify the new values like:
441+
442+ --tries create_triggers:5:0.5,drop_triggers:5:0.5
443+
444+That makes the tool try C<create_triggers> and C<drop_triggers> 5 times
445+with a 0.5 second wait between tries. So the format is:
446+
447+ operation:tries:wait[,operation:tries:wait]
448+
449+All three values must be specified.
450+
451+Note that most operations are affected only in MySQL 5.5 and newer by
452+C<lock_wait_timeout> (see L<"--set-vars">) because of metadata locks.
453+The C<copy_rows> operation is affected in any version of MySQL by
454+C<innodb_lock_wait_timeout>.
455+
456+For creating and dropping triggers, the number of tries applies to each
457+C<CREATE TRIGGER> and C<DROP TRIGGER> statement for each trigger.
458+For copying rows, the number of tries applies to each chunk, not the
459+entire table. For swapping tables, the number of tries usually applies
460+once because there is usually only one C<RENAME TABLE> statement.
461+For rebuilding foreign key constraints, the number of tries applies to
462+each statement (C<ALTER> statements for the C<rebuild_constraints>
463+L<"--alter-foreign-keys-method">; other statements for the C<drop_swap>
464+method).
465+
466+The tool retries each operation if these errors occur:
467+
468+ Lock wait timeout (innodb_lock_wait_timeout and lock_wait_timeout)
469+ Deadlock found
470+ Query is killed (KILL QUERY <thread_id>)
471+ Connection is killed (KILL CONNECTION <thread_id>)
472+ Lost connection to MySQL
473+
474+In the case of lost and killed connections, the tool will automatically
475+reconnect.
476+
477+Failures and retries are recorded in the L<"--statistics">.
478+
479 =item --user
480
481 short form: -u; type: string
482
483=== modified file 'lib/Retry.pm'
484--- lib/Retry.pm 2013-01-03 00:19:16 +0000
485+++ lib/Retry.pm 2013-03-05 23:44:21 +0000
486@@ -27,6 +27,8 @@
487 use English qw(-no_match_vars);
488 use constant PTDEBUG => $ENV{PTDEBUG} || 0;
489
490+use Time::HiRes qw(sleep);
491+
492 sub new {
493 my ( $class, %args ) = @_;
494 my $self = {
495
496=== modified file 't/pt-online-schema-change/metadata_locks.t'
497--- t/pt-online-schema-change/metadata_locks.t 2013-02-28 19:29:17 +0000
498+++ t/pt-online-schema-change/metadata_locks.t 2013-03-05 23:44:21 +0000
499@@ -51,7 +51,8 @@
500 ($output) = full_output(
501 sub { pt_online_schema_change::main(
502 "$master_dsn,D=pt_osc,t=t",
503- qw(--statistics --execute --retries 2 --set-vars lock_wait_timeout=1),
504+ qw(--statistics --execute --tries create_triggers:2:0.1),
505+ qw(--set-vars lock_wait_timeout=1),
506 '--plugin', "$plugin/block_create_triggers.pm",
507 )},
508 stderr => 1,
509@@ -76,7 +77,8 @@
510 ($output) = full_output(
511 sub { pt_online_schema_change::main(
512 "$master_dsn,D=pt_osc,t=t",
513- qw(--statistics --execute --retries 2 --set-vars lock_wait_timeout=1),
514+ qw(--statistics --execute --tries swap_tables:2:0.1),
515+ qw(--set-vars lock_wait_timeout=1),
516 '--plugin', "$plugin/block_swap_tables.pm",
517 )},
518 stderr => 1,
519
520=== modified file 't/pt-online-schema-change/samples/stats-dry-run.txt'
521--- t/pt-online-schema-change/samples/stats-dry-run.txt 2013-02-28 19:29:17 +0000
522+++ t/pt-online-schema-change/samples/stats-dry-run.txt 2013-03-05 23:44:21 +0000
523@@ -1,3 +1,9 @@
524+Operation, tries, wait:
525+ copy_rows, 10, 0.25
526+ create_triggers, 10, 1
527+ drop_triggers, 10, 1
528+ swap_tables, 10, 1
529+ update_foreign_keys, 10, 1
530 Starting a dry run. `bug_1045317`.`bits` will not be altered. Specify --execute instead of --dry-run to alter the table.
531 Not dropping triggers because this is a dry run.
532 Dropping new table...
533
534=== modified file 't/pt-online-schema-change/samples/stats-execute-5.5.txt'
535--- t/pt-online-schema-change/samples/stats-execute-5.5.txt 2013-02-28 19:29:17 +0000
536+++ t/pt-online-schema-change/samples/stats-execute-5.5.txt 2013-03-05 23:44:21 +0000
537@@ -1,3 +1,9 @@
538+Operation, tries, wait:
539+ copy_rows, 10, 0.25
540+ create_triggers, 10, 1
541+ drop_triggers, 10, 1
542+ swap_tables, 10, 1
543+ update_foreign_keys, 10, 1
544 Altering `bug_1045317`.`bits`...
545 Dropping triggers...
546 Dropped triggers OK.
547
548=== modified file 't/pt-online-schema-change/samples/stats-execute.txt'
549--- t/pt-online-schema-change/samples/stats-execute.txt 2013-03-01 02:00:07 +0000
550+++ t/pt-online-schema-change/samples/stats-execute.txt 2013-03-05 23:44:21 +0000
551@@ -1,3 +1,9 @@
552+Operation, tries, wait:
553+ copy_rows, 10, 0.25
554+ create_triggers, 10, 1
555+ drop_triggers, 10, 1
556+ swap_tables, 10, 1
557+ update_foreign_keys, 10, 1
558 Altering `bug_1045317`.`bits`...
559 Dropping triggers...
560 Dropped triggers OK.

Subscribers

People subscribed via source and target branches