Merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 279
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks
Merge into: lp:percona-toolkit/2.1
Diff against target: 1357 lines (+767/-194)
8 files modified
bin/pt-online-schema-change (+304/-86)
bin/pt-table-checksum (+323/-85)
lib/NibbleIterator.pm (+24/-20)
lib/TableNibbler.pm (+6/-1)
t/lib/TableNibbler.t (+29/-1)
t/pt-table-checksum/chunk_index.t (+45/-1)
t/pt-table-checksum/samples/bad-plan-bug-1010232.sql (+17/-0)
t/pt-table-checksum/samples/n-chunk-index-cols.txt (+19/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks
Reviewer Review Type Date Requested Status
Baron Schwartz (community) Approve
Brian Fraser (community) Approve
Daniel Nichter Approve
Review via email: mp+109530@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve
286. By Daniel Nichter

Fix docs about skipping chunks with bad plans.

Revision history for this message
Brian Fraser (fraserbn) :
review: Approve
287. By Daniel Nichter

Clarify --chunk-index name:n validation.

Revision history for this message
Baron Schwartz (baron-xaprb) wrote :

OK.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-online-schema-change'
2--- bin/pt-online-schema-change 2012-05-30 17:27:07 +0000
3+++ bin/pt-online-schema-change 2012-06-10 17:15:24 +0000
4@@ -1828,8 +1828,12 @@
5
6 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
7 if ( $args{asc_first} ) {
8+ PTDEBUG && _d('Ascending only first column');
9 @asc_cols = $asc_cols[0];
10- PTDEBUG && _d('Ascending only first column');
11+ }
12+ elsif ( my $n = $args{n_index_cols} ) {
13+ PTDEBUG && _d('Ascending only first', $n, 'columns');
14+ @asc_cols = @asc_cols[0..($n-1)];
15 }
16 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
17
18@@ -4029,10 +4033,11 @@
19
20 my $asc = $args{TableNibbler}->generate_asc_stmt(
21 %args,
22- tbl_struct => $tbl->{tbl_struct},
23- index => $index,
24- cols => \@cols,
25- asc_only => 1,
26+ tbl_struct => $tbl->{tbl_struct},
27+ index => $index,
28+ n_index_cols => $args{n_chunk_index_cols},
29+ cols => \@cols,
30+ asc_only => 1,
31 );
32 PTDEBUG && _d('Ascend params:', Dumper($asc));
33
34@@ -4114,16 +4119,17 @@
35
36 $self = {
37 %args,
38- index => $index,
39- limit => $limit,
40- first_lb_sql => $first_lb_sql,
41- last_ub_sql => $last_ub_sql,
42- ub_sql => $ub_sql,
43- nibble_sql => $nibble_sql,
44- explain_ub_sql => "EXPLAIN $ub_sql",
45- explain_nibble_sql => $explain_nibble_sql,
46- resume_lb_sql => $resume_lb_sql,
47- sql => {
48+ index => $index,
49+ limit => $limit,
50+ first_lb_sql => $first_lb_sql,
51+ last_ub_sql => $last_ub_sql,
52+ ub_sql => $ub_sql,
53+ nibble_sql => $nibble_sql,
54+ explain_first_lb_sql => "EXPLAIN $first_lb_sql",
55+ explain_ub_sql => "EXPLAIN $ub_sql",
56+ explain_nibble_sql => $explain_nibble_sql,
57+ resume_lb_sql => $resume_lb_sql,
58+ sql => {
59 columns => $asc->{scols},
60 from => $from,
61 where => $where,
62@@ -4231,10 +4237,11 @@
63 sub statements {
64 my ($self) = @_;
65 return {
66- nibble => $self->{nibble_sth},
67- explain_nibble => $self->{explain_nibble_sth},
68- upper_boundary => $self->{ub_sth},
69- explain_upper_boundary => $self->{explain_ub_sth},
70+ explain_first_lower_boundary => $self->{explain_first_lb_sth},
71+ nibble => $self->{nibble_sth},
72+ explain_nibble => $self->{explain_nibble_sth},
73+ upper_boundary => $self->{ub_sth},
74+ explain_upper_boundary => $self->{explain_ub_sth},
75 }
76 }
77
78@@ -4463,8 +4470,9 @@
79 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
80
81 if ( !$self->{one_nibble} ) {
82- $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
83- $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
84+ $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
85+ $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
86+ $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
87 }
88
89 return;
90@@ -4652,6 +4660,7 @@
91
92 use Time::Local qw(timegm timelocal);
93 use Digest::MD5 qw(md5_hex);
94+use B qw();
95
96 require Exporter;
97 our @ISA = qw(Exporter);
98@@ -4669,6 +4678,7 @@
99 any_unix_timestamp
100 make_checksum
101 crc32
102+ encode_json
103 );
104
105 our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
106@@ -4876,6 +4886,96 @@
107 return $crc ^ 0xFFFFFFFF;
108 }
109
110+my $got_json = eval { require JSON };
111+sub encode_json {
112+ return JSON::encode_json(@_) if $got_json;
113+ my ( $data ) = @_;
114+ return (object_to_json($data) || '');
115+}
116+
117+
118+sub object_to_json {
119+ my ($obj) = @_;
120+ my $type = ref($obj);
121+
122+ if($type eq 'HASH'){
123+ return hash_to_json($obj);
124+ }
125+ elsif($type eq 'ARRAY'){
126+ return array_to_json($obj);
127+ }
128+ else {
129+ return value_to_json($obj);
130+ }
131+}
132+
133+sub hash_to_json {
134+ my ($obj) = @_;
135+ my @res;
136+ for my $k ( sort { $a cmp $b } keys %$obj ) {
137+ push @res, string_to_json( $k )
138+ . ":"
139+ . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
140+ }
141+ return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
142+}
143+
144+sub array_to_json {
145+ my ($obj) = @_;
146+ my @res;
147+
148+ for my $v (@$obj) {
149+ push @res, object_to_json($v) || value_to_json($v);
150+ }
151+
152+ return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
153+}
154+
155+sub value_to_json {
156+ my ($value) = @_;
157+
158+ return 'null' if(!defined $value);
159+
160+ my $b_obj = B::svref_2object(\$value); # for round trip problem
161+ my $flags = $b_obj->FLAGS;
162+ return $value # as is
163+ if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
164+
165+ my $type = ref($value);
166+
167+ if( !$type ) {
168+ return string_to_json($value);
169+ }
170+ else {
171+ return 'null';
172+ }
173+
174+}
175+
176+my %esc = (
177+ "\n" => '\n',
178+ "\r" => '\r',
179+ "\t" => '\t',
180+ "\f" => '\f',
181+ "\b" => '\b',
182+ "\"" => '\"',
183+ "\\" => '\\\\',
184+ "\'" => '\\\'',
185+);
186+
187+sub string_to_json {
188+ my ($arg) = @_;
189+
190+ $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
191+ $arg =~ s/\//\\\//g;
192+ $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
193+
194+ utf8::upgrade($arg);
195+ utf8::encode($arg);
196+
197+ return '"' . $arg . '"';
198+}
199+
200 sub _d {
201 my ($package, undef, $line) = caller 0;
202 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
203@@ -5027,6 +5127,22 @@
204 }
205 }
206
207+ # Parse --chunk-index INDEX:N where N is the number of
208+ # left-most columns of INDEX to use.
209+ # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
210+ my ($chunk_index, $n_chunk_index_cols)
211+ = split(':', $o->get('chunk-index') || '');
212+ if ( defined $chunk_index && $chunk_index eq '' ) {
213+ $o->save_error('--chunk-index cannot be an empty string');
214+ }
215+ if ( defined $n_chunk_index_cols
216+ && (!$n_chunk_index_cols
217+ || $n_chunk_index_cols =~ m/\D/
218+ || $n_chunk_index_cols < 1) ) {
219+ $o->save_error('Invalid number of --chunk-index columns: '
220+ . $n_chunk_index_cols);
221+ }
222+
223 if ( !$o->get('help') ) {
224 if ( @ARGV ) {
225 $o->save_error('Specify only one DSN on the command line');
226@@ -5641,6 +5757,7 @@
227 my (%args) = @_;
228 my $tbl = $args{tbl};
229 my $nibble_iter = $args{NibbleIterator};
230+ my $statements = $nibble_iter->statements();
231
232 if ( $o->get('dry-run') ) {
233 print "Not copying rows because this is a dry run.\n";
234@@ -5652,7 +5769,6 @@
235
236 if ( $o->get('print') ) {
237 # Print the checksum and next boundary statements.
238- my $statements = $nibble_iter->statements();
239 foreach my $sth ( sort keys %$statements ) {
240 next if $sth =~ m/^explain/;
241 if ( $statements->{$sth} ) {
242@@ -5696,6 +5812,34 @@
243 die $msg;
244 }
245 }
246+ else { # chunking the table
247+ if ( $o->get('check-plan') ) {
248+ my $expl = explain_statement(
249+ sth => $statements->{explain_first_lower_boundary},
250+ tbl => $tbl,
251+ vals => [],
252+ );
253+ if ( !$expl->{key}
254+ || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
255+ {
256+ die "Cannot determine the key_len of the chunk index "
257+ . "because MySQL chose "
258+ . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
259+ . "instead of the " . $nibble_iter->nibble_index()
260+ . " index for the first lower boundary statement. "
261+ . "See --[no]check-plan in the documentation for more "
262+ . "information.";
263+ }
264+ elsif ( !$expl->{key_len} ) {
265+ die "The key_len of the $expl->{key} index is "
266+ . (defined $expl->{key_len} ? "zero" : "NULL")
267+ . ", but this should not be possible. "
268+ . "See --[no]check-plan in the documentation for more "
269+ . "information.";
270+ }
271+ $tbl->{key_len} = $expl->{key_len};
272+ }
273+ }
274
275 return 1; # continue nibbling table
276 },
277@@ -5755,58 +5899,11 @@
278 # Count every chunk, even if it's ultimately skipped, etc.
279 $tbl->{results}->{n_chunks}++;
280
281- # If the table is being chunk (i.e., it's not small enough to be
282- # consumed by one nibble), then check index usage and chunk size.
283- if ( !$nibble_iter->one_nibble() ) {
284- my $expl = explain_statement(
285- tbl => $tbl,
286- sth => $sth->{explain_nibble},
287- vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
288- );
289-
290- # Ensure that MySQL is using the chunk index.
291- if ( lc($expl->{key} || '')
292- ne lc($nibble_iter->nibble_index() || '') ) {
293- my $msg
294- = "Aborting copying table $tbl->{name} at chunk "
295- . $nibble_iter->nibble_number()
296- . " because it is not safe to chunk. Chunking should "
297- . "use the "
298- . ($nibble_iter->nibble_index() || '?')
299- . " index, but MySQL EXPLAIN reports that "
300- . ($expl->{key} ? "the $expl->{key}" : "no")
301- . " index will be used for "
302- . $sth->{explain_nibble}->{Statement}
303- . " with values "
304- . join(", ", map { defined $_ ? $_ : "NULL" }
305- (@{$boundary->{lower}}, @{$boundary->{upper}}))
306- . "\n";
307- die $msg;
308- }
309-
310- # Check chunk size limit if the upper boundary and next lower
311- # boundary are identical.
312- if ( $limit ) {
313- my $boundary = $nibble_iter->boundaries();
314- my $oversize_chunk
315- = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
316- : 0;
317- if ( $oversize_chunk
318- && $nibble_iter->identical_boundaries(
319- $boundary->{upper}, $boundary->{next_lower}) )
320- {
321- my $msg
322- = "Aborting copying table $tbl->{name} at chunk "
323- . $nibble_iter->nibble_number()
324- . " because the chunk is too large: MySQL estimates "
325- . ($expl->{rows} || 0) . "rows. The current chunk "
326- . "size limit is " . ($tbl->{chunk_size} * $limit)
327- . " rows (chunk size=$tbl->{chunk_size}"
328- . " * chunk size limit=$limit).\n";
329- die $msg;
330- }
331- }
332- }
333+ # Die unless the nibble is safe.
334+ nibble_is_safe(
335+ %args,
336+ OptionParser => $o,
337+ );
338
339 # Exec and time the chunk checksum query.
340 $tbl->{nibble_time} = exec_nibble(
341@@ -5909,18 +6006,19 @@
342 # This won't (shouldn't) fail because we already verified in
343 # check_orig_table() table we can NibbleIterator::can_nibble().
344 my $nibble_iter = new NibbleIterator(
345- Cxn => $cxn,
346- tbl => $orig_tbl,
347- chunk_size => $orig_tbl->{chunk_size},
348- chunk_index => $o->get('chunk-index'),
349- dml => $dml,
350- select => $select,
351- callbacks => $callbacks,
352- OptionParser => $o,
353- Quoter => $q,
354- TableParser => $tp,
355- TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
356- comments => {
357+ Cxn => $cxn,
358+ tbl => $orig_tbl,
359+ chunk_size => $orig_tbl->{chunk_size},
360+ chunk_index => $chunk_index,
361+ n_chunk_index_cols => $n_chunk_index_cols,
362+ dml => $dml,
363+ select => $select,
364+ callbacks => $callbacks,
365+ OptionParser => $o,
366+ Quoter => $q,
367+ TableParser => $tp,
368+ TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
369+ comments => {
370 bite => "pt-online-schema-change $PID copy table",
371 nibble => "pt-online-schema-change $PID copy nibble",
372 },
373@@ -6110,6 +6208,82 @@
374 # ############################################################################
375 # Subroutines.
376 # ############################################################################
377+
378+sub nibble_is_safe {
379+ my (%args) = @_;
380+ my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
381+ foreach my $arg ( @required_args ) {
382+ die "I need a $arg argument" unless $args{$arg};
383+ }
384+ my ($cxn, $tbl, $nibble_iter, $o)= @args{@required_args};
385+
386+ # EXPLAIN the checksum chunk query to get its row estimate and index.
387+ # XXX This call and others like it are relying on a Perl oddity.
388+ # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
389+ my $sth = $nibble_iter->statements();
390+ my $boundary = $nibble_iter->boundaries();
391+ my $expl = explain_statement(
392+ tbl => $tbl,
393+ sth => $sth->{explain_nibble},
394+ vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
395+ );
396+
397+ # Ensure that MySQL is using the chunk index if the table is being chunked.
398+ if ( !$nibble_iter->one_nibble()
399+ && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
400+ if ( !$tbl->{warned}->{not_using_chunk_index}++
401+ && $o->get('quiet') < 2 ) {
402+ die "Error copying rows at chunk " . $nibble_iter->nibble_number()
403+ . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
404+ . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
405+ . " instead of the " . $nibble_iter->nibble_index() . "index.\n";
406+ }
407+ }
408+
409+ # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
410+ # If single-chunking the table, this has already been checked, so it
411+ # shouldn't have changed. If chunking the table with a non-unique key,
412+ # oversize chunks are possible.
413+ if ( my $limit = $o->get('chunk-size-limit') ) {
414+ my $oversize_chunk
415+ = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
416+ : 0;
417+ if ( $oversize_chunk
418+ && $nibble_iter->identical_boundaries($boundary->{upper},
419+ $boundary->{next_lower}) ) {
420+ if ( !$tbl->{warned}->{oversize_chunk}++
421+ && $o->get('quiet') < 2 ) {
422+ die "Error copying rows at chunk " . $nibble_iter->nibble_number()
423+ . " of $tbl->{db}.$tbl->{tbl} because it is oversized. "
424+ . "The current chunk size limit is "
425+ . ($tbl->{chunk_size} * $limit)
426+ . " rows (chunk size=$tbl->{chunk_size}"
427+ . " * chunk size limit=$limit), but MySQL estimates "
428+ . "that there are " . ($expl->{rows} || 0)
429+ . " rows in the chunk.\n";
430+ }
431+ }
432+ }
433+
434+ # Ensure that MySQL is still using the entire index.
435+ # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
436+ if ( !$nibble_iter->one_nibble()
437+ && $tbl->{key_len}
438+ && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
439+ if ( !$tbl->{warned}->{key_len}++
440+ && $o->get('quiet') < 2 ) {
441+ die "Error copying rows at chunk " . $nibble_iter->nibble_number()
442+ . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
443+ . "only " . ($expl->{key_len} || 0) . " bytes "
444+ . "of the " . ($expl->{key} || '?') . " index instead of "
445+ . $tbl->{key_len} . ". See the --[no]check-plan documentation "
446+ . "for more information.\n";
447+ }
448+ }
449+
450+ return 1; # safe
451+}
452+
453 sub create_new_table{
454 my (%args) = @_;
455 my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);
456@@ -7109,6 +7283,39 @@
457
458 Sleep time between checks for L<"--max-lag">.
459
460+=item --[no]check-plan
461+
462+default: yes
463+
464+Check query execution plans for safety. By default, this option causes
465+the tool to run EXPLAIN before running queries that are meant to access
466+a small amount of data, but which could access many rows if MySQL chooses a bad
467+execution plan. These include the queries to determine chunk boundaries and the
468+chunk queries themselves. If it appears that MySQL will use a bad query
469+execution plan, the tool will skip the chunk of the table.
470+
471+The tool uses several heuristics to determine whether an execution plan is bad.
472+The first is whether EXPLAIN reports that MySQL intends to use the desired index
473+to access the rows. If MySQL chooses a different index, the tool considers the
474+query unsafe.
475+
476+The tool also checks how much of the index MySQL reports that it will use for
477+the query. The EXPLAIN output shows this in the key_len column. The tool
478+remembers the largest key_len seen, and skips chunks where MySQL reports that it
479+will use a smaller prefix of the index. This heuristic can be understood as
480+skipping chunks that have a worse execution plan than other chunks.
481+
482+The tool prints a warning the first time a chunk is skipped due to
483+a bad execution plan in each table. Subsequent chunks are skipped silently,
484+although you can see the count of skipped chunks in the SKIPPED column in
485+the tool's output.
486+
487+This option adds some setup work to each table and chunk. Although the work is
488+not intrusive for MySQL, it results in more round-trips to the server, which
489+consumes time. Making chunks too small will cause the overhead to become
490+relatively larger. It is therefore recommended that you not make chunks too
491+small, because the tool may take a very long time to complete if you do.
492+
493 =item --[no]check-replication-filters
494
495 default: yes
496@@ -7145,6 +7352,17 @@
497 a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
498 index could cause bad performance.
499
500+This option supports a special syntax to select a prefix of the index instead of
501+the entire index. The syntax is NAME:N, where NAME is the name of the index, and
502+N is the number of columns you wish to use. This works only for compound
503+indexes, and is useful in cases where a bug in the MySQL query optimizer
504+(planner) causes it to scan a large range of rows instead of using the index to
505+locate starting and ending points precisely. This problem sometimes occurs on
506+indexes with many columns, such as 4 or more. If this happens, the tool might
507+print a warning related to the L<"--[no]check-plan"> option. Instructing
508+the tool to use only the first N columns from the index is a workaround for
509+the bug in some cases.
510+
511 =item --chunk-size
512
513 type: size; default: 1000
514
515=== modified file 'bin/pt-table-checksum'
516--- bin/pt-table-checksum 2012-05-25 21:34:58 +0000
517+++ bin/pt-table-checksum 2012-06-10 17:15:24 +0000
518@@ -2208,8 +2208,12 @@
519
520 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
521 if ( $args{asc_first} ) {
522+ PTDEBUG && _d('Ascending only first column');
523 @asc_cols = $asc_cols[0];
524- PTDEBUG && _d('Ascending only first column');
525+ }
526+ elsif ( my $n = $args{n_index_cols} ) {
527+ PTDEBUG && _d('Ascending only first', $n, 'columns');
528+ @asc_cols = @asc_cols[0..($n-1)];
529 }
530 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
531
532@@ -3594,10 +3598,11 @@
533
534 my $asc = $args{TableNibbler}->generate_asc_stmt(
535 %args,
536- tbl_struct => $tbl->{tbl_struct},
537- index => $index,
538- cols => \@cols,
539- asc_only => 1,
540+ tbl_struct => $tbl->{tbl_struct},
541+ index => $index,
542+ n_index_cols => $args{n_chunk_index_cols},
543+ cols => \@cols,
544+ asc_only => 1,
545 );
546 PTDEBUG && _d('Ascend params:', Dumper($asc));
547
548@@ -3679,16 +3684,17 @@
549
550 $self = {
551 %args,
552- index => $index,
553- limit => $limit,
554- first_lb_sql => $first_lb_sql,
555- last_ub_sql => $last_ub_sql,
556- ub_sql => $ub_sql,
557- nibble_sql => $nibble_sql,
558- explain_ub_sql => "EXPLAIN $ub_sql",
559- explain_nibble_sql => $explain_nibble_sql,
560- resume_lb_sql => $resume_lb_sql,
561- sql => {
562+ index => $index,
563+ limit => $limit,
564+ first_lb_sql => $first_lb_sql,
565+ last_ub_sql => $last_ub_sql,
566+ ub_sql => $ub_sql,
567+ nibble_sql => $nibble_sql,
568+ explain_first_lb_sql => "EXPLAIN $first_lb_sql",
569+ explain_ub_sql => "EXPLAIN $ub_sql",
570+ explain_nibble_sql => $explain_nibble_sql,
571+ resume_lb_sql => $resume_lb_sql,
572+ sql => {
573 columns => $asc->{scols},
574 from => $from,
575 where => $where,
576@@ -3796,10 +3802,11 @@
577 sub statements {
578 my ($self) = @_;
579 return {
580- nibble => $self->{nibble_sth},
581- explain_nibble => $self->{explain_nibble_sth},
582- upper_boundary => $self->{ub_sth},
583- explain_upper_boundary => $self->{explain_ub_sth},
584+ explain_first_lower_boundary => $self->{explain_first_lb_sth},
585+ nibble => $self->{nibble_sth},
586+ explain_nibble => $self->{explain_nibble_sth},
587+ upper_boundary => $self->{ub_sth},
588+ explain_upper_boundary => $self->{explain_ub_sth},
589 }
590 }
591
592@@ -4028,8 +4035,9 @@
593 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
594
595 if ( !$self->{one_nibble} ) {
596- $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
597- $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
598+ $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
599+ $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
600+ $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
601 }
602
603 return;
604@@ -5113,6 +5121,7 @@
605
606 use Time::Local qw(timegm timelocal);
607 use Digest::MD5 qw(md5_hex);
608+use B qw();
609
610 require Exporter;
611 our @ISA = qw(Exporter);
612@@ -5130,6 +5139,7 @@
613 any_unix_timestamp
614 make_checksum
615 crc32
616+ encode_json
617 );
618
619 our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
620@@ -5337,6 +5347,96 @@
621 return $crc ^ 0xFFFFFFFF;
622 }
623
624+my $got_json = eval { require JSON };
625+sub encode_json {
626+ return JSON::encode_json(@_) if $got_json;
627+ my ( $data ) = @_;
628+ return (object_to_json($data) || '');
629+}
630+
631+
632+sub object_to_json {
633+ my ($obj) = @_;
634+ my $type = ref($obj);
635+
636+ if($type eq 'HASH'){
637+ return hash_to_json($obj);
638+ }
639+ elsif($type eq 'ARRAY'){
640+ return array_to_json($obj);
641+ }
642+ else {
643+ return value_to_json($obj);
644+ }
645+}
646+
647+sub hash_to_json {
648+ my ($obj) = @_;
649+ my @res;
650+ for my $k ( sort { $a cmp $b } keys %$obj ) {
651+ push @res, string_to_json( $k )
652+ . ":"
653+ . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
654+ }
655+ return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
656+}
657+
658+sub array_to_json {
659+ my ($obj) = @_;
660+ my @res;
661+
662+ for my $v (@$obj) {
663+ push @res, object_to_json($v) || value_to_json($v);
664+ }
665+
666+ return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
667+}
668+
669+sub value_to_json {
670+ my ($value) = @_;
671+
672+ return 'null' if(!defined $value);
673+
674+ my $b_obj = B::svref_2object(\$value); # for round trip problem
675+ my $flags = $b_obj->FLAGS;
676+ return $value # as is
677+ if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
678+
679+ my $type = ref($value);
680+
681+ if( !$type ) {
682+ return string_to_json($value);
683+ }
684+ else {
685+ return 'null';
686+ }
687+
688+}
689+
690+my %esc = (
691+ "\n" => '\n',
692+ "\r" => '\r',
693+ "\t" => '\t',
694+ "\f" => '\f',
695+ "\b" => '\b',
696+ "\"" => '\"',
697+ "\\" => '\\\\',
698+ "\'" => '\\\'',
699+);
700+
701+sub string_to_json {
702+ my ($arg) = @_;
703+
704+ $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
705+ $arg =~ s/\//\\\//g;
706+ $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
707+
708+ utf8::upgrade($arg);
709+ utf8::encode($arg);
710+
711+ return '"' . $arg . '"';
712+}
713+
714 sub _d {
715 my ($package, undef, $line) = caller 0;
716 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
717@@ -5988,6 +6088,22 @@
718 }
719 }
720
721+ # Parse --chunk-index INDEX:N where N is the number of
722+ # left-most columns of INDEX to use.
723+ # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
724+ my ($chunk_index, $n_chunk_index_cols)
725+ = split(':', $o->get('chunk-index') || '');
726+ if ( defined $chunk_index && $chunk_index eq '' ) {
727+ $o->save_error('--chunk-index cannot be an empty string');
728+ }
729+ if ( defined $n_chunk_index_cols
730+ && (!$n_chunk_index_cols
731+ || $n_chunk_index_cols =~ m/\D/
732+ || $n_chunk_index_cols < 1) ) {
733+ $o->save_error('Invalid number of --chunk-index columns: '
734+ . $n_chunk_index_cols);
735+ }
736+
737 if ( !$o->get('help') ) {
738 if ( @ARGV > 1 ) {
739 $o->save_error("More than one host specified; only one allowed");
740@@ -6465,6 +6581,7 @@
741 my (%args) = @_;
742 my $tbl = $args{tbl};
743 my $nibble_iter = $args{NibbleIterator};
744+ my $statements = $nibble_iter->statements();
745 my $oktonibble = 1;
746
747 if ( $last_chunk ) { # resuming
748@@ -6493,7 +6610,7 @@
749 print "--\n",
750 "-- $tbl->{db}.$tbl->{tbl}\n",
751 "--\n\n";
752- my $statements = $nibble_iter->statements();
753+
754 foreach my $sth ( sort keys %$statements ) {
755 next if $sth =~ m/^explain/;
756 if ( $statements->{$sth} ) {
757@@ -6551,6 +6668,34 @@
758 $oktonibble = 0;
759 }
760 }
761+ else { # chunking the table
762+ if ( $o->get('check-plan') ) {
763+ my $expl = explain_statement(
764+ sth => $statements->{explain_first_lower_boundary},
765+ tbl => $tbl,
766+ vals => [],
767+ );
768+ if ( !$expl->{key}
769+ || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
770+ {
771+ die "Cannot determine the key_len of the chunk index "
772+ . "because MySQL chose "
773+ . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
774+ . "instead of the " . $nibble_iter->nibble_index()
775+ . " index for the first lower boundary statement. "
776+ . "See --[no]check-plan in the documentation for more "
777+ . "information.";
778+ }
779+ elsif ( !$expl->{key_len} ) {
780+ die "The key_len of the $expl->{key} index is "
781+ . (defined $expl->{key_len} ? "zero" : "NULL")
782+ . ", but this should not be possible. "
783+ . "See --[no]check-plan in the documentation for more "
784+ . "information.";
785+ }
786+ $tbl->{key_len} = $expl->{key_len};
787+ }
788+ }
789
790 if ( $oktonibble && $o->get('empty-replicate-table') ) {
791 use_repl_db(
792@@ -6604,16 +6749,14 @@
793 ne lc($nibble_iter->nibble_index() || '') ) {
794 PTDEBUG && _d('Cannot nibble next chunk, aborting table');
795 if ( $o->get('quiet') < 2 ) {
796- my $msg
797- = "Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
798+ warn ts("Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
799 . ($nibble_iter->nibble_number() + 1)
800 . " because it is not safe to chunk. Chunking should "
801 . "use the "
802 . ($nibble_iter->nibble_index() || '?')
803- . " index, but MySQL EXPLAIN reports that "
804+ . " index, but MySQL chose "
805 . ($expl->{key} ? "the $expl->{key}" : "no")
806- . " index will be used.\n";
807- warn ts($msg);
808+ . " index.\n");
809 }
810 $tbl->{checksum_results}->{errors}++;
811 return 0; # stop nibbling table
812@@ -6658,43 +6801,13 @@
813 return 0; # next boundary
814 }
815
816- # If the table is being chunk (i.e., it's not small enough to be
817- # consumed by one nibble), then check index usage and chunk size.
818- # XXX This call and others like it are relying on a Perl oddity.
819- # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
820- if ( !$nibble_iter->one_nibble() ) {
821- my $expl = explain_statement(
822- tbl => $tbl,
823- sth => $sth->{explain_nibble},
824- vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
825- );
826- my $oversize_chunk
827- = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
828- : 0;
829-
830- # Ensure that MySQL is using the chunk index.
831- if ( lc($expl->{key} || '')
832- ne lc($nibble_iter->nibble_index() || '') ) {
833- PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
834- "$tbl->{db}.$tbl->{tbl} not using chunk index, skipping");
835- return 0; # next boundary
836- }
837-
838- # Check chunk size limit if the upper boundary and next lower
839- # boundary are identical.
840- if ( $limit ) {
841- my $boundary = $nibble_iter->boundaries();
842- if ( $nibble_iter->identical_boundaries(
843- $boundary->{upper}, $boundary->{next_lower})
844- && $oversize_chunk ) {
845- PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
846- "$tbl->{db}.$tbl->{tbl} is too large, skipping");
847- return 0; # next boundary
848- }
849- }
850- }
851-
852- # Exec and time the chunk checksum query.
853+ # Skip this nibble unless it's safe.
854+ return 0 unless nibble_is_safe(
855+ %args,
856+ OptionParser => $o,
857+ );
858+
859+ # Exec and time the nibble.
860 $tbl->{nibble_time} = exec_nibble(
861 %args,
862 Retry => $retry,
863@@ -6776,7 +6889,7 @@
864 $tbl->{chunk_size} = 1;
865
866 # This warning is printed once per table.
867- if ( !$tbl->{warned_slow} && $o->get('quiet') < 2 ) {
868+ if ( !$tbl->{warned}->{slow}++ && $o->get('quiet') < 2 ) {
869 warn ts("Checksum queries for table "
870 . "$tbl->{db}.$tbl->{tbl} are executing very slowly. "
871 . "--chunk-size has been automatically reduced to 1. "
872@@ -6786,7 +6899,6 @@
873 . "selected $cnt rows and took "
874 . sprintf('%.3f', $tbl->{nibble_time})
875 . " seconds to execute.\n");
876- $tbl->{warned_slow} = 1;
877 }
878 }
879
880@@ -6918,22 +7030,23 @@
881 my $nibble_iter;
882 eval {
883 $nibble_iter = new OobNibbleIterator(
884- Cxn => $master_cxn,
885- tbl => $tbl,
886- chunk_size => $tbl->{chunk_size},
887- chunk_index => $o->get('chunk-index'),
888- dml => $checksum_dml,
889- select => $checksum_cols,
890- past_dml => $checksum_dml,
891- past_select => $past_cols,
892- callbacks => $callbacks,
893- resume => $last_chunk,
894- OptionParser => $o,
895- Quoter => $q,
896- TableNibbler => $tn,
897- TableParser => $tp,
898- RowChecksum => $rc,
899- comments => {
900+ Cxn => $master_cxn,
901+ tbl => $tbl,
902+ chunk_size => $tbl->{chunk_size},
903+ chunk_index => $chunk_index,
904+ n_chunk_index_cols => $n_chunk_index_cols,
905+ dml => $checksum_dml,
906+ select => $checksum_cols,
907+ past_dml => $checksum_dml,
908+ past_select => $past_cols,
909+ callbacks => $callbacks,
910+ resume => $last_chunk,
911+ OptionParser => $o,
912+ Quoter => $q,
913+ TableNibbler => $tn,
914+ TableParser => $tp,
915+ RowChecksum => $rc,
916+ comments => {
917 bite => "checksum table",
918 nibble => "checksum chunk",
919 },
920@@ -7008,6 +7121,84 @@
921 return $msg ? "$ts $msg" : $ts;
922 }
923
924+sub nibble_is_safe {
925+ my (%args) = @_;
926+ my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
927+ foreach my $arg ( @required_args ) {
928+ die "I need a $arg argument" unless $args{$arg};
929+ }
930+ my ($cxn, $tbl, $nibble_iter, $o)= @args{@required_args};
931+
932+ # EXPLAIN the checksum chunk query to get its row estimate and index.
933+ # XXX This call and others like it are relying on a Perl oddity.
934+ # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
935+ my $sth = $nibble_iter->statements();
936+ my $boundary = $nibble_iter->boundaries();
937+ my $expl = explain_statement(
938+ tbl => $tbl,
939+ sth => $sth->{explain_nibble},
940+ vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
941+ );
942+
943+ # Ensure that MySQL is using the chunk index if the table is being chunked.
944+ if ( !$nibble_iter->one_nibble()
945+ && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
946+ if ( !$tbl->{warned}->{not_using_chunk_index}++
947+ && $o->get('quiet') < 2 ) {
948+ warn ts("Skipping chunk " . $nibble_iter->nibble_number()
949+ . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
950+ . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
951+ . " instead of the " . $nibble_iter->nibble_index() . "index.\n");
952+ }
953+ return 0; # not safe
954+ }
955+
956+ # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
957+ # If single-chunking the table, this has already been checked, so it
958+ # shouldn't have changed. If chunking the table with a non-unique key,
959+ # oversize chunks are possible.
960+ if ( my $limit = $o->get('chunk-size-limit') ) {
961+ my $oversize_chunk
962+ = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
963+ : 0;
964+ if ( $oversize_chunk
965+ && $nibble_iter->identical_boundaries($boundary->{upper},
966+ $boundary->{next_lower}) ) {
967+ if ( !$tbl->{warned}->{oversize_chunk}++
968+ && $o->get('quiet') < 2 ) {
969+ warn ts("Skipping chunk " . $nibble_iter->nibble_number()
970+ . " of $tbl->{db}.$tbl->{tbl} because it is oversized. "
971+ . "The current chunk size limit is "
972+ . ($tbl->{chunk_size} * $limit)
973+ . " rows (chunk size=$tbl->{chunk_size}"
974+ . " * chunk size limit=$limit), but MySQL estimates "
975+ . "that there are " . ($expl->{rows} || 0)
976+ . " rows in the chunk.\n");
977+ }
978+ return 0; # not safe
979+ }
980+ }
981+
982+ # Ensure that MySQL is still using the entire index.
983+ # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
984+ if ( !$nibble_iter->one_nibble()
985+ && $tbl->{key_len}
986+ && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
987+ if ( !$tbl->{warned}->{key_len}++
988+ && $o->get('quiet') < 2 ) {
989+ warn ts("Skipping chunk " . $nibble_iter->nibble_number()
990+ . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
991+ . "only " . ($expl->{key_len} || 0) . " bytes "
992+ . "of the " . ($expl->{key} || '?') . " index instead of "
993+ . $tbl->{key_len} . ". See the --[no]check-plan documentation "
994+ . "for more information.\n");
995+ }
996+ return 0; # not safe
997+ }
998+
999+ return 1; # safe
1000+}
1001+
1002 sub exec_nibble {
1003 my (%args) = @_;
1004 my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser);
1005@@ -7074,7 +7265,7 @@
1006 && (!$warn_code{$code}->{pattern}
1007 || $message =~ m/$warn_code{$code}->{pattern}/) )
1008 {
1009- if ( !$tbl->{"warned_code_$code"} ) { # warn once per table
1010+ if ( !$tbl->{warned}->{$code}++ ) { # warn once per table
1011 if ( $o->get('quiet') < 2 ) {
1012 warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} "
1013 . "caused MySQL error $code: "
1014@@ -7083,7 +7274,6 @@
1015 : $message)
1016 . "\n");
1017 }
1018- $tbl->{"warned_code_$code"} = 1;
1019 $tbl->{checksum_results}->{errors}++;
1020 }
1021 }
1022@@ -7910,6 +8100,39 @@
1023
1024 Sleep time between checks for L<"--max-lag">.
1025
1026+=item --[no]check-plan
1027+
1028+default: yes
1029+
1030+Check query execution plans for safety. By default, this option causes
1031+pt-table-checksum to run EXPLAIN before running queries that are meant to access
1032+a small amount of data, but which could access many rows if MySQL chooses a bad
1033+execution plan. These include the queries to determine chunk boundaries and the
1034+chunk queries themselves. If it appears that MySQL will use a bad query
1035+execution plan, the tool will skip the chunk of the table.
1036+
1037+The tool uses several heuristics to determine whether an execution plan is bad.
1038+The first is whether EXPLAIN reports that MySQL intends to use the desired index
1039+to access the rows. If MySQL chooses a different index, the tool considers the
1040+query unsafe.
1041+
1042+The tool also checks how much of the index MySQL reports that it will use for
1043+the query. The EXPLAIN output shows this in the key_len column. The tool
1044+remembers the largest key_len seen, and skips chunks where MySQL reports that it
1045+will use a smaller prefix of the index. This heuristic can be understood as
1046+skipping chunks that have a worse execution plan than other chunks.
1047+
1048+The tool prints a warning the first time a chunk is skipped due to
1049+a bad execution plan in each table. Subsequent chunks are skipped silently,
1050+although you can see the count of skipped chunks in the SKIPPED column in
1051+the tool's output.
1052+
1053+This option adds some setup work to each table and chunk. Although the work is
1054+not intrusive for MySQL, it results in more round-trips to the server, which
1055+consumes time. Making chunks too small will cause the overhead to become
1056+relatively larger. It is therefore recommended that you not make chunks too
1057+small, because the tool may take a very long time to complete if you do.
1058+
1059 =item --[no]check-replication-filters
1060
1061 default: yes; group: Safety
1062@@ -7959,12 +8182,24 @@
1063 This is probably best to use when you are checksumming only a single table, not
1064 an entire server.
1065
1066+This option supports a special syntax to select a prefix of the index instead of
1067+the entire index. The syntax is NAME:N, where NAME is the name of the index, and
1068+N is the number of columns you wish to use. This works only for compound
1069+indexes, and is useful in cases where a bug in the MySQL query optimizer
1070+(planner) causes it to scan a large range of rows instead of using the index to
1071+locate starting and ending points precisely. This problem sometimes occurs on
1072+indexes with many columns, such as 4 or more. If this happens, the tool might
1073+print a warning related to the L<"--[no]check-plan"> option. Instructing
1074+the tool to use only the first N columns from the index is a workaround for
1075+the bug in some cases.
1076+
1077 =item --chunk-size
1078
1079 type: size; default: 1000
1080
1081 Number of rows to select for each checksum query. Allowable suffixes are
1082-k, M, G.
1083+k, M, G. You should not use this option in most cases; prefer L<"--chunk-time">
1084+instead.
1085
1086 This option can override the default behavior, which is to adjust chunk size
1087 dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds.
1088@@ -7980,6 +8215,9 @@
1089 10,000 rows large. Such a chunk will probably be skipped because of
1090 L<"--chunk-size-limit">.
1091
1092+Selecting a small chunk size will cause the tool to become much slower, in part
1093+because of the setup work required for L<"--[no]check-plan">.
1094+
1095 =item --chunk-size-limit
1096
1097 type: float; default: 2.0; group: Safety
1098
1099=== modified file 'lib/NibbleIterator.pm'
1100--- lib/NibbleIterator.pm 2012-05-10 16:21:44 +0000
1101+++ lib/NibbleIterator.pm 2012-06-10 17:15:24 +0000
1102@@ -124,10 +124,11 @@
1103 # Figure out how to nibble the table with the index.
1104 my $asc = $args{TableNibbler}->generate_asc_stmt(
1105 %args,
1106- tbl_struct => $tbl->{tbl_struct},
1107- index => $index,
1108- cols => \@cols,
1109- asc_only => 1,
1110+ tbl_struct => $tbl->{tbl_struct},
1111+ index => $index,
1112+ n_index_cols => $args{n_chunk_index_cols},
1113+ cols => \@cols,
1114+ asc_only => 1,
1115 );
1116 PTDEBUG && _d('Ascend params:', Dumper($asc));
1117
1118@@ -229,16 +230,17 @@
1119
1120 $self = {
1121 %args,
1122- index => $index,
1123- limit => $limit,
1124- first_lb_sql => $first_lb_sql,
1125- last_ub_sql => $last_ub_sql,
1126- ub_sql => $ub_sql,
1127- nibble_sql => $nibble_sql,
1128- explain_ub_sql => "EXPLAIN $ub_sql",
1129- explain_nibble_sql => $explain_nibble_sql,
1130- resume_lb_sql => $resume_lb_sql,
1131- sql => {
1132+ index => $index,
1133+ limit => $limit,
1134+ first_lb_sql => $first_lb_sql,
1135+ last_ub_sql => $last_ub_sql,
1136+ ub_sql => $ub_sql,
1137+ nibble_sql => $nibble_sql,
1138+ explain_first_lb_sql => "EXPLAIN $first_lb_sql",
1139+ explain_ub_sql => "EXPLAIN $ub_sql",
1140+ explain_nibble_sql => $explain_nibble_sql,
1141+ resume_lb_sql => $resume_lb_sql,
1142+ sql => {
1143 columns => $asc->{scols},
1144 from => $from,
1145 where => $where,
1146@@ -357,10 +359,11 @@
1147 sub statements {
1148 my ($self) = @_;
1149 return {
1150- nibble => $self->{nibble_sth},
1151- explain_nibble => $self->{explain_nibble_sth},
1152- upper_boundary => $self->{ub_sth},
1153- explain_upper_boundary => $self->{explain_ub_sth},
1154+ explain_first_lower_boundary => $self->{explain_first_lb_sth},
1155+ nibble => $self->{nibble_sth},
1156+ explain_nibble => $self->{explain_nibble_sth},
1157+ upper_boundary => $self->{ub_sth},
1158+ explain_upper_boundary => $self->{explain_ub_sth},
1159 }
1160 }
1161
1162@@ -613,8 +616,9 @@
1163 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
1164
1165 if ( !$self->{one_nibble} ) {
1166- $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
1167- $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
1168+ $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
1169+ $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
1170+ $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
1171 }
1172
1173 return;
1174
1175=== modified file 'lib/TableNibbler.pm'
1176--- lib/TableNibbler.pm 2012-01-19 19:46:56 +0000
1177+++ lib/TableNibbler.pm 2012-06-10 17:15:24 +0000
1178@@ -41,6 +41,7 @@
1179 # * tbl_struct Hashref returned from TableParser::parse().
1180 # * cols Arrayref of columns to SELECT from the table
1181 # * index Which index to ascend; optional.
1182+# * n_index_cols The number of left-most index columns to use.
1183 # * asc_only Whether to ascend strictly, that is, the WHERE clause for
1184 # the asc_stmt will fetch the next row > the given arguments.
1185 # The option is to fetch the row >=, which could loop
1186@@ -77,8 +78,12 @@
1187 # These are the columns we'll ascend.
1188 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
1189 if ( $args{asc_first} ) {
1190+ PTDEBUG && _d('Ascending only first column');
1191 @asc_cols = $asc_cols[0];
1192- PTDEBUG && _d('Ascending only first column');
1193+ }
1194+ elsif ( my $n = $args{n_index_cols} ) {
1195+ PTDEBUG && _d('Ascending only first', $n, 'columns');
1196+ @asc_cols = @asc_cols[0..($n-1)];
1197 }
1198 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
1199
1200
1201=== modified file 't/lib/TableNibbler.t'
1202--- t/lib/TableNibbler.t 2012-03-06 13:56:08 +0000
1203+++ t/lib/TableNibbler.t 2012-06-10 17:15:24 +0000
1204@@ -9,7 +9,7 @@
1205 use strict;
1206 use warnings FATAL => 'all';
1207 use English qw(-no_match_vars);
1208-use Test::More tests => 24;
1209+use Test::More tests => 25;
1210
1211 use TableParser;
1212 use TableNibbler;
1213@@ -299,6 +299,34 @@
1214
1215 is_deeply(
1216 $n->generate_asc_stmt(
1217+ tbl_struct => $t,
1218+ cols => $t->{cols},
1219+ index => 'rental_date',
1220+ n_index_cols => 2,
1221+ ),
1222+ {
1223+ cols => [qw(rental_id rental_date inventory_id customer_id
1224+ return_date staff_id last_update)],
1225+ index => 'rental_date',
1226+ where => '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))',
1227+ slice => [qw(1 1 2)],
1228+ scols => [qw(rental_date rental_date inventory_id)],
1229+ boundaries => {
1230+ '<' =>
1231+ '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?))',
1232+ '<=' =>
1233+ '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?))',
1234+ '>' =>
1235+ '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?))',
1236+ '>=' =>
1237+ '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))'
1238+ },
1239+ },
1240+ 'Use only N left-most columns of the index',
1241+);
1242+
1243+is_deeply(
1244+ $n->generate_asc_stmt(
1245 tbl_struct => $t,
1246 cols => $t->{cols},
1247 index => 'rental_date',
1248
1249=== modified file 't/pt-table-checksum/chunk_index.t'
1250--- t/pt-table-checksum/chunk_index.t 2012-05-04 23:46:34 +0000
1251+++ t/pt-table-checksum/chunk_index.t 2012-06-10 17:15:24 +0000
1252@@ -25,7 +25,7 @@
1253 plan skip_all => 'Cannot connect to sandbox master';
1254 }
1255 else {
1256- plan tests => 11;
1257+ plan tests => 14;
1258 }
1259
1260 # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
1261@@ -158,6 +158,50 @@
1262 );
1263
1264 # #############################################################################
1265+# PK but bad explain plan.
1266+# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
1267+# #############################################################################
1268+$sb->load_file('master', "t/pt-table-checksum/samples/bad-plan-bug-1010232.sql");
1269+PerconaTest::wait_for_table($dbh, "bad_plan.t", "(c1,c2,c3,c4)=(1,1,2,100)");
1270+
1271+$output = output(sub {
1272+ $exit_status = pt_table_checksum::main(
1273+ $master_dsn, '--max-load', '',
1274+ qw(--lock-wait-timeout 3 --chunk-size 10 -t bad_plan.t)
1275+ ) },
1276+ stderr => 1,
1277+);
1278+
1279+is(
1280+ $exit_status,
1281+ 0,
1282+ "Bad key_len chunks are not errors"
1283+);
1284+
1285+cmp_ok(
1286+ PerconaTest::count_checksum_results($output, 'skipped'),
1287+ '>',
1288+ 1,
1289+ "Skipped bad key_len chunks"
1290+);
1291+
1292+# Use --chunk-index:3 to use only the first 3 left-most columns of the index.
1293+# Can't use bad_plan.t, however, because its row are almost all identical,
1294+# so using 3 of 4 pk cols creates an infinite loop.
1295+ok(
1296+ no_diff(
1297+ sub {
1298+ pt_table_checksum::main(
1299+ $master_dsn, '--max-load', '',
1300+ qw(--lock-wait-timeout 3 --chunk-size 5000 -t sakila.rental),
1301+ qw(--chunk-index rental_date:2 --explain --explain));
1302+ },
1303+ "t/pt-table-checksum/samples/n-chunk-index-cols.txt",
1304+ ),
1305+ "--chunk-index index:n"
1306+);
1307+
1308+# #############################################################################
1309 # Done.
1310 # #############################################################################
1311 $sb->wipe_clean($dbh);
1312
1313=== added file 't/pt-table-checksum/samples/bad-plan-bug-1010232.sql'
1314--- t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 1970-01-01 00:00:00 +0000
1315+++ t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 2012-06-10 17:15:24 +0000
1316@@ -0,0 +1,17 @@
1317+DROP DATABASE IF EXISTS bad_plan;
1318+CREATE DATABASE bad_plan;
1319+USE bad_plan;
1320+
1321+CREATE TABLE t (
1322+ `c1` smallint(5) unsigned NOT NULL,
1323+ `c2` mediumint(8) unsigned NOT NULL DEFAULT '0',
1324+ `c3` smallint(5) unsigned NOT NULL DEFAULT '0',
1325+ `c4` smallint(5) unsigned NOT NULL DEFAULT '0',
1326+ PRIMARY KEY (`c1`,`c2`,`c3`,`c4`)
1327+) ENGINE=InnoDB;
1328+
1329+INSERT INTO t VALUES
1330+(1,1,1,1),(1,1,1,2),(1,1,1,3),(1,1,1,4),(1,1,1,5),(1,1,1,6),(1,1,1,7),(1,1,1,8),(1,1,1,9),(1,1,1,10),(1,1,1,11),(1,1,1,12),(1,1,1,13),(1,1,1,14),(1,1,1,15),(1,1,1,16),(1,1,1,17),(1,1,1,18),(1,1,1,19),(1,1,1,20),(1,1,1,21),(1,1,1,22),(1,1,1,23),(1,1,1,24),(1,1,1,25),(1,1,1,26),(1,1,1,27),(1,1,1,28),(1,1,1,29),(1,1,1,30),(1,1,1,31),(1,1,1,32),(1,1,1,33),(1,1,1,34),(1,1,1,35),(1,1,1,36),(1,1,1,37),(1,1,1,38),(1,1,1,39),(1,1,1,40),(1,1,1,41),(1,1,1,42),(1,1,1,43),(1,1,1,44),(1,1,1,45),(1,1,1,46),(1,1,1,47),(1,1,1,48),(1,1,1,49),(1,1,1,50),(1,1,1,51),(1,1,1,52),(1,1,1,53),(1,1,1,54),(1,1,1,55),(1,1,1,56),(1,1,1,57),(1,1,1,58),(1,1,1,59),(1,1,1,60),(1,1,1,61),(1,1,1,62),(1,1,1,63),(1,1,1,64),(1,1,1,65),(1,1,1,66),(1,1,1,67),(1,1,1,68),(1,1,1,69),(1,1,1,70),(1,1,1,71),(1,1,1,72),(1,1,1,73),(1,1,1,74),(1,1,1,75),(1,1,1,76),(1,1,1,77),(1,1,1,78),(1,1,1,79),(1,1,1,80),(1,1,1,81),(1,1,1,82),(1,1,1,83),(1,1,1,84),(1,1,1,85),(1,1,1,86),(1,1,1,87),(1,1,1,88),(1,1,1,89),(1,1,1,90),(1,1,1,91),(1,1,1,92),(1,1,1,93),(1,1,1,94),(1,1,1,95),(1,1,1,96),(1,1,1,97),(1,1,1,98),(1,1,1,99),(1,1,1,100),
1331+(1,1,2,1),(1,1,2,2),(1,1,2,3),(1,1,2,4),(1,1,2,5),(1,1,2,6),(1,1,2,7),(1,1,2,8),(1,1,2,9),(1,1,2,10),(1,1,2,11),(1,1,2,12),(1,1,2,13),(1,1,2,14),(1,1,2,15),(1,1,2,16),(1,1,2,17),(1,1,2,18),(1,1,2,19),(1,1,2,20),(1,1,2,21),(1,1,2,22),(1,1,2,23),(1,1,2,24),(1,1,2,25),(1,1,2,26),(1,1,2,27),(1,1,2,28),(1,1,2,29),(1,1,2,30),(1,1,2,31),(1,1,2,32),(1,1,2,33),(1,1,2,34),(1,1,2,35),(1,1,2,36),(1,1,2,37),(1,1,2,38),(1,1,2,39),(1,1,2,40),(1,1,2,41),(1,1,2,42),(1,1,2,43),(1,1,2,44),(1,1,2,45),(1,1,2,46),(1,1,2,47),(1,1,2,48),(1,1,2,49),(1,1,2,50),(1,1,2,51),(1,1,2,52),(1,1,2,53),(1,1,2,54),(1,1,2,55),(1,1,2,56),(1,1,2,57),(1,1,2,58),(1,1,2,59),(1,1,2,60),(1,1,2,61),(1,1,2,62),(1,1,2,63),(1,1,2,64),(1,1,2,65),(1,1,2,66),(1,1,2,67),(1,1,2,68),(1,1,2,69),(1,1,2,70),(1,1,2,71),(1,1,2,72),(1,1,2,73),(1,1,2,74),(1,1,2,75),(1,1,2,76),(1,1,2,77),(1,1,2,78),(1,1,2,79),(1,1,2,80),(1,1,2,81),(1,1,2,82),(1,1,2,83),(1,1,2,84),(1,1,2,85),(1,1,2,86),(1,1,2,87),(1,1,2,88),(1,1,2,89),(1,1,2,90),(1,1,2,91),(1,1,2,92),(1,1,2,93),(1,1,2,94),(1,1,2,95),(1,1,2,96),(1,1,2,97),(1,1,2,98),(1,1,2,99),(1,1,2,100);
1332+
1333+ANALYZE TABLE bad_plan.t;
1334
1335=== added file 't/pt-table-checksum/samples/n-chunk-index-cols.txt'
1336--- t/pt-table-checksum/samples/n-chunk-index-cols.txt 1970-01-01 00:00:00 +0000
1337+++ t/pt-table-checksum/samples/n-chunk-index-cols.txt 2012-06-10 17:15:24 +0000
1338@@ -0,0 +1,19 @@
1339+--
1340+-- sakila.rental
1341+--
1342+
1343+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `rental_id`, `rental_date`, `inventory_id`, `customer_id`, `return_date`, `staff_id`, `last_update` + 0, CONCAT(ISNULL(`return_date`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) AND ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?)) /*checksum chunk*/
1344+
1345+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past lower chunk*/
1346+
1347+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past upper chunk*/
1348+
1349+SELECT /*!40001 SQL_NO_CACHE */ `rental_date`, `rental_date`, `inventory_id` FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` LIMIT ?, 2 /*next chunk boundary*/
1350+
1351+1 2005-05-24 22:53:30,2005-05-24 22:53:30,367 2005-07-09 01:17:08,2005-07-09 01:17:08,2388
1352+2 2005-07-09 01:19:03,2005-07-09 01:19:03,3438 2005-07-31 17:51:23,2005-07-31 17:51:23,2396
1353+3 2005-07-31 17:53:51,2005-07-31 17:53:51,928 2005-08-22 13:58:23,2005-08-22 13:58:23,2553
1354+4 2005-08-22 13:59:19,2005-08-22 13:59:19,3704 2006-02-14 15:16:03,2006-02-14 15:16:03,4568
1355+5 2005-05-24 22:53:30,2005-05-24 22:53:30,367
1356+6 2006-02-14 15:16:03,2006-02-14 15:16:03,4568
1357+

Subscribers

People subscribed via source and target branches