Merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 279
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks
Merge into: lp:percona-toolkit/2.1
Diff against target: 1357 lines (+767/-194)
8 files modified
bin/pt-online-schema-change (+304/-86)
bin/pt-table-checksum (+323/-85)
lib/NibbleIterator.pm (+24/-20)
lib/TableNibbler.pm (+6/-1)
t/lib/TableNibbler.t (+29/-1)
t/pt-table-checksum/chunk_index.t (+45/-1)
t/pt-table-checksum/samples/bad-plan-bug-1010232.sql (+17/-0)
t/pt-table-checksum/samples/n-chunk-index-cols.txt (+19/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks
Reviewer Review Type Date Requested Status
Baron Schwartz (community) Approve
Brian Fraser (community) Approve
Daniel Nichter Approve
Review via email: mp+109530@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve
286. By Daniel Nichter

Fix docs about skipping chunks with bad plans.

Revision history for this message
Brian Fraser (fraserbn) :
review: Approve
287. By Daniel Nichter

Clarify --chunk-index name:n validation.

Revision history for this message
Baron Schwartz (baron-xaprb) wrote :

OK.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bin/pt-online-schema-change'
--- bin/pt-online-schema-change 2012-05-30 17:27:07 +0000
+++ bin/pt-online-schema-change 2012-06-10 17:15:24 +0000
@@ -1828,8 +1828,12 @@
18281828
1829 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};1829 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
1830 if ( $args{asc_first} ) {1830 if ( $args{asc_first} ) {
1831 PTDEBUG && _d('Ascending only first column');
1831 @asc_cols = $asc_cols[0];1832 @asc_cols = $asc_cols[0];
1832 PTDEBUG && _d('Ascending only first column');1833 }
1834 elsif ( my $n = $args{n_index_cols} ) {
1835 PTDEBUG && _d('Ascending only first', $n, 'columns');
1836 @asc_cols = @asc_cols[0..($n-1)];
1833 }1837 }
1834 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));1838 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
18351839
@@ -4029,10 +4033,11 @@
40294033
4030 my $asc = $args{TableNibbler}->generate_asc_stmt(4034 my $asc = $args{TableNibbler}->generate_asc_stmt(
4031 %args,4035 %args,
4032 tbl_struct => $tbl->{tbl_struct},4036 tbl_struct => $tbl->{tbl_struct},
4033 index => $index,4037 index => $index,
4034 cols => \@cols,4038 n_index_cols => $args{n_chunk_index_cols},
4035 asc_only => 1,4039 cols => \@cols,
4040 asc_only => 1,
4036 );4041 );
4037 PTDEBUG && _d('Ascend params:', Dumper($asc));4042 PTDEBUG && _d('Ascend params:', Dumper($asc));
40384043
@@ -4114,16 +4119,17 @@
41144119
4115 $self = {4120 $self = {
4116 %args,4121 %args,
4117 index => $index,4122 index => $index,
4118 limit => $limit,4123 limit => $limit,
4119 first_lb_sql => $first_lb_sql,4124 first_lb_sql => $first_lb_sql,
4120 last_ub_sql => $last_ub_sql,4125 last_ub_sql => $last_ub_sql,
4121 ub_sql => $ub_sql,4126 ub_sql => $ub_sql,
4122 nibble_sql => $nibble_sql,4127 nibble_sql => $nibble_sql,
4123 explain_ub_sql => "EXPLAIN $ub_sql",4128 explain_first_lb_sql => "EXPLAIN $first_lb_sql",
4124 explain_nibble_sql => $explain_nibble_sql,4129 explain_ub_sql => "EXPLAIN $ub_sql",
4125 resume_lb_sql => $resume_lb_sql,4130 explain_nibble_sql => $explain_nibble_sql,
4126 sql => {4131 resume_lb_sql => $resume_lb_sql,
4132 sql => {
4127 columns => $asc->{scols},4133 columns => $asc->{scols},
4128 from => $from,4134 from => $from,
4129 where => $where,4135 where => $where,
@@ -4231,10 +4237,11 @@
4231sub statements {4237sub statements {
4232 my ($self) = @_;4238 my ($self) = @_;
4233 return {4239 return {
4234 nibble => $self->{nibble_sth},4240 explain_first_lower_boundary => $self->{explain_first_lb_sth},
4235 explain_nibble => $self->{explain_nibble_sth},4241 nibble => $self->{nibble_sth},
4236 upper_boundary => $self->{ub_sth},4242 explain_nibble => $self->{explain_nibble_sth},
4237 explain_upper_boundary => $self->{explain_ub_sth},4243 upper_boundary => $self->{ub_sth},
4244 explain_upper_boundary => $self->{explain_ub_sth},
4238 }4245 }
4239}4246}
42404247
@@ -4463,8 +4470,9 @@
4463 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});4470 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
44644471
4465 if ( !$self->{one_nibble} ) {4472 if ( !$self->{one_nibble} ) {
4466 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});4473 $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
4467 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});4474 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
4475 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
4468 }4476 }
44694477
4470 return;4478 return;
@@ -4652,6 +4660,7 @@
46524660
4653use Time::Local qw(timegm timelocal);4661use Time::Local qw(timegm timelocal);
4654use Digest::MD5 qw(md5_hex);4662use Digest::MD5 qw(md5_hex);
4663use B qw();
46554664
4656require Exporter;4665require Exporter;
4657our @ISA = qw(Exporter);4666our @ISA = qw(Exporter);
@@ -4669,6 +4678,7 @@
4669 any_unix_timestamp4678 any_unix_timestamp
4670 make_checksum4679 make_checksum
4671 crc324680 crc32
4681 encode_json
4672);4682);
46734683
4674our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;4684our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
@@ -4876,6 +4886,96 @@
4876 return $crc ^ 0xFFFFFFFF;4886 return $crc ^ 0xFFFFFFFF;
4877}4887}
48784888
4889my $got_json = eval { require JSON };
4890sub encode_json {
4891 return JSON::encode_json(@_) if $got_json;
4892 my ( $data ) = @_;
4893 return (object_to_json($data) || '');
4894}
4895
4896
4897sub object_to_json {
4898 my ($obj) = @_;
4899 my $type = ref($obj);
4900
4901 if($type eq 'HASH'){
4902 return hash_to_json($obj);
4903 }
4904 elsif($type eq 'ARRAY'){
4905 return array_to_json($obj);
4906 }
4907 else {
4908 return value_to_json($obj);
4909 }
4910}
4911
4912sub hash_to_json {
4913 my ($obj) = @_;
4914 my @res;
4915 for my $k ( sort { $a cmp $b } keys %$obj ) {
4916 push @res, string_to_json( $k )
4917 . ":"
4918 . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
4919 }
4920 return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
4921}
4922
4923sub array_to_json {
4924 my ($obj) = @_;
4925 my @res;
4926
4927 for my $v (@$obj) {
4928 push @res, object_to_json($v) || value_to_json($v);
4929 }
4930
4931 return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
4932}
4933
4934sub value_to_json {
4935 my ($value) = @_;
4936
4937 return 'null' if(!defined $value);
4938
4939 my $b_obj = B::svref_2object(\$value); # for round trip problem
4940 my $flags = $b_obj->FLAGS;
4941 return $value # as is
4942 if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
4943
4944 my $type = ref($value);
4945
4946 if( !$type ) {
4947 return string_to_json($value);
4948 }
4949 else {
4950 return 'null';
4951 }
4952
4953}
4954
4955my %esc = (
4956 "\n" => '\n',
4957 "\r" => '\r',
4958 "\t" => '\t',
4959 "\f" => '\f',
4960 "\b" => '\b',
4961 "\"" => '\"',
4962 "\\" => '\\\\',
4963 "\'" => '\\\'',
4964);
4965
4966sub string_to_json {
4967 my ($arg) = @_;
4968
4969 $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
4970 $arg =~ s/\//\\\//g;
4971 $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
4972
4973 utf8::upgrade($arg);
4974 utf8::encode($arg);
4975
4976 return '"' . $arg . '"';
4977}
4978
4879sub _d {4979sub _d {
4880 my ($package, undef, $line) = caller 0;4980 my ($package, undef, $line) = caller 0;
4881 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }4981 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
@@ -5027,6 +5127,22 @@
5027 }5127 }
5028 }5128 }
50295129
5130 # Parse --chunk-index INDEX:N where N is the number of
5131 # left-most columns of INDEX to use.
5132 # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
5133 my ($chunk_index, $n_chunk_index_cols)
5134 = split(':', $o->get('chunk-index') || '');
5135 if ( defined $chunk_index && $chunk_index eq '' ) {
5136 $o->save_error('--chunk-index cannot be an empty string');
5137 }
5138 if ( defined $n_chunk_index_cols
5139 && (!$n_chunk_index_cols
5140 || $n_chunk_index_cols =~ m/\D/
5141 || $n_chunk_index_cols < 1) ) {
5142 $o->save_error('Invalid number of --chunk-index columns: '
5143 . $n_chunk_index_cols);
5144 }
5145
5030 if ( !$o->get('help') ) {5146 if ( !$o->get('help') ) {
5031 if ( @ARGV ) {5147 if ( @ARGV ) {
5032 $o->save_error('Specify only one DSN on the command line');5148 $o->save_error('Specify only one DSN on the command line');
@@ -5641,6 +5757,7 @@
5641 my (%args) = @_;5757 my (%args) = @_;
5642 my $tbl = $args{tbl};5758 my $tbl = $args{tbl};
5643 my $nibble_iter = $args{NibbleIterator};5759 my $nibble_iter = $args{NibbleIterator};
5760 my $statements = $nibble_iter->statements();
56445761
5645 if ( $o->get('dry-run') ) {5762 if ( $o->get('dry-run') ) {
5646 print "Not copying rows because this is a dry run.\n";5763 print "Not copying rows because this is a dry run.\n";
@@ -5652,7 +5769,6 @@
56525769
5653 if ( $o->get('print') ) {5770 if ( $o->get('print') ) {
5654 # Print the checksum and next boundary statements.5771 # Print the checksum and next boundary statements.
5655 my $statements = $nibble_iter->statements();
5656 foreach my $sth ( sort keys %$statements ) {5772 foreach my $sth ( sort keys %$statements ) {
5657 next if $sth =~ m/^explain/;5773 next if $sth =~ m/^explain/;
5658 if ( $statements->{$sth} ) {5774 if ( $statements->{$sth} ) {
@@ -5696,6 +5812,34 @@
5696 die $msg;5812 die $msg;
5697 }5813 }
5698 }5814 }
5815 else { # chunking the table
5816 if ( $o->get('check-plan') ) {
5817 my $expl = explain_statement(
5818 sth => $statements->{explain_first_lower_boundary},
5819 tbl => $tbl,
5820 vals => [],
5821 );
5822 if ( !$expl->{key}
5823 || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
5824 {
5825 die "Cannot determine the key_len of the chunk index "
5826 . "because MySQL chose "
5827 . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
5828 . "instead of the " . $nibble_iter->nibble_index()
5829 . " index for the first lower boundary statement. "
5830 . "See --[no]check-plan in the documentation for more "
5831 . "information.";
5832 }
5833 elsif ( !$expl->{key_len} ) {
5834 die "The key_len of the $expl->{key} index is "
5835 . (defined $expl->{key_len} ? "zero" : "NULL")
5836 . ", but this should not be possible. "
5837 . "See --[no]check-plan in the documentation for more "
5838 . "information.";
5839 }
5840 $tbl->{key_len} = $expl->{key_len};
5841 }
5842 }
56995843
5700 return 1; # continue nibbling table5844 return 1; # continue nibbling table
5701 },5845 },
@@ -5755,58 +5899,11 @@
5755 # Count every chunk, even if it's ultimately skipped, etc.5899 # Count every chunk, even if it's ultimately skipped, etc.
5756 $tbl->{results}->{n_chunks}++;5900 $tbl->{results}->{n_chunks}++;
57575901
5758 # If the table is being chunk (i.e., it's not small enough to be5902 # Die unless the nibble is safe.
5759 # consumed by one nibble), then check index usage and chunk size.5903 nibble_is_safe(
5760 if ( !$nibble_iter->one_nibble() ) {5904 %args,
5761 my $expl = explain_statement(5905 OptionParser => $o,
5762 tbl => $tbl,5906 );
5763 sth => $sth->{explain_nibble},
5764 vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
5765 );
5766
5767 # Ensure that MySQL is using the chunk index.
5768 if ( lc($expl->{key} || '')
5769 ne lc($nibble_iter->nibble_index() || '') ) {
5770 my $msg
5771 = "Aborting copying table $tbl->{name} at chunk "
5772 . $nibble_iter->nibble_number()
5773 . " because it is not safe to chunk. Chunking should "
5774 . "use the "
5775 . ($nibble_iter->nibble_index() || '?')
5776 . " index, but MySQL EXPLAIN reports that "
5777 . ($expl->{key} ? "the $expl->{key}" : "no")
5778 . " index will be used for "
5779 . $sth->{explain_nibble}->{Statement}
5780 . " with values "
5781 . join(", ", map { defined $_ ? $_ : "NULL" }
5782 (@{$boundary->{lower}}, @{$boundary->{upper}}))
5783 . "\n";
5784 die $msg;
5785 }
5786
5787 # Check chunk size limit if the upper boundary and next lower
5788 # boundary are identical.
5789 if ( $limit ) {
5790 my $boundary = $nibble_iter->boundaries();
5791 my $oversize_chunk
5792 = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
5793 : 0;
5794 if ( $oversize_chunk
5795 && $nibble_iter->identical_boundaries(
5796 $boundary->{upper}, $boundary->{next_lower}) )
5797 {
5798 my $msg
5799 = "Aborting copying table $tbl->{name} at chunk "
5800 . $nibble_iter->nibble_number()
5801 . " because the chunk is too large: MySQL estimates "
5802 . ($expl->{rows} || 0) . "rows. The current chunk "
5803 . "size limit is " . ($tbl->{chunk_size} * $limit)
5804 . " rows (chunk size=$tbl->{chunk_size}"
5805 . " * chunk size limit=$limit).\n";
5806 die $msg;
5807 }
5808 }
5809 }
58105907
5811 # Exec and time the chunk checksum query.5908 # Exec and time the chunk checksum query.
5812 $tbl->{nibble_time} = exec_nibble(5909 $tbl->{nibble_time} = exec_nibble(
@@ -5909,18 +6006,19 @@
5909 # This won't (shouldn't) fail because we already verified in6006 # This won't (shouldn't) fail because we already verified in
5910 # check_orig_table() table we can NibbleIterator::can_nibble().6007 # check_orig_table() table we can NibbleIterator::can_nibble().
5911 my $nibble_iter = new NibbleIterator(6008 my $nibble_iter = new NibbleIterator(
5912 Cxn => $cxn,6009 Cxn => $cxn,
5913 tbl => $orig_tbl,6010 tbl => $orig_tbl,
5914 chunk_size => $orig_tbl->{chunk_size},6011 chunk_size => $orig_tbl->{chunk_size},
5915 chunk_index => $o->get('chunk-index'),6012 chunk_index => $chunk_index,
5916 dml => $dml,6013 n_chunk_index_cols => $n_chunk_index_cols,
5917 select => $select,6014 dml => $dml,
5918 callbacks => $callbacks,6015 select => $select,
5919 OptionParser => $o,6016 callbacks => $callbacks,
5920 Quoter => $q,6017 OptionParser => $o,
5921 TableParser => $tp,6018 Quoter => $q,
5922 TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),6019 TableParser => $tp,
5923 comments => {6020 TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
6021 comments => {
5924 bite => "pt-online-schema-change $PID copy table",6022 bite => "pt-online-schema-change $PID copy table",
5925 nibble => "pt-online-schema-change $PID copy nibble",6023 nibble => "pt-online-schema-change $PID copy nibble",
5926 },6024 },
@@ -6110,6 +6208,82 @@
6110# ############################################################################6208# ############################################################################
6111# Subroutines.6209# Subroutines.
6112# ############################################################################6210# ############################################################################
6211
6212sub nibble_is_safe {
6213 my (%args) = @_;
6214 my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
6215 foreach my $arg ( @required_args ) {
6216 die "I need a $arg argument" unless $args{$arg};
6217 }
6218 my ($cxn, $tbl, $nibble_iter, $o)= @args{@required_args};
6219
6220 # EXPLAIN the checksum chunk query to get its row estimate and index.
6221 # XXX This call and others like it are relying on a Perl oddity.
6222 # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
6223 my $sth = $nibble_iter->statements();
6224 my $boundary = $nibble_iter->boundaries();
6225 my $expl = explain_statement(
6226 tbl => $tbl,
6227 sth => $sth->{explain_nibble},
6228 vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
6229 );
6230
6231 # Ensure that MySQL is using the chunk index if the table is being chunked.
6232 if ( !$nibble_iter->one_nibble()
6233 && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
6234 if ( !$tbl->{warned}->{not_using_chunk_index}++
6235 && $o->get('quiet') < 2 ) {
6236 die "Error copying rows at chunk " . $nibble_iter->nibble_number()
6237 . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
6238 . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
6239 . " instead of the " . $nibble_iter->nibble_index() . "index.\n";
6240 }
6241 }
6242
6243 # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
6244 # If single-chunking the table, this has already been checked, so it
6245 # shouldn't have changed. If chunking the table with a non-unique key,
6246 # oversize chunks are possible.
6247 if ( my $limit = $o->get('chunk-size-limit') ) {
6248 my $oversize_chunk
6249 = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
6250 : 0;
6251 if ( $oversize_chunk
6252 && $nibble_iter->identical_boundaries($boundary->{upper},
6253 $boundary->{next_lower}) ) {
6254 if ( !$tbl->{warned}->{oversize_chunk}++
6255 && $o->get('quiet') < 2 ) {
6256 die "Error copying rows at chunk " . $nibble_iter->nibble_number()
6257 . " of $tbl->{db}.$tbl->{tbl} because it is oversized. "
6258 . "The current chunk size limit is "
6259 . ($tbl->{chunk_size} * $limit)
6260 . " rows (chunk size=$tbl->{chunk_size}"
6261 . " * chunk size limit=$limit), but MySQL estimates "
6262 . "that there are " . ($expl->{rows} || 0)
6263 . " rows in the chunk.\n";
6264 }
6265 }
6266 }
6267
6268 # Ensure that MySQL is still using the entire index.
6269 # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
6270 if ( !$nibble_iter->one_nibble()
6271 && $tbl->{key_len}
6272 && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
6273 if ( !$tbl->{warned}->{key_len}++
6274 && $o->get('quiet') < 2 ) {
6275 die "Error copying rows at chunk " . $nibble_iter->nibble_number()
6276 . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
6277 . "only " . ($expl->{key_len} || 0) . " bytes "
6278 . "of the " . ($expl->{key} || '?') . " index instead of "
6279 . $tbl->{key_len} . ". See the --[no]check-plan documentation "
6280 . "for more information.\n";
6281 }
6282 }
6283
6284 return 1; # safe
6285}
6286
6113sub create_new_table{6287sub create_new_table{
6114 my (%args) = @_;6288 my (%args) = @_;
6115 my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);6289 my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);
@@ -7109,6 +7283,39 @@
71097283
7110Sleep time between checks for L<"--max-lag">.7284Sleep time between checks for L<"--max-lag">.
71117285
7286=item --[no]check-plan
7287
7288default: yes
7289
7290Check query execution plans for safety. By default, this option causes
7291the tool to run EXPLAIN before running queries that are meant to access
7292a small amount of data, but which could access many rows if MySQL chooses a bad
7293execution plan. These include the queries to determine chunk boundaries and the
7294chunk queries themselves. If it appears that MySQL will use a bad query
7295execution plan, the tool will skip the chunk of the table.
7296
7297The tool uses several heuristics to determine whether an execution plan is bad.
7298The first is whether EXPLAIN reports that MySQL intends to use the desired index
7299to access the rows. If MySQL chooses a different index, the tool considers the
7300query unsafe.
7301
7302The tool also checks how much of the index MySQL reports that it will use for
7303the query. The EXPLAIN output shows this in the key_len column. The tool
7304remembers the largest key_len seen, and skips chunks where MySQL reports that it
7305will use a smaller prefix of the index. This heuristic can be understood as
7306skipping chunks that have a worse execution plan than other chunks.
7307
7308The tool prints a warning the first time a chunk is skipped due to
7309a bad execution plan in each table. Subsequent chunks are skipped silently,
7310although you can see the count of skipped chunks in the SKIPPED column in
7311the tool's output.
7312
7313This option adds some setup work to each table and chunk. Although the work is
7314not intrusive for MySQL, it results in more round-trips to the server, which
7315consumes time. Making chunks too small will cause the overhead to become
7316relatively larger. It is therefore recommended that you not make chunks too
7317small, because the tool may take a very long time to complete if you do.
7318
7112=item --[no]check-replication-filters7319=item --[no]check-replication-filters
71137320
7114default: yes7321default: yes
@@ -7145,6 +7352,17 @@
7145a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of7352a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
7146index could cause bad performance.7353index could cause bad performance.
71477354
7355This option supports a special syntax to select a prefix of the index instead of
7356the entire index. The syntax is NAME:N, where NAME is the name of the index, and
7357N is the number of columns you wish to use. This works only for compound
7358indexes, and is useful in cases where a bug in the MySQL query optimizer
7359(planner) causes it to scan a large range of rows instead of using the index to
7360locate starting and ending points precisely. This problem sometimes occurs on
7361indexes with many columns, such as 4 or more. If this happens, the tool might
7362print a warning related to the L<"--[no]check-plan"> option. Instructing
7363the tool to use only the first N columns from the index is a workaround for
7364the bug in some cases.
7365
7148=item --chunk-size7366=item --chunk-size
71497367
7150type: size; default: 10007368type: size; default: 1000
71517369
=== modified file 'bin/pt-table-checksum'
--- bin/pt-table-checksum 2012-05-25 21:34:58 +0000
+++ bin/pt-table-checksum 2012-06-10 17:15:24 +0000
@@ -2208,8 +2208,12 @@
22082208
2209 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};2209 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
2210 if ( $args{asc_first} ) {2210 if ( $args{asc_first} ) {
2211 PTDEBUG && _d('Ascending only first column');
2211 @asc_cols = $asc_cols[0];2212 @asc_cols = $asc_cols[0];
2212 PTDEBUG && _d('Ascending only first column');2213 }
2214 elsif ( my $n = $args{n_index_cols} ) {
2215 PTDEBUG && _d('Ascending only first', $n, 'columns');
2216 @asc_cols = @asc_cols[0..($n-1)];
2213 }2217 }
2214 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));2218 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
22152219
@@ -3594,10 +3598,11 @@
35943598
3595 my $asc = $args{TableNibbler}->generate_asc_stmt(3599 my $asc = $args{TableNibbler}->generate_asc_stmt(
3596 %args,3600 %args,
3597 tbl_struct => $tbl->{tbl_struct},3601 tbl_struct => $tbl->{tbl_struct},
3598 index => $index,3602 index => $index,
3599 cols => \@cols,3603 n_index_cols => $args{n_chunk_index_cols},
3600 asc_only => 1,3604 cols => \@cols,
3605 asc_only => 1,
3601 );3606 );
3602 PTDEBUG && _d('Ascend params:', Dumper($asc));3607 PTDEBUG && _d('Ascend params:', Dumper($asc));
36033608
@@ -3679,16 +3684,17 @@
36793684
3680 $self = {3685 $self = {
3681 %args,3686 %args,
3682 index => $index,3687 index => $index,
3683 limit => $limit,3688 limit => $limit,
3684 first_lb_sql => $first_lb_sql,3689 first_lb_sql => $first_lb_sql,
3685 last_ub_sql => $last_ub_sql,3690 last_ub_sql => $last_ub_sql,
3686 ub_sql => $ub_sql,3691 ub_sql => $ub_sql,
3687 nibble_sql => $nibble_sql,3692 nibble_sql => $nibble_sql,
3688 explain_ub_sql => "EXPLAIN $ub_sql",3693 explain_first_lb_sql => "EXPLAIN $first_lb_sql",
3689 explain_nibble_sql => $explain_nibble_sql,3694 explain_ub_sql => "EXPLAIN $ub_sql",
3690 resume_lb_sql => $resume_lb_sql,3695 explain_nibble_sql => $explain_nibble_sql,
3691 sql => {3696 resume_lb_sql => $resume_lb_sql,
3697 sql => {
3692 columns => $asc->{scols},3698 columns => $asc->{scols},
3693 from => $from,3699 from => $from,
3694 where => $where,3700 where => $where,
@@ -3796,10 +3802,11 @@
3796sub statements {3802sub statements {
3797 my ($self) = @_;3803 my ($self) = @_;
3798 return {3804 return {
3799 nibble => $self->{nibble_sth},3805 explain_first_lower_boundary => $self->{explain_first_lb_sth},
3800 explain_nibble => $self->{explain_nibble_sth},3806 nibble => $self->{nibble_sth},
3801 upper_boundary => $self->{ub_sth},3807 explain_nibble => $self->{explain_nibble_sth},
3802 explain_upper_boundary => $self->{explain_ub_sth},3808 upper_boundary => $self->{ub_sth},
3809 explain_upper_boundary => $self->{explain_ub_sth},
3803 }3810 }
3804}3811}
38053812
@@ -4028,8 +4035,9 @@
4028 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});4035 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
40294036
4030 if ( !$self->{one_nibble} ) {4037 if ( !$self->{one_nibble} ) {
4031 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});4038 $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
4032 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});4039 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
4040 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
4033 }4041 }
40344042
4035 return;4043 return;
@@ -5113,6 +5121,7 @@
51135121
5114use Time::Local qw(timegm timelocal);5122use Time::Local qw(timegm timelocal);
5115use Digest::MD5 qw(md5_hex);5123use Digest::MD5 qw(md5_hex);
5124use B qw();
51165125
5117require Exporter;5126require Exporter;
5118our @ISA = qw(Exporter);5127our @ISA = qw(Exporter);
@@ -5130,6 +5139,7 @@
5130 any_unix_timestamp5139 any_unix_timestamp
5131 make_checksum5140 make_checksum
5132 crc325141 crc32
5142 encode_json
5133);5143);
51345144
5135our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;5145our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
@@ -5337,6 +5347,96 @@
5337 return $crc ^ 0xFFFFFFFF;5347 return $crc ^ 0xFFFFFFFF;
5338}5348}
53395349
5350my $got_json = eval { require JSON };
5351sub encode_json {
5352 return JSON::encode_json(@_) if $got_json;
5353 my ( $data ) = @_;
5354 return (object_to_json($data) || '');
5355}
5356
5357
5358sub object_to_json {
5359 my ($obj) = @_;
5360 my $type = ref($obj);
5361
5362 if($type eq 'HASH'){
5363 return hash_to_json($obj);
5364 }
5365 elsif($type eq 'ARRAY'){
5366 return array_to_json($obj);
5367 }
5368 else {
5369 return value_to_json($obj);
5370 }
5371}
5372
5373sub hash_to_json {
5374 my ($obj) = @_;
5375 my @res;
5376 for my $k ( sort { $a cmp $b } keys %$obj ) {
5377 push @res, string_to_json( $k )
5378 . ":"
5379 . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
5380 }
5381 return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
5382}
5383
5384sub array_to_json {
5385 my ($obj) = @_;
5386 my @res;
5387
5388 for my $v (@$obj) {
5389 push @res, object_to_json($v) || value_to_json($v);
5390 }
5391
5392 return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
5393}
5394
5395sub value_to_json {
5396 my ($value) = @_;
5397
5398 return 'null' if(!defined $value);
5399
5400 my $b_obj = B::svref_2object(\$value); # for round trip problem
5401 my $flags = $b_obj->FLAGS;
5402 return $value # as is
5403 if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
5404
5405 my $type = ref($value);
5406
5407 if( !$type ) {
5408 return string_to_json($value);
5409 }
5410 else {
5411 return 'null';
5412 }
5413
5414}
5415
5416my %esc = (
5417 "\n" => '\n',
5418 "\r" => '\r',
5419 "\t" => '\t',
5420 "\f" => '\f',
5421 "\b" => '\b',
5422 "\"" => '\"',
5423 "\\" => '\\\\',
5424 "\'" => '\\\'',
5425);
5426
5427sub string_to_json {
5428 my ($arg) = @_;
5429
5430 $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
5431 $arg =~ s/\//\\\//g;
5432 $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
5433
5434 utf8::upgrade($arg);
5435 utf8::encode($arg);
5436
5437 return '"' . $arg . '"';
5438}
5439
5340sub _d {5440sub _d {
5341 my ($package, undef, $line) = caller 0;5441 my ($package, undef, $line) = caller 0;
5342 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }5442 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
@@ -5988,6 +6088,22 @@
5988 }6088 }
5989 }6089 }
59906090
6091 # Parse --chunk-index INDEX:N where N is the number of
6092 # left-most columns of INDEX to use.
6093 # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
6094 my ($chunk_index, $n_chunk_index_cols)
6095 = split(':', $o->get('chunk-index') || '');
6096 if ( defined $chunk_index && $chunk_index eq '' ) {
6097 $o->save_error('--chunk-index cannot be an empty string');
6098 }
6099 if ( defined $n_chunk_index_cols
6100 && (!$n_chunk_index_cols
6101 || $n_chunk_index_cols =~ m/\D/
6102 || $n_chunk_index_cols < 1) ) {
6103 $o->save_error('Invalid number of --chunk-index columns: '
6104 . $n_chunk_index_cols);
6105 }
6106
5991 if ( !$o->get('help') ) {6107 if ( !$o->get('help') ) {
5992 if ( @ARGV > 1 ) {6108 if ( @ARGV > 1 ) {
5993 $o->save_error("More than one host specified; only one allowed");6109 $o->save_error("More than one host specified; only one allowed");
@@ -6465,6 +6581,7 @@
6465 my (%args) = @_;6581 my (%args) = @_;
6466 my $tbl = $args{tbl};6582 my $tbl = $args{tbl};
6467 my $nibble_iter = $args{NibbleIterator};6583 my $nibble_iter = $args{NibbleIterator};
6584 my $statements = $nibble_iter->statements();
6468 my $oktonibble = 1;6585 my $oktonibble = 1;
64696586
6470 if ( $last_chunk ) { # resuming6587 if ( $last_chunk ) { # resuming
@@ -6493,7 +6610,7 @@
6493 print "--\n",6610 print "--\n",
6494 "-- $tbl->{db}.$tbl->{tbl}\n",6611 "-- $tbl->{db}.$tbl->{tbl}\n",
6495 "--\n\n";6612 "--\n\n";
6496 my $statements = $nibble_iter->statements();6613
6497 foreach my $sth ( sort keys %$statements ) {6614 foreach my $sth ( sort keys %$statements ) {
6498 next if $sth =~ m/^explain/;6615 next if $sth =~ m/^explain/;
6499 if ( $statements->{$sth} ) {6616 if ( $statements->{$sth} ) {
@@ -6551,6 +6668,34 @@
6551 $oktonibble = 0;6668 $oktonibble = 0;
6552 }6669 }
6553 }6670 }
6671 else { # chunking the table
6672 if ( $o->get('check-plan') ) {
6673 my $expl = explain_statement(
6674 sth => $statements->{explain_first_lower_boundary},
6675 tbl => $tbl,
6676 vals => [],
6677 );
6678 if ( !$expl->{key}
6679 || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
6680 {
6681 die "Cannot determine the key_len of the chunk index "
6682 . "because MySQL chose "
6683 . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
6684 . "instead of the " . $nibble_iter->nibble_index()
6685 . " index for the first lower boundary statement. "
6686 . "See --[no]check-plan in the documentation for more "
6687 . "information.";
6688 }
6689 elsif ( !$expl->{key_len} ) {
6690 die "The key_len of the $expl->{key} index is "
6691 . (defined $expl->{key_len} ? "zero" : "NULL")
6692 . ", but this should not be possible. "
6693 . "See --[no]check-plan in the documentation for more "
6694 . "information.";
6695 }
6696 $tbl->{key_len} = $expl->{key_len};
6697 }
6698 }
65546699
6555 if ( $oktonibble && $o->get('empty-replicate-table') ) {6700 if ( $oktonibble && $o->get('empty-replicate-table') ) {
6556 use_repl_db(6701 use_repl_db(
@@ -6604,16 +6749,14 @@
6604 ne lc($nibble_iter->nibble_index() || '') ) {6749 ne lc($nibble_iter->nibble_index() || '') ) {
6605 PTDEBUG && _d('Cannot nibble next chunk, aborting table');6750 PTDEBUG && _d('Cannot nibble next chunk, aborting table');
6606 if ( $o->get('quiet') < 2 ) {6751 if ( $o->get('quiet') < 2 ) {
6607 my $msg6752 warn ts("Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
6608 = "Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
6609 . ($nibble_iter->nibble_number() + 1)6753 . ($nibble_iter->nibble_number() + 1)
6610 . " because it is not safe to chunk. Chunking should "6754 . " because it is not safe to chunk. Chunking should "
6611 . "use the "6755 . "use the "
6612 . ($nibble_iter->nibble_index() || '?')6756 . ($nibble_iter->nibble_index() || '?')
6613 . " index, but MySQL EXPLAIN reports that "6757 . " index, but MySQL chose "
6614 . ($expl->{key} ? "the $expl->{key}" : "no")6758 . ($expl->{key} ? "the $expl->{key}" : "no")
6615 . " index will be used.\n";6759 . " index.\n");
6616 warn ts($msg);
6617 }6760 }
6618 $tbl->{checksum_results}->{errors}++;6761 $tbl->{checksum_results}->{errors}++;
6619 return 0; # stop nibbling table6762 return 0; # stop nibbling table
@@ -6658,43 +6801,13 @@
6658 return 0; # next boundary6801 return 0; # next boundary
6659 }6802 }
66606803
6661 # If the table is being chunk (i.e., it's not small enough to be6804 # Skip this nibble unless it's safe.
6662 # consumed by one nibble), then check index usage and chunk size.6805 return 0 unless nibble_is_safe(
6663 # XXX This call and others like it are relying on a Perl oddity.6806 %args,
6664 # See https://bugs.launchpad.net/percona-toolkit/+bug/9873936807 OptionParser => $o,
6665 if ( !$nibble_iter->one_nibble() ) {6808 );
6666 my $expl = explain_statement(6809
6667 tbl => $tbl,6810 # Exec and time the nibble.
6668 sth => $sth->{explain_nibble},
6669 vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
6670 );
6671 my $oversize_chunk
6672 = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
6673 : 0;
6674
6675 # Ensure that MySQL is using the chunk index.
6676 if ( lc($expl->{key} || '')
6677 ne lc($nibble_iter->nibble_index() || '') ) {
6678 PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
6679 "$tbl->{db}.$tbl->{tbl} not using chunk index, skipping");
6680 return 0; # next boundary
6681 }
6682
6683 # Check chunk size limit if the upper boundary and next lower
6684 # boundary are identical.
6685 if ( $limit ) {
6686 my $boundary = $nibble_iter->boundaries();
6687 if ( $nibble_iter->identical_boundaries(
6688 $boundary->{upper}, $boundary->{next_lower})
6689 && $oversize_chunk ) {
6690 PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
6691 "$tbl->{db}.$tbl->{tbl} is too large, skipping");
6692 return 0; # next boundary
6693 }
6694 }
6695 }
6696
6697 # Exec and time the chunk checksum query.
6698 $tbl->{nibble_time} = exec_nibble(6811 $tbl->{nibble_time} = exec_nibble(
6699 %args,6812 %args,
6700 Retry => $retry,6813 Retry => $retry,
@@ -6776,7 +6889,7 @@
6776 $tbl->{chunk_size} = 1;6889 $tbl->{chunk_size} = 1;
67776890
6778 # This warning is printed once per table.6891 # This warning is printed once per table.
6779 if ( !$tbl->{warned_slow} && $o->get('quiet') < 2 ) {6892 if ( !$tbl->{warned}->{slow}++ && $o->get('quiet') < 2 ) {
6780 warn ts("Checksum queries for table "6893 warn ts("Checksum queries for table "
6781 . "$tbl->{db}.$tbl->{tbl} are executing very slowly. "6894 . "$tbl->{db}.$tbl->{tbl} are executing very slowly. "
6782 . "--chunk-size has been automatically reduced to 1. "6895 . "--chunk-size has been automatically reduced to 1. "
@@ -6786,7 +6899,6 @@
6786 . "selected $cnt rows and took "6899 . "selected $cnt rows and took "
6787 . sprintf('%.3f', $tbl->{nibble_time})6900 . sprintf('%.3f', $tbl->{nibble_time})
6788 . " seconds to execute.\n");6901 . " seconds to execute.\n");
6789 $tbl->{warned_slow} = 1;
6790 }6902 }
6791 }6903 }
67926904
@@ -6918,22 +7030,23 @@
6918 my $nibble_iter;7030 my $nibble_iter;
6919 eval {7031 eval {
6920 $nibble_iter = new OobNibbleIterator(7032 $nibble_iter = new OobNibbleIterator(
6921 Cxn => $master_cxn,7033 Cxn => $master_cxn,
6922 tbl => $tbl,7034 tbl => $tbl,
6923 chunk_size => $tbl->{chunk_size},7035 chunk_size => $tbl->{chunk_size},
6924 chunk_index => $o->get('chunk-index'),7036 chunk_index => $chunk_index,
6925 dml => $checksum_dml,7037 n_chunk_index_cols => $n_chunk_index_cols,
6926 select => $checksum_cols,7038 dml => $checksum_dml,
6927 past_dml => $checksum_dml,7039 select => $checksum_cols,
6928 past_select => $past_cols,7040 past_dml => $checksum_dml,
6929 callbacks => $callbacks,7041 past_select => $past_cols,
6930 resume => $last_chunk,7042 callbacks => $callbacks,
6931 OptionParser => $o,7043 resume => $last_chunk,
6932 Quoter => $q,7044 OptionParser => $o,
6933 TableNibbler => $tn,7045 Quoter => $q,
6934 TableParser => $tp,7046 TableNibbler => $tn,
6935 RowChecksum => $rc,7047 TableParser => $tp,
6936 comments => {7048 RowChecksum => $rc,
7049 comments => {
6937 bite => "checksum table",7050 bite => "checksum table",
6938 nibble => "checksum chunk",7051 nibble => "checksum chunk",
6939 },7052 },
@@ -7008,6 +7121,84 @@
7008 return $msg ? "$ts $msg" : $ts;7121 return $msg ? "$ts $msg" : $ts;
7009}7122}
70107123
7124sub nibble_is_safe {
7125 my (%args) = @_;
7126 my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
7127 foreach my $arg ( @required_args ) {
7128 die "I need a $arg argument" unless $args{$arg};
7129 }
7130 my ($cxn, $tbl, $nibble_iter, $o)= @args{@required_args};
7131
7132 # EXPLAIN the checksum chunk query to get its row estimate and index.
7133 # XXX This call and others like it are relying on a Perl oddity.
7134 # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
7135 my $sth = $nibble_iter->statements();
7136 my $boundary = $nibble_iter->boundaries();
7137 my $expl = explain_statement(
7138 tbl => $tbl,
7139 sth => $sth->{explain_nibble},
7140 vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
7141 );
7142
7143 # Ensure that MySQL is using the chunk index if the table is being chunked.
7144 if ( !$nibble_iter->one_nibble()
7145 && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
7146 if ( !$tbl->{warned}->{not_using_chunk_index}++
7147 && $o->get('quiet') < 2 ) {
7148 warn ts("Skipping chunk " . $nibble_iter->nibble_number()
7149 . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
7150 . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
7151 . " instead of the " . $nibble_iter->nibble_index() . "index.\n");
7152 }
7153 return 0; # not safe
7154 }
7155
7156 # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
7157 # If single-chunking the table, this has already been checked, so it
7158 # shouldn't have changed. If chunking the table with a non-unique key,
7159 # oversize chunks are possible.
7160 if ( my $limit = $o->get('chunk-size-limit') ) {
7161 my $oversize_chunk
7162 = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
7163 : 0;
7164 if ( $oversize_chunk
7165 && $nibble_iter->identical_boundaries($boundary->{upper},
7166 $boundary->{next_lower}) ) {
7167 if ( !$tbl->{warned}->{oversize_chunk}++
7168 && $o->get('quiet') < 2 ) {
7169 warn ts("Skipping chunk " . $nibble_iter->nibble_number()
7170 . " of $tbl->{db}.$tbl->{tbl} because it is oversized. "
7171 . "The current chunk size limit is "
7172 . ($tbl->{chunk_size} * $limit)
7173 . " rows (chunk size=$tbl->{chunk_size}"
7174 . " * chunk size limit=$limit), but MySQL estimates "
7175 . "that there are " . ($expl->{rows} || 0)
7176 . " rows in the chunk.\n");
7177 }
7178 return 0; # not safe
7179 }
7180 }
7181
7182 # Ensure that MySQL is still using the entire index.
7183 # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
7184 if ( !$nibble_iter->one_nibble()
7185 && $tbl->{key_len}
7186 && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
7187 if ( !$tbl->{warned}->{key_len}++
7188 && $o->get('quiet') < 2 ) {
7189 warn ts("Skipping chunk " . $nibble_iter->nibble_number()
7190 . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
7191 . "only " . ($expl->{key_len} || 0) . " bytes "
7192 . "of the " . ($expl->{key} || '?') . " index instead of "
7193 . $tbl->{key_len} . ". See the --[no]check-plan documentation "
7194 . "for more information.\n");
7195 }
7196 return 0; # not safe
7197 }
7198
7199 return 1; # safe
7200}
7201
7011sub exec_nibble {7202sub exec_nibble {
7012 my (%args) = @_;7203 my (%args) = @_;
7013 my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser);7204 my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser);
@@ -7074,7 +7265,7 @@
7074 && (!$warn_code{$code}->{pattern}7265 && (!$warn_code{$code}->{pattern}
7075 || $message =~ m/$warn_code{$code}->{pattern}/) )7266 || $message =~ m/$warn_code{$code}->{pattern}/) )
7076 {7267 {
7077 if ( !$tbl->{"warned_code_$code"} ) { # warn once per table7268 if ( !$tbl->{warned}->{$code}++ ) { # warn once per table
7078 if ( $o->get('quiet') < 2 ) {7269 if ( $o->get('quiet') < 2 ) {
7079 warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} "7270 warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} "
7080 . "caused MySQL error $code: "7271 . "caused MySQL error $code: "
@@ -7083,7 +7274,6 @@
7083 : $message)7274 : $message)
7084 . "\n");7275 . "\n");
7085 }7276 }
7086 $tbl->{"warned_code_$code"} = 1;
7087 $tbl->{checksum_results}->{errors}++;7277 $tbl->{checksum_results}->{errors}++;
7088 }7278 }
7089 }7279 }
@@ -7910,6 +8100,39 @@
79108100
7911Sleep time between checks for L<"--max-lag">.8101Sleep time between checks for L<"--max-lag">.
79128102
8103=item --[no]check-plan
8104
8105default: yes
8106
8107Check query execution plans for safety. By default, this option causes
8108pt-table-checksum to run EXPLAIN before running queries that are meant to access
8109a small amount of data, but which could access many rows if MySQL chooses a bad
8110execution plan. These include the queries to determine chunk boundaries and the
8111chunk queries themselves. If it appears that MySQL will use a bad query
8112execution plan, the tool will skip the chunk of the table.
8113
8114The tool uses several heuristics to determine whether an execution plan is bad.
8115The first is whether EXPLAIN reports that MySQL intends to use the desired index
8116to access the rows. If MySQL chooses a different index, the tool considers the
8117query unsafe.
8118
8119The tool also checks how much of the index MySQL reports that it will use for
8120the query. The EXPLAIN output shows this in the key_len column. The tool
8121remembers the largest key_len seen, and skips chunks where MySQL reports that it
8122will use a smaller prefix of the index. This heuristic can be understood as
8123skipping chunks that have a worse execution plan than other chunks.
8124
8125The tool prints a warning the first time a chunk is skipped due to
8126a bad execution plan in each table. Subsequent chunks are skipped silently,
8127although you can see the count of skipped chunks in the SKIPPED column in
8128the tool's output.
8129
8130This option adds some setup work to each table and chunk. Although the work is
8131not intrusive for MySQL, it results in more round-trips to the server, which
8132consumes time. Making chunks too small will cause the overhead to become
8133relatively larger. It is therefore recommended that you not make chunks too
8134small, because the tool may take a very long time to complete if you do.
8135
7913=item --[no]check-replication-filters8136=item --[no]check-replication-filters
79148137
7915default: yes; group: Safety8138default: yes; group: Safety
@@ -7959,12 +8182,24 @@
7959This is probably best to use when you are checksumming only a single table, not8182This is probably best to use when you are checksumming only a single table, not
7960an entire server.8183an entire server.
79618184
8185This option supports a special syntax to select a prefix of the index instead of
8186the entire index. The syntax is NAME:N, where NAME is the name of the index, and
8187N is the number of columns you wish to use. This works only for compound
8188indexes, and is useful in cases where a bug in the MySQL query optimizer
8189(planner) causes it to scan a large range of rows instead of using the index to
8190locate starting and ending points precisely. This problem sometimes occurs on
8191indexes with many columns, such as 4 or more. If this happens, the tool might
8192print a warning related to the L<"--[no]check-plan"> option. Instructing
8193the tool to use only the first N columns from the index is a workaround for
8194the bug in some cases.
8195
7962=item --chunk-size8196=item --chunk-size
79638197
7964type: size; default: 10008198type: size; default: 1000
79658199
7966Number of rows to select for each checksum query. Allowable suffixes are8200Number of rows to select for each checksum query. Allowable suffixes are
7967k, M, G.8201k, M, G. You should not use this option in most cases; prefer L<"--chunk-time">
8202instead.
79688203
7969This option can override the default behavior, which is to adjust chunk size8204This option can override the default behavior, which is to adjust chunk size
7970dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds.8205dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds.
@@ -7980,6 +8215,9 @@
798010,000 rows large. Such a chunk will probably be skipped because of821510,000 rows large. Such a chunk will probably be skipped because of
7981L<"--chunk-size-limit">.8216L<"--chunk-size-limit">.
79828217
8218Selecting a small chunk size will cause the tool to become much slower, in part
8219because of the setup work required for L<"--[no]check-plan">.
8220
7983=item --chunk-size-limit8221=item --chunk-size-limit
79848222
7985type: float; default: 2.0; group: Safety8223type: float; default: 2.0; group: Safety
79868224
=== modified file 'lib/NibbleIterator.pm'
--- lib/NibbleIterator.pm 2012-05-10 16:21:44 +0000
+++ lib/NibbleIterator.pm 2012-06-10 17:15:24 +0000
@@ -124,10 +124,11 @@
124 # Figure out how to nibble the table with the index.124 # Figure out how to nibble the table with the index.
125 my $asc = $args{TableNibbler}->generate_asc_stmt(125 my $asc = $args{TableNibbler}->generate_asc_stmt(
126 %args,126 %args,
127 tbl_struct => $tbl->{tbl_struct},127 tbl_struct => $tbl->{tbl_struct},
128 index => $index,128 index => $index,
129 cols => \@cols,129 n_index_cols => $args{n_chunk_index_cols},
130 asc_only => 1,130 cols => \@cols,
131 asc_only => 1,
131 );132 );
132 PTDEBUG && _d('Ascend params:', Dumper($asc));133 PTDEBUG && _d('Ascend params:', Dumper($asc));
133134
@@ -229,16 +230,17 @@
229230
230 $self = {231 $self = {
231 %args,232 %args,
232 index => $index,233 index => $index,
233 limit => $limit,234 limit => $limit,
234 first_lb_sql => $first_lb_sql,235 first_lb_sql => $first_lb_sql,
235 last_ub_sql => $last_ub_sql,236 last_ub_sql => $last_ub_sql,
236 ub_sql => $ub_sql,237 ub_sql => $ub_sql,
237 nibble_sql => $nibble_sql,238 nibble_sql => $nibble_sql,
238 explain_ub_sql => "EXPLAIN $ub_sql",239 explain_first_lb_sql => "EXPLAIN $first_lb_sql",
239 explain_nibble_sql => $explain_nibble_sql,240 explain_ub_sql => "EXPLAIN $ub_sql",
240 resume_lb_sql => $resume_lb_sql,241 explain_nibble_sql => $explain_nibble_sql,
241 sql => {242 resume_lb_sql => $resume_lb_sql,
243 sql => {
242 columns => $asc->{scols},244 columns => $asc->{scols},
243 from => $from,245 from => $from,
244 where => $where,246 where => $where,
@@ -357,10 +359,11 @@
357sub statements {359sub statements {
358 my ($self) = @_;360 my ($self) = @_;
359 return {361 return {
360 nibble => $self->{nibble_sth},362 explain_first_lower_boundary => $self->{explain_first_lb_sth},
361 explain_nibble => $self->{explain_nibble_sth},363 nibble => $self->{nibble_sth},
362 upper_boundary => $self->{ub_sth},364 explain_nibble => $self->{explain_nibble_sth},
363 explain_upper_boundary => $self->{explain_ub_sth},365 upper_boundary => $self->{ub_sth},
366 explain_upper_boundary => $self->{explain_ub_sth},
364 }367 }
365}368}
366369
@@ -613,8 +616,9 @@
613 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});616 $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
614617
615 if ( !$self->{one_nibble} ) {618 if ( !$self->{one_nibble} ) {
616 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});619 $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
617 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});620 $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
621 $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
618 }622 }
619623
620 return;624 return;
621625
=== modified file 'lib/TableNibbler.pm'
--- lib/TableNibbler.pm 2012-01-19 19:46:56 +0000
+++ lib/TableNibbler.pm 2012-06-10 17:15:24 +0000
@@ -41,6 +41,7 @@
41# * tbl_struct Hashref returned from TableParser::parse().41# * tbl_struct Hashref returned from TableParser::parse().
42# * cols Arrayref of columns to SELECT from the table42# * cols Arrayref of columns to SELECT from the table
43# * index Which index to ascend; optional.43# * index Which index to ascend; optional.
44# * n_index_cols The number of left-most index columns to use.
44# * asc_only Whether to ascend strictly, that is, the WHERE clause for45# * asc_only Whether to ascend strictly, that is, the WHERE clause for
45# the asc_stmt will fetch the next row > the given arguments.46# the asc_stmt will fetch the next row > the given arguments.
46# The option is to fetch the row >=, which could loop47# The option is to fetch the row >=, which could loop
@@ -77,8 +78,12 @@
77 # These are the columns we'll ascend.78 # These are the columns we'll ascend.
78 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};79 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
79 if ( $args{asc_first} ) {80 if ( $args{asc_first} ) {
81 PTDEBUG && _d('Ascending only first column');
80 @asc_cols = $asc_cols[0];82 @asc_cols = $asc_cols[0];
81 PTDEBUG && _d('Ascending only first column');83 }
84 elsif ( my $n = $args{n_index_cols} ) {
85 PTDEBUG && _d('Ascending only first', $n, 'columns');
86 @asc_cols = @asc_cols[0..($n-1)];
82 }87 }
83 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));88 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
8489
8590
=== modified file 't/lib/TableNibbler.t'
--- t/lib/TableNibbler.t 2012-03-06 13:56:08 +0000
+++ t/lib/TableNibbler.t 2012-06-10 17:15:24 +0000
@@ -9,7 +9,7 @@
9use strict;9use strict;
10use warnings FATAL => 'all';10use warnings FATAL => 'all';
11use English qw(-no_match_vars);11use English qw(-no_match_vars);
12use Test::More tests => 24;12use Test::More tests => 25;
1313
14use TableParser;14use TableParser;
15use TableNibbler;15use TableNibbler;
@@ -299,6 +299,34 @@
299299
300is_deeply(300is_deeply(
301 $n->generate_asc_stmt(301 $n->generate_asc_stmt(
302 tbl_struct => $t,
303 cols => $t->{cols},
304 index => 'rental_date',
305 n_index_cols => 2,
306 ),
307 {
308 cols => [qw(rental_id rental_date inventory_id customer_id
309 return_date staff_id last_update)],
310 index => 'rental_date',
311 where => '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))',
312 slice => [qw(1 1 2)],
313 scols => [qw(rental_date rental_date inventory_id)],
314 boundaries => {
315 '<' =>
316 '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?))',
317 '<=' =>
318 '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?))',
319 '>' =>
320 '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?))',
321 '>=' =>
322 '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))'
323 },
324 },
325 'Use only N left-most columns of the index',
326);
327
328is_deeply(
329 $n->generate_asc_stmt(
302 tbl_struct => $t,330 tbl_struct => $t,
303 cols => $t->{cols},331 cols => $t->{cols},
304 index => 'rental_date',332 index => 'rental_date',
305333
=== modified file 't/pt-table-checksum/chunk_index.t'
--- t/pt-table-checksum/chunk_index.t 2012-05-04 23:46:34 +0000
+++ t/pt-table-checksum/chunk_index.t 2012-06-10 17:15:24 +0000
@@ -25,7 +25,7 @@
25 plan skip_all => 'Cannot connect to sandbox master';25 plan skip_all => 'Cannot connect to sandbox master';
26}26}
27else {27else {
28 plan tests => 11;28 plan tests => 14;
29}29}
3030
31# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic31# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -158,6 +158,50 @@
158);158);
159159
160# #############################################################################160# #############################################################################
161# PK but bad explain plan.
162# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
163# #############################################################################
164$sb->load_file('master', "t/pt-table-checksum/samples/bad-plan-bug-1010232.sql");
165PerconaTest::wait_for_table($dbh, "bad_plan.t", "(c1,c2,c3,c4)=(1,1,2,100)");
166
167$output = output(sub {
168 $exit_status = pt_table_checksum::main(
169 $master_dsn, '--max-load', '',
170 qw(--lock-wait-timeout 3 --chunk-size 10 -t bad_plan.t)
171 ) },
172 stderr => 1,
173);
174
175is(
176 $exit_status,
177 0,
178 "Bad key_len chunks are not errors"
179);
180
181cmp_ok(
182 PerconaTest::count_checksum_results($output, 'skipped'),
183 '>',
184 1,
185 "Skipped bad key_len chunks"
186);
187
188# Use --chunk-index:3 to use only the first 3 left-most columns of the index.
189# Can't use bad_plan.t, however, because its row are almost all identical,
190# so using 3 of 4 pk cols creates an infinite loop.
191ok(
192 no_diff(
193 sub {
194 pt_table_checksum::main(
195 $master_dsn, '--max-load', '',
196 qw(--lock-wait-timeout 3 --chunk-size 5000 -t sakila.rental),
197 qw(--chunk-index rental_date:2 --explain --explain));
198 },
199 "t/pt-table-checksum/samples/n-chunk-index-cols.txt",
200 ),
201 "--chunk-index index:n"
202);
203
204# #############################################################################
161# Done.205# Done.
162# #############################################################################206# #############################################################################
163$sb->wipe_clean($dbh);207$sb->wipe_clean($dbh);
164208
=== added file 't/pt-table-checksum/samples/bad-plan-bug-1010232.sql'
--- t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 1970-01-01 00:00:00 +0000
+++ t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 2012-06-10 17:15:24 +0000
@@ -0,0 +1,17 @@
1DROP DATABASE IF EXISTS bad_plan;
2CREATE DATABASE bad_plan;
3USE bad_plan;
4
5CREATE TABLE t (
6 `c1` smallint(5) unsigned NOT NULL,
7 `c2` mediumint(8) unsigned NOT NULL DEFAULT '0',
8 `c3` smallint(5) unsigned NOT NULL DEFAULT '0',
9 `c4` smallint(5) unsigned NOT NULL DEFAULT '0',
10 PRIMARY KEY (`c1`,`c2`,`c3`,`c4`)
11) ENGINE=InnoDB;
12
13INSERT INTO t VALUES
14(1,1,1,1),(1,1,1,2),(1,1,1,3),(1,1,1,4),(1,1,1,5),(1,1,1,6),(1,1,1,7),(1,1,1,8),(1,1,1,9),(1,1,1,10),(1,1,1,11),(1,1,1,12),(1,1,1,13),(1,1,1,14),(1,1,1,15),(1,1,1,16),(1,1,1,17),(1,1,1,18),(1,1,1,19),(1,1,1,20),(1,1,1,21),(1,1,1,22),(1,1,1,23),(1,1,1,24),(1,1,1,25),(1,1,1,26),(1,1,1,27),(1,1,1,28),(1,1,1,29),(1,1,1,30),(1,1,1,31),(1,1,1,32),(1,1,1,33),(1,1,1,34),(1,1,1,35),(1,1,1,36),(1,1,1,37),(1,1,1,38),(1,1,1,39),(1,1,1,40),(1,1,1,41),(1,1,1,42),(1,1,1,43),(1,1,1,44),(1,1,1,45),(1,1,1,46),(1,1,1,47),(1,1,1,48),(1,1,1,49),(1,1,1,50),(1,1,1,51),(1,1,1,52),(1,1,1,53),(1,1,1,54),(1,1,1,55),(1,1,1,56),(1,1,1,57),(1,1,1,58),(1,1,1,59),(1,1,1,60),(1,1,1,61),(1,1,1,62),(1,1,1,63),(1,1,1,64),(1,1,1,65),(1,1,1,66),(1,1,1,67),(1,1,1,68),(1,1,1,69),(1,1,1,70),(1,1,1,71),(1,1,1,72),(1,1,1,73),(1,1,1,74),(1,1,1,75),(1,1,1,76),(1,1,1,77),(1,1,1,78),(1,1,1,79),(1,1,1,80),(1,1,1,81),(1,1,1,82),(1,1,1,83),(1,1,1,84),(1,1,1,85),(1,1,1,86),(1,1,1,87),(1,1,1,88),(1,1,1,89),(1,1,1,90),(1,1,1,91),(1,1,1,92),(1,1,1,93),(1,1,1,94),(1,1,1,95),(1,1,1,96),(1,1,1,97),(1,1,1,98),(1,1,1,99),(1,1,1,100),
15(1,1,2,1),(1,1,2,2),(1,1,2,3),(1,1,2,4),(1,1,2,5),(1,1,2,6),(1,1,2,7),(1,1,2,8),(1,1,2,9),(1,1,2,10),(1,1,2,11),(1,1,2,12),(1,1,2,13),(1,1,2,14),(1,1,2,15),(1,1,2,16),(1,1,2,17),(1,1,2,18),(1,1,2,19),(1,1,2,20),(1,1,2,21),(1,1,2,22),(1,1,2,23),(1,1,2,24),(1,1,2,25),(1,1,2,26),(1,1,2,27),(1,1,2,28),(1,1,2,29),(1,1,2,30),(1,1,2,31),(1,1,2,32),(1,1,2,33),(1,1,2,34),(1,1,2,35),(1,1,2,36),(1,1,2,37),(1,1,2,38),(1,1,2,39),(1,1,2,40),(1,1,2,41),(1,1,2,42),(1,1,2,43),(1,1,2,44),(1,1,2,45),(1,1,2,46),(1,1,2,47),(1,1,2,48),(1,1,2,49),(1,1,2,50),(1,1,2,51),(1,1,2,52),(1,1,2,53),(1,1,2,54),(1,1,2,55),(1,1,2,56),(1,1,2,57),(1,1,2,58),(1,1,2,59),(1,1,2,60),(1,1,2,61),(1,1,2,62),(1,1,2,63),(1,1,2,64),(1,1,2,65),(1,1,2,66),(1,1,2,67),(1,1,2,68),(1,1,2,69),(1,1,2,70),(1,1,2,71),(1,1,2,72),(1,1,2,73),(1,1,2,74),(1,1,2,75),(1,1,2,76),(1,1,2,77),(1,1,2,78),(1,1,2,79),(1,1,2,80),(1,1,2,81),(1,1,2,82),(1,1,2,83),(1,1,2,84),(1,1,2,85),(1,1,2,86),(1,1,2,87),(1,1,2,88),(1,1,2,89),(1,1,2,90),(1,1,2,91),(1,1,2,92),(1,1,2,93),(1,1,2,94),(1,1,2,95),(1,1,2,96),(1,1,2,97),(1,1,2,98),(1,1,2,99),(1,1,2,100);
16
17ANALYZE TABLE bad_plan.t;
018
=== added file 't/pt-table-checksum/samples/n-chunk-index-cols.txt'
--- t/pt-table-checksum/samples/n-chunk-index-cols.txt 1970-01-01 00:00:00 +0000
+++ t/pt-table-checksum/samples/n-chunk-index-cols.txt 2012-06-10 17:15:24 +0000
@@ -0,0 +1,19 @@
1--
2-- sakila.rental
3--
4
5REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `rental_id`, `rental_date`, `inventory_id`, `customer_id`, `return_date`, `staff_id`, `last_update` + 0, CONCAT(ISNULL(`return_date`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) AND ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?)) /*checksum chunk*/
6
7REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past lower chunk*/
8
9REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past upper chunk*/
10
11SELECT /*!40001 SQL_NO_CACHE */ `rental_date`, `rental_date`, `inventory_id` FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` LIMIT ?, 2 /*next chunk boundary*/
12
131 2005-05-24 22:53:30,2005-05-24 22:53:30,367 2005-07-09 01:17:08,2005-07-09 01:17:08,2388
142 2005-07-09 01:19:03,2005-07-09 01:19:03,3438 2005-07-31 17:51:23,2005-07-31 17:51:23,2396
153 2005-07-31 17:53:51,2005-07-31 17:53:51,928 2005-08-22 13:58:23,2005-08-22 13:58:23,2553
164 2005-08-22 13:59:19,2005-08-22 13:59:19,3704 2006-02-14 15:16:03,2006-02-14 15:16:03,4568
175 2005-05-24 22:53:30,2005-05-24 22:53:30,367
186 2006-02-14 15:16:03,2006-02-14 15:16:03,4568
19

Subscribers

People subscribed via source and target branches