Merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks into lp:percona-toolkit/2.1
- explain-checksum-chunks
- Merge into 2.1
Proposed by
Daniel Nichter
Status: | Merged |
---|---|
Merged at revision: | 279 |
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks |
Merge into: | lp:percona-toolkit/2.1 |
Diff against target: |
1357 lines (+767/-194) 8 files modified
bin/pt-online-schema-change (+304/-86) bin/pt-table-checksum (+323/-85) lib/NibbleIterator.pm (+24/-20) lib/TableNibbler.pm (+6/-1) t/lib/TableNibbler.t (+29/-1) t/pt-table-checksum/chunk_index.t (+45/-1) t/pt-table-checksum/samples/bad-plan-bug-1010232.sql (+17/-0) t/pt-table-checksum/samples/n-chunk-index-cols.txt (+19/-0) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Baron Schwartz (community) | Approve | ||
Brian Fraser (community) | Approve | ||
Daniel Nichter | Approve | ||
Review via email: mp+109530@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : | # |
review:
Approve
- 286. By Daniel Nichter
-
Fix docs about skipping chunks with bad plans.
Revision history for this message
Brian Fraser (fraserbn) : | # |
review:
Approve
- 287. By Daniel Nichter
-
Clarify --chunk-index name:n validation.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'bin/pt-online-schema-change' |
2 | --- bin/pt-online-schema-change 2012-05-30 17:27:07 +0000 |
3 | +++ bin/pt-online-schema-change 2012-06-10 17:15:24 +0000 |
4 | @@ -1828,8 +1828,12 @@ |
5 | |
6 | my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; |
7 | if ( $args{asc_first} ) { |
8 | + PTDEBUG && _d('Ascending only first column'); |
9 | @asc_cols = $asc_cols[0]; |
10 | - PTDEBUG && _d('Ascending only first column'); |
11 | + } |
12 | + elsif ( my $n = $args{n_index_cols} ) { |
13 | + PTDEBUG && _d('Ascending only first', $n, 'columns'); |
14 | + @asc_cols = @asc_cols[0..($n-1)]; |
15 | } |
16 | PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); |
17 | |
18 | @@ -4029,10 +4033,11 @@ |
19 | |
20 | my $asc = $args{TableNibbler}->generate_asc_stmt( |
21 | %args, |
22 | - tbl_struct => $tbl->{tbl_struct}, |
23 | - index => $index, |
24 | - cols => \@cols, |
25 | - asc_only => 1, |
26 | + tbl_struct => $tbl->{tbl_struct}, |
27 | + index => $index, |
28 | + n_index_cols => $args{n_chunk_index_cols}, |
29 | + cols => \@cols, |
30 | + asc_only => 1, |
31 | ); |
32 | PTDEBUG && _d('Ascend params:', Dumper($asc)); |
33 | |
34 | @@ -4114,16 +4119,17 @@ |
35 | |
36 | $self = { |
37 | %args, |
38 | - index => $index, |
39 | - limit => $limit, |
40 | - first_lb_sql => $first_lb_sql, |
41 | - last_ub_sql => $last_ub_sql, |
42 | - ub_sql => $ub_sql, |
43 | - nibble_sql => $nibble_sql, |
44 | - explain_ub_sql => "EXPLAIN $ub_sql", |
45 | - explain_nibble_sql => $explain_nibble_sql, |
46 | - resume_lb_sql => $resume_lb_sql, |
47 | - sql => { |
48 | + index => $index, |
49 | + limit => $limit, |
50 | + first_lb_sql => $first_lb_sql, |
51 | + last_ub_sql => $last_ub_sql, |
52 | + ub_sql => $ub_sql, |
53 | + nibble_sql => $nibble_sql, |
54 | + explain_first_lb_sql => "EXPLAIN $first_lb_sql", |
55 | + explain_ub_sql => "EXPLAIN $ub_sql", |
56 | + explain_nibble_sql => $explain_nibble_sql, |
57 | + resume_lb_sql => $resume_lb_sql, |
58 | + sql => { |
59 | columns => $asc->{scols}, |
60 | from => $from, |
61 | where => $where, |
62 | @@ -4231,10 +4237,11 @@ |
63 | sub statements { |
64 | my ($self) = @_; |
65 | return { |
66 | - nibble => $self->{nibble_sth}, |
67 | - explain_nibble => $self->{explain_nibble_sth}, |
68 | - upper_boundary => $self->{ub_sth}, |
69 | - explain_upper_boundary => $self->{explain_ub_sth}, |
70 | + explain_first_lower_boundary => $self->{explain_first_lb_sth}, |
71 | + nibble => $self->{nibble_sth}, |
72 | + explain_nibble => $self->{explain_nibble_sth}, |
73 | + upper_boundary => $self->{ub_sth}, |
74 | + explain_upper_boundary => $self->{explain_ub_sth}, |
75 | } |
76 | } |
77 | |
78 | @@ -4463,8 +4470,9 @@ |
79 | $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql}); |
80 | |
81 | if ( !$self->{one_nibble} ) { |
82 | - $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
83 | - $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
84 | + $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql}); |
85 | + $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
86 | + $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
87 | } |
88 | |
89 | return; |
90 | @@ -4652,6 +4660,7 @@ |
91 | |
92 | use Time::Local qw(timegm timelocal); |
93 | use Digest::MD5 qw(md5_hex); |
94 | +use B qw(); |
95 | |
96 | require Exporter; |
97 | our @ISA = qw(Exporter); |
98 | @@ -4669,6 +4678,7 @@ |
99 | any_unix_timestamp |
100 | make_checksum |
101 | crc32 |
102 | + encode_json |
103 | ); |
104 | |
105 | our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/; |
106 | @@ -4876,6 +4886,96 @@ |
107 | return $crc ^ 0xFFFFFFFF; |
108 | } |
109 | |
110 | +my $got_json = eval { require JSON }; |
111 | +sub encode_json { |
112 | + return JSON::encode_json(@_) if $got_json; |
113 | + my ( $data ) = @_; |
114 | + return (object_to_json($data) || ''); |
115 | +} |
116 | + |
117 | + |
118 | +sub object_to_json { |
119 | + my ($obj) = @_; |
120 | + my $type = ref($obj); |
121 | + |
122 | + if($type eq 'HASH'){ |
123 | + return hash_to_json($obj); |
124 | + } |
125 | + elsif($type eq 'ARRAY'){ |
126 | + return array_to_json($obj); |
127 | + } |
128 | + else { |
129 | + return value_to_json($obj); |
130 | + } |
131 | +} |
132 | + |
133 | +sub hash_to_json { |
134 | + my ($obj) = @_; |
135 | + my @res; |
136 | + for my $k ( sort { $a cmp $b } keys %$obj ) { |
137 | + push @res, string_to_json( $k ) |
138 | + . ":" |
139 | + . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) ); |
140 | + } |
141 | + return '{' . ( @res ? join( ",", @res ) : '' ) . '}'; |
142 | +} |
143 | + |
144 | +sub array_to_json { |
145 | + my ($obj) = @_; |
146 | + my @res; |
147 | + |
148 | + for my $v (@$obj) { |
149 | + push @res, object_to_json($v) || value_to_json($v); |
150 | + } |
151 | + |
152 | + return '[' . ( @res ? join( ",", @res ) : '' ) . ']'; |
153 | +} |
154 | + |
155 | +sub value_to_json { |
156 | + my ($value) = @_; |
157 | + |
158 | + return 'null' if(!defined $value); |
159 | + |
160 | + my $b_obj = B::svref_2object(\$value); # for round trip problem |
161 | + my $flags = $b_obj->FLAGS; |
162 | + return $value # as is |
163 | + if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV? |
164 | + |
165 | + my $type = ref($value); |
166 | + |
167 | + if( !$type ) { |
168 | + return string_to_json($value); |
169 | + } |
170 | + else { |
171 | + return 'null'; |
172 | + } |
173 | + |
174 | +} |
175 | + |
176 | +my %esc = ( |
177 | + "\n" => '\n', |
178 | + "\r" => '\r', |
179 | + "\t" => '\t', |
180 | + "\f" => '\f', |
181 | + "\b" => '\b', |
182 | + "\"" => '\"', |
183 | + "\\" => '\\\\', |
184 | + "\'" => '\\\'', |
185 | +); |
186 | + |
187 | +sub string_to_json { |
188 | + my ($arg) = @_; |
189 | + |
190 | + $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g; |
191 | + $arg =~ s/\//\\\//g; |
192 | + $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg; |
193 | + |
194 | + utf8::upgrade($arg); |
195 | + utf8::encode($arg); |
196 | + |
197 | + return '"' . $arg . '"'; |
198 | +} |
199 | + |
200 | sub _d { |
201 | my ($package, undef, $line) = caller 0; |
202 | @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
203 | @@ -5027,6 +5127,22 @@ |
204 | } |
205 | } |
206 | |
207 | + # Parse --chunk-index INDEX:N where N is the number of |
208 | + # left-most columns of INDEX to use. |
209 | + # https://bugs.launchpad.net/percona-toolkit/+bug/1010232 |
210 | + my ($chunk_index, $n_chunk_index_cols) |
211 | + = split(':', $o->get('chunk-index') || ''); |
212 | + if ( defined $chunk_index && $chunk_index eq '' ) { |
213 | + $o->save_error('--chunk-index cannot be an empty string'); |
214 | + } |
215 | + if ( defined $n_chunk_index_cols |
216 | + && (!$n_chunk_index_cols |
217 | + || $n_chunk_index_cols =~ m/\D/ |
218 | + || $n_chunk_index_cols < 1) ) { |
219 | + $o->save_error('Invalid number of --chunk-index columns: ' |
220 | + . $n_chunk_index_cols); |
221 | + } |
222 | + |
223 | if ( !$o->get('help') ) { |
224 | if ( @ARGV ) { |
225 | $o->save_error('Specify only one DSN on the command line'); |
226 | @@ -5641,6 +5757,7 @@ |
227 | my (%args) = @_; |
228 | my $tbl = $args{tbl}; |
229 | my $nibble_iter = $args{NibbleIterator}; |
230 | + my $statements = $nibble_iter->statements(); |
231 | |
232 | if ( $o->get('dry-run') ) { |
233 | print "Not copying rows because this is a dry run.\n"; |
234 | @@ -5652,7 +5769,6 @@ |
235 | |
236 | if ( $o->get('print') ) { |
237 | # Print the checksum and next boundary statements. |
238 | - my $statements = $nibble_iter->statements(); |
239 | foreach my $sth ( sort keys %$statements ) { |
240 | next if $sth =~ m/^explain/; |
241 | if ( $statements->{$sth} ) { |
242 | @@ -5696,6 +5812,34 @@ |
243 | die $msg; |
244 | } |
245 | } |
246 | + else { # chunking the table |
247 | + if ( $o->get('check-plan') ) { |
248 | + my $expl = explain_statement( |
249 | + sth => $statements->{explain_first_lower_boundary}, |
250 | + tbl => $tbl, |
251 | + vals => [], |
252 | + ); |
253 | + if ( !$expl->{key} |
254 | + || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) ) |
255 | + { |
256 | + die "Cannot determine the key_len of the chunk index " |
257 | + . "because MySQL chose " |
258 | + . ($expl->{key} ? "the $expl->{key}" : "no") . " index " |
259 | + . "instead of the " . $nibble_iter->nibble_index() |
260 | + . " index for the first lower boundary statement. " |
261 | + . "See --[no]check-plan in the documentation for more " |
262 | + . "information."; |
263 | + } |
264 | + elsif ( !$expl->{key_len} ) { |
265 | + die "The key_len of the $expl->{key} index is " |
266 | + . (defined $expl->{key_len} ? "zero" : "NULL") |
267 | + . ", but this should not be possible. " |
268 | + . "See --[no]check-plan in the documentation for more " |
269 | + . "information."; |
270 | + } |
271 | + $tbl->{key_len} = $expl->{key_len}; |
272 | + } |
273 | + } |
274 | |
275 | return 1; # continue nibbling table |
276 | }, |
277 | @@ -5755,58 +5899,11 @@ |
278 | # Count every chunk, even if it's ultimately skipped, etc. |
279 | $tbl->{results}->{n_chunks}++; |
280 | |
281 | - # If the table is being chunk (i.e., it's not small enough to be |
282 | - # consumed by one nibble), then check index usage and chunk size. |
283 | - if ( !$nibble_iter->one_nibble() ) { |
284 | - my $expl = explain_statement( |
285 | - tbl => $tbl, |
286 | - sth => $sth->{explain_nibble}, |
287 | - vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ], |
288 | - ); |
289 | - |
290 | - # Ensure that MySQL is using the chunk index. |
291 | - if ( lc($expl->{key} || '') |
292 | - ne lc($nibble_iter->nibble_index() || '') ) { |
293 | - my $msg |
294 | - = "Aborting copying table $tbl->{name} at chunk " |
295 | - . $nibble_iter->nibble_number() |
296 | - . " because it is not safe to chunk. Chunking should " |
297 | - . "use the " |
298 | - . ($nibble_iter->nibble_index() || '?') |
299 | - . " index, but MySQL EXPLAIN reports that " |
300 | - . ($expl->{key} ? "the $expl->{key}" : "no") |
301 | - . " index will be used for " |
302 | - . $sth->{explain_nibble}->{Statement} |
303 | - . " with values " |
304 | - . join(", ", map { defined $_ ? $_ : "NULL" } |
305 | - (@{$boundary->{lower}}, @{$boundary->{upper}})) |
306 | - . "\n"; |
307 | - die $msg; |
308 | - } |
309 | - |
310 | - # Check chunk size limit if the upper boundary and next lower |
311 | - # boundary are identical. |
312 | - if ( $limit ) { |
313 | - my $boundary = $nibble_iter->boundaries(); |
314 | - my $oversize_chunk |
315 | - = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit |
316 | - : 0; |
317 | - if ( $oversize_chunk |
318 | - && $nibble_iter->identical_boundaries( |
319 | - $boundary->{upper}, $boundary->{next_lower}) ) |
320 | - { |
321 | - my $msg |
322 | - = "Aborting copying table $tbl->{name} at chunk " |
323 | - . $nibble_iter->nibble_number() |
324 | - . " because the chunk is too large: MySQL estimates " |
325 | - . ($expl->{rows} || 0) . "rows. The current chunk " |
326 | - . "size limit is " . ($tbl->{chunk_size} * $limit) |
327 | - . " rows (chunk size=$tbl->{chunk_size}" |
328 | - . " * chunk size limit=$limit).\n"; |
329 | - die $msg; |
330 | - } |
331 | - } |
332 | - } |
333 | + # Die unless the nibble is safe. |
334 | + nibble_is_safe( |
335 | + %args, |
336 | + OptionParser => $o, |
337 | + ); |
338 | |
339 | # Exec and time the chunk checksum query. |
340 | $tbl->{nibble_time} = exec_nibble( |
341 | @@ -5909,18 +6006,19 @@ |
342 | # This won't (shouldn't) fail because we already verified in |
343 | # check_orig_table() that we can call NibbleIterator::can_nibble(). |
344 | my $nibble_iter = new NibbleIterator( |
345 | - Cxn => $cxn, |
346 | - tbl => $orig_tbl, |
347 | - chunk_size => $orig_tbl->{chunk_size}, |
348 | - chunk_index => $o->get('chunk-index'), |
349 | - dml => $dml, |
350 | - select => $select, |
351 | - callbacks => $callbacks, |
352 | - OptionParser => $o, |
353 | - Quoter => $q, |
354 | - TableParser => $tp, |
355 | - TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q), |
356 | - comments => { |
357 | + Cxn => $cxn, |
358 | + tbl => $orig_tbl, |
359 | + chunk_size => $orig_tbl->{chunk_size}, |
360 | + chunk_index => $chunk_index, |
361 | + n_chunk_index_cols => $n_chunk_index_cols, |
362 | + dml => $dml, |
363 | + select => $select, |
364 | + callbacks => $callbacks, |
365 | + OptionParser => $o, |
366 | + Quoter => $q, |
367 | + TableParser => $tp, |
368 | + TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q), |
369 | + comments => { |
370 | bite => "pt-online-schema-change $PID copy table", |
371 | nibble => "pt-online-schema-change $PID copy nibble", |
372 | }, |
373 | @@ -6110,6 +6208,82 @@ |
374 | # ############################################################################ |
375 | # Subroutines. |
376 | # ############################################################################ |
377 | + |
378 | +sub nibble_is_safe { |
379 | + my (%args) = @_; |
380 | + my @required_args = qw(Cxn tbl NibbleIterator OptionParser); |
381 | + foreach my $arg ( @required_args ) { |
382 | + die "I need a $arg argument" unless $args{$arg}; |
383 | + } |
384 | + my ($cxn, $tbl, $nibble_iter, $o) = @args{@required_args}; |
385 | + |
386 | + # EXPLAIN the checksum chunk query to get its row estimate and index. |
387 | + # XXX This call and others like it are relying on a Perl oddity. |
388 | + # See https://bugs.launchpad.net/percona-toolkit/+bug/987393 |
389 | + my $sth = $nibble_iter->statements(); |
390 | + my $boundary = $nibble_iter->boundaries(); |
391 | + my $expl = explain_statement( |
392 | + tbl => $tbl, |
393 | + sth => $sth->{explain_nibble}, |
394 | + vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ], |
395 | + ); |
396 | + |
397 | + # Ensure that MySQL is using the chunk index if the table is being chunked. |
398 | + if ( !$nibble_iter->one_nibble() |
399 | + && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) { |
400 | + if ( !$tbl->{warned}->{not_using_chunk_index}++ |
401 | + && $o->get('quiet') < 2 ) { |
402 | + die "Error copying rows at chunk " . $nibble_iter->nibble_number() |
403 | + . " of $tbl->{db}.$tbl->{tbl} because MySQL chose " |
404 | + . ($expl->{key} ? "the $expl->{key}" : "no") . " index " |
405 | + . "instead of the " . $nibble_iter->nibble_index() . " index.\n"; |
406 | + } |
407 | + } |
408 | + |
409 | + # Ensure that the chunk isn't too large if there's a --chunk-size-limit. |
410 | + # If single-chunking the table, this has already been checked, so it |
411 | + # shouldn't have changed. If chunking the table with a non-unique key, |
412 | + # oversize chunks are possible. |
413 | + if ( my $limit = $o->get('chunk-size-limit') ) { |
414 | + my $oversize_chunk |
415 | + = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit |
416 | + : 0; |
417 | + if ( $oversize_chunk |
418 | + && $nibble_iter->identical_boundaries($boundary->{upper}, |
419 | + $boundary->{next_lower}) ) { |
420 | + if ( !$tbl->{warned}->{oversize_chunk}++ |
421 | + && $o->get('quiet') < 2 ) { |
422 | + die "Error copying rows at chunk " . $nibble_iter->nibble_number() |
423 | + . " of $tbl->{db}.$tbl->{tbl} because it is oversized. " |
424 | + . "The current chunk size limit is " |
425 | + . ($tbl->{chunk_size} * $limit) |
426 | + . " rows (chunk size=$tbl->{chunk_size}" |
427 | + . " * chunk size limit=$limit), but MySQL estimates " |
428 | + . "that there are " . ($expl->{rows} || 0) |
429 | + . " rows in the chunk.\n"; |
430 | + } |
431 | + } |
432 | + } |
433 | + |
434 | + # Ensure that MySQL is still using the entire index. |
435 | + # https://bugs.launchpad.net/percona-toolkit/+bug/1010232 |
436 | + if ( !$nibble_iter->one_nibble() |
437 | + && $tbl->{key_len} |
438 | + && ($expl->{key_len} || 0) < $tbl->{key_len} ) { |
439 | + if ( !$tbl->{warned}->{key_len}++ |
440 | + && $o->get('quiet') < 2 ) { |
441 | + die "Error copying rows at chunk " . $nibble_iter->nibble_number() |
442 | + . " of $tbl->{db}.$tbl->{tbl} because MySQL used " |
443 | + . "only " . ($expl->{key_len} || 0) . " bytes " |
444 | + . "of the " . ($expl->{key} || '?') . " index instead of " |
445 | + . $tbl->{key_len} . ". See the --[no]check-plan documentation " |
446 | + . "for more information.\n"; |
447 | + } |
448 | + } |
449 | + |
450 | + return 1; # safe |
451 | +} |
452 | + |
453 | sub create_new_table{ |
454 | my (%args) = @_; |
455 | my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser); |
456 | @@ -7109,6 +7283,39 @@ |
457 | |
458 | Sleep time between checks for L<"--max-lag">. |
459 | |
460 | +=item --[no]check-plan |
461 | + |
462 | +default: yes |
463 | + |
464 | +Check query execution plans for safety. By default, this option causes |
465 | +the tool to run EXPLAIN before running queries that are meant to access |
466 | +a small amount of data, but which could access many rows if MySQL chooses a bad |
467 | +execution plan. These include the queries to determine chunk boundaries and the |
468 | +chunk queries themselves. If it appears that MySQL will use a bad query |
469 | +execution plan, the tool will skip the chunk of the table. |
470 | + |
471 | +The tool uses several heuristics to determine whether an execution plan is bad. |
472 | +The first is whether EXPLAIN reports that MySQL intends to use the desired index |
473 | +to access the rows. If MySQL chooses a different index, the tool considers the |
474 | +query unsafe. |
475 | + |
476 | +The tool also checks how much of the index MySQL reports that it will use for |
477 | +the query. The EXPLAIN output shows this in the key_len column. The tool |
478 | +remembers the largest key_len seen, and skips chunks where MySQL reports that it |
479 | +will use a smaller prefix of the index. This heuristic can be understood as |
480 | +skipping chunks that have a worse execution plan than other chunks. |
481 | + |
482 | +The tool prints a warning the first time a chunk is skipped due to |
483 | +a bad execution plan in each table. Subsequent chunks are skipped silently, |
484 | +although you can see the count of skipped chunks in the SKIPPED column in |
485 | +the tool's output. |
486 | + |
487 | +This option adds some setup work to each table and chunk. Although the work is |
488 | +not intrusive for MySQL, it results in more round-trips to the server, which |
489 | +consumes time. Making chunks too small will cause the overhead to become |
490 | +relatively larger. It is therefore recommended that you not make chunks too |
491 | +small, because the tool may take a very long time to complete if you do. |
492 | + |
493 | =item --[no]check-replication-filters |
494 | |
495 | default: yes |
496 | @@ -7145,6 +7352,17 @@ |
497 | a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of |
498 | index could cause bad performance. |
499 | |
500 | +This option supports a special syntax to select a prefix of the index instead of |
501 | +the entire index. The syntax is NAME:N, where NAME is the name of the index, and |
502 | +N is the number of columns you wish to use. This works only for compound |
503 | +indexes, and is useful in cases where a bug in the MySQL query optimizer |
504 | +(planner) causes it to scan a large range of rows instead of using the index to |
505 | +locate starting and ending points precisely. This problem sometimes occurs on |
506 | +indexes with many columns, such as 4 or more. If this happens, the tool might |
507 | +print a warning related to the L<"--[no]check-plan"> option. Instructing |
508 | +the tool to use only the first N columns from the index is a workaround for |
509 | +the bug in some cases. |
510 | + |
511 | =item --chunk-size |
512 | |
513 | type: size; default: 1000 |
514 | |
515 | === modified file 'bin/pt-table-checksum' |
516 | --- bin/pt-table-checksum 2012-05-25 21:34:58 +0000 |
517 | +++ bin/pt-table-checksum 2012-06-10 17:15:24 +0000 |
518 | @@ -2208,8 +2208,12 @@ |
519 | |
520 | my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; |
521 | if ( $args{asc_first} ) { |
522 | + PTDEBUG && _d('Ascending only first column'); |
523 | @asc_cols = $asc_cols[0]; |
524 | - PTDEBUG && _d('Ascending only first column'); |
525 | + } |
526 | + elsif ( my $n = $args{n_index_cols} ) { |
527 | + PTDEBUG && _d('Ascending only first', $n, 'columns'); |
528 | + @asc_cols = @asc_cols[0..($n-1)]; |
529 | } |
530 | PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); |
531 | |
532 | @@ -3594,10 +3598,11 @@ |
533 | |
534 | my $asc = $args{TableNibbler}->generate_asc_stmt( |
535 | %args, |
536 | - tbl_struct => $tbl->{tbl_struct}, |
537 | - index => $index, |
538 | - cols => \@cols, |
539 | - asc_only => 1, |
540 | + tbl_struct => $tbl->{tbl_struct}, |
541 | + index => $index, |
542 | + n_index_cols => $args{n_chunk_index_cols}, |
543 | + cols => \@cols, |
544 | + asc_only => 1, |
545 | ); |
546 | PTDEBUG && _d('Ascend params:', Dumper($asc)); |
547 | |
548 | @@ -3679,16 +3684,17 @@ |
549 | |
550 | $self = { |
551 | %args, |
552 | - index => $index, |
553 | - limit => $limit, |
554 | - first_lb_sql => $first_lb_sql, |
555 | - last_ub_sql => $last_ub_sql, |
556 | - ub_sql => $ub_sql, |
557 | - nibble_sql => $nibble_sql, |
558 | - explain_ub_sql => "EXPLAIN $ub_sql", |
559 | - explain_nibble_sql => $explain_nibble_sql, |
560 | - resume_lb_sql => $resume_lb_sql, |
561 | - sql => { |
562 | + index => $index, |
563 | + limit => $limit, |
564 | + first_lb_sql => $first_lb_sql, |
565 | + last_ub_sql => $last_ub_sql, |
566 | + ub_sql => $ub_sql, |
567 | + nibble_sql => $nibble_sql, |
568 | + explain_first_lb_sql => "EXPLAIN $first_lb_sql", |
569 | + explain_ub_sql => "EXPLAIN $ub_sql", |
570 | + explain_nibble_sql => $explain_nibble_sql, |
571 | + resume_lb_sql => $resume_lb_sql, |
572 | + sql => { |
573 | columns => $asc->{scols}, |
574 | from => $from, |
575 | where => $where, |
576 | @@ -3796,10 +3802,11 @@ |
577 | sub statements { |
578 | my ($self) = @_; |
579 | return { |
580 | - nibble => $self->{nibble_sth}, |
581 | - explain_nibble => $self->{explain_nibble_sth}, |
582 | - upper_boundary => $self->{ub_sth}, |
583 | - explain_upper_boundary => $self->{explain_ub_sth}, |
584 | + explain_first_lower_boundary => $self->{explain_first_lb_sth}, |
585 | + nibble => $self->{nibble_sth}, |
586 | + explain_nibble => $self->{explain_nibble_sth}, |
587 | + upper_boundary => $self->{ub_sth}, |
588 | + explain_upper_boundary => $self->{explain_ub_sth}, |
589 | } |
590 | } |
591 | |
592 | @@ -4028,8 +4035,9 @@ |
593 | $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql}); |
594 | |
595 | if ( !$self->{one_nibble} ) { |
596 | - $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
597 | - $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
598 | + $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql}); |
599 | + $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
600 | + $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
601 | } |
602 | |
603 | return; |
604 | @@ -5113,6 +5121,7 @@ |
605 | |
606 | use Time::Local qw(timegm timelocal); |
607 | use Digest::MD5 qw(md5_hex); |
608 | +use B qw(); |
609 | |
610 | require Exporter; |
611 | our @ISA = qw(Exporter); |
612 | @@ -5130,6 +5139,7 @@ |
613 | any_unix_timestamp |
614 | make_checksum |
615 | crc32 |
616 | + encode_json |
617 | ); |
618 | |
619 | our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/; |
620 | @@ -5337,6 +5347,96 @@ |
621 | return $crc ^ 0xFFFFFFFF; |
622 | } |
623 | |
624 | +my $got_json = eval { require JSON }; |
625 | +sub encode_json { |
626 | + return JSON::encode_json(@_) if $got_json; |
627 | + my ( $data ) = @_; |
628 | + return (object_to_json($data) || ''); |
629 | +} |
630 | + |
631 | + |
632 | +sub object_to_json { |
633 | + my ($obj) = @_; |
634 | + my $type = ref($obj); |
635 | + |
636 | + if($type eq 'HASH'){ |
637 | + return hash_to_json($obj); |
638 | + } |
639 | + elsif($type eq 'ARRAY'){ |
640 | + return array_to_json($obj); |
641 | + } |
642 | + else { |
643 | + return value_to_json($obj); |
644 | + } |
645 | +} |
646 | + |
647 | +sub hash_to_json { |
648 | + my ($obj) = @_; |
649 | + my @res; |
650 | + for my $k ( sort { $a cmp $b } keys %$obj ) { |
651 | + push @res, string_to_json( $k ) |
652 | + . ":" |
653 | + . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) ); |
654 | + } |
655 | + return '{' . ( @res ? join( ",", @res ) : '' ) . '}'; |
656 | +} |
657 | + |
658 | +sub array_to_json { |
659 | + my ($obj) = @_; |
660 | + my @res; |
661 | + |
662 | + for my $v (@$obj) { |
663 | + push @res, object_to_json($v) || value_to_json($v); |
664 | + } |
665 | + |
666 | + return '[' . ( @res ? join( ",", @res ) : '' ) . ']'; |
667 | +} |
668 | + |
669 | +sub value_to_json { |
670 | + my ($value) = @_; |
671 | + |
672 | + return 'null' if(!defined $value); |
673 | + |
674 | + my $b_obj = B::svref_2object(\$value); # for round trip problem |
675 | + my $flags = $b_obj->FLAGS; |
676 | + return $value # as is |
677 | + if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV? |
678 | + |
679 | + my $type = ref($value); |
680 | + |
681 | + if( !$type ) { |
682 | + return string_to_json($value); |
683 | + } |
684 | + else { |
685 | + return 'null'; |
686 | + } |
687 | + |
688 | +} |
689 | + |
690 | +my %esc = ( |
691 | + "\n" => '\n', |
692 | + "\r" => '\r', |
693 | + "\t" => '\t', |
694 | + "\f" => '\f', |
695 | + "\b" => '\b', |
696 | + "\"" => '\"', |
697 | + "\\" => '\\\\', |
698 | + "\'" => '\\\'', |
699 | +); |
700 | + |
701 | +sub string_to_json { |
702 | + my ($arg) = @_; |
703 | + |
704 | + $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g; |
705 | + $arg =~ s/\//\\\//g; |
706 | + $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg; |
707 | + |
708 | + utf8::upgrade($arg); |
709 | + utf8::encode($arg); |
710 | + |
711 | + return '"' . $arg . '"'; |
712 | +} |
713 | + |
714 | sub _d { |
715 | my ($package, undef, $line) = caller 0; |
716 | @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
717 | @@ -5988,6 +6088,22 @@ |
718 | } |
719 | } |
720 | |
721 | + # Parse --chunk-index INDEX:N where N is the number of |
722 | + # left-most columns of INDEX to use. |
723 | + # https://bugs.launchpad.net/percona-toolkit/+bug/1010232 |
724 | + my ($chunk_index, $n_chunk_index_cols) |
725 | + = split(':', $o->get('chunk-index') || ''); |
726 | + if ( defined $chunk_index && $chunk_index eq '' ) { |
727 | + $o->save_error('--chunk-index cannot be an empty string'); |
728 | + } |
729 | + if ( defined $n_chunk_index_cols |
730 | + && (!$n_chunk_index_cols |
731 | + || $n_chunk_index_cols =~ m/\D/ |
732 | + || $n_chunk_index_cols < 1) ) { |
733 | + $o->save_error('Invalid number of --chunk-index columns: ' |
734 | + . $n_chunk_index_cols); |
735 | + } |
736 | + |
737 | if ( !$o->get('help') ) { |
738 | if ( @ARGV > 1 ) { |
739 | $o->save_error("More than one host specified; only one allowed"); |
740 | @@ -6465,6 +6581,7 @@ |
741 | my (%args) = @_; |
742 | my $tbl = $args{tbl}; |
743 | my $nibble_iter = $args{NibbleIterator}; |
744 | + my $statements = $nibble_iter->statements(); |
745 | my $oktonibble = 1; |
746 | |
747 | if ( $last_chunk ) { # resuming |
748 | @@ -6493,7 +6610,7 @@ |
749 | print "--\n", |
750 | "-- $tbl->{db}.$tbl->{tbl}\n", |
751 | "--\n\n"; |
752 | - my $statements = $nibble_iter->statements(); |
753 | + |
754 | foreach my $sth ( sort keys %$statements ) { |
755 | next if $sth =~ m/^explain/; |
756 | if ( $statements->{$sth} ) { |
757 | @@ -6551,6 +6668,34 @@ |
758 | $oktonibble = 0; |
759 | } |
760 | } |
761 | + else { # chunking the table |
762 | + if ( $o->get('check-plan') ) { |
763 | + my $expl = explain_statement( |
764 | + sth => $statements->{explain_first_lower_boundary}, |
765 | + tbl => $tbl, |
766 | + vals => [], |
767 | + ); |
768 | + if ( !$expl->{key} |
769 | + || lc($expl->{key}) ne lc($nibble_iter->nibble_index()) ) |
770 | + { |
771 | + die "Cannot determine the key_len of the chunk index " |
772 | + . "because MySQL chose " |
773 | + . ($expl->{key} ? "the $expl->{key}" : "no") . " index " |
774 | + . "instead of the " . $nibble_iter->nibble_index() |
775 | + . " index for the first lower boundary statement. " |
776 | + . "See --[no]check-plan in the documentation for more " |
777 | + . "information."; |
778 | + } |
779 | + elsif ( !$expl->{key_len} ) { |
780 | + die "The key_len of the $expl->{key} index is " |
781 | + . (defined $expl->{key_len} ? "zero" : "NULL") |
782 | + . ", but this should not be possible. " |
783 | + . "See --[no]check-plan in the documentation for more " |
784 | + . "information."; |
785 | + } |
786 | + $tbl->{key_len} = $expl->{key_len}; |
787 | + } |
788 | + } |
789 | |
790 | if ( $oktonibble && $o->get('empty-replicate-table') ) { |
791 | use_repl_db( |
792 | @@ -6604,16 +6749,14 @@ |
793 | ne lc($nibble_iter->nibble_index() || '') ) { |
794 | PTDEBUG && _d('Cannot nibble next chunk, aborting table'); |
795 | if ( $o->get('quiet') < 2 ) { |
796 | - my $msg |
797 | - = "Aborting table $tbl->{db}.$tbl->{tbl} at chunk " |
798 | + warn ts("Aborting table $tbl->{db}.$tbl->{tbl} at chunk " |
799 | . ($nibble_iter->nibble_number() + 1) |
800 | . " because it is not safe to chunk. Chunking should " |
801 | . "use the " |
802 | . ($nibble_iter->nibble_index() || '?') |
803 | - . " index, but MySQL EXPLAIN reports that " |
804 | + . " index, but MySQL chose " |
805 | . ($expl->{key} ? "the $expl->{key}" : "no") |
806 | - . " index will be used.\n"; |
807 | - warn ts($msg); |
808 | + . " index.\n"); |
809 | } |
810 | $tbl->{checksum_results}->{errors}++; |
811 | return 0; # stop nibbling table |
812 | @@ -6658,43 +6801,13 @@ |
813 | return 0; # next boundary |
814 | } |
815 | |
816 | - # If the table is being chunk (i.e., it's not small enough to be |
817 | - # consumed by one nibble), then check index usage and chunk size. |
818 | - # XXX This call and others like it are relying on a Perl oddity. |
819 | - # See https://bugs.launchpad.net/percona-toolkit/+bug/987393 |
820 | - if ( !$nibble_iter->one_nibble() ) { |
821 | - my $expl = explain_statement( |
822 | - tbl => $tbl, |
823 | - sth => $sth->{explain_nibble}, |
824 | - vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ], |
825 | - ); |
826 | - my $oversize_chunk |
827 | - = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit |
828 | - : 0; |
829 | - |
830 | - # Ensure that MySQL is using the chunk index. |
831 | - if ( lc($expl->{key} || '') |
832 | - ne lc($nibble_iter->nibble_index() || '') ) { |
833 | - PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table', |
834 | - "$tbl->{db}.$tbl->{tbl} not using chunk index, skipping"); |
835 | - return 0; # next boundary |
836 | - } |
837 | - |
838 | - # Check chunk size limit if the upper boundary and next lower |
839 | - # boundary are identical. |
840 | - if ( $limit ) { |
841 | - my $boundary = $nibble_iter->boundaries(); |
842 | - if ( $nibble_iter->identical_boundaries( |
843 | - $boundary->{upper}, $boundary->{next_lower}) |
844 | - && $oversize_chunk ) { |
845 | - PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table', |
846 | - "$tbl->{db}.$tbl->{tbl} is too large, skipping"); |
847 | - return 0; # next boundary |
848 | - } |
849 | - } |
850 | - } |
851 | - |
852 | - # Exec and time the chunk checksum query. |
853 | + # Skip this nibble unless it's safe. |
854 | + return 0 unless nibble_is_safe( |
855 | + %args, |
856 | + OptionParser => $o, |
857 | + ); |
858 | + |
859 | + # Exec and time the nibble. |
860 | $tbl->{nibble_time} = exec_nibble( |
861 | %args, |
862 | Retry => $retry, |
863 | @@ -6776,7 +6889,7 @@ |
864 | $tbl->{chunk_size} = 1; |
865 | |
866 | # This warning is printed once per table. |
867 | - if ( !$tbl->{warned_slow} && $o->get('quiet') < 2 ) { |
868 | + if ( !$tbl->{warned}->{slow}++ && $o->get('quiet') < 2 ) { |
869 | warn ts("Checksum queries for table " |
870 | . "$tbl->{db}.$tbl->{tbl} are executing very slowly. " |
871 | . "--chunk-size has been automatically reduced to 1. " |
872 | @@ -6786,7 +6899,6 @@ |
873 | . "selected $cnt rows and took " |
874 | . sprintf('%.3f', $tbl->{nibble_time}) |
875 | . " seconds to execute.\n"); |
876 | - $tbl->{warned_slow} = 1; |
877 | } |
878 | } |
879 | |
880 | @@ -6918,22 +7030,23 @@ |
881 | my $nibble_iter; |
882 | eval { |
883 | $nibble_iter = new OobNibbleIterator( |
884 | - Cxn => $master_cxn, |
885 | - tbl => $tbl, |
886 | - chunk_size => $tbl->{chunk_size}, |
887 | - chunk_index => $o->get('chunk-index'), |
888 | - dml => $checksum_dml, |
889 | - select => $checksum_cols, |
890 | - past_dml => $checksum_dml, |
891 | - past_select => $past_cols, |
892 | - callbacks => $callbacks, |
893 | - resume => $last_chunk, |
894 | - OptionParser => $o, |
895 | - Quoter => $q, |
896 | - TableNibbler => $tn, |
897 | - TableParser => $tp, |
898 | - RowChecksum => $rc, |
899 | - comments => { |
900 | + Cxn => $master_cxn, |
901 | + tbl => $tbl, |
902 | + chunk_size => $tbl->{chunk_size}, |
903 | + chunk_index => $chunk_index, |
904 | + n_chunk_index_cols => $n_chunk_index_cols, |
905 | + dml => $checksum_dml, |
906 | + select => $checksum_cols, |
907 | + past_dml => $checksum_dml, |
908 | + past_select => $past_cols, |
909 | + callbacks => $callbacks, |
910 | + resume => $last_chunk, |
911 | + OptionParser => $o, |
912 | + Quoter => $q, |
913 | + TableNibbler => $tn, |
914 | + TableParser => $tp, |
915 | + RowChecksum => $rc, |
916 | + comments => { |
917 | bite => "checksum table", |
918 | nibble => "checksum chunk", |
919 | }, |
920 | @@ -7008,6 +7121,84 @@ |
921 | return $msg ? "$ts $msg" : $ts; |
922 | } |
923 | |
924 | +sub nibble_is_safe { |
925 | + my (%args) = @_; |
926 | + my @required_args = qw(Cxn tbl NibbleIterator OptionParser); |
927 | + foreach my $arg ( @required_args ) { |
928 | + die "I need a $arg argument" unless $args{$arg}; |
929 | + } |
930 | + my ($cxn, $tbl, $nibble_iter, $o)= @args{@required_args}; |
931 | + |
932 | + # EXPLAIN the checksum chunk query to get its row estimate and index. |
933 | + # XXX This call and others like it are relying on a Perl oddity. |
934 | + # See https://bugs.launchpad.net/percona-toolkit/+bug/987393 |
935 | + my $sth = $nibble_iter->statements(); |
936 | + my $boundary = $nibble_iter->boundaries(); |
937 | + my $expl = explain_statement( |
938 | + tbl => $tbl, |
939 | + sth => $sth->{explain_nibble}, |
940 | + vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ], |
941 | + ); |
942 | + |
943 | + # Ensure that MySQL is using the chunk index if the table is being chunked. |
944 | + if ( !$nibble_iter->one_nibble() |
945 | + && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) { |
946 | + if ( !$tbl->{warned}->{not_using_chunk_index}++ |
947 | + && $o->get('quiet') < 2 ) { |
948 | + warn ts("Skipping chunk " . $nibble_iter->nibble_number() |
949 | + . " of $tbl->{db}.$tbl->{tbl} because MySQL chose " |
950 | + . ($expl->{key} ? "the $expl->{key}" : "no") . " index " |
951 | + . "instead of the " . $nibble_iter->nibble_index() . " index.\n"); |
952 | + } |
953 | + return 0; # not safe |
954 | + } |
955 | + |
956 | + # Ensure that the chunk isn't too large if there's a --chunk-size-limit. |
957 | + # If single-chunking the table, this has already been checked, so it |
958 | + # shouldn't have changed. If chunking the table with a non-unique key, |
959 | + # oversize chunks are possible. |
960 | + if ( my $limit = $o->get('chunk-size-limit') ) { |
961 | + my $oversize_chunk |
962 | + = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit |
963 | + : 0; |
964 | + if ( $oversize_chunk |
965 | + && $nibble_iter->identical_boundaries($boundary->{upper}, |
966 | + $boundary->{next_lower}) ) { |
967 | + if ( !$tbl->{warned}->{oversize_chunk}++ |
968 | + && $o->get('quiet') < 2 ) { |
969 | + warn ts("Skipping chunk " . $nibble_iter->nibble_number() |
970 | + . " of $tbl->{db}.$tbl->{tbl} because it is oversized. " |
971 | + . "The current chunk size limit is " |
972 | + . ($tbl->{chunk_size} * $limit) |
973 | + . " rows (chunk size=$tbl->{chunk_size}" |
974 | + . " * chunk size limit=$limit), but MySQL estimates " |
975 | + . "that there are " . ($expl->{rows} || 0) |
976 | + . " rows in the chunk.\n"); |
977 | + } |
978 | + return 0; # not safe |
979 | + } |
980 | + } |
981 | + |
982 | + # Ensure that MySQL is still using the entire index. |
983 | + # https://bugs.launchpad.net/percona-toolkit/+bug/1010232 |
984 | + if ( !$nibble_iter->one_nibble() |
985 | + && $tbl->{key_len} |
986 | + && ($expl->{key_len} || 0) < $tbl->{key_len} ) { |
987 | + if ( !$tbl->{warned}->{key_len}++ |
988 | + && $o->get('quiet') < 2 ) { |
989 | + warn ts("Skipping chunk " . $nibble_iter->nibble_number() |
990 | + . " of $tbl->{db}.$tbl->{tbl} because MySQL used " |
991 | + . "only " . ($expl->{key_len} || 0) . " bytes " |
992 | + . "of the " . ($expl->{key} || '?') . " index instead of " |
993 | + . $tbl->{key_len} . ". See the --[no]check-plan documentation " |
994 | + . "for more information.\n"); |
995 | + } |
996 | + return 0; # not safe |
997 | + } |
998 | + |
999 | + return 1; # safe |
1000 | +} |
1001 | + |
1002 | sub exec_nibble { |
1003 | my (%args) = @_; |
1004 | my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser); |
1005 | @@ -7074,7 +7265,7 @@ |
1006 | && (!$warn_code{$code}->{pattern} |
1007 | || $message =~ m/$warn_code{$code}->{pattern}/) ) |
1008 | { |
1009 | - if ( !$tbl->{"warned_code_$code"} ) { # warn once per table |
1010 | + if ( !$tbl->{warned}->{$code}++ ) { # warn once per table |
1011 | if ( $o->get('quiet') < 2 ) { |
1012 | warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} " |
1013 | . "caused MySQL error $code: " |
1014 | @@ -7083,7 +7274,6 @@ |
1015 | : $message) |
1016 | . "\n"); |
1017 | } |
1018 | - $tbl->{"warned_code_$code"} = 1; |
1019 | $tbl->{checksum_results}->{errors}++; |
1020 | } |
1021 | } |
1022 | @@ -7910,6 +8100,39 @@ |
1023 | |
1024 | Sleep time between checks for L<"--max-lag">. |
1025 | |
1026 | +=item --[no]check-plan |
1027 | + |
1028 | +default: yes |
1029 | + |
1030 | +Check query execution plans for safety. By default, this option causes |
1031 | +pt-table-checksum to run EXPLAIN before running queries that are meant to access |
1032 | +a small amount of data, but which could access many rows if MySQL chooses a bad |
1033 | +execution plan. These include the queries to determine chunk boundaries and the |
1034 | +chunk queries themselves. If it appears that MySQL will use a bad query |
1035 | +execution plan, the tool will skip the chunk of the table. |
1036 | + |
1037 | +The tool uses several heuristics to determine whether an execution plan is bad. |
1038 | +The first is whether EXPLAIN reports that MySQL intends to use the desired index |
1039 | +to access the rows. If MySQL chooses a different index, the tool considers the |
1040 | +query unsafe. |
1041 | + |
1042 | +The tool also checks how much of the index MySQL reports that it will use for |
1043 | +the query. The EXPLAIN output shows this in the key_len column. The tool |
1044 | +remembers the largest key_len seen, and skips chunks where MySQL reports that it |
1045 | +will use a smaller prefix of the index. This heuristic can be understood as |
1046 | +skipping chunks that have a worse execution plan than other chunks. |
1047 | + |
1048 | +The tool prints a warning the first time a chunk is skipped due to |
1049 | +a bad execution plan in each table. Subsequent chunks are skipped silently, |
1050 | +although you can see the count of skipped chunks in the SKIPPED column in |
1051 | +the tool's output. |
1052 | + |
1053 | +This option adds some setup work to each table and chunk. Although the work is |
1054 | +not intrusive for MySQL, it results in more round-trips to the server, which |
1055 | +consumes time. Making chunks too small will cause the overhead to become |
1056 | +relatively larger. It is therefore recommended that you not make chunks too |
1057 | +small, because the tool may take a very long time to complete if you do. |
1058 | + |
1059 | =item --[no]check-replication-filters |
1060 | |
1061 | default: yes; group: Safety |
1062 | @@ -7959,12 +8182,24 @@ |
1063 | This is probably best to use when you are checksumming only a single table, not |
1064 | an entire server. |
1065 | |
1066 | +This option supports a special syntax to select a prefix of the index instead of |
1067 | +the entire index. The syntax is NAME:N, where NAME is the name of the index, and |
1068 | +N is the number of columns you wish to use. This works only for compound |
1069 | +indexes, and is useful in cases where a bug in the MySQL query optimizer |
1070 | +(planner) causes it to scan a large range of rows instead of using the index to |
1071 | +locate starting and ending points precisely. This problem sometimes occurs on |
1072 | +indexes with many columns, such as 4 or more. If this happens, the tool might |
1073 | +print a warning related to the L<"--[no]check-plan"> option. Instructing |
1074 | +the tool to use only the first N columns from the index is a workaround for |
1075 | +the bug in some cases. |
1076 | + |
1077 | =item --chunk-size |
1078 | |
1079 | type: size; default: 1000 |
1080 | |
1081 | Number of rows to select for each checksum query. Allowable suffixes are |
1082 | -k, M, G. |
1083 | +k, M, G. You should not use this option in most cases; prefer L<"--chunk-time"> |
1084 | +instead. |
1085 | |
1086 | This option can override the default behavior, which is to adjust chunk size |
1087 | dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds. |
1088 | @@ -7980,6 +8215,9 @@ |
1089 | 10,000 rows large. Such a chunk will probably be skipped because of |
1090 | L<"--chunk-size-limit">. |
1091 | |
1092 | +Selecting a small chunk size will cause the tool to become much slower, in part |
1093 | +because of the setup work required for L<"--[no]check-plan">. |
1094 | + |
1095 | =item --chunk-size-limit |
1096 | |
1097 | type: float; default: 2.0; group: Safety |
1098 | |
1099 | === modified file 'lib/NibbleIterator.pm' |
1100 | --- lib/NibbleIterator.pm 2012-05-10 16:21:44 +0000 |
1101 | +++ lib/NibbleIterator.pm 2012-06-10 17:15:24 +0000 |
1102 | @@ -124,10 +124,11 @@ |
1103 | # Figure out how to nibble the table with the index. |
1104 | my $asc = $args{TableNibbler}->generate_asc_stmt( |
1105 | %args, |
1106 | - tbl_struct => $tbl->{tbl_struct}, |
1107 | - index => $index, |
1108 | - cols => \@cols, |
1109 | - asc_only => 1, |
1110 | + tbl_struct => $tbl->{tbl_struct}, |
1111 | + index => $index, |
1112 | + n_index_cols => $args{n_chunk_index_cols}, |
1113 | + cols => \@cols, |
1114 | + asc_only => 1, |
1115 | ); |
1116 | PTDEBUG && _d('Ascend params:', Dumper($asc)); |
1117 | |
1118 | @@ -229,16 +230,17 @@ |
1119 | |
1120 | $self = { |
1121 | %args, |
1122 | - index => $index, |
1123 | - limit => $limit, |
1124 | - first_lb_sql => $first_lb_sql, |
1125 | - last_ub_sql => $last_ub_sql, |
1126 | - ub_sql => $ub_sql, |
1127 | - nibble_sql => $nibble_sql, |
1128 | - explain_ub_sql => "EXPLAIN $ub_sql", |
1129 | - explain_nibble_sql => $explain_nibble_sql, |
1130 | - resume_lb_sql => $resume_lb_sql, |
1131 | - sql => { |
1132 | + index => $index, |
1133 | + limit => $limit, |
1134 | + first_lb_sql => $first_lb_sql, |
1135 | + last_ub_sql => $last_ub_sql, |
1136 | + ub_sql => $ub_sql, |
1137 | + nibble_sql => $nibble_sql, |
1138 | + explain_first_lb_sql => "EXPLAIN $first_lb_sql", |
1139 | + explain_ub_sql => "EXPLAIN $ub_sql", |
1140 | + explain_nibble_sql => $explain_nibble_sql, |
1141 | + resume_lb_sql => $resume_lb_sql, |
1142 | + sql => { |
1143 | columns => $asc->{scols}, |
1144 | from => $from, |
1145 | where => $where, |
1146 | @@ -357,10 +359,11 @@ |
1147 | sub statements { |
1148 | my ($self) = @_; |
1149 | return { |
1150 | - nibble => $self->{nibble_sth}, |
1151 | - explain_nibble => $self->{explain_nibble_sth}, |
1152 | - upper_boundary => $self->{ub_sth}, |
1153 | - explain_upper_boundary => $self->{explain_ub_sth}, |
1154 | + explain_first_lower_boundary => $self->{explain_first_lb_sth}, |
1155 | + nibble => $self->{nibble_sth}, |
1156 | + explain_nibble => $self->{explain_nibble_sth}, |
1157 | + upper_boundary => $self->{ub_sth}, |
1158 | + explain_upper_boundary => $self->{explain_ub_sth}, |
1159 | } |
1160 | } |
1161 | |
1162 | @@ -613,8 +616,9 @@ |
1163 | $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql}); |
1164 | |
1165 | if ( !$self->{one_nibble} ) { |
1166 | - $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
1167 | - $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
1168 | + $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql}); |
1169 | + $self->{ub_sth} = $dbh->prepare($self->{ub_sql}); |
1170 | + $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql}); |
1171 | } |
1172 | |
1173 | return; |
1174 | |
1175 | === modified file 'lib/TableNibbler.pm' |
1176 | --- lib/TableNibbler.pm 2012-01-19 19:46:56 +0000 |
1177 | +++ lib/TableNibbler.pm 2012-06-10 17:15:24 +0000 |
1178 | @@ -41,6 +41,7 @@ |
1179 | # * tbl_struct Hashref returned from TableParser::parse(). |
1180 | # * cols Arrayref of columns to SELECT from the table |
1181 | # * index Which index to ascend; optional. |
1182 | +# * n_index_cols The number of left-most index columns to use. |
1183 | # * asc_only Whether to ascend strictly, that is, the WHERE clause for |
1184 | # the asc_stmt will fetch the next row > the given arguments. |
1185 | # The option is to fetch the row >=, which could loop |
1186 | @@ -77,8 +78,12 @@ |
1187 | # These are the columns we'll ascend. |
1188 | my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; |
1189 | if ( $args{asc_first} ) { |
1190 | + PTDEBUG && _d('Ascending only first column'); |
1191 | @asc_cols = $asc_cols[0]; |
1192 | - PTDEBUG && _d('Ascending only first column'); |
1193 | + } |
1194 | + elsif ( my $n = $args{n_index_cols} ) { |
1195 | + PTDEBUG && _d('Ascending only first', $n, 'columns'); |
1196 | + @asc_cols = @asc_cols[0..($n-1)]; |
1197 | } |
1198 | PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); |
1199 | |
1200 | |
1201 | === modified file 't/lib/TableNibbler.t' |
1202 | --- t/lib/TableNibbler.t 2012-03-06 13:56:08 +0000 |
1203 | +++ t/lib/TableNibbler.t 2012-06-10 17:15:24 +0000 |
1204 | @@ -9,7 +9,7 @@ |
1205 | use strict; |
1206 | use warnings FATAL => 'all'; |
1207 | use English qw(-no_match_vars); |
1208 | -use Test::More tests => 24; |
1209 | +use Test::More tests => 25; |
1210 | |
1211 | use TableParser; |
1212 | use TableNibbler; |
1213 | @@ -299,6 +299,34 @@ |
1214 | |
1215 | is_deeply( |
1216 | $n->generate_asc_stmt( |
1217 | + tbl_struct => $t, |
1218 | + cols => $t->{cols}, |
1219 | + index => 'rental_date', |
1220 | + n_index_cols => 2, |
1221 | + ), |
1222 | + { |
1223 | + cols => [qw(rental_id rental_date inventory_id customer_id |
1224 | + return_date staff_id last_update)], |
1225 | + index => 'rental_date', |
1226 | + where => '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))', |
1227 | + slice => [qw(1 1 2)], |
1228 | + scols => [qw(rental_date rental_date inventory_id)], |
1229 | + boundaries => { |
1230 | + '<' => |
1231 | + '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?))', |
1232 | + '<=' => |
1233 | + '((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?))', |
1234 | + '>' => |
1235 | + '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?))', |
1236 | + '>=' => |
1237 | + '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))' |
1238 | + }, |
1239 | + }, |
1240 | + 'Use only N left-most columns of the index', |
1241 | +); |
1242 | + |
1243 | +is_deeply( |
1244 | + $n->generate_asc_stmt( |
1245 | tbl_struct => $t, |
1246 | cols => $t->{cols}, |
1247 | index => 'rental_date', |
1248 | |
1249 | === modified file 't/pt-table-checksum/chunk_index.t' |
1250 | --- t/pt-table-checksum/chunk_index.t 2012-05-04 23:46:34 +0000 |
1251 | +++ t/pt-table-checksum/chunk_index.t 2012-06-10 17:15:24 +0000 |
1252 | @@ -25,7 +25,7 @@ |
1253 | plan skip_all => 'Cannot connect to sandbox master'; |
1254 | } |
1255 | else { |
1256 | - plan tests => 11; |
1257 | + plan tests => 14; |
1258 | } |
1259 | |
1260 | # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic |
1261 | @@ -158,6 +158,50 @@ |
1262 | ); |
1263 | |
1264 | # ############################################################################# |
1265 | +# PK but bad explain plan. |
1266 | +# https://bugs.launchpad.net/percona-toolkit/+bug/1010232 |
1267 | +# ############################################################################# |
1268 | +$sb->load_file('master', "t/pt-table-checksum/samples/bad-plan-bug-1010232.sql"); |
1269 | +PerconaTest::wait_for_table($dbh, "bad_plan.t", "(c1,c2,c3,c4)=(1,1,2,100)"); |
1270 | + |
1271 | +$output = output(sub { |
1272 | + $exit_status = pt_table_checksum::main( |
1273 | + $master_dsn, '--max-load', '', |
1274 | + qw(--lock-wait-timeout 3 --chunk-size 10 -t bad_plan.t) |
1275 | + ) }, |
1276 | + stderr => 1, |
1277 | +); |
1278 | + |
1279 | +is( |
1280 | + $exit_status, |
1281 | + 0, |
1282 | + "Bad key_len chunks are not errors" |
1283 | +); |
1284 | + |
1285 | +cmp_ok( |
1286 | + PerconaTest::count_checksum_results($output, 'skipped'), |
1287 | + '>', |
1288 | + 1, |
1289 | + "Skipped bad key_len chunks" |
1290 | +); |
1291 | + |
1292 | +# Use --chunk-index NAME:N to use only the first N left-most columns of the index. |
1293 | +# Can't use bad_plan.t, however, because its rows are almost all identical, |
1294 | +# so using 3 of 4 pk cols creates an infinite loop. |
1295 | +ok( |
1296 | + no_diff( |
1297 | + sub { |
1298 | + pt_table_checksum::main( |
1299 | + $master_dsn, '--max-load', '', |
1300 | + qw(--lock-wait-timeout 3 --chunk-size 5000 -t sakila.rental), |
1301 | + qw(--chunk-index rental_date:2 --explain --explain)); |
1302 | + }, |
1303 | + "t/pt-table-checksum/samples/n-chunk-index-cols.txt", |
1304 | + ), |
1305 | + "--chunk-index index:n" |
1306 | +); |
1307 | + |
1308 | +# ############################################################################# |
1309 | # Done. |
1310 | # ############################################################################# |
1311 | $sb->wipe_clean($dbh); |
1312 | |
1313 | === added file 't/pt-table-checksum/samples/bad-plan-bug-1010232.sql' |
1314 | --- t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 1970-01-01 00:00:00 +0000 |
1315 | +++ t/pt-table-checksum/samples/bad-plan-bug-1010232.sql 2012-06-10 17:15:24 +0000 |
1316 | @@ -0,0 +1,17 @@ |
1317 | +DROP DATABASE IF EXISTS bad_plan; |
1318 | +CREATE DATABASE bad_plan; |
1319 | +USE bad_plan; |
1320 | + |
1321 | +CREATE TABLE t ( |
1322 | + `c1` smallint(5) unsigned NOT NULL, |
1323 | + `c2` mediumint(8) unsigned NOT NULL DEFAULT '0', |
1324 | + `c3` smallint(5) unsigned NOT NULL DEFAULT '0', |
1325 | + `c4` smallint(5) unsigned NOT NULL DEFAULT '0', |
1326 | + PRIMARY KEY (`c1`,`c2`,`c3`,`c4`) |
1327 | +) ENGINE=InnoDB; |
1328 | + |
1329 | +INSERT INTO t VALUES |
1330 | +(1,1,1,1),(1,1,1,2),(1,1,1,3),(1,1,1,4),(1,1,1,5),(1,1,1,6),(1,1,1,7),(1,1,1,8),(1,1,1,9),(1,1,1,10),(1,1,1,11),(1,1,1,12),(1,1,1,13),(1,1,1,14),(1,1,1,15),(1,1,1,16),(1,1,1,17),(1,1,1,18),(1,1,1,19),(1,1,1,20),(1,1,1,21),(1,1,1,22),(1,1,1,23),(1,1,1,24),(1,1,1,25),(1,1,1,26),(1,1,1,27),(1,1,1,28),(1,1,1,29),(1,1,1,30),(1,1,1,31),(1,1,1,32),(1,1,1,33),(1,1,1,34),(1,1,1,35),(1,1,1,36),(1,1,1,37),(1,1,1,38),(1,1,1,39),(1,1,1,40),(1,1,1,41),(1,1,1,42),(1,1,1,43),(1,1,1,44),(1,1,1,45),(1,1,1,46),(1,1,1,47),(1,1,1,48),(1,1,1,49),(1,1,1,50),(1,1,1,51),(1,1,1,52),(1,1,1,53),(1,1,1,54),(1,1,1,55),(1,1,1,56),(1,1,1,57),(1,1,1,58),(1,1,1,59),(1,1,1,60),(1,1,1,61),(1,1,1,62),(1,1,1,63),(1,1,1,64),(1,1,1,65),(1,1,1,66),(1,1,1,67),(1,1,1,68),(1,1,1,69),(1,1,1,70),(1,1,1,71),(1,1,1,72),(1,1,1,73),(1,1,1,74),(1,1,1,75),(1,1,1,76),(1,1,1,77),(1,1,1,78),(1,1,1,79),(1,1,1,80),(1,1,1,81),(1,1,1,82),(1,1,1,83),(1,1,1,84),(1,1,1,85),(1,1,1,86),(1,1,1,87),(1,1,1,88),(1,1,1,89),(1,1,1,90),(1,1,1,91),(1,1,1,92),(1,1,1,93),(1,1,1,94),(1,1,1,95),(1,1,1,96),(1,1,1,97),(1,1,1,98),(1,1,1,99),(1,1,1,100), |
1331 | +(1,1,2,1),(1,1,2,2),(1,1,2,3),(1,1,2,4),(1,1,2,5),(1,1,2,6),(1,1,2,7),(1,1,2,8),(1,1,2,9),(1,1,2,10),(1,1,2,11),(1,1,2,12),(1,1,2,13),(1,1,2,14),(1,1,2,15),(1,1,2,16),(1,1,2,17),(1,1,2,18),(1,1,2,19),(1,1,2,20),(1,1,2,21),(1,1,2,22),(1,1,2,23),(1,1,2,24),(1,1,2,25),(1,1,2,26),(1,1,2,27),(1,1,2,28),(1,1,2,29),(1,1,2,30),(1,1,2,31),(1,1,2,32),(1,1,2,33),(1,1,2,34),(1,1,2,35),(1,1,2,36),(1,1,2,37),(1,1,2,38),(1,1,2,39),(1,1,2,40),(1,1,2,41),(1,1,2,42),(1,1,2,43),(1,1,2,44),(1,1,2,45),(1,1,2,46),(1,1,2,47),(1,1,2,48),(1,1,2,49),(1,1,2,50),(1,1,2,51),(1,1,2,52),(1,1,2,53),(1,1,2,54),(1,1,2,55),(1,1,2,56),(1,1,2,57),(1,1,2,58),(1,1,2,59),(1,1,2,60),(1,1,2,61),(1,1,2,62),(1,1,2,63),(1,1,2,64),(1,1,2,65),(1,1,2,66),(1,1,2,67),(1,1,2,68),(1,1,2,69),(1,1,2,70),(1,1,2,71),(1,1,2,72),(1,1,2,73),(1,1,2,74),(1,1,2,75),(1,1,2,76),(1,1,2,77),(1,1,2,78),(1,1,2,79),(1,1,2,80),(1,1,2,81),(1,1,2,82),(1,1,2,83),(1,1,2,84),(1,1,2,85),(1,1,2,86),(1,1,2,87),(1,1,2,88),(1,1,2,89),(1,1,2,90),(1,1,2,91),(1,1,2,92),(1,1,2,93),(1,1,2,94),(1,1,2,95),(1,1,2,96),(1,1,2,97),(1,1,2,98),(1,1,2,99),(1,1,2,100); |
1332 | + |
1333 | +ANALYZE TABLE bad_plan.t; |
1334 | |
1335 | === added file 't/pt-table-checksum/samples/n-chunk-index-cols.txt' |
1336 | --- t/pt-table-checksum/samples/n-chunk-index-cols.txt 1970-01-01 00:00:00 +0000 |
1337 | +++ t/pt-table-checksum/samples/n-chunk-index-cols.txt 2012-06-10 17:15:24 +0000 |
1338 | @@ -0,0 +1,19 @@ |
1339 | +-- |
1340 | +-- sakila.rental |
1341 | +-- |
1342 | + |
1343 | +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `rental_id`, `rental_date`, `inventory_id`, `customer_id`, `return_date`, `staff_id`, `last_update` + 0, CONCAT(ISNULL(`return_date`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) AND ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?)) /*checksum chunk*/ |
1344 | + |
1345 | +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past lower chunk*/ |
1346 | + |
1347 | +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past upper chunk*/ |
1348 | + |
1349 | +SELECT /*!40001 SQL_NO_CACHE */ `rental_date`, `rental_date`, `inventory_id` FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` LIMIT ?, 2 /*next chunk boundary*/ |
1350 | + |
1351 | +1 2005-05-24 22:53:30,2005-05-24 22:53:30,367 2005-07-09 01:17:08,2005-07-09 01:17:08,2388 |
1352 | +2 2005-07-09 01:19:03,2005-07-09 01:19:03,3438 2005-07-31 17:51:23,2005-07-31 17:51:23,2396 |
1353 | +3 2005-07-31 17:53:51,2005-07-31 17:53:51,928 2005-08-22 13:58:23,2005-08-22 13:58:23,2553 |
1354 | +4 2005-08-22 13:59:19,2005-08-22 13:59:19,3704 2006-02-14 15:16:03,2006-02-14 15:16:03,4568 |
1355 | +5 2005-05-24 22:53:30,2005-05-24 22:53:30,367 |
1356 | +6 2006-02-14 15:16:03,2006-02-14 15:16:03,4568 |
1357 | + |
OK.