Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0 into lp:percona-toolkit/2.0

Proposed by Daniel Nichter
Status: Merged
Approved by: Daniel Nichter
Approved revision: 263
Merged at revision: 121
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0
Merge into: lp:percona-toolkit/2.0
Diff against target: 27153 lines (+12572/-10320) (has conflicts)
177 files modified
Changelog (+8/-2)
bin/pt-table-checksum (+5183/-5510)
bin/pt-table-sync (+120/-19)
lib/CleanupTask.pm (+69/-0)
lib/CompareResults.pm (+6/-4)
lib/CopyRowsInsertSelect.pm (+17/-18)
lib/Cxn.pm (+215/-0)
lib/DSNParser.pm (+7/-5)
lib/MasterSlave.pm (+85/-0)
lib/MySQLDump.pm (+0/-322)
lib/MySQLStatusWaiter.pm (+185/-0)
lib/NibbleIterator.pm (+545/-269)
lib/OobNibbleIterator.pm (+230/-0)
lib/OptionParser.pm (+1/-1)
lib/PerconaTest.pm (+66/-4)
lib/Progress.pm (+13/-2)
lib/Quoter.pm (+68/-0)
lib/ReplicaLagWaiter.pm (+173/-0)
lib/Retry.pm (+35/-41)
lib/RowChecksum.pm (+82/-71)
lib/Sandbox.pm (+2/-0)
lib/SchemaIterator.pm (+99/-26)
lib/TableChecksum.pm (+10/-12)
lib/TableChunker.pm (+5/-5)
lib/TableParser.pm (+75/-60)
lib/TableSyncer.pm (+17/-18)
lib/WeightedAvgRate.pm (+102/-0)
sandbox/load-sakila-db (+0/-3)
sandbox/start-sandbox (+3/-0)
sandbox/test-env (+10/-2)
t/lib/CleanupTask.t (+36/-0)
t/lib/CompareResults.t (+0/-3)
t/lib/Cxn.t (+259/-0)
t/lib/DSNParser.t (+1/-1)
t/lib/MasterSlave.t (+113/-4)
t/lib/MySQLDump.t (+0/-92)
t/lib/MySQLStatusWaiter.t (+221/-0)
t/lib/NibbleIterator.t (+512/-157)
t/lib/OobNibbleIterator.t (+264/-0)
t/lib/OptionParser.t (+2/-2)
t/lib/Progress.t (+49/-4)
t/lib/QueryReview.t (+3/-5)
t/lib/Quoter.t (+114/-1)
t/lib/ReplicaLagWaiter.t (+121/-0)
t/lib/Retry.t (+83/-118)
t/lib/RowChecksum.t (+64/-53)
t/lib/RowDiff-custom.t (+1/-3)
t/lib/RowDiff.t (+1/-3)
t/lib/SQLParser.t (+1/-1)
t/lib/Schema.t (+1/-1)
t/lib/SchemaIterator.t (+28/-11)
t/lib/TableChecksum.t (+1/-3)
t/lib/TableChunker.t (+26/-28)
t/lib/TableParser.t (+25/-156)
t/lib/TableSyncChunk.t (+5/-7)
t/lib/TableSyncNibble.t (+7/-9)
t/lib/TableSyncer.t (+32/-41)
t/lib/WeightedAvgRate.t (+85/-0)
t/lib/samples/MasterSlave/dsn_table.sql (+12/-0)
t/lib/samples/NibbleIterator/bad_tables.sql (+21/-0)
t/lib/samples/SchemaIterator/all-dbs-tbls.txt (+0/-2)
t/lib/samples/SchemaIterator/mysql-user-ddl-5.0.txt (+1/-0)
t/lib/samples/SchemaIterator/mysql-user-ddl.txt (+1/-0)
t/lib/samples/SchemaIterator/resume-from-ignored-sakila-payment.txt (+3/-0)
t/lib/samples/SchemaIterator/resume-from-sakila-payment.txt (+4/-0)
t/lib/samples/char-chunking/ascii.sql (+146/-0)
t/lib/samples/tables/sakila.actor (+8/-0)
t/pt-table-checksum/arg_table.t (+0/-106)
t/pt-table-checksum/basics.t (+280/-118)
t/pt-table-checksum/char_chunking.t (+38/-20)
t/pt-table-checksum/checksum.t (+0/-59)
t/pt-table-checksum/chunk_column.t (+0/-104)
t/pt-table-checksum/chunk_index.t (+52/-81)
t/pt-table-checksum/chunk_size.t (+78/-18)
t/pt-table-checksum/create_replicate_table.t (+84/-60)
t/pt-table-checksum/error_handling.t (+79/-6)
t/pt-table-checksum/filters.t (+102/-53)
t/pt-table-checksum/float_precision.t (+55/-15)
t/pt-table-checksum/fnv_64.t (+72/-20)
t/pt-table-checksum/force_index.t (+0/-67)
t/pt-table-checksum/ignore_columns.t (+40/-20)
t/pt-table-checksum/issue_1020.t (+0/-53)
t/pt-table-checksum/issue_1182.t (+0/-59)
t/pt-table-checksum/issue_122.t (+0/-70)
t/pt-table-checksum/issue_1319.t (+0/-52)
t/pt-table-checksum/issue_21.t (+0/-92)
t/pt-table-checksum/issue_35.t (+0/-62)
t/pt-table-checksum/issue_388.t (+11/-3)
t/pt-table-checksum/issue_47.t (+32/-12)
t/pt-table-checksum/issue_51.t (+0/-77)
t/pt-table-checksum/issue_602.t (+12/-6)
t/pt-table-checksum/issue_69.t (+0/-75)
t/pt-table-checksum/issue_947.t (+0/-49)
t/pt-table-checksum/issue_982.t (+0/-226)
t/pt-table-checksum/offset_modulo.t (+0/-52)
t/pt-table-checksum/option_sanity.t (+132/-7)
t/pt-table-checksum/oversize_chunks.t (+0/-63)
t/pt-table-checksum/probability.t (+0/-60)
t/pt-table-checksum/progress.t (+45/-43)
t/pt-table-checksum/replication_filters.t (+241/-42)
t/pt-table-checksum/resume.t (+662/-69)
t/pt-table-checksum/retry_timeouts.t (+0/-112)
t/pt-table-checksum/samples/3tbl-resume.sql (+27/-0)
t/pt-table-checksum/samples/arg-table.sql (+0/-7)
t/pt-table-checksum/samples/basic_replicate_output (+0/-2)
t/pt-table-checksum/samples/before.sql (+0/-80)
t/pt-table-checksum/samples/char-chunk-ascii-explain.txt (+23/-9)
t/pt-table-checksum/samples/char-chunk-ascii-oversize.txt (+0/-9)
t/pt-table-checksum/samples/char-chunk-ascii.txt (+2/-9)
t/pt-table-checksum/samples/char-chunking.sql (+0/-110)
t/pt-table-checksum/samples/checksum_results/3tbl-resume (+24/-0)
t/pt-table-checksum/samples/checksum_results/3tbl-resume-bar (+11/-0)
t/pt-table-checksum/samples/checksum_results/sakila-done-1k-chunks (+65/-0)
t/pt-table-checksum/samples/checksum_results/sakila-done-singles (+16/-0)
t/pt-table-checksum/samples/checksum_tbl.sql (+0/-14)
t/pt-table-checksum/samples/checksum_tbl_truncated.sql (+16/-14)
t/pt-table-checksum/samples/chunkidx001.txt (+19/-0)
t/pt-table-checksum/samples/chunkidx002.txt (+19/-0)
t/pt-table-checksum/samples/chunkidx003.txt (+19/-0)
t/pt-table-checksum/samples/chunkidx004.txt (+12/-0)
t/pt-table-checksum/samples/chunkidx005.txt (+12/-0)
t/pt-table-checksum/samples/default-results-5.1.txt (+38/-0)
t/pt-table-checksum/samples/float_precision.sql (+7/-0)
t/pt-table-checksum/samples/fnv64-sakila-city.txt (+24/-0)
t/pt-table-checksum/samples/issue_122.sql (+0/-15)
t/pt-table-checksum/samples/issue_21.sql (+5/-3)
t/pt-table-checksum/samples/issue_467.txt (+0/-5)
t/pt-table-checksum/samples/issue_922.sql (+0/-10)
t/pt-table-checksum/samples/no-recheck.txt (+10/-0)
t/pt-table-checksum/samples/oversize-chunks-allowed.txt (+0/-6)
t/pt-table-checksum/samples/oversize-chunks.sql (+24/-3)
t/pt-table-checksum/samples/oversize-chunks.txt (+18/-6)
t/pt-table-checksum/samples/replicate.sql (+0/-22)
t/pt-table-checksum/samples/resume-chunked-complete.txt (+0/-9)
t/pt-table-checksum/samples/resume-chunked-partial.txt (+0/-5)
t/pt-table-checksum/samples/resume-complete.txt (+0/-3)
t/pt-table-checksum/samples/resume-partial.txt (+0/-2)
t/pt-table-checksum/samples/resume.sql (+0/-5)
t/pt-table-checksum/samples/resume2-chunked-complete.txt (+0/-17)
t/pt-table-checksum/samples/resume2-chunked-partial.txt (+0/-6)
t/pt-table-checksum/samples/resume2.sql (+0/-7)
t/pt-table-checksum/samples/sample_1 (+0/-6)
t/pt-table-checksum/samples/sample_2 (+0/-5)
t/pt-table-checksum/samples/sample_schema_opt (+0/-35)
t/pt-table-checksum/samples/static-chunk-size-results-5.1.txt (+38/-0)
t/pt-table-checksum/samples/unchunkable-table-small.txt (+0/-2)
t/pt-table-checksum/samples/unchunkable-table.txt (+0/-1)
t/pt-table-checksum/samples/where01.out (+0/-3)
t/pt-table-checksum/samples/where01.sql (+0/-26)
t/pt-table-checksum/samples/where02.out (+0/-4)
t/pt-table-checksum/samples/where02.sql (+0/-26)
t/pt-table-checksum/schema.t (+0/-172)
t/pt-table-checksum/scripts/exec-wait-exec.sh (+10/-0)
t/pt-table-checksum/scripts/wait-for-chunk.sh (+8/-0)
t/pt-table-checksum/since.t (+0/-79)
t/pt-table-checksum/standard_options.t (+106/-13)
t/pt-table-checksum/throttle.t (+39/-226)
t/pt-table-checksum/unchunkable_tables.t (+0/-57)
t/pt-table-checksum/zero_chunk.t (+0/-56)
t/pt-table-sync/bidirectional.t (+17/-18)
t/pt-table-sync/diff_where.t (+63/-0)
t/pt-table-sync/filters.t (+6/-8)
t/pt-table-sync/issue_408.t (+5/-5)
t/pt-table-sync/issue_560.t (+6/-11)
t/pt-table-sync/issue_627.t (+1/-2)
t/pt-table-sync/issue_79.t (+2/-3)
t/pt-table-sync/issue_996.t (+5/-8)
t/pt-table-sync/samples/bidirectional/queries001.txt (+7/-0)
t/pt-table-sync/samples/diff001.sql (+12/-0)
t/pt-table-sync/samples/issue_560.sql (+0/-13)
t/pt-table-sync/samples/issue_560_output_2.txt (+12/-12)
t/pt-table-sync/samples/simple-tbl-ddl.sql (+10/-0)
t/pt-table-sync/samples/simple-tbls.sql (+62/-0)
t/pt-table-sync/sync_to_differnt_db.t (+5/-6)
t/pt-table-sync/triggers.t (+4/-4)
util/build-packages (+26/-11)
util/wait-to-exec (+40/-0)
Text conflict in Changelog
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+87117@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file '.bzrignore'
2=== modified file 'Changelog'
3--- Changelog 2011-10-11 14:31:42 +0000
4+++ Changelog 2011-12-30 00:34:26 +0000
5@@ -1,7 +1,13 @@
6 Changelog for Percona Toolkit
7
8- * Fixed bug 821717: pt-tcp-model --type=requests crashes
9-
10+<<<<<<< TREE
11+ * Fixed bug 821717: pt-tcp-model --type=requests crashes
12+
13+=======
14+ * Completely redesigned pt-table-checksum.
15+ * Fixed bug 821717: pt-tcp-model --type=requests crashes
16+
17+>>>>>>> MERGE-SOURCE
18 v1.0.1 released 2011-09-01
19
20 * Fixed bug 819421: MasterSlave::is_replication_thread() doesn't match all
21
22=== modified file 'bin/pt-table-checksum'
23--- bin/pt-table-checksum 2011-09-01 16:00:38 +0000
24+++ bin/pt-table-checksum 2011-12-30 00:34:26 +0000
25@@ -9,15 +9,15 @@
26 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
27
28 # ###########################################################################
29-# TableParser package
30+# DSNParser package
31 # This package is a copy without comments from the original. The original
32 # with comments and its test file can be found in the Bazaar repository at,
33-# lib/TableParser.pm
34-# t/lib/TableParser.t
35+# lib/DSNParser.pm
36+# t/lib/DSNParser.t
37 # See https://launchpad.net/percona-toolkit for more information.
38 # ###########################################################################
39 {
40-package TableParser;
41+package DSNParser;
42
43 use strict;
44 use warnings FATAL => 'all';
45@@ -25,757 +25,331 @@
46 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
47
48 use Data::Dumper;
49-$Data::Dumper::Indent = 1;
50-$Data::Dumper::Sortkeys = 1;
51+$Data::Dumper::Indent = 0;
52 $Data::Dumper::Quotekeys = 0;
53
54+eval {
55+ require DBI;
56+};
57+my $have_dbi = $EVAL_ERROR ? 0 : 1;
58+
59 sub new {
60 my ( $class, %args ) = @_;
61- my @required_args = qw(Quoter);
62- foreach my $arg ( @required_args ) {
63+ foreach my $arg ( qw(opts) ) {
64 die "I need a $arg argument" unless $args{$arg};
65 }
66- my $self = { %args };
67+ my $self = {
68+ opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
69+ };
70+ foreach my $opt ( @{$args{opts}} ) {
71+ if ( !$opt->{key} || !$opt->{desc} ) {
72+ die "Invalid DSN option: ", Dumper($opt);
73+ }
74+ MKDEBUG && _d('DSN option:',
75+ join(', ',
76+ map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
77+ keys %$opt
78+ )
79+ );
80+ $self->{opts}->{$opt->{key}} = {
81+ dsn => $opt->{dsn},
82+ desc => $opt->{desc},
83+ copy => $opt->{copy} || 0,
84+ };
85+ }
86 return bless $self, $class;
87 }
88
89+sub prop {
90+ my ( $self, $prop, $value ) = @_;
91+ if ( @_ > 2 ) {
92+ MKDEBUG && _d('Setting', $prop, 'property');
93+ $self->{$prop} = $value;
94+ }
95+ return $self->{$prop};
96+}
97+
98 sub parse {
99- my ( $self, $ddl, $opts ) = @_;
100- return unless $ddl;
101- if ( ref $ddl eq 'ARRAY' ) {
102- if ( lc $ddl->[0] eq 'table' ) {
103- $ddl = $ddl->[1];
104- }
105- else {
106- return {
107- engine => 'VIEW',
108- };
109- }
110- }
111-
112- if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
113- die "Cannot parse table definition; is ANSI quoting "
114- . "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
115- }
116-
117- my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
118- (undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
119-
120- $ddl =~ s/(`[^`]+`)/\L$1/g;
121-
122- my $engine = $self->get_engine($ddl);
123-
124- my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
125- my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
126- MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
127-
128- my %def_for;
129- @def_for{@cols} = @defs;
130-
131- my (@nums, @null);
132- my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
133- foreach my $col ( @cols ) {
134- my $def = $def_for{$col};
135- my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
136- die "Can't determine column type for $def" unless $type;
137- $type_for{$col} = $type;
138- if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
139- push @nums, $col;
140- $is_numeric{$col} = 1;
141- }
142- if ( $def !~ m/NOT NULL/ ) {
143- push @null, $col;
144- $is_nullable{$col} = 1;
145- }
146- $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
147- }
148-
149- my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
150-
151- my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
152-
153- return {
154- name => $name,
155- cols => \@cols,
156- col_posn => { map { $cols[$_] => $_ } 0..$#cols },
157- is_col => { map { $_ => 1 } @cols },
158- null_cols => \@null,
159- is_nullable => \%is_nullable,
160- is_autoinc => \%is_autoinc,
161- clustered_key => $clustered_key,
162- keys => $keys,
163- defs => \%def_for,
164- numeric_cols => \@nums,
165- is_numeric => \%is_numeric,
166- engine => $engine,
167- type_for => \%type_for,
168- charset => $charset,
169- };
170-}
171-
172-sub sort_indexes {
173- my ( $self, $tbl ) = @_;
174-
175- my @indexes
176- = sort {
177- (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
178- || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
179- || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
180- || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
181- }
182- grep {
183- $tbl->{keys}->{$_}->{type} eq 'BTREE'
184- }
185- sort keys %{$tbl->{keys}};
186-
187- MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
188- return @indexes;
189-}
190-
191-sub find_best_index {
192- my ( $self, $tbl, $index ) = @_;
193- my $best;
194- if ( $index ) {
195- ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
196- }
197- if ( !$best ) {
198- if ( $index ) {
199- die "Index '$index' does not exist in table";
200- }
201- else {
202- ($best) = $self->sort_indexes($tbl);
203- }
204- }
205- MKDEBUG && _d('Best index found is', $best);
206- return $best;
207-}
208-
209-sub find_possible_keys {
210- my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
211- return () unless $where;
212- my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
213- . ' WHERE ' . $where;
214- MKDEBUG && _d($sql);
215- my $expl = $dbh->selectrow_hashref($sql);
216- $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
217- if ( $expl->{possible_keys} ) {
218- MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
219- my @candidates = split(',', $expl->{possible_keys});
220- my %possible = map { $_ => 1 } @candidates;
221- if ( $expl->{key} ) {
222- MKDEBUG && _d('MySQL chose', $expl->{key});
223- unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
224- MKDEBUG && _d('Before deduping:', join(', ', @candidates));
225- my %seen;
226- @candidates = grep { !$seen{$_}++ } @candidates;
227- }
228- MKDEBUG && _d('Final list:', join(', ', @candidates));
229- return @candidates;
230- }
231- else {
232- MKDEBUG && _d('No keys in possible_keys');
233- return ();
234- }
235-}
236-
237-sub check_table {
238- my ( $self, %args ) = @_;
239- my @required_args = qw(dbh db tbl);
240- foreach my $arg ( @required_args ) {
241- die "I need a $arg argument" unless $args{$arg};
242- }
243- my ($dbh, $db, $tbl) = @args{@required_args};
244- my $q = $self->{Quoter};
245- my $db_tbl = $q->quote($db, $tbl);
246- MKDEBUG && _d('Checking', $db_tbl);
247-
248- my $sql = "SHOW TABLES FROM " . $q->quote($db)
249- . ' LIKE ' . $q->literal_like($tbl);
250- MKDEBUG && _d($sql);
251- my $row;
252- eval {
253- $row = $dbh->selectrow_arrayref($sql);
254- };
255- if ( $EVAL_ERROR ) {
256- MKDEBUG && _d($EVAL_ERROR);
257- return 0;
258- }
259- if ( !$row->[0] || $row->[0] ne $tbl ) {
260- MKDEBUG && _d('Table does not exist');
261- return 0;
262- }
263-
264- MKDEBUG && _d('Table exists; no privs to check');
265- return 1 unless $args{all_privs};
266-
267- $sql = "SHOW FULL COLUMNS FROM $db_tbl";
268- MKDEBUG && _d($sql);
269- eval {
270- $row = $dbh->selectrow_hashref($sql);
271- };
272- if ( $EVAL_ERROR ) {
273- MKDEBUG && _d($EVAL_ERROR);
274- return 0;
275- }
276- if ( !scalar keys %$row ) {
277- MKDEBUG && _d('Table has no columns:', Dumper($row));
278- return 0;
279- }
280- my $privs = $row->{privileges} || $row->{Privileges};
281-
282- $sql = "DELETE FROM $db_tbl LIMIT 0";
283- MKDEBUG && _d($sql);
284- eval {
285- $dbh->do($sql);
286- };
287- my $can_delete = $EVAL_ERROR ? 0 : 1;
288-
289- MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
290- ($can_delete ? 'delete' : ''));
291-
292- if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
293- && $can_delete) ) {
294- MKDEBUG && _d('User does not have all privs');
295- return 0;
296- }
297-
298- MKDEBUG && _d('User has all privs');
299- return 1;
300-}
301-
302-sub get_engine {
303- my ( $self, $ddl, $opts ) = @_;
304- my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
305- MKDEBUG && _d('Storage engine:', $engine);
306- return $engine || undef;
307-}
308-
309-sub get_keys {
310- my ( $self, $ddl, $opts, $is_nullable ) = @_;
311- my $engine = $self->get_engine($ddl);
312- my $keys = {};
313- my $clustered_key = undef;
314-
315- KEY:
316- foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
317-
318- next KEY if $key =~ m/FOREIGN/;
319-
320- my $key_ddl = $key;
321- MKDEBUG && _d('Parsed key:', $key_ddl);
322-
323- if ( $engine !~ m/MEMORY|HEAP/ ) {
324- $key =~ s/USING HASH/USING BTREE/;
325- }
326-
327- my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
328- my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
329- $type = $type || $special || 'BTREE';
330- if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
331- && $engine =~ m/HEAP|MEMORY/i )
332+ my ( $self, $dsn, $prev, $defaults ) = @_;
333+ if ( !$dsn ) {
334+ MKDEBUG && _d('No DSN to parse');
335+ return;
336+ }
337+ MKDEBUG && _d('Parsing', $dsn);
338+ $prev ||= {};
339+ $defaults ||= {};
340+ my %given_props;
341+ my %final_props;
342+ my $opts = $self->{opts};
343+
344+ foreach my $dsn_part ( split(/,/, $dsn) ) {
345+ if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
346+ $given_props{$prop_key} = $prop_val;
347+ }
348+ else {
349+ MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
350+ $given_props{h} = $dsn_part;
351+ }
352+ }
353+
354+ foreach my $key ( keys %$opts ) {
355+ MKDEBUG && _d('Finding value for', $key);
356+ $final_props{$key} = $given_props{$key};
357+ if ( !defined $final_props{$key}
358+ && defined $prev->{$key} && $opts->{$key}->{copy} )
359 {
360- $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
361- }
362-
363- my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
364- my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
365- my @cols;
366- my @col_prefixes;
367- foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
368- my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
369- push @cols, $name;
370- push @col_prefixes, $prefix;
371- }
372- $name =~ s/`//g;
373-
374- MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
375-
376- $keys->{$name} = {
377- name => $name,
378- type => $type,
379- colnames => $cols,
380- cols => \@cols,
381- col_prefixes => \@col_prefixes,
382- is_unique => $unique,
383- is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
384- is_col => { map { $_ => 1 } @cols },
385- ddl => $key_ddl,
386- };
387-
388- if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
389- my $this_key = $keys->{$name};
390- if ( $this_key->{name} eq 'PRIMARY' ) {
391- $clustered_key = 'PRIMARY';
392- }
393- elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
394- $clustered_key = $this_key->{name};
395- }
396- MKDEBUG && $clustered_key && _d('This key is the clustered key');
397- }
398- }
399-
400- return $keys, $clustered_key;
401-}
402-
403-sub get_fks {
404- my ( $self, $ddl, $opts ) = @_;
405- my $q = $self->{Quoter};
406- my $fks = {};
407-
408- foreach my $fk (
409- $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
410- {
411- my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
412- my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
413- my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
414-
415- my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
416- my %parent_tbl = (tbl => $tbl);
417- $parent_tbl{db} = $db if $db;
418-
419- if ( $parent !~ m/\./ && $opts->{database} ) {
420- $parent = $q->quote($opts->{database}) . ".$parent";
421- }
422-
423- $fks->{$name} = {
424- name => $name,
425- colnames => $cols,
426- cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
427- parent_tbl => \%parent_tbl,
428- parent_tblname => $parent,
429- parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
430- parent_colnames=> $parent_cols,
431- ddl => $fk,
432- };
433- }
434-
435- return $fks;
436-}
437-
438-sub remove_auto_increment {
439- my ( $self, $ddl ) = @_;
440- $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
441- return $ddl;
442-}
443-
444-sub remove_secondary_indexes {
445- my ( $self, $ddl ) = @_;
446- my $sec_indexes_ddl;
447- my $tbl_struct = $self->parse($ddl);
448-
449- if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
450- my $clustered_key = $tbl_struct->{clustered_key};
451- $clustered_key ||= '';
452-
453- my @sec_indexes = map {
454- my $key_def = $_->{ddl};
455- $key_def =~ s/([\(\)])/\\$1/g;
456- $ddl =~ s/\s+$key_def//i;
457-
458- my $key_ddl = "ADD $_->{ddl}";
459- $key_ddl .= ',' unless $key_ddl =~ m/,$/;
460- $key_ddl;
461- }
462- grep { $_->{name} ne $clustered_key }
463- values %{$tbl_struct->{keys}};
464- MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
465-
466- if ( @sec_indexes ) {
467- $sec_indexes_ddl = join(' ', @sec_indexes);
468- $sec_indexes_ddl =~ s/,$//;
469- }
470-
471- $ddl =~ s/,(\n\) )/$1/s;
472+ $final_props{$key} = $prev->{$key};
473+ MKDEBUG && _d('Copying value for', $key, 'from previous DSN');
474+ }
475+ if ( !defined $final_props{$key} ) {
476+ $final_props{$key} = $defaults->{$key};
477+ MKDEBUG && _d('Copying value for', $key, 'from defaults');
478+ }
479+ }
480+
481+ foreach my $key ( keys %given_props ) {
482+ die "Unknown DSN option '$key' in '$dsn'. For more details, "
483+ . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
484+ . "for complete documentation."
485+ unless exists $opts->{$key};
486+ }
487+ if ( (my $required = $self->prop('required')) ) {
488+ foreach my $key ( keys %$required ) {
489+ die "Missing required DSN option '$key' in '$dsn'. For more details, "
490+ . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
491+ . "for complete documentation."
492+ unless $final_props{$key};
493+ }
494+ }
495+
496+ return \%final_props;
497+}
498+
499+sub parse_options {
500+ my ( $self, $o ) = @_;
501+ die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
502+ my $dsn_string
503+ = join(',',
504+ map { "$_=".$o->get($_); }
505+ grep { $o->has($_) && $o->get($_) }
506+ keys %{$self->{opts}}
507+ );
508+ MKDEBUG && _d('DSN string made from options:', $dsn_string);
509+ return $self->parse($dsn_string);
510+}
511+
512+sub as_string {
513+ my ( $self, $dsn, $props ) = @_;
514+ return $dsn unless ref $dsn;
515+ my @keys = $props ? @$props : sort keys %$dsn;
516+ return join(',',
517+ map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
518+ grep {
519+ exists $self->{opts}->{$_}
520+ && exists $dsn->{$_}
521+ && defined $dsn->{$_}
522+ } @keys);
523+}
524+
525+sub usage {
526+ my ( $self ) = @_;
527+ my $usage
528+ = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
529+ . " KEY COPY MEANING\n"
530+ . " === ==== =============================================\n";
531+ my %opts = %{$self->{opts}};
532+ foreach my $key ( sort keys %opts ) {
533+ $usage .= " $key "
534+ . ($opts{$key}->{copy} ? 'yes ' : 'no ')
535+ . ($opts{$key}->{desc} || '[No description]')
536+ . "\n";
537+ }
538+ $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
539+ return $usage;
540+}
541+
542+sub get_cxn_params {
543+ my ( $self, $info ) = @_;
544+ my $dsn;
545+ my %opts = %{$self->{opts}};
546+ my $driver = $self->prop('dbidriver') || '';
547+ if ( $driver eq 'Pg' ) {
548+ $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
549+ . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
550+ grep { defined $info->{$_} }
551+ qw(h P));
552 }
553 else {
554- MKDEBUG && _d('Not removing secondary indexes from',
555- $tbl_struct->{engine}, 'table');
556- }
557-
558- return $ddl, $sec_indexes_ddl, $tbl_struct;
559-}
560-
561-sub _d {
562- my ($package, undef, $line) = caller 0;
563- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
564- map { defined $_ ? $_ : 'undef' }
565- @_;
566- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
567-}
568-
569-1;
570-}
571-# ###########################################################################
572-# End TableParser package
573-# ###########################################################################
574-
575-# ###########################################################################
576-# TableChecksum package
577-# This package is a copy without comments from the original. The original
578-# with comments and its test file can be found in the Bazaar repository at,
579-# lib/TableChecksum.pm
580-# t/lib/TableChecksum.t
581-# See https://launchpad.net/percona-toolkit for more information.
582-# ###########################################################################
583-{
584-package TableChecksum;
585-
586-use strict;
587-use warnings FATAL => 'all';
588-use English qw(-no_match_vars);
589-use constant MKDEBUG => $ENV{MKDEBUG} || 0;
590-
591-use List::Util qw(max);
592-
593-our %ALGOS = (
594- CHECKSUM => { pref => 0, hash => 0 },
595- BIT_XOR => { pref => 2, hash => 1 },
596- ACCUM => { pref => 3, hash => 1 },
597-);
598-
599-sub new {
600- my ( $class, %args ) = @_;
601- foreach my $arg ( qw(Quoter VersionParser) ) {
602- die "I need a $arg argument" unless defined $args{$arg};
603- }
604- my $self = { %args };
605- return bless $self, $class;
606-}
607-
608-sub crc32 {
609- my ( $self, $string ) = @_;
610- my $poly = 0xEDB88320;
611- my $crc = 0xFFFFFFFF;
612- foreach my $char ( split(//, $string) ) {
613- my $comp = ($crc ^ ord($char)) & 0xFF;
614- for ( 1 .. 8 ) {
615- $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
616- }
617- $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
618- }
619- return $crc ^ 0xFFFFFFFF;
620-}
621-
622-sub get_crc_wid {
623- my ( $self, $dbh, $func ) = @_;
624- my $crc_wid = 16;
625- if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
626- eval {
627- my ($val) = $dbh->selectrow_array("SELECT $func('a')");
628- $crc_wid = max(16, length($val));
629- };
630- }
631- return $crc_wid;
632-}
633-
634-sub get_crc_type {
635- my ( $self, $dbh, $func ) = @_;
636- my $type = '';
637- my $length = 0;
638- my $sql = "SELECT $func('a')";
639- my $sth = $dbh->prepare($sql);
640- eval {
641- $sth->execute();
642- $type = $sth->{mysql_type_name}->[0];
643- $length = $sth->{mysql_length}->[0];
644- MKDEBUG && _d($sql, $type, $length);
645- if ( $type eq 'bigint' && $length < 20 ) {
646- $type = 'int';
647- }
648+ $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
649+ . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
650+ grep { defined $info->{$_} }
651+ qw(F h P S A))
652+ . ';mysql_read_default_group=client';
653+ }
654+ MKDEBUG && _d($dsn);
655+ return ($dsn, $info->{u}, $info->{p});
656+}
657+
658+sub fill_in_dsn {
659+ my ( $self, $dbh, $dsn ) = @_;
660+ my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
661+ my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
662+ $user =~ s/@.*//;
663+ $dsn->{h} ||= $vars->{hostname}->{Value};
664+ $dsn->{S} ||= $vars->{'socket'}->{Value};
665+ $dsn->{P} ||= $vars->{port}->{Value};
666+ $dsn->{u} ||= $user;
667+ $dsn->{D} ||= $db;
668+}
669+
670+sub get_dbh {
671+ my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
672+ $opts ||= {};
673+ my $defaults = {
674+ AutoCommit => 0,
675+ RaiseError => 1,
676+ PrintError => 0,
677+ ShowErrorStatement => 1,
678+ mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
679 };
680- $sth->finish;
681- MKDEBUG && _d('crc_type:', $type, 'length:', $length);
682- return ($type, $length);
683-}
684-
685-sub best_algorithm {
686- my ( $self, %args ) = @_;
687- my ( $alg, $dbh ) = @args{ qw(algorithm dbh) };
688- my $vp = $self->{VersionParser};
689- my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS;
690- die "Invalid checksum algorithm $alg"
691- if $alg && !$ALGOS{$alg};
692-
693- if (
694- $args{where} || $args{chunk} # CHECKSUM does whole table
695- || $args{replicate} # CHECKSUM can't do INSERT.. SELECT
696- || !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist
697- {
698- MKDEBUG && _d('Cannot use CHECKSUM algorithm');
699- @choices = grep { $_ ne 'CHECKSUM' } @choices;
700- }
701-
702- if ( !$vp->version_ge($dbh, '4.1.1') ) {
703- MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1');
704- @choices = grep { $_ ne 'BIT_XOR' } @choices;
705- }
706-
707- if ( $alg && grep { $_ eq $alg } @choices ) {
708- MKDEBUG && _d('User requested', $alg, 'algorithm');
709- return $alg;
710- }
711-
712- if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) {
713- MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired');
714- @choices = grep { $_ ne 'CHECKSUM' } @choices;
715- }
716-
717- MKDEBUG && _d('Algorithms, in order:', @choices);
718- return $choices[0];
719-}
720-
721-sub is_hash_algorithm {
722- my ( $self, $algorithm ) = @_;
723- return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash};
724-}
725-
726-sub choose_hash_func {
727- my ( $self, %args ) = @_;
728- my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
729- if ( $args{function} ) {
730- unshift @funcs, $args{function};
731- }
732- my ($result, $error);
733- do {
734- my $func;
735+ @{$defaults}{ keys %$opts } = values %$opts;
736+
737+ if ( $opts->{mysql_use_result} ) {
738+ $defaults->{mysql_use_result} = 1;
739+ }
740+
741+ if ( !$have_dbi ) {
742+ die "Cannot connect to MySQL because the Perl DBI module is not "
743+ . "installed or not found. Run 'perl -MDBI' to see the directories "
744+ . "that Perl searches for DBI. If DBI is not installed, try:\n"
745+ . " Debian/Ubuntu apt-get install libdbi-perl\n"
746+ . " RHEL/CentOS yum install perl-DBI\n"
747+ . " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
748+
749+ }
750+
751+ my $dbh;
752+ my $tries = 2;
753+ while ( !$dbh && $tries-- ) {
754+ MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
755+ join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
756+
757 eval {
758- $func = shift(@funcs);
759- my $sql = "SELECT $func('test-string')";
760- MKDEBUG && _d($sql);
761- $args{dbh}->do($sql);
762- $result = $func;
763+ $dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
764+
765+ if ( $cxn_string =~ m/mysql/i ) {
766+ my $sql;
767+
768+ $sql = 'SELECT @@SQL_MODE';
769+ MKDEBUG && _d($dbh, $sql);
770+ my ($sql_mode) = $dbh->selectrow_array($sql);
771+
772+ $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
773+ . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
774+ . ($sql_mode ? ",$sql_mode" : '')
775+ . '\'*/';
776+ MKDEBUG && _d($dbh, $sql);
777+ $dbh->do($sql);
778+
779+ if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
780+ $sql = "/*!40101 SET NAMES $charset*/";
781+ MKDEBUG && _d($dbh, ':', $sql);
782+ $dbh->do($sql);
783+ MKDEBUG && _d('Enabling charset for STDOUT');
784+ if ( $charset eq 'utf8' ) {
785+ binmode(STDOUT, ':utf8')
786+ or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
787+ }
788+ else {
789+ binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
790+ }
791+ }
792+
793+ if ( $self->prop('set-vars') ) {
794+ $sql = "SET " . $self->prop('set-vars');
795+ MKDEBUG && _d($dbh, ':', $sql);
796+ $dbh->do($sql);
797+ }
798+ }
799 };
800- if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
801- $error .= qq{$func cannot be used because "$1"\n};
802- MKDEBUG && _d($func, 'cannot be used because', $1);
803- }
804- } while ( @funcs && !$result );
805-
806- die $error unless $result;
807- MKDEBUG && _d('Chosen hash func:', $result);
808- return $result;
809-}
810-
811-sub optimize_xor {
812- my ( $self, %args ) = @_;
813- my ($dbh, $func) = @args{qw(dbh function)};
814-
815- die "$func never needs the BIT_XOR optimization"
816- if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
817-
818- my $opt_slice = 0;
819- my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
820- my $sliced = '';
821- my $start = 1;
822- my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced);
823-
824- do { # Try different positions till sliced result equals non-sliced.
825- MKDEBUG && _d('Trying slice', $opt_slice);
826- $dbh->do('SET @crc := "", @cnt := 0');
827- my $slices = $self->make_xor_slices(
828- query => "\@crc := $func('a')",
829- crc_wid => $crc_wid,
830- opt_slice => $opt_slice,
831- );
832-
833- my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
834- $sliced = ($dbh->selectrow_array($sql))[0];
835- if ( $sliced ne $unsliced ) {
836- MKDEBUG && _d('Slice', $opt_slice, 'does not work');
837- $start += 16;
838- ++$opt_slice;
839- }
840- } while ( $start < $crc_wid && $sliced ne $unsliced );
841-
842- if ( $sliced eq $unsliced ) {
843- MKDEBUG && _d('Slice', $opt_slice, 'works');
844- return $opt_slice;
845- }
846- else {
847- MKDEBUG && _d('No slice works');
848- return undef;
849- }
850-}
851-
852-sub make_xor_slices {
853- my ( $self, %args ) = @_;
854- foreach my $arg ( qw(query crc_wid) ) {
855- die "I need a $arg argument" unless defined $args{$arg};
856- }
857- my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)};
858-
859- my @slices;
860- for ( my $start = 1; $start <= $crc_wid; $start += 16 ) {
861- my $len = $crc_wid - $start + 1;
862- if ( $len > 16 ) {
863- $len = 16;
864- }
865- push @slices,
866- "LPAD(CONV(BIT_XOR("
867- . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
868- . ", 10, 16), $len, '0')";
869- }
870-
871- if ( defined $opt_slice && $opt_slice < @slices ) {
872- $slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
873- }
874- else {
875- map { s/\@crc/$query/ } @slices;
876- }
877-
878- return join(', ', @slices);
879-}
880-
881-sub make_row_checksum {
882- my ( $self, %args ) = @_;
883- my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) };
884- my $q = $self->{Quoter};
885-
886- my $sep = $args{sep} || '#';
887- $sep =~ s/'//g;
888- $sep ||= '#';
889-
890- my $ignorecols = $args{ignorecols} || {};
891-
892- my %cols = map { lc($_) => 1 }
893- grep { !exists $ignorecols->{$_} }
894- ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
895- my %seen;
896- my @cols =
897- map {
898- my $type = $tbl_struct->{type_for}->{$_};
899- my $result = $q->quote($_);
900- if ( $type eq 'timestamp' ) {
901- $result .= ' + 0';
902- }
903- elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
904- $result = "ROUND($result, $args{float_precision})";
905- }
906- elsif ( $args{trim} && $type =~ m/varchar/ ) {
907- $result = "TRIM($result)";
908- }
909- $result;
910- }
911- grep {
912- $cols{$_} && !$seen{$_}++
913- }
914- @{$tbl_struct->{cols}};
915-
916- my $query;
917- if ( !$args{no_cols} ) {
918- $query = join(', ',
919- map {
920- my $col = $_;
921- if ( $col =~ m/\+ 0/ ) {
922- my ($real_col) = /^(\S+)/;
923- $col .= " AS $real_col";
924- }
925- elsif ( $col =~ m/TRIM/ ) {
926- my ($real_col) = m/TRIM\(([^\)]+)\)/;
927- $col .= " AS $real_col";
928- }
929- $col;
930- } @cols)
931- . ', ';
932- }
933-
934- if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
935- my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
936- if ( @nulls ) {
937- my $bitmap = "CONCAT("
938- . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
939- . ")";
940- push @cols, $bitmap;
941- }
942-
943- $query .= @cols > 1
944- ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
945- : "$func($cols[0])";
946- }
947- else {
948- my $fnv_func = uc $func;
949- $query .= "$fnv_func(" . join(', ', @cols) . ')';
950- }
951-
952- return $query;
953-}
954-
955-sub make_checksum_query {
956- my ( $self, %args ) = @_;
957- my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type);
958- foreach my $arg( @required_args ) {
959- die "I need a $arg argument" unless $args{$arg};
960- }
961- my ( $db, $tbl, $tbl_struct, $algorithm,
962- $crc_wid, $crc_type) = @args{@required_args};
963- my $func = $args{function};
964- my $q = $self->{Quoter};
965- my $result;
966-
967- die "Invalid or missing checksum algorithm"
968- unless $algorithm && $ALGOS{$algorithm};
969-
970- if ( $algorithm eq 'CHECKSUM' ) {
971- return "CHECKSUM TABLE " . $q->quote($db, $tbl);
972- }
973-
974- my $expr = $self->make_row_checksum(%args, no_cols=>1);
975-
976- if ( $algorithm eq 'BIT_XOR' ) {
977- if ( $crc_type =~ m/int$/ ) {
978- $result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc ";
979- }
980- else {
981- my $slices = $self->make_xor_slices( query => $expr, %args );
982- $result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc ";
983- }
984- }
985- else {
986- if ( $crc_type =~ m/int$/ ) {
987- $result = "COALESCE(RIGHT(MAX("
988- . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
989- . "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))"
990- . "), $crc_wid), 0) AS crc ";
991- }
992- else {
993- $result = "COALESCE(RIGHT(MAX("
994- . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
995- . "$func(CONCAT(\@crc, $expr)))"
996- . "), $crc_wid), 0) AS crc ";
997- }
998- }
999- if ( $args{replicate} ) {
1000- $result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} "
1001- . "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
1002- . "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result";
1003- }
1004- else {
1005- $result = "SELECT "
1006- . ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '')
1007- . "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result";
1008- }
1009- return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/";
1010-}
1011-
1012-sub find_replication_differences {
1013- my ( $self, $dbh, $table ) = @_;
1014-
1015- (my $sql = <<" EOF") =~ s/\s+/ /gm;
1016- SELECT db, tbl, chunk, boundaries,
1017- COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
1018- COALESCE(
1019- this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
1020- 0
1021- ) AS crc_diff,
1022- this_cnt, master_cnt, this_crc, master_crc
1023- FROM $table
1024- WHERE master_cnt <> this_cnt OR master_crc <> this_crc
1025- OR ISNULL(master_crc) <> ISNULL(this_crc)
1026- EOF
1027-
1028- MKDEBUG && _d($sql);
1029- my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
1030- return @$diffs;
1031+ if ( !$dbh && $EVAL_ERROR ) {
1032+ MKDEBUG && _d($EVAL_ERROR);
1033+ if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
1034+ MKDEBUG && _d('Going to try again without utf8 support');
1035+ delete $defaults->{mysql_enable_utf8};
1036+ }
1037+ elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
1038+ die "Cannot connect to MySQL because the Perl DBD::mysql module is "
1039+ . "not installed or not found. Run 'perl -MDBD::mysql' to see "
1040+ . "the directories that Perl searches for DBD::mysql. If "
1041+ . "DBD::mysql is not installed, try:\n"
1042+ . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
1043+ . " RHEL/CentOS yum install perl-DBD-MySQL\n"
1044+ . " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
1045+ }
1046+ if ( !$tries ) {
1047+ die $EVAL_ERROR;
1048+ }
1049+ }
1050+ }
1051+
1052+ MKDEBUG && _d('DBH info: ',
1053+ $dbh,
1054+ Dumper($dbh->selectrow_hashref(
1055+ 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
1056+ 'Connection info:', $dbh->{mysql_hostinfo},
1057+ 'Character set info:', Dumper($dbh->selectall_arrayref(
1058+ 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
1059+ '$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
1060+ '$DBI::VERSION:', $DBI::VERSION,
1061+ );
1062+
1063+ return $dbh;
1064+}
1065+
1066+sub get_hostname {
1067+ my ( $self, $dbh ) = @_;
1068+ if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
1069+ return $host;
1070+ }
1071+ my ( $hostname, $one ) = $dbh->selectrow_array(
1072+ 'SELECT /*!50038 @@hostname, */ 1');
1073+ return $hostname;
1074+}
1075+
1076+sub disconnect {
1077+ my ( $self, $dbh ) = @_;
1078+ MKDEBUG && $self->print_active_handles($dbh);
1079+ $dbh->disconnect;
1080+}
1081+
1082+sub print_active_handles {
1083+ my ( $self, $thing, $level ) = @_;
1084+ $level ||= 0;
1085+ printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
1086+ $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
1087+ or die "Cannot print: $OS_ERROR";
1088+ foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
1089+ $self->print_active_handles( $handle, $level + 1 );
1090+ }
1091+}
1092+
1093+sub copy {
1094+ my ( $self, $dsn_1, $dsn_2, %args ) = @_;
1095+ die 'I need a dsn_1 argument' unless $dsn_1;
1096+ die 'I need a dsn_2 argument' unless $dsn_2;
1097+ my %new_dsn = map {
1098+ my $key = $_;
1099+ my $val;
1100+ if ( $args{overwrite} ) {
1101+ $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
1102+ }
1103+ else {
1104+ $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
1105+ }
1106+ $key => $val;
1107+ } keys %{$self->{opts}};
1108+ return \%new_dsn;
1109 }
1110
1111 sub _d {
1112@@ -789,7 +363,7 @@
1113 1;
1114 }
1115 # ###########################################################################
1116-# End TableChecksum package
1117+# End DSNParser package
1118 # ###########################################################################
1119
1120 # ###########################################################################
1121@@ -1743,7 +1317,7 @@
1122 $opt->{value} = ($pre || '') . $num;
1123 }
1124 else {
1125- $self->save_error("Invalid size for --$opt->{long}");
1126+ $self->save_error("Invalid size for --$opt->{long}: $val");
1127 }
1128 return;
1129 }
1130@@ -1818,345 +1392,129 @@
1131 # ###########################################################################
1132
1133 # ###########################################################################
1134-# DSNParser package
1135+# Cxn package
1136 # This package is a copy without comments from the original. The original
1137 # with comments and its test file can be found in the Bazaar repository at,
1138-# lib/DSNParser.pm
1139-# t/lib/DSNParser.t
1140+# lib/Cxn.pm
1141+# t/lib/Cxn.t
1142 # See https://launchpad.net/percona-toolkit for more information.
1143 # ###########################################################################
1144 {
1145-package DSNParser;
1146+package Cxn;
1147
1148 use strict;
1149 use warnings FATAL => 'all';
1150 use English qw(-no_match_vars);
1151 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1152
1153-use Data::Dumper;
1154-$Data::Dumper::Indent = 0;
1155-$Data::Dumper::Quotekeys = 0;
1156-
1157-eval {
1158- require DBI;
1159-};
1160-my $have_dbi = $EVAL_ERROR ? 0 : 1;
1161+use constant PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0;
1162
1163 sub new {
1164 my ( $class, %args ) = @_;
1165- foreach my $arg ( qw(opts) ) {
1166+ my @required_args = qw(DSNParser OptionParser);
1167+ foreach my $arg ( @required_args ) {
1168 die "I need a $arg argument" unless $args{$arg};
1169- }
1170+ };
1171+ my ($dp, $o) = @args{@required_args};
1172+
1173+ my $dsn_defaults = $dp->parse_options($o);
1174+ my $prev_dsn = $args{prev_dsn};
1175+ my $dsn = $args{dsn};
1176+ if ( !$dsn ) {
1177+ $args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
1178+
1179+ $dsn = $dp->parse(
1180+ $args{dsn_string}, $prev_dsn, $dsn_defaults);
1181+ }
1182+ elsif ( $prev_dsn ) {
1183+ $dsn = $dp->copy($prev_dsn, $dsn);
1184+ }
1185+
1186 my $self = {
1187- opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
1188+ dsn => $dsn,
1189+ dbh => $args{dbh},
1190+ dsn_name => $dp->as_string($dsn, [qw(h P S)]),
1191+ hostname => '',
1192+ set => $args{set},
1193+ dbh_set => 0,
1194+ OptionParser => $o,
1195+ DSNParser => $dp,
1196 };
1197- foreach my $opt ( @{$args{opts}} ) {
1198- if ( !$opt->{key} || !$opt->{desc} ) {
1199- die "Invalid DSN option: ", Dumper($opt);
1200- }
1201- MKDEBUG && _d('DSN option:',
1202- join(', ',
1203- map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
1204- keys %$opt
1205- )
1206- );
1207- $self->{opts}->{$opt->{key}} = {
1208- dsn => $opt->{dsn},
1209- desc => $opt->{desc},
1210- copy => $opt->{copy} || 0,
1211- };
1212- }
1213+
1214 return bless $self, $class;
1215 }
1216
1217-sub prop {
1218- my ( $self, $prop, $value ) = @_;
1219- if ( @_ > 2 ) {
1220- MKDEBUG && _d('Setting', $prop, 'property');
1221- $self->{$prop} = $value;
1222- }
1223- return $self->{$prop};
1224-}
1225-
1226-sub parse {
1227- my ( $self, $dsn, $prev, $defaults ) = @_;
1228- if ( !$dsn ) {
1229- MKDEBUG && _d('No DSN to parse');
1230- return;
1231- }
1232- MKDEBUG && _d('Parsing', $dsn);
1233- $prev ||= {};
1234- $defaults ||= {};
1235- my %given_props;
1236- my %final_props;
1237- my $opts = $self->{opts};
1238-
1239- foreach my $dsn_part ( split(/,/, $dsn) ) {
1240- if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
1241- $given_props{$prop_key} = $prop_val;
1242- }
1243- else {
1244- MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
1245- $given_props{h} = $dsn_part;
1246- }
1247- }
1248-
1249- foreach my $key ( keys %$opts ) {
1250- MKDEBUG && _d('Finding value for', $key);
1251- $final_props{$key} = $given_props{$key};
1252- if ( !defined $final_props{$key}
1253- && defined $prev->{$key} && $opts->{$key}->{copy} )
1254- {
1255- $final_props{$key} = $prev->{$key};
1256- MKDEBUG && _d('Copying value for', $key, 'from previous DSN');
1257- }
1258- if ( !defined $final_props{$key} ) {
1259- $final_props{$key} = $defaults->{$key};
1260- MKDEBUG && _d('Copying value for', $key, 'from defaults');
1261- }
1262- }
1263-
1264- foreach my $key ( keys %given_props ) {
1265- die "Unknown DSN option '$key' in '$dsn'. For more details, "
1266- . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1267- . "for complete documentation."
1268- unless exists $opts->{$key};
1269- }
1270- if ( (my $required = $self->prop('required')) ) {
1271- foreach my $key ( keys %$required ) {
1272- die "Missing required DSN option '$key' in '$dsn'. For more details, "
1273- . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1274- . "for complete documentation."
1275- unless $final_props{$key};
1276- }
1277- }
1278-
1279- return \%final_props;
1280-}
1281-
1282-sub parse_options {
1283- my ( $self, $o ) = @_;
1284- die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
1285- my $dsn_string
1286- = join(',',
1287- map { "$_=".$o->get($_); }
1288- grep { $o->has($_) && $o->get($_) }
1289- keys %{$self->{opts}}
1290- );
1291- MKDEBUG && _d('DSN string made from options:', $dsn_string);
1292- return $self->parse($dsn_string);
1293-}
1294-
1295-sub as_string {
1296- my ( $self, $dsn, $props ) = @_;
1297- return $dsn unless ref $dsn;
1298- my %allowed = $props ? map { $_=>1 } @$props : ();
1299- return join(',',
1300- map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
1301- grep { defined $dsn->{$_} && $self->{opts}->{$_} }
1302- grep { !$props || $allowed{$_} }
1303- sort keys %$dsn );
1304-}
1305-
1306-sub usage {
1307+sub connect {
1308 my ( $self ) = @_;
1309- my $usage
1310- = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
1311- . " KEY COPY MEANING\n"
1312- . " === ==== =============================================\n";
1313- my %opts = %{$self->{opts}};
1314- foreach my $key ( sort keys %opts ) {
1315- $usage .= " $key "
1316- . ($opts{$key}->{copy} ? 'yes ' : 'no ')
1317- . ($opts{$key}->{desc} || '[No description]')
1318- . "\n";
1319- }
1320- $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
1321- return $usage;
1322-}
1323-
1324-sub get_cxn_params {
1325- my ( $self, $info ) = @_;
1326- my $dsn;
1327- my %opts = %{$self->{opts}};
1328- my $driver = $self->prop('dbidriver') || '';
1329- if ( $driver eq 'Pg' ) {
1330- $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
1331- . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1332- grep { defined $info->{$_} }
1333- qw(h P));
1334- }
1335- else {
1336- $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
1337- . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1338- grep { defined $info->{$_} }
1339- qw(F h P S A))
1340- . ';mysql_read_default_group=client';
1341- }
1342- MKDEBUG && _d($dsn);
1343- return ($dsn, $info->{u}, $info->{p});
1344-}
1345-
1346-sub fill_in_dsn {
1347- my ( $self, $dbh, $dsn ) = @_;
1348- my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
1349- my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
1350- $user =~ s/@.*//;
1351- $dsn->{h} ||= $vars->{hostname}->{Value};
1352- $dsn->{S} ||= $vars->{'socket'}->{Value};
1353- $dsn->{P} ||= $vars->{port}->{Value};
1354- $dsn->{u} ||= $user;
1355- $dsn->{D} ||= $db;
1356-}
1357-
1358-sub get_dbh {
1359- my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
1360- $opts ||= {};
1361- my $defaults = {
1362- AutoCommit => 0,
1363- RaiseError => 1,
1364- PrintError => 0,
1365- ShowErrorStatement => 1,
1366- mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
1367- };
1368- @{$defaults}{ keys %$opts } = values %$opts;
1369-
1370- if ( $opts->{mysql_use_result} ) {
1371- $defaults->{mysql_use_result} = 1;
1372- }
1373-
1374- if ( !$have_dbi ) {
1375- die "Cannot connect to MySQL because the Perl DBI module is not "
1376- . "installed or not found. Run 'perl -MDBI' to see the directories "
1377- . "that Perl searches for DBI. If DBI is not installed, try:\n"
1378- . " Debian/Ubuntu apt-get install libdbi-perl\n"
1379- . " RHEL/CentOS yum install perl-DBI\n"
1380- . " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
1381-
1382- }
1383-
1384- my $dbh;
1385- my $tries = 2;
1386- while ( !$dbh && $tries-- ) {
1387- MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
1388- join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
1389-
1390- eval {
1391- $dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
1392-
1393- if ( $cxn_string =~ m/mysql/i ) {
1394- my $sql;
1395-
1396- $sql = 'SELECT @@SQL_MODE';
1397- MKDEBUG && _d($dbh, $sql);
1398- my ($sql_mode) = $dbh->selectrow_array($sql);
1399-
1400- $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
1401- . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
1402- . ($sql_mode ? ",$sql_mode" : '')
1403- . '\'*/';
1404- MKDEBUG && _d($dbh, $sql);
1405- $dbh->do($sql);
1406-
1407- if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
1408- $sql = "/*!40101 SET NAMES $charset*/";
1409- MKDEBUG && _d($dbh, ':', $sql);
1410- $dbh->do($sql);
1411- MKDEBUG && _d('Enabling charset for STDOUT');
1412- if ( $charset eq 'utf8' ) {
1413- binmode(STDOUT, ':utf8')
1414- or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
1415- }
1416- else {
1417- binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
1418- }
1419- }
1420-
1421- if ( $self->prop('set-vars') ) {
1422- $sql = "SET " . $self->prop('set-vars');
1423- MKDEBUG && _d($dbh, ':', $sql);
1424- $dbh->do($sql);
1425- }
1426- }
1427- };
1428- if ( !$dbh && $EVAL_ERROR ) {
1429- MKDEBUG && _d($EVAL_ERROR);
1430- if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
1431- MKDEBUG && _d('Going to try again without utf8 support');
1432- delete $defaults->{mysql_enable_utf8};
1433- }
1434- elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
1435- die "Cannot connect to MySQL because the Perl DBD::mysql module is "
1436- . "not installed or not found. Run 'perl -MDBD::mysql' to see "
1437- . "the directories that Perl searches for DBD::mysql. If "
1438- . "DBD::mysql is not installed, try:\n"
1439- . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
1440- . " RHEL/CentOS yum install perl-DBD-MySQL\n"
1441- . " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
1442- }
1443- if ( !$tries ) {
1444- die $EVAL_ERROR;
1445- }
1446+ my $dsn = $self->{dsn};
1447+ my $dp = $self->{DSNParser};
1448+ my $o = $self->{OptionParser};
1449+
1450+ my $dbh = $self->{dbh};
1451+ if ( !$dbh || !$dbh->ping() ) {
1452+ if ( $o->get('ask-pass') && !$self->{asked_for_pass} ) {
1453+ $dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
1454+ $self->{asked_for_pass} = 1;
1455 }
1456- }
1457-
1458- MKDEBUG && _d('DBH info: ',
1459- $dbh,
1460- Dumper($dbh->selectrow_hashref(
1461- 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
1462- 'Connection info:', $dbh->{mysql_hostinfo},
1463- 'Character set info:', Dumper($dbh->selectall_arrayref(
1464- 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
1465- '$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
1466- '$DBI::VERSION:', $DBI::VERSION,
1467- );
1468-
1469+ $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
1470+ }
1471+ MKDEBUG && _d($dbh, 'Connected dbh to', $self->{name});
1472+
1473+ return $self->set_dbh($dbh);
1474+}
1475+
1476+sub set_dbh {
1477+ my ($self, $dbh) = @_;
1478+
1479+ if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {
1480+ MKDEBUG && _d($dbh, 'Already set dbh');
1481+ return $dbh;
1482+ }
1483+
1484+ MKDEBUG && _d($dbh, 'Setting dbh');
1485+
1486+ $dbh->{FetchHashKeyName} = 'NAME_lc';
1487+
1488+ my $sql = 'SELECT @@hostname, @@server_id';
1489+ MKDEBUG && _d($dbh, $sql);
1490+ my ($hostname, $server_id) = $dbh->selectrow_array($sql);
1491+ MKDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
1492+ if ( $hostname ) {
1493+ $self->{hostname} = $hostname;
1494+ }
1495+
1496+ if ( my $set = $self->{set}) {
1497+ $set->($dbh);
1498+ }
1499+
1500+ $self->{dbh} = $dbh;
1501+ $self->{dbh_set} = 1;
1502 return $dbh;
1503 }
1504
1505-sub get_hostname {
1506- my ( $self, $dbh ) = @_;
1507- if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
1508- return $host;
1509- }
1510- my ( $hostname, $one ) = $dbh->selectrow_array(
1511- 'SELECT /*!50038 @@hostname, */ 1');
1512- return $hostname;
1513-}
1514-
1515-sub disconnect {
1516- my ( $self, $dbh ) = @_;
1517- MKDEBUG && $self->print_active_handles($dbh);
1518- $dbh->disconnect;
1519-}
1520-
1521-sub print_active_handles {
1522- my ( $self, $thing, $level ) = @_;
1523- $level ||= 0;
1524- printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
1525- $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
1526- or die "Cannot print: $OS_ERROR";
1527- foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
1528- $self->print_active_handles( $handle, $level + 1 );
1529- }
1530-}
1531-
1532-sub copy {
1533- my ( $self, $dsn_1, $dsn_2, %args ) = @_;
1534- die 'I need a dsn_1 argument' unless $dsn_1;
1535- die 'I need a dsn_2 argument' unless $dsn_2;
1536- my %new_dsn = map {
1537- my $key = $_;
1538- my $val;
1539- if ( $args{overwrite} ) {
1540- $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
1541- }
1542- else {
1543- $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
1544- }
1545- $key => $val;
1546- } keys %{$self->{opts}};
1547- return \%new_dsn;
1548+sub dbh {
1549+ my ($self) = @_;
1550+ return $self->{dbh};
1551+}
1552+
1553+sub dsn {
1554+ my ($self) = @_;
1555+ return $self->{dsn};
1556+}
1557+
1558+sub name {
1559+ my ($self) = @_;
1560+ return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
1561+ return $self->{hostname} || $self->{dsn_name} || 'unknown host';
1562+}
1563+
1564+sub DESTROY {
1565+ my ($self) = @_;
1566+ if ( $self->{dbh} ) {
1567+ MKDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name});
1568+ $self->{dbh}->disconnect();
1569+ }
1570+ return;
1571 }
1572
1573 sub _d {
1574@@ -2170,7 +1528,126 @@
1575 1;
1576 }
1577 # ###########################################################################
1578-# End DSNParser package
1579+# End Cxn package
1580+# ###########################################################################
1581+
1582+# ###########################################################################
1583+# Quoter package
1584+# This package is a copy without comments from the original. The original
1585+# with comments and its test file can be found in the Bazaar repository at,
1586+# lib/Quoter.pm
1587+# t/lib/Quoter.t
1588+# See https://launchpad.net/percona-toolkit for more information.
1589+# ###########################################################################
1590+{
1591+package Quoter;
1592+
1593+use strict;
1594+use warnings FATAL => 'all';
1595+use English qw(-no_match_vars);
1596+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1597+
1598+sub new {
1599+ my ( $class, %args ) = @_;
1600+ return bless {}, $class;
1601+}
1602+
1603+sub quote {
1604+ my ( $self, @vals ) = @_;
1605+ foreach my $val ( @vals ) {
1606+ $val =~ s/`/``/g;
1607+ }
1608+ return join('.', map { '`' . $_ . '`' } @vals);
1609+}
1610+
1611+sub quote_val {
1612+ my ( $self, $val ) = @_;
1613+
1614+ return 'NULL' unless defined $val; # undef = NULL
1615+ return "''" if $val eq ''; # blank string = ''
1616+ return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data
1617+
1618+ $val =~ s/(['\\])/\\$1/g;
1619+ return "'$val'";
1620+}
1621+
1622+sub split_unquote {
1623+ my ( $self, $db_tbl, $default_db ) = @_;
1624+ $db_tbl =~ s/`//g;
1625+ my ( $db, $tbl ) = split(/[.]/, $db_tbl);
1626+ if ( !$tbl ) {
1627+ $tbl = $db;
1628+ $db = $default_db;
1629+ }
1630+ return ($db, $tbl);
1631+}
1632+
1633+sub literal_like {
1634+ my ( $self, $like ) = @_;
1635+ return unless $like;
1636+ $like =~ s/([%_])/\\$1/g;
1637+ return "'$like'";
1638+}
1639+
1640+sub join_quote {
1641+ my ( $self, $default_db, $db_tbl ) = @_;
1642+ return unless $db_tbl;
1643+ my ($db, $tbl) = split(/[.]/, $db_tbl);
1644+ if ( !$tbl ) {
1645+ $tbl = $db;
1646+ $db = $default_db;
1647+ }
1648+ $db = "`$db`" if $db && $db !~ m/^`/;
1649+ $tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/;
1650+ return $db ? "$db.$tbl" : $tbl;
1651+}
1652+
1653+sub serialize_list {
1654+ my ( $self, @args ) = @_;
1655+ return unless @args;
1656+
1657+ return $args[0] if @args == 1 && !defined $args[0];
1658+
1659+ die "Cannot serialize multiple values with undef/NULL"
1660+ if grep { !defined $_ } @args;
1661+
1662+ return join ',', map { quotemeta } @args;
1663+}
1664+
1665+sub deserialize_list {
1666+ my ( $self, $string ) = @_;
1667+ return $string unless defined $string;
1668+ my @escaped_parts = $string =~ /
1669+ \G # Start of string, or end of previous match.
1670+ ( # Each of these is an element in the original list.
1671+ [^\\,]* # Anything not a backslash or a comma
1672+ (?: # When we get here, we found one of the above.
1673+ \\. # A backslash followed by something so we can continue
1674+ [^\\,]* # Same as above.
1675+ )* # Repeat zero of more times.
1676+ )
1677+ , # Comma dividing elements
1678+ /sxgc;
1679+
1680+ push @escaped_parts, pos($string) ? substr( $string, pos($string) ) : $string;
1681+
1682+ my @unescaped_parts = map {
1683+ my $part = $_;
1684+
1685+ my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string,
1686+ ? qr/(?=\p{ASCII})\W/ # We only care about non-word
1687+ : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
1688+ $part =~ s/\\($char_class)/$1/g;
1689+ $part;
1690+ } @escaped_parts;
1691+
1692+ return @unescaped_parts;
1693+}
1694+
1695+1;
1696+}
1697+# ###########################################################################
1698+# End Quoter package
1699 # ###########################################################################
1700
1701 # ###########################################################################
1702@@ -2257,294 +1734,417 @@
1703 # ###########################################################################
1704
1705 # ###########################################################################
1706-# MySQLDump package
1707+# TableParser package
1708 # This package is a copy without comments from the original. The original
1709 # with comments and its test file can be found in the Bazaar repository at,
1710-# lib/MySQLDump.pm
1711-# t/lib/MySQLDump.t
1712+# lib/TableParser.pm
1713+# t/lib/TableParser.t
1714 # See https://launchpad.net/percona-toolkit for more information.
1715 # ###########################################################################
1716 {
1717-package MySQLDump;
1718+package TableParser;
1719
1720 use strict;
1721 use warnings FATAL => 'all';
1722 use English qw(-no_match_vars);
1723 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1724
1725-( our $before = <<'EOF') =~ s/^ //gm;
1726- /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
1727- /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
1728- /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
1729- /*!40101 SET NAMES utf8 */;
1730- /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
1731- /*!40103 SET TIME_ZONE='+00:00' */;
1732- /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
1733- /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
1734- /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
1735- /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
1736-EOF
1737-
1738-( our $after = <<'EOF') =~ s/^ //gm;
1739- /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
1740- /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
1741- /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
1742- /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
1743- /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
1744- /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
1745- /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
1746- /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
1747-EOF
1748+use Data::Dumper;
1749+$Data::Dumper::Indent = 1;
1750+$Data::Dumper::Sortkeys = 1;
1751+$Data::Dumper::Quotekeys = 0;
1752
1753 sub new {
1754 my ( $class, %args ) = @_;
1755- my $self = {
1756- cache => 0, # Afaik no script uses this cache any longer because
1757- };
1758+ my @required_args = qw(Quoter);
1759+ foreach my $arg ( @required_args ) {
1760+ die "I need a $arg argument" unless $args{$arg};
1761+ }
1762+ my $self = { %args };
1763 return bless $self, $class;
1764 }
1765
1766-sub dump {
1767- my ( $self, $dbh, $quoter, $db, $tbl, $what ) = @_;
1768-
1769- if ( $what eq 'table' ) {
1770- my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
1771- return unless $ddl;
1772- if ( $ddl->[0] eq 'table' ) {
1773- return $before
1774- . 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
1775- . $ddl->[1] . ";\n";
1776- }
1777- else {
1778- return 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
1779- . '/*!50001 DROP VIEW IF EXISTS '
1780- . $quoter->quote($tbl) . "*/;\n/*!50001 "
1781- . $self->get_tmp_table($dbh, $quoter, $db, $tbl) . "*/;\n";
1782- }
1783- }
1784- elsif ( $what eq 'triggers' ) {
1785- my $trgs = $self->get_triggers($dbh, $quoter, $db, $tbl);
1786- if ( $trgs && @$trgs ) {
1787- my $result = $before . "\nDELIMITER ;;\n";
1788- foreach my $trg ( @$trgs ) {
1789- if ( $trg->{sql_mode} ) {
1790- $result .= qq{/*!50003 SET SESSION SQL_MODE='$trg->{sql_mode}' */;;\n};
1791- }
1792- $result .= "/*!50003 CREATE */ ";
1793- if ( $trg->{definer} ) {
1794- my ( $user, $host )
1795- = map { s/'/''/g; "'$_'"; }
1796- split('@', $trg->{definer}, 2);
1797- $result .= "/*!50017 DEFINER=$user\@$host */ ";
1798- }
1799- $result .= sprintf("/*!50003 TRIGGER %s %s %s ON %s\nFOR EACH ROW %s */;;\n\n",
1800- $quoter->quote($trg->{trigger}),
1801- @{$trg}{qw(timing event)},
1802- $quoter->quote($trg->{table}),
1803- $trg->{statement});
1804- }
1805- $result .= "DELIMITER ;\n\n/*!50003 SET SESSION SQL_MODE=\@OLD_SQL_MODE */;\n\n";
1806- return $result;
1807- }
1808- else {
1809- return undef;
1810- }
1811- }
1812- elsif ( $what eq 'view' ) {
1813- my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
1814- return '/*!50001 DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
1815- . '/*!50001 DROP VIEW IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
1816- . '/*!50001 ' . $ddl->[1] . "*/;\n";
1817- }
1818- else {
1819- die "You didn't say what to dump.";
1820- }
1821-}
1822-
1823-sub _use_db {
1824- my ( $self, $dbh, $quoter, $new ) = @_;
1825- if ( !$new ) {
1826- MKDEBUG && _d('No new DB to use');
1827- return;
1828- }
1829- my $sql = 'USE ' . $quoter->quote($new);
1830- MKDEBUG && _d($dbh, $sql);
1831- $dbh->do($sql);
1832- return;
1833-}
1834-
1835 sub get_create_table {
1836- my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
1837- if ( !$self->{cache} || !$self->{tables}->{$db}->{$tbl} ) {
1838- my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
1839- . q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
1840- . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
1841- . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
1842- MKDEBUG && _d($sql);
1843- eval { $dbh->do($sql); };
1844- MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
1845- $self->_use_db($dbh, $quoter, $db);
1846- $sql = "SHOW CREATE TABLE " . $quoter->quote($db, $tbl);
1847- MKDEBUG && _d($sql);
1848- my $href;
1849- eval { $href = $dbh->selectrow_hashref($sql); };
1850- if ( $EVAL_ERROR ) {
1851- warn "Failed to $sql. The table may be damaged.\nError: $EVAL_ERROR";
1852- return;
1853- }
1854-
1855- $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
1856- . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
1857- MKDEBUG && _d($sql);
1858- $dbh->do($sql);
1859- my ($key) = grep { m/create table/i } keys %$href;
1860- if ( $key ) {
1861- MKDEBUG && _d('This table is a base table');
1862- $self->{tables}->{$db}->{$tbl} = [ 'table', $href->{$key} ];
1863+ my ( $self, $dbh, $db, $tbl ) = @_;
1864+ die "I need a dbh parameter" unless $dbh;
1865+ die "I need a db parameter" unless $db;
1866+ die "I need a tbl parameter" unless $tbl;
1867+ my $q = $self->{Quoter};
1868+
1869+ my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
1870+ . q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
1871+ . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
1872+ . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
1873+ MKDEBUG && _d($sql);
1874+ eval { $dbh->do($sql); };
1875+ MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
1876+
1877+ $sql = 'USE ' . $q->quote($db);
1878+ MKDEBUG && _d($dbh, $sql);
1879+ $dbh->do($sql);
1880+
1881+ $sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
1882+ MKDEBUG && _d($sql);
1883+ my $href;
1884+ eval { $href = $dbh->selectrow_hashref($sql); };
1885+ if ( $EVAL_ERROR ) {
1886+ MKDEBUG && _d($EVAL_ERROR);
1887+ return;
1888+ }
1889+
1890+ $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
1891+ . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
1892+ MKDEBUG && _d($sql);
1893+ $dbh->do($sql);
1894+
1895+ my ($key) = grep { m/create table/i } keys %$href;
1896+ if ( $key ) {
1897+ MKDEBUG && _d('This table is a base table');
1898+ $href->{$key} =~ s/\b[ ]{2,}/ /g;
1899+ $href->{$key} .= "\n";
1900+ }
1901+ else {
1902+ MKDEBUG && _d('This table is a view');
1903+ ($key) = grep { m/create view/i } keys %$href;
1904+ }
1905+
1906+ return $href->{$key};
1907+}
1908+
1909+sub parse {
1910+ my ( $self, $ddl, $opts ) = @_;
1911+ return unless $ddl;
1912+
1913+ if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
1914+ die "Cannot parse table definition; is ANSI quoting "
1915+ . "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
1916+ }
1917+
1918+ my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
1919+ (undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
1920+
1921+ $ddl =~ s/(`[^`]+`)/\L$1/g;
1922+
1923+ my $engine = $self->get_engine($ddl);
1924+
1925+ my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
1926+ my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
1927+ MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
1928+
1929+ my %def_for;
1930+ @def_for{@cols} = @defs;
1931+
1932+ my (@nums, @null);
1933+ my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
1934+ foreach my $col ( @cols ) {
1935+ my $def = $def_for{$col};
1936+ my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
1937+ die "Can't determine column type for $def" unless $type;
1938+ $type_for{$col} = $type;
1939+ if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
1940+ push @nums, $col;
1941+ $is_numeric{$col} = 1;
1942+ }
1943+ if ( $def !~ m/NOT NULL/ ) {
1944+ push @null, $col;
1945+ $is_nullable{$col} = 1;
1946+ }
1947+ $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
1948+ }
1949+
1950+ my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
1951+
1952+ my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
1953+
1954+ return {
1955+ name => $name,
1956+ cols => \@cols,
1957+ col_posn => { map { $cols[$_] => $_ } 0..$#cols },
1958+ is_col => { map { $_ => 1 } @cols },
1959+ null_cols => \@null,
1960+ is_nullable => \%is_nullable,
1961+ is_autoinc => \%is_autoinc,
1962+ clustered_key => $clustered_key,
1963+ keys => $keys,
1964+ defs => \%def_for,
1965+ numeric_cols => \@nums,
1966+ is_numeric => \%is_numeric,
1967+ engine => $engine,
1968+ type_for => \%type_for,
1969+ charset => $charset,
1970+ };
1971+}
1972+
1973+sub sort_indexes {
1974+ my ( $self, $tbl ) = @_;
1975+
1976+ my @indexes
1977+ = sort {
1978+ (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
1979+ || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
1980+ || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
1981+ || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
1982+ }
1983+ grep {
1984+ $tbl->{keys}->{$_}->{type} eq 'BTREE'
1985+ }
1986+ sort keys %{$tbl->{keys}};
1987+
1988+ MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
1989+ return @indexes;
1990+}
1991+
1992+sub find_best_index {
1993+ my ( $self, $tbl, $index ) = @_;
1994+ my $best;
1995+ if ( $index ) {
1996+ ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
1997+ }
1998+ if ( !$best ) {
1999+ if ( $index ) {
2000+ die "Index '$index' does not exist in table";
2001 }
2002 else {
2003- MKDEBUG && _d('This table is a view');
2004- ($key) = grep { m/create view/i } keys %$href;
2005- $self->{tables}->{$db}->{$tbl} = [ 'view', $href->{$key} ];
2006- }
2007- }
2008- return $self->{tables}->{$db}->{$tbl};
2009-}
2010-
2011-sub get_columns {
2012- my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2013- MKDEBUG && _d('Get columns for', $db, $tbl);
2014- if ( !$self->{cache} || !$self->{columns}->{$db}->{$tbl} ) {
2015- $self->_use_db($dbh, $quoter, $db);
2016- my $sql = "SHOW COLUMNS FROM " . $quoter->quote($db, $tbl);
2017- MKDEBUG && _d($sql);
2018- my $cols = $dbh->selectall_arrayref($sql, { Slice => {} });
2019-
2020- $self->{columns}->{$db}->{$tbl} = [
2021- map {
2022- my %row;
2023- @row{ map { lc $_ } keys %$_ } = values %$_;
2024- \%row;
2025- } @$cols
2026- ];
2027- }
2028- return $self->{columns}->{$db}->{$tbl};
2029-}
2030-
2031-sub get_tmp_table {
2032- my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2033- my $result = 'CREATE TABLE ' . $quoter->quote($tbl) . " (\n";
2034- $result .= join(",\n",
2035- map { ' ' . $quoter->quote($_->{field}) . ' ' . $_->{type} }
2036- @{$self->get_columns($dbh, $quoter, $db, $tbl)});
2037- $result .= "\n)";
2038- MKDEBUG && _d($result);
2039- return $result;
2040-}
2041-
2042-sub get_triggers {
2043- my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2044- if ( !$self->{cache} || !$self->{triggers}->{$db} ) {
2045- $self->{triggers}->{$db} = {};
2046- my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
2047- . q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
2048- . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
2049- . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
2050- MKDEBUG && _d($sql);
2051- eval { $dbh->do($sql); };
2052- MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2053- $sql = "SHOW TRIGGERS FROM " . $quoter->quote($db);
2054- MKDEBUG && _d($sql);
2055- my $sth = $dbh->prepare($sql);
2056- $sth->execute();
2057- if ( $sth->rows ) {
2058- my $trgs = $sth->fetchall_arrayref({});
2059- foreach my $trg (@$trgs) {
2060- my %trg;
2061- @trg{ map { lc $_ } keys %$trg } = values %$trg;
2062- push @{ $self->{triggers}->{$db}->{ $trg{table} } }, \%trg;
2063- }
2064- }
2065- $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
2066- . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
2067- MKDEBUG && _d($sql);
2068+ ($best) = $self->sort_indexes($tbl);
2069+ }
2070+ }
2071+ MKDEBUG && _d('Best index found is', $best);
2072+ return $best;
2073+}
2074+
2075+sub find_possible_keys {
2076+ my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
2077+ return () unless $where;
2078+ my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
2079+ . ' WHERE ' . $where;
2080+ MKDEBUG && _d($sql);
2081+ my $expl = $dbh->selectrow_hashref($sql);
2082+ $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
2083+ if ( $expl->{possible_keys} ) {
2084+ MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
2085+ my @candidates = split(',', $expl->{possible_keys});
2086+ my %possible = map { $_ => 1 } @candidates;
2087+ if ( $expl->{key} ) {
2088+ MKDEBUG && _d('MySQL chose', $expl->{key});
2089+ unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
2090+ MKDEBUG && _d('Before deduping:', join(', ', @candidates));
2091+ my %seen;
2092+ @candidates = grep { !$seen{$_}++ } @candidates;
2093+ }
2094+ MKDEBUG && _d('Final list:', join(', ', @candidates));
2095+ return @candidates;
2096+ }
2097+ else {
2098+ MKDEBUG && _d('No keys in possible_keys');
2099+ return ();
2100+ }
2101+}
2102+
2103+sub check_table {
2104+ my ( $self, %args ) = @_;
2105+ my @required_args = qw(dbh db tbl);
2106+ foreach my $arg ( @required_args ) {
2107+ die "I need a $arg argument" unless $args{$arg};
2108+ }
2109+ my ($dbh, $db, $tbl) = @args{@required_args};
2110+ my $q = $self->{Quoter};
2111+ my $db_tbl = $q->quote($db, $tbl);
2112+ MKDEBUG && _d('Checking', $db_tbl);
2113+
2114+ my $sql = "SHOW TABLES FROM " . $q->quote($db)
2115+ . ' LIKE ' . $q->literal_like($tbl);
2116+ MKDEBUG && _d($sql);
2117+ my $row;
2118+ eval {
2119+ $row = $dbh->selectrow_arrayref($sql);
2120+ };
2121+ if ( $EVAL_ERROR ) {
2122+ MKDEBUG && _d($EVAL_ERROR);
2123+ return 0;
2124+ }
2125+ if ( !$row->[0] || $row->[0] ne $tbl ) {
2126+ MKDEBUG && _d('Table does not exist');
2127+ return 0;
2128+ }
2129+
2130+ MKDEBUG && _d('Table exists; no privs to check');
2131+ return 1 unless $args{all_privs};
2132+
2133+ $sql = "SHOW FULL COLUMNS FROM $db_tbl";
2134+ MKDEBUG && _d($sql);
2135+ eval {
2136+ $row = $dbh->selectrow_hashref($sql);
2137+ };
2138+ if ( $EVAL_ERROR ) {
2139+ MKDEBUG && _d($EVAL_ERROR);
2140+ return 0;
2141+ }
2142+ if ( !scalar keys %$row ) {
2143+ MKDEBUG && _d('Table has no columns:', Dumper($row));
2144+ return 0;
2145+ }
2146+ my $privs = $row->{privileges} || $row->{Privileges};
2147+
2148+ $sql = "DELETE FROM $db_tbl LIMIT 0";
2149+ MKDEBUG && _d($sql);
2150+ eval {
2151 $dbh->do($sql);
2152- }
2153- if ( $tbl ) {
2154- return $self->{triggers}->{$db}->{$tbl};
2155- }
2156- return values %{$self->{triggers}->{$db}};
2157-}
2158-
2159-sub get_databases {
2160- my ( $self, $dbh, $quoter, $like ) = @_;
2161- if ( !$self->{cache} || !$self->{databases} || $like ) {
2162- my $sql = 'SHOW DATABASES';
2163- my @params;
2164- if ( $like ) {
2165- $sql .= ' LIKE ?';
2166- push @params, $like;
2167- }
2168- my $sth = $dbh->prepare($sql);
2169- MKDEBUG && _d($sql, @params);
2170- $sth->execute( @params );
2171- my @dbs = map { $_->[0] } @{$sth->fetchall_arrayref()};
2172- $self->{databases} = \@dbs unless $like;
2173- return @dbs;
2174- }
2175- return @{$self->{databases}};
2176+ };
2177+ my $can_delete = $EVAL_ERROR ? 0 : 1;
2178+
2179+ MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
2180+ ($can_delete ? 'delete' : ''));
2181+
2182+ if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
2183+ && $can_delete) ) {
2184+ MKDEBUG && _d('User does not have all privs');
2185+ return 0;
2186+ }
2187+
2188+ MKDEBUG && _d('User has all privs');
2189+ return 1;
2190+}
2191+
2192+sub get_engine {
2193+ my ( $self, $ddl, $opts ) = @_;
2194+ my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
2195+ MKDEBUG && _d('Storage engine:', $engine);
2196+ return $engine || undef;
2197+}
2198+
2199+sub get_keys {
2200+ my ( $self, $ddl, $opts, $is_nullable ) = @_;
2201+ my $engine = $self->get_engine($ddl);
2202+ my $keys = {};
2203+ my $clustered_key = undef;
2204+
2205+ KEY:
2206+ foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
2207+
2208+ next KEY if $key =~ m/FOREIGN/;
2209+
2210+ my $key_ddl = $key;
2211+ MKDEBUG && _d('Parsed key:', $key_ddl);
2212+
2213+ if ( $engine !~ m/MEMORY|HEAP/ ) {
2214+ $key =~ s/USING HASH/USING BTREE/;
2215+ }
2216+
2217+ my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
2218+ my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
2219+ $type = $type || $special || 'BTREE';
2220+ if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
2221+ && $engine =~ m/HEAP|MEMORY/i )
2222+ {
2223+ $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
2224+ }
2225+
2226+ my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
2227+ my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
2228+ my @cols;
2229+ my @col_prefixes;
2230+ foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
2231+ my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
2232+ push @cols, $name;
2233+ push @col_prefixes, $prefix;
2234+ }
2235+ $name =~ s/`//g;
2236+
2237+ MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
2238+
2239+ $keys->{$name} = {
2240+ name => $name,
2241+ type => $type,
2242+ colnames => $cols,
2243+ cols => \@cols,
2244+ col_prefixes => \@col_prefixes,
2245+ is_unique => $unique,
2246+ is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
2247+ is_col => { map { $_ => 1 } @cols },
2248+ ddl => $key_ddl,
2249+ };
2250+
2251+ if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
2252+ my $this_key = $keys->{$name};
2253+ if ( $this_key->{name} eq 'PRIMARY' ) {
2254+ $clustered_key = 'PRIMARY';
2255+ }
2256+ elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
2257+ $clustered_key = $this_key->{name};
2258+ }
2259+ MKDEBUG && $clustered_key && _d('This key is the clustered key');
2260+ }
2261+ }
2262+
2263+ return $keys, $clustered_key;
2264+}
2265+
2266+sub get_fks {
2267+ my ( $self, $ddl, $opts ) = @_;
2268+ my $q = $self->{Quoter};
2269+ my $fks = {};
2270+
2271+ foreach my $fk (
2272+ $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
2273+ {
2274+ my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
2275+ my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
2276+ my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
2277+
2278+ my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
2279+ my %parent_tbl = (tbl => $tbl);
2280+ $parent_tbl{db} = $db if $db;
2281+
2282+ if ( $parent !~ m/\./ && $opts->{database} ) {
2283+ $parent = $q->quote($opts->{database}) . ".$parent";
2284+ }
2285+
2286+ $fks->{$name} = {
2287+ name => $name,
2288+ colnames => $cols,
2289+ cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
2290+ parent_tbl => \%parent_tbl,
2291+ parent_tblname => $parent,
2292+ parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
2293+ parent_colnames=> $parent_cols,
2294+ ddl => $fk,
2295+ };
2296+ }
2297+
2298+ return $fks;
2299+}
2300+
2301+sub remove_auto_increment {
2302+ my ( $self, $ddl ) = @_;
2303+ $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
2304+ return $ddl;
2305 }
2306
2307 sub get_table_status {
2308- my ( $self, $dbh, $quoter, $db, $like ) = @_;
2309- if ( !$self->{cache} || !$self->{table_status}->{$db} || $like ) {
2310- my $sql = "SHOW TABLE STATUS FROM " . $quoter->quote($db);
2311- my @params;
2312- if ( $like ) {
2313- $sql .= ' LIKE ?';
2314- push @params, $like;
2315- }
2316- MKDEBUG && _d($sql, @params);
2317- my $sth = $dbh->prepare($sql);
2318- $sth->execute(@params);
2319- my @tables = @{$sth->fetchall_arrayref({})};
2320- @tables = map {
2321- my %tbl; # Make a copy with lowercased keys
2322- @tbl{ map { lc $_ } keys %$_ } = values %$_;
2323- $tbl{engine} ||= $tbl{type} || $tbl{comment};
2324- delete $tbl{type};
2325- \%tbl;
2326- } @tables;
2327- $self->{table_status}->{$db} = \@tables unless $like;
2328- return @tables;
2329- }
2330- return @{$self->{table_status}->{$db}};
2331-}
2332-
2333-sub get_table_list {
2334- my ( $self, $dbh, $quoter, $db, $like ) = @_;
2335- if ( !$self->{cache} || !$self->{table_list}->{$db} || $like ) {
2336- my $sql = "SHOW /*!50002 FULL*/ TABLES FROM " . $quoter->quote($db);
2337- my @params;
2338- if ( $like ) {
2339- $sql .= ' LIKE ?';
2340- push @params, $like;
2341- }
2342- MKDEBUG && _d($sql, @params);
2343- my $sth = $dbh->prepare($sql);
2344- $sth->execute(@params);
2345- my @tables = @{$sth->fetchall_arrayref()};
2346- @tables = map {
2347- my %tbl = (
2348- name => $_->[0],
2349- engine => ($_->[1] || '') eq 'VIEW' ? 'VIEW' : '',
2350- );
2351- \%tbl;
2352- } @tables;
2353- $self->{table_list}->{$db} = \@tables unless $like;
2354- return @tables;
2355- }
2356- return @{$self->{table_list}->{$db}};
2357+ my ( $self, $dbh, $db, $like ) = @_;
2358+ my $q = $self->{Quoter};
2359+ my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
2360+ my @params;
2361+ if ( $like ) {
2362+ $sql .= ' LIKE ?';
2363+ push @params, $like;
2364+ }
2365+ MKDEBUG && _d($sql, @params);
2366+ my $sth = $dbh->prepare($sql);
2367+ eval { $sth->execute(@params); };
2368+ if ($EVAL_ERROR) {
2369+ MKDEBUG && _d($EVAL_ERROR);
2370+ return;
2371+ }
2372+ my @tables = @{$sth->fetchall_arrayref({})};
2373+ @tables = map {
2374+ my %tbl; # Make a copy with lowercased keys
2375+ @tbl{ map { lc $_ } keys %$_ } = values %$_;
2376+ $tbl{engine} ||= $tbl{type} || $tbl{comment};
2377+ delete $tbl{type};
2378+ \%tbl;
2379+ } @tables;
2380+ return @tables;
2381 }
2382
2383 sub _d {
2384@@ -2558,923 +2158,250 @@
2385 1;
2386 }
2387 # ###########################################################################
2388-# End MySQLDump package
2389+# End TableParser package
2390 # ###########################################################################
2391
2392 # ###########################################################################
2393-# TableChunker package
2394+# TableNibbler package
2395 # This package is a copy without comments from the original. The original
2396 # with comments and its test file can be found in the Bazaar repository at,
2397-# lib/TableChunker.pm
2398-# t/lib/TableChunker.t
2399+# lib/TableNibbler.pm
2400+# t/lib/TableNibbler.t
2401 # See https://launchpad.net/percona-toolkit for more information.
2402 # ###########################################################################
2403 {
2404-package TableChunker;
2405+package TableNibbler;
2406
2407 use strict;
2408 use warnings FATAL => 'all';
2409 use English qw(-no_match_vars);
2410 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2411
2412-use POSIX qw(floor ceil);
2413-use List::Util qw(min max);
2414-use Data::Dumper;
2415-$Data::Dumper::Indent = 1;
2416-$Data::Dumper::Sortkeys = 1;
2417-$Data::Dumper::Quotekeys = 0;
2418-
2419 sub new {
2420 my ( $class, %args ) = @_;
2421- foreach my $arg ( qw(Quoter MySQLDump) ) {
2422+ my @required_args = qw(TableParser Quoter);
2423+ foreach my $arg ( @required_args ) {
2424 die "I need a $arg argument" unless $args{$arg};
2425 }
2426-
2427- my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);
2428- my %real_types = map { $_ => 1 } qw(decimal double float);
2429-
2430- my $self = {
2431- %args,
2432- int_types => \%int_types,
2433- real_types => \%real_types,
2434- EPOCH => '1970-01-01',
2435- };
2436-
2437+ my $self = { %args };
2438 return bless $self, $class;
2439 }
2440
2441-sub find_chunk_columns {
2442- my ( $self, %args ) = @_;
2443- foreach my $arg ( qw(tbl_struct) ) {
2444- die "I need a $arg argument" unless $args{$arg};
2445- }
2446- my $tbl_struct = $args{tbl_struct};
2447-
2448- my @possible_indexes;
2449- foreach my $index ( values %{ $tbl_struct->{keys} } ) {
2450-
2451- next unless $index->{type} eq 'BTREE';
2452-
2453- next if grep { defined } @{$index->{col_prefixes}};
2454-
2455- if ( $args{exact} ) {
2456- next unless $index->{is_unique} && @{$index->{cols}} == 1;
2457- }
2458-
2459- push @possible_indexes, $index;
2460- }
2461- MKDEBUG && _d('Possible chunk indexes in order:',
2462- join(', ', map { $_->{name} } @possible_indexes));
2463-
2464- my $can_chunk_exact = 0;
2465- my @candidate_cols;
2466- foreach my $index ( @possible_indexes ) {
2467- my $col = $index->{cols}->[0];
2468-
2469- my $col_type = $tbl_struct->{type_for}->{$col};
2470- next unless $self->{int_types}->{$col_type}
2471- || $self->{real_types}->{$col_type}
2472- || $col_type =~ m/char/;
2473-
2474- push @candidate_cols, { column => $col, index => $index->{name} };
2475- }
2476-
2477- $can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;
2478-
2479- if ( MKDEBUG ) {
2480- my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';
2481- _d($chunk_type, 'chunkable:',
2482- join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));
2483- }
2484-
2485- my @result;
2486- MKDEBUG && _d('Ordering columns by order in tbl, PK first');
2487- if ( $tbl_struct->{keys}->{PRIMARY} ) {
2488- my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];
2489- @result = grep { $_->{column} eq $pk_first_col } @candidate_cols;
2490- @candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;
2491- }
2492- my $i = 0;
2493- my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};
2494- push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }
2495- @candidate_cols;
2496-
2497- if ( MKDEBUG ) {
2498- _d('Chunkable columns:',
2499- join(', ', map { "$_->{column} on $_->{index}" } @result));
2500- _d('Can chunk exactly:', $can_chunk_exact);
2501- }
2502-
2503- return ($can_chunk_exact, @result);
2504-}
2505-
2506-sub calculate_chunks {
2507- my ( $self, %args ) = @_;
2508- my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2509+sub generate_asc_stmt {
2510+ my ( $self, %args ) = @_;
2511+ my @required_args = qw(tbl_struct index);
2512 foreach my $arg ( @required_args ) {
2513 die "I need a $arg argument" unless defined $args{$arg};
2514 }
2515- MKDEBUG && _d('Calculate chunks for',
2516- join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}
2517- qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)
2518- ));
2519-
2520- if ( !$args{rows_in_range} ) {
2521- MKDEBUG && _d("Empty table");
2522- return '1=1';
2523- }
2524-
2525- if ( $args{rows_in_range} < $args{chunk_size} ) {
2526- MKDEBUG && _d("Chunk size larger than rows in range");
2527- return '1=1';
2528- }
2529-
2530- my $q = $self->{Quoter};
2531- my $dbh = $args{dbh};
2532- my $chunk_col = $args{chunk_col};
2533- my $tbl_struct = $args{tbl_struct};
2534- my $col_type = $tbl_struct->{type_for}->{$chunk_col};
2535- MKDEBUG && _d('chunk col type:', $col_type);
2536-
2537- my %chunker;
2538- if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {
2539- %chunker = $self->_chunk_numeric(%args);
2540- }
2541- elsif ( $col_type =~ m/char/ ) {
2542- %chunker = $self->_chunk_char(%args);
2543- }
2544- else {
2545- die "Cannot chunk $col_type columns";
2546- }
2547- MKDEBUG && _d("Chunker:", Dumper(\%chunker));
2548- my ($col, $start_point, $end_point, $interval, $range_func)
2549- = @chunker{qw(col start_point end_point interval range_func)};
2550-
2551- my @chunks;
2552- if ( $start_point < $end_point ) {
2553-
2554- push @chunks, "$col = 0" if $chunker{have_zero_chunk};
2555-
2556- my ($beg, $end);
2557- my $iter = 0;
2558- for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
2559- ($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);
2560-
2561- if ( $iter++ == 0 ) {
2562- push @chunks,
2563- ($chunker{have_zero_chunk} ? "$col > 0 AND " : "")
2564- ."$col < " . $q->quote_val($end);
2565+ my ($tbl_struct, $index) = @args{@required_args};
2566+ my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
2567+ my $q = $self->{Quoter};
2568+
2569+ die "Index '$index' does not exist in table"
2570+ unless exists $tbl_struct->{keys}->{$index};
2571+ MKDEBUG && _d('Will ascend index', $index);
2572+
2573+ my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
2574+ if ( $args{asc_first} ) {
2575+ @asc_cols = $asc_cols[0];
2576+ MKDEBUG && _d('Ascending only first column');
2577+ }
2578+ MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
2579+
2580+ my @asc_slice;
2581+ my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
2582+ foreach my $col ( @asc_cols ) {
2583+ if ( !exists $col_posn{$col} ) {
2584+ push @cols, $col;
2585+ $col_posn{$col} = $#cols;
2586+ }
2587+ push @asc_slice, $col_posn{$col};
2588+ }
2589+ MKDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice));
2590+
2591+ my $asc_stmt = {
2592+ cols => \@cols,
2593+ index => $index,
2594+ where => '',
2595+ slice => [],
2596+ scols => [],
2597+ };
2598+
2599+ if ( @asc_slice ) {
2600+ my $cmp_where;
2601+ foreach my $cmp ( qw(< <= >= >) ) {
2602+ $cmp_where = $self->generate_cmp_where(
2603+ type => $cmp,
2604+ slice => \@asc_slice,
2605+ cols => \@cols,
2606+ quoter => $q,
2607+ is_nullable => $tbl_struct->{is_nullable},
2608+ );
2609+ $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where};
2610+ }
2611+ my $cmp = $args{asc_only} ? '>' : '>=';
2612+ $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp};
2613+ $asc_stmt->{slice} = $cmp_where->{slice};
2614+ $asc_stmt->{scols} = $cmp_where->{scols};
2615+ }
2616+
2617+ return $asc_stmt;
2618+}
2619+
2620+sub generate_cmp_where {
2621+ my ( $self, %args ) = @_;
2622+ foreach my $arg ( qw(type slice cols is_nullable) ) {
2623+ die "I need a $arg arg" unless defined $args{$arg};
2624+ }
2625+ my @slice = @{$args{slice}};
2626+ my @cols = @{$args{cols}};
2627+ my $is_nullable = $args{is_nullable};
2628+ my $type = $args{type};
2629+ my $q = $self->{Quoter};
2630+
2631+ (my $cmp = $type) =~ s/=//;
2632+
2633+ my @r_slice; # Resulting slice columns, by ordinal
2634+ my @r_scols; # Ditto, by name
2635+
2636+ my @clauses;
2637+ foreach my $i ( 0 .. $#slice ) {
2638+ my @clause;
2639+
2640+ foreach my $j ( 0 .. $i - 1 ) {
2641+ my $ord = $slice[$j];
2642+ my $col = $cols[$ord];
2643+ my $quo = $q->quote($col);
2644+ if ( $is_nullable->{$col} ) {
2645+ push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
2646+ push @r_slice, $ord, $ord;
2647+ push @r_scols, $col, $col;
2648 }
2649 else {
2650- push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);
2651- }
2652- }
2653-
2654- my $chunk_range = lc $args{chunk_range} || 'open';
2655- my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};
2656- pop @chunks;
2657- if ( @chunks ) {
2658- push @chunks, "$col >= " . $q->quote_val($beg)
2659- . ($chunk_range eq 'openclosed'
2660- ? " AND $col <= " . $q->quote_val($args{max}) : "");
2661- }
2662- else {
2663- push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
2664- }
2665- if ( $nullable ) {
2666- push @chunks, "$col IS NULL";
2667- }
2668- }
2669- else {
2670- MKDEBUG && _d('No chunks; using single chunk 1=1');
2671- push @chunks, '1=1';
2672- }
2673-
2674- return @chunks;
2675-}
2676-
2677-sub _chunk_numeric {
2678- my ( $self, %args ) = @_;
2679- my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2680- foreach my $arg ( @required_args ) {
2681- die "I need a $arg argument" unless defined $args{$arg};
2682- }
2683- my $q = $self->{Quoter};
2684- my $db_tbl = $q->quote($args{db}, $args{tbl});
2685- my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};
2686-
2687- my $range_func;
2688- if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
2689- $range_func = 'range_num';
2690- }
2691- elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
2692- $range_func = "range_$col_type";
2693- }
2694- elsif ( $col_type eq 'datetime' ) {
2695- $range_func = 'range_datetime';
2696- }
2697-
2698- my ($start_point, $end_point);
2699- eval {
2700- $start_point = $self->value_to_number(
2701- value => $args{min},
2702- column_type => $col_type,
2703- dbh => $args{dbh},
2704- );
2705- $end_point = $self->value_to_number(
2706- value => $args{max},
2707- column_type => $col_type,
2708- dbh => $args{dbh},
2709- );
2710- };
2711- if ( $EVAL_ERROR ) {
2712- if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {
2713- die $EVAL_ERROR;
2714- }
2715- else {
2716- die "Error calculating chunk start and end points for table "
2717- . "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "
2718- . "with min/max values "
2719- . join('/',
2720- map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))
2721- . ":\n\n"
2722- . $EVAL_ERROR
2723- . "\nVerify that the min and max values are valid for the column. "
2724- . "If they are valid, this error could be caused by a bug in the "
2725- . "tool.";
2726- }
2727- }
2728-
2729- if ( !defined $start_point ) {
2730- MKDEBUG && _d('Start point is undefined');
2731- $start_point = 0;
2732- }
2733- if ( !defined $end_point || $end_point < $start_point ) {
2734- MKDEBUG && _d('End point is undefined or before start point');
2735- $end_point = 0;
2736- }
2737- MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);
2738-
2739- my $have_zero_chunk = 0;
2740- if ( $args{zero_chunk} ) {
2741- if ( $start_point != $end_point && $start_point >= 0 ) {
2742- MKDEBUG && _d('Zero chunking');
2743- my $nonzero_val = $self->get_nonzero_value(
2744- %args,
2745- db_tbl => $db_tbl,
2746- col => $args{chunk_col},
2747- col_type => $col_type,
2748- val => $args{min}
2749- );
2750- $start_point = $self->value_to_number(
2751- value => $nonzero_val,
2752- column_type => $col_type,
2753- dbh => $args{dbh},
2754- );
2755- $have_zero_chunk = 1;
2756- }
2757- else {
2758- MKDEBUG && _d("Cannot zero chunk");
2759- }
2760- }
2761- MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);
2762-
2763- my $interval = $args{chunk_size}
2764- * ($end_point - $start_point)
2765- / $args{rows_in_range};
2766- if ( $self->{int_types}->{$col_type} ) {
2767- $interval = ceil($interval);
2768- }
2769- $interval ||= $args{chunk_size};
2770- if ( $args{exact} ) {
2771- $interval = $args{chunk_size};
2772- }
2773- MKDEBUG && _d('Chunk interval:', $interval, 'units');
2774-
2775- return (
2776- col => $q->quote($args{chunk_col}),
2777- start_point => $start_point,
2778- end_point => $end_point,
2779- interval => $interval,
2780- range_func => $range_func,
2781- have_zero_chunk => $have_zero_chunk,
2782- );
2783-}
2784-
2785-sub _chunk_char {
2786- my ( $self, %args ) = @_;
2787- my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
2788- foreach my $arg ( @required_args ) {
2789- die "I need a $arg argument" unless defined $args{$arg};
2790- }
2791- my $q = $self->{Quoter};
2792- my $db_tbl = $q->quote($args{db}, $args{tbl});
2793- my $dbh = $args{dbh};
2794- my $chunk_col = $args{chunk_col};
2795- my $row;
2796- my $sql;
2797-
2798- my ($min_col, $max_col) = @{args}{qw(min max)};
2799- $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
2800- MKDEBUG && _d($dbh, $sql);
2801- my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
2802- $ord_sth->execute($min_col, $max_col);
2803- $row = $ord_sth->fetchrow_arrayref();
2804- my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);
2805- MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);
2806-
2807- my $base;
2808- my @chars;
2809- MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});
2810- if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {
2811- my @sorted_latin1_chars = (
2812- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
2813- 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
2814- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
2815- 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
2816- 88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,
2817- 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
2818- 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
2819- 190, 191, 215, 216, 222, 223, 247, 255);
2820-
2821- my ($first_char, $last_char);
2822- for my $i ( 0..$#sorted_latin1_chars ) {
2823- $first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;
2824- }
2825- for my $i ( $first_char..$#sorted_latin1_chars ) {
2826- $last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;
2827- };
2828-
2829- @chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];
2830- $base = scalar @chars;
2831- }
2832- else {
2833-
2834- my $tmp_tbl = '__maatkit_char_chunking_map';
2835- my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);
2836- $sql = "DROP TABLE IF EXISTS $tmp_db_tbl";
2837- MKDEBUG && _d($dbh, $sql);
2838- $dbh->do($sql);
2839- my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};
2840- $sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "
2841- . "ENGINE=MEMORY";
2842- MKDEBUG && _d($dbh, $sql);
2843- $dbh->do($sql);
2844-
2845- $sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))";
2846- MKDEBUG && _d($dbh, $sql);
2847- my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues
2848- for my $char_code ( $min_col_ord..$max_col_ord ) {
2849- $ins_char_sth->execute($char_code);
2850- }
2851-
2852- $sql = "SELECT `$chunk_col` FROM $tmp_db_tbl "
2853- . "WHERE `$chunk_col` BETWEEN ? AND ? "
2854- . "ORDER BY `$chunk_col`";
2855- MKDEBUG && _d($dbh, $sql);
2856- my $sel_char_sth = $dbh->prepare($sql);
2857- $sel_char_sth->execute($min_col, $max_col);
2858-
2859- @chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };
2860- $base = scalar @chars;
2861-
2862- $sql = "DROP TABLE $tmp_db_tbl";
2863- MKDEBUG && _d($dbh, $sql);
2864- $dbh->do($sql);
2865- }
2866- MKDEBUG && _d("Base", $base, "chars:", @chars);
2867-
2868-
2869- $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl "
2870- . ($args{where} ? "WHERE $args{where} " : "")
2871- . "ORDER BY `$chunk_col`";
2872- MKDEBUG && _d($dbh, $sql);
2873- $row = $dbh->selectrow_arrayref($sql);
2874- my $max_col_len = $row->[0];
2875- MKDEBUG && _d("Max column value:", $max_col, $max_col_len);
2876- my $n_values;
2877- for my $n_chars ( 1..$max_col_len ) {
2878- $n_values = $base**$n_chars;
2879- if ( $n_values >= $args{chunk_size} ) {
2880- MKDEBUG && _d($n_chars, "chars in base", $base, "expresses",
2881- $n_values, "values");
2882- last;
2883- }
2884- }
2885-
2886- my $n_chunks = $args{rows_in_range} / $args{chunk_size};
2887- my $interval = floor($n_values / $n_chunks) || 1;
2888-
2889- my $range_func = sub {
2890- my ( $self, $dbh, $start, $interval, $max ) = @_;
2891- my $start_char = $self->base_count(
2892- count_to => $start,
2893- base => $base,
2894- symbols => \@chars,
2895- );
2896- my $end_char = $self->base_count(
2897- count_to => min($max, $start + $interval),
2898- base => $base,
2899- symbols => \@chars,
2900- );
2901- return $start_char, $end_char;
2902- };
2903-
2904- return (
2905- col => $q->quote($chunk_col),
2906- start_point => 0,
2907- end_point => $n_values,
2908- interval => $interval,
2909- range_func => $range_func,
2910- );
2911-}
2912-
2913-sub get_first_chunkable_column {
2914- my ( $self, %args ) = @_;
2915- foreach my $arg ( qw(tbl_struct) ) {
2916- die "I need a $arg argument" unless $args{$arg};
2917- }
2918-
2919- my ($exact, @cols) = $self->find_chunk_columns(%args);
2920- my $col = $cols[0]->{column};
2921- my $idx = $cols[0]->{index};
2922-
2923- my $wanted_col = $args{chunk_column};
2924- my $wanted_idx = $args{chunk_index};
2925- MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);
2926-
2927- if ( $wanted_col && $wanted_idx ) {
2928- foreach my $chunkable_col ( @cols ) {
2929- if ( $wanted_col eq $chunkable_col->{column}
2930- && $wanted_idx eq $chunkable_col->{index} ) {
2931- $col = $wanted_col;
2932- $idx = $wanted_idx;
2933- last;
2934- }
2935- }
2936- }
2937- elsif ( $wanted_col ) {
2938- foreach my $chunkable_col ( @cols ) {
2939- if ( $wanted_col eq $chunkable_col->{column} ) {
2940- $col = $wanted_col;
2941- $idx = $chunkable_col->{index};
2942- last;
2943- }
2944- }
2945- }
2946- elsif ( $wanted_idx ) {
2947- foreach my $chunkable_col ( @cols ) {
2948- if ( $wanted_idx eq $chunkable_col->{index} ) {
2949- $col = $chunkable_col->{column};
2950- $idx = $wanted_idx;
2951- last;
2952- }
2953- }
2954- }
2955-
2956- MKDEBUG && _d('First chunkable col/index:', $col, $idx);
2957- return $col, $idx;
2958-}
2959-
2960-sub size_to_rows {
2961- my ( $self, %args ) = @_;
2962- my @required_args = qw(dbh db tbl chunk_size);
2963- foreach my $arg ( @required_args ) {
2964- die "I need a $arg argument" unless $args{$arg};
2965- }
2966- my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};
2967- my $q = $self->{Quoter};
2968- my $du = $self->{MySQLDump};
2969-
2970- my ($n_rows, $avg_row_length);
2971-
2972- my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;
2973- if ( $suffix ) { # Convert to bytes.
2974- $chunk_size = $suffix eq 'k' ? $num * 1_024
2975- : $suffix eq 'M' ? $num * 1_024 * 1_024
2976- : $num * 1_024 * 1_024 * 1_024;
2977- }
2978- elsif ( $num ) {
2979- $n_rows = $num;
2980- }
2981- else {
2982- die "Invalid chunk size $chunk_size; must be an integer "
2983- . "with optional suffix kMG";
2984- }
2985-
2986- if ( $suffix || $args{avg_row_length} ) {
2987- my ($status) = $du->get_table_status($dbh, $q, $db, $tbl);
2988- $avg_row_length = $status->{avg_row_length};
2989- if ( !defined $n_rows ) {
2990- $n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;
2991- }
2992- }
2993-
2994- return $n_rows, $avg_row_length;
2995-}
2996-
2997-sub get_range_statistics {
2998- my ( $self, %args ) = @_;
2999- my @required_args = qw(dbh db tbl chunk_col tbl_struct);
3000- foreach my $arg ( @required_args ) {
3001- die "I need a $arg argument" unless $args{$arg};
3002- }
3003- my ($dbh, $db, $tbl, $col) = @args{@required_args};
3004- my $where = $args{where};
3005- my $q = $self->{Quoter};
3006-
3007- my $col_type = $args{tbl_struct}->{type_for}->{$col};
3008- my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};
3009-
3010- my $db_tbl = $q->quote($db, $tbl);
3011- $col = $q->quote($col);
3012-
3013- my ($min, $max);
3014- eval {
3015- my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"
3016- . ($args{index_hint} ? " $args{index_hint}" : "")
3017- . ($where ? " WHERE ($where)" : '');
3018- MKDEBUG && _d($dbh, $sql);
3019- ($min, $max) = $dbh->selectrow_array($sql);
3020- MKDEBUG && _d("Actual end points:", $min, $max);
3021-
3022- ($min, $max) = $self->get_valid_end_points(
3023- %args,
3024- dbh => $dbh,
3025- db_tbl => $db_tbl,
3026- col => $col,
3027- col_type => $col_type,
3028- min => $min,
3029- max => $max,
3030- );
3031- MKDEBUG && _d("Valid end points:", $min, $max);
3032- };
3033- if ( $EVAL_ERROR ) {
3034- die "Error getting min and max values for table $db_tbl "
3035- . "on column $col: $EVAL_ERROR";
3036- }
3037-
3038- my $sql = "EXPLAIN SELECT * FROM $db_tbl"
3039- . ($args{index_hint} ? " $args{index_hint}" : "")
3040- . ($where ? " WHERE $where" : '');
3041- MKDEBUG && _d($sql);
3042- my $expl = $dbh->selectrow_hashref($sql);
3043-
3044- return (
3045- min => $min,
3046- max => $max,
3047- rows_in_range => $expl->{rows},
3048- );
3049-}
3050-
3051-sub inject_chunks {
3052- my ( $self, %args ) = @_;
3053- foreach my $arg ( qw(database table chunks chunk_num query) ) {
3054- die "I need a $arg argument" unless defined $args{$arg};
3055- }
3056- MKDEBUG && _d('Injecting chunk', $args{chunk_num});
3057- my $query = $args{query};
3058- my $comment = sprintf("/*%s.%s:%d/%d*/",
3059- $args{database}, $args{table},
3060- $args{chunk_num} + 1, scalar @{$args{chunks}});
3061- $query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;
3062- my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';
3063- if ( $args{where} && grep { $_ } @{$args{where}} ) {
3064- $where .= " AND ("
3065- . join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )
3066- . ")";
3067- }
3068- my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});
3069- my $index_hint = $args{index_hint} || '';
3070-
3071- MKDEBUG && _d('Parameters:',
3072- Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));
3073- $query =~ s!/\*WHERE\*/! $where!;
3074- $query =~ s!/\*DB_TBL\*/!$db_tbl!;
3075- $query =~ s!/\*INDEX_HINT\*/! $index_hint!;
3076- $query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;
3077-
3078- return $query;
3079-}
3080-
3081-
3082-sub value_to_number {
3083- my ( $self, %args ) = @_;
3084- my @required_args = qw(column_type dbh);
3085- foreach my $arg ( @required_args ) {
3086- die "I need a $arg argument" unless defined $args{$arg};
3087- }
3088- my $val = $args{value};
3089- my ($col_type, $dbh) = @args{@required_args};
3090- MKDEBUG && _d('Converting MySQL', $col_type, $val);
3091-
3092- return unless defined $val; # value is NULL
3093-
3094- my %mysql_conv_func_for = (
3095- timestamp => 'UNIX_TIMESTAMP',
3096- date => 'TO_DAYS',
3097- time => 'TIME_TO_SEC',
3098- datetime => 'TO_DAYS',
3099- );
3100-
3101- my $num;
3102- if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
3103- $num = $val;
3104- }
3105- elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
3106- my $func = $mysql_conv_func_for{$col_type};
3107- my $sql = "SELECT $func(?)";
3108- MKDEBUG && _d($dbh, $sql, $val);
3109- my $sth = $dbh->prepare($sql);
3110- $sth->execute($val);
3111- ($num) = $sth->fetchrow_array();
3112- }
3113- elsif ( $col_type eq 'datetime' ) {
3114- $num = $self->timestampdiff($dbh, $val);
3115- }
3116- else {
3117- die "I don't know how to chunk $col_type\n";
3118- }
3119- MKDEBUG && _d('Converts to', $num);
3120- return $num;
3121-}
3122-
3123-sub range_num {
3124- my ( $self, $dbh, $start, $interval, $max ) = @_;
3125- my $end = min($max, $start + $interval);
3126-
3127-
3128- $start = sprintf('%.17f', $start) if $start =~ /e/;
3129- $end = sprintf('%.17f', $end) if $end =~ /e/;
3130-
3131- $start =~ s/\.(\d{5}).*$/.$1/;
3132- $end =~ s/\.(\d{5}).*$/.$1/;
3133-
3134- if ( $end > $start ) {
3135- return ( $start, $end );
3136- }
3137- else {
3138- die "Chunk size is too small: $end !> $start\n";
3139- }
3140-}
3141-
3142-sub range_time {
3143- my ( $self, $dbh, $start, $interval, $max ) = @_;
3144- my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";
3145- MKDEBUG && _d($sql);
3146- return $dbh->selectrow_array($sql);
3147-}
3148-
3149-sub range_date {
3150- my ( $self, $dbh, $start, $interval, $max ) = @_;
3151- my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";
3152- MKDEBUG && _d($sql);
3153- return $dbh->selectrow_array($sql);
3154-}
3155-
3156-sub range_datetime {
3157- my ( $self, $dbh, $start, $interval, $max ) = @_;
3158- my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "
3159- . "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";
3160- MKDEBUG && _d($sql);
3161- return $dbh->selectrow_array($sql);
3162-}
3163-
3164-sub range_timestamp {
3165- my ( $self, $dbh, $start, $interval, $max ) = @_;
3166- my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";
3167- MKDEBUG && _d($sql);
3168- return $dbh->selectrow_array($sql);
3169-}
3170-
3171-sub timestampdiff {
3172- my ( $self, $dbh, $time ) = @_;
3173- my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "
3174- . "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";
3175- MKDEBUG && _d($sql);
3176- my ( $diff ) = $dbh->selectrow_array($sql);
3177- $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";
3178- MKDEBUG && _d($sql);
3179- my ( $check ) = $dbh->selectrow_array($sql);
3180- die <<" EOF"
3181- Incorrect datetime math: given $time, calculated $diff but checked to $check.
3182- This could be due to a version of MySQL that overflows on large interval
3183- values to DATE_ADD(), or the given datetime is not a valid date. If not,
3184- please report this as a bug.
3185- EOF
3186- unless $check eq $time;
3187- return $diff;
3188-}
3189-
3190-
3191-
3192-
3193-sub get_valid_end_points {
3194- my ( $self, %args ) = @_;
3195- my @required_args = qw(dbh db_tbl col col_type);
3196- foreach my $arg ( @required_args ) {
3197- die "I need a $arg argument" unless $args{$arg};
3198- }
3199- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3200- my ($real_min, $real_max) = @args{qw(min max)};
3201-
3202- my $err_fmt = "Error finding a valid %s value for table $db_tbl on "
3203- . "column $col. The real %s value %s is invalid and "
3204- . "no other valid values were found. Verify that the table "
3205- . "has at least one valid value for this column"
3206- . ($args{where} ? " where $args{where}." : ".");
3207-
3208- my $valid_min = $real_min;
3209- if ( defined $valid_min ) {
3210- MKDEBUG && _d("Validating min end point:", $real_min);
3211- $valid_min = $self->_get_valid_end_point(
3212- %args,
3213- val => $real_min,
3214- endpoint => 'min',
3215- );
3216- die sprintf($err_fmt, 'minimum', 'minimum',
3217- (defined $real_min ? $real_min : "NULL"))
3218- unless defined $valid_min;
3219- }
3220-
3221- my $valid_max = $real_max;
3222- if ( defined $valid_max ) {
3223- MKDEBUG && _d("Validating max end point:", $real_min);
3224- $valid_max = $self->_get_valid_end_point(
3225- %args,
3226- val => $real_max,
3227- endpoint => 'max',
3228- );
3229- die sprintf($err_fmt, 'maximum', 'maximum',
3230- (defined $real_max ? $real_max : "NULL"))
3231- unless defined $valid_max;
3232- }
3233-
3234- return $valid_min, $valid_max;
3235-}
3236-
3237-sub _get_valid_end_point {
3238- my ( $self, %args ) = @_;
3239- my @required_args = qw(dbh db_tbl col col_type);
3240- foreach my $arg ( @required_args ) {
3241- die "I need a $arg argument" unless $args{$arg};
3242- }
3243- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3244- my $val = $args{val};
3245-
3246- return $val unless defined $val;
3247-
3248- my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3249- : undef;
3250-
3251- if ( !$validate ) {
3252- MKDEBUG && _d("No validator for", $col_type, "values");
3253- return $val;
3254- }
3255-
3256- return $val if defined $validate->($dbh, $val);
3257-
3258- MKDEBUG && _d("Value is invalid, getting first valid value");
3259- $val = $self->get_first_valid_value(
3260- %args,
3261- val => $val,
3262- validate => $validate,
3263- );
3264-
3265- return $val;
3266-}
3267-
3268-sub get_first_valid_value {
3269- my ( $self, %args ) = @_;
3270- my @required_args = qw(dbh db_tbl col validate endpoint);
3271- foreach my $arg ( @required_args ) {
3272- die "I need a $arg argument" unless $args{$arg};
3273- }
3274- my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};
3275- my $tries = defined $args{tries} ? $args{tries} : 5;
3276- my $val = $args{val};
3277-
3278- return unless defined $val;
3279-
3280- my $cmp = $endpoint =~ m/min/i ? '>'
3281- : $endpoint =~ m/max/i ? '<'
3282- : die "Invalid endpoint arg: $endpoint";
3283- my $sql = "SELECT $col FROM $db_tbl "
3284- . ($args{index_hint} ? "$args{index_hint} " : "")
3285- . "WHERE $col $cmp ? AND $col IS NOT NULL "
3286- . ($args{where} ? "AND ($args{where}) " : "")
3287- . "ORDER BY $col LIMIT 1";
3288- MKDEBUG && _d($dbh, $sql);
3289- my $sth = $dbh->prepare($sql);
3290-
3291- my $last_val = $val;
3292- while ( $tries-- ) {
3293- $sth->execute($last_val);
3294- my ($next_val) = $sth->fetchrow_array();
3295- MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);
3296- if ( !defined $next_val ) {
3297- MKDEBUG && _d('No more rows in table');
3298- last;
3299- }
3300- if ( defined $validate->($dbh, $next_val) ) {
3301- MKDEBUG && _d('First valid value:', $next_val);
3302- $sth->finish();
3303- return $next_val;
3304- }
3305- $last_val = $next_val;
3306- }
3307- $sth->finish();
3308- $val = undef; # no valid value found
3309-
3310- return $val;
3311-}
3312-
3313-sub _validate_temporal_value {
3314- my ( $dbh, $val ) = @_;
3315- my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";
3316- my $res;
3317- eval {
3318- MKDEBUG && _d($dbh, $sql, $val);
3319- my $sth = $dbh->prepare($sql);
3320- $sth->execute($val, $val, $val, $val);
3321- ($res) = $sth->fetchrow_array();
3322- $sth->finish();
3323- };
3324- if ( $EVAL_ERROR ) {
3325- MKDEBUG && _d($EVAL_ERROR);
3326- }
3327- return $res;
3328-}
3329-
3330-sub get_nonzero_value {
3331- my ( $self, %args ) = @_;
3332- my @required_args = qw(dbh db_tbl col col_type);
3333- foreach my $arg ( @required_args ) {
3334- die "I need a $arg argument" unless $args{$arg};
3335- }
3336- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3337- my $tries = defined $args{tries} ? $args{tries} : 5;
3338- my $val = $args{val};
3339-
3340- my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3341- : sub { return $_[1]; };
3342-
3343- if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry
3344- MKDEBUG && _d('Discarding zero value:', $val);
3345- my $sql = "SELECT $col FROM $db_tbl "
3346- . ($args{index_hint} ? "$args{index_hint} " : "")
3347- . "WHERE $col > ? AND $col IS NOT NULL "
3348- . ($args{where} ? "AND ($args{where}) " : '')
3349- . "ORDER BY $col LIMIT 1";
3350- MKDEBUG && _d($sql);
3351- my $sth = $dbh->prepare($sql);
3352-
3353- my $last_val = $val;
3354- while ( $tries-- ) {
3355- $sth->execute($last_val);
3356- my ($next_val) = $sth->fetchrow_array();
3357- if ( $is_nonzero->($dbh, $next_val) ) {
3358- MKDEBUG && _d('First non-zero value:', $next_val);
3359- $sth->finish();
3360- return $next_val;
3361- }
3362- $last_val = $next_val;
3363- }
3364- $sth->finish();
3365- $val = undef; # no non-zero value found
3366- }
3367-
3368- return $val;
3369-}
3370-
3371-sub base_count {
3372- my ( $self, %args ) = @_;
3373- my @required_args = qw(count_to base symbols);
3374- foreach my $arg ( @required_args ) {
3375- die "I need a $arg argument" unless defined $args{$arg};
3376- }
3377- my ($n, $base, $symbols) = @args{@required_args};
3378-
3379- return $symbols->[0] if $n == 0;
3380-
3381- my $highest_power = floor(log($n)/log($base));
3382- if ( $highest_power == 0 ){
3383- return $symbols->[$n];
3384- }
3385-
3386- my @base_powers;
3387- for my $power ( 0..$highest_power ) {
3388- push @base_powers, ($base**$power) || 1;
3389- }
3390-
3391- my @base_multiples;
3392- foreach my $base_power ( reverse @base_powers ) {
3393- my $multiples = floor($n / $base_power);
3394- push @base_multiples, $multiples;
3395- $n -= $multiples * $base_power;
3396- }
3397-
3398- return join('', map { $symbols->[$_] } @base_multiples);
3399+ push @clause, "$quo = ?";
3400+ push @r_slice, $ord;
3401+ push @r_scols, $col;
3402+ }
3403+ }
3404+
3405+ my $ord = $slice[$i];
3406+ my $col = $cols[$ord];
3407+ my $quo = $q->quote($col);
3408+ my $end = $i == $#slice; # Last clause of the whole group.
3409+ if ( $is_nullable->{$col} ) {
3410+ if ( $type =~ m/=/ && $end ) {
3411+ push @clause, "(? IS NULL OR $quo $type ?)";
3412+ }
3413+ elsif ( $type =~ m/>/ ) {
3414+ push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo $cmp ?))";
3415+ }
3416+ else { # If $type =~ m/</ ) {
3417+ push @clause, "((? IS NOT NULL AND $quo IS NULL) OR ($quo $cmp ?))";
3418+ }
3419+ push @r_slice, $ord, $ord;
3420+ push @r_scols, $col, $col;
3421+ }
3422+ else {
3423+ push @r_slice, $ord;
3424+ push @r_scols, $col;
3425+ push @clause, ($type =~ m/=/ && $end ? "$quo $type ?" : "$quo $cmp ?");
3426+ }
3427+
3428+ push @clauses, '(' . join(' AND ', @clause) . ')';
3429+ }
3430+ my $result = '(' . join(' OR ', @clauses) . ')';
3431+ my $where = {
3432+ slice => \@r_slice,
3433+ scols => \@r_scols,
3434+ where => $result,
3435+ };
3436+ return $where;
3437+}
3438+
3439+sub generate_del_stmt {
3440+ my ( $self, %args ) = @_;
3441+
3442+ my $tbl = $args{tbl_struct};
3443+ my @cols = $args{cols} ? @{$args{cols}} : ();
3444+ my $tp = $self->{TableParser};
3445+ my $q = $self->{Quoter};
3446+
3447+ my @del_cols;
3448+ my @del_slice;
3449+
3450+ my $index = $tp->find_best_index($tbl, $args{index});
3451+ die "Cannot find an ascendable index in table" unless $index;
3452+
3453+ if ( $index ) {
3454+ @del_cols = @{$tbl->{keys}->{$index}->{cols}};
3455+ }
3456+ else {
3457+ @del_cols = @{$tbl->{cols}};
3458+ }
3459+ MKDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols));
3460+
3461+ my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
3462+ foreach my $col ( @del_cols ) {
3463+ if ( !exists $col_posn{$col} ) {
3464+ push @cols, $col;
3465+ $col_posn{$col} = $#cols;
3466+ }
3467+ push @del_slice, $col_posn{$col};
3468+ }
3469+ MKDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice));
3470+
3471+ my $del_stmt = {
3472+ cols => \@cols,
3473+ index => $index,
3474+ where => '',
3475+ slice => [],
3476+ scols => [],
3477+ };
3478+
3479+ my @clauses;
3480+ foreach my $i ( 0 .. $#del_slice ) {
3481+ my $ord = $del_slice[$i];
3482+ my $col = $cols[$ord];
3483+ my $quo = $q->quote($col);
3484+ if ( $tbl->{is_nullable}->{$col} ) {
3485+ push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
3486+ push @{$del_stmt->{slice}}, $ord, $ord;
3487+ push @{$del_stmt->{scols}}, $col, $col;
3488+ }
3489+ else {
3490+ push @clauses, "$quo = ?";
3491+ push @{$del_stmt->{slice}}, $ord;
3492+ push @{$del_stmt->{scols}}, $col;
3493+ }
3494+ }
3495+
3496+ $del_stmt->{where} = '(' . join(' AND ', @clauses) . ')';
3497+
3498+ return $del_stmt;
3499+}
3500+
3501+sub generate_ins_stmt {
3502+ my ( $self, %args ) = @_;
3503+ foreach my $arg ( qw(ins_tbl sel_cols) ) {
3504+ die "I need a $arg argument" unless $args{$arg};
3505+ }
3506+ my $ins_tbl = $args{ins_tbl};
3507+ my @sel_cols = @{$args{sel_cols}};
3508+
3509+ die "You didn't specify any SELECT columns" unless @sel_cols;
3510+
3511+ my @ins_cols;
3512+ my @ins_slice;
3513+ for my $i ( 0..$#sel_cols ) {
3514+ next unless $ins_tbl->{is_col}->{$sel_cols[$i]};
3515+ push @ins_cols, $sel_cols[$i];
3516+ push @ins_slice, $i;
3517+ }
3518+
3519+ return {
3520+ cols => \@ins_cols,
3521+ slice => \@ins_slice,
3522+ };
3523 }
3524
3525 sub _d {
3526@@ -3488,84 +2415,7 @@
3527 1;
3528 }
3529 # ###########################################################################
3530-# End TableChunker package
3531-# ###########################################################################
3532-
3533-# ###########################################################################
3534-# Quoter package
3535-# This package is a copy without comments from the original. The original
3536-# with comments and its test file can be found in the Bazaar repository at,
3537-# lib/Quoter.pm
3538-# t/lib/Quoter.t
3539-# See https://launchpad.net/percona-toolkit for more information.
3540-# ###########################################################################
3541-{
3542-package Quoter;
3543-
3544-use strict;
3545-use warnings FATAL => 'all';
3546-use English qw(-no_match_vars);
3547-use constant MKDEBUG => $ENV{MKDEBUG} || 0;
3548-
3549-sub new {
3550- my ( $class, %args ) = @_;
3551- return bless {}, $class;
3552-}
3553-
3554-sub quote {
3555- my ( $self, @vals ) = @_;
3556- foreach my $val ( @vals ) {
3557- $val =~ s/`/``/g;
3558- }
3559- return join('.', map { '`' . $_ . '`' } @vals);
3560-}
3561-
3562-sub quote_val {
3563- my ( $self, $val ) = @_;
3564-
3565- return 'NULL' unless defined $val; # undef = NULL
3566- return "''" if $val eq ''; # blank string = ''
3567- return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data
3568-
3569- $val =~ s/(['\\])/\\$1/g;
3570- return "'$val'";
3571-}
3572-
3573-sub split_unquote {
3574- my ( $self, $db_tbl, $default_db ) = @_;
3575- $db_tbl =~ s/`//g;
3576- my ( $db, $tbl ) = split(/[.]/, $db_tbl);
3577- if ( !$tbl ) {
3578- $tbl = $db;
3579- $db = $default_db;
3580- }
3581- return ($db, $tbl);
3582-}
3583-
3584-sub literal_like {
3585- my ( $self, $like ) = @_;
3586- return unless $like;
3587- $like =~ s/([%_])/\\$1/g;
3588- return "'$like'";
3589-}
3590-
3591-sub join_quote {
3592- my ( $self, $default_db, $db_tbl ) = @_;
3593- return unless $db_tbl;
3594- my ($db, $tbl) = split(/[.]/, $db_tbl);
3595- if ( !$tbl ) {
3596- $tbl = $db;
3597- $db = $default_db;
3598- }
3599- $db = "`$db`" if $db && $db !~ m/^`/;
3600- $tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/;
3601- return $db ? "$db.$tbl" : $tbl;
3602-}
3603-
3604-1;
3605-}
3606-# ###########################################################################
3607-# End Quoter package
3608+# End TableNibbler package
3609 # ###########################################################################
3610
3611 # ###########################################################################
3612@@ -3593,6 +2443,54 @@
3613 return bless $self, $class;
3614 }
3615
3616+sub get_slaves {
3617+ my ($self, %args) = @_;
3618+ my @required_args = qw(make_cxn OptionParser DSNParser Quoter);
3619+ foreach my $arg ( @required_args ) {
3620+ die "I need a $arg argument" unless $args{$arg};
3621+ }
3622+ my ($make_cxn, $o, $dp) = @args{@required_args};
3623+
3624+ my $slaves = [];
3625+ my $method = $o->get('recursion-method');
3626+ MKDEBUG && _d('Slave recursion method:', $method);
3627+ if ( !$method || $method =~ m/proocesslist|hosts/i ) {
3628+ my @required_args = qw(dbh dsn);
3629+ foreach my $arg ( @required_args ) {
3630+ die "I need a $arg argument" unless $args{$arg};
3631+ }
3632+ my ($dbh, $dsn) = @args{@required_args};
3633+ $self->recurse_to_slaves(
3634+ { dbh => $dbh,
3635+ dsn => $dsn,
3636+ dsn_parser => $dp,
3637+ recurse => $o->get('recurse'),
3638+ method => $o->get('recursion-method'),
3639+ callback => sub {
3640+ my ( $dsn, $dbh, $level, $parent ) = @_;
3641+ return unless $level;
3642+ MKDEBUG && _d('Found slave:', $dp->as_string($dsn));
3643+ push @$slaves, $make_cxn->(dsn => $dsn, dbh => $dbh);
3644+ return;
3645+ },
3646+ }
3647+ );
3648+ }
3649+ elsif ( $method =~ m/^dsn=/i ) {
3650+ my ($dsn_table_dsn) = $method =~ m/^dsn=(.+)/i;
3651+ $slaves = $self->get_cxn_from_dsn_table(
3652+ %args,
3653+ dsn_table_dsn => $dsn_table_dsn,
3654+ );
3655+ }
3656+ else {
3657+ die "Invalid --recusion-method: $method. Valid values are: "
3658+ . "dsn=DSN, hosts, or processlist.\n";
3659+ }
3660+
3661+ return $slaves;
3662+}
3663+
3664 sub recurse_to_slaves {
3665 my ( $self, $args, $level ) = @_;
3666 $level ||= 0;
3667@@ -4169,6 +3067,43 @@
3668 return;
3669 }
3670
3671+sub get_cxn_from_dsn_table {
3672+ my ($self, %args) = @_;
3673+ my @required_args = qw(dsn_table_dsn make_cxn DSNParser Quoter);
3674+ foreach my $arg ( @required_args ) {
3675+ die "I need a $arg argument" unless $args{$arg};
3676+ }
3677+ my ($dsn_table_dsn, $make_cxn, $dp, $q) = @args{@required_args};
3678+ MKDEBUG && _d('DSN table DSN:', $dsn_table_dsn);
3679+
3680+ my $dsn = $dp->parse($dsn_table_dsn);
3681+ my $dsn_table;
3682+ if ( $dsn->{D} && $dsn->{t} ) {
3683+ $dsn_table = $q->quote($dsn->{D}, $dsn->{t});
3684+ }
3685+ elsif ( $dsn->{t} && $dsn->{t} =~ m/\./ ) {
3686+ $dsn_table = $q->quote($q->split_unquote($dsn->{t}));
3687+ }
3688+ else {
3689+ die "DSN table DSN does not specify a database (D) "
3690+ . "or a database-qualified table (t)";
3691+ }
3692+
3693+ my $dsn_tbl_cxn = $make_cxn->(dsn => $dsn);
3694+ my $dbh = $dsn_tbl_cxn->connect();
3695+ my $sql = "SELECT dsn FROM $dsn_table ORDER BY id";
3696+ MKDEBUG && _d($sql);
3697+ my $dsn_strings = $dbh->selectcol_arrayref($sql);
3698+ my @cxn;
3699+ if ( $dsn_strings ) {
3700+ foreach my $dsn_string ( @$dsn_strings ) {
3701+ MKDEBUG && _d('DSN from DSN table:', $dsn_string);
3702+ push @cxn, $make_cxn->(dsn_string => $dsn_string);
3703+ }
3704+ }
3705+ return \@cxn;
3706+}
3707+
3708 sub _d {
3709 my ($package, undef, $line) = caller 0;
3710 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3711@@ -4184,6 +3119,1206 @@
3712 # ###########################################################################
3713
3714 # ###########################################################################
3715+# RowChecksum package
3716+# This package is a copy without comments from the original. The original
3717+# with comments and its test file can be found in the Bazaar repository at,
3718+# lib/RowChecksum.pm
3719+# t/lib/RowChecksum.t
3720+# See https://launchpad.net/percona-toolkit for more information.
3721+# ###########################################################################
3722+{
3723+package RowChecksum;
3724+
3725+use strict;
3726+use warnings FATAL => 'all';
3727+use English qw(-no_match_vars);
3728+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
3729+
3730+use List::Util qw(max);
3731+use Data::Dumper;
3732+$Data::Dumper::Indent = 1;
3733+$Data::Dumper::Sortkeys = 1;
3734+$Data::Dumper::Quotekeys = 0;
3735+
3736+sub new {
3737+ my ( $class, %args ) = @_;
3738+ foreach my $arg ( qw(OptionParser Quoter) ) {
3739+ die "I need a $arg argument" unless defined $args{$arg};
3740+ }
3741+ my $self = { %args };
3742+ return bless $self, $class;
3743+}
3744+
3745+sub make_row_checksum {
3746+ my ( $self, %args ) = @_;
3747+ my @required_args = qw(tbl);
3748+ foreach my $arg( @required_args ) {
3749+ die "I need a $arg argument" unless $args{$arg};
3750+ }
3751+ my ($tbl) = @args{@required_args};
3752+
3753+ my $o = $self->{OptionParser};
3754+ my $q = $self->{Quoter};
3755+ my $tbl_struct = $tbl->{tbl_struct};
3756+ my $func = $args{func} || uc($o->get('function'));
3757+ my $cols = $self->get_checksum_columns(%args);
3758+
3759+ my $query;
3760+ if ( !$args{no_cols} ) {
3761+ $query = join(', ',
3762+ map {
3763+ my $col = $_;
3764+ if ( $col =~ m/\+ 0/ ) {
3765+ my ($real_col) = /^(\S+)/;
3766+ $col .= " AS $real_col";
3767+ }
3768+ elsif ( $col =~ m/TRIM/ ) {
3769+ my ($real_col) = m/TRIM\(([^\)]+)\)/;
3770+ $col .= " AS $real_col";
3771+ }
3772+ $col;
3773+ } @{$cols->{select}})
3774+ . ', ';
3775+ }
3776+
3777+ if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
3778+ my $sep = $o->get('separator') || '#';
3779+ $sep =~ s/'//g;
3780+ $sep ||= '#';
3781+
3782+ my @nulls = grep { $cols->{allowed}->{$_} } @{$tbl_struct->{null_cols}};
3783+ if ( @nulls ) {
3784+ my $bitmap = "CONCAT("
3785+ . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
3786+ . ")";
3787+ push @{$cols->{select}}, $bitmap;
3788+ }
3789+
3790+ $query .= @{$cols->{select}} > 1
3791+ ? "$func(CONCAT_WS('$sep', " . join(', ', @{$cols->{select}}) . '))'
3792+ : "$func($cols->{select}->[0])";
3793+ }
3794+ else {
3795+ my $fnv_func = uc $func;
3796+ $query .= "$fnv_func(" . join(', ', @{$cols->{select}}) . ')';
3797+ }
3798+
3799+ MKDEBUG && _d('Row checksum:', $query);
3800+ return $query;
3801+}
3802+
3803+sub make_chunk_checksum {
3804+ my ( $self, %args ) = @_;
3805+ my @required_args = qw(tbl);
3806+ foreach my $arg( @required_args ) {
3807+ die "I need a $arg argument" unless $args{$arg};
3808+ }
3809+ if ( !$args{dbh} && !($args{func} && $args{crc_width} && $args{crc_type}) ) {
3810+ die "I need a dbh argument"
3811+ }
3812+ my ($tbl) = @args{@required_args};
3813+ my $o = $self->{OptionParser};
3814+ my $q = $self->{Quoter};
3815+
3816+ my %crc_args = $self->get_crc_args(%args);
3817+ MKDEBUG && _d("Checksum strat:", Dumper(\%crc_args));
3818+
3819+ my $row_checksum = $self->make_row_checksum(
3820+ %args,
3821+ %crc_args,
3822+ no_cols => 1
3823+ );
3824+ my $crc;
3825+ if ( $crc_args{crc_type} =~ m/int$/ ) {
3826+ $crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
3827+ . "10, 16)), 0)";
3828+ }
3829+ else {
3830+ my $slices = $self->_make_xor_slices(
3831+ row_checksum => $row_checksum,
3832+ %crc_args,
3833+ );
3834+ $crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
3835+ }
3836+
3837+ my $select = "COUNT(*) AS cnt, $crc AS crc";
3838+ MKDEBUG && _d('Chunk checksum:', $select);
3839+ return $select;
3840+}
3841+
3842+sub get_checksum_columns {
3843+ my ($self, %args) = @_;
3844+ my @required_args = qw(tbl);
3845+ foreach my $arg( @required_args ) {
3846+ die "I need a $arg argument" unless $args{$arg};
3847+ }
3848+ my ($tbl) = @args{@required_args};
3849+ my $o = $self->{OptionParser};
3850+ my $q = $self->{Quoter};
3851+
3852+ my $trim = $o->get('trim');
3853+ my $float_precision = $o->get('float-precision');
3854+
3855+ my $tbl_struct = $tbl->{tbl_struct};
3856+ my $ignore_col = $o->get('ignore-columns') || {};
3857+ my $all_cols = $o->get('columns') || $tbl_struct->{cols};
3858+ my %cols = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols;
3859+ my %seen;
3860+ my @cols =
3861+ map {
3862+ my $type = $tbl_struct->{type_for}->{$_};
3863+ my $result = $q->quote($_);
3864+ if ( $type eq 'timestamp' ) {
3865+ $result .= ' + 0';
3866+ }
3867+ elsif ( $float_precision && $type =~ m/float|double/ ) {
3868+ $result = "ROUND($result, $float_precision)";
3869+ }
3870+ elsif ( $trim && $type =~ m/varchar/ ) {
3871+ $result = "TRIM($result)";
3872+ }
3873+ $result;
3874+ }
3875+ grep {
3876+ $cols{$_} && !$seen{$_}++
3877+ }
3878+ @{$tbl_struct->{cols}};
3879+
3880+ return {
3881+ select => \@cols,
3882+ allowed => \%cols,
3883+ };
3884+}
3885+
3886+sub get_crc_args {
3887+ my ($self, %args) = @_;
3888+ my $func = $args{func} || $self->_get_hash_func(%args);
3889+ my $crc_width = $args{crc_width}|| $self->_get_crc_width(%args, func=>$func);
3890+ my $crc_type = $args{crc_type} || $self->_get_crc_type(%args, func=>$func);
3891+ my $opt_slice;
3892+ if ( $args{dbh} && $crc_type !~ m/int$/ ) {
3893+ $opt_slice = $self->_optimize_xor(%args, func=>$func);
3894+ }
3895+
3896+ return (
3897+ func => $func,
3898+ crc_width => $crc_width,
3899+ crc_type => $crc_type,
3900+ opt_slice => $opt_slice,
3901+ );
3902+}
3903+
3904+sub _get_hash_func {
3905+ my ( $self, %args ) = @_;
3906+ my @required_args = qw(dbh);
3907+ foreach my $arg( @required_args ) {
3908+ die "I need a $arg argument" unless $args{$arg};
3909+ }
3910+ my ($dbh) = @args{@required_args};
3911+ my $o = $self->{OptionParser};
3912+ my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
3913+
3914+ if ( my $func = $o->get('function') ) {
3915+ unshift @funcs, $func;
3916+ }
3917+
3918+ my ($result, $error);
3919+ foreach my $func ( @funcs ) {
3920+ eval {
3921+ my $sql = "SELECT $func('test-string')";
3922+ MKDEBUG && _d($sql);
3923+ $args{dbh}->do($sql);
3924+ };
3925+ if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
3926+ $error .= qq{$func cannot be used because "$1"\n};
3927+ MKDEBUG && _d($func, 'cannot be used because', $1);
3928+ }
3929+ MKDEBUG && _d('Chosen hash func:', $result);
3930+ return $func;
3931+ }
3932+ die $error || 'No hash functions (CRC32, MD5, etc.) are available';
3933+}
3934+
3935+sub _get_crc_width {
3936+ my ( $self, %args ) = @_;
3937+ my @required_args = qw(dbh func);
3938+ foreach my $arg( @required_args ) {
3939+ die "I need a $arg argument" unless $args{$arg};
3940+ }
3941+ my ($dbh, $func) = @args{@required_args};
3942+
3943+ my $crc_width = 16;
3944+ if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
3945+ eval {
3946+ my ($val) = $dbh->selectrow_array("SELECT $func('a')");
3947+ $crc_width = max(16, length($val));
3948+ };
3949+ }
3950+ return $crc_width;
3951+}
3952+
3953+sub _get_crc_type {
3954+ my ( $self, %args ) = @_;
3955+ my @required_args = qw(dbh func);
3956+ foreach my $arg( @required_args ) {
3957+ die "I need a $arg argument" unless $args{$arg};
3958+ }
3959+ my ($dbh, $func) = @args{@required_args};
3960+
3961+ my $type = '';
3962+ my $length = 0;
3963+ my $sql = "SELECT $func('a')";
3964+ my $sth = $dbh->prepare($sql);
3965+ eval {
3966+ $sth->execute();
3967+ $type = $sth->{mysql_type_name}->[0];
3968+ $length = $sth->{mysql_length}->[0];
3969+ MKDEBUG && _d($sql, $type, $length);
3970+ if ( $type eq 'bigint' && $length < 20 ) {
3971+ $type = 'int';
3972+ }
3973+ };
3974+ $sth->finish;
3975+ MKDEBUG && _d('crc_type:', $type, 'length:', $length);
3976+ return $type;
3977+}
3978+
3979+sub _optimize_xor {
3980+ my ( $self, %args ) = @_;
3981+ my @required_args = qw(dbh func);
3982+ foreach my $arg( @required_args ) {
3983+ die "I need a $arg argument" unless $args{$arg};
3984+ }
3985+ my ($dbh, $func) = @args{@required_args};
3986+
3987+ die "$func never needs BIT_XOR optimization"
3988+ if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
3989+
3990+ my $opt_slice = 0;
3991+ my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
3992+ my $sliced = '';
3993+ my $start = 1;
3994+ my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);
3995+
3996+ do { # Try different positions till sliced result equals non-sliced.
3997+ MKDEBUG && _d('Trying slice', $opt_slice);
3998+ $dbh->do('SET @crc := "", @cnt := 0');
3999+ my $slices = $self->_make_xor_slices(
4000+ row_checksum => "\@crc := $func('a')",
4001+ crc_width => $crc_width,
4002+ opt_slice => $opt_slice,
4003+ );
4004+
4005+ my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
4006+ $sliced = ($dbh->selectrow_array($sql))[0];
4007+ if ( $sliced ne $unsliced ) {
4008+ MKDEBUG && _d('Slice', $opt_slice, 'does not work');
4009+ $start += 16;
4010+ ++$opt_slice;
4011+ }
4012+ } while ( $start < $crc_width && $sliced ne $unsliced );
4013+
4014+ if ( $sliced eq $unsliced ) {
4015+ MKDEBUG && _d('Slice', $opt_slice, 'works');
4016+ return $opt_slice;
4017+ }
4018+ else {
4019+ MKDEBUG && _d('No slice works');
4020+ return undef;
4021+ }
4022+}
4023+
4024+sub _make_xor_slices {
4025+ my ( $self, %args ) = @_;
4026+ my @required_args = qw(row_checksum crc_width);
4027+ foreach my $arg( @required_args ) {
4028+ die "I need a $arg argument" unless $args{$arg};
4029+ }
4030+ my ($row_checksum, $crc_width) = @args{@required_args};
4031+ my ($opt_slice) = $args{opt_slice};
4032+
4033+ my @slices;
4034+ for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
4035+ my $len = $crc_width - $start + 1;
4036+ if ( $len > 16 ) {
4037+ $len = 16;
4038+ }
4039+ push @slices,
4040+ "LPAD(CONV(BIT_XOR("
4041+ . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
4042+ . ", 10, 16), $len, '0')";
4043+ }
4044+
4045+ if ( defined $opt_slice && $opt_slice < @slices ) {
4046+ $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
4047+ }
4048+ else {
4049+ map { s/\@crc/$row_checksum/ } @slices;
4050+ }
4051+
4052+ return join(', ', @slices);
4053+}
4054+
4055+sub find_replication_differences {
4056+ my ($self, %args) = @_;
4057+ my @required_args = qw(dbh repl_table);
4058+ foreach my $arg( @required_args ) {
4059+ die "I need a $arg argument" unless $args{$arg};
4060+ }
4061+ my ($dbh, $repl_table) = @args{@required_args};
4062+
4063+ my $sql
4064+ = "SELECT CONCAT(db, '.', tbl) AS `table`, "
4065+ . "chunk, chunk_index, lower_boundary, upper_boundary, "
4066+ . "COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, "
4067+ . "COALESCE("
4068+ . "this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), 0"
4069+ . ") AS crc_diff, this_cnt, master_cnt, this_crc, master_crc "
4070+ . "FROM $repl_table "
4071+ . "WHERE (master_cnt <> this_cnt OR master_crc <> this_crc "
4072+ . "OR ISNULL(master_crc) <> ISNULL(this_crc))"
4073+ . ($args{where} ? " AND ($args{where})" : "");
4074+ MKDEBUG && _d($sql);
4075+ my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
4076+ return $diffs;
4077+}
4078+
4079+sub _d {
4080+ my ($package, undef, $line) = caller 0;
4081+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4082+ map { defined $_ ? $_ : 'undef' }
4083+ @_;
4084+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4085+}
4086+
4087+1;
4088+}
4089+# ###########################################################################
4090+# End RowChecksum package
4091+# ###########################################################################
4092+
4093+# ###########################################################################
4094+# NibbleIterator package
4095+# This package is a copy without comments from the original. The original
4096+# with comments and its test file can be found in the Bazaar repository at,
4097+# lib/NibbleIterator.pm
4098+# t/lib/NibbleIterator.t
4099+# See https://launchpad.net/percona-toolkit for more information.
4100+# ###########################################################################
4101+{
4102+package NibbleIterator;
4103+
4104+use strict;
4105+use warnings FATAL => 'all';
4106+use English qw(-no_match_vars);
4107+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4108+
4109+use Data::Dumper;
4110+$Data::Dumper::Indent = 1;
4111+$Data::Dumper::Sortkeys = 1;
4112+$Data::Dumper::Quotekeys = 0;
4113+
4114+sub new {
4115+ my ( $class, %args ) = @_;
4116+ my @required_args = qw(Cxn tbl chunk_size OptionParser Quoter TableNibbler TableParser);
4117+ foreach my $arg ( @required_args ) {
4118+ die "I need a $arg argument" unless $args{$arg};
4119+ }
4120+ my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
4121+
4122+ my $where = $o->get('where');
4123+ my ($row_est, $mysql_index) = get_row_estimate(%args, where => $where);
4124+ my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
4125+ ? $row_est <= $chunk_size * $o->get('chunk-size-limit')
4126+ : 0;
4127+ MKDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
4128+
4129+ if ( $args{resume}
4130+ && !defined $args{resume}->{lower_boundary}
4131+ && !defined $args{resume}->{upper_boundary} ) {
4132+ MKDEBUG && _d('Resuming from one nibble table');
4133+ $one_nibble = 1;
4134+ }
4135+
4136+ my $index = _find_best_index(%args, mysql_index => $mysql_index);
4137+ if ( !$index && !$one_nibble ) {
4138+ die "There is no good index and the table is oversized.";
4139+ }
4140+
4141+ my $tbl_struct = $tbl->{tbl_struct};
4142+ my $ignore_col = $o->get('ignore-columns') || {};
4143+ my $all_cols = $o->get('columns') || $tbl_struct->{cols};
4144+ my @cols = grep { !$ignore_col->{$_} } @$all_cols;
4145+ my $self;
4146+ if ( $one_nibble ) {
4147+ my $nibble_sql
4148+ = ($args{dml} ? "$args{dml} " : "SELECT ")
4149+ . ($args{select} ? $args{select}
4150+ : join(', ', map { $q->quote($_) } @cols))
4151+ . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
4152+ . ($where ? " WHERE $where" : '')
4153+ . " /*checksum table*/";
4154+ MKDEBUG && _d('One nibble statement:', $nibble_sql);
4155+
4156+ my $explain_nibble_sql
4157+ = "EXPLAIN SELECT "
4158+ . ($args{select} ? $args{select}
4159+ : join(', ', map { $q->quote($_) } @cols))
4160+ . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
4161+ . ($where ? " WHERE $where" : '')
4162+ . " /*explain checksum table*/";
4163+ MKDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);
4164+
4165+ $self = {
4166+ %args,
4167+ one_nibble => 1,
4168+ limit => 0,
4169+ nibble_sql => $nibble_sql,
4170+ explain_nibble_sql => $explain_nibble_sql,
4171+ };
4172+ }
4173+ else {
4174+ my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
4175+
4176+ my $asc = $args{TableNibbler}->generate_asc_stmt(
4177+ %args,
4178+ tbl_struct => $tbl->{tbl_struct},
4179+ index => $index,
4180+ cols => \@cols,
4181+ asc_only => 1,
4182+ );
4183+ MKDEBUG && _d('Ascend params:', Dumper($asc));
4184+
4185+ my $from = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
4186+ my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
4187+
4188+ my $first_lb_sql
4189+ = "SELECT /*!40001 SQL_NO_CACHE */ "
4190+ . join(', ', map { $q->quote($_) } @{$asc->{scols}})
4191+ . " FROM $from"
4192+ . ($where ? " WHERE $where" : '')
4193+ . " ORDER BY $order_by"
4194+ . " LIMIT 1"
4195+ . " /*first lower boundary*/";
4196+ MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);
4197+
4198+ my $resume_lb_sql;
4199+ if ( $args{resume} ) {
4200+ $resume_lb_sql
4201+ = "SELECT /*!40001 SQL_NO_CACHE */ "
4202+ . join(', ', map { $q->quote($_) } @{$asc->{scols}})
4203+ . " FROM $from"
4204+ . " WHERE " . $asc->{boundaries}->{'>'}
4205+ . ($where ? " AND ($where)" : '')
4206+ . " ORDER BY $order_by"
4207+ . " LIMIT 1"
4208+ . " /*resume lower boundary*/";
4209+ MKDEBUG && _d('Resume lower boundary statement:', $resume_lb_sql);
4210+ }
4211+
4212+ my $last_ub_sql
4213+ = "SELECT /*!40001 SQL_NO_CACHE */ "
4214+ . join(', ', map { $q->quote($_) } @{$asc->{scols}})
4215+ . " FROM $from"
4216+ . ($where ? " WHERE $where" : '')
4217+ . " ORDER BY "
4218+ . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
4219+ . " LIMIT 1"
4220+ . " /*last upper boundary*/";
4221+ MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);
4222+
4223+ my $ub_sql
4224+ = "SELECT /*!40001 SQL_NO_CACHE */ "
4225+ . join(', ', map { $q->quote($_) } @{$asc->{scols}})
4226+ . " FROM $from"
4227+ . " WHERE " . $asc->{boundaries}->{'>='}
4228+ . ($where ? " AND ($where)" : '')
4229+ . " ORDER BY $order_by"
4230+ . " LIMIT ?, 2"
4231+ . " /*next chunk boundary*/";
4232+ MKDEBUG && _d('Upper boundary statement:', $ub_sql);
4233+
4234+ my $nibble_sql
4235+ = ($args{dml} ? "$args{dml} " : "SELECT ")
4236+ . ($args{select} ? $args{select}
4237+ : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
4238+ . " FROM $from"
4239+ . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
4240+ . " AND " . $asc->{boundaries}->{'<='} # upper boundary
4241+ . ($where ? " AND ($where)" : '')
4242+ . ($args{order_by} ? " ORDER BY $order_by" : "")
4243+ . " /*checksum chunk*/";
4244+ MKDEBUG && _d('Nibble statement:', $nibble_sql);
4245+
4246+ my $explain_nibble_sql
4247+ = "EXPLAIN SELECT "
4248+ . ($args{select} ? $args{select}
4249+ : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
4250+ . " FROM $from"
4251+ . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
4252+ . " AND " . $asc->{boundaries}->{'<='} # upper boundary
4253+ . ($where ? " AND ($where)" : '')
4254+ . ($args{order_by} ? " ORDER BY $order_by" : "")
4255+ . " /*explain checksum chunk*/";
4256+ MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
4257+
4258+ my $limit = $chunk_size - 1;
4259+ MKDEBUG && _d('Initial chunk size (LIMIT):', $limit);
4260+
4261+ $self = {
4262+ %args,
4263+ index => $index,
4264+ limit => $limit,
4265+ first_lb_sql => $first_lb_sql,
4266+ last_ub_sql => $last_ub_sql,
4267+ ub_sql => $ub_sql,
4268+ nibble_sql => $nibble_sql,
4269+ explain_ub_sql => "EXPLAIN $ub_sql",
4270+ explain_nibble_sql => $explain_nibble_sql,
4271+ resume_lb_sql => $resume_lb_sql,
4272+ sql => {
4273+ columns => $asc->{scols},
4274+ from => $from,
4275+ where => $where,
4276+ boundaries => $asc->{boundaries},
4277+ order_by => $order_by,
4278+ },
4279+ };
4280+ }
4281+
4282+ $self->{row_est} = $row_est;
4283+ $self->{nibbleno} = 0;
4284+ $self->{have_rows} = 0;
4285+ $self->{rowno} = 0;
4286+ $self->{oktonibble} = 1;
4287+
4288+ return bless $self, $class;
4289+}
4290+
4291+sub next {
4292+ my ($self) = @_;
4293+
4294+ if ( !$self->{oktonibble} ) {
4295+ MKDEBUG && _d('Not ok to nibble');
4296+ return;
4297+ }
4298+
4299+ my %callback_args = (
4300+ Cxn => $self->{Cxn},
4301+ tbl => $self->{tbl},
4302+ NibbleIterator => $self,
4303+ );
4304+
4305+ if ($self->{nibbleno} == 0) {
4306+ $self->_prepare_sths();
4307+ $self->_get_bounds();
4308+ if ( my $callback = $self->{callbacks}->{init} ) {
4309+ $self->{oktonibble} = $callback->(%callback_args);
4310+ MKDEBUG && _d('init callback returned', $self->{oktonibble});
4311+ if ( !$self->{oktonibble} ) {
4312+ $self->{no_more_boundaries} = 1;
4313+ return;
4314+ }
4315+ }
4316+ }
4317+
4318+ NIBBLE:
4319+ while ( $self->{have_rows} || $self->_next_boundaries() ) {
4320+ if ( !$self->{have_rows} ) {
4321+ $self->{nibbleno}++;
4322+ MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
4323+ join(', ', (@{$self->{lower}}, @{$self->{upper}})));
4324+ if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
4325+ $self->{have_rows} = $callback->(%callback_args);
4326+ }
4327+ else {
4328+ $self->{nibble_sth}->execute(@{$self->{lower}}, @{$self->{upper}});
4329+ $self->{have_rows} = $self->{nibble_sth}->rows();
4330+ }
4331+ MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
4332+ }
4333+
4334+ if ( $self->{have_rows} ) {
4335+ my $row = $self->{nibble_sth}->fetchrow_arrayref();
4336+ if ( $row ) {
4337+ $self->{rowno}++;
4338+ MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
4339+ return [ @$row ];
4340+ }
4341+ }
4342+
4343+ MKDEBUG && _d('No rows in nibble or nibble skipped');
4344+ if ( my $callback = $self->{callbacks}->{after_nibble} ) {
4345+ $callback->(%callback_args);
4346+ }
4347+ $self->{rowno} = 0;
4348+ $self->{have_rows} = 0;
4349+ }
4350+
4351+ MKDEBUG && _d('Done nibbling');
4352+ if ( my $callback = $self->{callbacks}->{done} ) {
4353+ $callback->(%callback_args);
4354+ }
4355+
4356+ return;
4357+}
4358+
4359+sub nibble_number {
4360+ my ($self) = @_;
4361+ return $self->{nibbleno};
4362+}
4363+
4364+sub set_nibble_number {
4365+ my ($self, $n) = @_;
4366+ die "I need a number" unless $n;
4367+ $self->{nibbleno} = $n;
4368+ MKDEBUG && _d('Set new nibble number:', $n);
4369+ return;
4370+}
4371+
4372+sub nibble_index {
4373+ my ($self) = @_;
4374+ return $self->{index};
4375+}
4376+
4377+sub statements {
4378+ my ($self) = @_;
4379+ return {
4380+ nibble => $self->{nibble_sth},
4381+ explain_nibble => $self->{explain_nibble_sth},
4382+ upper_boundary => $self->{ub_sth},
4383+ explain_upper_boundary => $self->{explain_ub_sth},
4384+ }
4385+}
4386+
4387+sub boundaries {
4388+ my ($self) = @_;
4389+ return {
4390+ first_lower => $self->{first_lower},
4391+ lower => $self->{lower},
4392+ upper => $self->{upper},
4393+ next_lower => $self->{next_lower},
4394+ last_upper => $self->{last_upper},
4395+ };
4396+}
4397+
4398+sub set_boundary {
4399+ my ($self, $boundary, $values) = @_;
4400+ die "I need a boundary parameter"
4401+ unless $boundary;
4402+ die "Invalid boundary: $boundary"
4403+ unless $boundary =~ m/^(?:lower|upper|next_lower|last_upper)$/;
4404+ die "I need a values arrayref parameter"
4405+ unless $values && ref $values eq 'ARRAY';
4406+ $self->{$boundary} = $values;
4407+ MKDEBUG && _d('Set new', $boundary, 'boundary:', Dumper($values));
4408+ return;
4409+}
4410+
4411+sub one_nibble {
4412+ my ($self) = @_;
4413+ return $self->{one_nibble};
4414+}
4415+
4416+sub chunk_size {
4417+ my ($self) = @_;
4418+ return $self->{limit} + 1;
4419+}
4420+
4421+sub set_chunk_size {
4422+ my ($self, $limit) = @_;
4423+ return if $self->{one_nibble};
4424+ die "Chunk size must be > 0" unless $limit;
4425+ $self->{limit} = $limit - 1;
4426+ MKDEBUG && _d('Set new chunk size (LIMIT):', $limit);
4427+ return;
4428+}
4429+
4430+sub sql {
4431+ my ($self) = @_;
4432+ return $self->{sql};
4433+}
4434+
4435+sub more_boundaries {
4436+ my ($self) = @_;
4437+ return !$self->{no_more_boundaries};
4438+}
4439+
4440+sub row_estimate {
4441+ my ($self) = @_;
4442+ return $self->{row_est};
4443+}
4444+
4445+sub _find_best_index {
4446+ my (%args) = @_;
4447+ my @required_args = qw(Cxn tbl TableParser);
4448+ my ($cxn, $tbl, $tp) = @args{@required_args};
4449+ my $tbl_struct = $tbl->{tbl_struct};
4450+ my $indexes = $tbl_struct->{keys};
4451+
4452+ my $want_index = $args{chunk_index};
4453+ if ( $want_index ) {
4454+ MKDEBUG && _d('User wants to use index', $want_index);
4455+ if ( !exists $indexes->{$want_index} ) {
4456+ MKDEBUG && _d('Cannot use user index because it does not exist');
4457+ $want_index = undef;
4458+ }
4459+ }
4460+
4461+ if ( !$want_index && $args{mysql_index} ) {
4462+ MKDEBUG && _d('MySQL wants to use index', $args{mysql_index});
4463+ $want_index = $args{mysql_index};
4464+ }
4465+
4466+ my $best_index;
4467+ my @possible_indexes;
4468+ if ( $want_index ) {
4469+ if ( $indexes->{$want_index}->{is_unique} ) {
4470+ MKDEBUG && _d('Will use wanted index');
4471+ $best_index = $want_index;
4472+ }
4473+ else {
4474+ MKDEBUG && _d('Wanted index is a possible index');
4475+ push @possible_indexes, $want_index;
4476+ }
4477+ }
4478+ else {
4479+ MKDEBUG && _d('Auto-selecting best index');
4480+ foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
4481+ if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
4482+ $best_index = $index;
4483+ last;
4484+ }
4485+ else {
4486+ push @possible_indexes, $index;
4487+ }
4488+ }
4489+ }
4490+
4491+ if ( !$best_index && @possible_indexes ) {
4492+ MKDEBUG && _d('No PRIMARY or unique indexes;',
4493+ 'will use index with highest cardinality');
4494+ foreach my $index ( @possible_indexes ) {
4495+ $indexes->{$index}->{cardinality} = _get_index_cardinality(
4496+ %args,
4497+ index => $index,
4498+ );
4499+ }
4500+ @possible_indexes = sort {
4501+ my $cmp
4502+ = $indexes->{$b}->{cardinality} <=> $indexes->{$b}->{cardinality};
4503+ if ( $cmp == 0 ) {
4504+ $cmp = scalar @{$indexes->{$b}->{cols}}
4505+ <=> scalar @{$indexes->{$a}->{cols}};
4506+ }
4507+ $cmp;
4508+ } @possible_indexes;
4509+ $best_index = $possible_indexes[0];
4510+ }
4511+
4512+ MKDEBUG && _d('Best index:', $best_index);
4513+ return $best_index;
4514+}
4515+
4516+sub _get_index_cardinality {
4517+ my (%args) = @_;
4518+ my @required_args = qw(Cxn tbl index Quoter);
4519+ my ($cxn, $tbl, $index, $q) = @args{@required_args};
4520+
4521+ my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
4522+ . " WHERE Key_name = '$index'";
4523+ MKDEBUG && _d($sql);
4524+ my $cardinality = 1;
4525+ my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');
4526+ foreach my $row ( values %$rows ) {
4527+ $cardinality *= $row->{cardinality} if $row->{cardinality};
4528+ }
4529+ MKDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
4530+ return $cardinality;
4531+}
4532+
4533+sub get_row_estimate {
4534+ my (%args) = @_;
4535+ my @required_args = qw(Cxn tbl OptionParser TableParser Quoter);
4536+ my ($cxn, $tbl, $o, $tp, $q) = @args{@required_args};
4537+
4538+ if ( $args{where} ) {
4539+ MKDEBUG && _d('WHERE clause, using explain plan for row estimate');
4540+ my $table = $q->quote(@{$tbl}{qw(db tbl)});
4541+ my $sql = "EXPLAIN SELECT * FROM $table WHERE $args{where}";
4542+ MKDEBUG && _d($sql);
4543+ my $expl = $cxn->dbh()->selectrow_hashref($sql);
4544+ MKDEBUG && _d(Dumper($expl));
4545+ return ($expl->{rows} || 0), $expl->{key};
4546+ }
4547+ else {
4548+ MKDEBUG && _d('No WHERE clause, using table status for row estimate');
4549+ return $tbl->{tbl_status}->{rows} || 0;
4550+ }
4551+}
4552+
4553+sub _prepare_sths {
4554+ my ($self) = @_;
4555+ MKDEBUG && _d('Preparing statement handles');
4556+
4557+ my $dbh = $self->{Cxn}->dbh();
4558+
4559+ $self->{nibble_sth} = $dbh->prepare($self->{nibble_sql});
4560+ $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
4561+
4562+ if ( !$self->{one_nibble} ) {
4563+ $self->{ub_sth} = $dbh->prepare($self->{ub_sql});
4564+ $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
4565+ }
4566+
4567+ return;
4568+}
4569+
4570+sub _get_bounds {
4571+ my ($self) = @_;
4572+
4573+ if ( $self->{one_nibble} ) {
4574+ if ( $self->{resume} ) {
4575+ $self->{no_more_boundaries} = 1;
4576+ }
4577+ return;
4578+ }
4579+
4580+ my $dbh = $self->{Cxn}->dbh();
4581+
4582+ $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
4583+ MKDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));
4584+
4585+ if ( my $nibble = $self->{resume} ) {
4586+ if ( defined $nibble->{lower_boundary}
4587+ && defined $nibble->{upper_boundary} ) {
4588+ my $sth = $dbh->prepare($self->{resume_lb_sql});
4589+ my @ub = split ',', $nibble->{upper_boundary};
4590+ MKDEBUG && _d($sth->{Statement}, 'params:', @ub);
4591+ $sth->execute(@ub);
4592+ $self->{next_lower} = $sth->fetchrow_arrayref();
4593+ $sth->finish();
4594+ }
4595+ }
4596+ else {
4597+ $self->{next_lower} = $self->{first_lower};
4598+ }
4599+ MKDEBUG && _d('Next lower boundary:', Dumper($self->{next_lower}));
4600+
4601+ if ( !$self->{next_lower} ) {
4602+ MKDEBUG && _d('At end of table, or no more boundaries to resume');
4603+ $self->{no_more_boundaries} = 1;
4604+ }
4605+
4606+ $self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
4607+ MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
4608+
4609+ return;
4610+}
4611+
4612+sub _next_boundaries {
4613+ my ($self) = @_;
4614+
4615+ if ( $self->{no_more_boundaries} ) {
4616+ MKDEBUG && _d('No more boundaries');
4617+ return; # stop nibbling
4618+ }
4619+
4620+ if ( $self->{one_nibble} ) {
4621+ $self->{lower} = $self->{upper} = [];
4622+ $self->{no_more_boundaries} = 1; # for next call
4623+ return 1; # continue nibbling
4624+ }
4625+
4626+ if ( $self->identical_boundaries($self->{lower}, $self->{next_lower}) ) {
4627+ MKDEBUG && _d('Infinite loop detected');
4628+ my $tbl = $self->{tbl};
4629+ my $index = $tbl->{tbl_struct}->{keys}->{$self->{index}};
4630+ my $n_cols = scalar @{$index->{cols}};
4631+ my $chunkno = $self->{nibbleno};
4632+ die "Possible infinite loop detected! "
4633+ . "The lower boundary for chunk $chunkno is "
4634+ . "<" . join(', ', @{$self->{lower}}) . "> and the lower "
4635+ . "boundary for chunk " . ($chunkno + 1) . " is also "
4636+ . "<" . join(', ', @{$self->{next_lower}}) . ">. "
4637+ . "This usually happens when using a non-unique single "
4638+ . "column index. The current chunk index for table "
4639+ . "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
4640+ . ($index->{is_unique} ? '' : ' not') . " unique and covers "
4641+ . ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
4642+ }
4643+ $self->{lower} = $self->{next_lower};
4644+
4645+ if ( my $callback = $self->{callbacks}->{next_boundaries} ) {
4646+ my $oktonibble = $callback->(
4647+ Cxn => $self->{Cxn},
4648+ tbl => $self->{tbl},
4649+ NibbleIterator => $self,
4650+ );
4651+ MKDEBUG && _d('next_boundaries callback returned', $oktonibble);
4652+ if ( !$oktonibble ) {
4653+ $self->{no_more_boundaries} = 1;
4654+ return; # stop nibbling
4655+ }
4656+ }
4657+
4658+ MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
4659+ join(', ', @{$self->{lower}}), $self->{limit});
4660+ $self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
4661+ my $boundary = $self->{ub_sth}->fetchall_arrayref();
4662+ MKDEBUG && _d('Next boundary:', Dumper($boundary));
4663+ if ( $boundary && @$boundary ) {
4664+ $self->{upper} = $boundary->[0]; # this nibble
4665+ if ( $boundary->[1] ) {
4666+ $self->{next_lower} = $boundary->[1]; # next nibble
4667+ }
4668+ else {
4669+ $self->{no_more_boundaries} = 1; # for next call
4670+ MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
4671+ }
4672+ }
4673+ else {
4674+ $self->{no_more_boundaries} = 1; # for next call
4675+ $self->{upper} = $self->{last_upper};
4676+ MKDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
4677+ }
4678+ $self->{ub_sth}->finish();
4679+
4680+ return 1; # continue nibbling
4681+}
4682+
4683+sub identical_boundaries {
4684+ my ($self, $b1, $b2) = @_;
4685+
4686+ return 0 if ($b1 && !$b2) || (!$b1 && $b2);
4687+
4688+ return 1 if !$b1 && !$b2;
4689+
4690+ die "Boundaries have different numbers of values"
4691+ if scalar @$b1 != scalar @$b2; # shouldn't happen
4692+ my $n_vals = scalar @$b1;
4693+ for my $i ( 0..($n_vals-1) ) {
4694+ return 0 if $b1->[$i] ne $b2->[$i]; # diff
4695+ }
4696+ return 1;
4697+}
4698+
4699+sub DESTROY {
4700+ my ( $self ) = @_;
4701+ foreach my $key ( keys %$self ) {
4702+ if ( $key =~ m/_sth$/ ) {
4703+ MKDEBUG && _d('Finish', $key);
4704+ $self->{$key}->finish();
4705+ }
4706+ }
4707+ return;
4708+}
4709+
4710+sub _d {
4711+ my ($package, undef, $line) = caller 0;
4712+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4713+ map { defined $_ ? $_ : 'undef' }
4714+ @_;
4715+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4716+}
4717+
4718+1;
4719+}
4720+# ###########################################################################
4721+# End NibbleIterator package
4722+# ###########################################################################
4723+
4724+# ###########################################################################
4725+# OobNibbleIterator package
4726+# This package is a copy without comments from the original. The original
4727+# with comments and its test file can be found in the Bazaar repository at,
4728+# lib/OobNibbleIterator.pm
4729+# t/lib/OobNibbleIterator.t
4730+# See https://launchpad.net/percona-toolkit for more information.
4731+# ###########################################################################
4732+{
4733+package OobNibbleIterator;
4734+use base 'NibbleIterator';
4735+
4736+use strict;
4737+use warnings FATAL => 'all';
4738+use English qw(-no_match_vars);
4739+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4740+
4741+use Data::Dumper;
4742+$Data::Dumper::Indent = 1;
4743+$Data::Dumper::Sortkeys = 1;
4744+$Data::Dumper::Quotekeys = 0;
4745+
4746+sub new {
4747+ my ( $class, %args ) = @_;
4748+ my @required_args = qw();
4749+ foreach my $arg ( @required_args ) {
4750+ die "I need a $arg argument" unless $args{$arg};
4751+ }
4752+
4753+ my $self = $class->SUPER::new(%args);
4754+
4755+ my $q = $self->{Quoter};
4756+ my $o = $self->{OptionParser};
4757+ my $where = $o->get('where');
4758+
4759+ if ( !$self->one_nibble() ) {
4760+ my $head_sql
4761+ = ($args{past_dml} || "SELECT ")
4762+ . ($args{past_select}
4763+ || join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4764+ . " FROM " . $self->{sql}->{from};
4765+
4766+ my $tail_sql
4767+ = ($where ? " AND ($where)" : '')
4768+ . " ORDER BY " . $self->{sql}->{order_by};
4769+
4770+ my $past_lower_sql
4771+ = $head_sql
4772+ . " WHERE " . $self->{sql}->{boundaries}->{'<'}
4773+ . $tail_sql
4774+ . " /*past lower chunk*/";
4775+ MKDEBUG && _d('Past lower statement:', $past_lower_sql);
4776+
4777+ my $explain_past_lower_sql
4778+ = "EXPLAIN SELECT "
4779+ . ($args{past_select}
4780+ || join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4781+ . " FROM " . $self->{sql}->{from}
4782+ . " WHERE " . $self->{sql}->{boundaries}->{'<'}
4783+ . $tail_sql
4784+ . " /*explain past lower chunk*/";
4785+ MKDEBUG && _d('Past lower statement:', $explain_past_lower_sql);
4786+
4787+ my $past_upper_sql
4788+ = $head_sql
4789+ . " WHERE " . $self->{sql}->{boundaries}->{'>'}
4790+ . $tail_sql
4791+ . " /*past upper chunk*/";
4792+ MKDEBUG && _d('Past upper statement:', $past_upper_sql);
4793+
4794+ my $explain_past_upper_sql
4795+ = "EXPLAIN SELECT "
4796+ . ($args{past_select}
4797+ || join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4798+ . " FROM " . $self->{sql}->{from}
4799+ . " WHERE " . $self->{sql}->{boundaries}->{'>'}
4800+ . $tail_sql
4801+ . " /*explain past upper chunk*/";
4802+ MKDEBUG && _d('Past upper statement:', $explain_past_upper_sql);
4803+
4804+ $self->{past_lower_sql} = $past_lower_sql;
4805+ $self->{past_upper_sql} = $past_upper_sql;
4806+ $self->{explain_past_lower_sql} = $explain_past_lower_sql;
4807+ $self->{explain_past_upper_sql} = $explain_past_upper_sql;
4808+
4809+ $self->{past_nibbles} = [qw(lower upper)];
4810+ if ( my $nibble = $args{resume} ) {
4811+ if ( !defined $nibble->{lower_boundary}
4812+ || !defined $nibble->{upper_boundary} ) {
4813+ $self->{past_nibbles} = !defined $nibble->{lower_boundary}
4814+ ? ['upper']
4815+ : [];
4816+ }
4817+ }
4818+ MKDEBUG && _d('Nibble past', @{$self->{past_nibbles}});
4819+
4820+ } # not one nibble
4821+
4822+ return bless $self, $class;
4823+}
4824+
4825+sub more_boundaries {
4826+ my ($self) = @_;
4827+ return $self->SUPER::more_boundaries() if $self->{one_nibble};
4828+ return scalar @{$self->{past_nibbles}} ? 1 : 0;
4829+}
4830+
4831+sub statements {
4832+ my ($self) = @_;
4833+
4834+ my $sths = $self->SUPER::statements();
4835+
4836+ $sths->{past_lower_boundary} = $self->{past_lower_sth};
4837+ $sths->{past_upper_boundary} = $self->{past_upper_sth};
4838+
4839+ return $sths;
4840+}
4841+
4842+sub _prepare_sths {
4843+ my ($self) = @_;
4844+ MKDEBUG && _d('Preparing out-of-bound statement handles');
4845+
4846+ if ( !$self->{one_nibble} ) {
4847+ my $dbh = $self->{Cxn}->dbh();
4848+ $self->{past_lower_sth} = $dbh->prepare($self->{past_lower_sql});
4849+ $self->{past_upper_sth} = $dbh->prepare($self->{past_upper_sql});
4850+ $self->{explain_past_lower_sth} = $dbh->prepare($self->{explain_past_lower_sql});
4851+ $self->{explain_past_upper_sth} = $dbh->prepare($self->{explain_past_upper_sql});
4852+ }
4853+
4854+ return $self->SUPER::_prepare_sths();
4855+}
4856+
4857+sub _next_boundaries {
4858+ my ($self) = @_;
4859+
4860+ return $self->SUPER::_next_boundaries() unless $self->{no_more_boundaries};
4861+
4862+ if ( my $past = shift @{$self->{past_nibbles}} ) {
4863+ if ( $past eq 'lower' ) {
4864+ MKDEBUG && _d('Nibbling values below lower boundary');
4865+ $self->{nibble_sth} = $self->{past_lower_sth};
4866+ $self->{explain_nibble_sth} = $self->{explain_past_lower_sth};
4867+ $self->{lower} = [];
4868+ $self->{upper} = $self->boundaries()->{first_lower};
4869+ $self->{next_lower} = undef;
4870+ }
4871+ elsif ( $past eq 'upper' ) {
4872+ MKDEBUG && _d('Nibbling values above upper boundary');
4873+ $self->{nibble_sth} = $self->{past_upper_sth};
4874+ $self->{explain_nibble_sth} = $self->{explain_past_upper_sth};
4875+ $self->{lower} = $self->boundaries()->{last_upper};
4876+ $self->{upper} = [];
4877+ $self->{next_lower} = undef;
4878+ }
4879+ else {
4880+ die "Invalid past nibble: $past";
4881+ }
4882+ return 1; # continue nibbling
4883+ }
4884+
4885+ MKDEBUG && _d('Done nibbling past boundaries');
4886+ return; # stop nibbling
4887+}
4888+
4889+sub DESTROY {
4890+ my ( $self ) = @_;
4891+ foreach my $key ( keys %$self ) {
4892+ if ( $key =~ m/_sth$/ ) {
4893+ MKDEBUG && _d('Finish', $key);
4894+ $self->{$key}->finish();
4895+ }
4896+ }
4897+ return;
4898+}
4899+
4900+sub _d {
4901+ my ($package, undef, $line) = caller 0;
4902+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4903+ map { defined $_ ? $_ : 'undef' }
4904+ @_;
4905+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4906+}
4907+
4908+1;
4909+}
4910+# ###########################################################################
4911+# End OobNibbleIterator package
4912+# ###########################################################################
4913+
4914+# ###########################################################################
4915 # Daemon package
4916 # This package is a copy without comments from the original. The original
4917 # with comments and its test file can be found in the Bazaar repository at,
4918@@ -4370,12 +4505,16 @@
4919 # ###########################################################################
4920
4921 # ###########################################################################
4922-# SchemaIterator r7512
4923-# Don't update this package!
4924+# SchemaIterator package
4925+# This package is a copy without comments from the original. The original
4926+# with comments and its test file can be found in the Bazaar repository at,
4927+# lib/SchemaIterator.pm
4928+# t/lib/SchemaIterator.t
4929+# See https://launchpad.net/percona-toolkit for more information.
4930 # ###########################################################################
4931+{
4932 package SchemaIterator;
4933
4934-{ # package scope
4935 use strict;
4936 use warnings FATAL => 'all';
4937 use English qw(-no_match_vars);
4938@@ -4407,8 +4546,19 @@
4939 die "I need either a dbh or file_itr argument"
4940 if (!$dbh && !$file_itr) || ($dbh && $file_itr);
4941
4942+ my %resume;
4943+ if ( my $table = $args{resume} ) {
4944+ MKDEBUG && _d('Will resume from or after', $table);
4945+ my ($db, $tbl) = $args{Quoter}->split_unquote($table);
4946+ die "Resume table must be database-qualified: $table"
4947+ unless $db && $tbl;
4948+ $resume{db} = $db;
4949+ $resume{tbl} = $tbl;
4950+ }
4951+
4952 my $self = {
4953 %args,
4954+ resume => \%resume,
4955 filters => _make_filters(%args),
4956 };
4957
4958@@ -4469,19 +4619,43 @@
4959 return \%filters;
4960 }
4961
4962-sub next_schema_object {
4963+sub next {
4964 my ( $self ) = @_;
4965
4966- my %schema_object;
4967+ if ( !$self->{initialized} ) {
4968+ $self->{initialized} = 1;
4969+ if ( $self->{resume}->{tbl}
4970+ && !$self->table_is_allowed(@{$self->{resume}}{qw(db tbl)}) ) {
4971+ MKDEBUG && _d('Will resume after',
4972+ join('.', @{$self->{resume}}{qw(db tbl)}));
4973+ $self->{resume}->{after} = 1;
4974+ }
4975+ }
4976+
4977+ my $schema_obj;
4978 if ( $self->{file_itr} ) {
4979- %schema_object = $self->_iterate_files();
4980+ $schema_obj= $self->_iterate_files();
4981 }
4982 else { # dbh
4983- %schema_object = $self->_iterate_dbh();
4984- }
4985-
4986- MKDEBUG && _d('Next schema object:', Dumper(\%schema_object));
4987- return %schema_object;
4988+ $schema_obj= $self->_iterate_dbh();
4989+ }
4990+
4991+ if ( $schema_obj ) {
4992+ if ( $schema_obj->{ddl} && $self->{TableParser} ) {
4993+ $schema_obj->{tbl_struct}
4994+ = $self->{TableParser}->parse($schema_obj->{ddl});
4995+ }
4996+
4997+ delete $schema_obj->{ddl} unless $self->{keep_ddl};
4998+ delete $schema_obj->{tbl_status} unless $self->{keep_tbl_status};
4999+
5000+ if ( my $schema = $self->{Schema} ) {
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches