Merge lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2 into lp:percona-toolkit/2.2

Proposed by Daniel Nichter
Status: Merged
Approved by: Daniel Nichter
Approved revision: 544
Merged at revision: 552
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2
Merge into: lp:percona-toolkit/2.2
Diff against target: 21780 lines (+10022/-10718)
110 files modified
bin/pt-table-usage (+0/-3)
bin/pt-upgrade (+6364/-9150)
lib/Cxn.pm (+22/-50)
lib/EventExecutor.pm (+115/-0)
lib/FakeSth.pm (+51/-0)
lib/MySQLProtocolParser.pm (+9/-8)
lib/PerconaTest.pm (+16/-6)
lib/QueryIterator.pm (+312/-0)
lib/ResultIterator.pm (+175/-0)
lib/ResultWriter.pm (+184/-0)
lib/Sandbox.pm (+2/-0)
lib/UpgradeReportFormatter.pm (+0/-223)
lib/UpgradeResults.pm (+555/-0)
t/lib/QueryIterator.t (+87/-0)
t/lib/UpgradeReportFormatter.t (+0/-158)
t/lib/UpgradeResults.t (+50/-0)
t/lib/samples/UpgradeResults/format_query_times001 (+2/-0)
t/lib/samples/compare-warnings.sql (+0/-1)
t/lib/samples/rawlogs/rawlog002.txt (+4/-0)
t/pt-upgrade/compare_hosts.t (+80/-171)
t/pt-upgrade/compare_results.t (+133/-0)
t/pt-upgrade/daemon.t (+0/-53)
t/pt-upgrade/diff_query_times.t (+91/-0)
t/pt-upgrade/diff_rows.t (+119/-0)
t/pt-upgrade/diff_warnings.t (+120/-0)
t/pt-upgrade/log_types.t (+178/-0)
t/pt-upgrade/query_time_diffs.t (+53/-0)
t/pt-upgrade/rewrite_non_select.t (+0/-81)
t/pt-upgrade/run_time.t (+122/-0)
t/pt-upgrade/samples/001/insert.log (+4/-0)
t/pt-upgrade/samples/001/insert.txt (+35/-0)
t/pt-upgrade/samples/001/insert_results.txt (+32/-0)
t/pt-upgrade/samples/001/non-selects-rewritten.txt (+0/-112)
t/pt-upgrade/samples/001/non-selects.log (+0/-16)
t/pt-upgrade/samples/001/non-selects.txt (+0/-37)
t/pt-upgrade/samples/001/one-error-no-clear-warnings.txt (+0/-63)
t/pt-upgrade/samples/001/one-error.log (+0/-10)
t/pt-upgrade/samples/001/one-error.txt (+0/-66)
t/pt-upgrade/samples/001/select-everyone-no-queries.txt (+0/-3)
t/pt-upgrade/samples/001/select-everyone-no-stats.txt (+0/-32)
t/pt-upgrade/samples/001/select-everyone-rows.txt (+0/-35)
t/pt-upgrade/samples/001/select-everyone.log (+0/-22)
t/pt-upgrade/samples/001/select-everyone.txt (+0/-35)
t/pt-upgrade/samples/001/select-one-rows.txt (+0/-36)
t/pt-upgrade/samples/001/select-one.log (+0/-4)
t/pt-upgrade/samples/001/select-one.txt (+0/-36)
t/pt-upgrade/samples/001/select.log (+4/-0)
t/pt-upgrade/samples/001/select.txt (+35/-0)
t/pt-upgrade/samples/001/select_results.txt (+32/-0)
t/pt-upgrade/samples/001/select_results/query (+3/-0)
t/pt-upgrade/samples/001/select_results/results (+6/-0)
t/pt-upgrade/samples/001/select_results/rows (+34/-0)
t/pt-upgrade/samples/001/tables.sql (+10/-12)
t/pt-upgrade/samples/002/host2.sql (+9/-0)
t/pt-upgrade/samples/002/no-db.log (+0/-3)
t/pt-upgrade/samples/002/report-01.txt (+0/-44)
t/pt-upgrade/samples/002/select_missing_rows.log (+4/-0)
t/pt-upgrade/samples/002/select_missing_rows.txt (+60/-0)
t/pt-upgrade/samples/002/select_missing_rows_results.txt (+57/-0)
t/pt-upgrade/samples/002/select_missing_rows_results/query (+3/-0)
t/pt-upgrade/samples/002/select_missing_rows_results/results (+6/-0)
t/pt-upgrade/samples/002/select_missing_rows_results/rows (+34/-0)
t/pt-upgrade/samples/002/tables.sql (+13/-5)
t/pt-upgrade/samples/003/conf (+1/-0)
t/pt-upgrade/samples/003/double.log (+0/-4)
t/pt-upgrade/samples/003/host2.sql (+5/-0)
t/pt-upgrade/samples/003/insert_truncate_warning.log (+4/-0)
t/pt-upgrade/samples/003/insert_truncate_warning.txt (+63/-0)
t/pt-upgrade/samples/003/insert_truncate_warning_results.txt (+60/-0)
t/pt-upgrade/samples/003/insert_truncate_warning_results/query (+3/-0)
t/pt-upgrade/samples/003/insert_truncate_warning_results/results (+12/-0)
t/pt-upgrade/samples/003/insert_truncate_warning_results/rows (+2/-0)
t/pt-upgrade/samples/003/report001.txt (+0/-44)
t/pt-upgrade/samples/003/tables.sql (+5/-7)
t/pt-upgrade/samples/004/select-func.log (+0/-4)
t/pt-upgrade/samples/004/select-func.txt (+0/-36)
t/pt-upgrade/samples/004/select_function.log (+4/-0)
t/pt-upgrade/samples/004/select_function.sed (+1/-0)
t/pt-upgrade/samples/004/select_function.txt (+59/-0)
t/pt-upgrade/samples/004/tables.sql (+0/-2)
t/pt-upgrade/samples/005/error_on_both_hosts.log (+4/-0)
t/pt-upgrade/samples/005/error_on_both_hosts.txt (+59/-0)
t/pt-upgrade/samples/005/error_on_both_hosts_results.txt (+56/-0)
t/pt-upgrade/samples/005/error_on_both_hosts_results/query (+3/-0)
t/pt-upgrade/samples/005/error_on_both_hosts_results/results (+5/-0)
t/pt-upgrade/samples/005/error_on_both_hosts_results/rows (+2/-0)
t/pt-upgrade/samples/005/error_on_host1.log (+4/-0)
t/pt-upgrade/samples/005/error_on_host1.txt (+61/-0)
t/pt-upgrade/samples/005/error_on_host1_results.txt (+58/-0)
t/pt-upgrade/samples/005/error_on_host1_results/query (+3/-0)
t/pt-upgrade/samples/005/error_on_host1_results/results (+5/-0)
t/pt-upgrade/samples/005/error_on_host1_results/rows (+2/-0)
t/pt-upgrade/samples/005/error_on_host2.log (+4/-0)
t/pt-upgrade/samples/005/error_on_host2.txt (+61/-0)
t/pt-upgrade/samples/005/error_on_host2_results.txt (+58/-0)
t/pt-upgrade/samples/005/error_on_host2_results/query (+3/-0)
t/pt-upgrade/samples/005/error_on_host2_results/results (+6/-0)
t/pt-upgrade/samples/005/error_on_host2_results/rows (+7/-0)
t/pt-upgrade/samples/005/host1.sql (+5/-0)
t/pt-upgrade/samples/005/host2.sql (+5/-0)
t/pt-upgrade/samples/005/tables.sql (+2/-0)
t/pt-upgrade/samples/genlog001.txt (+7/-0)
t/pt-upgrade/samples/res001/query (+2/-0)
t/pt-upgrade/samples/res001/results (+6/-0)
t/pt-upgrade/samples/res001/rows (+7/-0)
t/pt-upgrade/samples/res001/sleep1.log (+3/-0)
t/pt-upgrade/samples/slow_slow.log (+30/-0)
t/pt-upgrade/save_results.t (+110/-0)
t/pt-upgrade/skip_non_select.t (+0/-56)
t/pt-upgrade/warnings.t (+0/-90)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+152801@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) wrote :
review: Approve
542. By Daniel Nichter

Remove --read-timeout. Update docs.

543. By Daniel Nichter

Fix and test UpgradeResults::format_query_times001().

544. By Daniel Nichter

Functional test and fix query time diffs.

Revision history for this message
Daniel Nichter (daniel-nichter) wrote :

The diff is worthless: the whole tool was rewritten.

Revision history for this message
Daniel Nichter (daniel-nichter) wrote :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-table-usage'
2--- bin/pt-table-usage 2013-03-09 16:57:54 +0000
3+++ bin/pt-table-usage 2013-03-12 01:25:25 +0000
4@@ -6655,9 +6655,6 @@
5 # just tweak the code below to be like pt-query-digest.
6 my %alias_for = (
7 slowlog => ['SlowLogParser'],
8- # binlog => ['BinaryLogParser'],
9- # genlog => ['GeneralLogParser'],
10- # tcpdump => ['TcpdumpParser','MySQLProtocolParser'],
11 );
12 my $type = ['slowlog'];
13 $type = $alias_for{$type->[0]} if $alias_for{$type->[0]};
14
15=== modified file 'bin/pt-upgrade'
16--- bin/pt-upgrade 2013-03-09 16:57:54 +0000
17+++ bin/pt-upgrade 2013-03-12 01:25:25 +0000
18@@ -20,35 +20,33 @@
19 Lmo::Types
20 Lmo
21 DSNParser
22- TableParser
23 Quoter
24 OptionParser
25+ Cxn
26 Transformers
27- SlowLogParser
28- EventAggregator
29- QueryParser
30- QueryRewriter
31 Daemon
32- ChangeHandler
33- RowDiff
34- TableChunker
35- TableNibbler
36- TableChecksum
37- TableSyncer
38- TableSyncChunk
39- TableSyncNibble
40- TableSyncGroupBy
41 Outfile
42- MockSyncStream
43- MockSth
44- ReportFormatter
45- UpgradeReportFormatter
46- CompareResults
47- CompareQueryTimes
48- CompareWarnings
49 Retry
50 HTTPMicro
51 VersionCheck
52+ QueryRewriter
53+ VersionParser
54+ FileIterator
55+ QueryIterator
56+ EventExecutor
57+ UpgradeResults
58+ ResultWriter
59+ ResultIterator
60+ FakeSth
61+ SlowLogParser
62+ GeneralLogParser
63+ BinaryLogParser
64+ RawLogParser
65+ ProtocolParser
66+ TcpdumpParser
67+ MySQLProtocolParser
68+ Runtime
69+ Progress
70 ));
71 }
72
73@@ -1138,417 +1136,6 @@
74 # ###########################################################################
75
76 # ###########################################################################
77-# TableParser package
78-# This package is a copy without comments from the original. The original
79-# with comments and its test file can be found in the Bazaar repository at,
80-# lib/TableParser.pm
81-# t/lib/TableParser.t
82-# See https://launchpad.net/percona-toolkit for more information.
83-# ###########################################################################
84-{
85-package TableParser;
86-
87-use strict;
88-use warnings FATAL => 'all';
89-use English qw(-no_match_vars);
90-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
91-
92-use Data::Dumper;
93-$Data::Dumper::Indent = 1;
94-$Data::Dumper::Sortkeys = 1;
95-$Data::Dumper::Quotekeys = 0;
96-
97-local $EVAL_ERROR;
98-eval {
99- require Quoter;
100-};
101-
102-sub new {
103- my ( $class, %args ) = @_;
104- my $self = { %args };
105- $self->{Quoter} ||= Quoter->new();
106- return bless $self, $class;
107-}
108-
109-sub Quoter { shift->{Quoter} }
110-
111-sub get_create_table {
112- my ( $self, $dbh, $db, $tbl ) = @_;
113- die "I need a dbh parameter" unless $dbh;
114- die "I need a db parameter" unless $db;
115- die "I need a tbl parameter" unless $tbl;
116- my $q = $self->{Quoter};
117-
118- my $new_sql_mode
119- = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, }
120- . q{@@SQL_MODE := '', }
121- . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, }
122- . q{@@SQL_QUOTE_SHOW_CREATE := 1 */};
123-
124- my $old_sql_mode
125- = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, }
126- . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */};
127-
128- PTDEBUG && _d($new_sql_mode);
129- eval { $dbh->do($new_sql_mode); };
130- PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
131-
132- my $use_sql = 'USE ' . $q->quote($db);
133- PTDEBUG && _d($dbh, $use_sql);
134- $dbh->do($use_sql);
135-
136- my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
137- PTDEBUG && _d($show_sql);
138- my $href;
139- eval { $href = $dbh->selectrow_hashref($show_sql); };
140- if ( my $e = $EVAL_ERROR ) {
141- PTDEBUG && _d($old_sql_mode);
142- $dbh->do($old_sql_mode);
143-
144- die $e;
145- }
146-
147- PTDEBUG && _d($old_sql_mode);
148- $dbh->do($old_sql_mode);
149-
150- my ($key) = grep { m/create (?:table|view)/i } keys %$href;
151- if ( !$key ) {
152- die "Error: no 'Create Table' or 'Create View' in result set from "
153- . "$show_sql: " . Dumper($href);
154- }
155-
156- return $href->{$key};
157-}
158-
159-sub parse {
160- my ( $self, $ddl, $opts ) = @_;
161- return unless $ddl;
162-
163- if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) {
164- $ddl = $self->ansi_to_legacy($ddl);
165- }
166- elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
167- die "TableParser doesn't handle CREATE TABLE without quoting.";
168- }
169-
170- my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
171- (undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
172-
173- $ddl =~ s/(`[^`]+`)/\L$1/g;
174-
175- my $engine = $self->get_engine($ddl);
176-
177- my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
178- my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
179- PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
180-
181- my %def_for;
182- @def_for{@cols} = @defs;
183-
184- my (@nums, @null);
185- my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
186- foreach my $col ( @cols ) {
187- my $def = $def_for{$col};
188- my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
189- die "Can't determine column type for $def" unless $type;
190- $type_for{$col} = $type;
191- if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
192- push @nums, $col;
193- $is_numeric{$col} = 1;
194- }
195- if ( $def !~ m/NOT NULL/ ) {
196- push @null, $col;
197- $is_nullable{$col} = 1;
198- }
199- $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
200- }
201-
202- my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
203-
204- my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
205-
206- return {
207- name => $name,
208- cols => \@cols,
209- col_posn => { map { $cols[$_] => $_ } 0..$#cols },
210- is_col => { map { $_ => 1 } @cols },
211- null_cols => \@null,
212- is_nullable => \%is_nullable,
213- is_autoinc => \%is_autoinc,
214- clustered_key => $clustered_key,
215- keys => $keys,
216- defs => \%def_for,
217- numeric_cols => \@nums,
218- is_numeric => \%is_numeric,
219- engine => $engine,
220- type_for => \%type_for,
221- charset => $charset,
222- };
223-}
224-
225-sub sort_indexes {
226- my ( $self, $tbl ) = @_;
227-
228- my @indexes
229- = sort {
230- (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
231- || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
232- || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
233- || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
234- }
235- grep {
236- $tbl->{keys}->{$_}->{type} eq 'BTREE'
237- }
238- sort keys %{$tbl->{keys}};
239-
240- PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
241- return @indexes;
242-}
243-
244-sub find_best_index {
245- my ( $self, $tbl, $index ) = @_;
246- my $best;
247- if ( $index ) {
248- ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
249- }
250- if ( !$best ) {
251- if ( $index ) {
252- die "Index '$index' does not exist in table";
253- }
254- else {
255- ($best) = $self->sort_indexes($tbl);
256- }
257- }
258- PTDEBUG && _d('Best index found is', $best);
259- return $best;
260-}
261-
262-sub find_possible_keys {
263- my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
264- return () unless $where;
265- my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
266- . ' WHERE ' . $where;
267- PTDEBUG && _d($sql);
268- my $expl = $dbh->selectrow_hashref($sql);
269- $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
270- if ( $expl->{possible_keys} ) {
271- PTDEBUG && _d('possible_keys =', $expl->{possible_keys});
272- my @candidates = split(',', $expl->{possible_keys});
273- my %possible = map { $_ => 1 } @candidates;
274- if ( $expl->{key} ) {
275- PTDEBUG && _d('MySQL chose', $expl->{key});
276- unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
277- PTDEBUG && _d('Before deduping:', join(', ', @candidates));
278- my %seen;
279- @candidates = grep { !$seen{$_}++ } @candidates;
280- }
281- PTDEBUG && _d('Final list:', join(', ', @candidates));
282- return @candidates;
283- }
284- else {
285- PTDEBUG && _d('No keys in possible_keys');
286- return ();
287- }
288-}
289-
290-sub check_table {
291- my ( $self, %args ) = @_;
292- my @required_args = qw(dbh db tbl);
293- foreach my $arg ( @required_args ) {
294- die "I need a $arg argument" unless $args{$arg};
295- }
296- my ($dbh, $db, $tbl) = @args{@required_args};
297- my $q = $self->{Quoter} || 'Quoter';
298- my $db_tbl = $q->quote($db, $tbl);
299- PTDEBUG && _d('Checking', $db_tbl);
300-
301- my $sql = "SHOW TABLES FROM " . $q->quote($db)
302- . ' LIKE ' . $q->literal_like($tbl);
303- PTDEBUG && _d($sql);
304- my $row;
305- eval {
306- $row = $dbh->selectrow_arrayref($sql);
307- };
308- if ( $EVAL_ERROR ) {
309- PTDEBUG && _d($EVAL_ERROR);
310- return 0;
311- }
312- if ( !$row->[0] || $row->[0] ne $tbl ) {
313- PTDEBUG && _d('Table does not exist');
314- return 0;
315- }
316-
317- PTDEBUG && _d('Table', $db, $tbl, 'exists');
318- return 1;
319-
320-}
321-
322-sub get_engine {
323- my ( $self, $ddl, $opts ) = @_;
324- my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
325- PTDEBUG && _d('Storage engine:', $engine);
326- return $engine || undef;
327-}
328-
329-sub get_keys {
330- my ( $self, $ddl, $opts, $is_nullable ) = @_;
331- my $engine = $self->get_engine($ddl);
332- my $keys = {};
333- my $clustered_key = undef;
334-
335- KEY:
336- foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
337-
338- next KEY if $key =~ m/FOREIGN/;
339-
340- my $key_ddl = $key;
341- PTDEBUG && _d('Parsed key:', $key_ddl);
342-
343- if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) {
344- $key =~ s/USING HASH/USING BTREE/;
345- }
346-
347- my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
348- my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
349- $type = $type || $special || 'BTREE';
350- my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
351- my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
352- my @cols;
353- my @col_prefixes;
354- foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
355- my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
356- push @cols, $name;
357- push @col_prefixes, $prefix;
358- }
359- $name =~ s/`//g;
360-
361- PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
362-
363- $keys->{$name} = {
364- name => $name,
365- type => $type,
366- colnames => $cols,
367- cols => \@cols,
368- col_prefixes => \@col_prefixes,
369- is_unique => $unique,
370- is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
371- is_col => { map { $_ => 1 } @cols },
372- ddl => $key_ddl,
373- };
374-
375- if ( ($engine || '') =~ m/InnoDB/i && !$clustered_key ) {
376- my $this_key = $keys->{$name};
377- if ( $this_key->{name} eq 'PRIMARY' ) {
378- $clustered_key = 'PRIMARY';
379- }
380- elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
381- $clustered_key = $this_key->{name};
382- }
383- PTDEBUG && $clustered_key && _d('This key is the clustered key');
384- }
385- }
386-
387- return $keys, $clustered_key;
388-}
389-
390-sub get_fks {
391- my ( $self, $ddl, $opts ) = @_;
392- my $q = $self->{Quoter};
393- my $fks = {};
394-
395- foreach my $fk (
396- $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
397- {
398- my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
399- my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
400- my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
401-
402- my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
403- my %parent_tbl = (tbl => $tbl);
404- $parent_tbl{db} = $db if $db;
405-
406- if ( $parent !~ m/\./ && $opts->{database} ) {
407- $parent = $q->quote($opts->{database}) . ".$parent";
408- }
409-
410- $fks->{$name} = {
411- name => $name,
412- colnames => $cols,
413- cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
414- parent_tbl => \%parent_tbl,
415- parent_tblname => $parent,
416- parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
417- parent_colnames=> $parent_cols,
418- ddl => $fk,
419- };
420- }
421-
422- return $fks;
423-}
424-
425-sub remove_auto_increment {
426- my ( $self, $ddl ) = @_;
427- $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
428- return $ddl;
429-}
430-
431-sub get_table_status {
432- my ( $self, $dbh, $db, $like ) = @_;
433- my $q = $self->{Quoter};
434- my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
435- my @params;
436- if ( $like ) {
437- $sql .= ' LIKE ?';
438- push @params, $like;
439- }
440- PTDEBUG && _d($sql, @params);
441- my $sth = $dbh->prepare($sql);
442- eval { $sth->execute(@params); };
443- if ($EVAL_ERROR) {
444- PTDEBUG && _d($EVAL_ERROR);
445- return;
446- }
447- my @tables = @{$sth->fetchall_arrayref({})};
448- @tables = map {
449- my %tbl; # Make a copy with lowercased keys
450- @tbl{ map { lc $_ } keys %$_ } = values %$_;
451- $tbl{engine} ||= $tbl{type} || $tbl{comment};
452- delete $tbl{type};
453- \%tbl;
454- } @tables;
455- return @tables;
456-}
457-
458-my $ansi_quote_re = qr/" [^"]* (?: "" [^"]* )* (?<=.) "/ismx;
459-sub ansi_to_legacy {
460- my ($self, $ddl) = @_;
461- $ddl =~ s/($ansi_quote_re)/ansi_quote_replace($1)/ge;
462- return $ddl;
463-}
464-
465-sub ansi_quote_replace {
466- my ($val) = @_;
467- $val =~ s/^"|"$//g;
468- $val =~ s/`/``/g;
469- $val =~ s/""/"/g;
470- return "`$val`";
471-}
472-
473-sub _d {
474- my ($package, undef, $line) = caller 0;
475- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
476- map { defined $_ ? $_ : 'undef' }
477- @_;
478- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
479-}
480-
481-1;
482-}
483-# ###########################################################################
484-# End TableParser package
485-# ###########################################################################
486-
487-# ###########################################################################
488 # Quoter package
489 # This package is a copy without comments from the original. The original
490 # with comments and its test file can be found in the Bazaar repository at,
491@@ -2765,6 +2352,155 @@
492 # ###########################################################################
493
494 # ###########################################################################
495+# Cxn package
496+# This package is a copy without comments from the original. The original
497+# with comments and its test file can be found in the Bazaar repository at,
498+# lib/Cxn.pm
499+# t/lib/Cxn.t
500+# See https://launchpad.net/percona-toolkit for more information.
501+# ###########################################################################
502+{
503+package Cxn;
504+
505+use strict;
506+use warnings FATAL => 'all';
507+use English qw(-no_match_vars);
508+use Scalar::Util qw(blessed);
509+use constant {
510+ PTDEBUG => $ENV{PTDEBUG} || 0,
511+ PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0,
512+};
513+
514+sub new {
515+ my ( $class, %args ) = @_;
516+ my @required_args = qw(DSNParser OptionParser);
517+ foreach my $arg ( @required_args ) {
518+ die "I need a $arg argument" unless $args{$arg};
519+ };
520+ my ($dp, $o) = @args{@required_args};
521+
522+ my $dsn_defaults = $dp->parse_options($o);
523+ my $prev_dsn = $args{prev_dsn};
524+ my $dsn = $args{dsn};
525+ if ( !$dsn ) {
526+ $args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
527+
528+ $dsn = $dp->parse(
529+ $args{dsn_string}, $prev_dsn, $dsn_defaults);
530+ }
531+ elsif ( $prev_dsn ) {
532+ $dsn = $dp->copy($prev_dsn, $dsn);
533+ }
534+
535+ my $dsn_name = $dp->as_string($dsn, [qw(h P S)])
536+ || $dp->as_string($dsn, [qw(F)])
537+ || '';
538+
539+ my $self = {
540+ dsn => $dsn,
541+ dbh => $args{dbh},
542+ dsn_name => $dsn_name,
543+ hostname => '',
544+ set => $args{set},
545+ NAME_lc => defined($args{NAME_lc}) ? $args{NAME_lc} : 1,
546+ dbh_set => 0,
547+ ask_pass => $o->get('ask-pass'),
548+ DSNParser => $dp,
549+ is_cluster_node => undef,
550+ };
551+
552+ return bless $self, $class;
553+}
554+
555+sub connect {
556+ my ( $self ) = @_;
557+ my $dsn = $self->{dsn};
558+ my $dp = $self->{DSNParser};
559+
560+ my $dbh = $self->{dbh};
561+ if ( !$dbh || !$dbh->ping() ) {
562+ if ( $self->{ask_pass} && !$self->{asked_for_pass} ) {
563+ $dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
564+ $self->{asked_for_pass} = 1;
565+ }
566+ $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
567+ }
568+ PTDEBUG && _d($dbh, 'Connected dbh to', $self->{name});
569+
570+ return $self->set_dbh($dbh);
571+}
572+
573+sub set_dbh {
574+ my ($self, $dbh) = @_;
575+
576+ if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {
577+ PTDEBUG && _d($dbh, 'Already set dbh');
578+ return $dbh;
579+ }
580+
581+ PTDEBUG && _d($dbh, 'Setting dbh');
582+
583+ $dbh->{FetchHashKeyName} = 'NAME_lc' if $self->{NAME_lc};
584+
585+ my $sql = 'SELECT @@hostname, @@server_id';
586+ PTDEBUG && _d($dbh, $sql);
587+ my ($hostname, $server_id) = $dbh->selectrow_array($sql);
588+ PTDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
589+ if ( $hostname ) {
590+ $self->{hostname} = $hostname;
591+ }
592+
593+ if ( my $set = $self->{set}) {
594+ $set->($dbh);
595+ }
596+
597+ $self->{dbh} = $dbh;
598+ $self->{dbh_set} = 1;
599+ return $dbh;
600+}
601+
602+sub dbh {
603+ my ($self) = @_;
604+ return $self->{dbh};
605+}
606+
607+sub dsn {
608+ my ($self) = @_;
609+ return $self->{dsn};
610+}
611+
612+sub name {
613+ my ($self) = @_;
614+ return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
615+ return $self->{hostname} || $self->{dsn_name} || 'unknown host';
616+}
617+
618+sub DESTROY {
619+ my ($self) = @_;
620+ if ( $self->{dbh}
621+ && blessed($self->{dbh})
622+ && $self->{dbh}->can("disconnect") ) {
623+ PTDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name});
624+ $self->{dbh}->disconnect();
625+ }
626+ return;
627+}
628+
629+sub _d {
630+ my ($package, undef, $line) = caller 0;
631+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
632+ map { defined $_ ? $_ : 'undef' }
633+ @_;
634+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
635+}
636+
637+1;
638+}
639+# ###########################################################################
640+# End Cxn package
641+# ###########################################################################
642+
643+# ###########################################################################
644 # Transformers package
645 # This package is a copy without comments from the original. The original
646 # with comments and its test file can be found in the Bazaar repository at,
647@@ -3118,1909 +2854,6 @@
648 # ###########################################################################
649
650 # ###########################################################################
651-# SlowLogParser package
652-# This package is a copy without comments from the original. The original
653-# with comments and its test file can be found in the Bazaar repository at,
654-# lib/SlowLogParser.pm
655-# t/lib/SlowLogParser.t
656-# See https://launchpad.net/percona-toolkit for more information.
657-# ###########################################################################
658-{
659-package SlowLogParser;
660-
661-use strict;
662-use warnings FATAL => 'all';
663-use English qw(-no_match_vars);
664-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
665-
666-use Data::Dumper;
667-$Data::Dumper::Indent = 1;
668-$Data::Dumper::Sortkeys = 1;
669-$Data::Dumper::Quotekeys = 0;
670-
671-sub new {
672- my ( $class ) = @_;
673- my $self = {
674- pending => [],
675- };
676- return bless $self, $class;
677-}
678-
679-my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/;
680-my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;
681-my $slow_log_hd_line = qr{
682- ^(?:
683- T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix
684- |
685- [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
686- |
687- Time\s+Id\s+Command
688- ).*\n
689- }xm;
690-
691-sub parse_event {
692- my ( $self, %args ) = @_;
693- my @required_args = qw(next_event tell);
694- foreach my $arg ( @required_args ) {
695- die "I need a $arg argument" unless $args{$arg};
696- }
697- my ($next_event, $tell) = @args{@required_args};
698-
699- my $pending = $self->{pending};
700- local $INPUT_RECORD_SEPARATOR = ";\n#";
701- my $trimlen = length($INPUT_RECORD_SEPARATOR);
702- my $pos_in_log = $tell->();
703- my $stmt;
704-
705- EVENT:
706- while (
707- defined($stmt = shift @$pending)
708- or defined($stmt = $next_event->())
709- ) {
710- my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
711- $pos_in_log = $tell->();
712-
713- if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
714- my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
715- if ( @chunks > 1 ) {
716- PTDEBUG && _d("Found multiple chunks");
717- $stmt = shift @chunks;
718- unshift @$pending, @chunks;
719- }
720- }
721-
722- $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
723- $stmt =~ s/;\n#?\Z//;
724-
725-
726- my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
727- my $pos = 0;
728- my $len = length($stmt);
729- my $found_arg = 0;
730- LINE:
731- while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
732- $pos = pos($stmt); # Be careful not to mess this up!
733- my $line = $1; # Necessary for /g and pos() to work.
734- PTDEBUG && _d($line);
735-
736- if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {
737-
738- if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
739- PTDEBUG && _d("Got ts", $time);
740- push @properties, 'ts', $time;
741- ++$got_ts;
742- if ( !$got_uh
743- && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
744- ) {
745- PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
746- push @properties, 'user', $user, 'host', $host, 'ip', $ip;
747- ++$got_uh;
748- }
749- }
750-
751- elsif ( !$got_uh
752- && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
753- ) {
754- PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
755- push @properties, 'user', $user, 'host', $host, 'ip', $ip;
756- ++$got_uh;
757- }
758-
759- elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
760- PTDEBUG && _d("Got admin command");
761- $line =~ s/^#\s+//; # string leading "# ".
762- push @properties, 'cmd', 'Admin', 'arg', $line;
763- push @properties, 'bytes', length($properties[-1]);
764- ++$found_arg;
765- ++$got_ac;
766- }
767-
768- elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
769- PTDEBUG && _d("Got some line with properties");
770-
771- if ( $line =~ m/Schema:\s+\w+: / ) {
772- PTDEBUG && _d('Removing empty Schema attrib');
773- $line =~ s/Schema:\s+//;
774- PTDEBUG && _d($line);
775- }
776-
777- my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
778- push @properties, @temp;
779- }
780-
781- elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
782- PTDEBUG && _d("Got a default database:", $db);
783- push @properties, 'db', $db;
784- ++$got_db;
785- }
786-
787- elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
788- PTDEBUG && _d("Got some setting:", $setting);
789- push @properties, split(/,|\s*=\s*/, $setting);
790- ++$got_set;
791- }
792-
793- if ( !$found_arg && $pos == $len ) {
794- PTDEBUG && _d("Did not find arg, looking for special cases");
795- local $INPUT_RECORD_SEPARATOR = ";\n"; # get next line
796- if ( defined(my $l = $next_event->()) ) {
797- if ( $l =~ /^\s*[A-Z][a-z_]+: / ) {
798- PTDEBUG && _d("Found NULL query before", $l);
799- local $INPUT_RECORD_SEPARATOR = ";\n#";
800- my $rest_of_event = $next_event->();
801- push @{$self->{pending}}, $l . $rest_of_event;
802- push @properties, 'cmd', 'Query', 'arg', '/* No query */';
803- push @properties, 'bytes', 0;
804- $found_arg++;
805- }
806- else {
807- chomp $l;
808- $l =~ s/^\s+//;
809- PTDEBUG && _d("Found admin statement", $l);
810- push @properties, 'cmd', 'Admin', 'arg', $l;
811- push @properties, 'bytes', length($properties[-1]);
812- $found_arg++;
813- }
814- }
815- else {
816- PTDEBUG && _d("I can't figure out what to do with this line");
817- next EVENT;
818- }
819- }
820- }
821- else {
822- PTDEBUG && _d("Got the query/arg line");
823- my $arg = substr($stmt, $pos - length($line));
824- push @properties, 'arg', $arg, 'bytes', length($arg);
825- if ( $args{misc} && $args{misc}->{embed}
826- && ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
827- ) {
828- push @properties, $e =~ m/$args{misc}->{capture}/g;
829- }
830- last LINE;
831- }
832- }
833-
834- PTDEBUG && _d('Properties of event:', Dumper(\@properties));
835- my $event = { @properties };
836- if ( $args{stats} ) {
837- $args{stats}->{events_read}++;
838- $args{stats}->{events_parsed}++;
839- }
840- return $event;
841- } # EVENT
842-
843- @$pending = ();
844- $args{oktorun}->(0) if $args{oktorun};
845- return;
846-}
847-
848-sub _d {
849- my ($package, undef, $line) = caller 0;
850- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
851- map { defined $_ ? $_ : 'undef' }
852- @_;
853- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
854-}
855-
856-1;
857-}
858-# ###########################################################################
859-# End SlowLogParser package
860-# ###########################################################################
861-
862-# ###########################################################################
863-# EventAggregator package
864-# This package is a copy without comments from the original. The original
865-# with comments and its test file can be found in the Bazaar repository at,
866-# lib/EventAggregator.pm
867-# t/lib/EventAggregator.t
868-# See https://launchpad.net/percona-toolkit for more information.
869-# ###########################################################################
870-{
871-package EventAggregator;
872-
873-use strict;
874-use warnings FATAL => 'all';
875-use English qw(-no_match_vars);
876-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
877-
878-use List::Util qw(min max);
879-use Data::Dumper;
880-$Data::Dumper::Indent = 1;
881-$Data::Dumper::Sortkeys = 1;
882-$Data::Dumper::Quotekeys = 0;
883-
884-use constant BUCK_SIZE => 1.05;
885-use constant BASE_LOG => log(BUCK_SIZE);
886-use constant BASE_OFFSET => abs(1 - log(0.000001) / BASE_LOG); # 284.1617969
887-use constant NUM_BUCK => 1000;
888-use constant MIN_BUCK => .000001;
889-
890-my @buck_vals = map { bucket_value($_); } (0..NUM_BUCK-1);
891-
892-sub new {
893- my ( $class, %args ) = @_;
894- foreach my $arg ( qw(groupby worst) ) {
895- die "I need a $arg argument" unless $args{$arg};
896- }
897- my $attributes = $args{attributes} || {};
898- my $self = {
899- groupby => $args{groupby},
900- detect_attribs => scalar keys %$attributes == 0 ? 1 : 0,
901- all_attribs => [ keys %$attributes ],
902- ignore_attribs => {
903- map { $_ => $args{attributes}->{$_} }
904- grep { $_ ne $args{groupby} }
905- @{$args{ignore_attributes}}
906- },
907- attributes => {
908- map { $_ => $args{attributes}->{$_} }
909- grep { $_ ne $args{groupby} }
910- keys %$attributes
911- },
912- alt_attribs => {
913- map { $_ => make_alt_attrib(@{$args{attributes}->{$_}}) }
914- grep { $_ ne $args{groupby} }
915- keys %$attributes
916- },
917- worst => $args{worst},
918- unroll_limit => $args{unroll_limit} || 1000,
919- attrib_limit => $args{attrib_limit},
920- result_classes => {},
921- result_globals => {},
922- result_samples => {},
923- class_metrics => {},
924- global_metrics => {},
925- n_events => 0,
926- unrolled_loops => undef,
927- type_for => { %{$args{type_for} || { Query_time => 'num' }} },
928- };
929- return bless $self, $class;
930-}
931-
932-sub reset_aggregated_data {
933- my ( $self ) = @_;
934- foreach my $class ( values %{$self->{result_classes}} ) {
935- foreach my $attrib ( values %$class ) {
936- delete @{$attrib}{keys %$attrib};
937- }
938- }
939- foreach my $class ( values %{$self->{result_globals}} ) {
940- delete @{$class}{keys %$class};
941- }
942- delete @{$self->{result_samples}}{keys %{$self->{result_samples}}};
943- $self->{n_events} = 0;
944-}
945-
946-sub aggregate {
947- my ( $self, $event ) = @_;
948-
949- my $group_by = $event->{$self->{groupby}};
950- return unless defined $group_by;
951-
952- $self->{n_events}++;
953- PTDEBUG && _d('Event', $self->{n_events});
954-
955- return $self->{unrolled_loops}->($self, $event, $group_by)
956- if $self->{unrolled_loops};
957-
958- if ( $self->{n_events} <= $self->{unroll_limit} ) {
959-
960- $self->add_new_attributes($event) if $self->{detect_attribs};
961-
962- ATTRIB:
963- foreach my $attrib ( keys %{$self->{attributes}} ) {
964-
965- if ( !exists $event->{$attrib} ) {
966- PTDEBUG && _d("attrib doesn't exist in event:", $attrib);
967- my $alt_attrib = $self->{alt_attribs}->{$attrib}->($event);
968- PTDEBUG && _d('alt attrib:', $alt_attrib);
969- next ATTRIB unless $alt_attrib;
970- }
971-
972- GROUPBY:
973- foreach my $val ( ref $group_by ? @$group_by : ($group_by) ) {
974- my $class_attrib = $self->{result_classes}->{$val}->{$attrib} ||= {};
975- my $global_attrib = $self->{result_globals}->{$attrib} ||= {};
976- my $samples = $self->{result_samples};
977- my $handler = $self->{handlers}->{ $attrib };
978- if ( !$handler ) {
979- $handler = $self->make_handler(
980- event => $event,
981- attribute => $attrib,
982- alternates => $self->{attributes}->{$attrib},
983- worst => $self->{worst} eq $attrib,
984- );
985- $self->{handlers}->{$attrib} = $handler;
986- }
987- next GROUPBY unless $handler;
988- $samples->{$val} ||= $event; # Initialize to the first event.
989- $handler->($event, $class_attrib, $global_attrib, $samples, $group_by);
990- }
991- }
992- }
993- else {
994- $self->_make_unrolled_loops($event);
995- $self->{unrolled_loops}->($self, $event, $group_by);
996- }
997-
998- return;
999-}
1000-
1001-sub _make_unrolled_loops {
1002- my ( $self, $event ) = @_;
1003-
1004- my $group_by = $event->{$self->{groupby}};
1005-
1006- my @attrs = grep { $self->{handlers}->{$_} } keys %{$self->{attributes}};
1007- my $globs = $self->{result_globals}; # Global stats for each
1008- my $samples = $self->{result_samples};
1009-
1010- my @lines = (
1011- 'my ( $self, $event, $group_by ) = @_;',
1012- 'my ($val, $class, $global, $idx);',
1013- (ref $group_by ? ('foreach my $group_by ( @$group_by ) {') : ()),
1014- 'my $temp = $self->{result_classes}->{ $group_by }
1015- ||= { map { $_ => { } } @attrs };',
1016- '$samples->{$group_by} ||= $event;', # Always start with the first.
1017- );
1018- foreach my $i ( 0 .. $#attrs ) {
1019- push @lines, (
1020- '$class = $temp->{\'' . $attrs[$i] . '\'};',
1021- '$global = $globs->{\'' . $attrs[$i] . '\'};',
1022- $self->{unrolled_for}->{$attrs[$i]},
1023- );
1024- }
1025- if ( ref $group_by ) {
1026- push @lines, '}'; # Close the loop opened above
1027- }
1028- @lines = map { s/^/ /gm; $_ } @lines; # Indent for debugging
1029- unshift @lines, 'sub {';
1030- push @lines, '}';
1031-
1032- my $code = join("\n", @lines);
1033- PTDEBUG && _d('Unrolled subroutine:', @lines);
1034- my $sub = eval $code;
1035- die $EVAL_ERROR if $EVAL_ERROR;
1036- $self->{unrolled_loops} = $sub;
1037-
1038- return;
1039-}
1040-
1041-sub results {
1042- my ( $self ) = @_;
1043- return {
1044- classes => $self->{result_classes},
1045- globals => $self->{result_globals},
1046- samples => $self->{result_samples},
1047- };
1048-}
1049-
1050-sub set_results {
1051- my ( $self, $results ) = @_;
1052- $self->{result_classes} = $results->{classes};
1053- $self->{result_globals} = $results->{globals};
1054- $self->{result_samples} = $results->{samples};
1055- return;
1056-}
1057-
1058-sub stats {
1059- my ( $self ) = @_;
1060- return {
1061- classes => $self->{class_metrics},
1062- globals => $self->{global_metrics},
1063- };
1064-}
1065-
1066-sub attributes {
1067- my ( $self ) = @_;
1068- return $self->{type_for};
1069-}
1070-
1071-sub set_attribute_types {
1072- my ( $self, $attrib_types ) = @_;
1073- $self->{type_for} = $attrib_types;
1074- return;
1075-}
1076-
1077-sub type_for {
1078- my ( $self, $attrib ) = @_;
1079- return $self->{type_for}->{$attrib};
1080-}
1081-
1082-sub make_handler {
1083- my ( $self, %args ) = @_;
1084- my @required_args = qw(event attribute);
1085- foreach my $arg ( @required_args ) {
1086- die "I need a $arg argument" unless $args{$arg};
1087- }
1088- my ($event, $attrib) = @args{@required_args};
1089-
1090- my $val;
1091- eval { $val= $self->_get_value(%args); };
1092- if ( $EVAL_ERROR ) {
1093- PTDEBUG && _d("Cannot make", $attrib, "handler:", $EVAL_ERROR);
1094- return;
1095- }
1096- return unless defined $val; # can't determine type if it's undef
1097-
1098- my $float_re = qr{[+-]?(?:(?=\d|[.])\d+(?:[.])\d{0,})(?:E[+-]?\d+)?}i;
1099- my $type = $self->type_for($attrib) ? $self->type_for($attrib)
1100- : $attrib =~ m/_crc$/ ? 'string'
1101- : $val =~ m/^(?:\d+|$float_re)$/o ? 'num'
1102- : $val =~ m/^(?:Yes|No)$/ ? 'bool'
1103- : 'string';
1104- PTDEBUG && _d('Type for', $attrib, 'is', $type, '(sample:', $val, ')');
1105- $self->{type_for}->{$attrib} = $type;
1106-
1107- my @lines;
1108-
1109- my %track = (
1110- sum => $type =~ m/num|bool/ ? 1 : 0, # sum of values
1111- unq => $type =~ m/bool|string/ ? 1 : 0, # count of unique values seen
1112- all => $type eq 'num' ? 1 : 0, # all values in bucketed list
1113- );
1114-
1115- my $trf = ($type eq 'bool') ? q{(($val || '') eq 'Yes') ? 1 : 0}
1116- : undef;
1117- if ( $trf ) {
1118- push @lines, q{$val = } . $trf . ';';
1119- }
1120-
1121- if ( $attrib eq 'Query_time' ) {
1122- push @lines, (
1123- '$val =~ s/^(\d+(?:\.\d+)?).*/$1/;',
1124- '$event->{\''.$attrib.'\'} = $val;',
1125- );
1126- }
1127-
1128- if ( $type eq 'num' && $self->{attrib_limit} ) {
1129- push @lines, (
1130- "if ( \$val > $self->{attrib_limit} ) {",
1131- ' $val = $class->{last} ||= 0;',
1132- '}',
1133- '$class->{last} = $val;',
1134- );
1135- }
1136-
1137- my $lt = $type eq 'num' ? '<' : 'lt';
1138- my $gt = $type eq 'num' ? '>' : 'gt';
1139- foreach my $place ( qw($class $global) ) {
1140- my @tmp; # hold lines until PLACE placeholder is replaced
1141-
1142- push @tmp, '++PLACE->{cnt};'; # count of all values seen
1143-
1144- if ( $attrib =~ m/_crc$/ ) {
1145- push @tmp, '$val = $val % 1_000;';
1146- }
1147-
1148- push @tmp, (
1149- 'PLACE->{min} = $val if !defined PLACE->{min} || $val '
1150- . $lt . ' PLACE->{min};',
1151- );
1152- push @tmp, (
1153- 'PLACE->{max} = $val if !defined PLACE->{max} || $val '
1154- . $gt . ' PLACE->{max};',
1155- );
1156- if ( $track{sum} ) {
1157- push @tmp, 'PLACE->{sum} += $val;';
1158- }
1159-
1160- if ( $track{all} ) {
1161- push @tmp, (
1162- 'exists PLACE->{all} or PLACE->{all} = {};',
1163- '++PLACE->{all}->{ EventAggregator::bucket_idx($val) };',
1164- );
1165- }
1166-
1167- push @lines, map { s/PLACE/$place/g; $_ } @tmp;
1168- }
1169-
1170- if ( $track{unq} ) {
1171- push @lines, '++$class->{unq}->{$val}';
1172- }
1173-
1174- if ( $args{worst} ) {
1175- my $op = $type eq 'num' ? '>=' : 'ge';
1176- push @lines, (
1177- 'if ( $val ' . $op . ' ($class->{max} || 0) ) {',
1178- ' $samples->{$group_by} = $event;',
1179- '}',
1180- );
1181- }
1182-
1183- my @unrolled = (
1184- "\$val = \$event->{'$attrib'};",
1185-
1186- ( map { "\$val = \$event->{'$_'} unless defined \$val;" }
1187- grep { $_ ne $attrib } @{$args{alternates}}
1188- ),
1189-
1190- 'defined $val && do {',
1191- @lines,
1192- '};',
1193- );
1194- $self->{unrolled_for}->{$attrib} = join("\n", @unrolled);
1195-
1196- my @code = (
1197- 'sub {',
1198- 'my ( $event, $class, $global, $samples, $group_by ) = @_;',
1199- 'my ($val, $idx);',
1200-
1201- $self->{unrolled_for}->{$attrib},
1202-
1203- 'return;',
1204- '}',
1205- );
1206- $self->{code_for}->{$attrib} = join("\n", @code);
1207- PTDEBUG && _d($attrib, 'handler code:', $self->{code_for}->{$attrib});
1208- my $sub = eval $self->{code_for}->{$attrib};
1209- if ( $EVAL_ERROR ) {
1210- die "Failed to compile $attrib handler code: $EVAL_ERROR";
1211- }
1212-
1213- return $sub;
1214-}
1215-
1216-sub bucket_idx {
1217- my ( $val ) = @_;
1218- return 0 if $val < MIN_BUCK;
1219- my $idx = int(BASE_OFFSET + log($val)/BASE_LOG);
1220- return $idx > (NUM_BUCK-1) ? (NUM_BUCK-1) : $idx;
1221-}
1222-
1223-sub bucket_value {
1224- my ( $bucket ) = @_;
1225- return 0 if $bucket == 0;
1226- die "Invalid bucket: $bucket" if $bucket < 0 || $bucket > (NUM_BUCK-1);
1227- return (BUCK_SIZE**($bucket-1)) * MIN_BUCK;
1228-}
1229-
1230-{
1231- my @buck_tens;
1232- sub buckets_of {
1233- return @buck_tens if @buck_tens;
1234-
1235- my $start_bucket = 0;
1236- my @base10_starts = (0);
1237- map { push @base10_starts, (10**$_)*MIN_BUCK } (1..7);
1238-
1239- for my $base10_bucket ( 0..($#base10_starts-1) ) {
1240- my $next_bucket = bucket_idx( $base10_starts[$base10_bucket+1] );
1241- PTDEBUG && _d('Base 10 bucket', $base10_bucket, 'maps to',
1242- 'base 1.05 buckets', $start_bucket, '..', $next_bucket-1);
1243- for my $base1_05_bucket ($start_bucket..($next_bucket-1)) {
1244- $buck_tens[$base1_05_bucket] = $base10_bucket;
1245- }
1246- $start_bucket = $next_bucket;
1247- }
1248-
1249- map { $buck_tens[$_] = 7 } ($start_bucket..(NUM_BUCK-1));
1250-
1251- return @buck_tens;
1252- }
1253-}
1254-
1255-sub calculate_statistical_metrics {
1256- my ( $self, %args ) = @_;
1257- my $classes = $self->{result_classes};
1258- my $globals = $self->{result_globals};
1259- my $class_metrics = $self->{class_metrics};
1260- my $global_metrics = $self->{global_metrics};
1261- PTDEBUG && _d('Calculating statistical_metrics');
1262- foreach my $attrib ( keys %$globals ) {
1263- if ( exists $globals->{$attrib}->{all} ) {
1264- $global_metrics->{$attrib}
1265- = $self->_calc_metrics(
1266- $globals->{$attrib}->{all},
1267- $globals->{$attrib},
1268- );
1269- }
1270-
1271- foreach my $class ( keys %$classes ) {
1272- if ( exists $classes->{$class}->{$attrib}->{all} ) {
1273- $class_metrics->{$class}->{$attrib}
1274- = $self->_calc_metrics(
1275- $classes->{$class}->{$attrib}->{all},
1276- $classes->{$class}->{$attrib}
1277- );
1278- }
1279- }
1280- }
1281-
1282- return;
1283-}
1284-
1285-sub _calc_metrics {
1286- my ( $self, $vals, $args ) = @_;
1287- my $statistical_metrics = {
1288- pct_95 => 0,
1289- stddev => 0,
1290- median => 0,
1291- cutoff => undef,
1292- };
1293-
1294- return $statistical_metrics
1295- unless defined $vals && %$vals && $args->{cnt};
1296-
1297- my $n_vals = $args->{cnt};
1298- if ( $n_vals == 1 || $args->{max} == $args->{min} ) {
1299- my $v = $args->{max} || 0;
1300- my $bucket = int(6 + ( log($v > 0 ? $v : MIN_BUCK) / log(10)));
1301- $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
1302- return {
1303- pct_95 => $v,
1304- stddev => 0,
1305- median => $v,
1306- cutoff => $n_vals,
1307- };
1308- }
1309- elsif ( $n_vals == 2 ) {
1310- foreach my $v ( $args->{min}, $args->{max} ) {
1311- my $bucket = int(6 + ( log($v && $v > 0 ? $v : MIN_BUCK) / log(10)));
1312- $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
1313- }
1314- my $v = $args->{max} || 0;
1315- my $mean = (($args->{min} || 0) + $v) / 2;
1316- return {
1317- pct_95 => $v,
1318- stddev => sqrt((($v - $mean) ** 2) *2),
1319- median => $mean,
1320- cutoff => $n_vals,
1321- };
1322- }
1323-
1324- my $cutoff = $n_vals >= 10 ? int ( $n_vals * 0.95 ) : $n_vals;
1325- $statistical_metrics->{cutoff} = $cutoff;
1326-
1327- my $total_left = $n_vals;
1328- my $top_vals = $n_vals - $cutoff; # vals > 95th
1329- my $sum_excl = 0;
1330- my $sum = 0;
1331- my $sumsq = 0;
1332- my $mid = int($n_vals / 2);
1333- my $median = 0;
1334- my $prev = NUM_BUCK-1; # Used for getting median when $cutoff is odd
1335- my $bucket_95 = 0; # top bucket in 95th
1336-
1337- PTDEBUG && _d('total vals:', $total_left, 'top vals:', $top_vals, 'mid:', $mid);
1338-
1339- my @buckets = map { 0 } (0..NUM_BUCK-1);
1340- map { $buckets[$_] = $vals->{$_} } keys %$vals;
1341- $vals = \@buckets; # repoint vals from given hashref to our array
1342-
1343- BUCKET:
1344- for my $bucket ( reverse 0..(NUM_BUCK-1) ) {
1345- my $val = $vals->[$bucket];
1346- next BUCKET unless $val;
1347-
1348- $total_left -= $val;
1349- $sum_excl += $val;
1350- $bucket_95 = $bucket if !$bucket_95 && $sum_excl > $top_vals;
1351-
1352- if ( !$median && $total_left <= $mid ) {
1353- $median = (($cutoff % 2) || ($val > 1)) ? $buck_vals[$bucket]
1354- : ($buck_vals[$bucket] + $buck_vals[$prev]) / 2;
1355- }
1356-
1357- $sum += $val * $buck_vals[$bucket];
1358- $sumsq += $val * ($buck_vals[$bucket]**2);
1359- $prev = $bucket;
1360- }
1361-
1362- my $var = $sumsq/$n_vals - ( ($sum/$n_vals) ** 2 );
1363- my $stddev = $var > 0 ? sqrt($var) : 0;
1364- my $maxstdev = (($args->{max} || 0) - ($args->{min} || 0)) / 2;
1365- $stddev = $stddev > $maxstdev ? $maxstdev : $stddev;
1366-
1367- PTDEBUG && _d('sum:', $sum, 'sumsq:', $sumsq, 'stddev:', $stddev,
1368- 'median:', $median, 'prev bucket:', $prev,
1369- 'total left:', $total_left, 'sum excl', $sum_excl,
1370- 'bucket 95:', $bucket_95, $buck_vals[$bucket_95]);
1371-
1372- $statistical_metrics->{stddev} = $stddev;
1373- $statistical_metrics->{pct_95} = $buck_vals[$bucket_95];
1374- $statistical_metrics->{median} = $median;
1375-
1376- return $statistical_metrics;
1377-}
1378-
1379-sub metrics {
1380- my ( $self, %args ) = @_;
1381- foreach my $arg ( qw(attrib where) ) {
1382- die "I need a $arg argument" unless defined $args{$arg};
1383- }
1384- my $attrib = $args{attrib};
1385- my $where = $args{where};
1386-
1387- my $stats = $self->results();
1388- my $metrics = $self->stats();
1389- my $store = $stats->{classes}->{$where}->{$attrib};
1390- my $global_cnt = $stats->{globals}->{$attrib}->{cnt};
1391-
1392- return {
1393- cnt => $store->{cnt},
1394- pct => $global_cnt && $store->{cnt} ? $store->{cnt} / $global_cnt : 0,
1395- sum => $store->{sum},
1396- min => $store->{min},
1397- max => $store->{max},
1398- avg => $store->{sum} && $store->{cnt} ? $store->{sum} / $store->{cnt} : 0,
1399- median => $metrics->{classes}->{$where}->{$attrib}->{median} || 0,
1400- pct_95 => $metrics->{classes}->{$where}->{$attrib}->{pct_95} || 0,
1401- stddev => $metrics->{classes}->{$where}->{$attrib}->{stddev} || 0,
1402- };
1403-}
1404-
1405-sub top_events {
1406- my ( $self, %args ) = @_;
1407- my $classes = $self->{result_classes};
1408- my @sorted = reverse sort { # Sorted list of $groupby values
1409- $classes->{$a}->{$args{attrib}}->{$args{orderby}}
1410- <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}}
1411- } grep {
1412- defined $classes->{$_}->{$args{attrib}}->{$args{orderby}}
1413- } keys %$classes;
1414- my @chosen; # top events
1415- my @other; # other events (< top)
1416- my ($total, $count) = (0, 0);
1417- foreach my $groupby ( @sorted ) {
1418- if (
1419- (!$args{total} || $total < $args{total} )
1420- && ( !$args{count} || $count < $args{count} )
1421- ) {
1422- push @chosen, [$groupby, 'top', $count+1];
1423- }
1424-
1425- elsif ( $args{ol_attrib} && (!$args{ol_freq}
1426- || $classes->{$groupby}->{$args{ol_attrib}}->{cnt} >= $args{ol_freq})
1427- ) {
1428- my $stats = $self->{class_metrics}->{$groupby}->{$args{ol_attrib}};
1429- if ( ($stats->{pct_95} || 0) >= $args{ol_limit} ) {
1430- push @chosen, [$groupby, 'outlier', $count+1];
1431- }
1432- else {
1433- push @other, [$groupby, 'misc', $count+1];
1434- }
1435- }
1436-
1437- else {
1438- push @other, [$groupby, 'misc', $count+1];
1439- }
1440-
1441- $total += $classes->{$groupby}->{$args{attrib}}->{$args{orderby}};
1442- $count++;
1443- }
1444- return \@chosen, \@other;
1445-}
1446-
1447-sub add_new_attributes {
1448- my ( $self, $event ) = @_;
1449- return unless $event;
1450-
1451- map {
1452- my $attrib = $_;
1453- $self->{attributes}->{$attrib} = [$attrib];
1454- $self->{alt_attribs}->{$attrib} = make_alt_attrib($attrib);
1455- push @{$self->{all_attribs}}, $attrib;
1456- PTDEBUG && _d('Added new attribute:', $attrib);
1457- }
1458- grep {
1459- $_ ne $self->{groupby}
1460- && !exists $self->{attributes}->{$_}
1461- && !exists $self->{ignore_attribs}->{$_}
1462- }
1463- keys %$event;
1464-
1465- return;
1466-}
1467-
1468-sub get_attributes {
1469- my ( $self ) = @_;
1470- return $self->{all_attribs};
1471-}
1472-
1473-sub events_processed {
1474- my ( $self ) = @_;
1475- return $self->{n_events};
1476-}
1477-
1478-sub make_alt_attrib {
1479- my ( @attribs ) = @_;
1480-
1481- my $attrib = shift @attribs; # Primary attribute.
1482- return sub {} unless @attribs; # No alternates.
1483-
1484- my @lines;
1485- push @lines, 'sub { my ( $event ) = @_; my $alt_attrib;';
1486- push @lines, map {
1487- "\$alt_attrib = '$_' if !defined \$alt_attrib "
1488- . "&& exists \$event->{'$_'};"
1489- } @attribs;
1490- push @lines, 'return $alt_attrib; }';
1491- PTDEBUG && _d('alt attrib sub for', $attrib, ':', @lines);
1492- my $sub = eval join("\n", @lines);
1493- die if $EVAL_ERROR;
1494- return $sub;
1495-}
1496-
1497-sub merge {
1498- my ( @ea_objs ) = @_;
1499- PTDEBUG && _d('Merging', scalar @ea_objs, 'ea');
1500- return unless scalar @ea_objs;
1501-
1502- my $ea1 = shift @ea_objs;
1503- my $r1 = $ea1->results;
1504- my $worst = $ea1->{worst}; # for merging, finding worst sample
1505-
1506- my %attrib_types = %{ $ea1->attributes() };
1507-
1508- foreach my $ea ( @ea_objs ) {
1509- die "EventAggregator objects have different groupby: "
1510- . "$ea1->{groupby} and $ea->{groupby}"
1511- unless $ea1->{groupby} eq $ea->{groupby};
1512- die "EventAggregator objects have different worst: "
1513- . "$ea1->{worst} and $ea->{worst}"
1514- unless $ea1->{worst} eq $ea->{worst};
1515-
1516- my $attrib_types = $ea->attributes();
1517- map {
1518- $attrib_types{$_} = $attrib_types->{$_}
1519- unless exists $attrib_types{$_};
1520- } keys %$attrib_types;
1521- }
1522-
1523- my $r_merged = {
1524- classes => {},
1525- globals => _deep_copy_attribs($r1->{globals}),
1526- samples => {},
1527- };
1528- map {
1529- $r_merged->{classes}->{$_}
1530- = _deep_copy_attribs($r1->{classes}->{$_});
1531-
1532- @{$r_merged->{samples}->{$_}}{keys %{$r1->{samples}->{$_}}}
1533- = values %{$r1->{samples}->{$_}};
1534- } keys %{$r1->{classes}};
1535-
1536- for my $i ( 0..$#ea_objs ) {
1537- PTDEBUG && _d('Merging ea obj', ($i + 1));
1538- my $r2 = $ea_objs[$i]->results;
1539-
1540- eval {
1541- CLASS:
1542- foreach my $class ( keys %{$r2->{classes}} ) {
1543- my $r1_class = $r_merged->{classes}->{$class};
1544- my $r2_class = $r2->{classes}->{$class};
1545-
1546- if ( $r1_class && $r2_class ) {
1547- CLASS_ATTRIB:
1548- foreach my $attrib ( keys %$r2_class ) {
1549- PTDEBUG && _d('merge', $attrib);
1550- if ( $r1_class->{$attrib} && $r2_class->{$attrib} ) {
1551- _add_attrib_vals($r1_class->{$attrib}, $r2_class->{$attrib});
1552- }
1553- elsif ( !$r1_class->{$attrib} ) {
1554- PTDEBUG && _d('copy', $attrib);
1555- $r1_class->{$attrib} =
1556- _deep_copy_attrib_vals($r2_class->{$attrib})
1557- }
1558- }
1559- }
1560- elsif ( !$r1_class ) {
1561- PTDEBUG && _d('copy class');
1562- $r_merged->{classes}->{$class} = _deep_copy_attribs($r2_class);
1563- }
1564-
1565- my $new_worst_sample;
1566- if ( $r_merged->{samples}->{$class} && $r2->{samples}->{$class} ) {
1567- if ( $r2->{samples}->{$class}->{$worst}
1568- > $r_merged->{samples}->{$class}->{$worst} ) {
1569- $new_worst_sample = $r2->{samples}->{$class}
1570- }
1571- }
1572- elsif ( !$r_merged->{samples}->{$class} ) {
1573- $new_worst_sample = $r2->{samples}->{$class};
1574- }
1575- if ( $new_worst_sample ) {
1576- PTDEBUG && _d('New worst sample:', $worst, '=',
1577- $new_worst_sample->{$worst}, 'item:', substr($class, 0, 100));
1578- my %new_sample;
1579- @new_sample{keys %$new_worst_sample}
1580- = values %$new_worst_sample;
1581- $r_merged->{samples}->{$class} = \%new_sample;
1582- }
1583- }
1584- };
1585- if ( $EVAL_ERROR ) {
1586- warn "Error merging class/sample: $EVAL_ERROR";
1587- }
1588-
1589- eval {
1590- GLOBAL_ATTRIB:
1591- PTDEBUG && _d('Merging global attributes');
1592- foreach my $attrib ( keys %{$r2->{globals}} ) {
1593- my $r1_global = $r_merged->{globals}->{$attrib};
1594- my $r2_global = $r2->{globals}->{$attrib};
1595-
1596- if ( $r1_global && $r2_global ) {
1597- PTDEBUG && _d('merge', $attrib);
1598- _add_attrib_vals($r1_global, $r2_global);
1599- }
1600- elsif ( !$r1_global ) {
1601- PTDEBUG && _d('copy', $attrib);
1602- $r_merged->{globals}->{$attrib}
1603- = _deep_copy_attrib_vals($r2_global);
1604- }
1605- }
1606- };
1607- if ( $EVAL_ERROR ) {
1608- warn "Error merging globals: $EVAL_ERROR";
1609- }
1610- }
1611-
1612- my $ea_merged = new EventAggregator(
1613- groupby => $ea1->{groupby},
1614- worst => $ea1->{worst},
1615- attributes => { map { $_=>[$_] } keys %attrib_types },
1616- );
1617- $ea_merged->set_results($r_merged);
1618- $ea_merged->set_attribute_types(\%attrib_types);
1619- return $ea_merged;
1620-}
1621-
1622-sub _add_attrib_vals {
1623- my ( $vals1, $vals2 ) = @_;
1624-
1625- foreach my $val ( keys %$vals1 ) {
1626- my $val1 = $vals1->{$val};
1627- my $val2 = $vals2->{$val};
1628-
1629- if ( (!ref $val1) && (!ref $val2) ) {
1630- die "undefined $val value" unless defined $val1 && defined $val2;
1631-
1632- my $is_num = exists $vals1->{sum} ? 1 : 0;
1633- if ( $val eq 'max' ) {
1634- if ( $is_num ) {
1635- $vals1->{$val} = $val1 > $val2 ? $val1 : $val2;
1636- }
1637- else {
1638- $vals1->{$val} = $val1 gt $val2 ? $val1 : $val2;
1639- }
1640- }
1641- elsif ( $val eq 'min' ) {
1642- if ( $is_num ) {
1643- $vals1->{$val} = $val1 < $val2 ? $val1 : $val2;
1644- }
1645- else {
1646- $vals1->{$val} = $val1 lt $val2 ? $val1 : $val2;
1647- }
1648- }
1649- else {
1650- $vals1->{$val} += $val2;
1651- }
1652- }
1653- elsif ( (ref $val1 eq 'ARRAY') && (ref $val2 eq 'ARRAY') ) {
1654- die "Empty $val arrayref" unless @$val1 && @$val2;
1655- my $n_buckets = (scalar @$val1) - 1;
1656- for my $i ( 0..$n_buckets ) {
1657- $vals1->{$val}->[$i] += $val2->[$i];
1658- }
1659- }
1660- elsif ( (ref $val1 eq 'HASH') && (ref $val2 eq 'HASH') ) {
1661- die "Empty $val hashref" unless %$val1 and %$val2;
1662- map { $vals1->{$val}->{$_} += $val2->{$_} } keys %$val2;
1663- }
1664- else {
1665- PTDEBUG && _d('vals1:', Dumper($vals1));
1666- PTDEBUG && _d('vals2:', Dumper($vals2));
1667- die "$val type mismatch";
1668- }
1669- }
1670-
1671- return;
1672-}
1673-
1674-sub _deep_copy_attribs {
1675- my ( $attribs ) = @_;
1676- my $copy = {};
1677- foreach my $attrib ( keys %$attribs ) {
1678- $copy->{$attrib} = _deep_copy_attrib_vals($attribs->{$attrib});
1679- }
1680- return $copy;
1681-}
1682-
1683-sub _deep_copy_attrib_vals {
1684- my ( $vals ) = @_;
1685- my $copy;
1686- if ( ref $vals eq 'HASH' ) {
1687- $copy = {};
1688- foreach my $val ( keys %$vals ) {
1689- if ( my $ref_type = ref $val ) {
1690- if ( $ref_type eq 'ARRAY' ) {
1691- my $n_elems = (scalar @$val) - 1;
1692- $copy->{$val} = [ map { undef } ( 0..$n_elems ) ];
1693- for my $i ( 0..$n_elems ) {
1694- $copy->{$val}->[$i] = $vals->{$val}->[$i];
1695- }
1696- }
1697- elsif ( $ref_type eq 'HASH' ) {
1698- $copy->{$val} = {};
1699- map { $copy->{$val}->{$_} += $vals->{$val}->{$_} }
1700- keys %{$vals->{$val}}
1701- }
1702- else {
1703- die "I don't know how to deep copy a $ref_type reference";
1704- }
1705- }
1706- else {
1707- $copy->{$val} = $vals->{$val};
1708- }
1709- }
1710- }
1711- else {
1712- $copy = $vals;
1713- }
1714- return $copy;
1715-}
1716-
1717-sub _get_value {
1718- my ( $self, %args ) = @_;
1719- my ($event, $attrib, $alts) = @args{qw(event attribute alternates)};
1720- return unless $event && $attrib;
1721-
1722- my $value;
1723- if ( exists $event->{$attrib} ) {
1724- $value = $event->{$attrib};
1725- }
1726- elsif ( $alts ) {
1727- my $found_value = 0;
1728- foreach my $alt_attrib( @$alts ) {
1729- if ( exists $event->{$alt_attrib} ) {
1730- $value = $event->{$alt_attrib};
1731- $found_value = 1;
1732- last;
1733- }
1734- }
1735- die "Event does not have attribute $attrib or any of its alternates"
1736- unless $found_value;
1737- }
1738- else {
1739- die "Event does not have attribute $attrib and there are no alterantes";
1740- }
1741-
1742- return $value;
1743-}
1744-
1745-sub _d {
1746- my ($package, undef, $line) = caller 0;
1747- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1748- map { defined $_ ? $_ : 'undef' }
1749- @_;
1750- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1751-}
1752-
1753-1;
1754-}
1755-# ###########################################################################
1756-# End EventAggregator package
1757-# ###########################################################################
1758-
1759-# ###########################################################################
1760-# QueryParser package
1761-# This package is a copy without comments from the original. The original
1762-# with comments and its test file can be found in the Bazaar repository at,
1763-# lib/QueryParser.pm
1764-# t/lib/QueryParser.t
1765-# See https://launchpad.net/percona-toolkit for more information.
1766-# ###########################################################################
1767-{
1768-package QueryParser;
1769-
1770-use strict;
1771-use warnings FATAL => 'all';
1772-use English qw(-no_match_vars);
1773-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1774-
1775-our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
1776-our $tbl_regex = qr{
1777- \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
1778- \b\s*
1779- \(? # Optional paren around tables
1780- ($tbl_ident
1781- (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
1782- )
1783- }xio;
1784-our $has_derived = qr{
1785- \b(?:FROM|JOIN|,)
1786- \s*\(\s*SELECT
1787- }xi;
1788-
1789-our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
1790-
1791-our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
1792-
1793-sub new {
1794- my ( $class ) = @_;
1795- bless {}, $class;
1796-}
1797-
1798-sub get_tables {
1799- my ( $self, $query ) = @_;
1800- return unless $query;
1801- PTDEBUG && _d('Getting tables for', $query);
1802-
1803- my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
1804- if ( $ddl_stmt ) {
1805- PTDEBUG && _d('Special table type:', $ddl_stmt);
1806- $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
1807- if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
1808- PTDEBUG && _d('Query alters a database, not a table');
1809- return ();
1810- }
1811- if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
1812- my ($select) = $query =~ m/\b(SELECT\b.+)/is;
1813- PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
1814- return $self->get_tables($select);
1815- }
1816- my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
1817- PTDEBUG && _d('Matches table:', $tbl);
1818- return ($tbl);
1819- }
1820-
1821- $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1822-
1823- if ( $query =~ s/^\s*LOCK TABLES\s+//i ) {
1824- PTDEBUG && _d('Special table type: LOCK TABLES');
1825- $query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\s*//gi;
1826- PTDEBUG && _d('Locked tables:', $query);
1827- $query = "FROM $query";
1828- }
1829-
1830- $query =~ s/\\["']//g; # quoted strings
1831- $query =~ s/".*?"/?/sg; # quoted strings
1832- $query =~ s/'.*?'/?/sg; # quoted strings
1833-
1834- my @tables;
1835- foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
1836- PTDEBUG && _d('Match tables:', $tbls);
1837-
1838- next if $tbls =~ m/\ASELECT\b/i;
1839-
1840- foreach my $tbl ( split(',', $tbls) ) {
1841- $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
1842-
1843- if ( $tbl !~ m/[a-zA-Z]/ ) {
1844- PTDEBUG && _d('Skipping suspicious table name:', $tbl);
1845- next;
1846- }
1847-
1848- push @tables, $tbl;
1849- }
1850- }
1851- return @tables;
1852-}
1853-
1854-sub has_derived_table {
1855- my ( $self, $query ) = @_;
1856- my $match = $query =~ m/$has_derived/;
1857- PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
1858- return $match;
1859-}
1860-
1861-sub get_aliases {
1862- my ( $self, $query, $list ) = @_;
1863-
1864- my $result = {
1865- DATABASE => {},
1866- TABLE => {},
1867- };
1868- return $result unless $query;
1869-
1870- $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1871-
1872- $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
1873-
1874- my @tbl_refs;
1875- my ($tbl_refs, $from) = $query =~ m{
1876- (
1877- (FROM|INTO|UPDATE)\b\s* # Keyword before table refs
1878- .+? # Table refs
1879- )
1880- (?:\s+|\z) # If the query does not end with the table
1881- (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
1882- }ix;
1883-
1884- if ( $tbl_refs ) {
1885-
1886- if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1887- $tbl_refs =~ s/\([^\)]+\)\s*//;
1888- }
1889-
1890- PTDEBUG && _d('tbl refs:', $tbl_refs);
1891-
1892- my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
1893-
1894- my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
1895-
1896- $tbl_refs =~ s/ = /=/g;
1897-
1898- while (
1899- $tbl_refs =~ m{
1900- $before_tbl\b\s*
1901- ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
1902- \s*$after_tbl
1903- }xgio )
1904- {
1905- my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
1906- PTDEBUG && _d('Match table:', $tbl_ref);
1907- push @tbl_refs, $tbl_ref;
1908- $alias = $self->trim_identifier($alias);
1909-
1910- if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
1911- PTDEBUG && _d('Subquery', $tbl_ref);
1912- $result->{TABLE}->{$alias} = undef;
1913- next;
1914- }
1915-
1916- my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
1917- $db = $self->trim_identifier($db);
1918- $tbl = $self->trim_identifier($tbl);
1919- $result->{TABLE}->{$alias || $tbl} = $tbl;
1920- $result->{DATABASE}->{$tbl} = $db if $db;
1921- }
1922- }
1923- else {
1924- PTDEBUG && _d("No tables ref in", $query);
1925- }
1926-
1927- if ( $list ) {
1928- return \@tbl_refs;
1929- }
1930- else {
1931- return $result;
1932- }
1933-}
1934-
1935-sub split {
1936- my ( $self, $query ) = @_;
1937- return unless $query;
1938- $query = $self->clean_query($query);
1939- PTDEBUG && _d('Splitting', $query);
1940-
1941- my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
1942-
1943- my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
1944-
1945- my @statements;
1946- if ( @split_statements == 1 ) {
1947- push @statements, $query;
1948- }
1949- else {
1950- for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
1951- push @statements, $split_statements[$i].$split_statements[$i+1];
1952-
1953- if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
1954- $statements[-2] .= pop @statements;
1955- }
1956- }
1957- }
1958-
1959- PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
1960- return @statements;
1961-}
1962-
1963-sub clean_query {
1964- my ( $self, $query ) = @_;
1965- return unless $query;
1966- $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
1967- $query =~ s/^\s+//; # Remove leading spaces
1968- $query =~ s/\s+$//; # Remove trailing spaces
1969- $query =~ s/\s{2,}/ /g; # Remove extra spaces
1970- return $query;
1971-}
1972-
1973-sub split_subquery {
1974- my ( $self, $query ) = @_;
1975- return unless $query;
1976- $query = $self->clean_query($query);
1977- $query =~ s/;$//;
1978-
1979- my @subqueries;
1980- my $sqno = 0; # subquery number
1981- my $pos = 0;
1982- while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
1983- $pos = pos($query);
1984- my $word = $1;
1985- PTDEBUG && _d($word, $sqno);
1986- if ( $word =~ m/^\(?SELECT\b/i ) {
1987- my $start_pos = $pos - length($word) - 1;
1988- if ( $start_pos ) {
1989- $sqno++;
1990- PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
1991- $subqueries[$sqno] = {
1992- start_pos => $start_pos,
1993- end_pos => 0,
1994- len => 0,
1995- words => [$word],
1996- lp => 1, # left parentheses
1997- rp => 0, # right parentheses
1998- done => 0,
1999- };
2000- }
2001- else {
2002- PTDEBUG && _d('Main SELECT at pos 0');
2003- }
2004- }
2005- else {
2006- next unless $sqno; # next unless we're in a subquery
2007- PTDEBUG && _d('In subquery', $sqno);
2008- my $sq = $subqueries[$sqno];
2009- if ( $sq->{done} ) {
2010- PTDEBUG && _d('This subquery is done; SQL is for',
2011- ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
2012- next;
2013- }
2014- push @{$sq->{words}}, $word;
2015- my $lp = ($word =~ tr/\(//) || 0;
2016- my $rp = ($word =~ tr/\)//) || 0;
2017- PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
2018- if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
2019- my $end_pos = $pos - 1;
2020- PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
2021- $sq->{end_pos} = $end_pos;
2022- $sq->{len} = $end_pos - $sq->{start_pos};
2023- }
2024- }
2025- }
2026-
2027- for my $i ( 1..$#subqueries ) {
2028- my $sq = $subqueries[$i];
2029- next unless $sq;
2030- $sq->{sql} = join(' ', @{$sq->{words}});
2031- substr $query,
2032- $sq->{start_pos} + 1, # +1 for (
2033- $sq->{len} - 1, # -1 for )
2034- "__subquery_$i";
2035- }
2036-
2037- return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
2038-}
2039-
2040-sub query_type {
2041- my ( $self, $query, $qr ) = @_;
2042- my ($type, undef) = $qr->distill_verbs($query);
2043- my $rw;
2044- if ( $type =~ m/^SELECT\b/ ) {
2045- $rw = 'read';
2046- }
2047- elsif ( $type =~ m/^$data_manip_stmts\b/
2048- || $type =~ m/^$data_def_stmts\b/ ) {
2049- $rw = 'write'
2050- }
2051-
2052- return {
2053- type => $type,
2054- rw => $rw,
2055- }
2056-}
2057-
2058-sub get_columns {
2059- my ( $self, $query ) = @_;
2060- my $cols = [];
2061- return $cols unless $query;
2062- my $cols_def;
2063-
2064- if ( $query =~ m/^SELECT/i ) {
2065- $query =~ s/
2066- ^SELECT\s+
2067- (?:ALL
2068- |DISTINCT
2069- |DISTINCTROW
2070- |HIGH_PRIORITY
2071- |STRAIGHT_JOIN
2072- |SQL_SMALL_RESULT
2073- |SQL_BIG_RESULT
2074- |SQL_BUFFER_RESULT
2075- |SQL_CACHE
2076- |SQL_NO_CACHE
2077- |SQL_CALC_FOUND_ROWS
2078- )\s+
2079- /SELECT /xgi;
2080- ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
2081- }
2082- elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
2083- ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
2084- }
2085-
2086- PTDEBUG && _d('Columns:', $cols_def);
2087- if ( $cols_def ) {
2088- @$cols = split(',', $cols_def);
2089- map {
2090- my $col = $_;
2091- $col = s/^\s+//g;
2092- $col = s/\s+$//g;
2093- $col;
2094- } @$cols;
2095- }
2096-
2097- return $cols;
2098-}
2099-
2100-sub parse {
2101- my ( $self, $query ) = @_;
2102- return unless $query;
2103- my $parsed = {};
2104-
2105- $query =~ s/\n/ /g;
2106- $query = $self->clean_query($query);
2107-
2108- $parsed->{query} = $query,
2109- $parsed->{tables} = $self->get_aliases($query, 1);
2110- $parsed->{columns} = $self->get_columns($query);
2111-
2112- my ($type) = $query =~ m/^(\w+)/;
2113- $parsed->{type} = lc $type;
2114-
2115-
2116- $parsed->{sub_queries} = [];
2117-
2118- return $parsed;
2119-}
2120-
2121-sub extract_tables {
2122- my ( $self, %args ) = @_;
2123- my $query = $args{query};
2124- my $default_db = $args{default_db};
2125- my $q = $self->{Quoter} || $args{Quoter};
2126- return unless $query;
2127- PTDEBUG && _d('Extracting tables');
2128- my @tables;
2129- my %seen;
2130- foreach my $db_tbl ( $self->get_tables($query) ) {
2131- next unless $db_tbl;
2132- next if $seen{$db_tbl}++; # Unique-ify for issue 337.
2133- my ( $db, $tbl ) = $q->split_unquote($db_tbl);
2134- push @tables, [ $db || $default_db, $tbl ];
2135- }
2136- return @tables;
2137-}
2138-
2139-sub trim_identifier {
2140- my ($self, $str) = @_;
2141- return unless defined $str;
2142- $str =~ s/`//g;
2143- $str =~ s/^\s+//;
2144- $str =~ s/\s+$//;
2145- return $str;
2146-}
2147-
2148-sub _d {
2149- my ($package, undef, $line) = caller 0;
2150- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2151- map { defined $_ ? $_ : 'undef' }
2152- @_;
2153- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2154-}
2155-
2156-1;
2157-}
2158-# ###########################################################################
2159-# End QueryParser package
2160-# ###########################################################################
2161-
2162-# ###########################################################################
2163-# QueryRewriter package
2164-# This package is a copy without comments from the original. The original
2165-# with comments and its test file can be found in the Bazaar repository at,
2166-# lib/QueryRewriter.pm
2167-# t/lib/QueryRewriter.t
2168-# See https://launchpad.net/percona-toolkit for more information.
2169-# ###########################################################################
2170-{
2171-package QueryRewriter;
2172-
2173-use strict;
2174-use warnings FATAL => 'all';
2175-use English qw(-no_match_vars);
2176-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2177-
2178-our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
2179- |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
2180-my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
2181-my $bal;
2182-$bal = qr/
2183- \(
2184- (?:
2185- (?> [^()]+ ) # Non-parens without backtracking
2186- |
2187- (??{ $bal }) # Group with matching parens
2188- )*
2189- \)
2190- /x;
2191-
2192-my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
2193-my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
2194-my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
2195-my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
2196-
2197-
2198-sub new {
2199- my ( $class, %args ) = @_;
2200- my $self = { %args };
2201- return bless $self, $class;
2202-}
2203-
2204-sub strip_comments {
2205- my ( $self, $query ) = @_;
2206- return unless $query;
2207- $query =~ s/$olc_re//go;
2208- $query =~ s/$mlc_re//go;
2209- if ( $query =~ m/$vlc_rf/i ) { # contains show + version
2210- $query =~ s/$vlc_re//go;
2211- }
2212- return $query;
2213-}
2214-
2215-sub shorten {
2216- my ( $self, $query, $length ) = @_;
2217- $query =~ s{
2218- \A(
2219- (?:INSERT|REPLACE)
2220- (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
2221- (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
2222- )
2223- \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
2224- {$1 /*... omitted ...*/$2}xsi;
2225-
2226- return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
2227-
2228- my $last_length = 0;
2229- my $query_length = length($query);
2230- while (
2231- $length > 0
2232- && $query_length > $length
2233- && $query_length < ( $last_length || $query_length + 1 )
2234- ) {
2235- $last_length = $query_length;
2236- $query =~ s{
2237- (\bIN\s*\() # The opening of an IN list
2238- ([^\)]+) # Contents of the list, assuming no item contains paren
2239- (?=\)) # Close of the list
2240- }
2241- {
2242- $1 . __shorten($2)
2243- }gexsi;
2244- }
2245-
2246- return $query;
2247-}
2248-
2249-sub __shorten {
2250- my ( $snippet ) = @_;
2251- my @vals = split(/,/, $snippet);
2252- return $snippet unless @vals > 20;
2253- my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
2254- return
2255- join(',', @keep)
2256- . "/*... omitted "
2257- . scalar(@vals)
2258- . " items ...*/";
2259-}
2260-
2261-sub fingerprint {
2262- my ( $self, $query ) = @_;
2263-
2264- $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
2265- && return 'mysqldump';
2266- $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query
2267- && return 'percona-toolkit';
2268- $query =~ m/\Aadministrator command: /
2269- && return $query;
2270- $query =~ m/\A\s*(call\s+\S+)\(/i
2271- && return lc($1); # Warning! $1 used, be careful.
2272- if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
2273- $query = $beginning; # Shorten multi-value INSERT statements ASAP
2274- }
2275-
2276- $query =~ s/$olc_re//go;
2277- $query =~ s/$mlc_re//go;
2278- $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
2279- && return $query;
2280-
2281- $query =~ s/\\["']//g; # quoted strings
2282- $query =~ s/".*?"/?/sg; # quoted strings
2283- $query =~ s/'.*?'/?/sg; # quoted strings
2284-
2285- if ( $self->{match_md5_checksums} ) {
2286- $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
2287- }
2288-
2289- if ( !$self->{match_embedded_numbers} ) {
2290- $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
2291- }
2292- else {
2293- $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
2294- }
2295-
2296- if ( $self->{match_md5_checksums} ) {
2297- $query =~ s/[xb+-]\?/?/g;
2298- }
2299- else {
2300- $query =~ s/[xb.+-]\?/?/g;
2301- }
2302-
2303- $query =~ s/\A\s+//; # Chop off leading whitespace
2304- chomp $query; # Kill trailing whitespace
2305- $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
2306- $query = lc $query;
2307- $query =~ s/\bnull\b/?/g; # Get rid of NULLs
2308- $query =~ s{ # Collapse IN and VALUES lists
2309- \b(in|values?)(?:[\s,]*\([\s?,]*\))+
2310- }
2311- {$1(?+)}gx;
2312- $query =~ s{ # Collapse UNION
2313- \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
2314- }
2315- {$1 /*repeat$2*/}xg;
2316- $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
2317-
2318- if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
2319- 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
2320- }
2321-
2322- return $query;
2323-}
2324-
2325-sub distill_verbs {
2326- my ( $self, $query ) = @_;
2327-
2328- $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
2329- $query =~ m/\A\s*use\s+/ && return "USE";
2330- $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
2331- $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
2332-
2333- if ( $query =~ m/\Aadministrator command:/ ) {
2334- $query =~ s/administrator command:/ADMIN/;
2335- $query = uc $query;
2336- return $query;
2337- }
2338-
2339- $query = $self->strip_comments($query);
2340-
2341- if ( $query =~ m/\A\s*SHOW\s+/i ) {
2342- PTDEBUG && _d($query);
2343-
2344- $query = uc $query;
2345- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
2346- $query =~ s/\s+COUNT[^)]+\)//g;
2347-
2348- $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
2349-
2350- $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
2351- $query =~ s/\s+/ /g;
2352- PTDEBUG && _d($query);
2353- return $query;
2354- }
2355-
2356- eval $QueryParser::data_def_stmts;
2357- eval $QueryParser::tbl_ident;
2358- my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
2359- if ( $dds) {
2360- my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
2361- $obj = uc $obj if $obj;
2362- PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
2363- my ($db_or_tbl)
2364- = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
2365- PTDEBUG && _d('Matches db or table:', $db_or_tbl);
2366- return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
2367- }
2368-
2369- my @verbs = $query =~ m/\b($verbs)\b/gio;
2370- @verbs = do {
2371- my $last = '';
2372- grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
2373- };
2374-
2375- if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
2376- PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
2377- my $union = grep { $_ eq 'UNION' } @verbs;
2378- @verbs = $union ? qw(SELECT UNION) : qw(SELECT);
2379- }
2380-
2381- my $verb_str = join(q{ }, @verbs);
2382- return $verb_str;
2383-}
2384-
2385-sub __distill_tables {
2386- my ( $self, $query, $table, %args ) = @_;
2387- my $qp = $args{QueryParser} || $self->{QueryParser};
2388- die "I need a QueryParser argument" unless $qp;
2389-
2390- my @tables = map {
2391- $_ =~ s/`//g;
2392- $_ =~ s/(_?)[0-9]+/$1?/g;
2393- $_;
2394- } grep { defined $_ } $qp->get_tables($query);
2395-
2396- push @tables, $table if $table;
2397-
2398- @tables = do {
2399- my $last = '';
2400- grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
2401- };
2402-
2403- return @tables;
2404-}
2405-
2406-sub distill {
2407- my ( $self, $query, %args ) = @_;
2408-
2409- if ( $args{generic} ) {
2410- my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
2411- return '' unless $cmd;
2412- $query = (uc $cmd) . ($arg ? " $arg" : '');
2413- }
2414- else {
2415- my ($verbs, $table) = $self->distill_verbs($query, %args);
2416-
2417- if ( $verbs && $verbs =~ m/^SHOW/ ) {
2418- my %alias_for = qw(
2419- SCHEMA DATABASE
2420- KEYS INDEX
2421- INDEXES INDEX
2422- );
2423- map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
2424- $query = $verbs;
2425- }
2426- else {
2427- my @tables = $self->__distill_tables($query, $table, %args);
2428- $query = join(q{ }, $verbs, @tables);
2429- }
2430- }
2431-
2432- if ( $args{trf} ) {
2433- $query = $args{trf}->($query, %args);
2434- }
2435-
2436- return $query;
2437-}
2438-
2439-sub convert_to_select {
2440- my ( $self, $query ) = @_;
2441- return unless $query;
2442-
2443- return if $query =~ m/=\s*\(\s*SELECT /i;
2444-
2445- $query =~ s{
2446- \A.*?
2447- update(?:\s+(?:low_priority|ignore))?\s+(.*?)
2448- \s+set\b(.*?)
2449- (?:\s*where\b(.*?))?
2450- (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
2451- \Z
2452- }
2453- {__update_to_select($1, $2, $3, $4)}exsi
2454- || $query =~ s{
2455- \A.*?
2456- (?:insert(?:\s+ignore)?|replace)\s+
2457- .*?\binto\b(.*?)\(([^\)]+)\)\s*
2458- values?\s*(\(.*?\))\s*
2459- (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2460- \Z
2461- }
2462- {__insert_to_select($1, $2, $3)}exsi
2463- || $query =~ s{
2464- \A.*?
2465- (?:insert(?:\s+ignore)?|replace)\s+
2466- (?:.*?\binto)\b(.*?)\s*
2467- set\s+(.*?)\s*
2468- (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2469- \Z
2470- }
2471- {__insert_to_select_with_set($1, $2)}exsi
2472- || $query =~ s{
2473- \A.*?
2474- delete\s+(.*?)
2475- \bfrom\b(.*)
2476- \Z
2477- }
2478- {__delete_to_select($1, $2)}exsi;
2479- $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
2480- $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
2481- return $query;
2482-}
2483-
2484-sub convert_select_list {
2485- my ( $self, $query ) = @_;
2486- $query =~ s{
2487- \A\s*select(.*?)\bfrom\b
2488- }
2489- {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
2490- return $query;
2491-}
2492-
2493-sub __delete_to_select {
2494- my ( $delete, $join ) = @_;
2495- if ( $join =~ m/\bjoin\b/ ) {
2496- return "select 1 from $join";
2497- }
2498- return "select * from $join";
2499-}
2500-
2501-sub __insert_to_select {
2502- my ( $tbl, $cols, $vals ) = @_;
2503- PTDEBUG && _d('Args:', @_);
2504- my @cols = split(/,/, $cols);
2505- PTDEBUG && _d('Cols:', @cols);
2506- $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
2507- my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
2508- PTDEBUG && _d('Vals:', @vals);
2509- if ( @cols == @vals ) {
2510- return "select * from $tbl where "
2511- . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
2512- }
2513- else {
2514- return "select * from $tbl limit 1";
2515- }
2516-}
2517-
2518-sub __insert_to_select_with_set {
2519- my ( $from, $set ) = @_;
2520- $set =~ s/,/ and /g;
2521- return "select * from $from where $set ";
2522-}
2523-
2524-sub __update_to_select {
2525- my ( $from, $set, $where, $limit ) = @_;
2526- return "select $set from $from "
2527- . ( $where ? "where $where" : '' )
2528- . ( $limit ? " $limit " : '' );
2529-}
2530-
2531-sub wrap_in_derived {
2532- my ( $self, $query ) = @_;
2533- return unless $query;
2534- return $query =~ m/\A\s*select/i
2535- ? "select 1 from ($query) as x limit 1"
2536- : $query;
2537-}
2538-
2539-sub _d {
2540- my ($package, undef, $line) = caller 0;
2541- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2542- map { defined $_ ? $_ : 'undef' }
2543- @_;
2544- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2545-}
2546-
2547-1;
2548-}
2549-# ###########################################################################
2550-# End QueryRewriter package
2551-# ###########################################################################
2552-
2553-# ###########################################################################
2554 # Daemon package
2555 # This package is a copy without comments from the original. The original
2556 # with comments and its test file can be found in the Bazaar repository at,
2557@@ -5223,3403 +3056,6 @@
2558 # ###########################################################################
2559
2560 # ###########################################################################
2561-# ChangeHandler package
2562-# This package is a copy without comments from the original. The original
2563-# with comments and its test file can be found in the Bazaar repository at,
2564-# lib/ChangeHandler.pm
2565-# t/lib/ChangeHandler.t
2566-# See https://launchpad.net/percona-toolkit for more information.
2567-# ###########################################################################
2568-{
2569-package ChangeHandler;
2570-
2571-use strict;
2572-use warnings FATAL => 'all';
2573-use English qw(-no_match_vars);
2574-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2575-
2576-my $DUPE_KEY = qr/Duplicate entry/;
2577-our @ACTIONS = qw(DELETE REPLACE INSERT UPDATE);
2578-
2579-sub new {
2580- my ( $class, %args ) = @_;
2581- foreach my $arg ( qw(Quoter left_db left_tbl right_db right_tbl
2582- replace queue) ) {
2583- die "I need a $arg argument" unless defined $args{$arg};
2584- }
2585- my $q = $args{Quoter};
2586-
2587- my $self = {
2588- hex_blob => 1,
2589- %args,
2590- left_db_tbl => $q->quote(@args{qw(left_db left_tbl)}),
2591- right_db_tbl => $q->quote(@args{qw(right_db right_tbl)}),
2592- };
2593-
2594- $self->{src_db_tbl} = $self->{left_db_tbl};
2595- $self->{dst_db_tbl} = $self->{right_db_tbl};
2596-
2597- map { $self->{$_} = [] } @ACTIONS;
2598- $self->{changes} = { map { $_ => 0 } @ACTIONS };
2599-
2600- return bless $self, $class;
2601-}
2602-
2603-sub fetch_back {
2604- my ( $self, $dbh ) = @_;
2605- $self->{fetch_back} = $dbh;
2606- PTDEBUG && _d('Set fetch back dbh', $dbh);
2607- return;
2608-}
2609-
2610-sub set_src {
2611- my ( $self, $src, $dbh ) = @_;
2612- die "I need a src argument" unless $src;
2613- if ( lc $src eq 'left' ) {
2614- $self->{src_db_tbl} = $self->{left_db_tbl};
2615- $self->{dst_db_tbl} = $self->{right_db_tbl};
2616- }
2617- elsif ( lc $src eq 'right' ) {
2618- $self->{src_db_tbl} = $self->{right_db_tbl};
2619- $self->{dst_db_tbl} = $self->{left_db_tbl};
2620- }
2621- else {
2622- die "src argument must be either 'left' or 'right'"
2623- }
2624- PTDEBUG && _d('Set src to', $src);
2625- $self->fetch_back($dbh) if $dbh;
2626- return;
2627-}
2628-
2629-sub src {
2630- my ( $self ) = @_;
2631- return $self->{src_db_tbl};
2632-}
2633-
2634-sub dst {
2635- my ( $self ) = @_;
2636- return $self->{dst_db_tbl};
2637-}
2638-
2639-sub _take_action {
2640- my ( $self, $sql, $dbh ) = @_;
2641- PTDEBUG && _d('Calling subroutines on', $dbh, $sql);
2642- foreach my $action ( @{$self->{actions}} ) {
2643- $action->($sql, $dbh);
2644- }
2645- return;
2646-}
2647-
2648-sub change {
2649- my ( $self, $action, $row, $cols, $dbh ) = @_;
2650- PTDEBUG && _d($dbh, $action, 'where', $self->make_where_clause($row, $cols));
2651-
2652- return unless $action;
2653-
2654- $self->{changes}->{
2655- $self->{replace} && $action ne 'DELETE' ? 'REPLACE' : $action
2656- }++;
2657- if ( $self->{queue} ) {
2658- $self->__queue($action, $row, $cols, $dbh);
2659- }
2660- else {
2661- eval {
2662- my $func = "make_$action";
2663- $self->_take_action($self->$func($row, $cols), $dbh);
2664- };
2665- if ( $EVAL_ERROR =~ m/$DUPE_KEY/ ) {
2666- PTDEBUG && _d('Duplicate key violation; will queue and rewrite');
2667- $self->{queue}++;
2668- $self->{replace} = 1;
2669- $self->__queue($action, $row, $cols, $dbh);
2670- }
2671- elsif ( $EVAL_ERROR ) {
2672- die $EVAL_ERROR;
2673- }
2674- }
2675- return;
2676-}
2677-
2678-sub __queue {
2679- my ( $self, $action, $row, $cols, $dbh ) = @_;
2680- PTDEBUG && _d('Queueing change for later');
2681- if ( $self->{replace} ) {
2682- $action = $action eq 'DELETE' ? $action : 'REPLACE';
2683- }
2684- push @{$self->{$action}}, [ $row, $cols, $dbh ];
2685-}
2686-
2687-sub process_rows {
2688- my ( $self, $queue_level, $trace_msg ) = @_;
2689- my $error_count = 0;
2690- TRY: {
2691- if ( $queue_level && $queue_level < $self->{queue} ) { # see redo below!
2692- PTDEBUG && _d('Not processing now', $queue_level, '<', $self->{queue});
2693- return;
2694- }
2695- PTDEBUG && _d('Processing rows:');
2696- my ($row, $cur_act);
2697- eval {
2698- foreach my $action ( @ACTIONS ) {
2699- my $func = "make_$action";
2700- my $rows = $self->{$action};
2701- PTDEBUG && _d(scalar(@$rows), 'to', $action);
2702- $cur_act = $action;
2703- while ( @$rows ) {
2704- $row = shift @$rows;
2705- my $sql = $self->$func(@$row);
2706- $sql .= " /*percona-toolkit $trace_msg*/" if $trace_msg;
2707- $self->_take_action($sql, $row->[2]);
2708- }
2709- }
2710- $error_count = 0;
2711- };
2712- if ( !$error_count++ && $EVAL_ERROR =~ m/$DUPE_KEY/ ) {
2713- PTDEBUG && _d('Duplicate key violation; re-queueing and rewriting');
2714- $self->{queue}++; # Defer rows to the very end
2715- $self->{replace} = 1;
2716- $self->__queue($cur_act, @$row);
2717- redo TRY;
2718- }
2719- elsif ( $EVAL_ERROR ) {
2720- die $EVAL_ERROR;
2721- }
2722- }
2723-}
2724-
2725-sub make_DELETE {
2726- my ( $self, $row, $cols ) = @_;
2727- PTDEBUG && _d('Make DELETE');
2728- return "DELETE FROM $self->{dst_db_tbl} WHERE "
2729- . $self->make_where_clause($row, $cols)
2730- . ' LIMIT 1';
2731-}
2732-
2733-sub make_UPDATE {
2734- my ( $self, $row, $cols ) = @_;
2735- PTDEBUG && _d('Make UPDATE');
2736- if ( $self->{replace} ) {
2737- return $self->make_row('REPLACE', $row, $cols);
2738- }
2739- my %in_where = map { $_ => 1 } @$cols;
2740- my $where = $self->make_where_clause($row, $cols);
2741- my @cols;
2742- if ( my $dbh = $self->{fetch_back} ) {
2743- my $sql = $self->make_fetch_back_query($where);
2744- PTDEBUG && _d('Fetching data on dbh', $dbh, 'for UPDATE:', $sql);
2745- my $res = $dbh->selectrow_hashref($sql);
2746- @{$row}{keys %$res} = values %$res;
2747- @cols = $self->sort_cols($res);
2748- }
2749- else {
2750- @cols = $self->sort_cols($row);
2751- }
2752- my $types = $self->{tbl_struct}->{type_for};
2753- return "UPDATE $self->{dst_db_tbl} SET "
2754- . join(', ', map {
2755- my $is_char = ($types->{$_} || '') =~ m/char|text/i;
2756- $self->{Quoter}->quote($_)
2757- . '=' . $self->{Quoter}->quote_val($row->{$_},
2758- is_char => $is_char);
2759- } grep { !$in_where{$_} } @cols)
2760- . " WHERE $where LIMIT 1";
2761-}
2762-
2763-sub make_INSERT {
2764- my ( $self, $row, $cols ) = @_;
2765- PTDEBUG && _d('Make INSERT');
2766- if ( $self->{replace} ) {
2767- return $self->make_row('REPLACE', $row, $cols);
2768- }
2769- return $self->make_row('INSERT', $row, $cols);
2770-}
2771-
2772-sub make_REPLACE {
2773- my ( $self, $row, $cols ) = @_;
2774- PTDEBUG && _d('Make REPLACE');
2775- return $self->make_row('REPLACE', $row, $cols);
2776-}
2777-
2778-sub make_row {
2779- my ( $self, $verb, $row, $cols ) = @_;
2780- my @cols;
2781- if ( my $dbh = $self->{fetch_back} ) {
2782- my $where = $self->make_where_clause($row, $cols);
2783- my $sql = $self->make_fetch_back_query($where);
2784- PTDEBUG && _d('Fetching data on dbh', $dbh, 'for', $verb, ':', $sql);
2785- my $res = $dbh->selectrow_hashref($sql);
2786- @{$row}{keys %$res} = values %$res;
2787- @cols = $self->sort_cols($res);
2788- }
2789- else {
2790- @cols = $self->sort_cols($row);
2791- }
2792- my $q = $self->{Quoter};
2793- my $type_for = $self->{tbl_struct}->{type_for};
2794- return "$verb INTO $self->{dst_db_tbl}("
2795- . join(', ', map { $q->quote($_) } @cols)
2796- . ') VALUES ('
2797- . join(', ', map {
2798- my $is_char = ($type_for->{$_} || '') =~ m/char|text/i;
2799- $q->quote_val($row->{$_},
2800- is_char => $is_char) } @cols )
2801- . ')';
2802-}
2803-
2804-sub make_where_clause {
2805- my ( $self, $row, $cols ) = @_;
2806- my @clauses = map {
2807- my $val = $row->{$_};
2808- my $sep = defined $val ? '=' : ' IS ';
2809- my $is_char = ($self->{tbl_struct}->{type_for}->{$_} || '') =~ m/char|text/i;
2810- $self->{Quoter}->quote($_) . $sep . $self->{Quoter}->quote_val($val,
2811- is_char => $is_char);
2812- } @$cols;
2813- return join(' AND ', @clauses);
2814-}
2815-
2816-
2817-sub get_changes {
2818- my ( $self ) = @_;
2819- return %{$self->{changes}};
2820-}
2821-
2822-
2823-sub sort_cols {
2824- my ( $self, $row ) = @_;
2825- my @cols;
2826- if ( $self->{tbl_struct} ) {
2827- my $pos = $self->{tbl_struct}->{col_posn};
2828- my @not_in_tbl;
2829- @cols = sort {
2830- $pos->{$a} <=> $pos->{$b}
2831- }
2832- grep {
2833- if ( !defined $pos->{$_} ) {
2834- push @not_in_tbl, $_;
2835- 0;
2836- }
2837- else {
2838- 1;
2839- }
2840- }
2841- keys %$row;
2842- push @cols, @not_in_tbl if @not_in_tbl;
2843- }
2844- else {
2845- @cols = sort keys %$row;
2846- }
2847- return @cols;
2848-}
2849-
2850-sub make_fetch_back_query {
2851- my ( $self, $where ) = @_;
2852- die "I need a where argument" unless $where;
2853- my $cols = '*';
2854- my $tbl_struct = $self->{tbl_struct};
2855- if ( $tbl_struct ) {
2856- $cols = join(', ',
2857- map {
2858- my $col = $_;
2859- if ( $self->{hex_blob}
2860- && $tbl_struct->{type_for}->{$col} =~ m/b(?:lob|inary)/ ) {
2861- $col = "IF(BINARY(`$col`)='', '', CONCAT('0x', HEX(`$col`))) AS `$col`";
2862- }
2863- else {
2864- $col = "`$col`";
2865- }
2866- $col;
2867- } @{ $tbl_struct->{cols} }
2868- );
2869-
2870- if ( !$cols ) {
2871- PTDEBUG && _d('Failed to make explicit columns list from tbl struct');
2872- $cols = '*';
2873- }
2874- }
2875- return "SELECT $cols FROM $self->{src_db_tbl} WHERE $where LIMIT 1";
2876-}
2877-
2878-sub _d {
2879- my ($package, undef, $line) = caller 0;
2880- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2881- map { defined $_ ? $_ : 'undef' }
2882- @_;
2883- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2884-}
2885-
2886-1;
2887-}
2888-# ###########################################################################
2889-# End ChangeHandler package
2890-# ###########################################################################
2891-
2892-# ###########################################################################
2893-# RowDiff package
2894-# This package is a copy without comments from the original. The original
2895-# with comments and its test file can be found in the Bazaar repository at,
2896-# lib/RowDiff.pm
2897-# t/lib/RowDiff.t
2898-# See https://launchpad.net/percona-toolkit for more information.
2899-# ###########################################################################
2900-{
2901-package RowDiff;
2902-
2903-use strict;
2904-use warnings FATAL => 'all';
2905-use English qw(-no_match_vars);
2906-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2907-
2908-sub new {
2909- my ( $class, %args ) = @_;
2910- die "I need a dbh" unless $args{dbh};
2911- my $self = { %args };
2912- return bless $self, $class;
2913-}
2914-
2915-sub compare_sets {
2916- my ( $self, %args ) = @_;
2917- my @required_args = qw(left_sth right_sth syncer tbl_struct);
2918- foreach my $arg ( @required_args ) {
2919- die "I need a $arg argument" unless defined $args{$arg};
2920- }
2921- my $left_sth = $args{left_sth};
2922- my $right_sth = $args{right_sth};
2923- my $syncer = $args{syncer};
2924- my $tbl_struct = $args{tbl_struct};
2925-
2926- my ($lr, $rr); # Current row from the left/right sths.
2927- $args{key_cols} = $syncer->key_cols(); # for key_cmp()
2928-
2929- my $left_done = 0;
2930- my $right_done = 0;
2931- my $done = $self->{done};
2932-
2933- do {
2934- if ( !$lr && !$left_done ) {
2935- PTDEBUG && _d('Fetching row from left');
2936- eval { $lr = $left_sth->fetchrow_hashref(); };
2937- PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2938- $left_done = !$lr || $EVAL_ERROR ? 1 : 0;
2939- }
2940- elsif ( PTDEBUG ) {
2941- _d('Left still has rows');
2942- }
2943-
2944- if ( !$rr && !$right_done ) {
2945- PTDEBUG && _d('Fetching row from right');
2946- eval { $rr = $right_sth->fetchrow_hashref(); };
2947- PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2948- $right_done = !$rr || $EVAL_ERROR ? 1 : 0;
2949- }
2950- elsif ( PTDEBUG ) {
2951- _d('Right still has rows');
2952- }
2953-
2954- my $cmp;
2955- if ( $lr && $rr ) {
2956- $cmp = $self->key_cmp(%args, lr => $lr, rr => $rr);
2957- PTDEBUG && _d('Key comparison on left and right:', $cmp);
2958- }
2959- if ( $lr || $rr ) {
2960- if ( $lr && $rr && defined $cmp && $cmp == 0 ) {
2961- PTDEBUG && _d('Left and right have the same key');
2962- $syncer->same_row(%args, lr => $lr, rr => $rr);
2963- $self->{same_row}->(%args, lr => $lr, rr => $rr)
2964- if $self->{same_row};
2965- $lr = $rr = undef; # Fetch another row from each side.
2966- }
2967- elsif ( !$rr || ( defined $cmp && $cmp < 0 ) ) {
2968- PTDEBUG && _d('Left is not in right');
2969- $syncer->not_in_right(%args, lr => $lr, rr => $rr);
2970- $self->{not_in_right}->(%args, lr => $lr, rr => $rr)
2971- if $self->{not_in_right};
2972- $lr = undef;
2973- }
2974- else {
2975- PTDEBUG && _d('Right is not in left');
2976- $syncer->not_in_left(%args, lr => $lr, rr => $rr);
2977- $self->{not_in_left}->(%args, lr => $lr, rr => $rr)
2978- if $self->{not_in_left};
2979- $rr = undef;
2980- }
2981- }
2982- $left_done = $right_done = 1 if $done && $done->(%args);
2983- } while ( !($left_done && $right_done) );
2984- PTDEBUG && _d('No more rows');
2985- $syncer->done_with_rows();
2986-}
2987-
2988-sub key_cmp {
2989- my ( $self, %args ) = @_;
2990- my @required_args = qw(lr rr key_cols tbl_struct);
2991- foreach my $arg ( @required_args ) {
2992- die "I need a $arg argument" unless exists $args{$arg};
2993- }
2994- my ($lr, $rr, $key_cols, $tbl_struct) = @args{@required_args};
2995- PTDEBUG && _d('Comparing keys using columns:', join(',', @$key_cols));
2996-
2997- my $callback = $self->{key_cmp};
2998- my $trf = $self->{trf};
2999-
3000- foreach my $col ( @$key_cols ) {
3001- my $l = $lr->{$col};
3002- my $r = $rr->{$col};
3003- if ( !defined $l || !defined $r ) {
3004- PTDEBUG && _d($col, 'is not defined in both rows');
3005- return defined $l ? 1 : defined $r ? -1 : 0;
3006- }
3007- else {
3008- if ( $tbl_struct->{is_numeric}->{$col} ) { # Numeric column
3009- PTDEBUG && _d($col, 'is numeric');
3010- ($l, $r) = $trf->($l, $r, $tbl_struct, $col) if $trf;
3011- my $cmp = $l <=> $r;
3012- if ( $cmp ) {
3013- PTDEBUG && _d('Column', $col, 'differs:', $l, '!=', $r);
3014- $callback->($col, $l, $r) if $callback;
3015- return $cmp;
3016- }
3017- }
3018- elsif ( $l ne $r ) {
3019- my $cmp;
3020- my $coll = $tbl_struct->{collation_for}->{$col};
3021- if ( $coll && ( $coll ne 'latin1_swedish_ci'
3022- || $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/) )
3023- {
3024- PTDEBUG && _d('Comparing', $col, 'via MySQL');
3025- $cmp = $self->db_cmp($coll, $l, $r);
3026- }
3027- else {
3028- PTDEBUG && _d('Comparing', $col, 'in lowercase');
3029- $cmp = lc $l cmp lc $r;
3030- }
3031- if ( $cmp ) {
3032- PTDEBUG && _d('Column', $col, 'differs:', $l, 'ne', $r);
3033- $callback->($col, $l, $r) if $callback;
3034- return $cmp;
3035- }
3036- }
3037- }
3038- }
3039- return 0;
3040-}
3041-
3042-sub db_cmp {
3043- my ( $self, $collation, $l, $r ) = @_;
3044- if ( !$self->{sth}->{$collation} ) {
3045- if ( !$self->{charset_for} ) {
3046- PTDEBUG && _d('Fetching collations from MySQL');
3047- my @collations = @{$self->{dbh}->selectall_arrayref(
3048- 'SHOW COLLATION', {Slice => { collation => 1, charset => 1 }})};
3049- foreach my $collation ( @collations ) {
3050- $self->{charset_for}->{$collation->{collation}}
3051- = $collation->{charset};
3052- }
3053- }
3054- my $sql = "SELECT STRCMP(_$self->{charset_for}->{$collation}? COLLATE $collation, "
3055- . "_$self->{charset_for}->{$collation}? COLLATE $collation) AS res";
3056- PTDEBUG && _d($sql);
3057- $self->{sth}->{$collation} = $self->{dbh}->prepare($sql);
3058- }
3059- my $sth = $self->{sth}->{$collation};
3060- $sth->execute($l, $r);
3061- return $sth->fetchall_arrayref()->[0]->[0];
3062-}
3063-
3064-sub _d {
3065- my ($package, undef, $line) = caller 0;
3066- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3067- map { defined $_ ? $_ : 'undef' }
3068- @_;
3069- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3070-}
3071-
3072-1;
3073-}
3074-# ###########################################################################
3075-# End RowDiff package
3076-# ###########################################################################
3077-
3078-# ###########################################################################
3079-# TableChunker package
3080-# This package is a copy without comments from the original. The original
3081-# with comments and its test file can be found in the Bazaar repository at,
3082-# lib/TableChunker.pm
3083-# t/lib/TableChunker.t
3084-# See https://launchpad.net/percona-toolkit for more information.
3085-# ###########################################################################
3086-{
3087-package TableChunker;
3088-
3089-use strict;
3090-use warnings FATAL => 'all';
3091-use English qw(-no_match_vars);
3092-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3093-
3094-use POSIX qw(floor ceil);
3095-use List::Util qw(min max);
3096-use Data::Dumper;
3097-$Data::Dumper::Indent = 1;
3098-$Data::Dumper::Sortkeys = 1;
3099-$Data::Dumper::Quotekeys = 0;
3100-
3101-sub new {
3102- my ( $class, %args ) = @_;
3103- foreach my $arg ( qw(Quoter TableParser) ) {
3104- die "I need a $arg argument" unless $args{$arg};
3105- }
3106-
3107- my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);
3108- my %real_types = map { $_ => 1 } qw(decimal double float);
3109-
3110- my $self = {
3111- %args,
3112- int_types => \%int_types,
3113- real_types => \%real_types,
3114- EPOCH => '1970-01-01',
3115- };
3116-
3117- return bless $self, $class;
3118-}
3119-
3120-sub find_chunk_columns {
3121- my ( $self, %args ) = @_;
3122- foreach my $arg ( qw(tbl_struct) ) {
3123- die "I need a $arg argument" unless $args{$arg};
3124- }
3125- my $tbl_struct = $args{tbl_struct};
3126-
3127- my @possible_indexes;
3128- foreach my $index ( values %{ $tbl_struct->{keys} } ) {
3129-
3130- next unless $index->{type} eq 'BTREE';
3131-
3132- next if grep { defined } @{$index->{col_prefixes}};
3133-
3134- if ( $args{exact} ) {
3135- next unless $index->{is_unique} && @{$index->{cols}} == 1;
3136- }
3137-
3138- push @possible_indexes, $index;
3139- }
3140- PTDEBUG && _d('Possible chunk indexes in order:',
3141- join(', ', map { $_->{name} } @possible_indexes));
3142-
3143- my $can_chunk_exact = 0;
3144- my @candidate_cols;
3145- foreach my $index ( @possible_indexes ) {
3146- my $col = $index->{cols}->[0];
3147-
3148- my $col_type = $tbl_struct->{type_for}->{$col};
3149- next unless $self->{int_types}->{$col_type}
3150- || $self->{real_types}->{$col_type}
3151- || $col_type =~ m/char/;
3152-
3153- push @candidate_cols, { column => $col, index => $index->{name} };
3154- }
3155-
3156- $can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;
3157-
3158- if ( PTDEBUG ) {
3159- my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';
3160- _d($chunk_type, 'chunkable:',
3161- join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));
3162- }
3163-
3164- my @result;
3165- PTDEBUG && _d('Ordering columns by order in tbl, PK first');
3166- if ( $tbl_struct->{keys}->{PRIMARY} ) {
3167- my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];
3168- @result = grep { $_->{column} eq $pk_first_col } @candidate_cols;
3169- @candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;
3170- }
3171- my $i = 0;
3172- my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};
3173- push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }
3174- @candidate_cols;
3175-
3176- if ( PTDEBUG ) {
3177- _d('Chunkable columns:',
3178- join(', ', map { "$_->{column} on $_->{index}" } @result));
3179- _d('Can chunk exactly:', $can_chunk_exact);
3180- }
3181-
3182- return ($can_chunk_exact, @result);
3183-}
3184-
3185-sub calculate_chunks {
3186- my ( $self, %args ) = @_;
3187- my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
3188- foreach my $arg ( @required_args ) {
3189- die "I need a $arg argument" unless defined $args{$arg};
3190- }
3191- PTDEBUG && _d('Calculate chunks for',
3192- join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}
3193- qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)
3194- ));
3195-
3196- if ( !$args{rows_in_range} ) {
3197- PTDEBUG && _d("Empty table");
3198- return '1=1';
3199- }
3200-
3201- if ( $args{rows_in_range} < $args{chunk_size} ) {
3202- PTDEBUG && _d("Chunk size larger than rows in range");
3203- return '1=1';
3204- }
3205-
3206- my $q = $self->{Quoter};
3207- my $dbh = $args{dbh};
3208- my $chunk_col = $args{chunk_col};
3209- my $tbl_struct = $args{tbl_struct};
3210- my $col_type = $tbl_struct->{type_for}->{$chunk_col};
3211- PTDEBUG && _d('chunk col type:', $col_type);
3212-
3213- my %chunker;
3214- if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {
3215- %chunker = $self->_chunk_numeric(%args);
3216- }
3217- elsif ( $col_type =~ m/char/ ) {
3218- %chunker = $self->_chunk_char(%args);
3219- }
3220- else {
3221- die "Cannot chunk $col_type columns";
3222- }
3223- PTDEBUG && _d("Chunker:", Dumper(\%chunker));
3224- my ($col, $start_point, $end_point, $interval, $range_func)
3225- = @chunker{qw(col start_point end_point interval range_func)};
3226-
3227- my @chunks;
3228- if ( $start_point < $end_point ) {
3229-
3230- push @chunks, "$col = 0" if $chunker{have_zero_chunk};
3231-
3232- my ($beg, $end);
3233- my $iter = 0;
3234- for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
3235- ($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);
3236-
3237- if ( $iter++ == 0 ) {
3238- push @chunks,
3239- ($chunker{have_zero_chunk} ? "$col > 0 AND " : "")
3240- ."$col < " . $q->quote_val($end);
3241- }
3242- else {
3243- push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);
3244- }
3245- }
3246-
3247- my $chunk_range = lc($args{chunk_range} || 'open');
3248- my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};
3249- pop @chunks;
3250- if ( @chunks ) {
3251- push @chunks, "$col >= " . $q->quote_val($beg)
3252- . ($chunk_range eq 'openclosed'
3253- ? " AND $col <= " . $q->quote_val($args{max}) : "");
3254- }
3255- else {
3256- push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
3257- }
3258- if ( $nullable ) {
3259- push @chunks, "$col IS NULL";
3260- }
3261- }
3262- else {
3263- PTDEBUG && _d('No chunks; using single chunk 1=1');
3264- push @chunks, '1=1';
3265- }
3266-
3267- return @chunks;
3268-}
3269-
3270-sub _chunk_numeric {
3271- my ( $self, %args ) = @_;
3272- my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
3273- foreach my $arg ( @required_args ) {
3274- die "I need a $arg argument" unless defined $args{$arg};
3275- }
3276- my $q = $self->{Quoter};
3277- my $db_tbl = $q->quote($args{db}, $args{tbl});
3278- my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};
3279-
3280- my $range_func;
3281- if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
3282- $range_func = 'range_num';
3283- }
3284- elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
3285- $range_func = "range_$col_type";
3286- }
3287- elsif ( $col_type eq 'datetime' ) {
3288- $range_func = 'range_datetime';
3289- }
3290-
3291- my ($start_point, $end_point);
3292- eval {
3293- $start_point = $self->value_to_number(
3294- value => $args{min},
3295- column_type => $col_type,
3296- dbh => $args{dbh},
3297- );
3298- $end_point = $self->value_to_number(
3299- value => $args{max},
3300- column_type => $col_type,
3301- dbh => $args{dbh},
3302- );
3303- };
3304- if ( $EVAL_ERROR ) {
3305- if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {
3306- die $EVAL_ERROR;
3307- }
3308- else {
3309- die "Error calculating chunk start and end points for table "
3310- . "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "
3311- . "with min/max values "
3312- . join('/',
3313- map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))
3314- . ":\n\n"
3315- . $EVAL_ERROR
3316- . "\nVerify that the min and max values are valid for the column. "
3317- . "If they are valid, this error could be caused by a bug in the "
3318- . "tool.";
3319- }
3320- }
3321-
3322- if ( !defined $start_point ) {
3323- PTDEBUG && _d('Start point is undefined');
3324- $start_point = 0;
3325- }
3326- if ( !defined $end_point || $end_point < $start_point ) {
3327- PTDEBUG && _d('End point is undefined or before start point');
3328- $end_point = 0;
3329- }
3330- PTDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);
3331-
3332- my $have_zero_chunk = 0;
3333- if ( $args{zero_chunk} ) {
3334- if ( $start_point != $end_point && $start_point >= 0 ) {
3335- PTDEBUG && _d('Zero chunking');
3336- my $nonzero_val = $self->get_nonzero_value(
3337- %args,
3338- db_tbl => $db_tbl,
3339- col => $args{chunk_col},
3340- col_type => $col_type,
3341- val => $args{min}
3342- );
3343- $start_point = $self->value_to_number(
3344- value => $nonzero_val,
3345- column_type => $col_type,
3346- dbh => $args{dbh},
3347- );
3348- $have_zero_chunk = 1;
3349- }
3350- else {
3351- PTDEBUG && _d("Cannot zero chunk");
3352- }
3353- }
3354- PTDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);
3355-
3356- my $interval = $args{chunk_size}
3357- * ($end_point - $start_point)
3358- / $args{rows_in_range};
3359- if ( $self->{int_types}->{$col_type} ) {
3360- $interval = ceil($interval);
3361- }
3362- $interval ||= $args{chunk_size};
3363- if ( $args{exact} ) {
3364- $interval = $args{chunk_size};
3365- }
3366- PTDEBUG && _d('Chunk interval:', $interval, 'units');
3367-
3368- return (
3369- col => $q->quote($args{chunk_col}),
3370- start_point => $start_point,
3371- end_point => $end_point,
3372- interval => $interval,
3373- range_func => $range_func,
3374- have_zero_chunk => $have_zero_chunk,
3375- );
3376-}
3377-
3378-sub _chunk_char {
3379- my ( $self, %args ) = @_;
3380- my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
3381- foreach my $arg ( @required_args ) {
3382- die "I need a $arg argument" unless defined $args{$arg};
3383- }
3384- my $q = $self->{Quoter};
3385- my $db_tbl = $q->quote($args{db}, $args{tbl});
3386- my $dbh = $args{dbh};
3387- my $chunk_col = $args{chunk_col};
3388- my $qchunk_col = $q->quote($args{chunk_col});
3389- my $row;
3390- my $sql;
3391-
3392- my ($min_col, $max_col) = @{args}{qw(min max)};
3393- $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
3394- PTDEBUG && _d($dbh, $sql);
3395- my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
3396- $ord_sth->execute($min_col, $max_col);
3397- $row = $ord_sth->fetchrow_arrayref();
3398- my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);
3399- PTDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);
3400-
3401- my $base;
3402- my @chars;
3403- PTDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});
3404- if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {
3405- my @sorted_latin1_chars = (
3406- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
3407- 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
3408- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
3409- 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
3410- 88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,
3411- 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
3412- 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
3413- 190, 191, 215, 216, 222, 223, 247, 255);
3414-
3415- my ($first_char, $last_char);
3416- for my $i ( 0..$#sorted_latin1_chars ) {
3417- $first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;
3418- }
3419- for my $i ( $first_char..$#sorted_latin1_chars ) {
3420- $last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;
3421- };
3422-
3423- @chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];
3424- $base = scalar @chars;
3425- }
3426- else {
3427-
3428- my $tmp_tbl = '__maatkit_char_chunking_map';
3429- my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);
3430- $sql = "DROP TABLE IF EXISTS $tmp_db_tbl";
3431- PTDEBUG && _d($dbh, $sql);
3432- $dbh->do($sql);
3433- my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};
3434- $sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "
3435- . "ENGINE=MEMORY";
3436- PTDEBUG && _d($dbh, $sql);
3437- $dbh->do($sql);
3438-
3439- $sql = "INSERT INTO $tmp_db_tbl VALUES (CHAR(?))";
3440- PTDEBUG && _d($dbh, $sql);
3441- my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues
3442- for my $char_code ( $min_col_ord..$max_col_ord ) {
3443- $ins_char_sth->execute($char_code);
3444- }
3445-
3446- $sql = "SELECT $qchunk_col FROM $tmp_db_tbl "
3447- . "WHERE $qchunk_col BETWEEN ? AND ? "
3448- . "ORDER BY $qchunk_col";
3449- PTDEBUG && _d($dbh, $sql);
3450- my $sel_char_sth = $dbh->prepare($sql);
3451- $sel_char_sth->execute($min_col, $max_col);
3452-
3453- @chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };
3454- $base = scalar @chars;
3455-
3456- $sql = "DROP TABLE $tmp_db_tbl";
3457- PTDEBUG && _d($dbh, $sql);
3458- $dbh->do($sql);
3459- }
3460- PTDEBUG && _d("Base", $base, "chars:", @chars);
3461-
3462- die "Cannot chunk table $db_tbl using the character column "
3463- . "$chunk_col, most likely because all values start with the "
3464- . "same character. This table must be synced separately by "
3465- . "specifying a list of --algorithms without the Chunk algorithm"
3466- if $base == 1;
3467-
3468-
3469- $sql = "SELECT MAX(LENGTH($qchunk_col)) FROM $db_tbl "
3470- . ($args{where} ? "WHERE $args{where} " : "")
3471- . "ORDER BY $qchunk_col";
3472- PTDEBUG && _d($dbh, $sql);
3473- $row = $dbh->selectrow_arrayref($sql);
3474- my $max_col_len = $row->[0];
3475- PTDEBUG && _d("Max column value:", $max_col, $max_col_len);
3476- my $n_values;
3477- for my $n_chars ( 1..$max_col_len ) {
3478- $n_values = $base**$n_chars;
3479- if ( $n_values >= $args{chunk_size} ) {
3480- PTDEBUG && _d($n_chars, "chars in base", $base, "expresses",
3481- $n_values, "values");
3482- last;
3483- }
3484- }
3485-
3486- my $n_chunks = $args{rows_in_range} / $args{chunk_size};
3487- my $interval = floor(($n_values+0.00001) / $n_chunks) || 1;
3488-
3489- my $range_func = sub {
3490- my ( $self, $dbh, $start, $interval, $max ) = @_;
3491- my $start_char = $self->base_count(
3492- count_to => $start,
3493- base => $base,
3494- symbols => \@chars,
3495- );
3496- my $end_char = $self->base_count(
3497- count_to => min($max, $start + $interval),
3498- base => $base,
3499- symbols => \@chars,
3500- );
3501- return $start_char, $end_char;
3502- };
3503-
3504- return (
3505- col => $qchunk_col,
3506- start_point => 0,
3507- end_point => $n_values,
3508- interval => $interval,
3509- range_func => $range_func,
3510- );
3511-}
3512-
3513-sub get_first_chunkable_column {
3514- my ( $self, %args ) = @_;
3515- foreach my $arg ( qw(tbl_struct) ) {
3516- die "I need a $arg argument" unless $args{$arg};
3517- }
3518-
3519- my ($exact, @cols) = $self->find_chunk_columns(%args);
3520- my $col = $cols[0]->{column};
3521- my $idx = $cols[0]->{index};
3522-
3523- my $wanted_col = $args{chunk_column};
3524- my $wanted_idx = $args{chunk_index};
3525- PTDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);
3526-
3527- if ( $wanted_col && $wanted_idx ) {
3528- foreach my $chunkable_col ( @cols ) {
3529- if ( $wanted_col eq $chunkable_col->{column}
3530- && $wanted_idx eq $chunkable_col->{index} ) {
3531- $col = $wanted_col;
3532- $idx = $wanted_idx;
3533- last;
3534- }
3535- }
3536- }
3537- elsif ( $wanted_col ) {
3538- foreach my $chunkable_col ( @cols ) {
3539- if ( $wanted_col eq $chunkable_col->{column} ) {
3540- $col = $wanted_col;
3541- $idx = $chunkable_col->{index};
3542- last;
3543- }
3544- }
3545- }
3546- elsif ( $wanted_idx ) {
3547- foreach my $chunkable_col ( @cols ) {
3548- if ( $wanted_idx eq $chunkable_col->{index} ) {
3549- $col = $chunkable_col->{column};
3550- $idx = $wanted_idx;
3551- last;
3552- }
3553- }
3554- }
3555-
3556- PTDEBUG && _d('First chunkable col/index:', $col, $idx);
3557- return $col, $idx;
3558-}
3559-
3560-sub size_to_rows {
3561- my ( $self, %args ) = @_;
3562- my @required_args = qw(dbh db tbl chunk_size);
3563- foreach my $arg ( @required_args ) {
3564- die "I need a $arg argument" unless $args{$arg};
3565- }
3566- my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};
3567- my $q = $self->{Quoter};
3568- my $tp = $self->{TableParser};
3569-
3570- my ($n_rows, $avg_row_length);
3571-
3572- my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;
3573- if ( $suffix ) { # Convert to bytes.
3574- $chunk_size = $suffix eq 'k' ? $num * 1_024
3575- : $suffix eq 'M' ? $num * 1_024 * 1_024
3576- : $num * 1_024 * 1_024 * 1_024;
3577- }
3578- elsif ( $num ) {
3579- $n_rows = $num;
3580- }
3581- else {
3582- die "Invalid chunk size $chunk_size; must be an integer "
3583- . "with optional suffix kMG";
3584- }
3585-
3586- if ( $suffix || $args{avg_row_length} ) {
3587- my ($status) = $tp->get_table_status($dbh, $db, $tbl);
3588- $avg_row_length = $status->{avg_row_length};
3589- if ( !defined $n_rows ) {
3590- $n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;
3591- }
3592- }
3593-
3594- return $n_rows, $avg_row_length;
3595-}
3596-
3597-sub get_range_statistics {
3598- my ( $self, %args ) = @_;
3599- my @required_args = qw(dbh db tbl chunk_col tbl_struct);
3600- foreach my $arg ( @required_args ) {
3601- die "I need a $arg argument" unless $args{$arg};
3602- }
3603- my ($dbh, $db, $tbl, $col) = @args{@required_args};
3604- my $where = $args{where};
3605- my $q = $self->{Quoter};
3606-
3607- my $col_type = $args{tbl_struct}->{type_for}->{$col};
3608- my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};
3609-
3610- my $db_tbl = $q->quote($db, $tbl);
3611- $col = $q->quote($col);
3612-
3613- my ($min, $max);
3614- eval {
3615- my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"
3616- . ($args{index_hint} ? " $args{index_hint}" : "")
3617- . ($where ? " WHERE ($where)" : '');
3618- PTDEBUG && _d($dbh, $sql);
3619- ($min, $max) = $dbh->selectrow_array($sql);
3620- PTDEBUG && _d("Actual end points:", $min, $max);
3621-
3622- ($min, $max) = $self->get_valid_end_points(
3623- %args,
3624- dbh => $dbh,
3625- db_tbl => $db_tbl,
3626- col => $col,
3627- col_type => $col_type,
3628- min => $min,
3629- max => $max,
3630- );
3631- PTDEBUG && _d("Valid end points:", $min, $max);
3632- };
3633- if ( $EVAL_ERROR ) {
3634- die "Error getting min and max values for table $db_tbl "
3635- . "on column $col: $EVAL_ERROR";
3636- }
3637-
3638- my $sql = "EXPLAIN SELECT * FROM $db_tbl"
3639- . ($args{index_hint} ? " $args{index_hint}" : "")
3640- . ($where ? " WHERE $where" : '');
3641- PTDEBUG && _d($sql);
3642- my $expl = $dbh->selectrow_hashref($sql);
3643-
3644- return (
3645- min => $min,
3646- max => $max,
3647- rows_in_range => $expl->{rows},
3648- );
3649-}
3650-
3651-sub inject_chunks {
3652- my ( $self, %args ) = @_;
3653- foreach my $arg ( qw(database table chunks chunk_num query) ) {
3654- die "I need a $arg argument" unless defined $args{$arg};
3655- }
3656- PTDEBUG && _d('Injecting chunk', $args{chunk_num});
3657- my $query = $args{query};
3658- my $comment = sprintf("/*%s.%s:%d/%d*/",
3659- $args{database}, $args{table},
3660- $args{chunk_num} + 1, scalar @{$args{chunks}});
3661- $query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;
3662- my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';
3663- if ( $args{where} && grep { $_ } @{$args{where}} ) {
3664- $where .= " AND ("
3665- . join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )
3666- . ")";
3667- }
3668- my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});
3669- my $index_hint = $args{index_hint} || '';
3670-
3671- PTDEBUG && _d('Parameters:',
3672- Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));
3673- $query =~ s!/\*WHERE\*/! $where!;
3674- $query =~ s!/\*DB_TBL\*/!$db_tbl!;
3675- $query =~ s!/\*INDEX_HINT\*/! $index_hint!;
3676- $query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;
3677-
3678- return $query;
3679-}
3680-
3681-
3682-sub value_to_number {
3683- my ( $self, %args ) = @_;
3684- my @required_args = qw(column_type dbh);
3685- foreach my $arg ( @required_args ) {
3686- die "I need a $arg argument" unless defined $args{$arg};
3687- }
3688- my $val = $args{value};
3689- my ($col_type, $dbh) = @args{@required_args};
3690- PTDEBUG && _d('Converting MySQL', $col_type, $val);
3691-
3692- return unless defined $val; # value is NULL
3693-
3694- my %mysql_conv_func_for = (
3695- timestamp => 'UNIX_TIMESTAMP',
3696- date => 'TO_DAYS',
3697- time => 'TIME_TO_SEC',
3698- datetime => 'TO_DAYS',
3699- );
3700-
3701- my $num;
3702- if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
3703- $num = $val;
3704- }
3705- elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
3706- my $func = $mysql_conv_func_for{$col_type};
3707- my $sql = "SELECT $func(?)";
3708- PTDEBUG && _d($dbh, $sql, $val);
3709- my $sth = $dbh->prepare($sql);
3710- $sth->execute($val);
3711- ($num) = $sth->fetchrow_array();
3712- }
3713- elsif ( $col_type eq 'datetime' ) {
3714- $num = $self->timestampdiff($dbh, $val);
3715- }
3716- else {
3717- die "I don't know how to chunk $col_type\n";
3718- }
3719- PTDEBUG && _d('Converts to', $num);
3720- return $num;
3721-}
3722-
3723-sub range_num {
3724- my ( $self, $dbh, $start, $interval, $max ) = @_;
3725- my $end = min($max, $start + $interval);
3726-
3727-
3728- $start = sprintf('%.17f', $start) if $start =~ /e/;
3729- $end = sprintf('%.17f', $end) if $end =~ /e/;
3730-
3731- $start =~ s/\.(\d{5}).*$/.$1/;
3732- $end =~ s/\.(\d{5}).*$/.$1/;
3733-
3734- if ( $end > $start ) {
3735- return ( $start, $end );
3736- }
3737- else {
3738- die "Chunk size is too small: $end !> $start\n";
3739- }
3740-}
3741-
3742-sub range_time {
3743- my ( $self, $dbh, $start, $interval, $max ) = @_;
3744- my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";
3745- PTDEBUG && _d($sql);
3746- return $dbh->selectrow_array($sql);
3747-}
3748-
3749-sub range_date {
3750- my ( $self, $dbh, $start, $interval, $max ) = @_;
3751- my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";
3752- PTDEBUG && _d($sql);
3753- return $dbh->selectrow_array($sql);
3754-}
3755-
3756-sub range_datetime {
3757- my ( $self, $dbh, $start, $interval, $max ) = @_;
3758- my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "
3759- . "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";
3760- PTDEBUG && _d($sql);
3761- return $dbh->selectrow_array($sql);
3762-}
3763-
3764-sub range_timestamp {
3765- my ( $self, $dbh, $start, $interval, $max ) = @_;
3766- my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";
3767- PTDEBUG && _d($sql);
3768- return $dbh->selectrow_array($sql);
3769-}
3770-
3771-sub timestampdiff {
3772- my ( $self, $dbh, $time ) = @_;
3773- my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "
3774- . "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";
3775- PTDEBUG && _d($sql);
3776- my ( $diff ) = $dbh->selectrow_array($sql);
3777- $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";
3778- PTDEBUG && _d($sql);
3779- my ( $check ) = $dbh->selectrow_array($sql);
3780- die <<" EOF"
3781- Incorrect datetime math: given $time, calculated $diff but checked to $check.
3782- This could be due to a version of MySQL that overflows on large interval
3783- values to DATE_ADD(), or the given datetime is not a valid date. If not,
3784- please report this as a bug.
3785- EOF
3786- unless $check eq $time;
3787- return $diff;
3788-}
3789-
3790-
3791-
3792-
3793-sub get_valid_end_points {
3794- my ( $self, %args ) = @_;
3795- my @required_args = qw(dbh db_tbl col col_type);
3796- foreach my $arg ( @required_args ) {
3797- die "I need a $arg argument" unless $args{$arg};
3798- }
3799- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3800- my ($real_min, $real_max) = @args{qw(min max)};
3801-
3802- my $err_fmt = "Error finding a valid %s value for table $db_tbl on "
3803- . "column $col. The real %s value %s is invalid and "
3804- . "no other valid values were found. Verify that the table "
3805- . "has at least one valid value for this column"
3806- . ($args{where} ? " where $args{where}." : ".");
3807-
3808- my $valid_min = $real_min;
3809- if ( defined $valid_min ) {
3810- PTDEBUG && _d("Validating min end point:", $real_min);
3811- $valid_min = $self->_get_valid_end_point(
3812- %args,
3813- val => $real_min,
3814- endpoint => 'min',
3815- );
3816- die sprintf($err_fmt, 'minimum', 'minimum',
3817- (defined $real_min ? $real_min : "NULL"))
3818- unless defined $valid_min;
3819- }
3820-
3821- my $valid_max = $real_max;
3822- if ( defined $valid_max ) {
3823- PTDEBUG && _d("Validating max end point:", $real_min);
3824- $valid_max = $self->_get_valid_end_point(
3825- %args,
3826- val => $real_max,
3827- endpoint => 'max',
3828- );
3829- die sprintf($err_fmt, 'maximum', 'maximum',
3830- (defined $real_max ? $real_max : "NULL"))
3831- unless defined $valid_max;
3832- }
3833-
3834- return $valid_min, $valid_max;
3835-}
3836-
3837-sub _get_valid_end_point {
3838- my ( $self, %args ) = @_;
3839- my @required_args = qw(dbh db_tbl col col_type);
3840- foreach my $arg ( @required_args ) {
3841- die "I need a $arg argument" unless $args{$arg};
3842- }
3843- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3844- my $val = $args{val};
3845-
3846- return $val unless defined $val;
3847-
3848- my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3849- : undef;
3850-
3851- if ( !$validate ) {
3852- PTDEBUG && _d("No validator for", $col_type, "values");
3853- return $val;
3854- }
3855-
3856- return $val if defined $validate->($dbh, $val);
3857-
3858- PTDEBUG && _d("Value is invalid, getting first valid value");
3859- $val = $self->get_first_valid_value(
3860- %args,
3861- val => $val,
3862- validate => $validate,
3863- );
3864-
3865- return $val;
3866-}
3867-
3868-sub get_first_valid_value {
3869- my ( $self, %args ) = @_;
3870- my @required_args = qw(dbh db_tbl col validate endpoint);
3871- foreach my $arg ( @required_args ) {
3872- die "I need a $arg argument" unless $args{$arg};
3873- }
3874- my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};
3875- my $tries = defined $args{tries} ? $args{tries} : 5;
3876- my $val = $args{val};
3877-
3878- return unless defined $val;
3879-
3880- my $cmp = $endpoint =~ m/min/i ? '>'
3881- : $endpoint =~ m/max/i ? '<'
3882- : die "Invalid endpoint arg: $endpoint";
3883- my $sql = "SELECT $col FROM $db_tbl "
3884- . ($args{index_hint} ? "$args{index_hint} " : "")
3885- . "WHERE $col $cmp ? AND $col IS NOT NULL "
3886- . ($args{where} ? "AND ($args{where}) " : "")
3887- . "ORDER BY $col LIMIT 1";
3888- PTDEBUG && _d($dbh, $sql);
3889- my $sth = $dbh->prepare($sql);
3890-
3891- my $last_val = $val;
3892- while ( $tries-- ) {
3893- $sth->execute($last_val);
3894- my ($next_val) = $sth->fetchrow_array();
3895- PTDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);
3896- if ( !defined $next_val ) {
3897- PTDEBUG && _d('No more rows in table');
3898- last;
3899- }
3900- if ( defined $validate->($dbh, $next_val) ) {
3901- PTDEBUG && _d('First valid value:', $next_val);
3902- $sth->finish();
3903- return $next_val;
3904- }
3905- $last_val = $next_val;
3906- }
3907- $sth->finish();
3908- $val = undef; # no valid value found
3909-
3910- return $val;
3911-}
3912-
3913-sub _validate_temporal_value {
3914- my ( $dbh, $val ) = @_;
3915- my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";
3916- my $res;
3917- eval {
3918- PTDEBUG && _d($dbh, $sql, $val);
3919- my $sth = $dbh->prepare($sql);
3920- $sth->execute($val, $val, $val, $val);
3921- ($res) = $sth->fetchrow_array();
3922- $sth->finish();
3923- };
3924- if ( $EVAL_ERROR ) {
3925- PTDEBUG && _d($EVAL_ERROR);
3926- }
3927- return $res;
3928-}
3929-
3930-sub get_nonzero_value {
3931- my ( $self, %args ) = @_;
3932- my @required_args = qw(dbh db_tbl col col_type);
3933- foreach my $arg ( @required_args ) {
3934- die "I need a $arg argument" unless $args{$arg};
3935- }
3936- my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3937- my $tries = defined $args{tries} ? $args{tries} : 5;
3938- my $val = $args{val};
3939-
3940- my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3941- : sub { return $_[1]; };
3942-
3943- if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry
3944- PTDEBUG && _d('Discarding zero value:', $val);
3945- my $sql = "SELECT $col FROM $db_tbl "
3946- . ($args{index_hint} ? "$args{index_hint} " : "")
3947- . "WHERE $col > ? AND $col IS NOT NULL "
3948- . ($args{where} ? "AND ($args{where}) " : '')
3949- . "ORDER BY $col LIMIT 1";
3950- PTDEBUG && _d($sql);
3951- my $sth = $dbh->prepare($sql);
3952-
3953- my $last_val = $val;
3954- while ( $tries-- ) {
3955- $sth->execute($last_val);
3956- my ($next_val) = $sth->fetchrow_array();
3957- if ( $is_nonzero->($dbh, $next_val) ) {
3958- PTDEBUG && _d('First non-zero value:', $next_val);
3959- $sth->finish();
3960- return $next_val;
3961- }
3962- $last_val = $next_val;
3963- }
3964- $sth->finish();
3965- $val = undef; # no non-zero value found
3966- }
3967-
3968- return $val;
3969-}
3970-
3971-sub base_count {
3972- my ( $self, %args ) = @_;
3973- my @required_args = qw(count_to base symbols);
3974- foreach my $arg ( @required_args ) {
3975- die "I need a $arg argument" unless defined $args{$arg};
3976- }
3977- my ($n, $base, $symbols) = @args{@required_args};
3978-
3979- return $symbols->[0] if $n == 0;
3980-
3981- my $highest_power = floor(log($n+0.00001)/log($base));
3982- if ( $highest_power == 0 ){
3983- return $symbols->[$n];
3984- }
3985-
3986- my @base_powers;
3987- for my $power ( 0..$highest_power ) {
3988- push @base_powers, ($base**$power) || 1;
3989- }
3990-
3991- my @base_multiples;
3992- foreach my $base_power ( reverse @base_powers ) {
3993- my $multiples = floor(($n+0.00001) / $base_power);
3994- push @base_multiples, $multiples;
3995- $n -= $multiples * $base_power;
3996- }
3997- return join('', map { $symbols->[$_] } @base_multiples);
3998-}
3999-
4000-sub _d {
4001- my ($package, undef, $line) = caller 0;
4002- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4003- map { defined $_ ? $_ : 'undef' }
4004- @_;
4005- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4006-}
4007-
4008-1;
4009-}
4010-# ###########################################################################
4011-# End TableChunker package
4012-# ###########################################################################
4013-
4014-# ###########################################################################
4015-# TableNibbler package
4016-# This package is a copy without comments from the original. The original
4017-# with comments and its test file can be found in the Bazaar repository at,
4018-# lib/TableNibbler.pm
4019-# t/lib/TableNibbler.t
4020-# See https://launchpad.net/percona-toolkit for more information.
4021-# ###########################################################################
4022-{
4023-package TableNibbler;
4024-
4025-use strict;
4026-use warnings FATAL => 'all';
4027-use English qw(-no_match_vars);
4028-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
4029-
4030-sub new {
4031- my ( $class, %args ) = @_;
4032- my @required_args = qw(TableParser Quoter);
4033- foreach my $arg ( @required_args ) {
4034- die "I need a $arg argument" unless $args{$arg};
4035- }
4036- my $self = { %args };
4037- return bless $self, $class;
4038-}
4039-
4040-sub generate_asc_stmt {
4041- my ( $self, %args ) = @_;
4042- my @required_args = qw(tbl_struct index);
4043- foreach my $arg ( @required_args ) {
4044- die "I need a $arg argument" unless defined $args{$arg};
4045- }
4046- my ($tbl_struct, $index) = @args{@required_args};
4047- my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
4048- my $q = $self->{Quoter};
4049-
4050- die "Index '$index' does not exist in table"
4051- unless exists $tbl_struct->{keys}->{$index};
4052- PTDEBUG && _d('Will ascend index', $index);
4053-
4054- my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
4055- if ( $args{asc_first} ) {
4056- PTDEBUG && _d('Ascending only first column');
4057- @asc_cols = $asc_cols[0];
4058- }
4059- elsif ( my $n = $args{n_index_cols} ) {
4060- $n = scalar @asc_cols if $n > @asc_cols;
4061- PTDEBUG && _d('Ascending only first', $n, 'columns');
4062- @asc_cols = @asc_cols[0..($n-1)];
4063- }
4064- PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
4065-
4066- my @asc_slice;
4067- my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
4068- foreach my $col ( @asc_cols ) {
4069- if ( !exists $col_posn{$col} ) {
4070- push @cols, $col;
4071- $col_posn{$col} = $#cols;
4072- }
4073- push @asc_slice, $col_posn{$col};
4074- }
4075- PTDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice));
4076-
4077- my $asc_stmt = {
4078- cols => \@cols,
4079- index => $index,
4080- where => '',
4081- slice => [],
4082- scols => [],
4083- };
4084-
4085- if ( @asc_slice ) {
4086- my $cmp_where;
4087- foreach my $cmp ( qw(< <= >= >) ) {
4088- $cmp_where = $self->generate_cmp_where(
4089- type => $cmp,
4090- slice => \@asc_slice,
4091- cols => \@cols,
4092- quoter => $q,
4093- is_nullable => $tbl_struct->{is_nullable},
4094- );
4095- $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where};
4096- }
4097- my $cmp = $args{asc_only} ? '>' : '>=';
4098- $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp};
4099- $asc_stmt->{slice} = $cmp_where->{slice};
4100- $asc_stmt->{scols} = $cmp_where->{scols};
4101- }
4102-
4103- return $asc_stmt;
4104-}
4105-
4106-sub generate_cmp_where {
4107- my ( $self, %args ) = @_;
4108- foreach my $arg ( qw(type slice cols is_nullable) ) {
4109- die "I need a $arg arg" unless defined $args{$arg};
4110- }
4111- my @slice = @{$args{slice}};
4112- my @cols = @{$args{cols}};
4113- my $is_nullable = $args{is_nullable};
4114- my $type = $args{type};
4115- my $q = $self->{Quoter};
4116-
4117- (my $cmp = $type) =~ s/=//;
4118-
4119- my @r_slice; # Resulting slice columns, by ordinal
4120- my @r_scols; # Ditto, by name
4121-
4122- my @clauses;
4123- foreach my $i ( 0 .. $#slice ) {
4124- my @clause;
4125-
4126- foreach my $j ( 0 .. $i - 1 ) {
4127- my $ord = $slice[$j];
4128- my $col = $cols[$ord];
4129- my $quo = $q->quote($col);
4130- if ( $is_nullable->{$col} ) {
4131- push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
4132- push @r_slice, $ord, $ord;
4133- push @r_scols, $col, $col;
4134- }
4135- else {
4136- push @clause, "$quo = ?";
4137- push @r_slice, $ord;
4138- push @r_scols, $col;
4139- }
4140- }
4141-
4142- my $ord = $slice[$i];
4143- my $col = $cols[$ord];
4144- my $quo = $q->quote($col);
4145- my $end = $i == $#slice; # Last clause of the whole group.
4146- if ( $is_nullable->{$col} ) {
4147- if ( $type =~ m/=/ && $end ) {
4148- push @clause, "(? IS NULL OR $quo $type ?)";
4149- }
4150- elsif ( $type =~ m/>/ ) {
4151- push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo $cmp ?))";
4152- }
4153- else { # If $type =~ m/</ ) {
4154- push @clause, "((? IS NOT NULL AND $quo IS NULL) OR ($quo $cmp ?))";
4155- }
4156- push @r_slice, $ord, $ord;
4157- push @r_scols, $col, $col;
4158- }
4159- else {
4160- push @r_slice, $ord;
4161- push @r_scols, $col;
4162- push @clause, ($type =~ m/=/ && $end ? "$quo $type ?" : "$quo $cmp ?");
4163- }
4164-
4165- push @clauses, '(' . join(' AND ', @clause) . ')';
4166- }
4167- my $result = '(' . join(' OR ', @clauses) . ')';
4168- my $where = {
4169- slice => \@r_slice,
4170- scols => \@r_scols,
4171- where => $result,
4172- };
4173- return $where;
4174-}
4175-
4176-sub generate_del_stmt {
4177- my ( $self, %args ) = @_;
4178-
4179- my $tbl = $args{tbl_struct};
4180- my @cols = $args{cols} ? @{$args{cols}} : ();
4181- my $tp = $self->{TableParser};
4182- my $q = $self->{Quoter};
4183-
4184- my @del_cols;
4185- my @del_slice;
4186-
4187- my $index = $tp->find_best_index($tbl, $args{index});
4188- die "Cannot find an ascendable index in table" unless $index;
4189-
4190- if ( $index ) {
4191- @del_cols = @{$tbl->{keys}->{$index}->{cols}};
4192- }
4193- else {
4194- @del_cols = @{$tbl->{cols}};
4195- }
4196- PTDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols));
4197-
4198- my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
4199- foreach my $col ( @del_cols ) {
4200- if ( !exists $col_posn{$col} ) {
4201- push @cols, $col;
4202- $col_posn{$col} = $#cols;
4203- }
4204- push @del_slice, $col_posn{$col};
4205- }
4206- PTDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice));
4207-
4208- my $del_stmt = {
4209- cols => \@cols,
4210- index => $index,
4211- where => '',
4212- slice => [],
4213- scols => [],
4214- };
4215-
4216- my @clauses;
4217- foreach my $i ( 0 .. $#del_slice ) {
4218- my $ord = $del_slice[$i];
4219- my $col = $cols[$ord];
4220- my $quo = $q->quote($col);
4221- if ( $tbl->{is_nullable}->{$col} ) {
4222- push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
4223- push @{$del_stmt->{slice}}, $ord, $ord;
4224- push @{$del_stmt->{scols}}, $col, $col;
4225- }
4226- else {
4227- push @clauses, "$quo = ?";
4228- push @{$del_stmt->{slice}}, $ord;
4229- push @{$del_stmt->{scols}}, $col;
4230- }
4231- }
4232-
4233- $del_stmt->{where} = '(' . join(' AND ', @clauses) . ')';
4234-
4235- return $del_stmt;
4236-}
4237-
4238-sub generate_ins_stmt {
4239- my ( $self, %args ) = @_;
4240- foreach my $arg ( qw(ins_tbl sel_cols) ) {
4241- die "I need a $arg argument" unless $args{$arg};
4242- }
4243- my $ins_tbl = $args{ins_tbl};
4244- my @sel_cols = @{$args{sel_cols}};
4245-
4246- die "You didn't specify any SELECT columns" unless @sel_cols;
4247-
4248- my @ins_cols;
4249- my @ins_slice;
4250- for my $i ( 0..$#sel_cols ) {
4251- next unless $ins_tbl->{is_col}->{$sel_cols[$i]};
4252- push @ins_cols, $sel_cols[$i];
4253- push @ins_slice, $i;
4254- }
4255-
4256- return {
4257- cols => \@ins_cols,
4258- slice => \@ins_slice,
4259- };
4260-}
4261-
4262-sub _d {
4263- my ($package, undef, $line) = caller 0;
4264- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4265- map { defined $_ ? $_ : 'undef' }
4266- @_;
4267- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4268-}
4269-
4270-1;
4271-}
4272-# ###########################################################################
4273-# End TableNibbler package
4274-# ###########################################################################
4275-
4276-# ###########################################################################
4277-# TableChecksum package
4278-# This package is a copy without comments from the original. The original
4279-# with comments and its test file can be found in the Bazaar repository at,
4280-# lib/TableChecksum.pm
4281-# t/lib/TableChecksum.t
4282-# See https://launchpad.net/percona-toolkit for more information.
4283-# ###########################################################################
4284-{
4285-package TableChecksum;
4286-
4287-use strict;
4288-use warnings FATAL => 'all';
4289-use English qw(-no_match_vars);
4290-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
4291-
4292-use List::Util qw(max);
4293-
4294-our %ALGOS = (
4295- CHECKSUM => { pref => 0, hash => 0 },
4296- BIT_XOR => { pref => 2, hash => 1 },
4297- ACCUM => { pref => 3, hash => 1 },
4298-);
4299-
4300-sub new {
4301- my ( $class, %args ) = @_;
4302- foreach my $arg ( qw(Quoter) ) {
4303- die "I need a $arg argument" unless defined $args{$arg};
4304- }
4305- my $self = { %args };
4306- return bless $self, $class;
4307-}
4308-
4309-sub crc32 {
4310- my ( $self, $string ) = @_;
4311- my $poly = 0xEDB88320;
4312- my $crc = 0xFFFFFFFF;
4313- foreach my $char ( split(//, $string) ) {
4314- my $comp = ($crc ^ ord($char)) & 0xFF;
4315- for ( 1 .. 8 ) {
4316- $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
4317- }
4318- $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
4319- }
4320- return $crc ^ 0xFFFFFFFF;
4321-}
4322-
4323-sub get_crc_wid {
4324- my ( $self, $dbh, $func ) = @_;
4325- my $crc_wid = 16;
4326- if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
4327- eval {
4328- my ($val) = $dbh->selectrow_array("SELECT $func('a')");
4329- $crc_wid = max(16, length($val));
4330- };
4331- }
4332- return $crc_wid;
4333-}
4334-
4335-sub get_crc_type {
4336- my ( $self, $dbh, $func ) = @_;
4337- my $type = '';
4338- my $length = 0;
4339- my $sql = "SELECT $func('a')";
4340- my $sth = $dbh->prepare($sql);
4341- eval {
4342- $sth->execute();
4343- $type = $sth->{mysql_type_name}->[0];
4344- $length = $sth->{mysql_length}->[0];
4345- PTDEBUG && _d($sql, $type, $length);
4346- if ( $type eq 'bigint' && $length < 20 ) {
4347- $type = 'int';
4348- }
4349- };
4350- $sth->finish;
4351- PTDEBUG && _d('crc_type:', $type, 'length:', $length);
4352- return ($type, $length);
4353-}
4354-
4355-sub best_algorithm {
4356- my ( $self, %args ) = @_;
4357- my ( $alg, $dbh ) = @args{ qw(algorithm dbh) };
4358- my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS;
4359- die "Invalid checksum algorithm $alg"
4360- if $alg && !$ALGOS{$alg};
4361-
4362- if (
4363- $args{where} || $args{chunk} # CHECKSUM does whole table
4364- || $args{replicate}) # CHECKSUM can't do INSERT.. SELECT
4365- {
4366- PTDEBUG && _d('Cannot use CHECKSUM algorithm');
4367- @choices = grep { $_ ne 'CHECKSUM' } @choices;
4368- }
4369-
4370-
4371- if ( $alg && grep { $_ eq $alg } @choices ) {
4372- PTDEBUG && _d('User requested', $alg, 'algorithm');
4373- return $alg;
4374- }
4375-
4376- if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) {
4377- PTDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired');
4378- @choices = grep { $_ ne 'CHECKSUM' } @choices;
4379- }
4380-
4381- PTDEBUG && _d('Algorithms, in order:', @choices);
4382- return $choices[0];
4383-}
4384-
4385-sub is_hash_algorithm {
4386- my ( $self, $algorithm ) = @_;
4387- return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash};
4388-}
4389-
4390-sub choose_hash_func {
4391- my ( $self, %args ) = @_;
4392- my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
4393- if ( $args{function} ) {
4394- unshift @funcs, $args{function};
4395- }
4396- my ($result, $error);
4397- do {
4398- my $func;
4399- eval {
4400- $func = shift(@funcs);
4401- my $sql = "SELECT $func('test-string')";
4402- PTDEBUG && _d($sql);
4403- $args{dbh}->do($sql);
4404- $result = $func;
4405- };
4406- if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
4407- $error .= qq{$func cannot be used because "$1"\n};
4408- PTDEBUG && _d($func, 'cannot be used because', $1);
4409- }
4410- } while ( @funcs && !$result );
4411-
4412- die $error unless $result;
4413- PTDEBUG && _d('Chosen hash func:', $result);
4414- return $result;
4415-}
4416-
4417-sub optimize_xor {
4418- my ( $self, %args ) = @_;
4419- my ($dbh, $func) = @args{qw(dbh function)};
4420-
4421- die "$func never needs the BIT_XOR optimization"
4422- if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
4423-
4424- my $opt_slice = 0;
4425- my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
4426- my $sliced = '';
4427- my $start = 1;
4428- my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced);
4429-
4430- do { # Try different positions till sliced result equals non-sliced.
4431- PTDEBUG && _d('Trying slice', $opt_slice);
4432- $dbh->do(q{SET @crc := '', @cnt := 0});
4433- my $slices = $self->make_xor_slices(
4434- query => "\@crc := $func('a')",
4435- crc_wid => $crc_wid,
4436- opt_slice => $opt_slice,
4437- );
4438-
4439- my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
4440- $sliced = ($dbh->selectrow_array($sql))[0];
4441- if ( $sliced ne $unsliced ) {
4442- PTDEBUG && _d('Slice', $opt_slice, 'does not work');
4443- $start += 16;
4444- ++$opt_slice;
4445- }
4446- } while ( $start < $crc_wid && $sliced ne $unsliced );
4447-
4448- if ( $sliced eq $unsliced ) {
4449- PTDEBUG && _d('Slice', $opt_slice, 'works');
4450- return $opt_slice;
4451- }
4452- else {
4453- PTDEBUG && _d('No slice works');
4454- return undef;
4455- }
4456-}
4457-
4458-sub make_xor_slices {
4459- my ( $self, %args ) = @_;
4460- foreach my $arg ( qw(query crc_wid) ) {
4461- die "I need a $arg argument" unless defined $args{$arg};
4462- }
4463- my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)};
4464-
4465- my @slices;
4466- for ( my $start = 1; $start <= $crc_wid; $start += 16 ) {
4467- my $len = $crc_wid - $start + 1;
4468- if ( $len > 16 ) {
4469- $len = 16;
4470- }
4471- push @slices,
4472- "LPAD(CONV(BIT_XOR("
4473- . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
4474- . ", 10, 16), $len, '0')";
4475- }
4476-
4477- if ( defined $opt_slice && $opt_slice < @slices ) {
4478- $slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
4479- }
4480- else {
4481- map { s/\@crc/$query/ } @slices;
4482- }
4483-
4484- return join(', ', @slices);
4485-}
4486-
4487-sub make_row_checksum {
4488- my ( $self, %args ) = @_;
4489- my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) };
4490- my $q = $self->{Quoter};
4491-
4492- my $sep = $args{sep} || '#';
4493- $sep =~ s/'//g;
4494- $sep ||= '#';
4495-
4496- my $ignorecols = $args{ignorecols} || {};
4497-
4498- my %cols = map { lc($_) => 1 }
4499- grep { !exists $ignorecols->{$_} }
4500- ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
4501- my %seen;
4502- my @cols =
4503- map {
4504- my $type = $tbl_struct->{type_for}->{$_};
4505- my $result = $q->quote($_);
4506- if ( $type eq 'timestamp' ) {
4507- $result .= ' + 0';
4508- }
4509- elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
4510- $result = "ROUND($result, $args{float_precision})";
4511- }
4512- elsif ( $args{trim} && $type =~ m/varchar/ ) {
4513- $result = "TRIM($result)";
4514- }
4515- $result;
4516- }
4517- grep {
4518- $cols{$_} && !$seen{$_}++
4519- }
4520- @{$tbl_struct->{cols}};
4521-
4522- my $query;
4523- if ( !$args{no_cols} ) {
4524- $query = join(', ',
4525- map {
4526- my $col = $_;
4527- if ( $col =~ m/\+ 0/ ) {
4528- my ($real_col) = /^(\S+)/;
4529- $col .= " AS $real_col";
4530- }
4531- elsif ( $col =~ m/TRIM/ ) {
4532- my ($real_col) = m/TRIM\(([^\)]+)\)/;
4533- $col .= " AS $real_col";
4534- }
4535- $col;
4536- } @cols)
4537- . ', ';
4538- }
4539-
4540- if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
4541- my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
4542- if ( @nulls ) {
4543- my $bitmap = "CONCAT("
4544- . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
4545- . ")";
4546- push @cols, $bitmap;
4547- }
4548-
4549- $query .= @cols > 1
4550- ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
4551- : "$func($cols[0])";
4552- }
4553- else {
4554- my $fnv_func = uc $func;
4555- $query .= "$fnv_func(" . join(', ', @cols) . ')';
4556- }
4557-
4558- return $query;
4559-}
4560-
4561-sub make_checksum_query {
4562- my ( $self, %args ) = @_;
4563- my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type);
4564- foreach my $arg( @required_args ) {
4565- die "I need a $arg argument" unless $args{$arg};
4566- }
4567- my ( $db, $tbl, $tbl_struct, $algorithm,
4568- $crc_wid, $crc_type) = @args{@required_args};
4569- my $func = $args{function};
4570- my $q = $self->{Quoter};
4571- my $result;
4572-
4573- die "Invalid or missing checksum algorithm"
4574- unless $algorithm && $ALGOS{$algorithm};
4575-
4576- if ( $algorithm eq 'CHECKSUM' ) {
4577- return "CHECKSUM TABLE " . $q->quote($db, $tbl);
4578- }
4579-
4580- my $expr = $self->make_row_checksum(%args, no_cols=>1);
4581-
4582- if ( $algorithm eq 'BIT_XOR' ) {
4583- if ( $crc_type =~ m/int$/ ) {
4584- $result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc ";
4585- }
4586- else {
4587- my $slices = $self->make_xor_slices( query => $expr, %args );
4588- $result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc ";
4589- }
4590- }
4591- else {
4592- if ( $crc_type =~ m/int$/ ) {
4593- $result = "COALESCE(RIGHT(MAX("
4594- . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
4595- . "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))"
4596- . "), $crc_wid), 0) AS crc ";
4597- }
4598- else {
4599- $result = "COALESCE(RIGHT(MAX("
4600- . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
4601- . "$func(CONCAT(\@crc, $expr)))"
4602- . "), $crc_wid), 0) AS crc ";
4603- }
4604- }
4605- if ( $args{replicate} ) {
4606- $result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} "
4607- . "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
4608- . "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result";
4609- }
4610- else {
4611- $result = "SELECT "
4612- . ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '')
4613- . "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result";
4614- }
4615- return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/";
4616-}
4617-
4618-sub find_replication_differences {
4619- my ( $self, $dbh, $table ) = @_;
4620-
4621- my $sql
4622- = "SELECT db, tbl, CONCAT(db, '.', tbl) AS `table`, "
4623- . "chunk, chunk_index, lower_boundary, upper_boundary, "
4624- . "COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, "
4625- . "COALESCE("
4626- . "this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), 0"
4627- . ") AS crc_diff, this_cnt, master_cnt, this_crc, master_crc "
4628- . "FROM $table "
4629- . "WHERE master_cnt <> this_cnt OR master_crc <> this_crc "
4630- . "OR ISNULL(master_crc) <> ISNULL(this_crc)";
4631- PTDEBUG && _d($sql);
4632- my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
4633- return $diffs;
4634-}
4635-
4636-sub _d {
4637- my ($package, undef, $line) = caller 0;
4638- @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4639- map { defined $_ ? $_ : 'undef' }
4640- @_;
4641- print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4642-}
4643-
4644-1;
4645-}
4646-# ###########################################################################
4647-# End TableChecksum package
4648-# ###########################################################################
4649-
4650-# ###########################################################################
4651-# TableSyncer package
4652-# This package is a copy without comments from the original. The original
4653-# with comments and its test file can be found in the Bazaar repository at,
4654-# lib/TableSyncer.pm
4655-# t/lib/TableSyncer.t
4656-# See https://launchpad.net/percona-toolkit for more information.
4657-# ###########################################################################
4658-{
4659-package TableSyncer;
4660-
4661-use strict;
4662-use warnings FATAL => 'all';
4663-use English qw(-no_match_vars);
4664-use constant PTDEBUG => $ENV{PTDEBUG} || 0;
4665-
4666-use Data::Dumper;
4667-$Data::Dumper::Indent = 1;
4668-$Data::Dumper::Sortkeys = 1;
4669-$Data::Dumper::Quotekeys = 0;
4670-
4671-sub new {
4672- my ( $class, %args ) = @_;
4673- my @required_args = qw(MasterSlave Quoter TableChecksum Retry);
4674- foreach my $arg ( @required_args ) {
4675- die "I need a $arg argument" unless defined $args{$arg};
4676- }
4677- my $self = { %args };
4678- return bless $self, $class;
4679-}
4680-
4681-sub get_best_plugin {
4682- my ( $self, %args ) = @_;
4683- foreach my $arg ( qw(plugins tbl_struct) ) {
4684- die "I need a $arg argument" unless $args{$arg};
4685- }
4686- PTDEBUG && _d('Getting best plugin');
4687- foreach my $plugin ( @{$args{plugins}} ) {
4688- PTDEBUG && _d('Trying plugin', $plugin->name);
4689- my ($can_sync, %plugin_args) = $plugin->can_sync(%args);
4690- if ( $can_sync ) {
4691- PTDEBUG && _d('Can sync with', $plugin->name, Dumper(\%plugin_args));
4692- return $plugin, %plugin_args;
4693- }
4694- }
4695- PTDEBUG && _d('No plugin can sync the table');
4696- return;
4697-}
4698-
4699-sub sync_table {
4700- my ( $self, %args ) = @_;
4701- my @required_args = qw(plugins src dst tbl_struct cols chunk_size
4702- RowDiff ChangeHandler);
4703- foreach my $arg ( @required_args ) {
4704- die "I need a $arg argument" unless $args{$arg};
4705- }
4706- PTDEBUG && _d('Syncing table with args:',
4707- map { "$_: " . Dumper($args{$_}) }
4708- qw(plugins src dst tbl_struct cols chunk_size));
4709-
4710- my ($plugins, $src, $dst, $tbl_struct, $cols, $chunk_size, $rd, $ch)
4711- = @args{@required_args};
4712- my $dp = $self->{DSNParser};
4713- $args{trace} = 1 unless defined $args{trace};
4714-
4715- if ( $args{bidirectional} && $args{ChangeHandler}->{queue} ) {
4716- die "Queueing does not work with bidirectional syncing";
4717- }
4718-
4719- $args{index_hint} = 1 unless defined $args{index_hint};
4720- $args{lock} ||= 0;
4721- $args{wait} ||= 0;
4722- $args{transaction} ||= 0;
4723- $args{timeout_ok} ||= 0;
4724-
4725- my $q = $self->{Quoter};
4726-
4727- my ($plugin, %plugin_args) = $self->get_best_plugin(%args);
4728- die "No plugin can sync $src->{db}.$src->{tbl}" unless $plugin;
4729-
4730- my $crc_col = '__crc';
4731- while ( $tbl_struct->{is_col}->{$crc_col} ) {
4732- $crc_col = "_$crc_col"; # Prepend more _ until not a column.
4733- }
4734- PTDEBUG && _d('CRC column:', $crc_col);
4735-
4736- my $index_hint;
4737- if ( $args{chunk_index} ) {
4738- PTDEBUG && _d('Using given chunk index for index hint');
4739- $index_hint = "FORCE INDEX (" . $q->quote($args{chunk_index}) . ")";
4740- }
4741- elsif ( $plugin_args{chunk_index} && $args{index_hint} ) {
4742- PTDEBUG && _d('Using chunk index chosen by plugin for index hint');
4743- $index_hint = "FORCE INDEX (" . $q->quote($plugin_args{chunk_index}) . ")";
4744- }
4745- PTDEBUG && _d('Index hint:', $index_hint);
4746-
4747- eval {
4748- $plugin->prepare_to_sync(
4749- %args,
4750- %plugin_args,
4751- dbh => $src->{dbh},
4752- db => $src->{db},
4753- tbl => $src->{tbl},
4754- crc_col => $crc_col,
4755- index_hint => $index_hint,
4756- );
4757- };
4758- if ( $EVAL_ERROR ) {
4759- die 'Failed to prepare TableSync', $plugin->name, ' plugin: ',
4760- $EVAL_ERROR;
4761- }
4762-
4763- if ( $plugin->uses_checksum() ) {
4764- eval {
4765- my ($chunk_sql, $row_sql) = $self->make_checksum_queries(%args);
4766- $plugin->set_checksum_queries($chunk_sql, $row_sql);
4767- };
4768- if ( $EVAL_ERROR ) {
4769- die "Failed to make checksum queries: $EVAL_ERROR";
4770- }
4771- }
4772-
4773- if ( $args{dry_run} ) {
4774- return $ch->get_changes(), ALGORITHM => $plugin->name;
4775- }
4776-
4777-
4778- eval {
4779- $src->{dbh}->do("USE `$src->{db}`");
4780- $dst->{dbh}->do("USE `$dst->{db}`");
4781- };
4782- if ( $EVAL_ERROR ) {
4783- die "Failed to USE database on source or destination: $EVAL_ERROR";
4784- }
4785-
4786- PTDEBUG && _d('left dbh', $src->{dbh});
4787- PTDEBUG && _d('right dbh', $dst->{dbh});
4788-
4789- chomp(my $hostname = `hostname`);
4790- my $trace_msg
4791- = $args{trace} ? "src_db:$src->{db} src_tbl:$src->{tbl} "
4792- . ($dp && $src->{dsn} ? "src_dsn:".$dp->as_string($src->{dsn}) : "")
4793- . " dst_db:$dst->{db} dst_tbl:$dst->{tbl} "
4794- . ($dp && $dst->{dsn} ? "dst_dsn:".$dp->as_string($dst->{dsn}) : "")
4795- . " " . join(" ", map { "$_:" . ($args{$_} || 0) }
4796- qw(lock transaction changing_src replicate bidirectional))
4797- . " pid:$PID "
4798- . ($ENV{USER} ? "user:$ENV{USER} " : "")
4799- . ($hostname ? "host:$hostname" : "")
4800- : "";
4801- PTDEBUG && _d("Binlog trace message:", $trace_msg);
4802-
4803- $self->lock_and_wait(%args, lock_level => 2); # per-table lock
4804-
4805- my $callback = $args{callback};
4806- my $cycle = 0;
4807- while ( !$plugin->done() ) {
4808-
4809- PTDEBUG && _d('Beginning sync cycle', $cycle);
4810- my $src_sql = $plugin->get_sql(
4811- database => $src->{db},
4812- table => $src->{tbl},
4813- where => $args{where},
4814- );
4815- my $dst_sql = $plugin->get_sql(
4816- database => $dst->{db},
4817- table => $dst->{tbl},
4818- where => $args{where},
4819- );
4820-
4821- if ( $args{transaction} ) {
4822- if ( $args{bidirectional} ) {
4823- $src_sql .= ' FOR UPDATE';
4824- $dst_sql .= ' FOR UPDATE';
4825- }
4826- elsif ( $args{changing_src} ) {
4827- $src_sql .= ' FOR UPDATE';
4828- $dst_sql .= ' LOCK IN SHARE MODE';
4829- }
4830- else {
4831- $src_sql .= ' LOCK IN SHARE MODE';
4832- $dst_sql .= ' FOR UPDATE';
4833- }
4834- }
4835- PTDEBUG && _d('src:', $src_sql);
4836- PTDEBUG && _d('dst:', $dst_sql);
4837-
4838- $callback->($src_sql, $dst_sql) if $callback;
4839-
4840- $plugin->prepare_sync_cycle($src);
4841- $plugin->prepare_sync_cycle($dst);
4842-
4843- my $src_sth = $src->{dbh}->prepare($src_sql);
4844- my $dst_sth = $dst->{dbh}->prepare($dst_sql);
4845- if ( $args{buffer_to_client} ) {
4846- $src_sth->{mysql_use_result} = 1;
4847- $dst_sth->{mysql_use_result} = 1;
4848- }
4849-
4850- my $executed_src = 0;
4851- if ( !$cycle || !$plugin->pending_changes() ) {
4852- $executed_src
4853- = $self->lock_and_wait(%args, src_sth => $src_sth, lock_level => 1);
4854- }
4855-
4856- $src_sth->execute() unless $executed_src;
4857- $dst_sth->execute();
4858-
4859- $rd->compare_sets(
4860- left_sth => $src_sth,
4861- right_sth => $dst_sth,
4862- left_dbh => $src->{dbh},
4863- right_dbh => $dst->{dbh},
4864- syncer => $plugin,
4865- tbl_struct => $tbl_struct,
4866- );
4867- $ch->process_rows(1, $trace_msg);
4868-
4869- PTDEBUG && _d('Finished sync cycle', $cycle);
4870- $cycle++;
4871- }
4872-
4873- $ch->process_rows(0, $trace_msg);
4874-
4875- $self->unlock(%args, lock_level => 2);
4876-
4877- return $ch->get_changes(), ALGORITHM => $plugin->name;
4878-}
4879-
4880-sub make_checksum_queries {
4881- my ( $self, %args ) = @_;
4882- my @required_args = qw(src dst tbl_struct);
4883- foreach my $arg ( @required_args ) {
4884- die "I need a $arg argument" unless $args{$arg};
4885- }
4886- my ($src, $dst, $tbl_struct) = @args{@required_args};
4887- my $checksum = $self->{TableChecksum};
4888-
4889- my $src_algo = $checksum->best_algorithm(
4890- algorithm => 'BIT_XOR',
4891- dbh => $src->{dbh},
4892- where => 1,
4893- chunk => 1,
4894- count => 1,
4895- );
4896- my $dst_algo = $checksum->best_algorithm(
4897- algorithm => 'BIT_XOR',
4898- dbh => $dst->{dbh},
4899- where => 1,
4900- chunk => 1,
4901- count => 1,
4902- );
4903- if ( $src_algo ne $dst_algo ) {
4904- die "Source and destination checksum algorithms are different: ",
4905- "$src_algo on source, $dst_algo on destination"
4906- }
4907- PTDEBUG && _d('Chosen algo:', $src_algo);
4908-
4909- my $src_func = $checksum->choose_hash_func(dbh => $src->{dbh}, %args);
4910- my $dst_func = $checksum->choose_hash_func(dbh => $dst->{dbh}, %args);
4911- if ( $src_func ne $dst_func ) {
4912- die "Source and destination hash functions are different: ",
4913- "$src_func on source, $dst_func on destination";
4914- }
4915- PTDEBUG && _d('Chosen hash func:', $src_func);
4916-
4917-
4918- my $crc_wid = $checksum->get_crc_wid($src->{dbh}, $src_func);
4919- my ($crc_type) = $checksum->get_crc_type($src->{dbh}, $src_func);
4920- my $opt_slice;
4921- if ( $src_algo eq 'BIT_XOR' && $crc_type !~ m/int$/ ) {
4922- $opt_slice = $checksum->optimize_xor(
4923- dbh => $src->{dbh},
4924- function => $src_func
4925- );
4926- }
4927-
4928- my $chunk_sql = $checksum->make_checksum_query(
4929- %args,
4930- db => $src->{db},
4931- tbl => $src->{tbl},
4932- algorithm => $src_algo,
4933- function => $src_func,
4934- crc_wid => $crc_wid,
4935- crc_type => $crc_type,
4936- opt_slice => $opt_slice,
4937- replicate => undef, # replicate means something different to this sub
4938- ); # than what we use it for; do not pass it!
4939- PTDEBUG && _d('Chunk sql:', $chunk_sql);
4940- my $row_sql = $checksum->make_row_checksum(
4941- %args,
4942- function => $src_func,
4943- );
4944- PTDEBUG && _d('Row sql:', $row_sql);
4945- return $chunk_sql, $row_sql;
4946-}
4947-
4948-sub lock_table {
4949- my ( $self, $dbh, $where, $db_tbl, $mode ) = @_;
4950- my $query = "LOCK TABLES $db_tbl $mode";
4951- PTDEBUG && _d($query);
4952- $dbh->do($query);
4953- PTDEBUG && _d('Acquired table lock on', $where, 'in', $mode, 'mode');
4954-}
4955-
4956-sub unlock {
4957- my ( $self, %args ) = @_;
4958-
4959- foreach my $arg ( qw(src dst lock transaction lock_level) ) {
4960- die "I need a $arg argument" unless defined $args{$arg};
4961- }
4962- my $src = $args{src};
4963- my $dst = $args{dst};
4964-
4965- return unless $args{lock} && $args{lock} <= $args{lock_level};
4966-
4967- foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) {
4968- if ( $args{transaction} ) {
4969- PTDEBUG && _d('Committing', $dbh);
4970- $dbh->commit();
4971- }
4972- else {
4973- my $sql = 'UNLOCK TABLES';
4974- PTDEBUG && _d($dbh, $sql);
4975- $dbh->do($sql);
4976- }
4977- }
4978-
4979- return;
4980-}
4981-
4982-sub lock_and_wait {
4983- my ( $self, %args ) = @_;
4984- my $result = 0;
4985-
4986- foreach my $arg ( qw(src dst lock lock_level) ) {
4987- die "I need a $arg argument" unless defined $args{$arg};
4988- }
4989- my $src = $args{src};
4990- my $dst = $args{dst};
4991-
4992- return unless $args{lock} && $args{lock} == $args{lock_level};
4993- PTDEBUG && _d('lock and wait, lock level', $args{lock});
4994-
4995- foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) {
4996- if ( $args{transaction} ) {
4997- PTDEBUG && _d('Committing', $dbh);
4998- $dbh->commit();
4999- }
5000- else {
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches