Merge lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2 into lp:percona-toolkit/2.2
- pt-upgrade-2.2
- Merge into 2.2
Proposed by
Daniel Nichter
Status: | Merged |
---|---|
Approved by: | Daniel Nichter |
Approved revision: | 544 |
Merged at revision: | 552 |
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2 |
Merge into: | lp:percona-toolkit/2.2 |
Diff against target: |
21780 lines (+10022/-10718) 110 files modified
bin/pt-table-usage (+0/-3) bin/pt-upgrade (+6364/-9150) lib/Cxn.pm (+22/-50) lib/EventExecutor.pm (+115/-0) lib/FakeSth.pm (+51/-0) lib/MySQLProtocolParser.pm (+9/-8) lib/PerconaTest.pm (+16/-6) lib/QueryIterator.pm (+312/-0) lib/ResultIterator.pm (+175/-0) lib/ResultWriter.pm (+184/-0) lib/Sandbox.pm (+2/-0) lib/UpgradeReportFormatter.pm (+0/-223) lib/UpgradeResults.pm (+555/-0) t/lib/QueryIterator.t (+87/-0) t/lib/UpgradeReportFormatter.t (+0/-158) t/lib/UpgradeResults.t (+50/-0) t/lib/samples/UpgradeResults/format_query_times001 (+2/-0) t/lib/samples/compare-warnings.sql (+0/-1) t/lib/samples/rawlogs/rawlog002.txt (+4/-0) t/pt-upgrade/compare_hosts.t (+80/-171) t/pt-upgrade/compare_results.t (+133/-0) t/pt-upgrade/daemon.t (+0/-53) t/pt-upgrade/diff_query_times.t (+91/-0) t/pt-upgrade/diff_rows.t (+119/-0) t/pt-upgrade/diff_warnings.t (+120/-0) t/pt-upgrade/log_types.t (+178/-0) t/pt-upgrade/query_time_diffs.t (+53/-0) t/pt-upgrade/rewrite_non_select.t (+0/-81) t/pt-upgrade/run_time.t (+122/-0) t/pt-upgrade/samples/001/insert.log (+4/-0) t/pt-upgrade/samples/001/insert.txt (+35/-0) t/pt-upgrade/samples/001/insert_results.txt (+32/-0) t/pt-upgrade/samples/001/non-selects-rewritten.txt (+0/-112) t/pt-upgrade/samples/001/non-selects.log (+0/-16) t/pt-upgrade/samples/001/non-selects.txt (+0/-37) t/pt-upgrade/samples/001/one-error-no-clear-warnings.txt (+0/-63) t/pt-upgrade/samples/001/one-error.log (+0/-10) t/pt-upgrade/samples/001/one-error.txt (+0/-66) t/pt-upgrade/samples/001/select-everyone-no-queries.txt (+0/-3) t/pt-upgrade/samples/001/select-everyone-no-stats.txt (+0/-32) t/pt-upgrade/samples/001/select-everyone-rows.txt (+0/-35) t/pt-upgrade/samples/001/select-everyone.log (+0/-22) t/pt-upgrade/samples/001/select-everyone.txt (+0/-35) t/pt-upgrade/samples/001/select-one-rows.txt (+0/-36) t/pt-upgrade/samples/001/select-one.log (+0/-4) t/pt-upgrade/samples/001/select-one.txt (+0/-36) t/pt-upgrade/samples/001/select.log (+4/-0) t/pt-upgrade/samples/001/select.txt (+35/-0) t/pt-upgrade/samples/001/select_results.txt (+32/-0) t/pt-upgrade/samples/001/select_results/query (+3/-0) t/pt-upgrade/samples/001/select_results/results (+6/-0) t/pt-upgrade/samples/001/select_results/rows (+34/-0) t/pt-upgrade/samples/001/tables.sql (+10/-12) t/pt-upgrade/samples/002/host2.sql (+9/-0) t/pt-upgrade/samples/002/no-db.log (+0/-3) t/pt-upgrade/samples/002/report-01.txt (+0/-44) t/pt-upgrade/samples/002/select_missing_rows.log (+4/-0) t/pt-upgrade/samples/002/select_missing_rows.txt (+60/-0) t/pt-upgrade/samples/002/select_missing_rows_results.txt (+57/-0) t/pt-upgrade/samples/002/select_missing_rows_results/query (+3/-0) t/pt-upgrade/samples/002/select_missing_rows_results/results (+6/-0) t/pt-upgrade/samples/002/select_missing_rows_results/rows (+34/-0) t/pt-upgrade/samples/002/tables.sql (+13/-5) t/pt-upgrade/samples/003/conf (+1/-0) t/pt-upgrade/samples/003/double.log (+0/-4) t/pt-upgrade/samples/003/host2.sql (+5/-0) t/pt-upgrade/samples/003/insert_truncate_warning.log (+4/-0) t/pt-upgrade/samples/003/insert_truncate_warning.txt (+63/-0) t/pt-upgrade/samples/003/insert_truncate_warning_results.txt (+60/-0) t/pt-upgrade/samples/003/insert_truncate_warning_results/query (+3/-0) t/pt-upgrade/samples/003/insert_truncate_warning_results/results (+12/-0) t/pt-upgrade/samples/003/insert_truncate_warning_results/rows (+2/-0) t/pt-upgrade/samples/003/report001.txt (+0/-44) t/pt-upgrade/samples/003/tables.sql (+5/-7) t/pt-upgrade/samples/004/select-func.log (+0/-4) t/pt-upgrade/samples/004/select-func.txt (+0/-36) t/pt-upgrade/samples/004/select_function.log (+4/-0) t/pt-upgrade/samples/004/select_function.sed (+1/-0) t/pt-upgrade/samples/004/select_function.txt (+59/-0) t/pt-upgrade/samples/004/tables.sql (+0/-2) t/pt-upgrade/samples/005/error_on_both_hosts.log (+4/-0) t/pt-upgrade/samples/005/error_on_both_hosts.txt (+59/-0) t/pt-upgrade/samples/005/error_on_both_hosts_results.txt (+56/-0) t/pt-upgrade/samples/005/error_on_both_hosts_results/query (+3/-0) t/pt-upgrade/samples/005/error_on_both_hosts_results/results (+5/-0) t/pt-upgrade/samples/005/error_on_both_hosts_results/rows (+2/-0) t/pt-upgrade/samples/005/error_on_host1.log (+4/-0) t/pt-upgrade/samples/005/error_on_host1.txt (+61/-0) t/pt-upgrade/samples/005/error_on_host1_results.txt (+58/-0) t/pt-upgrade/samples/005/error_on_host1_results/query (+3/-0) t/pt-upgrade/samples/005/error_on_host1_results/results (+5/-0) t/pt-upgrade/samples/005/error_on_host1_results/rows (+2/-0) t/pt-upgrade/samples/005/error_on_host2.log (+4/-0) t/pt-upgrade/samples/005/error_on_host2.txt (+61/-0) t/pt-upgrade/samples/005/error_on_host2_results.txt (+58/-0) t/pt-upgrade/samples/005/error_on_host2_results/query (+3/-0) t/pt-upgrade/samples/005/error_on_host2_results/results (+6/-0) t/pt-upgrade/samples/005/error_on_host2_results/rows (+7/-0) t/pt-upgrade/samples/005/host1.sql (+5/-0) t/pt-upgrade/samples/005/host2.sql (+5/-0) t/pt-upgrade/samples/005/tables.sql (+2/-0) t/pt-upgrade/samples/genlog001.txt (+7/-0) t/pt-upgrade/samples/res001/query (+2/-0) t/pt-upgrade/samples/res001/results (+6/-0) t/pt-upgrade/samples/res001/rows (+7/-0) t/pt-upgrade/samples/res001/sleep1.log (+3/-0) t/pt-upgrade/samples/slow_slow.log (+30/-0) t/pt-upgrade/save_results.t (+110/-0) t/pt-upgrade/skip_non_select.t (+0/-56) t/pt-upgrade/warnings.t (+0/-90) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-upgrade-2.2 |
Related bugs: | |
Related blueprints: |
Overhaul pt-upgrade
(Medium)
pt-upgrade use a file as reference
(Undefined)
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Daniel Nichter | Approve | ||
Review via email: mp+152801@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
- 542. By Daniel Nichter
-
Remove --read-timeout. Update docs.
- 543. By Daniel Nichter
-
Fix and test UpgradeResults:
:format_ query_times001( ). - 544. By Daniel Nichter
-
Functional test and fix query time diffs.
Revision history for this message
Daniel Nichter (daniel-nichter) wrote : | # |
The diff is worthless: the whole tool was rewritten.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'bin/pt-table-usage' |
2 | --- bin/pt-table-usage 2013-03-09 16:57:54 +0000 |
3 | +++ bin/pt-table-usage 2013-03-12 01:25:25 +0000 |
4 | @@ -6655,9 +6655,6 @@ |
5 | # just tweak the code below to be like pt-query-digest. |
6 | my %alias_for = ( |
7 | slowlog => ['SlowLogParser'], |
8 | - # binlog => ['BinaryLogParser'], |
9 | - # genlog => ['GeneralLogParser'], |
10 | - # tcpdump => ['TcpdumpParser','MySQLProtocolParser'], |
11 | ); |
12 | my $type = ['slowlog']; |
13 | $type = $alias_for{$type->[0]} if $alias_for{$type->[0]}; |
14 | |
15 | === modified file 'bin/pt-upgrade' |
16 | --- bin/pt-upgrade 2013-03-09 16:57:54 +0000 |
17 | +++ bin/pt-upgrade 2013-03-12 01:25:25 +0000 |
18 | @@ -20,35 +20,33 @@ |
19 | Lmo::Types |
20 | Lmo |
21 | DSNParser |
22 | - TableParser |
23 | Quoter |
24 | OptionParser |
25 | + Cxn |
26 | Transformers |
27 | - SlowLogParser |
28 | - EventAggregator |
29 | - QueryParser |
30 | - QueryRewriter |
31 | Daemon |
32 | - ChangeHandler |
33 | - RowDiff |
34 | - TableChunker |
35 | - TableNibbler |
36 | - TableChecksum |
37 | - TableSyncer |
38 | - TableSyncChunk |
39 | - TableSyncNibble |
40 | - TableSyncGroupBy |
41 | Outfile |
42 | - MockSyncStream |
43 | - MockSth |
44 | - ReportFormatter |
45 | - UpgradeReportFormatter |
46 | - CompareResults |
47 | - CompareQueryTimes |
48 | - CompareWarnings |
49 | Retry |
50 | HTTPMicro |
51 | VersionCheck |
52 | + QueryRewriter |
53 | + VersionParser |
54 | + FileIterator |
55 | + QueryIterator |
56 | + EventExecutor |
57 | + UpgradeResults |
58 | + ResultWriter |
59 | + ResultIterator |
60 | + FakeSth |
61 | + SlowLogParser |
62 | + GeneralLogParser |
63 | + BinaryLogParser |
64 | + RawLogParser |
65 | + ProtocolParser |
66 | + TcpdumpParser |
67 | + MySQLProtocolParser |
68 | + Runtime |
69 | + Progress |
70 | )); |
71 | } |
72 | |
73 | @@ -1138,417 +1136,6 @@ |
74 | # ########################################################################### |
75 | |
76 | # ########################################################################### |
77 | -# TableParser package |
78 | -# This package is a copy without comments from the original. The original |
79 | -# with comments and its test file can be found in the Bazaar repository at, |
80 | -# lib/TableParser.pm |
81 | -# t/lib/TableParser.t |
82 | -# See https://launchpad.net/percona-toolkit for more information. |
83 | -# ########################################################################### |
84 | -{ |
85 | -package TableParser; |
86 | - |
87 | -use strict; |
88 | -use warnings FATAL => 'all'; |
89 | -use English qw(-no_match_vars); |
90 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
91 | - |
92 | -use Data::Dumper; |
93 | -$Data::Dumper::Indent = 1; |
94 | -$Data::Dumper::Sortkeys = 1; |
95 | -$Data::Dumper::Quotekeys = 0; |
96 | - |
97 | -local $EVAL_ERROR; |
98 | -eval { |
99 | - require Quoter; |
100 | -}; |
101 | - |
102 | -sub new { |
103 | - my ( $class, %args ) = @_; |
104 | - my $self = { %args }; |
105 | - $self->{Quoter} ||= Quoter->new(); |
106 | - return bless $self, $class; |
107 | -} |
108 | - |
109 | -sub Quoter { shift->{Quoter} } |
110 | - |
111 | -sub get_create_table { |
112 | - my ( $self, $dbh, $db, $tbl ) = @_; |
113 | - die "I need a dbh parameter" unless $dbh; |
114 | - die "I need a db parameter" unless $db; |
115 | - die "I need a tbl parameter" unless $tbl; |
116 | - my $q = $self->{Quoter}; |
117 | - |
118 | - my $new_sql_mode |
119 | - = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, } |
120 | - . q{@@SQL_MODE := '', } |
121 | - . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, } |
122 | - . q{@@SQL_QUOTE_SHOW_CREATE := 1 */}; |
123 | - |
124 | - my $old_sql_mode |
125 | - = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, } |
126 | - . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */}; |
127 | - |
128 | - PTDEBUG && _d($new_sql_mode); |
129 | - eval { $dbh->do($new_sql_mode); }; |
130 | - PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); |
131 | - |
132 | - my $use_sql = 'USE ' . $q->quote($db); |
133 | - PTDEBUG && _d($dbh, $use_sql); |
134 | - $dbh->do($use_sql); |
135 | - |
136 | - my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl); |
137 | - PTDEBUG && _d($show_sql); |
138 | - my $href; |
139 | - eval { $href = $dbh->selectrow_hashref($show_sql); }; |
140 | - if ( my $e = $EVAL_ERROR ) { |
141 | - PTDEBUG && _d($old_sql_mode); |
142 | - $dbh->do($old_sql_mode); |
143 | - |
144 | - die $e; |
145 | - } |
146 | - |
147 | - PTDEBUG && _d($old_sql_mode); |
148 | - $dbh->do($old_sql_mode); |
149 | - |
150 | - my ($key) = grep { m/create (?:table|view)/i } keys %$href; |
151 | - if ( !$key ) { |
152 | - die "Error: no 'Create Table' or 'Create View' in result set from " |
153 | - . "$show_sql: " . Dumper($href); |
154 | - } |
155 | - |
156 | - return $href->{$key}; |
157 | -} |
158 | - |
159 | -sub parse { |
160 | - my ( $self, $ddl, $opts ) = @_; |
161 | - return unless $ddl; |
162 | - |
163 | - if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) { |
164 | - $ddl = $self->ansi_to_legacy($ddl); |
165 | - } |
166 | - elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) { |
167 | - die "TableParser doesn't handle CREATE TABLE without quoting."; |
168 | - } |
169 | - |
170 | - my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/; |
171 | - (undef, $name) = $self->{Quoter}->split_unquote($name) if $name; |
172 | - |
173 | - $ddl =~ s/(`[^`]+`)/\L$1/g; |
174 | - |
175 | - my $engine = $self->get_engine($ddl); |
176 | - |
177 | - my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; |
178 | - my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; |
179 | - PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); |
180 | - |
181 | - my %def_for; |
182 | - @def_for{@cols} = @defs; |
183 | - |
184 | - my (@nums, @null); |
185 | - my (%type_for, %is_nullable, %is_numeric, %is_autoinc); |
186 | - foreach my $col ( @cols ) { |
187 | - my $def = $def_for{$col}; |
188 | - my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/; |
189 | - die "Can't determine column type for $def" unless $type; |
190 | - $type_for{$col} = $type; |
191 | - if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) { |
192 | - push @nums, $col; |
193 | - $is_numeric{$col} = 1; |
194 | - } |
195 | - if ( $def !~ m/NOT NULL/ ) { |
196 | - push @null, $col; |
197 | - $is_nullable{$col} = 1; |
198 | - } |
199 | - $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0; |
200 | - } |
201 | - |
202 | - my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable); |
203 | - |
204 | - my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/; |
205 | - |
206 | - return { |
207 | - name => $name, |
208 | - cols => \@cols, |
209 | - col_posn => { map { $cols[$_] => $_ } 0..$#cols }, |
210 | - is_col => { map { $_ => 1 } @cols }, |
211 | - null_cols => \@null, |
212 | - is_nullable => \%is_nullable, |
213 | - is_autoinc => \%is_autoinc, |
214 | - clustered_key => $clustered_key, |
215 | - keys => $keys, |
216 | - defs => \%def_for, |
217 | - numeric_cols => \@nums, |
218 | - is_numeric => \%is_numeric, |
219 | - engine => $engine, |
220 | - type_for => \%type_for, |
221 | - charset => $charset, |
222 | - }; |
223 | -} |
224 | - |
225 | -sub sort_indexes { |
226 | - my ( $self, $tbl ) = @_; |
227 | - |
228 | - my @indexes |
229 | - = sort { |
230 | - (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY')) |
231 | - || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} ) |
232 | - || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} ) |
233 | - || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) ) |
234 | - } |
235 | - grep { |
236 | - $tbl->{keys}->{$_}->{type} eq 'BTREE' |
237 | - } |
238 | - sort keys %{$tbl->{keys}}; |
239 | - |
240 | - PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes)); |
241 | - return @indexes; |
242 | -} |
243 | - |
244 | -sub find_best_index { |
245 | - my ( $self, $tbl, $index ) = @_; |
246 | - my $best; |
247 | - if ( $index ) { |
248 | - ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}}; |
249 | - } |
250 | - if ( !$best ) { |
251 | - if ( $index ) { |
252 | - die "Index '$index' does not exist in table"; |
253 | - } |
254 | - else { |
255 | - ($best) = $self->sort_indexes($tbl); |
256 | - } |
257 | - } |
258 | - PTDEBUG && _d('Best index found is', $best); |
259 | - return $best; |
260 | -} |
261 | - |
262 | -sub find_possible_keys { |
263 | - my ( $self, $dbh, $database, $table, $quoter, $where ) = @_; |
264 | - return () unless $where; |
265 | - my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table) |
266 | - . ' WHERE ' . $where; |
267 | - PTDEBUG && _d($sql); |
268 | - my $expl = $dbh->selectrow_hashref($sql); |
269 | - $expl = { map { lc($_) => $expl->{$_} } keys %$expl }; |
270 | - if ( $expl->{possible_keys} ) { |
271 | - PTDEBUG && _d('possible_keys =', $expl->{possible_keys}); |
272 | - my @candidates = split(',', $expl->{possible_keys}); |
273 | - my %possible = map { $_ => 1 } @candidates; |
274 | - if ( $expl->{key} ) { |
275 | - PTDEBUG && _d('MySQL chose', $expl->{key}); |
276 | - unshift @candidates, grep { $possible{$_} } split(',', $expl->{key}); |
277 | - PTDEBUG && _d('Before deduping:', join(', ', @candidates)); |
278 | - my %seen; |
279 | - @candidates = grep { !$seen{$_}++ } @candidates; |
280 | - } |
281 | - PTDEBUG && _d('Final list:', join(', ', @candidates)); |
282 | - return @candidates; |
283 | - } |
284 | - else { |
285 | - PTDEBUG && _d('No keys in possible_keys'); |
286 | - return (); |
287 | - } |
288 | -} |
289 | - |
290 | -sub check_table { |
291 | - my ( $self, %args ) = @_; |
292 | - my @required_args = qw(dbh db tbl); |
293 | - foreach my $arg ( @required_args ) { |
294 | - die "I need a $arg argument" unless $args{$arg}; |
295 | - } |
296 | - my ($dbh, $db, $tbl) = @args{@required_args}; |
297 | - my $q = $self->{Quoter} || 'Quoter'; |
298 | - my $db_tbl = $q->quote($db, $tbl); |
299 | - PTDEBUG && _d('Checking', $db_tbl); |
300 | - |
301 | - my $sql = "SHOW TABLES FROM " . $q->quote($db) |
302 | - . ' LIKE ' . $q->literal_like($tbl); |
303 | - PTDEBUG && _d($sql); |
304 | - my $row; |
305 | - eval { |
306 | - $row = $dbh->selectrow_arrayref($sql); |
307 | - }; |
308 | - if ( $EVAL_ERROR ) { |
309 | - PTDEBUG && _d($EVAL_ERROR); |
310 | - return 0; |
311 | - } |
312 | - if ( !$row->[0] || $row->[0] ne $tbl ) { |
313 | - PTDEBUG && _d('Table does not exist'); |
314 | - return 0; |
315 | - } |
316 | - |
317 | - PTDEBUG && _d('Table', $db, $tbl, 'exists'); |
318 | - return 1; |
319 | - |
320 | -} |
321 | - |
322 | -sub get_engine { |
323 | - my ( $self, $ddl, $opts ) = @_; |
324 | - my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; |
325 | - PTDEBUG && _d('Storage engine:', $engine); |
326 | - return $engine || undef; |
327 | -} |
328 | - |
329 | -sub get_keys { |
330 | - my ( $self, $ddl, $opts, $is_nullable ) = @_; |
331 | - my $engine = $self->get_engine($ddl); |
332 | - my $keys = {}; |
333 | - my $clustered_key = undef; |
334 | - |
335 | - KEY: |
336 | - foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { |
337 | - |
338 | - next KEY if $key =~ m/FOREIGN/; |
339 | - |
340 | - my $key_ddl = $key; |
341 | - PTDEBUG && _d('Parsed key:', $key_ddl); |
342 | - |
343 | - if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) { |
344 | - $key =~ s/USING HASH/USING BTREE/; |
345 | - } |
346 | - |
347 | - my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/; |
348 | - my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; |
349 | - $type = $type || $special || 'BTREE'; |
350 | - my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; |
351 | - my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0; |
352 | - my @cols; |
353 | - my @col_prefixes; |
354 | - foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) { |
355 | - my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/; |
356 | - push @cols, $name; |
357 | - push @col_prefixes, $prefix; |
358 | - } |
359 | - $name =~ s/`//g; |
360 | - |
361 | - PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols)); |
362 | - |
363 | - $keys->{$name} = { |
364 | - name => $name, |
365 | - type => $type, |
366 | - colnames => $cols, |
367 | - cols => \@cols, |
368 | - col_prefixes => \@col_prefixes, |
369 | - is_unique => $unique, |
370 | - is_nullable => scalar(grep { $is_nullable->{$_} } @cols), |
371 | - is_col => { map { $_ => 1 } @cols }, |
372 | - ddl => $key_ddl, |
373 | - }; |
374 | - |
375 | - if ( ($engine || '') =~ m/InnoDB/i && !$clustered_key ) { |
376 | - my $this_key = $keys->{$name}; |
377 | - if ( $this_key->{name} eq 'PRIMARY' ) { |
378 | - $clustered_key = 'PRIMARY'; |
379 | - } |
380 | - elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) { |
381 | - $clustered_key = $this_key->{name}; |
382 | - } |
383 | - PTDEBUG && $clustered_key && _d('This key is the clustered key'); |
384 | - } |
385 | - } |
386 | - |
387 | - return $keys, $clustered_key; |
388 | -} |
389 | - |
390 | -sub get_fks { |
391 | - my ( $self, $ddl, $opts ) = @_; |
392 | - my $q = $self->{Quoter}; |
393 | - my $fks = {}; |
394 | - |
395 | - foreach my $fk ( |
396 | - $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg ) |
397 | - { |
398 | - my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/; |
399 | - my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/; |
400 | - my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/; |
401 | - |
402 | - my ($db, $tbl) = $q->split_unquote($parent, $opts->{database}); |
403 | - my %parent_tbl = (tbl => $tbl); |
404 | - $parent_tbl{db} = $db if $db; |
405 | - |
406 | - if ( $parent !~ m/\./ && $opts->{database} ) { |
407 | - $parent = $q->quote($opts->{database}) . ".$parent"; |
408 | - } |
409 | - |
410 | - $fks->{$name} = { |
411 | - name => $name, |
412 | - colnames => $cols, |
413 | - cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ], |
414 | - parent_tbl => \%parent_tbl, |
415 | - parent_tblname => $parent, |
416 | - parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ], |
417 | - parent_colnames=> $parent_cols, |
418 | - ddl => $fk, |
419 | - }; |
420 | - } |
421 | - |
422 | - return $fks; |
423 | -} |
424 | - |
425 | -sub remove_auto_increment { |
426 | - my ( $self, $ddl ) = @_; |
427 | - $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m; |
428 | - return $ddl; |
429 | -} |
430 | - |
431 | -sub get_table_status { |
432 | - my ( $self, $dbh, $db, $like ) = @_; |
433 | - my $q = $self->{Quoter}; |
434 | - my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db); |
435 | - my @params; |
436 | - if ( $like ) { |
437 | - $sql .= ' LIKE ?'; |
438 | - push @params, $like; |
439 | - } |
440 | - PTDEBUG && _d($sql, @params); |
441 | - my $sth = $dbh->prepare($sql); |
442 | - eval { $sth->execute(@params); }; |
443 | - if ($EVAL_ERROR) { |
444 | - PTDEBUG && _d($EVAL_ERROR); |
445 | - return; |
446 | - } |
447 | - my @tables = @{$sth->fetchall_arrayref({})}; |
448 | - @tables = map { |
449 | - my %tbl; # Make a copy with lowercased keys |
450 | - @tbl{ map { lc $_ } keys %$_ } = values %$_; |
451 | - $tbl{engine} ||= $tbl{type} || $tbl{comment}; |
452 | - delete $tbl{type}; |
453 | - \%tbl; |
454 | - } @tables; |
455 | - return @tables; |
456 | -} |
457 | - |
458 | -my $ansi_quote_re = qr/" [^"]* (?: "" [^"]* )* (?<=.) "/ismx; |
459 | -sub ansi_to_legacy { |
460 | - my ($self, $ddl) = @_; |
461 | - $ddl =~ s/($ansi_quote_re)/ansi_quote_replace($1)/ge; |
462 | - return $ddl; |
463 | -} |
464 | - |
465 | -sub ansi_quote_replace { |
466 | - my ($val) = @_; |
467 | - $val =~ s/^"|"$//g; |
468 | - $val =~ s/`/``/g; |
469 | - $val =~ s/""/"/g; |
470 | - return "`$val`"; |
471 | -} |
472 | - |
473 | -sub _d { |
474 | - my ($package, undef, $line) = caller 0; |
475 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
476 | - map { defined $_ ? $_ : 'undef' } |
477 | - @_; |
478 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
479 | -} |
480 | - |
481 | -1; |
482 | -} |
483 | -# ########################################################################### |
484 | -# End TableParser package |
485 | -# ########################################################################### |
486 | - |
487 | -# ########################################################################### |
488 | # Quoter package |
489 | # This package is a copy without comments from the original. The original |
490 | # with comments and its test file can be found in the Bazaar repository at, |
491 | @@ -2765,6 +2352,155 @@ |
492 | # ########################################################################### |
493 | |
494 | # ########################################################################### |
495 | +# Cxn package |
496 | +# This package is a copy without comments from the original. The original |
497 | +# with comments and its test file can be found in the Bazaar repository at, |
498 | +# lib/Cxn.pm |
499 | +# t/lib/Cxn.t |
500 | +# See https://launchpad.net/percona-toolkit for more information. |
501 | +# ########################################################################### |
502 | +{ |
503 | +package Cxn; |
504 | + |
505 | +use strict; |
506 | +use warnings FATAL => 'all'; |
507 | +use English qw(-no_match_vars); |
508 | +use Scalar::Util qw(blessed); |
509 | +use constant { |
510 | + PTDEBUG => $ENV{PTDEBUG} || 0, |
511 | + PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0, |
512 | +}; |
513 | + |
514 | +sub new { |
515 | + my ( $class, %args ) = @_; |
516 | + my @required_args = qw(DSNParser OptionParser); |
517 | + foreach my $arg ( @required_args ) { |
518 | + die "I need a $arg argument" unless $args{$arg}; |
519 | + }; |
520 | + my ($dp, $o) = @args{@required_args}; |
521 | + |
522 | + my $dsn_defaults = $dp->parse_options($o); |
523 | + my $prev_dsn = $args{prev_dsn}; |
524 | + my $dsn = $args{dsn}; |
525 | + if ( !$dsn ) { |
526 | + $args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost'); |
527 | + |
528 | + $dsn = $dp->parse( |
529 | + $args{dsn_string}, $prev_dsn, $dsn_defaults); |
530 | + } |
531 | + elsif ( $prev_dsn ) { |
532 | + $dsn = $dp->copy($prev_dsn, $dsn); |
533 | + } |
534 | + |
535 | + my $dsn_name = $dp->as_string($dsn, [qw(h P S)]) |
536 | + || $dp->as_string($dsn, [qw(F)]) |
537 | + || ''; |
538 | + |
539 | + my $self = { |
540 | + dsn => $dsn, |
541 | + dbh => $args{dbh}, |
542 | + dsn_name => $dsn_name, |
543 | + hostname => '', |
544 | + set => $args{set}, |
545 | + NAME_lc => defined($args{NAME_lc}) ? $args{NAME_lc} : 1, |
546 | + dbh_set => 0, |
547 | + ask_pass => $o->get('ask-pass'), |
548 | + DSNParser => $dp, |
549 | + is_cluster_node => undef, |
550 | + }; |
551 | + |
552 | + return bless $self, $class; |
553 | +} |
554 | + |
555 | +sub connect { |
556 | + my ( $self ) = @_; |
557 | + my $dsn = $self->{dsn}; |
558 | + my $dp = $self->{DSNParser}; |
559 | + |
560 | + my $dbh = $self->{dbh}; |
561 | + if ( !$dbh || !$dbh->ping() ) { |
562 | + if ( $self->{ask_pass} && !$self->{asked_for_pass} ) { |
563 | + $dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: "); |
564 | + $self->{asked_for_pass} = 1; |
565 | + } |
566 | + $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 }); |
567 | + } |
568 | + PTDEBUG && _d($dbh, 'Connected dbh to', $self->{name}); |
569 | + |
570 | + return $self->set_dbh($dbh); |
571 | +} |
572 | + |
573 | +sub set_dbh { |
574 | + my ($self, $dbh) = @_; |
575 | + |
576 | + if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) { |
577 | + PTDEBUG && _d($dbh, 'Already set dbh'); |
578 | + return $dbh; |
579 | + } |
580 | + |
581 | + PTDEBUG && _d($dbh, 'Setting dbh'); |
582 | + |
583 | + $dbh->{FetchHashKeyName} = 'NAME_lc' if $self->{NAME_lc}; |
584 | + |
585 | + my $sql = 'SELECT @@hostname, @@server_id'; |
586 | + PTDEBUG && _d($dbh, $sql); |
587 | + my ($hostname, $server_id) = $dbh->selectrow_array($sql); |
588 | + PTDEBUG && _d($dbh, 'hostname:', $hostname, $server_id); |
589 | + if ( $hostname ) { |
590 | + $self->{hostname} = $hostname; |
591 | + } |
592 | + |
593 | + if ( my $set = $self->{set}) { |
594 | + $set->($dbh); |
595 | + } |
596 | + |
597 | + $self->{dbh} = $dbh; |
598 | + $self->{dbh_set} = 1; |
599 | + return $dbh; |
600 | +} |
601 | + |
602 | +sub dbh { |
603 | + my ($self) = @_; |
604 | + return $self->{dbh}; |
605 | +} |
606 | + |
607 | +sub dsn { |
608 | + my ($self) = @_; |
609 | + return $self->{dsn}; |
610 | +} |
611 | + |
612 | +sub name { |
613 | + my ($self) = @_; |
614 | + return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES; |
615 | + return $self->{hostname} || $self->{dsn_name} || 'unknown host'; |
616 | +} |
617 | + |
618 | +sub DESTROY { |
619 | + my ($self) = @_; |
620 | + if ( $self->{dbh} |
621 | + && blessed($self->{dbh}) |
622 | + && $self->{dbh}->can("disconnect") ) { |
623 | + PTDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name}); |
624 | + $self->{dbh}->disconnect(); |
625 | + } |
626 | + return; |
627 | +} |
628 | + |
629 | +sub _d { |
630 | + my ($package, undef, $line) = caller 0; |
631 | + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
632 | + map { defined $_ ? $_ : 'undef' } |
633 | + @_; |
634 | + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
635 | +} |
636 | + |
637 | +1; |
638 | +} |
639 | +# ########################################################################### |
640 | +# End Cxn package |
641 | +# ########################################################################### |
642 | + |
643 | +# ########################################################################### |
644 | # Transformers package |
645 | # This package is a copy without comments from the original. The original |
646 | # with comments and its test file can be found in the Bazaar repository at, |
647 | @@ -3118,1909 +2854,6 @@ |
648 | # ########################################################################### |
649 | |
650 | # ########################################################################### |
651 | -# SlowLogParser package |
652 | -# This package is a copy without comments from the original. The original |
653 | -# with comments and its test file can be found in the Bazaar repository at, |
654 | -# lib/SlowLogParser.pm |
655 | -# t/lib/SlowLogParser.t |
656 | -# See https://launchpad.net/percona-toolkit for more information. |
657 | -# ########################################################################### |
658 | -{ |
659 | -package SlowLogParser; |
660 | - |
661 | -use strict; |
662 | -use warnings FATAL => 'all'; |
663 | -use English qw(-no_match_vars); |
664 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
665 | - |
666 | -use Data::Dumper; |
667 | -$Data::Dumper::Indent = 1; |
668 | -$Data::Dumper::Sortkeys = 1; |
669 | -$Data::Dumper::Quotekeys = 0; |
670 | - |
671 | -sub new { |
672 | - my ( $class ) = @_; |
673 | - my $self = { |
674 | - pending => [], |
675 | - }; |
676 | - return bless $self, $class; |
677 | -} |
678 | - |
679 | -my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/; |
680 | -my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/; |
681 | -my $slow_log_hd_line = qr{ |
682 | - ^(?: |
683 | - T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix |
684 | - | |
685 | - [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary) |
686 | - | |
687 | - Time\s+Id\s+Command |
688 | - ).*\n |
689 | - }xm; |
690 | - |
691 | -sub parse_event { |
692 | - my ( $self, %args ) = @_; |
693 | - my @required_args = qw(next_event tell); |
694 | - foreach my $arg ( @required_args ) { |
695 | - die "I need a $arg argument" unless $args{$arg}; |
696 | - } |
697 | - my ($next_event, $tell) = @args{@required_args}; |
698 | - |
699 | - my $pending = $self->{pending}; |
700 | - local $INPUT_RECORD_SEPARATOR = ";\n#"; |
701 | - my $trimlen = length($INPUT_RECORD_SEPARATOR); |
702 | - my $pos_in_log = $tell->(); |
703 | - my $stmt; |
704 | - |
705 | - EVENT: |
706 | - while ( |
707 | - defined($stmt = shift @$pending) |
708 | - or defined($stmt = $next_event->()) |
709 | - ) { |
710 | - my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log); |
711 | - $pos_in_log = $tell->(); |
712 | - |
713 | - if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log |
714 | - my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt); |
715 | - if ( @chunks > 1 ) { |
716 | - PTDEBUG && _d("Found multiple chunks"); |
717 | - $stmt = shift @chunks; |
718 | - unshift @$pending, @chunks; |
719 | - } |
720 | - } |
721 | - |
722 | - $stmt = '#' . $stmt unless $stmt =~ m/\A#/; |
723 | - $stmt =~ s/;\n#?\Z//; |
724 | - |
725 | - |
726 | - my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed); |
727 | - my $pos = 0; |
728 | - my $len = length($stmt); |
729 | - my $found_arg = 0; |
730 | - LINE: |
731 | - while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match. |
732 | - $pos = pos($stmt); # Be careful not to mess this up! |
733 | - my $line = $1; # Necessary for /g and pos() to work. |
734 | - PTDEBUG && _d($line); |
735 | - |
736 | - if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) { |
737 | - |
738 | - if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) { |
739 | - PTDEBUG && _d("Got ts", $time); |
740 | - push @properties, 'ts', $time; |
741 | - ++$got_ts; |
742 | - if ( !$got_uh |
743 | - && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o ) |
744 | - ) { |
745 | - PTDEBUG && _d("Got user, host, ip", $user, $host, $ip); |
746 | - push @properties, 'user', $user, 'host', $host, 'ip', $ip; |
747 | - ++$got_uh; |
748 | - } |
749 | - } |
750 | - |
751 | - elsif ( !$got_uh |
752 | - && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o ) |
753 | - ) { |
754 | - PTDEBUG && _d("Got user, host, ip", $user, $host, $ip); |
755 | - push @properties, 'user', $user, 'host', $host, 'ip', $ip; |
756 | - ++$got_uh; |
757 | - } |
758 | - |
759 | - elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) { |
760 | - PTDEBUG && _d("Got admin command"); |
761 | - $line =~ s/^#\s+//; # string leading "# ". |
762 | - push @properties, 'cmd', 'Admin', 'arg', $line; |
763 | - push @properties, 'bytes', length($properties[-1]); |
764 | - ++$found_arg; |
765 | - ++$got_ac; |
766 | - } |
767 | - |
768 | - elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap! |
769 | - PTDEBUG && _d("Got some line with properties"); |
770 | - |
771 | - if ( $line =~ m/Schema:\s+\w+: / ) { |
772 | - PTDEBUG && _d('Removing empty Schema attrib'); |
773 | - $line =~ s/Schema:\s+//; |
774 | - PTDEBUG && _d($line); |
775 | - } |
776 | - |
777 | - my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g; |
778 | - push @properties, @temp; |
779 | - } |
780 | - |
781 | - elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) { |
782 | - PTDEBUG && _d("Got a default database:", $db); |
783 | - push @properties, 'db', $db; |
784 | - ++$got_db; |
785 | - } |
786 | - |
787 | - elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) { |
788 | - PTDEBUG && _d("Got some setting:", $setting); |
789 | - push @properties, split(/,|\s*=\s*/, $setting); |
790 | - ++$got_set; |
791 | - } |
792 | - |
793 | - if ( !$found_arg && $pos == $len ) { |
794 | - PTDEBUG && _d("Did not find arg, looking for special cases"); |
795 | - local $INPUT_RECORD_SEPARATOR = ";\n"; # get next line |
796 | - if ( defined(my $l = $next_event->()) ) { |
797 | - if ( $l =~ /^\s*[A-Z][a-z_]+: / ) { |
798 | - PTDEBUG && _d("Found NULL query before", $l); |
799 | - local $INPUT_RECORD_SEPARATOR = ";\n#"; |
800 | - my $rest_of_event = $next_event->(); |
801 | - push @{$self->{pending}}, $l . $rest_of_event; |
802 | - push @properties, 'cmd', 'Query', 'arg', '/* No query */'; |
803 | - push @properties, 'bytes', 0; |
804 | - $found_arg++; |
805 | - } |
806 | - else { |
807 | - chomp $l; |
808 | - $l =~ s/^\s+//; |
809 | - PTDEBUG && _d("Found admin statement", $l); |
810 | - push @properties, 'cmd', 'Admin', 'arg', $l; |
811 | - push @properties, 'bytes', length($properties[-1]); |
812 | - $found_arg++; |
813 | - } |
814 | - } |
815 | - else { |
816 | - PTDEBUG && _d("I can't figure out what to do with this line"); |
817 | - next EVENT; |
818 | - } |
819 | - } |
820 | - } |
821 | - else { |
822 | - PTDEBUG && _d("Got the query/arg line"); |
823 | - my $arg = substr($stmt, $pos - length($line)); |
824 | - push @properties, 'arg', $arg, 'bytes', length($arg); |
825 | - if ( $args{misc} && $args{misc}->{embed} |
826 | - && ( my ($e) = $arg =~ m/($args{misc}->{embed})/) |
827 | - ) { |
828 | - push @properties, $e =~ m/$args{misc}->{capture}/g; |
829 | - } |
830 | - last LINE; |
831 | - } |
832 | - } |
833 | - |
834 | - PTDEBUG && _d('Properties of event:', Dumper(\@properties)); |
835 | - my $event = { @properties }; |
836 | - if ( $args{stats} ) { |
837 | - $args{stats}->{events_read}++; |
838 | - $args{stats}->{events_parsed}++; |
839 | - } |
840 | - return $event; |
841 | - } # EVENT |
842 | - |
843 | - @$pending = (); |
844 | - $args{oktorun}->(0) if $args{oktorun}; |
845 | - return; |
846 | -} |
847 | - |
848 | -sub _d { |
849 | - my ($package, undef, $line) = caller 0; |
850 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
851 | - map { defined $_ ? $_ : 'undef' } |
852 | - @_; |
853 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
854 | -} |
855 | - |
856 | -1; |
857 | -} |
858 | -# ########################################################################### |
859 | -# End SlowLogParser package |
860 | -# ########################################################################### |
861 | - |
862 | -# ########################################################################### |
863 | -# EventAggregator package |
864 | -# This package is a copy without comments from the original. The original |
865 | -# with comments and its test file can be found in the Bazaar repository at, |
866 | -# lib/EventAggregator.pm |
867 | -# t/lib/EventAggregator.t |
868 | -# See https://launchpad.net/percona-toolkit for more information. |
869 | -# ########################################################################### |
870 | -{ |
871 | -package EventAggregator; |
872 | - |
873 | -use strict; |
874 | -use warnings FATAL => 'all'; |
875 | -use English qw(-no_match_vars); |
876 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
877 | - |
878 | -use List::Util qw(min max); |
879 | -use Data::Dumper; |
880 | -$Data::Dumper::Indent = 1; |
881 | -$Data::Dumper::Sortkeys = 1; |
882 | -$Data::Dumper::Quotekeys = 0; |
883 | - |
884 | -use constant BUCK_SIZE => 1.05; |
885 | -use constant BASE_LOG => log(BUCK_SIZE); |
886 | -use constant BASE_OFFSET => abs(1 - log(0.000001) / BASE_LOG); # 284.1617969 |
887 | -use constant NUM_BUCK => 1000; |
888 | -use constant MIN_BUCK => .000001; |
889 | - |
890 | -my @buck_vals = map { bucket_value($_); } (0..NUM_BUCK-1); |
891 | - |
892 | -sub new { |
893 | - my ( $class, %args ) = @_; |
894 | - foreach my $arg ( qw(groupby worst) ) { |
895 | - die "I need a $arg argument" unless $args{$arg}; |
896 | - } |
897 | - my $attributes = $args{attributes} || {}; |
898 | - my $self = { |
899 | - groupby => $args{groupby}, |
900 | - detect_attribs => scalar keys %$attributes == 0 ? 1 : 0, |
901 | - all_attribs => [ keys %$attributes ], |
902 | - ignore_attribs => { |
903 | - map { $_ => $args{attributes}->{$_} } |
904 | - grep { $_ ne $args{groupby} } |
905 | - @{$args{ignore_attributes}} |
906 | - }, |
907 | - attributes => { |
908 | - map { $_ => $args{attributes}->{$_} } |
909 | - grep { $_ ne $args{groupby} } |
910 | - keys %$attributes |
911 | - }, |
912 | - alt_attribs => { |
913 | - map { $_ => make_alt_attrib(@{$args{attributes}->{$_}}) } |
914 | - grep { $_ ne $args{groupby} } |
915 | - keys %$attributes |
916 | - }, |
917 | - worst => $args{worst}, |
918 | - unroll_limit => $args{unroll_limit} || 1000, |
919 | - attrib_limit => $args{attrib_limit}, |
920 | - result_classes => {}, |
921 | - result_globals => {}, |
922 | - result_samples => {}, |
923 | - class_metrics => {}, |
924 | - global_metrics => {}, |
925 | - n_events => 0, |
926 | - unrolled_loops => undef, |
927 | - type_for => { %{$args{type_for} || { Query_time => 'num' }} }, |
928 | - }; |
929 | - return bless $self, $class; |
930 | -} |
931 | - |
932 | -sub reset_aggregated_data { |
933 | - my ( $self ) = @_; |
934 | - foreach my $class ( values %{$self->{result_classes}} ) { |
935 | - foreach my $attrib ( values %$class ) { |
936 | - delete @{$attrib}{keys %$attrib}; |
937 | - } |
938 | - } |
939 | - foreach my $class ( values %{$self->{result_globals}} ) { |
940 | - delete @{$class}{keys %$class}; |
941 | - } |
942 | - delete @{$self->{result_samples}}{keys %{$self->{result_samples}}}; |
943 | - $self->{n_events} = 0; |
944 | -} |
945 | - |
946 | -sub aggregate { |
947 | - my ( $self, $event ) = @_; |
948 | - |
949 | - my $group_by = $event->{$self->{groupby}}; |
950 | - return unless defined $group_by; |
951 | - |
952 | - $self->{n_events}++; |
953 | - PTDEBUG && _d('Event', $self->{n_events}); |
954 | - |
955 | - return $self->{unrolled_loops}->($self, $event, $group_by) |
956 | - if $self->{unrolled_loops}; |
957 | - |
958 | - if ( $self->{n_events} <= $self->{unroll_limit} ) { |
959 | - |
960 | - $self->add_new_attributes($event) if $self->{detect_attribs}; |
961 | - |
962 | - ATTRIB: |
963 | - foreach my $attrib ( keys %{$self->{attributes}} ) { |
964 | - |
965 | - if ( !exists $event->{$attrib} ) { |
966 | - PTDEBUG && _d("attrib doesn't exist in event:", $attrib); |
967 | - my $alt_attrib = $self->{alt_attribs}->{$attrib}->($event); |
968 | - PTDEBUG && _d('alt attrib:', $alt_attrib); |
969 | - next ATTRIB unless $alt_attrib; |
970 | - } |
971 | - |
972 | - GROUPBY: |
973 | - foreach my $val ( ref $group_by ? @$group_by : ($group_by) ) { |
974 | - my $class_attrib = $self->{result_classes}->{$val}->{$attrib} ||= {}; |
975 | - my $global_attrib = $self->{result_globals}->{$attrib} ||= {}; |
976 | - my $samples = $self->{result_samples}; |
977 | - my $handler = $self->{handlers}->{ $attrib }; |
978 | - if ( !$handler ) { |
979 | - $handler = $self->make_handler( |
980 | - event => $event, |
981 | - attribute => $attrib, |
982 | - alternates => $self->{attributes}->{$attrib}, |
983 | - worst => $self->{worst} eq $attrib, |
984 | - ); |
985 | - $self->{handlers}->{$attrib} = $handler; |
986 | - } |
987 | - next GROUPBY unless $handler; |
988 | - $samples->{$val} ||= $event; # Initialize to the first event. |
989 | - $handler->($event, $class_attrib, $global_attrib, $samples, $group_by); |
990 | - } |
991 | - } |
992 | - } |
993 | - else { |
994 | - $self->_make_unrolled_loops($event); |
995 | - $self->{unrolled_loops}->($self, $event, $group_by); |
996 | - } |
997 | - |
998 | - return; |
999 | -} |
1000 | - |
1001 | -sub _make_unrolled_loops { |
1002 | - my ( $self, $event ) = @_; |
1003 | - |
1004 | - my $group_by = $event->{$self->{groupby}}; |
1005 | - |
1006 | - my @attrs = grep { $self->{handlers}->{$_} } keys %{$self->{attributes}}; |
1007 | - my $globs = $self->{result_globals}; # Global stats for each |
1008 | - my $samples = $self->{result_samples}; |
1009 | - |
1010 | - my @lines = ( |
1011 | - 'my ( $self, $event, $group_by ) = @_;', |
1012 | - 'my ($val, $class, $global, $idx);', |
1013 | - (ref $group_by ? ('foreach my $group_by ( @$group_by ) {') : ()), |
1014 | - 'my $temp = $self->{result_classes}->{ $group_by } |
1015 | - ||= { map { $_ => { } } @attrs };', |
1016 | - '$samples->{$group_by} ||= $event;', # Always start with the first. |
1017 | - ); |
1018 | - foreach my $i ( 0 .. $#attrs ) { |
1019 | - push @lines, ( |
1020 | - '$class = $temp->{\'' . $attrs[$i] . '\'};', |
1021 | - '$global = $globs->{\'' . $attrs[$i] . '\'};', |
1022 | - $self->{unrolled_for}->{$attrs[$i]}, |
1023 | - ); |
1024 | - } |
1025 | - if ( ref $group_by ) { |
1026 | - push @lines, '}'; # Close the loop opened above |
1027 | - } |
1028 | - @lines = map { s/^/ /gm; $_ } @lines; # Indent for debugging |
1029 | - unshift @lines, 'sub {'; |
1030 | - push @lines, '}'; |
1031 | - |
1032 | - my $code = join("\n", @lines); |
1033 | - PTDEBUG && _d('Unrolled subroutine:', @lines); |
1034 | - my $sub = eval $code; |
1035 | - die $EVAL_ERROR if $EVAL_ERROR; |
1036 | - $self->{unrolled_loops} = $sub; |
1037 | - |
1038 | - return; |
1039 | -} |
1040 | - |
1041 | -sub results { |
1042 | - my ( $self ) = @_; |
1043 | - return { |
1044 | - classes => $self->{result_classes}, |
1045 | - globals => $self->{result_globals}, |
1046 | - samples => $self->{result_samples}, |
1047 | - }; |
1048 | -} |
1049 | - |
1050 | -sub set_results { |
1051 | - my ( $self, $results ) = @_; |
1052 | - $self->{result_classes} = $results->{classes}; |
1053 | - $self->{result_globals} = $results->{globals}; |
1054 | - $self->{result_samples} = $results->{samples}; |
1055 | - return; |
1056 | -} |
1057 | - |
1058 | -sub stats { |
1059 | - my ( $self ) = @_; |
1060 | - return { |
1061 | - classes => $self->{class_metrics}, |
1062 | - globals => $self->{global_metrics}, |
1063 | - }; |
1064 | -} |
1065 | - |
1066 | -sub attributes { |
1067 | - my ( $self ) = @_; |
1068 | - return $self->{type_for}; |
1069 | -} |
1070 | - |
1071 | -sub set_attribute_types { |
1072 | - my ( $self, $attrib_types ) = @_; |
1073 | - $self->{type_for} = $attrib_types; |
1074 | - return; |
1075 | -} |
1076 | - |
1077 | -sub type_for { |
1078 | - my ( $self, $attrib ) = @_; |
1079 | - return $self->{type_for}->{$attrib}; |
1080 | -} |
1081 | - |
1082 | -sub make_handler { |
1083 | - my ( $self, %args ) = @_; |
1084 | - my @required_args = qw(event attribute); |
1085 | - foreach my $arg ( @required_args ) { |
1086 | - die "I need a $arg argument" unless $args{$arg}; |
1087 | - } |
1088 | - my ($event, $attrib) = @args{@required_args}; |
1089 | - |
1090 | - my $val; |
1091 | - eval { $val= $self->_get_value(%args); }; |
1092 | - if ( $EVAL_ERROR ) { |
1093 | - PTDEBUG && _d("Cannot make", $attrib, "handler:", $EVAL_ERROR); |
1094 | - return; |
1095 | - } |
1096 | - return unless defined $val; # can't determine type if it's undef |
1097 | - |
1098 | - my $float_re = qr{[+-]?(?:(?=\d|[.])\d+(?:[.])\d{0,})(?:E[+-]?\d+)?}i; |
1099 | - my $type = $self->type_for($attrib) ? $self->type_for($attrib) |
1100 | - : $attrib =~ m/_crc$/ ? 'string' |
1101 | - : $val =~ m/^(?:\d+|$float_re)$/o ? 'num' |
1102 | - : $val =~ m/^(?:Yes|No)$/ ? 'bool' |
1103 | - : 'string'; |
1104 | - PTDEBUG && _d('Type for', $attrib, 'is', $type, '(sample:', $val, ')'); |
1105 | - $self->{type_for}->{$attrib} = $type; |
1106 | - |
1107 | - my @lines; |
1108 | - |
1109 | - my %track = ( |
1110 | - sum => $type =~ m/num|bool/ ? 1 : 0, # sum of values |
1111 | - unq => $type =~ m/bool|string/ ? 1 : 0, # count of unique values seen |
1112 | - all => $type eq 'num' ? 1 : 0, # all values in bucketed list |
1113 | - ); |
1114 | - |
1115 | - my $trf = ($type eq 'bool') ? q{(($val || '') eq 'Yes') ? 1 : 0} |
1116 | - : undef; |
1117 | - if ( $trf ) { |
1118 | - push @lines, q{$val = } . $trf . ';'; |
1119 | - } |
1120 | - |
1121 | - if ( $attrib eq 'Query_time' ) { |
1122 | - push @lines, ( |
1123 | - '$val =~ s/^(\d+(?:\.\d+)?).*/$1/;', |
1124 | - '$event->{\''.$attrib.'\'} = $val;', |
1125 | - ); |
1126 | - } |
1127 | - |
1128 | - if ( $type eq 'num' && $self->{attrib_limit} ) { |
1129 | - push @lines, ( |
1130 | - "if ( \$val > $self->{attrib_limit} ) {", |
1131 | - ' $val = $class->{last} ||= 0;', |
1132 | - '}', |
1133 | - '$class->{last} = $val;', |
1134 | - ); |
1135 | - } |
1136 | - |
1137 | - my $lt = $type eq 'num' ? '<' : 'lt'; |
1138 | - my $gt = $type eq 'num' ? '>' : 'gt'; |
1139 | - foreach my $place ( qw($class $global) ) { |
1140 | - my @tmp; # hold lines until PLACE placeholder is replaced |
1141 | - |
1142 | - push @tmp, '++PLACE->{cnt};'; # count of all values seen |
1143 | - |
1144 | - if ( $attrib =~ m/_crc$/ ) { |
1145 | - push @tmp, '$val = $val % 1_000;'; |
1146 | - } |
1147 | - |
1148 | - push @tmp, ( |
1149 | - 'PLACE->{min} = $val if !defined PLACE->{min} || $val ' |
1150 | - . $lt . ' PLACE->{min};', |
1151 | - ); |
1152 | - push @tmp, ( |
1153 | - 'PLACE->{max} = $val if !defined PLACE->{max} || $val ' |
1154 | - . $gt . ' PLACE->{max};', |
1155 | - ); |
1156 | - if ( $track{sum} ) { |
1157 | - push @tmp, 'PLACE->{sum} += $val;'; |
1158 | - } |
1159 | - |
1160 | - if ( $track{all} ) { |
1161 | - push @tmp, ( |
1162 | - 'exists PLACE->{all} or PLACE->{all} = {};', |
1163 | - '++PLACE->{all}->{ EventAggregator::bucket_idx($val) };', |
1164 | - ); |
1165 | - } |
1166 | - |
1167 | - push @lines, map { s/PLACE/$place/g; $_ } @tmp; |
1168 | - } |
1169 | - |
1170 | - if ( $track{unq} ) { |
1171 | - push @lines, '++$class->{unq}->{$val}'; |
1172 | - } |
1173 | - |
1174 | - if ( $args{worst} ) { |
1175 | - my $op = $type eq 'num' ? '>=' : 'ge'; |
1176 | - push @lines, ( |
1177 | - 'if ( $val ' . $op . ' ($class->{max} || 0) ) {', |
1178 | - ' $samples->{$group_by} = $event;', |
1179 | - '}', |
1180 | - ); |
1181 | - } |
1182 | - |
1183 | - my @unrolled = ( |
1184 | - "\$val = \$event->{'$attrib'};", |
1185 | - |
1186 | - ( map { "\$val = \$event->{'$_'} unless defined \$val;" } |
1187 | - grep { $_ ne $attrib } @{$args{alternates}} |
1188 | - ), |
1189 | - |
1190 | - 'defined $val && do {', |
1191 | - @lines, |
1192 | - '};', |
1193 | - ); |
1194 | - $self->{unrolled_for}->{$attrib} = join("\n", @unrolled); |
1195 | - |
1196 | - my @code = ( |
1197 | - 'sub {', |
1198 | - 'my ( $event, $class, $global, $samples, $group_by ) = @_;', |
1199 | - 'my ($val, $idx);', |
1200 | - |
1201 | - $self->{unrolled_for}->{$attrib}, |
1202 | - |
1203 | - 'return;', |
1204 | - '}', |
1205 | - ); |
1206 | - $self->{code_for}->{$attrib} = join("\n", @code); |
1207 | - PTDEBUG && _d($attrib, 'handler code:', $self->{code_for}->{$attrib}); |
1208 | - my $sub = eval $self->{code_for}->{$attrib}; |
1209 | - if ( $EVAL_ERROR ) { |
1210 | - die "Failed to compile $attrib handler code: $EVAL_ERROR"; |
1211 | - } |
1212 | - |
1213 | - return $sub; |
1214 | -} |
1215 | - |
1216 | -sub bucket_idx { |
1217 | - my ( $val ) = @_; |
1218 | - return 0 if $val < MIN_BUCK; |
1219 | - my $idx = int(BASE_OFFSET + log($val)/BASE_LOG); |
1220 | - return $idx > (NUM_BUCK-1) ? (NUM_BUCK-1) : $idx; |
1221 | -} |
1222 | - |
1223 | -sub bucket_value { |
1224 | - my ( $bucket ) = @_; |
1225 | - return 0 if $bucket == 0; |
1226 | - die "Invalid bucket: $bucket" if $bucket < 0 || $bucket > (NUM_BUCK-1); |
1227 | - return (BUCK_SIZE**($bucket-1)) * MIN_BUCK; |
1228 | -} |
1229 | - |
1230 | -{ |
1231 | - my @buck_tens; |
1232 | - sub buckets_of { |
1233 | - return @buck_tens if @buck_tens; |
1234 | - |
1235 | - my $start_bucket = 0; |
1236 | - my @base10_starts = (0); |
1237 | - map { push @base10_starts, (10**$_)*MIN_BUCK } (1..7); |
1238 | - |
1239 | - for my $base10_bucket ( 0..($#base10_starts-1) ) { |
1240 | - my $next_bucket = bucket_idx( $base10_starts[$base10_bucket+1] ); |
1241 | - PTDEBUG && _d('Base 10 bucket', $base10_bucket, 'maps to', |
1242 | - 'base 1.05 buckets', $start_bucket, '..', $next_bucket-1); |
1243 | - for my $base1_05_bucket ($start_bucket..($next_bucket-1)) { |
1244 | - $buck_tens[$base1_05_bucket] = $base10_bucket; |
1245 | - } |
1246 | - $start_bucket = $next_bucket; |
1247 | - } |
1248 | - |
1249 | - map { $buck_tens[$_] = 7 } ($start_bucket..(NUM_BUCK-1)); |
1250 | - |
1251 | - return @buck_tens; |
1252 | - } |
1253 | -} |
1254 | - |
1255 | -sub calculate_statistical_metrics { |
1256 | - my ( $self, %args ) = @_; |
1257 | - my $classes = $self->{result_classes}; |
1258 | - my $globals = $self->{result_globals}; |
1259 | - my $class_metrics = $self->{class_metrics}; |
1260 | - my $global_metrics = $self->{global_metrics}; |
1261 | - PTDEBUG && _d('Calculating statistical_metrics'); |
1262 | - foreach my $attrib ( keys %$globals ) { |
1263 | - if ( exists $globals->{$attrib}->{all} ) { |
1264 | - $global_metrics->{$attrib} |
1265 | - = $self->_calc_metrics( |
1266 | - $globals->{$attrib}->{all}, |
1267 | - $globals->{$attrib}, |
1268 | - ); |
1269 | - } |
1270 | - |
1271 | - foreach my $class ( keys %$classes ) { |
1272 | - if ( exists $classes->{$class}->{$attrib}->{all} ) { |
1273 | - $class_metrics->{$class}->{$attrib} |
1274 | - = $self->_calc_metrics( |
1275 | - $classes->{$class}->{$attrib}->{all}, |
1276 | - $classes->{$class}->{$attrib} |
1277 | - ); |
1278 | - } |
1279 | - } |
1280 | - } |
1281 | - |
1282 | - return; |
1283 | -} |
1284 | - |
1285 | -sub _calc_metrics { |
1286 | - my ( $self, $vals, $args ) = @_; |
1287 | - my $statistical_metrics = { |
1288 | - pct_95 => 0, |
1289 | - stddev => 0, |
1290 | - median => 0, |
1291 | - cutoff => undef, |
1292 | - }; |
1293 | - |
1294 | - return $statistical_metrics |
1295 | - unless defined $vals && %$vals && $args->{cnt}; |
1296 | - |
1297 | - my $n_vals = $args->{cnt}; |
1298 | - if ( $n_vals == 1 || $args->{max} == $args->{min} ) { |
1299 | - my $v = $args->{max} || 0; |
1300 | - my $bucket = int(6 + ( log($v > 0 ? $v : MIN_BUCK) / log(10))); |
1301 | - $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket; |
1302 | - return { |
1303 | - pct_95 => $v, |
1304 | - stddev => 0, |
1305 | - median => $v, |
1306 | - cutoff => $n_vals, |
1307 | - }; |
1308 | - } |
1309 | - elsif ( $n_vals == 2 ) { |
1310 | - foreach my $v ( $args->{min}, $args->{max} ) { |
1311 | - my $bucket = int(6 + ( log($v && $v > 0 ? $v : MIN_BUCK) / log(10))); |
1312 | - $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket; |
1313 | - } |
1314 | - my $v = $args->{max} || 0; |
1315 | - my $mean = (($args->{min} || 0) + $v) / 2; |
1316 | - return { |
1317 | - pct_95 => $v, |
1318 | - stddev => sqrt((($v - $mean) ** 2) *2), |
1319 | - median => $mean, |
1320 | - cutoff => $n_vals, |
1321 | - }; |
1322 | - } |
1323 | - |
1324 | - my $cutoff = $n_vals >= 10 ? int ( $n_vals * 0.95 ) : $n_vals; |
1325 | - $statistical_metrics->{cutoff} = $cutoff; |
1326 | - |
1327 | - my $total_left = $n_vals; |
1328 | - my $top_vals = $n_vals - $cutoff; # vals > 95th |
1329 | - my $sum_excl = 0; |
1330 | - my $sum = 0; |
1331 | - my $sumsq = 0; |
1332 | - my $mid = int($n_vals / 2); |
1333 | - my $median = 0; |
1334 | - my $prev = NUM_BUCK-1; # Used for getting median when $cutoff is odd |
1335 | - my $bucket_95 = 0; # top bucket in 95th |
1336 | - |
1337 | - PTDEBUG && _d('total vals:', $total_left, 'top vals:', $top_vals, 'mid:', $mid); |
1338 | - |
1339 | - my @buckets = map { 0 } (0..NUM_BUCK-1); |
1340 | - map { $buckets[$_] = $vals->{$_} } keys %$vals; |
1341 | - $vals = \@buckets; # repoint vals from given hashref to our array |
1342 | - |
1343 | - BUCKET: |
1344 | - for my $bucket ( reverse 0..(NUM_BUCK-1) ) { |
1345 | - my $val = $vals->[$bucket]; |
1346 | - next BUCKET unless $val; |
1347 | - |
1348 | - $total_left -= $val; |
1349 | - $sum_excl += $val; |
1350 | - $bucket_95 = $bucket if !$bucket_95 && $sum_excl > $top_vals; |
1351 | - |
1352 | - if ( !$median && $total_left <= $mid ) { |
1353 | - $median = (($cutoff % 2) || ($val > 1)) ? $buck_vals[$bucket] |
1354 | - : ($buck_vals[$bucket] + $buck_vals[$prev]) / 2; |
1355 | - } |
1356 | - |
1357 | - $sum += $val * $buck_vals[$bucket]; |
1358 | - $sumsq += $val * ($buck_vals[$bucket]**2); |
1359 | - $prev = $bucket; |
1360 | - } |
1361 | - |
1362 | - my $var = $sumsq/$n_vals - ( ($sum/$n_vals) ** 2 ); |
1363 | - my $stddev = $var > 0 ? sqrt($var) : 0; |
1364 | - my $maxstdev = (($args->{max} || 0) - ($args->{min} || 0)) / 2; |
1365 | - $stddev = $stddev > $maxstdev ? $maxstdev : $stddev; |
1366 | - |
1367 | - PTDEBUG && _d('sum:', $sum, 'sumsq:', $sumsq, 'stddev:', $stddev, |
1368 | - 'median:', $median, 'prev bucket:', $prev, |
1369 | - 'total left:', $total_left, 'sum excl', $sum_excl, |
1370 | - 'bucket 95:', $bucket_95, $buck_vals[$bucket_95]); |
1371 | - |
1372 | - $statistical_metrics->{stddev} = $stddev; |
1373 | - $statistical_metrics->{pct_95} = $buck_vals[$bucket_95]; |
1374 | - $statistical_metrics->{median} = $median; |
1375 | - |
1376 | - return $statistical_metrics; |
1377 | -} |
1378 | - |
1379 | -sub metrics { |
1380 | - my ( $self, %args ) = @_; |
1381 | - foreach my $arg ( qw(attrib where) ) { |
1382 | - die "I need a $arg argument" unless defined $args{$arg}; |
1383 | - } |
1384 | - my $attrib = $args{attrib}; |
1385 | - my $where = $args{where}; |
1386 | - |
1387 | - my $stats = $self->results(); |
1388 | - my $metrics = $self->stats(); |
1389 | - my $store = $stats->{classes}->{$where}->{$attrib}; |
1390 | - my $global_cnt = $stats->{globals}->{$attrib}->{cnt}; |
1391 | - |
1392 | - return { |
1393 | - cnt => $store->{cnt}, |
1394 | - pct => $global_cnt && $store->{cnt} ? $store->{cnt} / $global_cnt : 0, |
1395 | - sum => $store->{sum}, |
1396 | - min => $store->{min}, |
1397 | - max => $store->{max}, |
1398 | - avg => $store->{sum} && $store->{cnt} ? $store->{sum} / $store->{cnt} : 0, |
1399 | - median => $metrics->{classes}->{$where}->{$attrib}->{median} || 0, |
1400 | - pct_95 => $metrics->{classes}->{$where}->{$attrib}->{pct_95} || 0, |
1401 | - stddev => $metrics->{classes}->{$where}->{$attrib}->{stddev} || 0, |
1402 | - }; |
1403 | -} |
1404 | - |
1405 | -sub top_events { |
1406 | - my ( $self, %args ) = @_; |
1407 | - my $classes = $self->{result_classes}; |
1408 | - my @sorted = reverse sort { # Sorted list of $groupby values |
1409 | - $classes->{$a}->{$args{attrib}}->{$args{orderby}} |
1410 | - <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}} |
1411 | - } grep { |
1412 | - defined $classes->{$_}->{$args{attrib}}->{$args{orderby}} |
1413 | - } keys %$classes; |
1414 | - my @chosen; # top events |
1415 | - my @other; # other events (< top) |
1416 | - my ($total, $count) = (0, 0); |
1417 | - foreach my $groupby ( @sorted ) { |
1418 | - if ( |
1419 | - (!$args{total} || $total < $args{total} ) |
1420 | - && ( !$args{count} || $count < $args{count} ) |
1421 | - ) { |
1422 | - push @chosen, [$groupby, 'top', $count+1]; |
1423 | - } |
1424 | - |
1425 | - elsif ( $args{ol_attrib} && (!$args{ol_freq} |
1426 | - || $classes->{$groupby}->{$args{ol_attrib}}->{cnt} >= $args{ol_freq}) |
1427 | - ) { |
1428 | - my $stats = $self->{class_metrics}->{$groupby}->{$args{ol_attrib}}; |
1429 | - if ( ($stats->{pct_95} || 0) >= $args{ol_limit} ) { |
1430 | - push @chosen, [$groupby, 'outlier', $count+1]; |
1431 | - } |
1432 | - else { |
1433 | - push @other, [$groupby, 'misc', $count+1]; |
1434 | - } |
1435 | - } |
1436 | - |
1437 | - else { |
1438 | - push @other, [$groupby, 'misc', $count+1]; |
1439 | - } |
1440 | - |
1441 | - $total += $classes->{$groupby}->{$args{attrib}}->{$args{orderby}}; |
1442 | - $count++; |
1443 | - } |
1444 | - return \@chosen, \@other; |
1445 | -} |
1446 | - |
1447 | -sub add_new_attributes { |
1448 | - my ( $self, $event ) = @_; |
1449 | - return unless $event; |
1450 | - |
1451 | - map { |
1452 | - my $attrib = $_; |
1453 | - $self->{attributes}->{$attrib} = [$attrib]; |
1454 | - $self->{alt_attribs}->{$attrib} = make_alt_attrib($attrib); |
1455 | - push @{$self->{all_attribs}}, $attrib; |
1456 | - PTDEBUG && _d('Added new attribute:', $attrib); |
1457 | - } |
1458 | - grep { |
1459 | - $_ ne $self->{groupby} |
1460 | - && !exists $self->{attributes}->{$_} |
1461 | - && !exists $self->{ignore_attribs}->{$_} |
1462 | - } |
1463 | - keys %$event; |
1464 | - |
1465 | - return; |
1466 | -} |
1467 | - |
1468 | -sub get_attributes { |
1469 | - my ( $self ) = @_; |
1470 | - return $self->{all_attribs}; |
1471 | -} |
1472 | - |
1473 | -sub events_processed { |
1474 | - my ( $self ) = @_; |
1475 | - return $self->{n_events}; |
1476 | -} |
1477 | - |
1478 | -sub make_alt_attrib { |
1479 | - my ( @attribs ) = @_; |
1480 | - |
1481 | - my $attrib = shift @attribs; # Primary attribute. |
1482 | - return sub {} unless @attribs; # No alternates. |
1483 | - |
1484 | - my @lines; |
1485 | - push @lines, 'sub { my ( $event ) = @_; my $alt_attrib;'; |
1486 | - push @lines, map { |
1487 | - "\$alt_attrib = '$_' if !defined \$alt_attrib " |
1488 | - . "&& exists \$event->{'$_'};" |
1489 | - } @attribs; |
1490 | - push @lines, 'return $alt_attrib; }'; |
1491 | - PTDEBUG && _d('alt attrib sub for', $attrib, ':', @lines); |
1492 | - my $sub = eval join("\n", @lines); |
1493 | - die if $EVAL_ERROR; |
1494 | - return $sub; |
1495 | -} |
1496 | - |
1497 | -sub merge { |
1498 | - my ( @ea_objs ) = @_; |
1499 | - PTDEBUG && _d('Merging', scalar @ea_objs, 'ea'); |
1500 | - return unless scalar @ea_objs; |
1501 | - |
1502 | - my $ea1 = shift @ea_objs; |
1503 | - my $r1 = $ea1->results; |
1504 | - my $worst = $ea1->{worst}; # for merging, finding worst sample |
1505 | - |
1506 | - my %attrib_types = %{ $ea1->attributes() }; |
1507 | - |
1508 | - foreach my $ea ( @ea_objs ) { |
1509 | - die "EventAggregator objects have different groupby: " |
1510 | - . "$ea1->{groupby} and $ea->{groupby}" |
1511 | - unless $ea1->{groupby} eq $ea->{groupby}; |
1512 | - die "EventAggregator objects have different worst: " |
1513 | - . "$ea1->{worst} and $ea->{worst}" |
1514 | - unless $ea1->{worst} eq $ea->{worst}; |
1515 | - |
1516 | - my $attrib_types = $ea->attributes(); |
1517 | - map { |
1518 | - $attrib_types{$_} = $attrib_types->{$_} |
1519 | - unless exists $attrib_types{$_}; |
1520 | - } keys %$attrib_types; |
1521 | - } |
1522 | - |
1523 | - my $r_merged = { |
1524 | - classes => {}, |
1525 | - globals => _deep_copy_attribs($r1->{globals}), |
1526 | - samples => {}, |
1527 | - }; |
1528 | - map { |
1529 | - $r_merged->{classes}->{$_} |
1530 | - = _deep_copy_attribs($r1->{classes}->{$_}); |
1531 | - |
1532 | - @{$r_merged->{samples}->{$_}}{keys %{$r1->{samples}->{$_}}} |
1533 | - = values %{$r1->{samples}->{$_}}; |
1534 | - } keys %{$r1->{classes}}; |
1535 | - |
1536 | - for my $i ( 0..$#ea_objs ) { |
1537 | - PTDEBUG && _d('Merging ea obj', ($i + 1)); |
1538 | - my $r2 = $ea_objs[$i]->results; |
1539 | - |
1540 | - eval { |
1541 | - CLASS: |
1542 | - foreach my $class ( keys %{$r2->{classes}} ) { |
1543 | - my $r1_class = $r_merged->{classes}->{$class}; |
1544 | - my $r2_class = $r2->{classes}->{$class}; |
1545 | - |
1546 | - if ( $r1_class && $r2_class ) { |
1547 | - CLASS_ATTRIB: |
1548 | - foreach my $attrib ( keys %$r2_class ) { |
1549 | - PTDEBUG && _d('merge', $attrib); |
1550 | - if ( $r1_class->{$attrib} && $r2_class->{$attrib} ) { |
1551 | - _add_attrib_vals($r1_class->{$attrib}, $r2_class->{$attrib}); |
1552 | - } |
1553 | - elsif ( !$r1_class->{$attrib} ) { |
1554 | - PTDEBUG && _d('copy', $attrib); |
1555 | - $r1_class->{$attrib} = |
1556 | - _deep_copy_attrib_vals($r2_class->{$attrib}) |
1557 | - } |
1558 | - } |
1559 | - } |
1560 | - elsif ( !$r1_class ) { |
1561 | - PTDEBUG && _d('copy class'); |
1562 | - $r_merged->{classes}->{$class} = _deep_copy_attribs($r2_class); |
1563 | - } |
1564 | - |
1565 | - my $new_worst_sample; |
1566 | - if ( $r_merged->{samples}->{$class} && $r2->{samples}->{$class} ) { |
1567 | - if ( $r2->{samples}->{$class}->{$worst} |
1568 | - > $r_merged->{samples}->{$class}->{$worst} ) { |
1569 | - $new_worst_sample = $r2->{samples}->{$class} |
1570 | - } |
1571 | - } |
1572 | - elsif ( !$r_merged->{samples}->{$class} ) { |
1573 | - $new_worst_sample = $r2->{samples}->{$class}; |
1574 | - } |
1575 | - if ( $new_worst_sample ) { |
1576 | - PTDEBUG && _d('New worst sample:', $worst, '=', |
1577 | - $new_worst_sample->{$worst}, 'item:', substr($class, 0, 100)); |
1578 | - my %new_sample; |
1579 | - @new_sample{keys %$new_worst_sample} |
1580 | - = values %$new_worst_sample; |
1581 | - $r_merged->{samples}->{$class} = \%new_sample; |
1582 | - } |
1583 | - } |
1584 | - }; |
1585 | - if ( $EVAL_ERROR ) { |
1586 | - warn "Error merging class/sample: $EVAL_ERROR"; |
1587 | - } |
1588 | - |
1589 | - eval { |
1590 | - GLOBAL_ATTRIB: |
1591 | - PTDEBUG && _d('Merging global attributes'); |
1592 | - foreach my $attrib ( keys %{$r2->{globals}} ) { |
1593 | - my $r1_global = $r_merged->{globals}->{$attrib}; |
1594 | - my $r2_global = $r2->{globals}->{$attrib}; |
1595 | - |
1596 | - if ( $r1_global && $r2_global ) { |
1597 | - PTDEBUG && _d('merge', $attrib); |
1598 | - _add_attrib_vals($r1_global, $r2_global); |
1599 | - } |
1600 | - elsif ( !$r1_global ) { |
1601 | - PTDEBUG && _d('copy', $attrib); |
1602 | - $r_merged->{globals}->{$attrib} |
1603 | - = _deep_copy_attrib_vals($r2_global); |
1604 | - } |
1605 | - } |
1606 | - }; |
1607 | - if ( $EVAL_ERROR ) { |
1608 | - warn "Error merging globals: $EVAL_ERROR"; |
1609 | - } |
1610 | - } |
1611 | - |
1612 | - my $ea_merged = new EventAggregator( |
1613 | - groupby => $ea1->{groupby}, |
1614 | - worst => $ea1->{worst}, |
1615 | - attributes => { map { $_=>[$_] } keys %attrib_types }, |
1616 | - ); |
1617 | - $ea_merged->set_results($r_merged); |
1618 | - $ea_merged->set_attribute_types(\%attrib_types); |
1619 | - return $ea_merged; |
1620 | -} |
1621 | - |
1622 | -sub _add_attrib_vals { |
1623 | - my ( $vals1, $vals2 ) = @_; |
1624 | - |
1625 | - foreach my $val ( keys %$vals1 ) { |
1626 | - my $val1 = $vals1->{$val}; |
1627 | - my $val2 = $vals2->{$val}; |
1628 | - |
1629 | - if ( (!ref $val1) && (!ref $val2) ) { |
1630 | - die "undefined $val value" unless defined $val1 && defined $val2; |
1631 | - |
1632 | - my $is_num = exists $vals1->{sum} ? 1 : 0; |
1633 | - if ( $val eq 'max' ) { |
1634 | - if ( $is_num ) { |
1635 | - $vals1->{$val} = $val1 > $val2 ? $val1 : $val2; |
1636 | - } |
1637 | - else { |
1638 | - $vals1->{$val} = $val1 gt $val2 ? $val1 : $val2; |
1639 | - } |
1640 | - } |
1641 | - elsif ( $val eq 'min' ) { |
1642 | - if ( $is_num ) { |
1643 | - $vals1->{$val} = $val1 < $val2 ? $val1 : $val2; |
1644 | - } |
1645 | - else { |
1646 | - $vals1->{$val} = $val1 lt $val2 ? $val1 : $val2; |
1647 | - } |
1648 | - } |
1649 | - else { |
1650 | - $vals1->{$val} += $val2; |
1651 | - } |
1652 | - } |
1653 | - elsif ( (ref $val1 eq 'ARRAY') && (ref $val2 eq 'ARRAY') ) { |
1654 | - die "Empty $val arrayref" unless @$val1 && @$val2; |
1655 | - my $n_buckets = (scalar @$val1) - 1; |
1656 | - for my $i ( 0..$n_buckets ) { |
1657 | - $vals1->{$val}->[$i] += $val2->[$i]; |
1658 | - } |
1659 | - } |
1660 | - elsif ( (ref $val1 eq 'HASH') && (ref $val2 eq 'HASH') ) { |
1661 | - die "Empty $val hashref" unless %$val1 and %$val2; |
1662 | - map { $vals1->{$val}->{$_} += $val2->{$_} } keys %$val2; |
1663 | - } |
1664 | - else { |
1665 | - PTDEBUG && _d('vals1:', Dumper($vals1)); |
1666 | - PTDEBUG && _d('vals2:', Dumper($vals2)); |
1667 | - die "$val type mismatch"; |
1668 | - } |
1669 | - } |
1670 | - |
1671 | - return; |
1672 | -} |
1673 | - |
1674 | -sub _deep_copy_attribs { |
1675 | - my ( $attribs ) = @_; |
1676 | - my $copy = {}; |
1677 | - foreach my $attrib ( keys %$attribs ) { |
1678 | - $copy->{$attrib} = _deep_copy_attrib_vals($attribs->{$attrib}); |
1679 | - } |
1680 | - return $copy; |
1681 | -} |
1682 | - |
1683 | -sub _deep_copy_attrib_vals { |
1684 | - my ( $vals ) = @_; |
1685 | - my $copy; |
1686 | - if ( ref $vals eq 'HASH' ) { |
1687 | - $copy = {}; |
1688 | - foreach my $val ( keys %$vals ) { |
1689 | - if ( my $ref_type = ref $val ) { |
1690 | - if ( $ref_type eq 'ARRAY' ) { |
1691 | - my $n_elems = (scalar @$val) - 1; |
1692 | - $copy->{$val} = [ map { undef } ( 0..$n_elems ) ]; |
1693 | - for my $i ( 0..$n_elems ) { |
1694 | - $copy->{$val}->[$i] = $vals->{$val}->[$i]; |
1695 | - } |
1696 | - } |
1697 | - elsif ( $ref_type eq 'HASH' ) { |
1698 | - $copy->{$val} = {}; |
1699 | - map { $copy->{$val}->{$_} += $vals->{$val}->{$_} } |
1700 | - keys %{$vals->{$val}} |
1701 | - } |
1702 | - else { |
1703 | - die "I don't know how to deep copy a $ref_type reference"; |
1704 | - } |
1705 | - } |
1706 | - else { |
1707 | - $copy->{$val} = $vals->{$val}; |
1708 | - } |
1709 | - } |
1710 | - } |
1711 | - else { |
1712 | - $copy = $vals; |
1713 | - } |
1714 | - return $copy; |
1715 | -} |
1716 | - |
1717 | -sub _get_value { |
1718 | - my ( $self, %args ) = @_; |
1719 | - my ($event, $attrib, $alts) = @args{qw(event attribute alternates)}; |
1720 | - return unless $event && $attrib; |
1721 | - |
1722 | - my $value; |
1723 | - if ( exists $event->{$attrib} ) { |
1724 | - $value = $event->{$attrib}; |
1725 | - } |
1726 | - elsif ( $alts ) { |
1727 | - my $found_value = 0; |
1728 | - foreach my $alt_attrib( @$alts ) { |
1729 | - if ( exists $event->{$alt_attrib} ) { |
1730 | - $value = $event->{$alt_attrib}; |
1731 | - $found_value = 1; |
1732 | - last; |
1733 | - } |
1734 | - } |
1735 | - die "Event does not have attribute $attrib or any of its alternates" |
1736 | - unless $found_value; |
1737 | - } |
1738 | - else { |
1739 | - die "Event does not have attribute $attrib and there are no alterantes"; |
1740 | - } |
1741 | - |
1742 | - return $value; |
1743 | -} |
1744 | - |
1745 | -sub _d { |
1746 | - my ($package, undef, $line) = caller 0; |
1747 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
1748 | - map { defined $_ ? $_ : 'undef' } |
1749 | - @_; |
1750 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
1751 | -} |
1752 | - |
1753 | -1; |
1754 | -} |
1755 | -# ########################################################################### |
1756 | -# End EventAggregator package |
1757 | -# ########################################################################### |
1758 | - |
1759 | -# ########################################################################### |
1760 | -# QueryParser package |
1761 | -# This package is a copy without comments from the original. The original |
1762 | -# with comments and its test file can be found in the Bazaar repository at, |
1763 | -# lib/QueryParser.pm |
1764 | -# t/lib/QueryParser.t |
1765 | -# See https://launchpad.net/percona-toolkit for more information. |
1766 | -# ########################################################################### |
1767 | -{ |
1768 | -package QueryParser; |
1769 | - |
1770 | -use strict; |
1771 | -use warnings FATAL => 'all'; |
1772 | -use English qw(-no_match_vars); |
1773 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
1774 | - |
1775 | -our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/; |
1776 | -our $tbl_regex = qr{ |
1777 | - \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names |
1778 | - \b\s* |
1779 | - \(? # Optional paren around tables |
1780 | - ($tbl_ident |
1781 | - (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )* |
1782 | - ) |
1783 | - }xio; |
1784 | -our $has_derived = qr{ |
1785 | - \b(?:FROM|JOIN|,) |
1786 | - \s*\(\s*SELECT |
1787 | - }xi; |
1788 | - |
1789 | -our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i; |
1790 | - |
1791 | -our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i; |
1792 | - |
1793 | -sub new { |
1794 | - my ( $class ) = @_; |
1795 | - bless {}, $class; |
1796 | -} |
1797 | - |
1798 | -sub get_tables { |
1799 | - my ( $self, $query ) = @_; |
1800 | - return unless $query; |
1801 | - PTDEBUG && _d('Getting tables for', $query); |
1802 | - |
1803 | - my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i; |
1804 | - if ( $ddl_stmt ) { |
1805 | - PTDEBUG && _d('Special table type:', $ddl_stmt); |
1806 | - $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i; |
1807 | - if ( $query =~ m/$ddl_stmt DATABASE\b/i ) { |
1808 | - PTDEBUG && _d('Query alters a database, not a table'); |
1809 | - return (); |
1810 | - } |
1811 | - if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) { |
1812 | - my ($select) = $query =~ m/\b(SELECT\b.+)/is; |
1813 | - PTDEBUG && _d('CREATE TABLE ... SELECT:', $select); |
1814 | - return $self->get_tables($select); |
1815 | - } |
1816 | - my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i; |
1817 | - PTDEBUG && _d('Matches table:', $tbl); |
1818 | - return ($tbl); |
1819 | - } |
1820 | - |
1821 | - $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; |
1822 | - |
1823 | - if ( $query =~ s/^\s*LOCK TABLES\s+//i ) { |
1824 | - PTDEBUG && _d('Special table type: LOCK TABLES'); |
1825 | - $query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\s*//gi; |
1826 | - PTDEBUG && _d('Locked tables:', $query); |
1827 | - $query = "FROM $query"; |
1828 | - } |
1829 | - |
1830 | - $query =~ s/\\["']//g; # quoted strings |
1831 | - $query =~ s/".*?"/?/sg; # quoted strings |
1832 | - $query =~ s/'.*?'/?/sg; # quoted strings |
1833 | - |
1834 | - my @tables; |
1835 | - foreach my $tbls ( $query =~ m/$tbl_regex/gio ) { |
1836 | - PTDEBUG && _d('Match tables:', $tbls); |
1837 | - |
1838 | - next if $tbls =~ m/\ASELECT\b/i; |
1839 | - |
1840 | - foreach my $tbl ( split(',', $tbls) ) { |
1841 | - $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio; |
1842 | - |
1843 | - if ( $tbl !~ m/[a-zA-Z]/ ) { |
1844 | - PTDEBUG && _d('Skipping suspicious table name:', $tbl); |
1845 | - next; |
1846 | - } |
1847 | - |
1848 | - push @tables, $tbl; |
1849 | - } |
1850 | - } |
1851 | - return @tables; |
1852 | -} |
1853 | - |
1854 | -sub has_derived_table { |
1855 | - my ( $self, $query ) = @_; |
1856 | - my $match = $query =~ m/$has_derived/; |
1857 | - PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table'); |
1858 | - return $match; |
1859 | -} |
1860 | - |
1861 | -sub get_aliases { |
1862 | - my ( $self, $query, $list ) = @_; |
1863 | - |
1864 | - my $result = { |
1865 | - DATABASE => {}, |
1866 | - TABLE => {}, |
1867 | - }; |
1868 | - return $result unless $query; |
1869 | - |
1870 | - $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; |
1871 | - |
1872 | - $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig; |
1873 | - |
1874 | - my @tbl_refs; |
1875 | - my ($tbl_refs, $from) = $query =~ m{ |
1876 | - ( |
1877 | - (FROM|INTO|UPDATE)\b\s* # Keyword before table refs |
1878 | - .+? # Table refs |
1879 | - ) |
1880 | - (?:\s+|\z) # If the query does not end with the table |
1881 | - (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs |
1882 | - }ix; |
1883 | - |
1884 | - if ( $tbl_refs ) { |
1885 | - |
1886 | - if ( $query =~ m/^(?:INSERT|REPLACE)/i ) { |
1887 | - $tbl_refs =~ s/\([^\)]+\)\s*//; |
1888 | - } |
1889 | - |
1890 | - PTDEBUG && _d('tbl refs:', $tbl_refs); |
1891 | - |
1892 | - my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i; |
1893 | - |
1894 | - my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i; |
1895 | - |
1896 | - $tbl_refs =~ s/ = /=/g; |
1897 | - |
1898 | - while ( |
1899 | - $tbl_refs =~ m{ |
1900 | - $before_tbl\b\s* |
1901 | - ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? ) |
1902 | - \s*$after_tbl |
1903 | - }xgio ) |
1904 | - { |
1905 | - my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3); |
1906 | - PTDEBUG && _d('Match table:', $tbl_ref); |
1907 | - push @tbl_refs, $tbl_ref; |
1908 | - $alias = $self->trim_identifier($alias); |
1909 | - |
1910 | - if ( $tbl_ref =~ m/^AS\s+\w+/i ) { |
1911 | - PTDEBUG && _d('Subquery', $tbl_ref); |
1912 | - $result->{TABLE}->{$alias} = undef; |
1913 | - next; |
1914 | - } |
1915 | - |
1916 | - my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/; |
1917 | - $db = $self->trim_identifier($db); |
1918 | - $tbl = $self->trim_identifier($tbl); |
1919 | - $result->{TABLE}->{$alias || $tbl} = $tbl; |
1920 | - $result->{DATABASE}->{$tbl} = $db if $db; |
1921 | - } |
1922 | - } |
1923 | - else { |
1924 | - PTDEBUG && _d("No tables ref in", $query); |
1925 | - } |
1926 | - |
1927 | - if ( $list ) { |
1928 | - return \@tbl_refs; |
1929 | - } |
1930 | - else { |
1931 | - return $result; |
1932 | - } |
1933 | -} |
1934 | - |
1935 | -sub split { |
1936 | - my ( $self, $query ) = @_; |
1937 | - return unless $query; |
1938 | - $query = $self->clean_query($query); |
1939 | - PTDEBUG && _d('Splitting', $query); |
1940 | - |
1941 | - my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i; |
1942 | - |
1943 | - my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query); |
1944 | - |
1945 | - my @statements; |
1946 | - if ( @split_statements == 1 ) { |
1947 | - push @statements, $query; |
1948 | - } |
1949 | - else { |
1950 | - for ( my $i = 0; $i <= $#split_statements; $i += 2 ) { |
1951 | - push @statements, $split_statements[$i].$split_statements[$i+1]; |
1952 | - |
1953 | - if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) { |
1954 | - $statements[-2] .= pop @statements; |
1955 | - } |
1956 | - } |
1957 | - } |
1958 | - |
1959 | - PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements); |
1960 | - return @statements; |
1961 | -} |
1962 | - |
1963 | -sub clean_query { |
1964 | - my ( $self, $query ) = @_; |
1965 | - return unless $query; |
1966 | - $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */ |
1967 | - $query =~ s/^\s+//; # Remove leading spaces |
1968 | - $query =~ s/\s+$//; # Remove trailing spaces |
1969 | - $query =~ s/\s{2,}/ /g; # Remove extra spaces |
1970 | - return $query; |
1971 | -} |
1972 | - |
1973 | -sub split_subquery { |
1974 | - my ( $self, $query ) = @_; |
1975 | - return unless $query; |
1976 | - $query = $self->clean_query($query); |
1977 | - $query =~ s/;$//; |
1978 | - |
1979 | - my @subqueries; |
1980 | - my $sqno = 0; # subquery number |
1981 | - my $pos = 0; |
1982 | - while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) { |
1983 | - $pos = pos($query); |
1984 | - my $word = $1; |
1985 | - PTDEBUG && _d($word, $sqno); |
1986 | - if ( $word =~ m/^\(?SELECT\b/i ) { |
1987 | - my $start_pos = $pos - length($word) - 1; |
1988 | - if ( $start_pos ) { |
1989 | - $sqno++; |
1990 | - PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos); |
1991 | - $subqueries[$sqno] = { |
1992 | - start_pos => $start_pos, |
1993 | - end_pos => 0, |
1994 | - len => 0, |
1995 | - words => [$word], |
1996 | - lp => 1, # left parentheses |
1997 | - rp => 0, # right parentheses |
1998 | - done => 0, |
1999 | - }; |
2000 | - } |
2001 | - else { |
2002 | - PTDEBUG && _d('Main SELECT at pos 0'); |
2003 | - } |
2004 | - } |
2005 | - else { |
2006 | - next unless $sqno; # next unless we're in a subquery |
2007 | - PTDEBUG && _d('In subquery', $sqno); |
2008 | - my $sq = $subqueries[$sqno]; |
2009 | - if ( $sq->{done} ) { |
2010 | - PTDEBUG && _d('This subquery is done; SQL is for', |
2011 | - ($sqno - 1 ? "subquery $sqno" : "the main SELECT")); |
2012 | - next; |
2013 | - } |
2014 | - push @{$sq->{words}}, $word; |
2015 | - my $lp = ($word =~ tr/\(//) || 0; |
2016 | - my $rp = ($word =~ tr/\)//) || 0; |
2017 | - PTDEBUG && _d('parentheses left', $lp, 'right', $rp); |
2018 | - if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) { |
2019 | - my $end_pos = $pos - 1; |
2020 | - PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos); |
2021 | - $sq->{end_pos} = $end_pos; |
2022 | - $sq->{len} = $end_pos - $sq->{start_pos}; |
2023 | - } |
2024 | - } |
2025 | - } |
2026 | - |
2027 | - for my $i ( 1..$#subqueries ) { |
2028 | - my $sq = $subqueries[$i]; |
2029 | - next unless $sq; |
2030 | - $sq->{sql} = join(' ', @{$sq->{words}}); |
2031 | - substr $query, |
2032 | - $sq->{start_pos} + 1, # +1 for ( |
2033 | - $sq->{len} - 1, # -1 for ) |
2034 | - "__subquery_$i"; |
2035 | - } |
2036 | - |
2037 | - return $query, map { $_->{sql} } grep { defined $_ } @subqueries; |
2038 | -} |
2039 | - |
2040 | -sub query_type { |
2041 | - my ( $self, $query, $qr ) = @_; |
2042 | - my ($type, undef) = $qr->distill_verbs($query); |
2043 | - my $rw; |
2044 | - if ( $type =~ m/^SELECT\b/ ) { |
2045 | - $rw = 'read'; |
2046 | - } |
2047 | - elsif ( $type =~ m/^$data_manip_stmts\b/ |
2048 | - || $type =~ m/^$data_def_stmts\b/ ) { |
2049 | - $rw = 'write' |
2050 | - } |
2051 | - |
2052 | - return { |
2053 | - type => $type, |
2054 | - rw => $rw, |
2055 | - } |
2056 | -} |
2057 | - |
2058 | -sub get_columns { |
2059 | - my ( $self, $query ) = @_; |
2060 | - my $cols = []; |
2061 | - return $cols unless $query; |
2062 | - my $cols_def; |
2063 | - |
2064 | - if ( $query =~ m/^SELECT/i ) { |
2065 | - $query =~ s/ |
2066 | - ^SELECT\s+ |
2067 | - (?:ALL |
2068 | - |DISTINCT |
2069 | - |DISTINCTROW |
2070 | - |HIGH_PRIORITY |
2071 | - |STRAIGHT_JOIN |
2072 | - |SQL_SMALL_RESULT |
2073 | - |SQL_BIG_RESULT |
2074 | - |SQL_BUFFER_RESULT |
2075 | - |SQL_CACHE |
2076 | - |SQL_NO_CACHE |
2077 | - |SQL_CALC_FOUND_ROWS |
2078 | - )\s+ |
2079 | - /SELECT /xgi; |
2080 | - ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i; |
2081 | - } |
2082 | - elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) { |
2083 | - ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i; |
2084 | - } |
2085 | - |
2086 | - PTDEBUG && _d('Columns:', $cols_def); |
2087 | - if ( $cols_def ) { |
2088 | - @$cols = split(',', $cols_def); |
2089 | - map { |
2090 | - my $col = $_; |
2091 | - $col = s/^\s+//g; |
2092 | - $col = s/\s+$//g; |
2093 | - $col; |
2094 | - } @$cols; |
2095 | - } |
2096 | - |
2097 | - return $cols; |
2098 | -} |
2099 | - |
2100 | -sub parse { |
2101 | - my ( $self, $query ) = @_; |
2102 | - return unless $query; |
2103 | - my $parsed = {}; |
2104 | - |
2105 | - $query =~ s/\n/ /g; |
2106 | - $query = $self->clean_query($query); |
2107 | - |
2108 | - $parsed->{query} = $query, |
2109 | - $parsed->{tables} = $self->get_aliases($query, 1); |
2110 | - $parsed->{columns} = $self->get_columns($query); |
2111 | - |
2112 | - my ($type) = $query =~ m/^(\w+)/; |
2113 | - $parsed->{type} = lc $type; |
2114 | - |
2115 | - |
2116 | - $parsed->{sub_queries} = []; |
2117 | - |
2118 | - return $parsed; |
2119 | -} |
2120 | - |
2121 | -sub extract_tables { |
2122 | - my ( $self, %args ) = @_; |
2123 | - my $query = $args{query}; |
2124 | - my $default_db = $args{default_db}; |
2125 | - my $q = $self->{Quoter} || $args{Quoter}; |
2126 | - return unless $query; |
2127 | - PTDEBUG && _d('Extracting tables'); |
2128 | - my @tables; |
2129 | - my %seen; |
2130 | - foreach my $db_tbl ( $self->get_tables($query) ) { |
2131 | - next unless $db_tbl; |
2132 | - next if $seen{$db_tbl}++; # Unique-ify for issue 337. |
2133 | - my ( $db, $tbl ) = $q->split_unquote($db_tbl); |
2134 | - push @tables, [ $db || $default_db, $tbl ]; |
2135 | - } |
2136 | - return @tables; |
2137 | -} |
2138 | - |
2139 | -sub trim_identifier { |
2140 | - my ($self, $str) = @_; |
2141 | - return unless defined $str; |
2142 | - $str =~ s/`//g; |
2143 | - $str =~ s/^\s+//; |
2144 | - $str =~ s/\s+$//; |
2145 | - return $str; |
2146 | -} |
2147 | - |
2148 | -sub _d { |
2149 | - my ($package, undef, $line) = caller 0; |
2150 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
2151 | - map { defined $_ ? $_ : 'undef' } |
2152 | - @_; |
2153 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
2154 | -} |
2155 | - |
2156 | -1; |
2157 | -} |
2158 | -# ########################################################################### |
2159 | -# End QueryParser package |
2160 | -# ########################################################################### |
2161 | - |
2162 | -# ########################################################################### |
2163 | -# QueryRewriter package |
2164 | -# This package is a copy without comments from the original. The original |
2165 | -# with comments and its test file can be found in the Bazaar repository at, |
2166 | -# lib/QueryRewriter.pm |
2167 | -# t/lib/QueryRewriter.t |
2168 | -# See https://launchpad.net/percona-toolkit for more information. |
2169 | -# ########################################################################### |
2170 | -{ |
2171 | -package QueryRewriter; |
2172 | - |
2173 | -use strict; |
2174 | -use warnings FATAL => 'all'; |
2175 | -use English qw(-no_match_vars); |
2176 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
2177 | - |
2178 | -our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT |
2179 | - |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi; |
2180 | -my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly! |
2181 | -my $bal; |
2182 | -$bal = qr/ |
2183 | - \( |
2184 | - (?: |
2185 | - (?> [^()]+ ) # Non-parens without backtracking |
2186 | - | |
2187 | - (??{ $bal }) # Group with matching parens |
2188 | - )* |
2189 | - \) |
2190 | - /x; |
2191 | - |
2192 | -my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments |
2193 | -my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ |
2194 | -my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ |
2195 | -my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW |
2196 | - |
2197 | - |
2198 | -sub new { |
2199 | - my ( $class, %args ) = @_; |
2200 | - my $self = { %args }; |
2201 | - return bless $self, $class; |
2202 | -} |
2203 | - |
2204 | -sub strip_comments { |
2205 | - my ( $self, $query ) = @_; |
2206 | - return unless $query; |
2207 | - $query =~ s/$olc_re//go; |
2208 | - $query =~ s/$mlc_re//go; |
2209 | - if ( $query =~ m/$vlc_rf/i ) { # contains show + version |
2210 | - $query =~ s/$vlc_re//go; |
2211 | - } |
2212 | - return $query; |
2213 | -} |
2214 | - |
2215 | -sub shorten { |
2216 | - my ( $self, $query, $length ) = @_; |
2217 | - $query =~ s{ |
2218 | - \A( |
2219 | - (?:INSERT|REPLACE) |
2220 | - (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)? |
2221 | - (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\) |
2222 | - ) |
2223 | - \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)} |
2224 | - {$1 /*... omitted ...*/$2}xsi; |
2225 | - |
2226 | - return $query unless $query =~ m/IN\s*\(\s*(?!select)/i; |
2227 | - |
2228 | - my $last_length = 0; |
2229 | - my $query_length = length($query); |
2230 | - while ( |
2231 | - $length > 0 |
2232 | - && $query_length > $length |
2233 | - && $query_length < ( $last_length || $query_length + 1 ) |
2234 | - ) { |
2235 | - $last_length = $query_length; |
2236 | - $query =~ s{ |
2237 | - (\bIN\s*\() # The opening of an IN list |
2238 | - ([^\)]+) # Contents of the list, assuming no item contains paren |
2239 | - (?=\)) # Close of the list |
2240 | - } |
2241 | - { |
2242 | - $1 . __shorten($2) |
2243 | - }gexsi; |
2244 | - } |
2245 | - |
2246 | - return $query; |
2247 | -} |
2248 | - |
2249 | -sub __shorten { |
2250 | - my ( $snippet ) = @_; |
2251 | - my @vals = split(/,/, $snippet); |
2252 | - return $snippet unless @vals > 20; |
2253 | - my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items |
2254 | - return |
2255 | - join(',', @keep) |
2256 | - . "/*... omitted " |
2257 | - . scalar(@vals) |
2258 | - . " items ...*/"; |
2259 | -} |
2260 | - |
2261 | -sub fingerprint { |
2262 | - my ( $self, $query ) = @_; |
2263 | - |
2264 | - $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query |
2265 | - && return 'mysqldump'; |
2266 | - $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query |
2267 | - && return 'percona-toolkit'; |
2268 | - $query =~ m/\Aadministrator command: / |
2269 | - && return $query; |
2270 | - $query =~ m/\A\s*(call\s+\S+)\(/i |
2271 | - && return lc($1); # Warning! $1 used, be careful. |
2272 | - if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) { |
2273 | - $query = $beginning; # Shorten multi-value INSERT statements ASAP |
2274 | - } |
2275 | - |
2276 | - $query =~ s/$olc_re//go; |
2277 | - $query =~ s/$mlc_re//go; |
2278 | - $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE |
2279 | - && return $query; |
2280 | - |
2281 | - $query =~ s/\\["']//g; # quoted strings |
2282 | - $query =~ s/".*?"/?/sg; # quoted strings |
2283 | - $query =~ s/'.*?'/?/sg; # quoted strings |
2284 | - |
2285 | - if ( $self->{match_md5_checksums} ) { |
2286 | - $query =~ s/([._-])[a-f0-9]{32}/$1?/g; |
2287 | - } |
2288 | - |
2289 | - if ( !$self->{match_embedded_numbers} ) { |
2290 | - $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; |
2291 | - } |
2292 | - else { |
2293 | - $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; |
2294 | - } |
2295 | - |
2296 | - if ( $self->{match_md5_checksums} ) { |
2297 | - $query =~ s/[xb+-]\?/?/g; |
2298 | - } |
2299 | - else { |
2300 | - $query =~ s/[xb.+-]\?/?/g; |
2301 | - } |
2302 | - |
2303 | - $query =~ s/\A\s+//; # Chop off leading whitespace |
2304 | - chomp $query; # Kill trailing whitespace |
2305 | - $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace |
2306 | - $query = lc $query; |
2307 | - $query =~ s/\bnull\b/?/g; # Get rid of NULLs |
2308 | - $query =~ s{ # Collapse IN and VALUES lists |
2309 | - \b(in|values?)(?:[\s,]*\([\s?,]*\))+ |
2310 | - } |
2311 | - {$1(?+)}gx; |
2312 | - $query =~ s{ # Collapse UNION |
2313 | - \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+ |
2314 | - } |
2315 | - {$1 /*repeat$2*/}xg; |
2316 | - $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT |
2317 | - |
2318 | - if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause |
2319 | - 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query; |
2320 | - } |
2321 | - |
2322 | - return $query; |
2323 | -} |
2324 | - |
2325 | -sub distill_verbs { |
2326 | - my ( $self, $query ) = @_; |
2327 | - |
2328 | - $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1"; |
2329 | - $query =~ m/\A\s*use\s+/ && return "USE"; |
2330 | - $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; |
2331 | - $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; |
2332 | - |
2333 | - if ( $query =~ m/\Aadministrator command:/ ) { |
2334 | - $query =~ s/administrator command:/ADMIN/; |
2335 | - $query = uc $query; |
2336 | - return $query; |
2337 | - } |
2338 | - |
2339 | - $query = $self->strip_comments($query); |
2340 | - |
2341 | - if ( $query =~ m/\A\s*SHOW\s+/i ) { |
2342 | - PTDEBUG && _d($query); |
2343 | - |
2344 | - $query = uc $query; |
2345 | - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; |
2346 | - $query =~ s/\s+COUNT[^)]+\)//g; |
2347 | - |
2348 | - $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; |
2349 | - |
2350 | - $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s; |
2351 | - $query =~ s/\s+/ /g; |
2352 | - PTDEBUG && _d($query); |
2353 | - return $query; |
2354 | - } |
2355 | - |
2356 | - eval $QueryParser::data_def_stmts; |
2357 | - eval $QueryParser::tbl_ident; |
2358 | - my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; |
2359 | - if ( $dds) { |
2360 | - my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; |
2361 | - $obj = uc $obj if $obj; |
2362 | - PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); |
2363 | - my ($db_or_tbl) |
2364 | - = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i; |
2365 | - PTDEBUG && _d('Matches db or table:', $db_or_tbl); |
2366 | - return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl; |
2367 | - } |
2368 | - |
2369 | - my @verbs = $query =~ m/\b($verbs)\b/gio; |
2370 | - @verbs = do { |
2371 | - my $last = ''; |
2372 | - grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs; |
2373 | - }; |
2374 | - |
2375 | - if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) { |
2376 | - PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]); |
2377 | - my $union = grep { $_ eq 'UNION' } @verbs; |
2378 | - @verbs = $union ? qw(SELECT UNION) : qw(SELECT); |
2379 | - } |
2380 | - |
2381 | - my $verb_str = join(q{ }, @verbs); |
2382 | - return $verb_str; |
2383 | -} |
2384 | - |
2385 | -sub __distill_tables { |
2386 | - my ( $self, $query, $table, %args ) = @_; |
2387 | - my $qp = $args{QueryParser} || $self->{QueryParser}; |
2388 | - die "I need a QueryParser argument" unless $qp; |
2389 | - |
2390 | - my @tables = map { |
2391 | - $_ =~ s/`//g; |
2392 | - $_ =~ s/(_?)[0-9]+/$1?/g; |
2393 | - $_; |
2394 | - } grep { defined $_ } $qp->get_tables($query); |
2395 | - |
2396 | - push @tables, $table if $table; |
2397 | - |
2398 | - @tables = do { |
2399 | - my $last = ''; |
2400 | - grep { my $pass = $_ ne $last; $last = $_; $pass } @tables; |
2401 | - }; |
2402 | - |
2403 | - return @tables; |
2404 | -} |
2405 | - |
2406 | -sub distill { |
2407 | - my ( $self, $query, %args ) = @_; |
2408 | - |
2409 | - if ( $args{generic} ) { |
2410 | - my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/; |
2411 | - return '' unless $cmd; |
2412 | - $query = (uc $cmd) . ($arg ? " $arg" : ''); |
2413 | - } |
2414 | - else { |
2415 | - my ($verbs, $table) = $self->distill_verbs($query, %args); |
2416 | - |
2417 | - if ( $verbs && $verbs =~ m/^SHOW/ ) { |
2418 | - my %alias_for = qw( |
2419 | - SCHEMA DATABASE |
2420 | - KEYS INDEX |
2421 | - INDEXES INDEX |
2422 | - ); |
2423 | - map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; |
2424 | - $query = $verbs; |
2425 | - } |
2426 | - else { |
2427 | - my @tables = $self->__distill_tables($query, $table, %args); |
2428 | - $query = join(q{ }, $verbs, @tables); |
2429 | - } |
2430 | - } |
2431 | - |
2432 | - if ( $args{trf} ) { |
2433 | - $query = $args{trf}->($query, %args); |
2434 | - } |
2435 | - |
2436 | - return $query; |
2437 | -} |
2438 | - |
2439 | -sub convert_to_select { |
2440 | - my ( $self, $query ) = @_; |
2441 | - return unless $query; |
2442 | - |
2443 | - return if $query =~ m/=\s*\(\s*SELECT /i; |
2444 | - |
2445 | - $query =~ s{ |
2446 | - \A.*? |
2447 | - update(?:\s+(?:low_priority|ignore))?\s+(.*?) |
2448 | - \s+set\b(.*?) |
2449 | - (?:\s*where\b(.*?))? |
2450 | - (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)? |
2451 | - \Z |
2452 | - } |
2453 | - {__update_to_select($1, $2, $3, $4)}exsi |
2454 | - || $query =~ s{ |
2455 | - \A.*? |
2456 | - (?:insert(?:\s+ignore)?|replace)\s+ |
2457 | - .*?\binto\b(.*?)\(([^\)]+)\)\s* |
2458 | - values?\s*(\(.*?\))\s* |
2459 | - (?:\blimit\b|on\s+duplicate\s+key.*)?\s* |
2460 | - \Z |
2461 | - } |
2462 | - {__insert_to_select($1, $2, $3)}exsi |
2463 | - || $query =~ s{ |
2464 | - \A.*? |
2465 | - (?:insert(?:\s+ignore)?|replace)\s+ |
2466 | - (?:.*?\binto)\b(.*?)\s* |
2467 | - set\s+(.*?)\s* |
2468 | - (?:\blimit\b|on\s+duplicate\s+key.*)?\s* |
2469 | - \Z |
2470 | - } |
2471 | - {__insert_to_select_with_set($1, $2)}exsi |
2472 | - || $query =~ s{ |
2473 | - \A.*? |
2474 | - delete\s+(.*?) |
2475 | - \bfrom\b(.*) |
2476 | - \Z |
2477 | - } |
2478 | - {__delete_to_select($1, $2)}exsi; |
2479 | - $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si; |
2480 | - $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism; |
2481 | - return $query; |
2482 | -} |
2483 | - |
2484 | -sub convert_select_list { |
2485 | - my ( $self, $query ) = @_; |
2486 | - $query =~ s{ |
2487 | - \A\s*select(.*?)\bfrom\b |
2488 | - } |
2489 | - {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi; |
2490 | - return $query; |
2491 | -} |
2492 | - |
2493 | -sub __delete_to_select { |
2494 | - my ( $delete, $join ) = @_; |
2495 | - if ( $join =~ m/\bjoin\b/ ) { |
2496 | - return "select 1 from $join"; |
2497 | - } |
2498 | - return "select * from $join"; |
2499 | -} |
2500 | - |
2501 | -sub __insert_to_select { |
2502 | - my ( $tbl, $cols, $vals ) = @_; |
2503 | - PTDEBUG && _d('Args:', @_); |
2504 | - my @cols = split(/,/, $cols); |
2505 | - PTDEBUG && _d('Cols:', @cols); |
2506 | - $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens |
2507 | - my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g; |
2508 | - PTDEBUG && _d('Vals:', @vals); |
2509 | - if ( @cols == @vals ) { |
2510 | - return "select * from $tbl where " |
2511 | - . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols)); |
2512 | - } |
2513 | - else { |
2514 | - return "select * from $tbl limit 1"; |
2515 | - } |
2516 | -} |
2517 | - |
2518 | -sub __insert_to_select_with_set { |
2519 | - my ( $from, $set ) = @_; |
2520 | - $set =~ s/,/ and /g; |
2521 | - return "select * from $from where $set "; |
2522 | -} |
2523 | - |
2524 | -sub __update_to_select { |
2525 | - my ( $from, $set, $where, $limit ) = @_; |
2526 | - return "select $set from $from " |
2527 | - . ( $where ? "where $where" : '' ) |
2528 | - . ( $limit ? " $limit " : '' ); |
2529 | -} |
2530 | - |
2531 | -sub wrap_in_derived { |
2532 | - my ( $self, $query ) = @_; |
2533 | - return unless $query; |
2534 | - return $query =~ m/\A\s*select/i |
2535 | - ? "select 1 from ($query) as x limit 1" |
2536 | - : $query; |
2537 | -} |
2538 | - |
2539 | -sub _d { |
2540 | - my ($package, undef, $line) = caller 0; |
2541 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
2542 | - map { defined $_ ? $_ : 'undef' } |
2543 | - @_; |
2544 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
2545 | -} |
2546 | - |
2547 | -1; |
2548 | -} |
2549 | -# ########################################################################### |
2550 | -# End QueryRewriter package |
2551 | -# ########################################################################### |
2552 | - |
2553 | -# ########################################################################### |
2554 | # Daemon package |
2555 | # This package is a copy without comments from the original. The original |
2556 | # with comments and its test file can be found in the Bazaar repository at, |
2557 | @@ -5223,3403 +3056,6 @@ |
2558 | # ########################################################################### |
2559 | |
2560 | # ########################################################################### |
2561 | -# ChangeHandler package |
2562 | -# This package is a copy without comments from the original. The original |
2563 | -# with comments and its test file can be found in the Bazaar repository at, |
2564 | -# lib/ChangeHandler.pm |
2565 | -# t/lib/ChangeHandler.t |
2566 | -# See https://launchpad.net/percona-toolkit for more information. |
2567 | -# ########################################################################### |
2568 | -{ |
2569 | -package ChangeHandler; |
2570 | - |
2571 | -use strict; |
2572 | -use warnings FATAL => 'all'; |
2573 | -use English qw(-no_match_vars); |
2574 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
2575 | - |
2576 | -my $DUPE_KEY = qr/Duplicate entry/; |
2577 | -our @ACTIONS = qw(DELETE REPLACE INSERT UPDATE); |
2578 | - |
2579 | -sub new { |
2580 | - my ( $class, %args ) = @_; |
2581 | - foreach my $arg ( qw(Quoter left_db left_tbl right_db right_tbl |
2582 | - replace queue) ) { |
2583 | - die "I need a $arg argument" unless defined $args{$arg}; |
2584 | - } |
2585 | - my $q = $args{Quoter}; |
2586 | - |
2587 | - my $self = { |
2588 | - hex_blob => 1, |
2589 | - %args, |
2590 | - left_db_tbl => $q->quote(@args{qw(left_db left_tbl)}), |
2591 | - right_db_tbl => $q->quote(@args{qw(right_db right_tbl)}), |
2592 | - }; |
2593 | - |
2594 | - $self->{src_db_tbl} = $self->{left_db_tbl}; |
2595 | - $self->{dst_db_tbl} = $self->{right_db_tbl}; |
2596 | - |
2597 | - map { $self->{$_} = [] } @ACTIONS; |
2598 | - $self->{changes} = { map { $_ => 0 } @ACTIONS }; |
2599 | - |
2600 | - return bless $self, $class; |
2601 | -} |
2602 | - |
2603 | -sub fetch_back { |
2604 | - my ( $self, $dbh ) = @_; |
2605 | - $self->{fetch_back} = $dbh; |
2606 | - PTDEBUG && _d('Set fetch back dbh', $dbh); |
2607 | - return; |
2608 | -} |
2609 | - |
2610 | -sub set_src { |
2611 | - my ( $self, $src, $dbh ) = @_; |
2612 | - die "I need a src argument" unless $src; |
2613 | - if ( lc $src eq 'left' ) { |
2614 | - $self->{src_db_tbl} = $self->{left_db_tbl}; |
2615 | - $self->{dst_db_tbl} = $self->{right_db_tbl}; |
2616 | - } |
2617 | - elsif ( lc $src eq 'right' ) { |
2618 | - $self->{src_db_tbl} = $self->{right_db_tbl}; |
2619 | - $self->{dst_db_tbl} = $self->{left_db_tbl}; |
2620 | - } |
2621 | - else { |
2622 | - die "src argument must be either 'left' or 'right'" |
2623 | - } |
2624 | - PTDEBUG && _d('Set src to', $src); |
2625 | - $self->fetch_back($dbh) if $dbh; |
2626 | - return; |
2627 | -} |
2628 | - |
2629 | -sub src { |
2630 | - my ( $self ) = @_; |
2631 | - return $self->{src_db_tbl}; |
2632 | -} |
2633 | - |
2634 | -sub dst { |
2635 | - my ( $self ) = @_; |
2636 | - return $self->{dst_db_tbl}; |
2637 | -} |
2638 | - |
2639 | -sub _take_action { |
2640 | - my ( $self, $sql, $dbh ) = @_; |
2641 | - PTDEBUG && _d('Calling subroutines on', $dbh, $sql); |
2642 | - foreach my $action ( @{$self->{actions}} ) { |
2643 | - $action->($sql, $dbh); |
2644 | - } |
2645 | - return; |
2646 | -} |
2647 | - |
2648 | -sub change { |
2649 | - my ( $self, $action, $row, $cols, $dbh ) = @_; |
2650 | - PTDEBUG && _d($dbh, $action, 'where', $self->make_where_clause($row, $cols)); |
2651 | - |
2652 | - return unless $action; |
2653 | - |
2654 | - $self->{changes}->{ |
2655 | - $self->{replace} && $action ne 'DELETE' ? 'REPLACE' : $action |
2656 | - }++; |
2657 | - if ( $self->{queue} ) { |
2658 | - $self->__queue($action, $row, $cols, $dbh); |
2659 | - } |
2660 | - else { |
2661 | - eval { |
2662 | - my $func = "make_$action"; |
2663 | - $self->_take_action($self->$func($row, $cols), $dbh); |
2664 | - }; |
2665 | - if ( $EVAL_ERROR =~ m/$DUPE_KEY/ ) { |
2666 | - PTDEBUG && _d('Duplicate key violation; will queue and rewrite'); |
2667 | - $self->{queue}++; |
2668 | - $self->{replace} = 1; |
2669 | - $self->__queue($action, $row, $cols, $dbh); |
2670 | - } |
2671 | - elsif ( $EVAL_ERROR ) { |
2672 | - die $EVAL_ERROR; |
2673 | - } |
2674 | - } |
2675 | - return; |
2676 | -} |
2677 | - |
2678 | -sub __queue { |
2679 | - my ( $self, $action, $row, $cols, $dbh ) = @_; |
2680 | - PTDEBUG && _d('Queueing change for later'); |
2681 | - if ( $self->{replace} ) { |
2682 | - $action = $action eq 'DELETE' ? $action : 'REPLACE'; |
2683 | - } |
2684 | - push @{$self->{$action}}, [ $row, $cols, $dbh ]; |
2685 | -} |
2686 | - |
2687 | -sub process_rows { |
2688 | - my ( $self, $queue_level, $trace_msg ) = @_; |
2689 | - my $error_count = 0; |
2690 | - TRY: { |
2691 | - if ( $queue_level && $queue_level < $self->{queue} ) { # see redo below! |
2692 | - PTDEBUG && _d('Not processing now', $queue_level, '<', $self->{queue}); |
2693 | - return; |
2694 | - } |
2695 | - PTDEBUG && _d('Processing rows:'); |
2696 | - my ($row, $cur_act); |
2697 | - eval { |
2698 | - foreach my $action ( @ACTIONS ) { |
2699 | - my $func = "make_$action"; |
2700 | - my $rows = $self->{$action}; |
2701 | - PTDEBUG && _d(scalar(@$rows), 'to', $action); |
2702 | - $cur_act = $action; |
2703 | - while ( @$rows ) { |
2704 | - $row = shift @$rows; |
2705 | - my $sql = $self->$func(@$row); |
2706 | - $sql .= " /*percona-toolkit $trace_msg*/" if $trace_msg; |
2707 | - $self->_take_action($sql, $row->[2]); |
2708 | - } |
2709 | - } |
2710 | - $error_count = 0; |
2711 | - }; |
2712 | - if ( !$error_count++ && $EVAL_ERROR =~ m/$DUPE_KEY/ ) { |
2713 | - PTDEBUG && _d('Duplicate key violation; re-queueing and rewriting'); |
2714 | - $self->{queue}++; # Defer rows to the very end |
2715 | - $self->{replace} = 1; |
2716 | - $self->__queue($cur_act, @$row); |
2717 | - redo TRY; |
2718 | - } |
2719 | - elsif ( $EVAL_ERROR ) { |
2720 | - die $EVAL_ERROR; |
2721 | - } |
2722 | - } |
2723 | -} |
2724 | - |
2725 | -sub make_DELETE { |
2726 | - my ( $self, $row, $cols ) = @_; |
2727 | - PTDEBUG && _d('Make DELETE'); |
2728 | - return "DELETE FROM $self->{dst_db_tbl} WHERE " |
2729 | - . $self->make_where_clause($row, $cols) |
2730 | - . ' LIMIT 1'; |
2731 | -} |
2732 | - |
2733 | -sub make_UPDATE { |
2734 | - my ( $self, $row, $cols ) = @_; |
2735 | - PTDEBUG && _d('Make UPDATE'); |
2736 | - if ( $self->{replace} ) { |
2737 | - return $self->make_row('REPLACE', $row, $cols); |
2738 | - } |
2739 | - my %in_where = map { $_ => 1 } @$cols; |
2740 | - my $where = $self->make_where_clause($row, $cols); |
2741 | - my @cols; |
2742 | - if ( my $dbh = $self->{fetch_back} ) { |
2743 | - my $sql = $self->make_fetch_back_query($where); |
2744 | - PTDEBUG && _d('Fetching data on dbh', $dbh, 'for UPDATE:', $sql); |
2745 | - my $res = $dbh->selectrow_hashref($sql); |
2746 | - @{$row}{keys %$res} = values %$res; |
2747 | - @cols = $self->sort_cols($res); |
2748 | - } |
2749 | - else { |
2750 | - @cols = $self->sort_cols($row); |
2751 | - } |
2752 | - my $types = $self->{tbl_struct}->{type_for}; |
2753 | - return "UPDATE $self->{dst_db_tbl} SET " |
2754 | - . join(', ', map { |
2755 | - my $is_char = ($types->{$_} || '') =~ m/char|text/i; |
2756 | - $self->{Quoter}->quote($_) |
2757 | - . '=' . $self->{Quoter}->quote_val($row->{$_}, |
2758 | - is_char => $is_char); |
2759 | - } grep { !$in_where{$_} } @cols) |
2760 | - . " WHERE $where LIMIT 1"; |
2761 | -} |
2762 | - |
2763 | -sub make_INSERT { |
2764 | - my ( $self, $row, $cols ) = @_; |
2765 | - PTDEBUG && _d('Make INSERT'); |
2766 | - if ( $self->{replace} ) { |
2767 | - return $self->make_row('REPLACE', $row, $cols); |
2768 | - } |
2769 | - return $self->make_row('INSERT', $row, $cols); |
2770 | -} |
2771 | - |
2772 | -sub make_REPLACE { |
2773 | - my ( $self, $row, $cols ) = @_; |
2774 | - PTDEBUG && _d('Make REPLACE'); |
2775 | - return $self->make_row('REPLACE', $row, $cols); |
2776 | -} |
2777 | - |
2778 | -sub make_row { |
2779 | - my ( $self, $verb, $row, $cols ) = @_; |
2780 | - my @cols; |
2781 | - if ( my $dbh = $self->{fetch_back} ) { |
2782 | - my $where = $self->make_where_clause($row, $cols); |
2783 | - my $sql = $self->make_fetch_back_query($where); |
2784 | - PTDEBUG && _d('Fetching data on dbh', $dbh, 'for', $verb, ':', $sql); |
2785 | - my $res = $dbh->selectrow_hashref($sql); |
2786 | - @{$row}{keys %$res} = values %$res; |
2787 | - @cols = $self->sort_cols($res); |
2788 | - } |
2789 | - else { |
2790 | - @cols = $self->sort_cols($row); |
2791 | - } |
2792 | - my $q = $self->{Quoter}; |
2793 | - my $type_for = $self->{tbl_struct}->{type_for}; |
2794 | - return "$verb INTO $self->{dst_db_tbl}(" |
2795 | - . join(', ', map { $q->quote($_) } @cols) |
2796 | - . ') VALUES (' |
2797 | - . join(', ', map { |
2798 | - my $is_char = ($type_for->{$_} || '') =~ m/char|text/i; |
2799 | - $q->quote_val($row->{$_}, |
2800 | - is_char => $is_char) } @cols ) |
2801 | - . ')'; |
2802 | -} |
2803 | - |
2804 | -sub make_where_clause { |
2805 | - my ( $self, $row, $cols ) = @_; |
2806 | - my @clauses = map { |
2807 | - my $val = $row->{$_}; |
2808 | - my $sep = defined $val ? '=' : ' IS '; |
2809 | - my $is_char = ($self->{tbl_struct}->{type_for}->{$_} || '') =~ m/char|text/i; |
2810 | - $self->{Quoter}->quote($_) . $sep . $self->{Quoter}->quote_val($val, |
2811 | - is_char => $is_char); |
2812 | - } @$cols; |
2813 | - return join(' AND ', @clauses); |
2814 | -} |
2815 | - |
2816 | - |
2817 | -sub get_changes { |
2818 | - my ( $self ) = @_; |
2819 | - return %{$self->{changes}}; |
2820 | -} |
2821 | - |
2822 | - |
2823 | -sub sort_cols { |
2824 | - my ( $self, $row ) = @_; |
2825 | - my @cols; |
2826 | - if ( $self->{tbl_struct} ) { |
2827 | - my $pos = $self->{tbl_struct}->{col_posn}; |
2828 | - my @not_in_tbl; |
2829 | - @cols = sort { |
2830 | - $pos->{$a} <=> $pos->{$b} |
2831 | - } |
2832 | - grep { |
2833 | - if ( !defined $pos->{$_} ) { |
2834 | - push @not_in_tbl, $_; |
2835 | - 0; |
2836 | - } |
2837 | - else { |
2838 | - 1; |
2839 | - } |
2840 | - } |
2841 | - keys %$row; |
2842 | - push @cols, @not_in_tbl if @not_in_tbl; |
2843 | - } |
2844 | - else { |
2845 | - @cols = sort keys %$row; |
2846 | - } |
2847 | - return @cols; |
2848 | -} |
2849 | - |
2850 | -sub make_fetch_back_query { |
2851 | - my ( $self, $where ) = @_; |
2852 | - die "I need a where argument" unless $where; |
2853 | - my $cols = '*'; |
2854 | - my $tbl_struct = $self->{tbl_struct}; |
2855 | - if ( $tbl_struct ) { |
2856 | - $cols = join(', ', |
2857 | - map { |
2858 | - my $col = $_; |
2859 | - if ( $self->{hex_blob} |
2860 | - && $tbl_struct->{type_for}->{$col} =~ m/b(?:lob|inary)/ ) { |
2861 | - $col = "IF(BINARY(`$col`)='', '', CONCAT('0x', HEX(`$col`))) AS `$col`"; |
2862 | - } |
2863 | - else { |
2864 | - $col = "`$col`"; |
2865 | - } |
2866 | - $col; |
2867 | - } @{ $tbl_struct->{cols} } |
2868 | - ); |
2869 | - |
2870 | - if ( !$cols ) { |
2871 | - PTDEBUG && _d('Failed to make explicit columns list from tbl struct'); |
2872 | - $cols = '*'; |
2873 | - } |
2874 | - } |
2875 | - return "SELECT $cols FROM $self->{src_db_tbl} WHERE $where LIMIT 1"; |
2876 | -} |
2877 | - |
2878 | -sub _d { |
2879 | - my ($package, undef, $line) = caller 0; |
2880 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
2881 | - map { defined $_ ? $_ : 'undef' } |
2882 | - @_; |
2883 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
2884 | -} |
2885 | - |
2886 | -1; |
2887 | -} |
2888 | -# ########################################################################### |
2889 | -# End ChangeHandler package |
2890 | -# ########################################################################### |
2891 | - |
2892 | -# ########################################################################### |
2893 | -# RowDiff package |
2894 | -# This package is a copy without comments from the original. The original |
2895 | -# with comments and its test file can be found in the Bazaar repository at, |
2896 | -# lib/RowDiff.pm |
2897 | -# t/lib/RowDiff.t |
2898 | -# See https://launchpad.net/percona-toolkit for more information. |
2899 | -# ########################################################################### |
2900 | -{ |
2901 | -package RowDiff; |
2902 | - |
2903 | -use strict; |
2904 | -use warnings FATAL => 'all'; |
2905 | -use English qw(-no_match_vars); |
2906 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
2907 | - |
2908 | -sub new { |
2909 | - my ( $class, %args ) = @_; |
2910 | - die "I need a dbh" unless $args{dbh}; |
2911 | - my $self = { %args }; |
2912 | - return bless $self, $class; |
2913 | -} |
2914 | - |
2915 | -sub compare_sets { |
2916 | - my ( $self, %args ) = @_; |
2917 | - my @required_args = qw(left_sth right_sth syncer tbl_struct); |
2918 | - foreach my $arg ( @required_args ) { |
2919 | - die "I need a $arg argument" unless defined $args{$arg}; |
2920 | - } |
2921 | - my $left_sth = $args{left_sth}; |
2922 | - my $right_sth = $args{right_sth}; |
2923 | - my $syncer = $args{syncer}; |
2924 | - my $tbl_struct = $args{tbl_struct}; |
2925 | - |
2926 | - my ($lr, $rr); # Current row from the left/right sths. |
2927 | - $args{key_cols} = $syncer->key_cols(); # for key_cmp() |
2928 | - |
2929 | - my $left_done = 0; |
2930 | - my $right_done = 0; |
2931 | - my $done = $self->{done}; |
2932 | - |
2933 | - do { |
2934 | - if ( !$lr && !$left_done ) { |
2935 | - PTDEBUG && _d('Fetching row from left'); |
2936 | - eval { $lr = $left_sth->fetchrow_hashref(); }; |
2937 | - PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); |
2938 | - $left_done = !$lr || $EVAL_ERROR ? 1 : 0; |
2939 | - } |
2940 | - elsif ( PTDEBUG ) { |
2941 | - _d('Left still has rows'); |
2942 | - } |
2943 | - |
2944 | - if ( !$rr && !$right_done ) { |
2945 | - PTDEBUG && _d('Fetching row from right'); |
2946 | - eval { $rr = $right_sth->fetchrow_hashref(); }; |
2947 | - PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); |
2948 | - $right_done = !$rr || $EVAL_ERROR ? 1 : 0; |
2949 | - } |
2950 | - elsif ( PTDEBUG ) { |
2951 | - _d('Right still has rows'); |
2952 | - } |
2953 | - |
2954 | - my $cmp; |
2955 | - if ( $lr && $rr ) { |
2956 | - $cmp = $self->key_cmp(%args, lr => $lr, rr => $rr); |
2957 | - PTDEBUG && _d('Key comparison on left and right:', $cmp); |
2958 | - } |
2959 | - if ( $lr || $rr ) { |
2960 | - if ( $lr && $rr && defined $cmp && $cmp == 0 ) { |
2961 | - PTDEBUG && _d('Left and right have the same key'); |
2962 | - $syncer->same_row(%args, lr => $lr, rr => $rr); |
2963 | - $self->{same_row}->(%args, lr => $lr, rr => $rr) |
2964 | - if $self->{same_row}; |
2965 | - $lr = $rr = undef; # Fetch another row from each side. |
2966 | - } |
2967 | - elsif ( !$rr || ( defined $cmp && $cmp < 0 ) ) { |
2968 | - PTDEBUG && _d('Left is not in right'); |
2969 | - $syncer->not_in_right(%args, lr => $lr, rr => $rr); |
2970 | - $self->{not_in_right}->(%args, lr => $lr, rr => $rr) |
2971 | - if $self->{not_in_right}; |
2972 | - $lr = undef; |
2973 | - } |
2974 | - else { |
2975 | - PTDEBUG && _d('Right is not in left'); |
2976 | - $syncer->not_in_left(%args, lr => $lr, rr => $rr); |
2977 | - $self->{not_in_left}->(%args, lr => $lr, rr => $rr) |
2978 | - if $self->{not_in_left}; |
2979 | - $rr = undef; |
2980 | - } |
2981 | - } |
2982 | - $left_done = $right_done = 1 if $done && $done->(%args); |
2983 | - } while ( !($left_done && $right_done) ); |
2984 | - PTDEBUG && _d('No more rows'); |
2985 | - $syncer->done_with_rows(); |
2986 | -} |
2987 | - |
2988 | -sub key_cmp { |
2989 | - my ( $self, %args ) = @_; |
2990 | - my @required_args = qw(lr rr key_cols tbl_struct); |
2991 | - foreach my $arg ( @required_args ) { |
2992 | - die "I need a $arg argument" unless exists $args{$arg}; |
2993 | - } |
2994 | - my ($lr, $rr, $key_cols, $tbl_struct) = @args{@required_args}; |
2995 | - PTDEBUG && _d('Comparing keys using columns:', join(',', @$key_cols)); |
2996 | - |
2997 | - my $callback = $self->{key_cmp}; |
2998 | - my $trf = $self->{trf}; |
2999 | - |
3000 | - foreach my $col ( @$key_cols ) { |
3001 | - my $l = $lr->{$col}; |
3002 | - my $r = $rr->{$col}; |
3003 | - if ( !defined $l || !defined $r ) { |
3004 | - PTDEBUG && _d($col, 'is not defined in both rows'); |
3005 | - return defined $l ? 1 : defined $r ? -1 : 0; |
3006 | - } |
3007 | - else { |
3008 | - if ( $tbl_struct->{is_numeric}->{$col} ) { # Numeric column |
3009 | - PTDEBUG && _d($col, 'is numeric'); |
3010 | - ($l, $r) = $trf->($l, $r, $tbl_struct, $col) if $trf; |
3011 | - my $cmp = $l <=> $r; |
3012 | - if ( $cmp ) { |
3013 | - PTDEBUG && _d('Column', $col, 'differs:', $l, '!=', $r); |
3014 | - $callback->($col, $l, $r) if $callback; |
3015 | - return $cmp; |
3016 | - } |
3017 | - } |
3018 | - elsif ( $l ne $r ) { |
3019 | - my $cmp; |
3020 | - my $coll = $tbl_struct->{collation_for}->{$col}; |
3021 | - if ( $coll && ( $coll ne 'latin1_swedish_ci' |
3022 | - || $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/) ) |
3023 | - { |
3024 | - PTDEBUG && _d('Comparing', $col, 'via MySQL'); |
3025 | - $cmp = $self->db_cmp($coll, $l, $r); |
3026 | - } |
3027 | - else { |
3028 | - PTDEBUG && _d('Comparing', $col, 'in lowercase'); |
3029 | - $cmp = lc $l cmp lc $r; |
3030 | - } |
3031 | - if ( $cmp ) { |
3032 | - PTDEBUG && _d('Column', $col, 'differs:', $l, 'ne', $r); |
3033 | - $callback->($col, $l, $r) if $callback; |
3034 | - return $cmp; |
3035 | - } |
3036 | - } |
3037 | - } |
3038 | - } |
3039 | - return 0; |
3040 | -} |
3041 | - |
3042 | -sub db_cmp { |
3043 | - my ( $self, $collation, $l, $r ) = @_; |
3044 | - if ( !$self->{sth}->{$collation} ) { |
3045 | - if ( !$self->{charset_for} ) { |
3046 | - PTDEBUG && _d('Fetching collations from MySQL'); |
3047 | - my @collations = @{$self->{dbh}->selectall_arrayref( |
3048 | - 'SHOW COLLATION', {Slice => { collation => 1, charset => 1 }})}; |
3049 | - foreach my $collation ( @collations ) { |
3050 | - $self->{charset_for}->{$collation->{collation}} |
3051 | - = $collation->{charset}; |
3052 | - } |
3053 | - } |
3054 | - my $sql = "SELECT STRCMP(_$self->{charset_for}->{$collation}? COLLATE $collation, " |
3055 | - . "_$self->{charset_for}->{$collation}? COLLATE $collation) AS res"; |
3056 | - PTDEBUG && _d($sql); |
3057 | - $self->{sth}->{$collation} = $self->{dbh}->prepare($sql); |
3058 | - } |
3059 | - my $sth = $self->{sth}->{$collation}; |
3060 | - $sth->execute($l, $r); |
3061 | - return $sth->fetchall_arrayref()->[0]->[0]; |
3062 | -} |
3063 | - |
3064 | -sub _d { |
3065 | - my ($package, undef, $line) = caller 0; |
3066 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
3067 | - map { defined $_ ? $_ : 'undef' } |
3068 | - @_; |
3069 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
3070 | -} |
3071 | - |
3072 | -1; |
3073 | -} |
3074 | -# ########################################################################### |
3075 | -# End RowDiff package |
3076 | -# ########################################################################### |
3077 | - |
3078 | -# ########################################################################### |
3079 | -# TableChunker package |
3080 | -# This package is a copy without comments from the original. The original |
3081 | -# with comments and its test file can be found in the Bazaar repository at, |
3082 | -# lib/TableChunker.pm |
3083 | -# t/lib/TableChunker.t |
3084 | -# See https://launchpad.net/percona-toolkit for more information. |
3085 | -# ########################################################################### |
3086 | -{ |
3087 | -package TableChunker; |
3088 | - |
3089 | -use strict; |
3090 | -use warnings FATAL => 'all'; |
3091 | -use English qw(-no_match_vars); |
3092 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
3093 | - |
3094 | -use POSIX qw(floor ceil); |
3095 | -use List::Util qw(min max); |
3096 | -use Data::Dumper; |
3097 | -$Data::Dumper::Indent = 1; |
3098 | -$Data::Dumper::Sortkeys = 1; |
3099 | -$Data::Dumper::Quotekeys = 0; |
3100 | - |
3101 | -sub new { |
3102 | - my ( $class, %args ) = @_; |
3103 | - foreach my $arg ( qw(Quoter TableParser) ) { |
3104 | - die "I need a $arg argument" unless $args{$arg}; |
3105 | - } |
3106 | - |
3107 | - my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year); |
3108 | - my %real_types = map { $_ => 1 } qw(decimal double float); |
3109 | - |
3110 | - my $self = { |
3111 | - %args, |
3112 | - int_types => \%int_types, |
3113 | - real_types => \%real_types, |
3114 | - EPOCH => '1970-01-01', |
3115 | - }; |
3116 | - |
3117 | - return bless $self, $class; |
3118 | -} |
3119 | - |
3120 | -sub find_chunk_columns { |
3121 | - my ( $self, %args ) = @_; |
3122 | - foreach my $arg ( qw(tbl_struct) ) { |
3123 | - die "I need a $arg argument" unless $args{$arg}; |
3124 | - } |
3125 | - my $tbl_struct = $args{tbl_struct}; |
3126 | - |
3127 | - my @possible_indexes; |
3128 | - foreach my $index ( values %{ $tbl_struct->{keys} } ) { |
3129 | - |
3130 | - next unless $index->{type} eq 'BTREE'; |
3131 | - |
3132 | - next if grep { defined } @{$index->{col_prefixes}}; |
3133 | - |
3134 | - if ( $args{exact} ) { |
3135 | - next unless $index->{is_unique} && @{$index->{cols}} == 1; |
3136 | - } |
3137 | - |
3138 | - push @possible_indexes, $index; |
3139 | - } |
3140 | - PTDEBUG && _d('Possible chunk indexes in order:', |
3141 | - join(', ', map { $_->{name} } @possible_indexes)); |
3142 | - |
3143 | - my $can_chunk_exact = 0; |
3144 | - my @candidate_cols; |
3145 | - foreach my $index ( @possible_indexes ) { |
3146 | - my $col = $index->{cols}->[0]; |
3147 | - |
3148 | - my $col_type = $tbl_struct->{type_for}->{$col}; |
3149 | - next unless $self->{int_types}->{$col_type} |
3150 | - || $self->{real_types}->{$col_type} |
3151 | - || $col_type =~ m/char/; |
3152 | - |
3153 | - push @candidate_cols, { column => $col, index => $index->{name} }; |
3154 | - } |
3155 | - |
3156 | - $can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols; |
3157 | - |
3158 | - if ( PTDEBUG ) { |
3159 | - my $chunk_type = $args{exact} ? 'Exact' : 'Inexact'; |
3160 | - _d($chunk_type, 'chunkable:', |
3161 | - join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols)); |
3162 | - } |
3163 | - |
3164 | - my @result; |
3165 | - PTDEBUG && _d('Ordering columns by order in tbl, PK first'); |
3166 | - if ( $tbl_struct->{keys}->{PRIMARY} ) { |
3167 | - my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0]; |
3168 | - @result = grep { $_->{column} eq $pk_first_col } @candidate_cols; |
3169 | - @candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols; |
3170 | - } |
3171 | - my $i = 0; |
3172 | - my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}}; |
3173 | - push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} } |
3174 | - @candidate_cols; |
3175 | - |
3176 | - if ( PTDEBUG ) { |
3177 | - _d('Chunkable columns:', |
3178 | - join(', ', map { "$_->{column} on $_->{index}" } @result)); |
3179 | - _d('Can chunk exactly:', $can_chunk_exact); |
3180 | - } |
3181 | - |
3182 | - return ($can_chunk_exact, @result); |
3183 | -} |
3184 | - |
3185 | -sub calculate_chunks { |
3186 | - my ( $self, %args ) = @_; |
3187 | - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); |
3188 | - foreach my $arg ( @required_args ) { |
3189 | - die "I need a $arg argument" unless defined $args{$arg}; |
3190 | - } |
3191 | - PTDEBUG && _d('Calculate chunks for', |
3192 | - join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")} |
3193 | - qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact) |
3194 | - )); |
3195 | - |
3196 | - if ( !$args{rows_in_range} ) { |
3197 | - PTDEBUG && _d("Empty table"); |
3198 | - return '1=1'; |
3199 | - } |
3200 | - |
3201 | - if ( $args{rows_in_range} < $args{chunk_size} ) { |
3202 | - PTDEBUG && _d("Chunk size larger than rows in range"); |
3203 | - return '1=1'; |
3204 | - } |
3205 | - |
3206 | - my $q = $self->{Quoter}; |
3207 | - my $dbh = $args{dbh}; |
3208 | - my $chunk_col = $args{chunk_col}; |
3209 | - my $tbl_struct = $args{tbl_struct}; |
3210 | - my $col_type = $tbl_struct->{type_for}->{$chunk_col}; |
3211 | - PTDEBUG && _d('chunk col type:', $col_type); |
3212 | - |
3213 | - my %chunker; |
3214 | - if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) { |
3215 | - %chunker = $self->_chunk_numeric(%args); |
3216 | - } |
3217 | - elsif ( $col_type =~ m/char/ ) { |
3218 | - %chunker = $self->_chunk_char(%args); |
3219 | - } |
3220 | - else { |
3221 | - die "Cannot chunk $col_type columns"; |
3222 | - } |
3223 | - PTDEBUG && _d("Chunker:", Dumper(\%chunker)); |
3224 | - my ($col, $start_point, $end_point, $interval, $range_func) |
3225 | - = @chunker{qw(col start_point end_point interval range_func)}; |
3226 | - |
3227 | - my @chunks; |
3228 | - if ( $start_point < $end_point ) { |
3229 | - |
3230 | - push @chunks, "$col = 0" if $chunker{have_zero_chunk}; |
3231 | - |
3232 | - my ($beg, $end); |
3233 | - my $iter = 0; |
3234 | - for ( my $i = $start_point; $i < $end_point; $i += $interval ) { |
3235 | - ($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point); |
3236 | - |
3237 | - if ( $iter++ == 0 ) { |
3238 | - push @chunks, |
3239 | - ($chunker{have_zero_chunk} ? "$col > 0 AND " : "") |
3240 | - ."$col < " . $q->quote_val($end); |
3241 | - } |
3242 | - else { |
3243 | - push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end); |
3244 | - } |
3245 | - } |
3246 | - |
3247 | - my $chunk_range = lc($args{chunk_range} || 'open'); |
3248 | - my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}}; |
3249 | - pop @chunks; |
3250 | - if ( @chunks ) { |
3251 | - push @chunks, "$col >= " . $q->quote_val($beg) |
3252 | - . ($chunk_range eq 'openclosed' |
3253 | - ? " AND $col <= " . $q->quote_val($args{max}) : ""); |
3254 | - } |
3255 | - else { |
3256 | - push @chunks, $nullable ? "$col IS NOT NULL" : '1=1'; |
3257 | - } |
3258 | - if ( $nullable ) { |
3259 | - push @chunks, "$col IS NULL"; |
3260 | - } |
3261 | - } |
3262 | - else { |
3263 | - PTDEBUG && _d('No chunks; using single chunk 1=1'); |
3264 | - push @chunks, '1=1'; |
3265 | - } |
3266 | - |
3267 | - return @chunks; |
3268 | -} |
3269 | - |
3270 | -sub _chunk_numeric { |
3271 | - my ( $self, %args ) = @_; |
3272 | - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); |
3273 | - foreach my $arg ( @required_args ) { |
3274 | - die "I need a $arg argument" unless defined $args{$arg}; |
3275 | - } |
3276 | - my $q = $self->{Quoter}; |
3277 | - my $db_tbl = $q->quote($args{db}, $args{tbl}); |
3278 | - my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}}; |
3279 | - |
3280 | - my $range_func; |
3281 | - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { |
3282 | - $range_func = 'range_num'; |
3283 | - } |
3284 | - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { |
3285 | - $range_func = "range_$col_type"; |
3286 | - } |
3287 | - elsif ( $col_type eq 'datetime' ) { |
3288 | - $range_func = 'range_datetime'; |
3289 | - } |
3290 | - |
3291 | - my ($start_point, $end_point); |
3292 | - eval { |
3293 | - $start_point = $self->value_to_number( |
3294 | - value => $args{min}, |
3295 | - column_type => $col_type, |
3296 | - dbh => $args{dbh}, |
3297 | - ); |
3298 | - $end_point = $self->value_to_number( |
3299 | - value => $args{max}, |
3300 | - column_type => $col_type, |
3301 | - dbh => $args{dbh}, |
3302 | - ); |
3303 | - }; |
3304 | - if ( $EVAL_ERROR ) { |
3305 | - if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) { |
3306 | - die $EVAL_ERROR; |
3307 | - } |
3308 | - else { |
3309 | - die "Error calculating chunk start and end points for table " |
3310 | - . "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` " |
3311 | - . "with min/max values " |
3312 | - . join('/', |
3313 | - map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max)) |
3314 | - . ":\n\n" |
3315 | - . $EVAL_ERROR |
3316 | - . "\nVerify that the min and max values are valid for the column. " |
3317 | - . "If they are valid, this error could be caused by a bug in the " |
3318 | - . "tool."; |
3319 | - } |
3320 | - } |
3321 | - |
3322 | - if ( !defined $start_point ) { |
3323 | - PTDEBUG && _d('Start point is undefined'); |
3324 | - $start_point = 0; |
3325 | - } |
3326 | - if ( !defined $end_point || $end_point < $start_point ) { |
3327 | - PTDEBUG && _d('End point is undefined or before start point'); |
3328 | - $end_point = 0; |
3329 | - } |
3330 | - PTDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point); |
3331 | - |
3332 | - my $have_zero_chunk = 0; |
3333 | - if ( $args{zero_chunk} ) { |
3334 | - if ( $start_point != $end_point && $start_point >= 0 ) { |
3335 | - PTDEBUG && _d('Zero chunking'); |
3336 | - my $nonzero_val = $self->get_nonzero_value( |
3337 | - %args, |
3338 | - db_tbl => $db_tbl, |
3339 | - col => $args{chunk_col}, |
3340 | - col_type => $col_type, |
3341 | - val => $args{min} |
3342 | - ); |
3343 | - $start_point = $self->value_to_number( |
3344 | - value => $nonzero_val, |
3345 | - column_type => $col_type, |
3346 | - dbh => $args{dbh}, |
3347 | - ); |
3348 | - $have_zero_chunk = 1; |
3349 | - } |
3350 | - else { |
3351 | - PTDEBUG && _d("Cannot zero chunk"); |
3352 | - } |
3353 | - } |
3354 | - PTDEBUG && _d("Using chunk range:", $start_point, "to", $end_point); |
3355 | - |
3356 | - my $interval = $args{chunk_size} |
3357 | - * ($end_point - $start_point) |
3358 | - / $args{rows_in_range}; |
3359 | - if ( $self->{int_types}->{$col_type} ) { |
3360 | - $interval = ceil($interval); |
3361 | - } |
3362 | - $interval ||= $args{chunk_size}; |
3363 | - if ( $args{exact} ) { |
3364 | - $interval = $args{chunk_size}; |
3365 | - } |
3366 | - PTDEBUG && _d('Chunk interval:', $interval, 'units'); |
3367 | - |
3368 | - return ( |
3369 | - col => $q->quote($args{chunk_col}), |
3370 | - start_point => $start_point, |
3371 | - end_point => $end_point, |
3372 | - interval => $interval, |
3373 | - range_func => $range_func, |
3374 | - have_zero_chunk => $have_zero_chunk, |
3375 | - ); |
3376 | -} |
3377 | - |
3378 | -sub _chunk_char { |
3379 | - my ( $self, %args ) = @_; |
3380 | - my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size); |
3381 | - foreach my $arg ( @required_args ) { |
3382 | - die "I need a $arg argument" unless defined $args{$arg}; |
3383 | - } |
3384 | - my $q = $self->{Quoter}; |
3385 | - my $db_tbl = $q->quote($args{db}, $args{tbl}); |
3386 | - my $dbh = $args{dbh}; |
3387 | - my $chunk_col = $args{chunk_col}; |
3388 | - my $qchunk_col = $q->quote($args{chunk_col}); |
3389 | - my $row; |
3390 | - my $sql; |
3391 | - |
3392 | - my ($min_col, $max_col) = @{args}{qw(min max)}; |
3393 | - $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; |
3394 | - PTDEBUG && _d($dbh, $sql); |
3395 | - my $ord_sth = $dbh->prepare($sql); # avoid quoting issues |
3396 | - $ord_sth->execute($min_col, $max_col); |
3397 | - $row = $ord_sth->fetchrow_arrayref(); |
3398 | - my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]); |
3399 | - PTDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord); |
3400 | - |
3401 | - my $base; |
3402 | - my @chars; |
3403 | - PTDEBUG && _d("Table charset:", $args{tbl_struct}->{charset}); |
3404 | - if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) { |
3405 | - my @sorted_latin1_chars = ( |
3406 | - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, |
3407 | - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, |
3408 | - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, |
3409 | - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, |
3410 | - 88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161, |
3411 | - 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, |
3412 | - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, |
3413 | - 190, 191, 215, 216, 222, 223, 247, 255); |
3414 | - |
3415 | - my ($first_char, $last_char); |
3416 | - for my $i ( 0..$#sorted_latin1_chars ) { |
3417 | - $first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord; |
3418 | - } |
3419 | - for my $i ( $first_char..$#sorted_latin1_chars ) { |
3420 | - $last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord; |
3421 | - }; |
3422 | - |
3423 | - @chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char]; |
3424 | - $base = scalar @chars; |
3425 | - } |
3426 | - else { |
3427 | - |
3428 | - my $tmp_tbl = '__maatkit_char_chunking_map'; |
3429 | - my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl); |
3430 | - $sql = "DROP TABLE IF EXISTS $tmp_db_tbl"; |
3431 | - PTDEBUG && _d($dbh, $sql); |
3432 | - $dbh->do($sql); |
3433 | - my $col_def = $args{tbl_struct}->{defs}->{$chunk_col}; |
3434 | - $sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) " |
3435 | - . "ENGINE=MEMORY"; |
3436 | - PTDEBUG && _d($dbh, $sql); |
3437 | - $dbh->do($sql); |
3438 | - |
3439 | - $sql = "INSERT INTO $tmp_db_tbl VALUES (CHAR(?))"; |
3440 | - PTDEBUG && _d($dbh, $sql); |
3441 | - my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues |
3442 | - for my $char_code ( $min_col_ord..$max_col_ord ) { |
3443 | - $ins_char_sth->execute($char_code); |
3444 | - } |
3445 | - |
3446 | - $sql = "SELECT $qchunk_col FROM $tmp_db_tbl " |
3447 | - . "WHERE $qchunk_col BETWEEN ? AND ? " |
3448 | - . "ORDER BY $qchunk_col"; |
3449 | - PTDEBUG && _d($dbh, $sql); |
3450 | - my $sel_char_sth = $dbh->prepare($sql); |
3451 | - $sel_char_sth->execute($min_col, $max_col); |
3452 | - |
3453 | - @chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() }; |
3454 | - $base = scalar @chars; |
3455 | - |
3456 | - $sql = "DROP TABLE $tmp_db_tbl"; |
3457 | - PTDEBUG && _d($dbh, $sql); |
3458 | - $dbh->do($sql); |
3459 | - } |
3460 | - PTDEBUG && _d("Base", $base, "chars:", @chars); |
3461 | - |
3462 | - die "Cannot chunk table $db_tbl using the character column " |
3463 | - . "$chunk_col, most likely because all values start with the " |
3464 | - . "same character. This table must be synced separately by " |
3465 | - . "specifying a list of --algorithms without the Chunk algorithm" |
3466 | - if $base == 1; |
3467 | - |
3468 | - |
3469 | - $sql = "SELECT MAX(LENGTH($qchunk_col)) FROM $db_tbl " |
3470 | - . ($args{where} ? "WHERE $args{where} " : "") |
3471 | - . "ORDER BY $qchunk_col"; |
3472 | - PTDEBUG && _d($dbh, $sql); |
3473 | - $row = $dbh->selectrow_arrayref($sql); |
3474 | - my $max_col_len = $row->[0]; |
3475 | - PTDEBUG && _d("Max column value:", $max_col, $max_col_len); |
3476 | - my $n_values; |
3477 | - for my $n_chars ( 1..$max_col_len ) { |
3478 | - $n_values = $base**$n_chars; |
3479 | - if ( $n_values >= $args{chunk_size} ) { |
3480 | - PTDEBUG && _d($n_chars, "chars in base", $base, "expresses", |
3481 | - $n_values, "values"); |
3482 | - last; |
3483 | - } |
3484 | - } |
3485 | - |
3486 | - my $n_chunks = $args{rows_in_range} / $args{chunk_size}; |
3487 | - my $interval = floor(($n_values+0.00001) / $n_chunks) || 1; |
3488 | - |
3489 | - my $range_func = sub { |
3490 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3491 | - my $start_char = $self->base_count( |
3492 | - count_to => $start, |
3493 | - base => $base, |
3494 | - symbols => \@chars, |
3495 | - ); |
3496 | - my $end_char = $self->base_count( |
3497 | - count_to => min($max, $start + $interval), |
3498 | - base => $base, |
3499 | - symbols => \@chars, |
3500 | - ); |
3501 | - return $start_char, $end_char; |
3502 | - }; |
3503 | - |
3504 | - return ( |
3505 | - col => $qchunk_col, |
3506 | - start_point => 0, |
3507 | - end_point => $n_values, |
3508 | - interval => $interval, |
3509 | - range_func => $range_func, |
3510 | - ); |
3511 | -} |
3512 | - |
3513 | -sub get_first_chunkable_column { |
3514 | - my ( $self, %args ) = @_; |
3515 | - foreach my $arg ( qw(tbl_struct) ) { |
3516 | - die "I need a $arg argument" unless $args{$arg}; |
3517 | - } |
3518 | - |
3519 | - my ($exact, @cols) = $self->find_chunk_columns(%args); |
3520 | - my $col = $cols[0]->{column}; |
3521 | - my $idx = $cols[0]->{index}; |
3522 | - |
3523 | - my $wanted_col = $args{chunk_column}; |
3524 | - my $wanted_idx = $args{chunk_index}; |
3525 | - PTDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx); |
3526 | - |
3527 | - if ( $wanted_col && $wanted_idx ) { |
3528 | - foreach my $chunkable_col ( @cols ) { |
3529 | - if ( $wanted_col eq $chunkable_col->{column} |
3530 | - && $wanted_idx eq $chunkable_col->{index} ) { |
3531 | - $col = $wanted_col; |
3532 | - $idx = $wanted_idx; |
3533 | - last; |
3534 | - } |
3535 | - } |
3536 | - } |
3537 | - elsif ( $wanted_col ) { |
3538 | - foreach my $chunkable_col ( @cols ) { |
3539 | - if ( $wanted_col eq $chunkable_col->{column} ) { |
3540 | - $col = $wanted_col; |
3541 | - $idx = $chunkable_col->{index}; |
3542 | - last; |
3543 | - } |
3544 | - } |
3545 | - } |
3546 | - elsif ( $wanted_idx ) { |
3547 | - foreach my $chunkable_col ( @cols ) { |
3548 | - if ( $wanted_idx eq $chunkable_col->{index} ) { |
3549 | - $col = $chunkable_col->{column}; |
3550 | - $idx = $wanted_idx; |
3551 | - last; |
3552 | - } |
3553 | - } |
3554 | - } |
3555 | - |
3556 | - PTDEBUG && _d('First chunkable col/index:', $col, $idx); |
3557 | - return $col, $idx; |
3558 | -} |
3559 | - |
3560 | -sub size_to_rows { |
3561 | - my ( $self, %args ) = @_; |
3562 | - my @required_args = qw(dbh db tbl chunk_size); |
3563 | - foreach my $arg ( @required_args ) { |
3564 | - die "I need a $arg argument" unless $args{$arg}; |
3565 | - } |
3566 | - my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args}; |
3567 | - my $q = $self->{Quoter}; |
3568 | - my $tp = $self->{TableParser}; |
3569 | - |
3570 | - my ($n_rows, $avg_row_length); |
3571 | - |
3572 | - my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/; |
3573 | - if ( $suffix ) { # Convert to bytes. |
3574 | - $chunk_size = $suffix eq 'k' ? $num * 1_024 |
3575 | - : $suffix eq 'M' ? $num * 1_024 * 1_024 |
3576 | - : $num * 1_024 * 1_024 * 1_024; |
3577 | - } |
3578 | - elsif ( $num ) { |
3579 | - $n_rows = $num; |
3580 | - } |
3581 | - else { |
3582 | - die "Invalid chunk size $chunk_size; must be an integer " |
3583 | - . "with optional suffix kMG"; |
3584 | - } |
3585 | - |
3586 | - if ( $suffix || $args{avg_row_length} ) { |
3587 | - my ($status) = $tp->get_table_status($dbh, $db, $tbl); |
3588 | - $avg_row_length = $status->{avg_row_length}; |
3589 | - if ( !defined $n_rows ) { |
3590 | - $n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef; |
3591 | - } |
3592 | - } |
3593 | - |
3594 | - return $n_rows, $avg_row_length; |
3595 | -} |
3596 | - |
3597 | -sub get_range_statistics { |
3598 | - my ( $self, %args ) = @_; |
3599 | - my @required_args = qw(dbh db tbl chunk_col tbl_struct); |
3600 | - foreach my $arg ( @required_args ) { |
3601 | - die "I need a $arg argument" unless $args{$arg}; |
3602 | - } |
3603 | - my ($dbh, $db, $tbl, $col) = @args{@required_args}; |
3604 | - my $where = $args{where}; |
3605 | - my $q = $self->{Quoter}; |
3606 | - |
3607 | - my $col_type = $args{tbl_struct}->{type_for}->{$col}; |
3608 | - my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col}; |
3609 | - |
3610 | - my $db_tbl = $q->quote($db, $tbl); |
3611 | - $col = $q->quote($col); |
3612 | - |
3613 | - my ($min, $max); |
3614 | - eval { |
3615 | - my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl" |
3616 | - . ($args{index_hint} ? " $args{index_hint}" : "") |
3617 | - . ($where ? " WHERE ($where)" : ''); |
3618 | - PTDEBUG && _d($dbh, $sql); |
3619 | - ($min, $max) = $dbh->selectrow_array($sql); |
3620 | - PTDEBUG && _d("Actual end points:", $min, $max); |
3621 | - |
3622 | - ($min, $max) = $self->get_valid_end_points( |
3623 | - %args, |
3624 | - dbh => $dbh, |
3625 | - db_tbl => $db_tbl, |
3626 | - col => $col, |
3627 | - col_type => $col_type, |
3628 | - min => $min, |
3629 | - max => $max, |
3630 | - ); |
3631 | - PTDEBUG && _d("Valid end points:", $min, $max); |
3632 | - }; |
3633 | - if ( $EVAL_ERROR ) { |
3634 | - die "Error getting min and max values for table $db_tbl " |
3635 | - . "on column $col: $EVAL_ERROR"; |
3636 | - } |
3637 | - |
3638 | - my $sql = "EXPLAIN SELECT * FROM $db_tbl" |
3639 | - . ($args{index_hint} ? " $args{index_hint}" : "") |
3640 | - . ($where ? " WHERE $where" : ''); |
3641 | - PTDEBUG && _d($sql); |
3642 | - my $expl = $dbh->selectrow_hashref($sql); |
3643 | - |
3644 | - return ( |
3645 | - min => $min, |
3646 | - max => $max, |
3647 | - rows_in_range => $expl->{rows}, |
3648 | - ); |
3649 | -} |
3650 | - |
3651 | -sub inject_chunks { |
3652 | - my ( $self, %args ) = @_; |
3653 | - foreach my $arg ( qw(database table chunks chunk_num query) ) { |
3654 | - die "I need a $arg argument" unless defined $args{$arg}; |
3655 | - } |
3656 | - PTDEBUG && _d('Injecting chunk', $args{chunk_num}); |
3657 | - my $query = $args{query}; |
3658 | - my $comment = sprintf("/*%s.%s:%d/%d*/", |
3659 | - $args{database}, $args{table}, |
3660 | - $args{chunk_num} + 1, scalar @{$args{chunks}}); |
3661 | - $query =~ s!/\*PROGRESS_COMMENT\*/!$comment!; |
3662 | - my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')'; |
3663 | - if ( $args{where} && grep { $_ } @{$args{where}} ) { |
3664 | - $where .= " AND (" |
3665 | - . join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} ) |
3666 | - . ")"; |
3667 | - } |
3668 | - my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)}); |
3669 | - my $index_hint = $args{index_hint} || ''; |
3670 | - |
3671 | - PTDEBUG && _d('Parameters:', |
3672 | - Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint})); |
3673 | - $query =~ s!/\*WHERE\*/! $where!; |
3674 | - $query =~ s!/\*DB_TBL\*/!$db_tbl!; |
3675 | - $query =~ s!/\*INDEX_HINT\*/! $index_hint!; |
3676 | - $query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!; |
3677 | - |
3678 | - return $query; |
3679 | -} |
3680 | - |
3681 | - |
3682 | -sub value_to_number { |
3683 | - my ( $self, %args ) = @_; |
3684 | - my @required_args = qw(column_type dbh); |
3685 | - foreach my $arg ( @required_args ) { |
3686 | - die "I need a $arg argument" unless defined $args{$arg}; |
3687 | - } |
3688 | - my $val = $args{value}; |
3689 | - my ($col_type, $dbh) = @args{@required_args}; |
3690 | - PTDEBUG && _d('Converting MySQL', $col_type, $val); |
3691 | - |
3692 | - return unless defined $val; # value is NULL |
3693 | - |
3694 | - my %mysql_conv_func_for = ( |
3695 | - timestamp => 'UNIX_TIMESTAMP', |
3696 | - date => 'TO_DAYS', |
3697 | - time => 'TIME_TO_SEC', |
3698 | - datetime => 'TO_DAYS', |
3699 | - ); |
3700 | - |
3701 | - my $num; |
3702 | - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { |
3703 | - $num = $val; |
3704 | - } |
3705 | - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { |
3706 | - my $func = $mysql_conv_func_for{$col_type}; |
3707 | - my $sql = "SELECT $func(?)"; |
3708 | - PTDEBUG && _d($dbh, $sql, $val); |
3709 | - my $sth = $dbh->prepare($sql); |
3710 | - $sth->execute($val); |
3711 | - ($num) = $sth->fetchrow_array(); |
3712 | - } |
3713 | - elsif ( $col_type eq 'datetime' ) { |
3714 | - $num = $self->timestampdiff($dbh, $val); |
3715 | - } |
3716 | - else { |
3717 | - die "I don't know how to chunk $col_type\n"; |
3718 | - } |
3719 | - PTDEBUG && _d('Converts to', $num); |
3720 | - return $num; |
3721 | -} |
3722 | - |
3723 | -sub range_num { |
3724 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3725 | - my $end = min($max, $start + $interval); |
3726 | - |
3727 | - |
3728 | - $start = sprintf('%.17f', $start) if $start =~ /e/; |
3729 | - $end = sprintf('%.17f', $end) if $end =~ /e/; |
3730 | - |
3731 | - $start =~ s/\.(\d{5}).*$/.$1/; |
3732 | - $end =~ s/\.(\d{5}).*$/.$1/; |
3733 | - |
3734 | - if ( $end > $start ) { |
3735 | - return ( $start, $end ); |
3736 | - } |
3737 | - else { |
3738 | - die "Chunk size is too small: $end !> $start\n"; |
3739 | - } |
3740 | -} |
3741 | - |
3742 | -sub range_time { |
3743 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3744 | - my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))"; |
3745 | - PTDEBUG && _d($sql); |
3746 | - return $dbh->selectrow_array($sql); |
3747 | -} |
3748 | - |
3749 | -sub range_date { |
3750 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3751 | - my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))"; |
3752 | - PTDEBUG && _d($sql); |
3753 | - return $dbh->selectrow_array($sql); |
3754 | -} |
3755 | - |
3756 | -sub range_datetime { |
3757 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3758 | - my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), " |
3759 | - . "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)"; |
3760 | - PTDEBUG && _d($sql); |
3761 | - return $dbh->selectrow_array($sql); |
3762 | -} |
3763 | - |
3764 | -sub range_timestamp { |
3765 | - my ( $self, $dbh, $start, $interval, $max ) = @_; |
3766 | - my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))"; |
3767 | - PTDEBUG && _d($sql); |
3768 | - return $dbh->selectrow_array($sql); |
3769 | -} |
3770 | - |
3771 | -sub timestampdiff { |
3772 | - my ( $self, $dbh, $time ) = @_; |
3773 | - my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) " |
3774 | - . "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400"; |
3775 | - PTDEBUG && _d($sql); |
3776 | - my ( $diff ) = $dbh->selectrow_array($sql); |
3777 | - $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)"; |
3778 | - PTDEBUG && _d($sql); |
3779 | - my ( $check ) = $dbh->selectrow_array($sql); |
3780 | - die <<" EOF" |
3781 | - Incorrect datetime math: given $time, calculated $diff but checked to $check. |
3782 | - This could be due to a version of MySQL that overflows on large interval |
3783 | - values to DATE_ADD(), or the given datetime is not a valid date. If not, |
3784 | - please report this as a bug. |
3785 | - EOF |
3786 | - unless $check eq $time; |
3787 | - return $diff; |
3788 | -} |
3789 | - |
3790 | - |
3791 | - |
3792 | - |
3793 | -sub get_valid_end_points { |
3794 | - my ( $self, %args ) = @_; |
3795 | - my @required_args = qw(dbh db_tbl col col_type); |
3796 | - foreach my $arg ( @required_args ) { |
3797 | - die "I need a $arg argument" unless $args{$arg}; |
3798 | - } |
3799 | - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; |
3800 | - my ($real_min, $real_max) = @args{qw(min max)}; |
3801 | - |
3802 | - my $err_fmt = "Error finding a valid %s value for table $db_tbl on " |
3803 | - . "column $col. The real %s value %s is invalid and " |
3804 | - . "no other valid values were found. Verify that the table " |
3805 | - . "has at least one valid value for this column" |
3806 | - . ($args{where} ? " where $args{where}." : "."); |
3807 | - |
3808 | - my $valid_min = $real_min; |
3809 | - if ( defined $valid_min ) { |
3810 | - PTDEBUG && _d("Validating min end point:", $real_min); |
3811 | - $valid_min = $self->_get_valid_end_point( |
3812 | - %args, |
3813 | - val => $real_min, |
3814 | - endpoint => 'min', |
3815 | - ); |
3816 | - die sprintf($err_fmt, 'minimum', 'minimum', |
3817 | - (defined $real_min ? $real_min : "NULL")) |
3818 | - unless defined $valid_min; |
3819 | - } |
3820 | - |
3821 | - my $valid_max = $real_max; |
3822 | - if ( defined $valid_max ) { |
3823 | - PTDEBUG && _d("Validating max end point:", $real_min); |
3824 | - $valid_max = $self->_get_valid_end_point( |
3825 | - %args, |
3826 | - val => $real_max, |
3827 | - endpoint => 'max', |
3828 | - ); |
3829 | - die sprintf($err_fmt, 'maximum', 'maximum', |
3830 | - (defined $real_max ? $real_max : "NULL")) |
3831 | - unless defined $valid_max; |
3832 | - } |
3833 | - |
3834 | - return $valid_min, $valid_max; |
3835 | -} |
3836 | - |
3837 | -sub _get_valid_end_point { |
3838 | - my ( $self, %args ) = @_; |
3839 | - my @required_args = qw(dbh db_tbl col col_type); |
3840 | - foreach my $arg ( @required_args ) { |
3841 | - die "I need a $arg argument" unless $args{$arg}; |
3842 | - } |
3843 | - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; |
3844 | - my $val = $args{val}; |
3845 | - |
3846 | - return $val unless defined $val; |
3847 | - |
3848 | - my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value |
3849 | - : undef; |
3850 | - |
3851 | - if ( !$validate ) { |
3852 | - PTDEBUG && _d("No validator for", $col_type, "values"); |
3853 | - return $val; |
3854 | - } |
3855 | - |
3856 | - return $val if defined $validate->($dbh, $val); |
3857 | - |
3858 | - PTDEBUG && _d("Value is invalid, getting first valid value"); |
3859 | - $val = $self->get_first_valid_value( |
3860 | - %args, |
3861 | - val => $val, |
3862 | - validate => $validate, |
3863 | - ); |
3864 | - |
3865 | - return $val; |
3866 | -} |
3867 | - |
3868 | -sub get_first_valid_value { |
3869 | - my ( $self, %args ) = @_; |
3870 | - my @required_args = qw(dbh db_tbl col validate endpoint); |
3871 | - foreach my $arg ( @required_args ) { |
3872 | - die "I need a $arg argument" unless $args{$arg}; |
3873 | - } |
3874 | - my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args}; |
3875 | - my $tries = defined $args{tries} ? $args{tries} : 5; |
3876 | - my $val = $args{val}; |
3877 | - |
3878 | - return unless defined $val; |
3879 | - |
3880 | - my $cmp = $endpoint =~ m/min/i ? '>' |
3881 | - : $endpoint =~ m/max/i ? '<' |
3882 | - : die "Invalid endpoint arg: $endpoint"; |
3883 | - my $sql = "SELECT $col FROM $db_tbl " |
3884 | - . ($args{index_hint} ? "$args{index_hint} " : "") |
3885 | - . "WHERE $col $cmp ? AND $col IS NOT NULL " |
3886 | - . ($args{where} ? "AND ($args{where}) " : "") |
3887 | - . "ORDER BY $col LIMIT 1"; |
3888 | - PTDEBUG && _d($dbh, $sql); |
3889 | - my $sth = $dbh->prepare($sql); |
3890 | - |
3891 | - my $last_val = $val; |
3892 | - while ( $tries-- ) { |
3893 | - $sth->execute($last_val); |
3894 | - my ($next_val) = $sth->fetchrow_array(); |
3895 | - PTDEBUG && _d('Next value:', $next_val, '; tries left:', $tries); |
3896 | - if ( !defined $next_val ) { |
3897 | - PTDEBUG && _d('No more rows in table'); |
3898 | - last; |
3899 | - } |
3900 | - if ( defined $validate->($dbh, $next_val) ) { |
3901 | - PTDEBUG && _d('First valid value:', $next_val); |
3902 | - $sth->finish(); |
3903 | - return $next_val; |
3904 | - } |
3905 | - $last_val = $next_val; |
3906 | - } |
3907 | - $sth->finish(); |
3908 | - $val = undef; # no valid value found |
3909 | - |
3910 | - return $val; |
3911 | -} |
3912 | - |
3913 | -sub _validate_temporal_value { |
3914 | - my ( $dbh, $val ) = @_; |
3915 | - my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))"; |
3916 | - my $res; |
3917 | - eval { |
3918 | - PTDEBUG && _d($dbh, $sql, $val); |
3919 | - my $sth = $dbh->prepare($sql); |
3920 | - $sth->execute($val, $val, $val, $val); |
3921 | - ($res) = $sth->fetchrow_array(); |
3922 | - $sth->finish(); |
3923 | - }; |
3924 | - if ( $EVAL_ERROR ) { |
3925 | - PTDEBUG && _d($EVAL_ERROR); |
3926 | - } |
3927 | - return $res; |
3928 | -} |
3929 | - |
3930 | -sub get_nonzero_value { |
3931 | - my ( $self, %args ) = @_; |
3932 | - my @required_args = qw(dbh db_tbl col col_type); |
3933 | - foreach my $arg ( @required_args ) { |
3934 | - die "I need a $arg argument" unless $args{$arg}; |
3935 | - } |
3936 | - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; |
3937 | - my $tries = defined $args{tries} ? $args{tries} : 5; |
3938 | - my $val = $args{val}; |
3939 | - |
3940 | - my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value |
3941 | - : sub { return $_[1]; }; |
3942 | - |
3943 | - if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry |
3944 | - PTDEBUG && _d('Discarding zero value:', $val); |
3945 | - my $sql = "SELECT $col FROM $db_tbl " |
3946 | - . ($args{index_hint} ? "$args{index_hint} " : "") |
3947 | - . "WHERE $col > ? AND $col IS NOT NULL " |
3948 | - . ($args{where} ? "AND ($args{where}) " : '') |
3949 | - . "ORDER BY $col LIMIT 1"; |
3950 | - PTDEBUG && _d($sql); |
3951 | - my $sth = $dbh->prepare($sql); |
3952 | - |
3953 | - my $last_val = $val; |
3954 | - while ( $tries-- ) { |
3955 | - $sth->execute($last_val); |
3956 | - my ($next_val) = $sth->fetchrow_array(); |
3957 | - if ( $is_nonzero->($dbh, $next_val) ) { |
3958 | - PTDEBUG && _d('First non-zero value:', $next_val); |
3959 | - $sth->finish(); |
3960 | - return $next_val; |
3961 | - } |
3962 | - $last_val = $next_val; |
3963 | - } |
3964 | - $sth->finish(); |
3965 | - $val = undef; # no non-zero value found |
3966 | - } |
3967 | - |
3968 | - return $val; |
3969 | -} |
3970 | - |
3971 | -sub base_count { |
3972 | - my ( $self, %args ) = @_; |
3973 | - my @required_args = qw(count_to base symbols); |
3974 | - foreach my $arg ( @required_args ) { |
3975 | - die "I need a $arg argument" unless defined $args{$arg}; |
3976 | - } |
3977 | - my ($n, $base, $symbols) = @args{@required_args}; |
3978 | - |
3979 | - return $symbols->[0] if $n == 0; |
3980 | - |
3981 | - my $highest_power = floor(log($n+0.00001)/log($base)); |
3982 | - if ( $highest_power == 0 ){ |
3983 | - return $symbols->[$n]; |
3984 | - } |
3985 | - |
3986 | - my @base_powers; |
3987 | - for my $power ( 0..$highest_power ) { |
3988 | - push @base_powers, ($base**$power) || 1; |
3989 | - } |
3990 | - |
3991 | - my @base_multiples; |
3992 | - foreach my $base_power ( reverse @base_powers ) { |
3993 | - my $multiples = floor(($n+0.00001) / $base_power); |
3994 | - push @base_multiples, $multiples; |
3995 | - $n -= $multiples * $base_power; |
3996 | - } |
3997 | - return join('', map { $symbols->[$_] } @base_multiples); |
3998 | -} |
3999 | - |
4000 | -sub _d { |
4001 | - my ($package, undef, $line) = caller 0; |
4002 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
4003 | - map { defined $_ ? $_ : 'undef' } |
4004 | - @_; |
4005 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
4006 | -} |
4007 | - |
4008 | -1; |
4009 | -} |
4010 | -# ########################################################################### |
4011 | -# End TableChunker package |
4012 | -# ########################################################################### |
4013 | - |
4014 | -# ########################################################################### |
4015 | -# TableNibbler package |
4016 | -# This package is a copy without comments from the original. The original |
4017 | -# with comments and its test file can be found in the Bazaar repository at, |
4018 | -# lib/TableNibbler.pm |
4019 | -# t/lib/TableNibbler.t |
4020 | -# See https://launchpad.net/percona-toolkit for more information. |
4021 | -# ########################################################################### |
4022 | -{ |
4023 | -package TableNibbler; |
4024 | - |
4025 | -use strict; |
4026 | -use warnings FATAL => 'all'; |
4027 | -use English qw(-no_match_vars); |
4028 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
4029 | - |
4030 | -sub new { |
4031 | - my ( $class, %args ) = @_; |
4032 | - my @required_args = qw(TableParser Quoter); |
4033 | - foreach my $arg ( @required_args ) { |
4034 | - die "I need a $arg argument" unless $args{$arg}; |
4035 | - } |
4036 | - my $self = { %args }; |
4037 | - return bless $self, $class; |
4038 | -} |
4039 | - |
4040 | -sub generate_asc_stmt { |
4041 | - my ( $self, %args ) = @_; |
4042 | - my @required_args = qw(tbl_struct index); |
4043 | - foreach my $arg ( @required_args ) { |
4044 | - die "I need a $arg argument" unless defined $args{$arg}; |
4045 | - } |
4046 | - my ($tbl_struct, $index) = @args{@required_args}; |
4047 | - my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}; |
4048 | - my $q = $self->{Quoter}; |
4049 | - |
4050 | - die "Index '$index' does not exist in table" |
4051 | - unless exists $tbl_struct->{keys}->{$index}; |
4052 | - PTDEBUG && _d('Will ascend index', $index); |
4053 | - |
4054 | - my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; |
4055 | - if ( $args{asc_first} ) { |
4056 | - PTDEBUG && _d('Ascending only first column'); |
4057 | - @asc_cols = $asc_cols[0]; |
4058 | - } |
4059 | - elsif ( my $n = $args{n_index_cols} ) { |
4060 | - $n = scalar @asc_cols if $n > @asc_cols; |
4061 | - PTDEBUG && _d('Ascending only first', $n, 'columns'); |
4062 | - @asc_cols = @asc_cols[0..($n-1)]; |
4063 | - } |
4064 | - PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); |
4065 | - |
4066 | - my @asc_slice; |
4067 | - my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; |
4068 | - foreach my $col ( @asc_cols ) { |
4069 | - if ( !exists $col_posn{$col} ) { |
4070 | - push @cols, $col; |
4071 | - $col_posn{$col} = $#cols; |
4072 | - } |
4073 | - push @asc_slice, $col_posn{$col}; |
4074 | - } |
4075 | - PTDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice)); |
4076 | - |
4077 | - my $asc_stmt = { |
4078 | - cols => \@cols, |
4079 | - index => $index, |
4080 | - where => '', |
4081 | - slice => [], |
4082 | - scols => [], |
4083 | - }; |
4084 | - |
4085 | - if ( @asc_slice ) { |
4086 | - my $cmp_where; |
4087 | - foreach my $cmp ( qw(< <= >= >) ) { |
4088 | - $cmp_where = $self->generate_cmp_where( |
4089 | - type => $cmp, |
4090 | - slice => \@asc_slice, |
4091 | - cols => \@cols, |
4092 | - quoter => $q, |
4093 | - is_nullable => $tbl_struct->{is_nullable}, |
4094 | - ); |
4095 | - $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where}; |
4096 | - } |
4097 | - my $cmp = $args{asc_only} ? '>' : '>='; |
4098 | - $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp}; |
4099 | - $asc_stmt->{slice} = $cmp_where->{slice}; |
4100 | - $asc_stmt->{scols} = $cmp_where->{scols}; |
4101 | - } |
4102 | - |
4103 | - return $asc_stmt; |
4104 | -} |
4105 | - |
4106 | -sub generate_cmp_where { |
4107 | - my ( $self, %args ) = @_; |
4108 | - foreach my $arg ( qw(type slice cols is_nullable) ) { |
4109 | - die "I need a $arg arg" unless defined $args{$arg}; |
4110 | - } |
4111 | - my @slice = @{$args{slice}}; |
4112 | - my @cols = @{$args{cols}}; |
4113 | - my $is_nullable = $args{is_nullable}; |
4114 | - my $type = $args{type}; |
4115 | - my $q = $self->{Quoter}; |
4116 | - |
4117 | - (my $cmp = $type) =~ s/=//; |
4118 | - |
4119 | - my @r_slice; # Resulting slice columns, by ordinal |
4120 | - my @r_scols; # Ditto, by name |
4121 | - |
4122 | - my @clauses; |
4123 | - foreach my $i ( 0 .. $#slice ) { |
4124 | - my @clause; |
4125 | - |
4126 | - foreach my $j ( 0 .. $i - 1 ) { |
4127 | - my $ord = $slice[$j]; |
4128 | - my $col = $cols[$ord]; |
4129 | - my $quo = $q->quote($col); |
4130 | - if ( $is_nullable->{$col} ) { |
4131 | - push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))"; |
4132 | - push @r_slice, $ord, $ord; |
4133 | - push @r_scols, $col, $col; |
4134 | - } |
4135 | - else { |
4136 | - push @clause, "$quo = ?"; |
4137 | - push @r_slice, $ord; |
4138 | - push @r_scols, $col; |
4139 | - } |
4140 | - } |
4141 | - |
4142 | - my $ord = $slice[$i]; |
4143 | - my $col = $cols[$ord]; |
4144 | - my $quo = $q->quote($col); |
4145 | - my $end = $i == $#slice; # Last clause of the whole group. |
4146 | - if ( $is_nullable->{$col} ) { |
4147 | - if ( $type =~ m/=/ && $end ) { |
4148 | - push @clause, "(? IS NULL OR $quo $type ?)"; |
4149 | - } |
4150 | - elsif ( $type =~ m/>/ ) { |
4151 | - push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo $cmp ?))"; |
4152 | - } |
4153 | - else { # If $type =~ m/</ ) { |
4154 | - push @clause, "((? IS NOT NULL AND $quo IS NULL) OR ($quo $cmp ?))"; |
4155 | - } |
4156 | - push @r_slice, $ord, $ord; |
4157 | - push @r_scols, $col, $col; |
4158 | - } |
4159 | - else { |
4160 | - push @r_slice, $ord; |
4161 | - push @r_scols, $col; |
4162 | - push @clause, ($type =~ m/=/ && $end ? "$quo $type ?" : "$quo $cmp ?"); |
4163 | - } |
4164 | - |
4165 | - push @clauses, '(' . join(' AND ', @clause) . ')'; |
4166 | - } |
4167 | - my $result = '(' . join(' OR ', @clauses) . ')'; |
4168 | - my $where = { |
4169 | - slice => \@r_slice, |
4170 | - scols => \@r_scols, |
4171 | - where => $result, |
4172 | - }; |
4173 | - return $where; |
4174 | -} |
4175 | - |
4176 | -sub generate_del_stmt { |
4177 | - my ( $self, %args ) = @_; |
4178 | - |
4179 | - my $tbl = $args{tbl_struct}; |
4180 | - my @cols = $args{cols} ? @{$args{cols}} : (); |
4181 | - my $tp = $self->{TableParser}; |
4182 | - my $q = $self->{Quoter}; |
4183 | - |
4184 | - my @del_cols; |
4185 | - my @del_slice; |
4186 | - |
4187 | - my $index = $tp->find_best_index($tbl, $args{index}); |
4188 | - die "Cannot find an ascendable index in table" unless $index; |
4189 | - |
4190 | - if ( $index ) { |
4191 | - @del_cols = @{$tbl->{keys}->{$index}->{cols}}; |
4192 | - } |
4193 | - else { |
4194 | - @del_cols = @{$tbl->{cols}}; |
4195 | - } |
4196 | - PTDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols)); |
4197 | - |
4198 | - my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; |
4199 | - foreach my $col ( @del_cols ) { |
4200 | - if ( !exists $col_posn{$col} ) { |
4201 | - push @cols, $col; |
4202 | - $col_posn{$col} = $#cols; |
4203 | - } |
4204 | - push @del_slice, $col_posn{$col}; |
4205 | - } |
4206 | - PTDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice)); |
4207 | - |
4208 | - my $del_stmt = { |
4209 | - cols => \@cols, |
4210 | - index => $index, |
4211 | - where => '', |
4212 | - slice => [], |
4213 | - scols => [], |
4214 | - }; |
4215 | - |
4216 | - my @clauses; |
4217 | - foreach my $i ( 0 .. $#del_slice ) { |
4218 | - my $ord = $del_slice[$i]; |
4219 | - my $col = $cols[$ord]; |
4220 | - my $quo = $q->quote($col); |
4221 | - if ( $tbl->{is_nullable}->{$col} ) { |
4222 | - push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))"; |
4223 | - push @{$del_stmt->{slice}}, $ord, $ord; |
4224 | - push @{$del_stmt->{scols}}, $col, $col; |
4225 | - } |
4226 | - else { |
4227 | - push @clauses, "$quo = ?"; |
4228 | - push @{$del_stmt->{slice}}, $ord; |
4229 | - push @{$del_stmt->{scols}}, $col; |
4230 | - } |
4231 | - } |
4232 | - |
4233 | - $del_stmt->{where} = '(' . join(' AND ', @clauses) . ')'; |
4234 | - |
4235 | - return $del_stmt; |
4236 | -} |
4237 | - |
4238 | -sub generate_ins_stmt { |
4239 | - my ( $self, %args ) = @_; |
4240 | - foreach my $arg ( qw(ins_tbl sel_cols) ) { |
4241 | - die "I need a $arg argument" unless $args{$arg}; |
4242 | - } |
4243 | - my $ins_tbl = $args{ins_tbl}; |
4244 | - my @sel_cols = @{$args{sel_cols}}; |
4245 | - |
4246 | - die "You didn't specify any SELECT columns" unless @sel_cols; |
4247 | - |
4248 | - my @ins_cols; |
4249 | - my @ins_slice; |
4250 | - for my $i ( 0..$#sel_cols ) { |
4251 | - next unless $ins_tbl->{is_col}->{$sel_cols[$i]}; |
4252 | - push @ins_cols, $sel_cols[$i]; |
4253 | - push @ins_slice, $i; |
4254 | - } |
4255 | - |
4256 | - return { |
4257 | - cols => \@ins_cols, |
4258 | - slice => \@ins_slice, |
4259 | - }; |
4260 | -} |
4261 | - |
4262 | -sub _d { |
4263 | - my ($package, undef, $line) = caller 0; |
4264 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
4265 | - map { defined $_ ? $_ : 'undef' } |
4266 | - @_; |
4267 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
4268 | -} |
4269 | - |
4270 | -1; |
4271 | -} |
4272 | -# ########################################################################### |
4273 | -# End TableNibbler package |
4274 | -# ########################################################################### |
4275 | - |
4276 | -# ########################################################################### |
4277 | -# TableChecksum package |
4278 | -# This package is a copy without comments from the original. The original |
4279 | -# with comments and its test file can be found in the Bazaar repository at, |
4280 | -# lib/TableChecksum.pm |
4281 | -# t/lib/TableChecksum.t |
4282 | -# See https://launchpad.net/percona-toolkit for more information. |
4283 | -# ########################################################################### |
4284 | -{ |
4285 | -package TableChecksum; |
4286 | - |
4287 | -use strict; |
4288 | -use warnings FATAL => 'all'; |
4289 | -use English qw(-no_match_vars); |
4290 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
4291 | - |
4292 | -use List::Util qw(max); |
4293 | - |
4294 | -our %ALGOS = ( |
4295 | - CHECKSUM => { pref => 0, hash => 0 }, |
4296 | - BIT_XOR => { pref => 2, hash => 1 }, |
4297 | - ACCUM => { pref => 3, hash => 1 }, |
4298 | -); |
4299 | - |
4300 | -sub new { |
4301 | - my ( $class, %args ) = @_; |
4302 | - foreach my $arg ( qw(Quoter) ) { |
4303 | - die "I need a $arg argument" unless defined $args{$arg}; |
4304 | - } |
4305 | - my $self = { %args }; |
4306 | - return bless $self, $class; |
4307 | -} |
4308 | - |
4309 | -sub crc32 { |
4310 | - my ( $self, $string ) = @_; |
4311 | - my $poly = 0xEDB88320; |
4312 | - my $crc = 0xFFFFFFFF; |
4313 | - foreach my $char ( split(//, $string) ) { |
4314 | - my $comp = ($crc ^ ord($char)) & 0xFF; |
4315 | - for ( 1 .. 8 ) { |
4316 | - $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1; |
4317 | - } |
4318 | - $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp; |
4319 | - } |
4320 | - return $crc ^ 0xFFFFFFFF; |
4321 | -} |
4322 | - |
4323 | -sub get_crc_wid { |
4324 | - my ( $self, $dbh, $func ) = @_; |
4325 | - my $crc_wid = 16; |
4326 | - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { |
4327 | - eval { |
4328 | - my ($val) = $dbh->selectrow_array("SELECT $func('a')"); |
4329 | - $crc_wid = max(16, length($val)); |
4330 | - }; |
4331 | - } |
4332 | - return $crc_wid; |
4333 | -} |
4334 | - |
4335 | -sub get_crc_type { |
4336 | - my ( $self, $dbh, $func ) = @_; |
4337 | - my $type = ''; |
4338 | - my $length = 0; |
4339 | - my $sql = "SELECT $func('a')"; |
4340 | - my $sth = $dbh->prepare($sql); |
4341 | - eval { |
4342 | - $sth->execute(); |
4343 | - $type = $sth->{mysql_type_name}->[0]; |
4344 | - $length = $sth->{mysql_length}->[0]; |
4345 | - PTDEBUG && _d($sql, $type, $length); |
4346 | - if ( $type eq 'bigint' && $length < 20 ) { |
4347 | - $type = 'int'; |
4348 | - } |
4349 | - }; |
4350 | - $sth->finish; |
4351 | - PTDEBUG && _d('crc_type:', $type, 'length:', $length); |
4352 | - return ($type, $length); |
4353 | -} |
4354 | - |
4355 | -sub best_algorithm { |
4356 | - my ( $self, %args ) = @_; |
4357 | - my ( $alg, $dbh ) = @args{ qw(algorithm dbh) }; |
4358 | - my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS; |
4359 | - die "Invalid checksum algorithm $alg" |
4360 | - if $alg && !$ALGOS{$alg}; |
4361 | - |
4362 | - if ( |
4363 | - $args{where} || $args{chunk} # CHECKSUM does whole table |
4364 | - || $args{replicate}) # CHECKSUM can't do INSERT.. SELECT |
4365 | - { |
4366 | - PTDEBUG && _d('Cannot use CHECKSUM algorithm'); |
4367 | - @choices = grep { $_ ne 'CHECKSUM' } @choices; |
4368 | - } |
4369 | - |
4370 | - |
4371 | - if ( $alg && grep { $_ eq $alg } @choices ) { |
4372 | - PTDEBUG && _d('User requested', $alg, 'algorithm'); |
4373 | - return $alg; |
4374 | - } |
4375 | - |
4376 | - if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) { |
4377 | - PTDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired'); |
4378 | - @choices = grep { $_ ne 'CHECKSUM' } @choices; |
4379 | - } |
4380 | - |
4381 | - PTDEBUG && _d('Algorithms, in order:', @choices); |
4382 | - return $choices[0]; |
4383 | -} |
4384 | - |
4385 | -sub is_hash_algorithm { |
4386 | - my ( $self, $algorithm ) = @_; |
4387 | - return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash}; |
4388 | -} |
4389 | - |
4390 | -sub choose_hash_func { |
4391 | - my ( $self, %args ) = @_; |
4392 | - my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1); |
4393 | - if ( $args{function} ) { |
4394 | - unshift @funcs, $args{function}; |
4395 | - } |
4396 | - my ($result, $error); |
4397 | - do { |
4398 | - my $func; |
4399 | - eval { |
4400 | - $func = shift(@funcs); |
4401 | - my $sql = "SELECT $func('test-string')"; |
4402 | - PTDEBUG && _d($sql); |
4403 | - $args{dbh}->do($sql); |
4404 | - $result = $func; |
4405 | - }; |
4406 | - if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) { |
4407 | - $error .= qq{$func cannot be used because "$1"\n}; |
4408 | - PTDEBUG && _d($func, 'cannot be used because', $1); |
4409 | - } |
4410 | - } while ( @funcs && !$result ); |
4411 | - |
4412 | - die $error unless $result; |
4413 | - PTDEBUG && _d('Chosen hash func:', $result); |
4414 | - return $result; |
4415 | -} |
4416 | - |
4417 | -sub optimize_xor { |
4418 | - my ( $self, %args ) = @_; |
4419 | - my ($dbh, $func) = @args{qw(dbh function)}; |
4420 | - |
4421 | - die "$func never needs the BIT_XOR optimization" |
4422 | - if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i; |
4423 | - |
4424 | - my $opt_slice = 0; |
4425 | - my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0]; |
4426 | - my $sliced = ''; |
4427 | - my $start = 1; |
4428 | - my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced); |
4429 | - |
4430 | - do { # Try different positions till sliced result equals non-sliced. |
4431 | - PTDEBUG && _d('Trying slice', $opt_slice); |
4432 | - $dbh->do(q{SET @crc := '', @cnt := 0}); |
4433 | - my $slices = $self->make_xor_slices( |
4434 | - query => "\@crc := $func('a')", |
4435 | - crc_wid => $crc_wid, |
4436 | - opt_slice => $opt_slice, |
4437 | - ); |
4438 | - |
4439 | - my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x"; |
4440 | - $sliced = ($dbh->selectrow_array($sql))[0]; |
4441 | - if ( $sliced ne $unsliced ) { |
4442 | - PTDEBUG && _d('Slice', $opt_slice, 'does not work'); |
4443 | - $start += 16; |
4444 | - ++$opt_slice; |
4445 | - } |
4446 | - } while ( $start < $crc_wid && $sliced ne $unsliced ); |
4447 | - |
4448 | - if ( $sliced eq $unsliced ) { |
4449 | - PTDEBUG && _d('Slice', $opt_slice, 'works'); |
4450 | - return $opt_slice; |
4451 | - } |
4452 | - else { |
4453 | - PTDEBUG && _d('No slice works'); |
4454 | - return undef; |
4455 | - } |
4456 | -} |
4457 | - |
4458 | -sub make_xor_slices { |
4459 | - my ( $self, %args ) = @_; |
4460 | - foreach my $arg ( qw(query crc_wid) ) { |
4461 | - die "I need a $arg argument" unless defined $args{$arg}; |
4462 | - } |
4463 | - my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)}; |
4464 | - |
4465 | - my @slices; |
4466 | - for ( my $start = 1; $start <= $crc_wid; $start += 16 ) { |
4467 | - my $len = $crc_wid - $start + 1; |
4468 | - if ( $len > 16 ) { |
4469 | - $len = 16; |
4470 | - } |
4471 | - push @slices, |
4472 | - "LPAD(CONV(BIT_XOR(" |
4473 | - . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))" |
4474 | - . ", 10, 16), $len, '0')"; |
4475 | - } |
4476 | - |
4477 | - if ( defined $opt_slice && $opt_slice < @slices ) { |
4478 | - $slices[$opt_slice] =~ s/\@crc/\@crc := $query/; |
4479 | - } |
4480 | - else { |
4481 | - map { s/\@crc/$query/ } @slices; |
4482 | - } |
4483 | - |
4484 | - return join(', ', @slices); |
4485 | -} |
4486 | - |
4487 | -sub make_row_checksum { |
4488 | - my ( $self, %args ) = @_; |
4489 | - my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) }; |
4490 | - my $q = $self->{Quoter}; |
4491 | - |
4492 | - my $sep = $args{sep} || '#'; |
4493 | - $sep =~ s/'//g; |
4494 | - $sep ||= '#'; |
4495 | - |
4496 | - my $ignorecols = $args{ignorecols} || {}; |
4497 | - |
4498 | - my %cols = map { lc($_) => 1 } |
4499 | - grep { !exists $ignorecols->{$_} } |
4500 | - ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}); |
4501 | - my %seen; |
4502 | - my @cols = |
4503 | - map { |
4504 | - my $type = $tbl_struct->{type_for}->{$_}; |
4505 | - my $result = $q->quote($_); |
4506 | - if ( $type eq 'timestamp' ) { |
4507 | - $result .= ' + 0'; |
4508 | - } |
4509 | - elsif ( $args{float_precision} && $type =~ m/float|double/ ) { |
4510 | - $result = "ROUND($result, $args{float_precision})"; |
4511 | - } |
4512 | - elsif ( $args{trim} && $type =~ m/varchar/ ) { |
4513 | - $result = "TRIM($result)"; |
4514 | - } |
4515 | - $result; |
4516 | - } |
4517 | - grep { |
4518 | - $cols{$_} && !$seen{$_}++ |
4519 | - } |
4520 | - @{$tbl_struct->{cols}}; |
4521 | - |
4522 | - my $query; |
4523 | - if ( !$args{no_cols} ) { |
4524 | - $query = join(', ', |
4525 | - map { |
4526 | - my $col = $_; |
4527 | - if ( $col =~ m/\+ 0/ ) { |
4528 | - my ($real_col) = /^(\S+)/; |
4529 | - $col .= " AS $real_col"; |
4530 | - } |
4531 | - elsif ( $col =~ m/TRIM/ ) { |
4532 | - my ($real_col) = m/TRIM\(([^\)]+)\)/; |
4533 | - $col .= " AS $real_col"; |
4534 | - } |
4535 | - $col; |
4536 | - } @cols) |
4537 | - . ', '; |
4538 | - } |
4539 | - |
4540 | - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { |
4541 | - my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}}; |
4542 | - if ( @nulls ) { |
4543 | - my $bitmap = "CONCAT(" |
4544 | - . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls) |
4545 | - . ")"; |
4546 | - push @cols, $bitmap; |
4547 | - } |
4548 | - |
4549 | - $query .= @cols > 1 |
4550 | - ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))' |
4551 | - : "$func($cols[0])"; |
4552 | - } |
4553 | - else { |
4554 | - my $fnv_func = uc $func; |
4555 | - $query .= "$fnv_func(" . join(', ', @cols) . ')'; |
4556 | - } |
4557 | - |
4558 | - return $query; |
4559 | -} |
4560 | - |
4561 | -sub make_checksum_query { |
4562 | - my ( $self, %args ) = @_; |
4563 | - my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type); |
4564 | - foreach my $arg( @required_args ) { |
4565 | - die "I need a $arg argument" unless $args{$arg}; |
4566 | - } |
4567 | - my ( $db, $tbl, $tbl_struct, $algorithm, |
4568 | - $crc_wid, $crc_type) = @args{@required_args}; |
4569 | - my $func = $args{function}; |
4570 | - my $q = $self->{Quoter}; |
4571 | - my $result; |
4572 | - |
4573 | - die "Invalid or missing checksum algorithm" |
4574 | - unless $algorithm && $ALGOS{$algorithm}; |
4575 | - |
4576 | - if ( $algorithm eq 'CHECKSUM' ) { |
4577 | - return "CHECKSUM TABLE " . $q->quote($db, $tbl); |
4578 | - } |
4579 | - |
4580 | - my $expr = $self->make_row_checksum(%args, no_cols=>1); |
4581 | - |
4582 | - if ( $algorithm eq 'BIT_XOR' ) { |
4583 | - if ( $crc_type =~ m/int$/ ) { |
4584 | - $result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc "; |
4585 | - } |
4586 | - else { |
4587 | - my $slices = $self->make_xor_slices( query => $expr, %args ); |
4588 | - $result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc "; |
4589 | - } |
4590 | - } |
4591 | - else { |
4592 | - if ( $crc_type =~ m/int$/ ) { |
4593 | - $result = "COALESCE(RIGHT(MAX(" |
4594 | - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " |
4595 | - . "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))" |
4596 | - . "), $crc_wid), 0) AS crc "; |
4597 | - } |
4598 | - else { |
4599 | - $result = "COALESCE(RIGHT(MAX(" |
4600 | - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " |
4601 | - . "$func(CONCAT(\@crc, $expr)))" |
4602 | - . "), $crc_wid), 0) AS crc "; |
4603 | - } |
4604 | - } |
4605 | - if ( $args{replicate} ) { |
4606 | - $result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} " |
4607 | - . "(db, tbl, chunk, boundaries, this_cnt, this_crc) " |
4608 | - . "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result"; |
4609 | - } |
4610 | - else { |
4611 | - $result = "SELECT " |
4612 | - . ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '') |
4613 | - . "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result"; |
4614 | - } |
4615 | - return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/"; |
4616 | -} |
4617 | - |
4618 | -sub find_replication_differences { |
4619 | - my ( $self, $dbh, $table ) = @_; |
4620 | - |
4621 | - my $sql |
4622 | - = "SELECT db, tbl, CONCAT(db, '.', tbl) AS `table`, " |
4623 | - . "chunk, chunk_index, lower_boundary, upper_boundary, " |
4624 | - . "COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, " |
4625 | - . "COALESCE(" |
4626 | - . "this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), 0" |
4627 | - . ") AS crc_diff, this_cnt, master_cnt, this_crc, master_crc " |
4628 | - . "FROM $table " |
4629 | - . "WHERE master_cnt <> this_cnt OR master_crc <> this_crc " |
4630 | - . "OR ISNULL(master_crc) <> ISNULL(this_crc)"; |
4631 | - PTDEBUG && _d($sql); |
4632 | - my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} }); |
4633 | - return $diffs; |
4634 | -} |
4635 | - |
4636 | -sub _d { |
4637 | - my ($package, undef, $line) = caller 0; |
4638 | - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
4639 | - map { defined $_ ? $_ : 'undef' } |
4640 | - @_; |
4641 | - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
4642 | -} |
4643 | - |
4644 | -1; |
4645 | -} |
4646 | -# ########################################################################### |
4647 | -# End TableChecksum package |
4648 | -# ########################################################################### |
4649 | - |
4650 | -# ########################################################################### |
4651 | -# TableSyncer package |
4652 | -# This package is a copy without comments from the original. The original |
4653 | -# with comments and its test file can be found in the Bazaar repository at, |
4654 | -# lib/TableSyncer.pm |
4655 | -# t/lib/TableSyncer.t |
4656 | -# See https://launchpad.net/percona-toolkit for more information. |
4657 | -# ########################################################################### |
4658 | -{ |
4659 | -package TableSyncer; |
4660 | - |
4661 | -use strict; |
4662 | -use warnings FATAL => 'all'; |
4663 | -use English qw(-no_match_vars); |
4664 | -use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
4665 | - |
4666 | -use Data::Dumper; |
4667 | -$Data::Dumper::Indent = 1; |
4668 | -$Data::Dumper::Sortkeys = 1; |
4669 | -$Data::Dumper::Quotekeys = 0; |
4670 | - |
4671 | -sub new { |
4672 | - my ( $class, %args ) = @_; |
4673 | - my @required_args = qw(MasterSlave Quoter TableChecksum Retry); |
4674 | - foreach my $arg ( @required_args ) { |
4675 | - die "I need a $arg argument" unless defined $args{$arg}; |
4676 | - } |
4677 | - my $self = { %args }; |
4678 | - return bless $self, $class; |
4679 | -} |
4680 | - |
4681 | -sub get_best_plugin { |
4682 | - my ( $self, %args ) = @_; |
4683 | - foreach my $arg ( qw(plugins tbl_struct) ) { |
4684 | - die "I need a $arg argument" unless $args{$arg}; |
4685 | - } |
4686 | - PTDEBUG && _d('Getting best plugin'); |
4687 | - foreach my $plugin ( @{$args{plugins}} ) { |
4688 | - PTDEBUG && _d('Trying plugin', $plugin->name); |
4689 | - my ($can_sync, %plugin_args) = $plugin->can_sync(%args); |
4690 | - if ( $can_sync ) { |
4691 | - PTDEBUG && _d('Can sync with', $plugin->name, Dumper(\%plugin_args)); |
4692 | - return $plugin, %plugin_args; |
4693 | - } |
4694 | - } |
4695 | - PTDEBUG && _d('No plugin can sync the table'); |
4696 | - return; |
4697 | -} |
4698 | - |
4699 | -sub sync_table { |
4700 | - my ( $self, %args ) = @_; |
4701 | - my @required_args = qw(plugins src dst tbl_struct cols chunk_size |
4702 | - RowDiff ChangeHandler); |
4703 | - foreach my $arg ( @required_args ) { |
4704 | - die "I need a $arg argument" unless $args{$arg}; |
4705 | - } |
4706 | - PTDEBUG && _d('Syncing table with args:', |
4707 | - map { "$_: " . Dumper($args{$_}) } |
4708 | - qw(plugins src dst tbl_struct cols chunk_size)); |
4709 | - |
4710 | - my ($plugins, $src, $dst, $tbl_struct, $cols, $chunk_size, $rd, $ch) |
4711 | - = @args{@required_args}; |
4712 | - my $dp = $self->{DSNParser}; |
4713 | - $args{trace} = 1 unless defined $args{trace}; |
4714 | - |
4715 | - if ( $args{bidirectional} && $args{ChangeHandler}->{queue} ) { |
4716 | - die "Queueing does not work with bidirectional syncing"; |
4717 | - } |
4718 | - |
4719 | - $args{index_hint} = 1 unless defined $args{index_hint}; |
4720 | - $args{lock} ||= 0; |
4721 | - $args{wait} ||= 0; |
4722 | - $args{transaction} ||= 0; |
4723 | - $args{timeout_ok} ||= 0; |
4724 | - |
4725 | - my $q = $self->{Quoter}; |
4726 | - |
4727 | - my ($plugin, %plugin_args) = $self->get_best_plugin(%args); |
4728 | - die "No plugin can sync $src->{db}.$src->{tbl}" unless $plugin; |
4729 | - |
4730 | - my $crc_col = '__crc'; |
4731 | - while ( $tbl_struct->{is_col}->{$crc_col} ) { |
4732 | - $crc_col = "_$crc_col"; # Prepend more _ until not a column. |
4733 | - } |
4734 | - PTDEBUG && _d('CRC column:', $crc_col); |
4735 | - |
4736 | - my $index_hint; |
4737 | - if ( $args{chunk_index} ) { |
4738 | - PTDEBUG && _d('Using given chunk index for index hint'); |
4739 | - $index_hint = "FORCE INDEX (" . $q->quote($args{chunk_index}) . ")"; |
4740 | - } |
4741 | - elsif ( $plugin_args{chunk_index} && $args{index_hint} ) { |
4742 | - PTDEBUG && _d('Using chunk index chosen by plugin for index hint'); |
4743 | - $index_hint = "FORCE INDEX (" . $q->quote($plugin_args{chunk_index}) . ")"; |
4744 | - } |
4745 | - PTDEBUG && _d('Index hint:', $index_hint); |
4746 | - |
4747 | - eval { |
4748 | - $plugin->prepare_to_sync( |
4749 | - %args, |
4750 | - %plugin_args, |
4751 | - dbh => $src->{dbh}, |
4752 | - db => $src->{db}, |
4753 | - tbl => $src->{tbl}, |
4754 | - crc_col => $crc_col, |
4755 | - index_hint => $index_hint, |
4756 | - ); |
4757 | - }; |
4758 | - if ( $EVAL_ERROR ) { |
4759 | - die 'Failed to prepare TableSync', $plugin->name, ' plugin: ', |
4760 | - $EVAL_ERROR; |
4761 | - } |
4762 | - |
4763 | - if ( $plugin->uses_checksum() ) { |
4764 | - eval { |
4765 | - my ($chunk_sql, $row_sql) = $self->make_checksum_queries(%args); |
4766 | - $plugin->set_checksum_queries($chunk_sql, $row_sql); |
4767 | - }; |
4768 | - if ( $EVAL_ERROR ) { |
4769 | - die "Failed to make checksum queries: $EVAL_ERROR"; |
4770 | - } |
4771 | - } |
4772 | - |
4773 | - if ( $args{dry_run} ) { |
4774 | - return $ch->get_changes(), ALGORITHM => $plugin->name; |
4775 | - } |
4776 | - |
4777 | - |
4778 | - eval { |
4779 | - $src->{dbh}->do("USE `$src->{db}`"); |
4780 | - $dst->{dbh}->do("USE `$dst->{db}`"); |
4781 | - }; |
4782 | - if ( $EVAL_ERROR ) { |
4783 | - die "Failed to USE database on source or destination: $EVAL_ERROR"; |
4784 | - } |
4785 | - |
4786 | - PTDEBUG && _d('left dbh', $src->{dbh}); |
4787 | - PTDEBUG && _d('right dbh', $dst->{dbh}); |
4788 | - |
4789 | - chomp(my $hostname = `hostname`); |
4790 | - my $trace_msg |
4791 | - = $args{trace} ? "src_db:$src->{db} src_tbl:$src->{tbl} " |
4792 | - . ($dp && $src->{dsn} ? "src_dsn:".$dp->as_string($src->{dsn}) : "") |
4793 | - . " dst_db:$dst->{db} dst_tbl:$dst->{tbl} " |
4794 | - . ($dp && $dst->{dsn} ? "dst_dsn:".$dp->as_string($dst->{dsn}) : "") |
4795 | - . " " . join(" ", map { "$_:" . ($args{$_} || 0) } |
4796 | - qw(lock transaction changing_src replicate bidirectional)) |
4797 | - . " pid:$PID " |
4798 | - . ($ENV{USER} ? "user:$ENV{USER} " : "") |
4799 | - . ($hostname ? "host:$hostname" : "") |
4800 | - : ""; |
4801 | - PTDEBUG && _d("Binlog trace message:", $trace_msg); |
4802 | - |
4803 | - $self->lock_and_wait(%args, lock_level => 2); # per-table lock |
4804 | - |
4805 | - my $callback = $args{callback}; |
4806 | - my $cycle = 0; |
4807 | - while ( !$plugin->done() ) { |
4808 | - |
4809 | - PTDEBUG && _d('Beginning sync cycle', $cycle); |
4810 | - my $src_sql = $plugin->get_sql( |
4811 | - database => $src->{db}, |
4812 | - table => $src->{tbl}, |
4813 | - where => $args{where}, |
4814 | - ); |
4815 | - my $dst_sql = $plugin->get_sql( |
4816 | - database => $dst->{db}, |
4817 | - table => $dst->{tbl}, |
4818 | - where => $args{where}, |
4819 | - ); |
4820 | - |
4821 | - if ( $args{transaction} ) { |
4822 | - if ( $args{bidirectional} ) { |
4823 | - $src_sql .= ' FOR UPDATE'; |
4824 | - $dst_sql .= ' FOR UPDATE'; |
4825 | - } |
4826 | - elsif ( $args{changing_src} ) { |
4827 | - $src_sql .= ' FOR UPDATE'; |
4828 | - $dst_sql .= ' LOCK IN SHARE MODE'; |
4829 | - } |
4830 | - else { |
4831 | - $src_sql .= ' LOCK IN SHARE MODE'; |
4832 | - $dst_sql .= ' FOR UPDATE'; |
4833 | - } |
4834 | - } |
4835 | - PTDEBUG && _d('src:', $src_sql); |
4836 | - PTDEBUG && _d('dst:', $dst_sql); |
4837 | - |
4838 | - $callback->($src_sql, $dst_sql) if $callback; |
4839 | - |
4840 | - $plugin->prepare_sync_cycle($src); |
4841 | - $plugin->prepare_sync_cycle($dst); |
4842 | - |
4843 | - my $src_sth = $src->{dbh}->prepare($src_sql); |
4844 | - my $dst_sth = $dst->{dbh}->prepare($dst_sql); |
4845 | - if ( $args{buffer_to_client} ) { |
4846 | - $src_sth->{mysql_use_result} = 1; |
4847 | - $dst_sth->{mysql_use_result} = 1; |
4848 | - } |
4849 | - |
4850 | - my $executed_src = 0; |
4851 | - if ( !$cycle || !$plugin->pending_changes() ) { |
4852 | - $executed_src |
4853 | - = $self->lock_and_wait(%args, src_sth => $src_sth, lock_level => 1); |
4854 | - } |
4855 | - |
4856 | - $src_sth->execute() unless $executed_src; |
4857 | - $dst_sth->execute(); |
4858 | - |
4859 | - $rd->compare_sets( |
4860 | - left_sth => $src_sth, |
4861 | - right_sth => $dst_sth, |
4862 | - left_dbh => $src->{dbh}, |
4863 | - right_dbh => $dst->{dbh}, |
4864 | - syncer => $plugin, |
4865 | - tbl_struct => $tbl_struct, |
4866 | - ); |
4867 | - $ch->process_rows(1, $trace_msg); |
4868 | - |
4869 | - PTDEBUG && _d('Finished sync cycle', $cycle); |
4870 | - $cycle++; |
4871 | - } |
4872 | - |
4873 | - $ch->process_rows(0, $trace_msg); |
4874 | - |
4875 | - $self->unlock(%args, lock_level => 2); |
4876 | - |
4877 | - return $ch->get_changes(), ALGORITHM => $plugin->name; |
4878 | -} |
4879 | - |
4880 | -sub make_checksum_queries { |
4881 | - my ( $self, %args ) = @_; |
4882 | - my @required_args = qw(src dst tbl_struct); |
4883 | - foreach my $arg ( @required_args ) { |
4884 | - die "I need a $arg argument" unless $args{$arg}; |
4885 | - } |
4886 | - my ($src, $dst, $tbl_struct) = @args{@required_args}; |
4887 | - my $checksum = $self->{TableChecksum}; |
4888 | - |
4889 | - my $src_algo = $checksum->best_algorithm( |
4890 | - algorithm => 'BIT_XOR', |
4891 | - dbh => $src->{dbh}, |
4892 | - where => 1, |
4893 | - chunk => 1, |
4894 | - count => 1, |
4895 | - ); |
4896 | - my $dst_algo = $checksum->best_algorithm( |
4897 | - algorithm => 'BIT_XOR', |
4898 | - dbh => $dst->{dbh}, |
4899 | - where => 1, |
4900 | - chunk => 1, |
4901 | - count => 1, |
4902 | - ); |
4903 | - if ( $src_algo ne $dst_algo ) { |
4904 | - die "Source and destination checksum algorithms are different: ", |
4905 | - "$src_algo on source, $dst_algo on destination" |
4906 | - } |
4907 | - PTDEBUG && _d('Chosen algo:', $src_algo); |
4908 | - |
4909 | - my $src_func = $checksum->choose_hash_func(dbh => $src->{dbh}, %args); |
4910 | - my $dst_func = $checksum->choose_hash_func(dbh => $dst->{dbh}, %args); |
4911 | - if ( $src_func ne $dst_func ) { |
4912 | - die "Source and destination hash functions are different: ", |
4913 | - "$src_func on source, $dst_func on destination"; |
4914 | - } |
4915 | - PTDEBUG && _d('Chosen hash func:', $src_func); |
4916 | - |
4917 | - |
4918 | - my $crc_wid = $checksum->get_crc_wid($src->{dbh}, $src_func); |
4919 | - my ($crc_type) = $checksum->get_crc_type($src->{dbh}, $src_func); |
4920 | - my $opt_slice; |
4921 | - if ( $src_algo eq 'BIT_XOR' && $crc_type !~ m/int$/ ) { |
4922 | - $opt_slice = $checksum->optimize_xor( |
4923 | - dbh => $src->{dbh}, |
4924 | - function => $src_func |
4925 | - ); |
4926 | - } |
4927 | - |
4928 | - my $chunk_sql = $checksum->make_checksum_query( |
4929 | - %args, |
4930 | - db => $src->{db}, |
4931 | - tbl => $src->{tbl}, |
4932 | - algorithm => $src_algo, |
4933 | - function => $src_func, |
4934 | - crc_wid => $crc_wid, |
4935 | - crc_type => $crc_type, |
4936 | - opt_slice => $opt_slice, |
4937 | - replicate => undef, # replicate means something different to this sub |
4938 | - ); # than what we use it for; do not pass it! |
4939 | - PTDEBUG && _d('Chunk sql:', $chunk_sql); |
4940 | - my $row_sql = $checksum->make_row_checksum( |
4941 | - %args, |
4942 | - function => $src_func, |
4943 | - ); |
4944 | - PTDEBUG && _d('Row sql:', $row_sql); |
4945 | - return $chunk_sql, $row_sql; |
4946 | -} |
4947 | - |
4948 | -sub lock_table { |
4949 | - my ( $self, $dbh, $where, $db_tbl, $mode ) = @_; |
4950 | - my $query = "LOCK TABLES $db_tbl $mode"; |
4951 | - PTDEBUG && _d($query); |
4952 | - $dbh->do($query); |
4953 | - PTDEBUG && _d('Acquired table lock on', $where, 'in', $mode, 'mode'); |
4954 | -} |
4955 | - |
4956 | -sub unlock { |
4957 | - my ( $self, %args ) = @_; |
4958 | - |
4959 | - foreach my $arg ( qw(src dst lock transaction lock_level) ) { |
4960 | - die "I need a $arg argument" unless defined $args{$arg}; |
4961 | - } |
4962 | - my $src = $args{src}; |
4963 | - my $dst = $args{dst}; |
4964 | - |
4965 | - return unless $args{lock} && $args{lock} <= $args{lock_level}; |
4966 | - |
4967 | - foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) { |
4968 | - if ( $args{transaction} ) { |
4969 | - PTDEBUG && _d('Committing', $dbh); |
4970 | - $dbh->commit(); |
4971 | - } |
4972 | - else { |
4973 | - my $sql = 'UNLOCK TABLES'; |
4974 | - PTDEBUG && _d($dbh, $sql); |
4975 | - $dbh->do($sql); |
4976 | - } |
4977 | - } |
4978 | - |
4979 | - return; |
4980 | -} |
4981 | - |
4982 | -sub lock_and_wait { |
4983 | - my ( $self, %args ) = @_; |
4984 | - my $result = 0; |
4985 | - |
4986 | - foreach my $arg ( qw(src dst lock lock_level) ) { |
4987 | - die "I need a $arg argument" unless defined $args{$arg}; |
4988 | - } |
4989 | - my $src = $args{src}; |
4990 | - my $dst = $args{dst}; |
4991 | - |
4992 | - return unless $args{lock} && $args{lock} == $args{lock_level}; |
4993 | - PTDEBUG && _d('lock and wait, lock level', $args{lock}); |
4994 | - |
4995 | - foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) { |
4996 | - if ( $args{transaction} ) { |
4997 | - PTDEBUG && _d('Committing', $dbh); |
4998 | - $dbh->commit(); |
4999 | - } |
5000 | - else { |
The diff has been truncated for viewing.
All passing: https:/ /refute. testnoir. com/percona- toolkit/ jobs/percona- toolkit- full-branch- test/28/ results