Merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint into lp:percona-toolkit/2.1
- pt-fingerprint
- Merge into 2.1
Proposed by
Daniel Nichter
Status: | Merged |
---|---|
Merged at revision: | 224 |
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint |
Merge into: | lp:percona-toolkit/2.1 |
Diff against target: |
2398 lines (+2333/-5) 8 files modified
bin/pt-fingerprint (+2143/-0) lib/QueryRewriter.pm (+24/-4) t/lib/QueryRewriter.t (+59/-1) t/pt-fingerprint/basics.t (+101/-0) t/pt-fingerprint/samples/query001 (+2/-0) t/pt-fingerprint/samples/query001.fingerprint (+1/-0) t/pt-fingerprint/samples/query002 (+2/-0) t/pt-fingerprint/samples/query002.fingerprint (+1/-0) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint |
Related bugs: | |
Related blueprints: |
Add pt-fingerprint
(Medium)
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Daniel Nichter | Approve | ||
Review via email: mp+100250@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'bin/pt-fingerprint' |
2 | --- bin/pt-fingerprint 1970-01-01 00:00:00 +0000 |
3 | +++ bin/pt-fingerprint 2012-03-30 22:06:22 +0000 |
4 | @@ -0,0 +1,2143 @@ |
5 | +#!/usr/bin/env perl |
6 | + |
7 | +# This program is part of Percona Toolkit: http://www.percona.com/software/ |
8 | +# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal |
9 | +# notices and disclaimers. |
10 | + |
11 | +use strict; |
12 | +use warnings FATAL => 'all'; |
13 | +use constant MKDEBUG => $ENV{MKDEBUG} || 0; |
14 | + |
15 | +# ########################################################################### |
16 | +# OptionParser package |
17 | +# This package is a copy without comments from the original. The original |
18 | +# with comments and its test file can be found in the Bazaar repository at, |
19 | +# lib/OptionParser.pm |
20 | +# t/lib/OptionParser.t |
21 | +# See https://launchpad.net/percona-toolkit for more information. |
22 | +# ########################################################################### |
23 | +{ |
24 | +package OptionParser; |
25 | + |
26 | +use strict; |
27 | +use warnings FATAL => 'all'; |
28 | +use English qw(-no_match_vars); |
29 | +use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
30 | + |
31 | +use List::Util qw(max); |
32 | +use Getopt::Long; |
33 | + |
34 | +my $POD_link_re = '[LC]<"?([^">]+)"?>'; |
35 | + |
36 | +sub new { |
37 | + my ( $class, %args ) = @_; |
38 | + my @required_args = qw(); |
39 | + foreach my $arg ( @required_args ) { |
40 | + die "I need a $arg argument" unless $args{$arg}; |
41 | + } |
42 | + |
43 | + my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/; |
44 | + $program_name ||= $PROGRAM_NAME; |
45 | + my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.'; |
46 | + |
47 | + my %attributes = ( |
48 | + 'type' => 1, |
49 | + 'short form' => 1, |
50 | + 'group' => 1, |
51 | + 'default' => 1, |
52 | + 'cumulative' => 1, |
53 | + 'negatable' => 1, |
54 | + ); |
55 | + |
56 | + my $self = { |
57 | + head1 => 'OPTIONS', # These args are used internally |
58 | + skip_rules => 0, # to instantiate another Option- |
59 | + item => '--(.*)', # Parser obj that parses the |
60 | + attributes => \%attributes, # DSN OPTIONS section. Tools |
61 | + parse_attributes => \&_parse_attribs, # don't tinker with these args. |
62 | + |
63 | + %args, |
64 | + |
65 | + strict => 1, # disabled by a special rule |
66 | + program_name => $program_name, |
67 | + opts => {}, |
68 | + got_opts => 0, |
69 | + short_opts => {}, |
70 | + defaults => {}, |
71 | + groups => {}, |
72 | + allowed_groups => {}, |
73 | + errors => [], |
74 | + rules => [], # desc of rules for --help |
75 | + mutex => [], # rule: opts are mutually exclusive |
76 | + atleast1 => [], # rule: at least one opt is required |
77 | + disables => {}, # rule: opt disables other opts |
78 | + defaults_to => {}, # rule: opt defaults to value of other opt |
79 | + DSNParser => undef, |
80 | + default_files => [ |
81 | + "/etc/percona-toolkit/percona-toolkit.conf", |
82 | + "/etc/percona-toolkit/$program_name.conf", |
83 | + "$home/.percona-toolkit.conf", |
84 | + "$home/.$program_name.conf", |
85 | + ], |
86 | + types => { |
87 | + string => 's', # standard Getopt type |
88 | + int => 'i', # standard Getopt type |
89 | + float => 'f', # standard Getopt type |
90 | + Hash => 'H', # hash, formed from a comma-separated list |
91 | + hash => 'h', # hash as above, but only if a value is given |
92 | + Array => 'A', # array, similar to Hash |
93 | + array => 'a', # array, similar to hash |
94 | + DSN => 'd', # DSN |
95 | + size => 'z', # size with kMG suffix (powers of 2^10) |
96 | + time => 'm', # time, with an optional suffix of s/h/m/d |
97 | + }, |
98 | + }; |
99 | + |
100 | + return bless $self, $class; |
101 | +} |
102 | + |
103 | +sub get_specs { |
104 | + my ( $self, $file ) = @_; |
105 | + $file ||= $self->{file} || __FILE__; |
106 | + my @specs = $self->_pod_to_specs($file); |
107 | + $self->_parse_specs(@specs); |
108 | + |
109 | + open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; |
110 | + my $contents = do { local $/ = undef; <$fh> }; |
111 | + close $fh; |
112 | + if ( $contents =~ m/^=head1 DSN OPTIONS/m ) { |
113 | + PTDEBUG && _d('Parsing DSN OPTIONS'); |
114 | + my $dsn_attribs = { |
115 | + dsn => 1, |
116 | + copy => 1, |
117 | + }; |
118 | + my $parse_dsn_attribs = sub { |
119 | + my ( $self, $option, $attribs ) = @_; |
120 | + map { |
121 | + my $val = $attribs->{$_}; |
122 | + if ( $val ) { |
123 | + $val = $val eq 'yes' ? 1 |
124 | + : $val eq 'no' ? 0 |
125 | + : $val; |
126 | + $attribs->{$_} = $val; |
127 | + } |
128 | + } keys %$attribs; |
129 | + return { |
130 | + key => $option, |
131 | + %$attribs, |
132 | + }; |
133 | + }; |
134 | + my $dsn_o = new OptionParser( |
135 | + description => 'DSN OPTIONS', |
136 | + head1 => 'DSN OPTIONS', |
137 | + dsn => 0, # XXX don't infinitely recurse! |
138 | + item => '\* (.)', # key opts are a single character |
139 | + skip_rules => 1, # no rules before opts |
140 | + attributes => $dsn_attribs, |
141 | + parse_attributes => $parse_dsn_attribs, |
142 | + ); |
143 | + my @dsn_opts = map { |
144 | + my $opts = { |
145 | + key => $_->{spec}->{key}, |
146 | + dsn => $_->{spec}->{dsn}, |
147 | + copy => $_->{spec}->{copy}, |
148 | + desc => $_->{desc}, |
149 | + }; |
150 | + $opts; |
151 | + } $dsn_o->_pod_to_specs($file); |
152 | + $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts); |
153 | + } |
154 | + |
155 | + if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) { |
156 | + $self->{version} = $1; |
157 | + PTDEBUG && _d($self->{version}); |
158 | + } |
159 | + |
160 | + return; |
161 | +} |
162 | + |
163 | +sub DSNParser { |
164 | + my ( $self ) = @_; |
165 | + return $self->{DSNParser}; |
166 | +}; |
167 | + |
168 | +sub get_defaults_files { |
169 | + my ( $self ) = @_; |
170 | + return @{$self->{default_files}}; |
171 | +} |
172 | + |
173 | +sub _pod_to_specs { |
174 | + my ( $self, $file ) = @_; |
175 | + $file ||= $self->{file} || __FILE__; |
176 | + open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR"; |
177 | + |
178 | + my @specs = (); |
179 | + my @rules = (); |
180 | + my $para; |
181 | + |
182 | + local $INPUT_RECORD_SEPARATOR = ''; |
183 | + while ( $para = <$fh> ) { |
184 | + next unless $para =~ m/^=head1 $self->{head1}/; |
185 | + last; |
186 | + } |
187 | + |
188 | + while ( $para = <$fh> ) { |
189 | + last if $para =~ m/^=over/; |
190 | + next if $self->{skip_rules}; |
191 | + chomp $para; |
192 | + $para =~ s/\s+/ /g; |
193 | + $para =~ s/$POD_link_re/$1/go; |
194 | + PTDEBUG && _d('Option rule:', $para); |
195 | + push @rules, $para; |
196 | + } |
197 | + |
198 | + die "POD has no $self->{head1} section" unless $para; |
199 | + |
200 | + do { |
201 | + if ( my ($option) = $para =~ m/^=item $self->{item}/ ) { |
202 | + chomp $para; |
203 | + PTDEBUG && _d($para); |
204 | + my %attribs; |
205 | + |
206 | + $para = <$fh>; # read next paragraph, possibly attributes |
207 | + |
208 | + if ( $para =~ m/: / ) { # attributes |
209 | + $para =~ s/\s+\Z//g; |
210 | + %attribs = map { |
211 | + my ( $attrib, $val) = split(/: /, $_); |
212 | + die "Unrecognized attribute for --$option: $attrib" |
213 | + unless $self->{attributes}->{$attrib}; |
214 | + ($attrib, $val); |
215 | + } split(/; /, $para); |
216 | + if ( $attribs{'short form'} ) { |
217 | + $attribs{'short form'} =~ s/-//; |
218 | + } |
219 | + $para = <$fh>; # read next paragraph, probably short help desc |
220 | + } |
221 | + else { |
222 | + PTDEBUG && _d('Option has no attributes'); |
223 | + } |
224 | + |
225 | + $para =~ s/\s+\Z//g; |
226 | + $para =~ s/\s+/ /g; |
227 | + $para =~ s/$POD_link_re/$1/go; |
228 | + |
229 | + $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s; |
230 | + PTDEBUG && _d('Short help:', $para); |
231 | + |
232 | + die "No description after option spec $option" if $para =~ m/^=item/; |
233 | + |
234 | + if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) { |
235 | + $option = $base_option; |
236 | + $attribs{'negatable'} = 1; |
237 | + } |
238 | + |
239 | + push @specs, { |
240 | + spec => $self->{parse_attributes}->($self, $option, \%attribs), |
241 | + desc => $para |
242 | + . (defined $attribs{default} ? " (default $attribs{default})" : ''), |
243 | + group => ($attribs{'group'} ? $attribs{'group'} : 'default'), |
244 | + }; |
245 | + } |
246 | + while ( $para = <$fh> ) { |
247 | + last unless $para; |
248 | + if ( $para =~ m/^=head1/ ) { |
249 | + $para = undef; # Can't 'last' out of a do {} block. |
250 | + last; |
251 | + } |
252 | + last if $para =~ m/^=item /; |
253 | + } |
254 | + } while ( $para ); |
255 | + |
256 | + die "No valid specs in $self->{head1}" unless @specs; |
257 | + |
258 | + close $fh; |
259 | + return @specs, @rules; |
260 | +} |
261 | + |
262 | +sub _parse_specs { |
263 | + my ( $self, @specs ) = @_; |
264 | + my %disables; # special rule that requires deferred checking |
265 | + |
266 | + foreach my $opt ( @specs ) { |
267 | + if ( ref $opt ) { # It's an option spec, not a rule. |
268 | + PTDEBUG && _d('Parsing opt spec:', |
269 | + map { ($_, '=>', $opt->{$_}) } keys %$opt); |
270 | + |
271 | + my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/; |
272 | + if ( !$long ) { |
273 | + die "Cannot parse long option from spec $opt->{spec}"; |
274 | + } |
275 | + $opt->{long} = $long; |
276 | + |
277 | + die "Duplicate long option --$long" if exists $self->{opts}->{$long}; |
278 | + $self->{opts}->{$long} = $opt; |
279 | + |
280 | + if ( length $long == 1 ) { |
281 | + PTDEBUG && _d('Long opt', $long, 'looks like short opt'); |
282 | + $self->{short_opts}->{$long} = $long; |
283 | + } |
284 | + |
285 | + if ( $short ) { |
286 | + die "Duplicate short option -$short" |
287 | + if exists $self->{short_opts}->{$short}; |
288 | + $self->{short_opts}->{$short} = $long; |
289 | + $opt->{short} = $short; |
290 | + } |
291 | + else { |
292 | + $opt->{short} = undef; |
293 | + } |
294 | + |
295 | + $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0; |
296 | + $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0; |
297 | + $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0; |
298 | + |
299 | + $opt->{group} ||= 'default'; |
300 | + $self->{groups}->{ $opt->{group} }->{$long} = 1; |
301 | + |
302 | + $opt->{value} = undef; |
303 | + $opt->{got} = 0; |
304 | + |
305 | + my ( $type ) = $opt->{spec} =~ m/=(.)/; |
306 | + $opt->{type} = $type; |
307 | + PTDEBUG && _d($long, 'type:', $type); |
308 | + |
309 | + |
310 | + $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ ); |
311 | + |
312 | + if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) { |
313 | + $self->{defaults}->{$long} = defined $def ? $def : 1; |
314 | + PTDEBUG && _d($long, 'default:', $def); |
315 | + } |
316 | + |
317 | + if ( $long eq 'config' ) { |
318 | + $self->{defaults}->{$long} = join(',', $self->get_defaults_files()); |
319 | + } |
320 | + |
321 | + if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) { |
322 | + $disables{$long} = $dis; |
323 | + PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis); |
324 | + } |
325 | + |
326 | + $self->{opts}->{$long} = $opt; |
327 | + } |
328 | + else { # It's an option rule, not a spec. |
329 | + PTDEBUG && _d('Parsing rule:', $opt); |
330 | + push @{$self->{rules}}, $opt; |
331 | + my @participants = $self->_get_participants($opt); |
332 | + my $rule_ok = 0; |
333 | + |
334 | + if ( $opt =~ m/mutually exclusive|one and only one/ ) { |
335 | + $rule_ok = 1; |
336 | + push @{$self->{mutex}}, \@participants; |
337 | + PTDEBUG && _d(@participants, 'are mutually exclusive'); |
338 | + } |
339 | + if ( $opt =~ m/at least one|one and only one/ ) { |
340 | + $rule_ok = 1; |
341 | + push @{$self->{atleast1}}, \@participants; |
342 | + PTDEBUG && _d(@participants, 'require at least one'); |
343 | + } |
344 | + if ( $opt =~ m/default to/ ) { |
345 | + $rule_ok = 1; |
346 | + $self->{defaults_to}->{$participants[0]} = $participants[1]; |
347 | + PTDEBUG && _d($participants[0], 'defaults to', $participants[1]); |
348 | + } |
349 | + if ( $opt =~ m/restricted to option groups/ ) { |
350 | + $rule_ok = 1; |
351 | + my ($groups) = $opt =~ m/groups ([\w\s\,]+)/; |
352 | + my @groups = split(',', $groups); |
353 | + %{$self->{allowed_groups}->{$participants[0]}} = map { |
354 | + s/\s+//; |
355 | + $_ => 1; |
356 | + } @groups; |
357 | + } |
358 | + if( $opt =~ m/accepts additional command-line arguments/ ) { |
359 | + $rule_ok = 1; |
360 | + $self->{strict} = 0; |
361 | + PTDEBUG && _d("Strict mode disabled by rule"); |
362 | + } |
363 | + |
364 | + die "Unrecognized option rule: $opt" unless $rule_ok; |
365 | + } |
366 | + } |
367 | + |
368 | + foreach my $long ( keys %disables ) { |
369 | + my @participants = $self->_get_participants($disables{$long}); |
370 | + $self->{disables}->{$long} = \@participants; |
371 | + PTDEBUG && _d('Option', $long, 'disables', @participants); |
372 | + } |
373 | + |
374 | + return; |
375 | +} |
376 | + |
377 | +sub _get_participants { |
378 | + my ( $self, $str ) = @_; |
379 | + my @participants; |
380 | + foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) { |
381 | + die "Option --$long does not exist while processing rule $str" |
382 | + unless exists $self->{opts}->{$long}; |
383 | + push @participants, $long; |
384 | + } |
385 | + PTDEBUG && _d('Participants for', $str, ':', @participants); |
386 | + return @participants; |
387 | +} |
388 | + |
389 | +sub opts { |
390 | + my ( $self ) = @_; |
391 | + my %opts = %{$self->{opts}}; |
392 | + return %opts; |
393 | +} |
394 | + |
395 | +sub short_opts { |
396 | + my ( $self ) = @_; |
397 | + my %short_opts = %{$self->{short_opts}}; |
398 | + return %short_opts; |
399 | +} |
400 | + |
401 | +sub set_defaults { |
402 | + my ( $self, %defaults ) = @_; |
403 | + $self->{defaults} = {}; |
404 | + foreach my $long ( keys %defaults ) { |
405 | + die "Cannot set default for nonexistent option $long" |
406 | + unless exists $self->{opts}->{$long}; |
407 | + $self->{defaults}->{$long} = $defaults{$long}; |
408 | + PTDEBUG && _d('Default val for', $long, ':', $defaults{$long}); |
409 | + } |
410 | + return; |
411 | +} |
412 | + |
413 | +sub get_defaults { |
414 | + my ( $self ) = @_; |
415 | + return $self->{defaults}; |
416 | +} |
417 | + |
418 | +sub get_groups { |
419 | + my ( $self ) = @_; |
420 | + return $self->{groups}; |
421 | +} |
422 | + |
423 | +sub _set_option { |
424 | + my ( $self, $opt, $val ) = @_; |
425 | + my $long = exists $self->{opts}->{$opt} ? $opt |
426 | + : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt} |
427 | + : die "Getopt::Long gave a nonexistent option: $opt"; |
428 | + |
429 | + $opt = $self->{opts}->{$long}; |
430 | + if ( $opt->{is_cumulative} ) { |
431 | + $opt->{value}++; |
432 | + } |
433 | + else { |
434 | + $opt->{value} = $val; |
435 | + } |
436 | + $opt->{got} = 1; |
437 | + PTDEBUG && _d('Got option', $long, '=', $val); |
438 | +} |
439 | + |
440 | +sub get_opts { |
441 | + my ( $self ) = @_; |
442 | + |
443 | + foreach my $long ( keys %{$self->{opts}} ) { |
444 | + $self->{opts}->{$long}->{got} = 0; |
445 | + $self->{opts}->{$long}->{value} |
446 | + = exists $self->{defaults}->{$long} ? $self->{defaults}->{$long} |
447 | + : $self->{opts}->{$long}->{is_cumulative} ? 0 |
448 | + : undef; |
449 | + } |
450 | + $self->{got_opts} = 0; |
451 | + |
452 | + $self->{errors} = []; |
453 | + |
454 | + if ( @ARGV && $ARGV[0] eq "--config" ) { |
455 | + shift @ARGV; |
456 | + $self->_set_option('config', shift @ARGV); |
457 | + } |
458 | + if ( $self->has('config') ) { |
459 | + my @extra_args; |
460 | + foreach my $filename ( split(',', $self->get('config')) ) { |
461 | + eval { |
462 | + push @extra_args, $self->_read_config_file($filename); |
463 | + }; |
464 | + if ( $EVAL_ERROR ) { |
465 | + if ( $self->got('config') ) { |
466 | + die $EVAL_ERROR; |
467 | + } |
468 | + elsif ( PTDEBUG ) { |
469 | + _d($EVAL_ERROR); |
470 | + } |
471 | + } |
472 | + } |
473 | + unshift @ARGV, @extra_args; |
474 | + } |
475 | + |
476 | + Getopt::Long::Configure('no_ignore_case', 'bundling'); |
477 | + GetOptions( |
478 | + map { $_->{spec} => sub { $self->_set_option(@_); } } |
479 | + grep { $_->{long} ne 'config' } # --config is handled specially above. |
480 | + values %{$self->{opts}} |
481 | + ) or $self->save_error('Error parsing options'); |
482 | + |
483 | + if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) { |
484 | + if ( $self->{version} ) { |
485 | + print $self->{version}, "\n"; |
486 | + } |
487 | + else { |
488 | + print "Error parsing version. See the VERSION section of the tool's documentation.\n"; |
489 | + } |
490 | + exit 0; |
491 | + } |
492 | + |
493 | + if ( @ARGV && $self->{strict} ) { |
494 | + $self->save_error("Unrecognized command-line options @ARGV"); |
495 | + } |
496 | + |
497 | + foreach my $mutex ( @{$self->{mutex}} ) { |
498 | + my @set = grep { $self->{opts}->{$_}->{got} } @$mutex; |
499 | + if ( @set > 1 ) { |
500 | + my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" } |
501 | + @{$mutex}[ 0 .. scalar(@$mutex) - 2] ) |
502 | + . ' and --'.$self->{opts}->{$mutex->[-1]}->{long} |
503 | + . ' are mutually exclusive.'; |
504 | + $self->save_error($err); |
505 | + } |
506 | + } |
507 | + |
508 | + foreach my $required ( @{$self->{atleast1}} ) { |
509 | + my @set = grep { $self->{opts}->{$_}->{got} } @$required; |
510 | + if ( @set == 0 ) { |
511 | + my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" } |
512 | + @{$required}[ 0 .. scalar(@$required) - 2] ) |
513 | + .' or --'.$self->{opts}->{$required->[-1]}->{long}; |
514 | + $self->save_error("Specify at least one of $err"); |
515 | + } |
516 | + } |
517 | + |
518 | + $self->_check_opts( keys %{$self->{opts}} ); |
519 | + $self->{got_opts} = 1; |
520 | + return; |
521 | +} |
522 | + |
523 | +sub _check_opts { |
524 | + my ( $self, @long ) = @_; |
525 | + my $long_last = scalar @long; |
526 | + while ( @long ) { |
527 | + foreach my $i ( 0..$#long ) { |
528 | + my $long = $long[$i]; |
529 | + next unless $long; |
530 | + my $opt = $self->{opts}->{$long}; |
531 | + if ( $opt->{got} ) { |
532 | + if ( exists $self->{disables}->{$long} ) { |
533 | + my @disable_opts = @{$self->{disables}->{$long}}; |
534 | + map { $self->{opts}->{$_}->{value} = undef; } @disable_opts; |
535 | + PTDEBUG && _d('Unset options', @disable_opts, |
536 | + 'because', $long,'disables them'); |
537 | + } |
538 | + |
539 | + if ( exists $self->{allowed_groups}->{$long} ) { |
540 | + |
541 | + my @restricted_groups = grep { |
542 | + !exists $self->{allowed_groups}->{$long}->{$_} |
543 | + } keys %{$self->{groups}}; |
544 | + |
545 | + my @restricted_opts; |
546 | + foreach my $restricted_group ( @restricted_groups ) { |
547 | + RESTRICTED_OPT: |
548 | + foreach my $restricted_opt ( |
549 | + keys %{$self->{groups}->{$restricted_group}} ) |
550 | + { |
551 | + next RESTRICTED_OPT if $restricted_opt eq $long; |
552 | + push @restricted_opts, $restricted_opt |
553 | + if $self->{opts}->{$restricted_opt}->{got}; |
554 | + } |
555 | + } |
556 | + |
557 | + if ( @restricted_opts ) { |
558 | + my $err; |
559 | + if ( @restricted_opts == 1 ) { |
560 | + $err = "--$restricted_opts[0]"; |
561 | + } |
562 | + else { |
563 | + $err = join(', ', |
564 | + map { "--$self->{opts}->{$_}->{long}" } |
565 | + grep { $_ } |
566 | + @restricted_opts[0..scalar(@restricted_opts) - 2] |
567 | + ) |
568 | + . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long}; |
569 | + } |
570 | + $self->save_error("--$long is not allowed with $err"); |
571 | + } |
572 | + } |
573 | + |
574 | + } |
575 | + elsif ( $opt->{is_required} ) { |
576 | + $self->save_error("Required option --$long must be specified"); |
577 | + } |
578 | + |
579 | + $self->_validate_type($opt); |
580 | + if ( $opt->{parsed} ) { |
581 | + delete $long[$i]; |
582 | + } |
583 | + else { |
584 | + PTDEBUG && _d('Temporarily failed to parse', $long); |
585 | + } |
586 | + } |
587 | + |
588 | + die "Failed to parse options, possibly due to circular dependencies" |
589 | + if @long == $long_last; |
590 | + $long_last = @long; |
591 | + } |
592 | + |
593 | + return; |
594 | +} |
595 | + |
596 | +sub _validate_type { |
597 | + my ( $self, $opt ) = @_; |
598 | + return unless $opt; |
599 | + |
600 | + if ( !$opt->{type} ) { |
601 | + $opt->{parsed} = 1; |
602 | + return; |
603 | + } |
604 | + |
605 | + my $val = $opt->{value}; |
606 | + |
607 | + if ( $val && $opt->{type} eq 'm' ) { # type time |
608 | + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value'); |
609 | + my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/; |
610 | + if ( !$suffix ) { |
611 | + my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/; |
612 | + $suffix = $s || 's'; |
613 | + PTDEBUG && _d('No suffix given; using', $suffix, 'for', |
614 | + $opt->{long}, '(value:', $val, ')'); |
615 | + } |
616 | + if ( $suffix =~ m/[smhd]/ ) { |
617 | + $val = $suffix eq 's' ? $num # Seconds |
618 | + : $suffix eq 'm' ? $num * 60 # Minutes |
619 | + : $suffix eq 'h' ? $num * 3600 # Hours |
620 | + : $num * 86400; # Days |
621 | + $opt->{value} = ($prefix || '') . $val; |
622 | + PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val); |
623 | + } |
624 | + else { |
625 | + $self->save_error("Invalid time suffix for --$opt->{long}"); |
626 | + } |
627 | + } |
628 | + elsif ( $val && $opt->{type} eq 'd' ) { # type DSN |
629 | + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN'); |
630 | + my $prev = {}; |
631 | + my $from_key = $self->{defaults_to}->{ $opt->{long} }; |
632 | + if ( $from_key ) { |
633 | + PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN'); |
634 | + if ( $self->{opts}->{$from_key}->{parsed} ) { |
635 | + $prev = $self->{opts}->{$from_key}->{value}; |
636 | + } |
637 | + else { |
638 | + PTDEBUG && _d('Cannot parse', $opt->{long}, 'until', |
639 | + $from_key, 'parsed'); |
640 | + return; |
641 | + } |
642 | + } |
643 | + my $defaults = $self->{DSNParser}->parse_options($self); |
644 | + $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults); |
645 | + } |
646 | + elsif ( $val && $opt->{type} eq 'z' ) { # type size |
647 | + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value'); |
648 | + $self->_parse_size($opt, $val); |
649 | + } |
650 | + elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) { |
651 | + $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) }; |
652 | + } |
653 | + elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) { |
654 | + $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ]; |
655 | + } |
656 | + else { |
657 | + PTDEBUG && _d('Nothing to validate for option', |
658 | + $opt->{long}, 'type', $opt->{type}, 'value', $val); |
659 | + } |
660 | + |
661 | + $opt->{parsed} = 1; |
662 | + return; |
663 | +} |
664 | + |
665 | +sub get { |
666 | + my ( $self, $opt ) = @_; |
667 | + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); |
668 | + die "Option $opt does not exist" |
669 | + unless $long && exists $self->{opts}->{$long}; |
670 | + return $self->{opts}->{$long}->{value}; |
671 | +} |
672 | + |
673 | +sub got { |
674 | + my ( $self, $opt ) = @_; |
675 | + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); |
676 | + die "Option $opt does not exist" |
677 | + unless $long && exists $self->{opts}->{$long}; |
678 | + return $self->{opts}->{$long}->{got}; |
679 | +} |
680 | + |
681 | +sub has { |
682 | + my ( $self, $opt ) = @_; |
683 | + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); |
684 | + return defined $long ? exists $self->{opts}->{$long} : 0; |
685 | +} |
686 | + |
687 | +sub set { |
688 | + my ( $self, $opt, $val ) = @_; |
689 | + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); |
690 | + die "Option $opt does not exist" |
691 | + unless $long && exists $self->{opts}->{$long}; |
692 | + $self->{opts}->{$long}->{value} = $val; |
693 | + return; |
694 | +} |
695 | + |
696 | +sub save_error { |
697 | + my ( $self, $error ) = @_; |
698 | + push @{$self->{errors}}, $error; |
699 | + return; |
700 | +} |
701 | + |
702 | +sub errors { |
703 | + my ( $self ) = @_; |
704 | + return $self->{errors}; |
705 | +} |
706 | + |
707 | +sub usage { |
708 | + my ( $self ) = @_; |
709 | + warn "No usage string is set" unless $self->{usage}; # XXX |
710 | + return "Usage: " . ($self->{usage} || '') . "\n"; |
711 | +} |
712 | + |
713 | +sub descr { |
714 | + my ( $self ) = @_; |
715 | + warn "No description string is set" unless $self->{description}; # XXX |
716 | + my $descr = ($self->{description} || $self->{program_name} || '') |
717 | + . " For more details, please use the --help option, " |
718 | + . "or try 'perldoc $PROGRAM_NAME' " |
719 | + . "for complete documentation."; |
720 | + $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g) |
721 | + unless $ENV{DONT_BREAK_LINES}; |
722 | + $descr =~ s/ +$//mg; |
723 | + return $descr; |
724 | +} |
725 | + |
726 | +sub usage_or_errors { |
727 | + my ( $self, $file, $return ) = @_; |
728 | + $file ||= $self->{file} || __FILE__; |
729 | + |
730 | + if ( !$self->{description} || !$self->{usage} ) { |
731 | + PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file); |
732 | + my %synop = $self->_parse_synopsis($file); |
733 | + $self->{description} ||= $synop{description}; |
734 | + $self->{usage} ||= $synop{usage}; |
735 | + PTDEBUG && _d("Description:", $self->{description}, |
736 | + "\nUsage:", $self->{usage}); |
737 | + } |
738 | + |
739 | + if ( $self->{opts}->{help}->{got} ) { |
740 | + print $self->print_usage() or die "Cannot print usage: $OS_ERROR"; |
741 | + exit 0 unless $return; |
742 | + } |
743 | + elsif ( scalar @{$self->{errors}} ) { |
744 | + print $self->print_errors() or die "Cannot print errors: $OS_ERROR"; |
745 | + exit 0 unless $return; |
746 | + } |
747 | + |
748 | + return; |
749 | +} |
750 | + |
751 | +sub print_errors { |
752 | + my ( $self ) = @_; |
753 | + my $usage = $self->usage() . "\n"; |
754 | + if ( (my @errors = @{$self->{errors}}) ) { |
755 | + $usage .= join("\n * ", 'Errors in command-line arguments:', @errors) |
756 | + . "\n"; |
757 | + } |
758 | + return $usage . "\n" . $self->descr(); |
759 | +} |
760 | + |
761 | +sub print_usage { |
762 | + my ( $self ) = @_; |
763 | + die "Run get_opts() before print_usage()" unless $self->{got_opts}; |
764 | + my @opts = values %{$self->{opts}}; |
765 | + |
766 | + my $maxl = max( |
767 | + map { |
768 | + length($_->{long}) # option long name |
769 | + + ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable |
770 | + + ($_->{type} ? 2 : 0) # "=x" where x is the opt type |
771 | + } |
772 | + @opts); |
773 | + |
774 | + my $maxs = max(0, |
775 | + map { |
776 | + length($_) |
777 | + + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0) |
778 | + + ($self->{opts}->{$_}->{type} ? 2 : 0) |
779 | + } |
780 | + values %{$self->{short_opts}}); |
781 | + |
782 | + my $lcol = max($maxl, ($maxs + 3)); |
783 | + my $rcol = 80 - $lcol - 6; |
784 | + my $rpad = ' ' x ( 80 - $rcol ); |
785 | + |
786 | + $maxs = max($lcol - 3, $maxs); |
787 | + |
788 | + my $usage = $self->descr() . "\n" . $self->usage(); |
789 | + |
790 | + my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}}; |
791 | + push @groups, 'default'; |
792 | + |
793 | + foreach my $group ( reverse @groups ) { |
794 | + $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n"; |
795 | + foreach my $opt ( |
796 | + sort { $a->{long} cmp $b->{long} } |
797 | + grep { $_->{group} eq $group } |
798 | + @opts ) |
799 | + { |
800 | + my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long}; |
801 | + my $short = $opt->{short}; |
802 | + my $desc = $opt->{desc}; |
803 | + |
804 | + $long .= $opt->{type} ? "=$opt->{type}" : ""; |
805 | + |
806 | + if ( $opt->{type} && $opt->{type} eq 'm' ) { |
807 | + my ($s) = $desc =~ m/\(suffix (.)\)/; |
808 | + $s ||= 's'; |
809 | + $desc =~ s/\s+\(suffix .\)//; |
810 | + $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, " |
811 | + . "d=days; if no suffix, $s is used."; |
812 | + } |
813 | + $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g); |
814 | + $desc =~ s/ +$//mg; |
815 | + if ( $short ) { |
816 | + $usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc); |
817 | + } |
818 | + else { |
819 | + $usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc); |
820 | + } |
821 | + } |
822 | + } |
823 | + |
824 | + $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n"; |
825 | + |
826 | + if ( (my @rules = @{$self->{rules}}) ) { |
827 | + $usage .= "\nRules:\n\n"; |
828 | + $usage .= join("\n", map { " $_" } @rules) . "\n"; |
829 | + } |
830 | + if ( $self->{DSNParser} ) { |
831 | + $usage .= "\n" . $self->{DSNParser}->usage(); |
832 | + } |
833 | + $usage .= "\nOptions and values after processing arguments:\n\n"; |
834 | + foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) { |
835 | + my $val = $opt->{value}; |
836 | + my $type = $opt->{type} || ''; |
837 | + my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/; |
838 | + $val = $bool ? ( $val ? 'TRUE' : 'FALSE' ) |
839 | + : !defined $val ? '(No value)' |
840 | + : $type eq 'd' ? $self->{DSNParser}->as_string($val) |
841 | + : $type =~ m/H|h/ ? join(',', sort keys %$val) |
842 | + : $type =~ m/A|a/ ? join(',', @$val) |
843 | + : $val; |
844 | + $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val); |
845 | + } |
846 | + return $usage; |
847 | +} |
848 | + |
849 | +sub prompt_noecho { |
850 | + shift @_ if ref $_[0] eq __PACKAGE__; |
851 | + my ( $prompt ) = @_; |
852 | + local $OUTPUT_AUTOFLUSH = 1; |
853 | + print $prompt |
854 | + or die "Cannot print: $OS_ERROR"; |
855 | + my $response; |
856 | + eval { |
857 | + require Term::ReadKey; |
858 | + Term::ReadKey::ReadMode('noecho'); |
859 | + chomp($response = <STDIN>); |
860 | + Term::ReadKey::ReadMode('normal'); |
861 | + print "\n" |
862 | + or die "Cannot print: $OS_ERROR"; |
863 | + }; |
864 | + if ( $EVAL_ERROR ) { |
865 | + die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR"; |
866 | + } |
867 | + return $response; |
868 | +} |
869 | + |
870 | +sub _read_config_file { |
871 | + my ( $self, $filename ) = @_; |
872 | + open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n"; |
873 | + my @args; |
874 | + my $prefix = '--'; |
875 | + my $parse = 1; |
876 | + |
877 | + LINE: |
878 | + while ( my $line = <$fh> ) { |
879 | + chomp $line; |
880 | + next LINE if $line =~ m/^\s*(?:\#|\;|$)/; |
881 | + $line =~ s/\s+#.*$//g; |
882 | + $line =~ s/^\s+|\s+$//g; |
883 | + if ( $line eq '--' ) { |
884 | + $prefix = ''; |
885 | + $parse = 0; |
886 | + next LINE; |
887 | + } |
888 | + if ( $parse |
889 | + && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/) |
890 | + ) { |
891 | + push @args, grep { defined $_ } ("$prefix$opt", $arg); |
892 | + } |
893 | + elsif ( $line =~ m/./ ) { |
894 | + push @args, $line; |
895 | + } |
896 | + else { |
897 | + die "Syntax error in file $filename at line $INPUT_LINE_NUMBER"; |
898 | + } |
899 | + } |
900 | + close $fh; |
901 | + return @args; |
902 | +} |
903 | + |
904 | +sub read_para_after { |
905 | + my ( $self, $file, $regex ) = @_; |
906 | + open my $fh, "<", $file or die "Can't open $file: $OS_ERROR"; |
907 | + local $INPUT_RECORD_SEPARATOR = ''; |
908 | + my $para; |
909 | + while ( $para = <$fh> ) { |
910 | + next unless $para =~ m/^=pod$/m; |
911 | + last; |
912 | + } |
913 | + while ( $para = <$fh> ) { |
914 | + next unless $para =~ m/$regex/; |
915 | + last; |
916 | + } |
917 | + $para = <$fh>; |
918 | + chomp($para); |
919 | + close $fh or die "Can't close $file: $OS_ERROR"; |
920 | + return $para; |
921 | +} |
922 | + |
923 | +sub clone { |
924 | + my ( $self ) = @_; |
925 | + |
926 | + my %clone = map { |
927 | + my $hashref = $self->{$_}; |
928 | + my $val_copy = {}; |
929 | + foreach my $key ( keys %$hashref ) { |
930 | + my $ref = ref $hashref->{$key}; |
931 | + $val_copy->{$key} = !$ref ? $hashref->{$key} |
932 | + : $ref eq 'HASH' ? { %{$hashref->{$key}} } |
933 | + : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ] |
934 | + : $hashref->{$key}; |
935 | + } |
936 | + $_ => $val_copy; |
937 | + } qw(opts short_opts defaults); |
938 | + |
939 | + foreach my $scalar ( qw(got_opts) ) { |
940 | + $clone{$scalar} = $self->{$scalar}; |
941 | + } |
942 | + |
943 | + return bless \%clone; |
944 | +} |
945 | + |
946 | +sub _parse_size { |
947 | + my ( $self, $opt, $val ) = @_; |
948 | + |
949 | + if ( lc($val || '') eq 'null' ) { |
950 | + PTDEBUG && _d('NULL size for', $opt->{long}); |
951 | + $opt->{value} = 'null'; |
952 | + return; |
953 | + } |
954 | + |
955 | + my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824); |
956 | + my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/; |
957 | + if ( defined $num ) { |
958 | + if ( $factor ) { |
959 | + $num *= $factor_for{$factor}; |
960 | + PTDEBUG && _d('Setting option', $opt->{y}, |
961 | + 'to num', $num, '* factor', $factor); |
962 | + } |
963 | + $opt->{value} = ($pre || '') . $num; |
964 | + } |
965 | + else { |
966 | + $self->save_error("Invalid size for --$opt->{long}: $val"); |
967 | + } |
968 | + return; |
969 | +} |
970 | + |
971 | +sub _parse_attribs { |
972 | + my ( $self, $option, $attribs ) = @_; |
973 | + my $types = $self->{types}; |
974 | + return $option |
975 | + . ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' ) |
976 | + . ($attribs->{'negatable'} ? '!' : '' ) |
977 | + . ($attribs->{'cumulative'} ? '+' : '' ) |
978 | + . ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' ); |
979 | +} |
980 | + |
981 | +sub _parse_synopsis { |
982 | + my ( $self, $file ) = @_; |
983 | + $file ||= $self->{file} || __FILE__; |
984 | + PTDEBUG && _d("Parsing SYNOPSIS in", $file); |
985 | + |
986 | + local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs |
987 | + open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; |
988 | + my $para; |
989 | + 1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/; |
990 | + die "$file does not contain a SYNOPSIS section" unless $para; |
991 | + my @synop; |
992 | + for ( 1..2 ) { # 1 for the usage, 2 for the description |
993 | + my $para = <$fh>; |
994 | + push @synop, $para; |
995 | + } |
996 | + close $fh; |
997 | + PTDEBUG && _d("Raw SYNOPSIS text:", @synop); |
998 | + my ($usage, $desc) = @synop; |
999 | + die "The SYNOPSIS section in $file is not formatted properly" |
1000 | + unless $usage && $desc; |
1001 | + |
1002 | + $usage =~ s/^\s*Usage:\s+(.+)/$1/; |
1003 | + chomp $usage; |
1004 | + |
1005 | + $desc =~ s/\n/ /g; |
1006 | + $desc =~ s/\s{2,}/ /g; |
1007 | + $desc =~ s/\. ([A-Z][a-z])/. $1/g; |
1008 | + $desc =~ s/\s+$//; |
1009 | + |
1010 | + return ( |
1011 | + description => $desc, |
1012 | + usage => $usage, |
1013 | + ); |
1014 | +}; |
1015 | + |
1016 | +sub _d { |
1017 | + my ($package, undef, $line) = caller 0; |
1018 | + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
1019 | + map { defined $_ ? $_ : 'undef' } |
1020 | + @_; |
1021 | + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
1022 | +} |
1023 | + |
1024 | +if ( PTDEBUG ) { |
1025 | + print '# ', $^X, ' ', $], "\n"; |
1026 | + if ( my $uname = `uname -a` ) { |
1027 | + $uname =~ s/\s+/ /g; |
1028 | + print "# $uname\n"; |
1029 | + } |
1030 | + print '# Arguments: ', |
1031 | + join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n"; |
1032 | +} |
1033 | + |
1034 | +1; |
1035 | +} |
1036 | +# ########################################################################### |
1037 | +# End OptionParser package |
1038 | +# ########################################################################### |
1039 | + |
1040 | +# ########################################################################### |
1041 | +# QueryParser package |
1042 | +# This package is a copy without comments from the original. The original |
1043 | +# with comments and its test file can be found in the Bazaar repository at, |
1044 | +# lib/QueryParser.pm |
1045 | +# t/lib/QueryParser.t |
1046 | +# See https://launchpad.net/percona-toolkit for more information. |
1047 | +# ########################################################################### |
1048 | +{ |
1049 | +package QueryParser; |
1050 | + |
1051 | +use strict; |
1052 | +use warnings FATAL => 'all'; |
1053 | +use English qw(-no_match_vars); |
1054 | +use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
1055 | + |
1056 | +our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/; |
1057 | +our $tbl_regex = qr{ |
1058 | + \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names |
1059 | + \b\s* |
1060 | + \(? # Optional paren around tables |
1061 | + ($tbl_ident |
1062 | + (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )* |
1063 | + ) |
1064 | + }xio; |
1065 | +our $has_derived = qr{ |
1066 | + \b(?:FROM|JOIN|,) |
1067 | + \s*\(\s*SELECT |
1068 | + }xi; |
1069 | + |
1070 | +our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i; |
1071 | + |
1072 | +our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i; |
1073 | + |
1074 | +sub new { |
1075 | + my ( $class ) = @_; |
1076 | + bless {}, $class; |
1077 | +} |
1078 | + |
1079 | +sub get_tables { |
1080 | + my ( $self, $query ) = @_; |
1081 | + return unless $query; |
1082 | + PTDEBUG && _d('Getting tables for', $query); |
1083 | + |
1084 | + my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i; |
1085 | + if ( $ddl_stmt ) { |
1086 | + PTDEBUG && _d('Special table type:', $ddl_stmt); |
1087 | + $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i; |
1088 | + if ( $query =~ m/$ddl_stmt DATABASE\b/i ) { |
1089 | + PTDEBUG && _d('Query alters a database, not a table'); |
1090 | + return (); |
1091 | + } |
1092 | + if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) { |
1093 | + my ($select) = $query =~ m/\b(SELECT\b.+)/is; |
1094 | + PTDEBUG && _d('CREATE TABLE ... SELECT:', $select); |
1095 | + return $self->get_tables($select); |
1096 | + } |
1097 | + my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i; |
1098 | + PTDEBUG && _d('Matches table:', $tbl); |
1099 | + return ($tbl); |
1100 | + } |
1101 | + |
1102 | + $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; |
1103 | + |
1104 | + if ( $query =~ /^\s*LOCK TABLES/i ) { |
1105 | + PTDEBUG && _d('Special table type: LOCK TABLES'); |
1106 | + $query =~ s/^(\s*LOCK TABLES\s+)//; |
1107 | + $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g; |
1108 | + PTDEBUG && _d('Locked tables:', $query); |
1109 | + $query = "FROM $query"; |
1110 | + } |
1111 | + |
1112 | + $query =~ s/\\["']//g; # quoted strings |
1113 | + $query =~ s/".*?"/?/sg; # quoted strings |
1114 | + $query =~ s/'.*?'/?/sg; # quoted strings |
1115 | + |
1116 | + my @tables; |
1117 | + foreach my $tbls ( $query =~ m/$tbl_regex/gio ) { |
1118 | + PTDEBUG && _d('Match tables:', $tbls); |
1119 | + |
1120 | + next if $tbls =~ m/\ASELECT\b/i; |
1121 | + |
1122 | + foreach my $tbl ( split(',', $tbls) ) { |
1123 | + $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio; |
1124 | + |
1125 | + if ( $tbl !~ m/[a-zA-Z]/ ) { |
1126 | + PTDEBUG && _d('Skipping suspicious table name:', $tbl); |
1127 | + next; |
1128 | + } |
1129 | + |
1130 | + push @tables, $tbl; |
1131 | + } |
1132 | + } |
1133 | + return @tables; |
1134 | +} |
1135 | + |
1136 | +sub has_derived_table { |
1137 | + my ( $self, $query ) = @_; |
1138 | + my $match = $query =~ m/$has_derived/; |
1139 | + PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table'); |
1140 | + return $match; |
1141 | +} |
1142 | + |
1143 | +sub get_aliases { |
1144 | + my ( $self, $query, $list ) = @_; |
1145 | + |
1146 | + my $result = { |
1147 | + DATABASE => {}, |
1148 | + TABLE => {}, |
1149 | + }; |
1150 | + return $result unless $query; |
1151 | + |
1152 | + $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; |
1153 | + |
1154 | + $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig; |
1155 | + |
1156 | + my @tbl_refs; |
1157 | + my ($tbl_refs, $from) = $query =~ m{ |
1158 | + ( |
1159 | + (FROM|INTO|UPDATE)\b\s* # Keyword before table refs |
1160 | + .+? # Table refs |
1161 | + ) |
1162 | + (?:\s+|\z) # If the query does not end with the table |
1163 | + (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs |
1164 | + }ix; |
1165 | + |
1166 | + if ( $tbl_refs ) { |
1167 | + |
1168 | + if ( $query =~ m/^(?:INSERT|REPLACE)/i ) { |
1169 | + $tbl_refs =~ s/\([^\)]+\)\s*//; |
1170 | + } |
1171 | + |
1172 | + PTDEBUG && _d('tbl refs:', $tbl_refs); |
1173 | + |
1174 | + my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i; |
1175 | + |
1176 | + my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i; |
1177 | + |
1178 | + $tbl_refs =~ s/ = /=/g; |
1179 | + |
1180 | + while ( |
1181 | + $tbl_refs =~ m{ |
1182 | + $before_tbl\b\s* |
1183 | + ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? ) |
1184 | + \s*$after_tbl |
1185 | + }xgio ) |
1186 | + { |
1187 | + my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3); |
1188 | + PTDEBUG && _d('Match table:', $tbl_ref); |
1189 | + push @tbl_refs, $tbl_ref; |
1190 | + $alias = $self->trim_identifier($alias); |
1191 | + |
1192 | + if ( $tbl_ref =~ m/^AS\s+\w+/i ) { |
1193 | + PTDEBUG && _d('Subquery', $tbl_ref); |
1194 | + $result->{TABLE}->{$alias} = undef; |
1195 | + next; |
1196 | + } |
1197 | + |
1198 | + my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/; |
1199 | + $db = $self->trim_identifier($db); |
1200 | + $tbl = $self->trim_identifier($tbl); |
1201 | + $result->{TABLE}->{$alias || $tbl} = $tbl; |
1202 | + $result->{DATABASE}->{$tbl} = $db if $db; |
1203 | + } |
1204 | + } |
1205 | + else { |
1206 | + PTDEBUG && _d("No tables ref in", $query); |
1207 | + } |
1208 | + |
1209 | + if ( $list ) { |
1210 | + return \@tbl_refs; |
1211 | + } |
1212 | + else { |
1213 | + return $result; |
1214 | + } |
1215 | +} |
1216 | + |
1217 | +sub split { |
1218 | + my ( $self, $query ) = @_; |
1219 | + return unless $query; |
1220 | + $query = $self->clean_query($query); |
1221 | + PTDEBUG && _d('Splitting', $query); |
1222 | + |
1223 | + my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i; |
1224 | + |
1225 | + my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query); |
1226 | + |
1227 | + my @statements; |
1228 | + if ( @split_statements == 1 ) { |
1229 | + push @statements, $query; |
1230 | + } |
1231 | + else { |
1232 | + for ( my $i = 0; $i <= $#split_statements; $i += 2 ) { |
1233 | + push @statements, $split_statements[$i].$split_statements[$i+1]; |
1234 | + |
1235 | + if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) { |
1236 | + $statements[-2] .= pop @statements; |
1237 | + } |
1238 | + } |
1239 | + } |
1240 | + |
1241 | + PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements); |
1242 | + return @statements; |
1243 | +} |
1244 | + |
1245 | +sub clean_query { |
1246 | + my ( $self, $query ) = @_; |
1247 | + return unless $query; |
1248 | + $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */ |
1249 | + $query =~ s/^\s+//; # Remove leading spaces |
1250 | + $query =~ s/\s+$//; # Remove trailing spaces |
1251 | + $query =~ s/\s{2,}/ /g; # Remove extra spaces |
1252 | + return $query; |
1253 | +} |
1254 | + |
1255 | +sub split_subquery { |
1256 | + my ( $self, $query ) = @_; |
1257 | + return unless $query; |
1258 | + $query = $self->clean_query($query); |
1259 | + $query =~ s/;$//; |
1260 | + |
1261 | + my @subqueries; |
1262 | + my $sqno = 0; # subquery number |
1263 | + my $pos = 0; |
1264 | + while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) { |
1265 | + $pos = pos($query); |
1266 | + my $word = $1; |
1267 | + PTDEBUG && _d($word, $sqno); |
1268 | + if ( $word =~ m/^\(?SELECT\b/i ) { |
1269 | + my $start_pos = $pos - length($word) - 1; |
1270 | + if ( $start_pos ) { |
1271 | + $sqno++; |
1272 | + PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos); |
1273 | + $subqueries[$sqno] = { |
1274 | + start_pos => $start_pos, |
1275 | + end_pos => 0, |
1276 | + len => 0, |
1277 | + words => [$word], |
1278 | + lp => 1, # left parentheses |
1279 | + rp => 0, # right parentheses |
1280 | + done => 0, |
1281 | + }; |
1282 | + } |
1283 | + else { |
1284 | + PTDEBUG && _d('Main SELECT at pos 0'); |
1285 | + } |
1286 | + } |
1287 | + else { |
1288 | + next unless $sqno; # next unless we're in a subquery |
1289 | + PTDEBUG && _d('In subquery', $sqno); |
1290 | + my $sq = $subqueries[$sqno]; |
1291 | + if ( $sq->{done} ) { |
1292 | + PTDEBUG && _d('This subquery is done; SQL is for', |
1293 | + ($sqno - 1 ? "subquery $sqno" : "the main SELECT")); |
1294 | + next; |
1295 | + } |
1296 | + push @{$sq->{words}}, $word; |
1297 | + my $lp = ($word =~ tr/\(//) || 0; |
1298 | + my $rp = ($word =~ tr/\)//) || 0; |
1299 | + PTDEBUG && _d('parentheses left', $lp, 'right', $rp); |
1300 | + if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) { |
1301 | + my $end_pos = $pos - 1; |
1302 | + PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos); |
1303 | + $sq->{end_pos} = $end_pos; |
1304 | + $sq->{len} = $end_pos - $sq->{start_pos}; |
1305 | + } |
1306 | + } |
1307 | + } |
1308 | + |
1309 | + for my $i ( 1..$#subqueries ) { |
1310 | + my $sq = $subqueries[$i]; |
1311 | + next unless $sq; |
1312 | + $sq->{sql} = join(' ', @{$sq->{words}}); |
1313 | + substr $query, |
1314 | + $sq->{start_pos} + 1, # +1 for ( |
1315 | + $sq->{len} - 1, # -1 for ) |
1316 | + "__subquery_$i"; |
1317 | + } |
1318 | + |
1319 | + return $query, map { $_->{sql} } grep { defined $_ } @subqueries; |
1320 | +} |
1321 | + |
1322 | +sub query_type { |
1323 | + my ( $self, $query, $qr ) = @_; |
1324 | + my ($type, undef) = $qr->distill_verbs($query); |
1325 | + my $rw; |
1326 | + if ( $type =~ m/^SELECT\b/ ) { |
1327 | + $rw = 'read'; |
1328 | + } |
1329 | + elsif ( $type =~ m/^$data_manip_stmts\b/ |
1330 | + || $type =~ m/^$data_def_stmts\b/ ) { |
1331 | + $rw = 'write' |
1332 | + } |
1333 | + |
1334 | + return { |
1335 | + type => $type, |
1336 | + rw => $rw, |
1337 | + } |
1338 | +} |
1339 | + |
1340 | +sub get_columns { |
1341 | + my ( $self, $query ) = @_; |
1342 | + my $cols = []; |
1343 | + return $cols unless $query; |
1344 | + my $cols_def; |
1345 | + |
1346 | + if ( $query =~ m/^SELECT/i ) { |
1347 | + $query =~ s/ |
1348 | + ^SELECT\s+ |
1349 | + (?:ALL |
1350 | + |DISTINCT |
1351 | + |DISTINCTROW |
1352 | + |HIGH_PRIORITY |
1353 | + |STRAIGHT_JOIN |
1354 | + |SQL_SMALL_RESULT |
1355 | + |SQL_BIG_RESULT |
1356 | + |SQL_BUFFER_RESULT |
1357 | + |SQL_CACHE |
1358 | + |SQL_NO_CACHE |
1359 | + |SQL_CALC_FOUND_ROWS |
1360 | + )\s+ |
1361 | + /SELECT /xgi; |
1362 | + ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i; |
1363 | + } |
1364 | + elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) { |
1365 | + ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i; |
1366 | + } |
1367 | + |
1368 | + PTDEBUG && _d('Columns:', $cols_def); |
1369 | + if ( $cols_def ) { |
1370 | + @$cols = split(',', $cols_def); |
1371 | + map { |
1372 | + my $col = $_; |
1373 | + $col = s/^\s+//g; |
1374 | + $col = s/\s+$//g; |
1375 | + $col; |
1376 | + } @$cols; |
1377 | + } |
1378 | + |
1379 | + return $cols; |
1380 | +} |
1381 | + |
1382 | +sub parse { |
1383 | + my ( $self, $query ) = @_; |
1384 | + return unless $query; |
1385 | + my $parsed = {}; |
1386 | + |
1387 | + $query =~ s/\n/ /g; |
1388 | + $query = $self->clean_query($query); |
1389 | + |
1390 | + $parsed->{query} = $query, |
1391 | + $parsed->{tables} = $self->get_aliases($query, 1); |
1392 | + $parsed->{columns} = $self->get_columns($query); |
1393 | + |
1394 | + my ($type) = $query =~ m/^(\w+)/; |
1395 | + $parsed->{type} = lc $type; |
1396 | + |
1397 | + |
1398 | + $parsed->{sub_queries} = []; |
1399 | + |
1400 | + return $parsed; |
1401 | +} |
1402 | + |
1403 | +sub extract_tables { |
1404 | + my ( $self, %args ) = @_; |
1405 | + my $query = $args{query}; |
1406 | + my $default_db = $args{default_db}; |
1407 | + my $q = $self->{Quoter} || $args{Quoter}; |
1408 | + return unless $query; |
1409 | + PTDEBUG && _d('Extracting tables'); |
1410 | + my @tables; |
1411 | + my %seen; |
1412 | + foreach my $db_tbl ( $self->get_tables($query) ) { |
1413 | + next unless $db_tbl; |
1414 | + next if $seen{$db_tbl}++; # Unique-ify for issue 337. |
1415 | + my ( $db, $tbl ) = $q->split_unquote($db_tbl); |
1416 | + push @tables, [ $db || $default_db, $tbl ]; |
1417 | + } |
1418 | + return @tables; |
1419 | +} |
1420 | + |
1421 | +sub trim_identifier { |
1422 | + my ($self, $str) = @_; |
1423 | + return unless defined $str; |
1424 | + $str =~ s/`//g; |
1425 | + $str =~ s/^\s+//; |
1426 | + $str =~ s/\s+$//; |
1427 | + return $str; |
1428 | +} |
1429 | + |
1430 | +sub _d { |
1431 | + my ($package, undef, $line) = caller 0; |
1432 | + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
1433 | + map { defined $_ ? $_ : 'undef' } |
1434 | + @_; |
1435 | + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
1436 | +} |
1437 | + |
1438 | +1; |
1439 | +} |
1440 | +# ########################################################################### |
1441 | +# End QueryParser package |
1442 | +# ########################################################################### |
1443 | + |
1444 | +# ########################################################################### |
1445 | +# QueryRewriter package |
1446 | +# This package is a copy without comments from the original. The original |
1447 | +# with comments and its test file can be found in the Bazaar repository at, |
1448 | +# lib/QueryRewriter.pm |
1449 | +# t/lib/QueryRewriter.t |
1450 | +# See https://launchpad.net/percona-toolkit for more information. |
1451 | +# ########################################################################### |
1452 | +{ |
1453 | +package QueryRewriter; |
1454 | + |
1455 | +use strict; |
1456 | +use warnings FATAL => 'all'; |
1457 | +use English qw(-no_match_vars); |
1458 | +use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
1459 | + |
1460 | +our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT |
1461 | + |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi; |
1462 | +my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly! |
1463 | +my $bal; |
1464 | +$bal = qr/ |
1465 | + \( |
1466 | + (?: |
1467 | + (?> [^()]+ ) # Non-parens without backtracking |
1468 | + | |
1469 | + (??{ $bal }) # Group with matching parens |
1470 | + )* |
1471 | + \) |
1472 | + /x; |
1473 | + |
1474 | +my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments |
1475 | +my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ |
1476 | +my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ |
1477 | +my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW |
1478 | + |
1479 | + |
1480 | +sub new { |
1481 | + my ( $class, %args ) = @_; |
1482 | + my $self = { %args }; |
1483 | + return bless $self, $class; |
1484 | +} |
1485 | + |
1486 | +sub strip_comments { |
1487 | + my ( $self, $query ) = @_; |
1488 | + return unless $query; |
1489 | + $query =~ s/$olc_re//go; |
1490 | + $query =~ s/$mlc_re//go; |
1491 | + if ( $query =~ m/$vlc_rf/i ) { # contains show + version |
1492 | + $query =~ s/$vlc_re//go; |
1493 | + } |
1494 | + return $query; |
1495 | +} |
1496 | + |
1497 | +sub shorten { |
1498 | + my ( $self, $query, $length ) = @_; |
1499 | + $query =~ s{ |
1500 | + \A( |
1501 | + (?:INSERT|REPLACE) |
1502 | + (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)? |
1503 | + (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\) |
1504 | + ) |
1505 | + \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)} |
1506 | + {$1 /*... omitted ...*/$2}xsi; |
1507 | + |
1508 | + return $query unless $query =~ m/IN\s*\(\s*(?!select)/i; |
1509 | + |
1510 | + my $last_length = 0; |
1511 | + my $query_length = length($query); |
1512 | + while ( |
1513 | + $length > 0 |
1514 | + && $query_length > $length |
1515 | + && $query_length < ( $last_length || $query_length + 1 ) |
1516 | + ) { |
1517 | + $last_length = $query_length; |
1518 | + $query =~ s{ |
1519 | + (\bIN\s*\() # The opening of an IN list |
1520 | + ([^\)]+) # Contents of the list, assuming no item contains paren |
1521 | + (?=\)) # Close of the list |
1522 | + } |
1523 | + { |
1524 | + $1 . __shorten($2) |
1525 | + }gexsi; |
1526 | + } |
1527 | + |
1528 | + return $query; |
1529 | +} |
1530 | + |
1531 | +sub __shorten { |
1532 | + my ( $snippet ) = @_; |
1533 | + my @vals = split(/,/, $snippet); |
1534 | + return $snippet unless @vals > 20; |
1535 | + my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items |
1536 | + return |
1537 | + join(',', @keep) |
1538 | + . "/*... omitted " |
1539 | + . scalar(@vals) |
1540 | + . " items ...*/"; |
1541 | +} |
1542 | + |
1543 | +sub fingerprint { |
1544 | + my ( $self, $query ) = @_; |
1545 | + |
1546 | + $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query |
1547 | + && return 'mysqldump'; |
1548 | + $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query |
1549 | + && return 'percona-toolkit'; |
1550 | + $query =~ m/\Aadministrator command: / |
1551 | + && return $query; |
1552 | + $query =~ m/\A\s*(call\s+\S+)\(/i |
1553 | + && return lc($1); # Warning! $1 used, be careful. |
1554 | + if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) { |
1555 | + $query = $beginning; # Shorten multi-value INSERT statements ASAP |
1556 | + } |
1557 | + |
1558 | + $query =~ s/$olc_re//go; |
1559 | + $query =~ s/$mlc_re//go; |
1560 | + $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE |
1561 | + && return $query; |
1562 | + |
1563 | + $query =~ s/\\["']//g; # quoted strings |
1564 | + $query =~ s/".*?"/?/sg; # quoted strings |
1565 | + $query =~ s/'.*?'/?/sg; # quoted strings |
1566 | + |
1567 | + if ( $self->{match_md5_checksums} ) { |
1568 | + $query =~ s/([._-])[a-f0-9]{32}/$1?/g; |
1569 | + } |
1570 | + |
1571 | + if ( !$self->{match_embedded_numbers} ) { |
1572 | + $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; |
1573 | + } |
1574 | + else { |
1575 | + $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; |
1576 | + } |
1577 | + |
1578 | + if ( $self->{match_md5_checksums} ) { |
1579 | + $query =~ s/[xb+-]\?/?/g; |
1580 | + } |
1581 | + else { |
1582 | + $query =~ s/[xb.+-]\?/?/g; |
1583 | + } |
1584 | + |
1585 | + $query =~ s/\A\s+//; # Chop off leading whitespace |
1586 | + chomp $query; # Kill trailing whitespace |
1587 | + $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace |
1588 | + $query = lc $query; |
1589 | + $query =~ s/\bnull\b/?/g; # Get rid of NULLs |
1590 | + $query =~ s{ # Collapse IN and VALUES lists |
1591 | + \b(in|values?)(?:[\s,]*\([\s?,]*\))+ |
1592 | + } |
1593 | + {$1(?+)}gx; |
1594 | + $query =~ s{ # Collapse UNION |
1595 | + \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+ |
1596 | + } |
1597 | + {$1 /*repeat$2*/}xg; |
1598 | + $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT |
1599 | + |
1600 | + if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause |
1601 | + 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query; |
1602 | + } |
1603 | + |
1604 | + return $query; |
1605 | +} |
1606 | + |
1607 | +sub distill_verbs { |
1608 | + my ( $self, $query ) = @_; |
1609 | + |
1610 | + $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1"; |
1611 | + $query =~ m/\A\s*use\s+/ && return "USE"; |
1612 | + $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; |
1613 | + $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; |
1614 | + |
1615 | + if ( $query =~ m/\Aadministrator command:/ ) { |
1616 | + $query =~ s/administrator command:/ADMIN/; |
1617 | + $query = uc $query; |
1618 | + return $query; |
1619 | + } |
1620 | + |
1621 | + $query = $self->strip_comments($query); |
1622 | + |
1623 | + if ( $query =~ m/\A\s*SHOW\s+/i ) { |
1624 | + PTDEBUG && _d($query); |
1625 | + |
1626 | + $query = uc $query; |
1627 | + $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; |
1628 | + $query =~ s/\s+COUNT[^)]+\)//g; |
1629 | + |
1630 | + $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; |
1631 | + |
1632 | + $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s; |
1633 | + $query =~ s/\s+/ /g; |
1634 | + PTDEBUG && _d($query); |
1635 | + return $query; |
1636 | + } |
1637 | + |
1638 | + eval $QueryParser::data_def_stmts; |
1639 | + eval $QueryParser::tbl_ident; |
1640 | + my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; |
1641 | + if ( $dds) { |
1642 | + my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; |
1643 | + $obj = uc $obj if $obj; |
1644 | + PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); |
1645 | + my ($db_or_tbl) |
1646 | + = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i; |
1647 | + PTDEBUG && _d('Matches db or table:', $db_or_tbl); |
1648 | + return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl; |
1649 | + } |
1650 | + |
1651 | + my @verbs = $query =~ m/\b($verbs)\b/gio; |
1652 | + @verbs = do { |
1653 | + my $last = ''; |
1654 | + grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs; |
1655 | + }; |
1656 | + |
1657 | + if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) { |
1658 | + PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]); |
1659 | + my $union = grep { $_ eq 'UNION' } @verbs; |
1660 | + @verbs = $union ? qw(SELECT UNION) : qw(SELECT); |
1661 | + } |
1662 | + |
1663 | + my $verb_str = join(q{ }, @verbs); |
1664 | + return $verb_str; |
1665 | +} |
1666 | + |
1667 | +sub __distill_tables { |
1668 | + my ( $self, $query, $table, %args ) = @_; |
1669 | + my $qp = $args{QueryParser} || $self->{QueryParser}; |
1670 | + die "I need a QueryParser argument" unless $qp; |
1671 | + |
1672 | + my @tables = map { |
1673 | + $_ =~ s/`//g; |
1674 | + $_ =~ s/(_?)[0-9]+/$1?/g; |
1675 | + $_; |
1676 | + } grep { defined $_ } $qp->get_tables($query); |
1677 | + |
1678 | + push @tables, $table if $table; |
1679 | + |
1680 | + @tables = do { |
1681 | + my $last = ''; |
1682 | + grep { my $pass = $_ ne $last; $last = $_; $pass } @tables; |
1683 | + }; |
1684 | + |
1685 | + return @tables; |
1686 | +} |
1687 | + |
1688 | +sub distill { |
1689 | + my ( $self, $query, %args ) = @_; |
1690 | + |
1691 | + if ( $args{generic} ) { |
1692 | + my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/; |
1693 | + return '' unless $cmd; |
1694 | + $query = (uc $cmd) . ($arg ? " $arg" : ''); |
1695 | + } |
1696 | + else { |
1697 | + my ($verbs, $table) = $self->distill_verbs($query, %args); |
1698 | + |
1699 | + if ( $verbs && $verbs =~ m/^SHOW/ ) { |
1700 | + my %alias_for = qw( |
1701 | + SCHEMA DATABASE |
1702 | + KEYS INDEX |
1703 | + INDEXES INDEX |
1704 | + ); |
1705 | + map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; |
1706 | + $query = $verbs; |
1707 | + } |
1708 | + else { |
1709 | + my @tables = $self->__distill_tables($query, $table, %args); |
1710 | + $query = join(q{ }, $verbs, @tables); |
1711 | + } |
1712 | + } |
1713 | + |
1714 | + if ( $args{trf} ) { |
1715 | + $query = $args{trf}->($query, %args); |
1716 | + } |
1717 | + |
1718 | + return $query; |
1719 | +} |
1720 | + |
1721 | +sub convert_to_select { |
1722 | + my ( $self, $query ) = @_; |
1723 | + return unless $query; |
1724 | + |
1725 | + return if $query =~ m/=\s*\(\s*SELECT /i; |
1726 | + |
1727 | + $query =~ s{ |
1728 | + \A.*? |
1729 | + update(?:\s+(?:low_priority|ignore))?\s+(.*?) |
1730 | + \s+set\b(.*?) |
1731 | + (?:\s*where\b(.*?))? |
1732 | + (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)? |
1733 | + \Z |
1734 | + } |
1735 | + {__update_to_select($1, $2, $3, $4)}exsi |
1736 | + || $query =~ s{ |
1737 | + \A.*? |
1738 | + (?:insert(?:\s+ignore)?|replace)\s+ |
1739 | + .*?\binto\b(.*?)\(([^\)]+)\)\s* |
1740 | + values?\s*(\(.*?\))\s* |
1741 | + (?:\blimit\b|on\s+duplicate\s+key.*)?\s* |
1742 | + \Z |
1743 | + } |
1744 | + {__insert_to_select($1, $2, $3)}exsi |
1745 | + || $query =~ s{ |
1746 | + \A.*? |
1747 | + (?:insert(?:\s+ignore)?|replace)\s+ |
1748 | + (?:.*?\binto)\b(.*?)\s* |
1749 | + set\s+(.*?)\s* |
1750 | + (?:\blimit\b|on\s+duplicate\s+key.*)?\s* |
1751 | + \Z |
1752 | + } |
1753 | + {__insert_to_select_with_set($1, $2)}exsi |
1754 | + || $query =~ s{ |
1755 | + \A.*? |
1756 | + delete\s+(.*?) |
1757 | + \bfrom\b(.*) |
1758 | + \Z |
1759 | + } |
1760 | + {__delete_to_select($1, $2)}exsi; |
1761 | + $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si; |
1762 | + $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism; |
1763 | + return $query; |
1764 | +} |
1765 | + |
1766 | +sub convert_select_list { |
1767 | + my ( $self, $query ) = @_; |
1768 | + $query =~ s{ |
1769 | + \A\s*select(.*?)\bfrom\b |
1770 | + } |
1771 | + {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi; |
1772 | + return $query; |
1773 | +} |
1774 | + |
1775 | +sub __delete_to_select { |
1776 | + my ( $delete, $join ) = @_; |
1777 | + if ( $join =~ m/\bjoin\b/ ) { |
1778 | + return "select 1 from $join"; |
1779 | + } |
1780 | + return "select * from $join"; |
1781 | +} |
1782 | + |
1783 | +sub __insert_to_select { |
1784 | + my ( $tbl, $cols, $vals ) = @_; |
1785 | + PTDEBUG && _d('Args:', @_); |
1786 | + my @cols = split(/,/, $cols); |
1787 | + PTDEBUG && _d('Cols:', @cols); |
1788 | + $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens |
1789 | + my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g; |
1790 | + PTDEBUG && _d('Vals:', @vals); |
1791 | + if ( @cols == @vals ) { |
1792 | + return "select * from $tbl where " |
1793 | + . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols)); |
1794 | + } |
1795 | + else { |
1796 | + return "select * from $tbl limit 1"; |
1797 | + } |
1798 | +} |
1799 | + |
1800 | +sub __insert_to_select_with_set { |
1801 | + my ( $from, $set ) = @_; |
1802 | + $set =~ s/,/ and /g; |
1803 | + return "select * from $from where $set "; |
1804 | +} |
1805 | + |
1806 | +sub __update_to_select { |
1807 | + my ( $from, $set, $where, $limit ) = @_; |
1808 | + return "select $set from $from " |
1809 | + . ( $where ? "where $where" : '' ) |
1810 | + . ( $limit ? " $limit " : '' ); |
1811 | +} |
1812 | + |
1813 | +sub wrap_in_derived { |
1814 | + my ( $self, $query ) = @_; |
1815 | + return unless $query; |
1816 | + return $query =~ m/\A\s*select/i |
1817 | + ? "select 1 from ($query) as x limit 1" |
1818 | + : $query; |
1819 | +} |
1820 | + |
1821 | +sub _d { |
1822 | + my ($package, undef, $line) = caller 0; |
1823 | + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } |
1824 | + map { defined $_ ? $_ : 'undef' } |
1825 | + @_; |
1826 | + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
1827 | +} |
1828 | + |
1829 | +1; |
1830 | +} |
1831 | +# ########################################################################### |
1832 | +# End QueryRewriter package |
1833 | +# ########################################################################### |
1834 | + |
1835 | +# ########################################################################### |
1836 | +# This is a combination of modules and programs in one -- a runnable module. |
1837 | +# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last |
1838 | +# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition. |
1839 | +# |
1840 | +# Check at the end of this package for the call to main() which actually runs |
1841 | +# the program. |
1842 | +# ########################################################################### |
1843 | +package pt_fingerprint; |
1844 | + |
1845 | +use English qw(-no_match_vars); |
1846 | +use Data::Dumper; |
1847 | +$Data::Dumper::Indent = 1; |
1848 | +$OUTPUT_AUTOFLUSH = 1; |
1849 | + |
1850 | +use constant MKDEBUG => $ENV{MKDEBUG} || 0; |
1851 | + |
1852 | +sub main { |
1853 | + @ARGV = @_; # set global ARGV for this package |
1854 | + |
1855 | + # ########################################################################## |
1856 | + # Get configuration information. |
1857 | + # ########################################################################## |
1858 | + my $o = new OptionParser(); |
1859 | + $o->get_specs(); |
1860 | + $o->get_opts(); |
1861 | + $o->usage_or_errors(); |
1862 | + |
1863 | + my $qp = new QueryParser(); |
1864 | + my $qr = new QueryRewriter( |
1865 | + QueryParser => $qp, |
1866 | + match_md5_checksums => $o->get('match-md5-checksums'), |
1867 | + match_embedded_numbers => $o->get('match-embedded-numbers'), |
1868 | + ); |
1869 | + |
1870 | + if ( $o->got('query') ) { |
1871 | + print $qr->fingerprint($o->get('query')), "\n"; |
1872 | + } |
1873 | + else { |
1874 | + local $INPUT_RECORD_SEPARATOR = ";\n"; |
1875 | + while ( <> ) { |
1876 | + my $query = $_; |
1877 | + chomp $query; |
1878 | + $query =~ s/^#.+$//mg; |
1879 | + $query =~ s/^\s+//; |
1880 | + next unless $query =~ m/^\w/; |
1881 | + print $qr->fingerprint($query), "\n"; |
1882 | + } |
1883 | + } |
1884 | +} |
1885 | + |
1886 | +# ############################################################################ |
1887 | +# Run the program. |
1888 | +# ############################################################################ |
1889 | +if ( !caller ) { exit main(@ARGV); } |
1890 | + |
1891 | +1; # Because this is a module as well as a script. |
1892 | + |
1893 | +# ############################################################################# |
1894 | +# Documentation. |
1895 | +# ############################################################################# |
1896 | + |
1897 | +=pod |
1898 | + |
1899 | +=head1 NAME |
1900 | + |
1901 | +pt-fingerprint - Convert queries into fingerprints. |
1902 | + |
1903 | +=head1 SYNOPSIS |
1904 | + |
1905 | +Usage: pt-fingerprint [OPTIONS] [FILES] |
1906 | + |
1907 | +pt-fingerprint converts queries into fingerprints. With the --query |
1908 | +option, converts the option's value into a fingerprint. With no options, treats |
1909 | +command-line arguments as FILEs and reads and converts semicolon-separated |
1910 | +queries from the FILEs. When FILE is -, it read standard input. |
1911 | + |
1912 | +Convert a single query: |
1913 | + |
1914 | + pt-fingerprint --query "select a, b, c from users where id = 500" |
1915 | + |
1916 | +Convert a file full of queries: |
1917 | + |
1918 | + pt-fingerprint /path/to/file.txt |
1919 | + |
1920 | +=head1 RISKS |
1921 | + |
1922 | +The following section is included to inform users about the potential risks, |
1923 | +whether known or unknown, of using this tool. The two main categories of risks |
1924 | +are those created by the nature of the tool (e.g. read-only tools vs. read-write |
1925 | +tools) and those created by bugs. |
1926 | + |
1927 | +The pt-fingerprint tool simply reads data and transforms it, so risks are |
1928 | +minimal. |
1929 | + |
1930 | +See also L<"BUGS"> for more information on filing bugs and getting help. |
1931 | + |
1932 | +=head1 DESCRIPTION |
1933 | + |
1934 | +A query fingerprint is the abstracted form of a query, which makes it possible |
1935 | +to group similar queries together. Abstracting a query removes literal values, |
1936 | +normalizes whitespace, and so on. For example, consider these two queries: |
1937 | + |
1938 | + SELECT name, password FROM user WHERE id='12823'; |
1939 | + select name, password from user |
1940 | + where id=5; |
1941 | + |
1942 | +Both of those queries will fingerprint to |
1943 | + |
1944 | + select name, password from user where id=? |
1945 | + |
1946 | +Once the query's fingerprint is known, we can then talk about a query as though |
1947 | +it represents all similar queries. |
1948 | + |
1949 | +Query fingerprinting accommodates a great many special cases, which have proven |
1950 | +necessary in the real world. For example, an IN list with 5 literals is really |
1951 | +equivalent to one with 4 literals, so lists of literals are collapsed to a |
1952 | +single one. If you want to understand more about how and why all of these cases |
1953 | +are handled, please review the test cases in the Subversion repository. If you |
1954 | +find something that is not fingerprinted properly, please submit a bug report |
1955 | +with a reproducible test case. Here is a list of transformations during |
1956 | +fingerprinting, which might not be exhaustive: |
1957 | + |
1958 | +=over |
1959 | + |
1960 | +=item * |
1961 | + |
1962 | +Group all SELECT queries from mysqldump together, even if they are against |
1963 | +different tables. Ditto for all of pt-table-checksum's checksum queries. |
1964 | + |
1965 | +=item * |
1966 | + |
1967 | +Shorten multi-value INSERT statements to a single VALUES() list. |
1968 | + |
1969 | +=item * |
1970 | + |
1971 | +Strip comments. |
1972 | + |
1973 | +=item * |
1974 | + |
1975 | +Abstract the databases in USE statements, so all USE statements are grouped |
1976 | +together. |
1977 | + |
1978 | +=item * |
1979 | + |
1980 | +Replace all literals, such as quoted strings. For efficiency, the code that |
1981 | +replaces literal numbers is somewhat non-selective, and might replace some |
1982 | +things as numbers when they really are not. Hexadecimal literals are also |
1983 | +replaced. NULL is treated as a literal. Numbers embedded in identifiers are |
1984 | +also replaced, so tables named similarly will be fingerprinted to the same |
1985 | +values (e.g. users_2009 and users_2010 will fingerprint identically). |
1986 | + |
1987 | +=item * |
1988 | + |
1989 | +Collapse all whitespace into a single space. |
1990 | + |
1991 | +=item * |
1992 | + |
1993 | +Lowercase the entire query. |
1994 | + |
1995 | +=item * |
1996 | + |
1997 | +Replace all literals inside of IN() and VALUES() lists with a single |
1998 | +placeholder, regardless of cardinality. |
1999 | + |
2000 | +=item * |
2001 | + |
2002 | +Collapse multiple identical UNION queries into a single one. |
2003 | + |
2004 | +=back |
2005 | + |
2006 | +=head1 OPTIONS |
2007 | + |
2008 | +This tool accepts additional command-line arguments. Refer to the |
2009 | +L<"SYNOPSIS"> and usage information for details. |
2010 | + |
2011 | +=over |
2012 | + |
2013 | +=item --config |
2014 | + |
2015 | +type: Array |
2016 | + |
2017 | +Read this comma-separated list of config files; if specified, this must be the |
2018 | +first option on the command line. |
2019 | + |
2020 | +=item --help |
2021 | + |
2022 | +Show help and exit. |
2023 | + |
2024 | +=item --match-embedded-numbers |
2025 | + |
2026 | +Match numbers embedded in words and replace as single values. This option |
2027 | +causes the tool to be more careful about matching numbers so that words |
2028 | +with numbers, like C<catch22> are matched and replaced as a single C<?> |
2029 | +placeholder. Otherwise the default number matching pattern will replace |
2030 | +C<catch22> as C<catch?>. |
2031 | + |
2032 | +This is helpful if database or table names contain numbers. |
2033 | + |
2034 | +=item --match-md5-checksums |
2035 | + |
2036 | +Match MD5 checksums and replace as single values. This option causes |
2037 | +the tool to be more careful about matching numbers so that MD5 checksums |
2038 | +like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a |
2039 | +single C<?> placeholder. Otherwise, the default number matching pattern will |
2040 | +replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>. |
2041 | + |
2042 | +=item --query |
2043 | + |
2044 | +type: string |
2045 | + |
2046 | +The query to convert into a fingerprint. |
2047 | + |
2048 | +=item --version |
2049 | + |
2050 | +Show version and exit. |
2051 | + |
2052 | +=back |
2053 | + |
2054 | +=head1 ENVIRONMENT |
2055 | + |
2056 | +The environment variable C<PTDEBUG> enables verbose debugging output to STDERR. |
2057 | +To enable debugging and capture all output to a file, run the tool like: |
2058 | + |
2059 | + PTDEBUG=1 pt-fingerprint ... > FILE 2>&1 |
2060 | + |
2061 | +Be careful: debugging output is voluminous and can generate several megabytes |
2062 | +of output. |
2063 | + |
2064 | +=head1 SYSTEM REQUIREMENTS |
2065 | + |
2066 | +You need Perl, DBI, DBD::mysql, and some core packages that ought to be |
2067 | +installed in any reasonably new version of Perl. |
2068 | + |
2069 | +=head1 BUGS |
2070 | + |
2071 | +For a list of known bugs, see L<http://www.percona.com/bugs/pt-fingerprint>. |
2072 | + |
2073 | +Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>. |
2074 | +Include the following information in your bug report: |
2075 | + |
2076 | +=over |
2077 | + |
2078 | +=item * Complete command-line used to run the tool |
2079 | + |
2080 | +=item * Tool L<"--version"> |
2081 | + |
2082 | +=item * MySQL version of all servers involved |
2083 | + |
2084 | +=item * Output from the tool including STDERR |
2085 | + |
2086 | +=item * Input files (log/dump/config files, etc.) |
2087 | + |
2088 | +=back |
2089 | + |
2090 | +If possible, include debugging output by running the tool with C<PTDEBUG>; |
2091 | +see L<"ENVIRONMENT">. |
2092 | + |
2093 | +=head1 DOWNLOADING |
2094 | + |
2095 | +Visit L<http://www.percona.com/software/percona-toolkit/> to download the |
2096 | +latest release of Percona Toolkit. Or, get the latest release from the |
2097 | +command line: |
2098 | + |
2099 | + wget percona.com/get/percona-toolkit.tar.gz |
2100 | + |
2101 | + wget percona.com/get/percona-toolkit.rpm |
2102 | + |
2103 | + wget percona.com/get/percona-toolkit.deb |
2104 | + |
2105 | +You can also get individual tools from the latest release: |
2106 | + |
2107 | + wget percona.com/get/TOOL |
2108 | + |
2109 | +Replace C<TOOL> with the name of any tool. |
2110 | + |
2111 | +=head1 AUTHORS |
2112 | + |
2113 | +Baron Schwartz and Daniel Nichter |
2114 | + |
2115 | +=head1 ABOUT PERCONA TOOLKIT |
2116 | + |
2117 | +This tool is part of Percona Toolkit, a collection of advanced command-line |
2118 | +tools developed by Percona for MySQL support and consulting. Percona Toolkit |
2119 | +was forked from two projects in June, 2011: Maatkit and Aspersa. Those |
2120 | +projects were created by Baron Schwartz and developed primarily by him and |
2121 | +Daniel Nichter, both of whom are employed by Percona. Visit |
2122 | +L<http://www.percona.com/software/> for more software developed by Percona. |
2123 | + |
2124 | +=head1 COPYRIGHT, LICENSE, AND WARRANTY |
2125 | + |
2126 | +This program is copyright 2011-2012 Percona Inc. |
2127 | +Feedback and improvements are welcome. |
2128 | + |
2129 | +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
2130 | +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
2131 | +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
2132 | + |
2133 | +This program is free software; you can redistribute it and/or modify it under |
2134 | +the terms of the GNU General Public License as published by the Free Software |
2135 | +Foundation, version 2; OR the Perl Artistic License. On UNIX and similar |
2136 | +systems, you can issue `man perlgpl' or `man perlartistic' to read these |
2137 | +licenses. |
2138 | + |
2139 | +You should have received a copy of the GNU General Public License along with |
2140 | +this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
2141 | +Place, Suite 330, Boston, MA 02111-1307 USA. |
2142 | + |
2143 | +=head1 VERSION |
2144 | + |
2145 | +pt-fingerprint 2.0.0 |
2146 | + |
2147 | +=cut |
2148 | |
2149 | === modified file 'lib/QueryRewriter.pm' |
2150 | --- lib/QueryRewriter.pm 2012-01-19 19:46:56 +0000 |
2151 | +++ lib/QueryRewriter.pm 2012-03-30 22:06:22 +0000 |
2152 | @@ -175,10 +175,30 @@ |
2153 | $query =~ s/\\["']//g; # quoted strings |
2154 | $query =~ s/".*?"/?/sg; # quoted strings |
2155 | $query =~ s/'.*?'/?/sg; # quoted strings |
2156 | - # This regex is extremely broad in its definition of what looks like a |
2157 | - # number. That is for speed. |
2158 | - $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers |
2159 | - $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers |
2160 | + |
2161 | + # MD5 checksums which are always 32 hex chars |
2162 | + if ( $self->{match_md5_checksums} ) { |
2163 | + $query =~ s/([._-])[a-f0-9]{32}/$1?/g; |
2164 | + } |
2165 | + |
2166 | + # Things resembling numbers/hex. |
2167 | + if ( !$self->{match_embedded_numbers} ) { |
2168 | + # For speed, this regex is extremely broad in its definition |
2169 | + # of what looks like a number. |
2170 | + $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; |
2171 | + } |
2172 | + else { |
2173 | + $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; |
2174 | + } |
2175 | + |
2176 | + # Clean up leftovers |
2177 | + if ( $self->{match_md5_checksums} ) { |
2178 | + $query =~ s/[xb+-]\?/?/g; |
2179 | + } |
2180 | + else { |
2181 | + $query =~ s/[xb.+-]\?/?/g; |
2182 | + } |
2183 | + |
2184 | $query =~ s/\A\s+//; # Chop off leading whitespace |
2185 | chomp $query; # Kill trailing whitespace |
2186 | $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace |
2187 | |
2188 | === modified file 't/lib/QueryRewriter.t' |
2189 | --- t/lib/QueryRewriter.t 2012-03-06 13:56:08 +0000 |
2190 | +++ t/lib/QueryRewriter.t 2012-03-30 22:06:22 +0000 |
2191 | @@ -10,7 +10,7 @@ |
2192 | use strict; |
2193 | use warnings FATAL => 'all'; |
2194 | use English qw(-no_match_vars); |
2195 | -use Test::More tests => 266; |
2196 | +use Test::More tests => 271; |
2197 | |
2198 | use QueryRewriter; |
2199 | use QueryParser; |
2200 | @@ -349,6 +349,64 @@ |
2201 | "Fingerprint LOAD DATA INFILE" |
2202 | ); |
2203 | |
2204 | +# fingerprint MD5 checksums, 32 char hex strings. This is a |
2205 | +# special feature used by pt-fingerprint. |
2206 | +$qr = new QueryRewriter( |
2207 | + QueryParser => $qp, |
2208 | + match_md5_checksums => 1, |
2209 | +); |
2210 | + |
2211 | +is( |
2212 | + $qr->fingerprint( |
2213 | + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" |
2214 | + ), |
2215 | + "select * from db.?_temp where id=?", |
2216 | + "Fingerprint db.MD5_tbl" |
2217 | +); |
2218 | + |
2219 | +is( |
2220 | + $qr->fingerprint( |
2221 | + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" |
2222 | + ), |
2223 | + "select * from db.temp_? where id=?", |
2224 | + "Fingerprint db.tbl_MD5" |
2225 | +); |
2226 | + |
2227 | +$qr = new QueryRewriter( |
2228 | + QueryParser => $qp, |
2229 | + match_md5_checksums => 1, |
2230 | + match_embedded_numbers => 1, |
2231 | +); |
2232 | + |
2233 | +is( |
2234 | + $qr->fingerprint( |
2235 | + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" |
2236 | + ), |
2237 | + "select * from db.?_temp where id=?", |
2238 | + "Fingerprint db.MD5_tbl (with match_embedded_numbers)" |
2239 | +); |
2240 | + |
2241 | +is( |
2242 | + $qr->fingerprint( |
2243 | + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" |
2244 | + ), |
2245 | + "select * from db.temp_? where id=?", |
2246 | + "Fingerprint db.tbl_MD5 (with match_embedded_numbers)" |
2247 | +); |
2248 | + |
2249 | +$qr = new QueryRewriter( |
2250 | + QueryParser => $qp, |
2251 | + match_embedded_numbers => 1, |
2252 | +); |
2253 | + |
2254 | +is( |
2255 | + $qr->fingerprint( |
2256 | + "SELECT * FROM prices.rt_5min where id=1" |
2257 | + ), |
2258 | + "select * from prices.rt_5min where id=?", |
2259 | + "Fingerprint db.tbl<number>name (preserve number)" |
2260 | +); |
2261 | + |
2262 | # ############################################################################# |
2263 | # convert_to_select() |
2264 | # ############################################################################# |
2265 | |
2266 | === added directory 't/pt-fingerprint' |
2267 | === added file 't/pt-fingerprint/basics.t' |
2268 | --- t/pt-fingerprint/basics.t 1970-01-01 00:00:00 +0000 |
2269 | +++ t/pt-fingerprint/basics.t 2012-03-30 22:06:22 +0000 |
2270 | @@ -0,0 +1,101 @@ |
2271 | +#!/usr/bin/env perl |
2272 | + |
2273 | +BEGIN { |
2274 | + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" |
2275 | + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; |
2276 | + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; |
2277 | +}; |
2278 | + |
2279 | +use strict; |
2280 | +use warnings FATAL => 'all'; |
2281 | +use English qw(-no_match_vars); |
2282 | +use Test::More tests => 7; |
2283 | + |
2284 | +use PerconaTest; |
2285 | +require "$trunk/bin/pt-fingerprint"; |
2286 | + |
2287 | +my @args = qw(); |
2288 | +my $output; |
2289 | +my $sample = "$trunk/t/pt-fingerprint/samples"; |
2290 | +my $pqd = "$trunk/bin/pt-query-digest"; |
2291 | + |
2292 | +$output = `$trunk/bin/pt-fingerprint --help`; |
2293 | +like( |
2294 | + $output, |
2295 | + qr/--help/, |
2296 | + "It runs" |
2297 | +); |
2298 | + |
2299 | + |
2300 | +sub test_query_file { |
2301 | + my ($file) = @_; |
2302 | + if ( ! -f "$sample/$file.fingerprint" ) { |
2303 | + `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`; |
2304 | + diag("Created $sample/$file.fingerprint"); |
2305 | + } |
2306 | + chomp(my $expect = `cat $sample/$file.fingerprint`); |
2307 | + my $got = output( |
2308 | + sub { pt_fingerprint::main("$sample/$file") } |
2309 | + ); |
2310 | + chomp($got); |
2311 | + is( |
2312 | + $got, |
2313 | + $expect, |
2314 | + "$file fingerprint" |
2315 | + ); |
2316 | +}; |
2317 | + |
2318 | +opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n"; |
2319 | +while (defined(my $file = readdir($dir))) { |
2320 | + next unless $file =~ m/^query\d+$/; |
2321 | + test_query_file($file); |
2322 | +} |
2323 | +closedir $dir; |
2324 | + |
2325 | + |
2326 | +sub test_query { |
2327 | + my (%args) = @_; |
2328 | + my $query = $args{query}; |
2329 | + my $expect = $args{expect}; |
2330 | + my @ops = $args{ops} ? @{$args{ops}} : (); |
2331 | + |
2332 | + $output = output( |
2333 | + sub { pt_fingerprint::main('--query', $query, @ops) } |
2334 | + ); |
2335 | + chomp($output); |
2336 | + is( |
2337 | + $output, |
2338 | + $expect, |
2339 | + $args{name} ? $args{name} : "Fingerprint " . substr($query, 0, 70) |
2340 | + ); |
2341 | +} |
2342 | + |
2343 | +test_query( |
2344 | + query => 'select * from tbl where id=1', |
2345 | + expect => 'select * from tbl where id=?', |
2346 | +); |
2347 | + |
2348 | +test_query( |
2349 | + name => "Fingerprint MD5_word", |
2350 | + query => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1", |
2351 | + expect => "select c from db.?_temp where id=?", |
2352 | + ops => [qw(--match-md5-checksums)], |
2353 | +); |
2354 | + |
2355 | +test_query( |
2356 | + name => "Fingerprint word_MD5", |
2357 | + query => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1", |
2358 | + expect => "select c from db.temp_? where id=?", |
2359 | + ops => [qw(--match-md5-checksums)], |
2360 | +); |
2361 | + |
2362 | +test_query( |
2363 | + name => "Fingerprint word<number>", |
2364 | + query => "SELECT c FROM db.catch22 WHERE id is null", |
2365 | + expect => "select c from db.catch22 where id is ?", |
2366 | + ops => [qw(--match-embedded-numbers)], |
2367 | +); |
2368 | +# ############################################################################# |
2369 | +# Done. |
2370 | +# ############################################################################# |
2371 | +exit; |
2372 | |
2373 | === added directory 't/pt-fingerprint/samples' |
2374 | === added file 't/pt-fingerprint/samples/query001' |
2375 | --- t/pt-fingerprint/samples/query001 1970-01-01 00:00:00 +0000 |
2376 | +++ t/pt-fingerprint/samples/query001 2012-03-30 22:06:22 +0000 |
2377 | @@ -0,0 +1,2 @@ |
2378 | +# Query_time: 1 |
2379 | +select * from db.tbl where id=1 or foo='bar'; |
2380 | |
2381 | === added file 't/pt-fingerprint/samples/query001.fingerprint' |
2382 | --- t/pt-fingerprint/samples/query001.fingerprint 1970-01-01 00:00:00 +0000 |
2383 | +++ t/pt-fingerprint/samples/query001.fingerprint 2012-03-30 22:06:22 +0000 |
2384 | @@ -0,0 +1,1 @@ |
2385 | +select * from db.tbl where id=? or foo=? |
2386 | |
2387 | === added file 't/pt-fingerprint/samples/query002' |
2388 | --- t/pt-fingerprint/samples/query002 1970-01-01 00:00:00 +0000 |
2389 | +++ t/pt-fingerprint/samples/query002 2012-03-30 22:06:22 +0000 |
2390 | @@ -0,0 +1,2 @@ |
2391 | +# Query_time: 1 |
2392 | +select col from db.tbl1 where id in (1, 2, 3); |
2393 | |
2394 | === added file 't/pt-fingerprint/samples/query002.fingerprint' |
2395 | --- t/pt-fingerprint/samples/query002.fingerprint 1970-01-01 00:00:00 +0000 |
2396 | +++ t/pt-fingerprint/samples/query002.fingerprint 2012-03-30 22:06:22 +0000 |
2397 | @@ -0,0 +1,1 @@ |
2398 | +select col from db.tbl? where id in(?+) |