Merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 224
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint
Merge into: lp:percona-toolkit/2.1
Diff against target: 2398 lines (+2333/-5)
8 files modified
bin/pt-fingerprint (+2143/-0)
lib/QueryRewriter.pm (+24/-4)
t/lib/QueryRewriter.t (+59/-1)
t/pt-fingerprint/basics.t (+101/-0)
t/pt-fingerprint/samples/query001 (+2/-0)
t/pt-fingerprint/samples/query001.fingerprint (+1/-0)
t/pt-fingerprint/samples/query002 (+2/-0)
t/pt-fingerprint/samples/query002.fingerprint (+1/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+100250@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added file 'bin/pt-fingerprint'
--- bin/pt-fingerprint 1970-01-01 00:00:00 +0000
+++ bin/pt-fingerprint 2012-03-30 22:06:22 +0000
@@ -0,0 +1,2143 @@
1#!/usr/bin/env perl
2
3# This program is part of Percona Toolkit: http://www.percona.com/software/
4# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
5# notices and disclaimers.
6
7use strict;
8use warnings FATAL => 'all';
9use constant MKDEBUG => $ENV{MKDEBUG} || 0;
10
11# ###########################################################################
12# OptionParser package
13# This package is a copy without comments from the original. The original
14# with comments and its test file can be found in the Bazaar repository at,
15# lib/OptionParser.pm
16# t/lib/OptionParser.t
17# See https://launchpad.net/percona-toolkit for more information.
18# ###########################################################################
19{
20package OptionParser;
21
22use strict;
23use warnings FATAL => 'all';
24use English qw(-no_match_vars);
25use constant PTDEBUG => $ENV{PTDEBUG} || 0;
26
27use List::Util qw(max);
28use Getopt::Long;
29
30my $POD_link_re = '[LC]<"?([^">]+)"?>';
31
32sub new {
33 my ( $class, %args ) = @_;
34 my @required_args = qw();
35 foreach my $arg ( @required_args ) {
36 die "I need a $arg argument" unless $args{$arg};
37 }
38
39 my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
40 $program_name ||= $PROGRAM_NAME;
41 my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
42
43 my %attributes = (
44 'type' => 1,
45 'short form' => 1,
46 'group' => 1,
47 'default' => 1,
48 'cumulative' => 1,
49 'negatable' => 1,
50 );
51
52 my $self = {
53 head1 => 'OPTIONS', # These args are used internally
54 skip_rules => 0, # to instantiate another Option-
55 item => '--(.*)', # Parser obj that parses the
56 attributes => \%attributes, # DSN OPTIONS section. Tools
57 parse_attributes => \&_parse_attribs, # don't tinker with these args.
58
59 %args,
60
61 strict => 1, # disabled by a special rule
62 program_name => $program_name,
63 opts => {},
64 got_opts => 0,
65 short_opts => {},
66 defaults => {},
67 groups => {},
68 allowed_groups => {},
69 errors => [],
70 rules => [], # desc of rules for --help
71 mutex => [], # rule: opts are mutually exclusive
72 atleast1 => [], # rule: at least one opt is required
73 disables => {}, # rule: opt disables other opts
74 defaults_to => {}, # rule: opt defaults to value of other opt
75 DSNParser => undef,
76 default_files => [
77 "/etc/percona-toolkit/percona-toolkit.conf",
78 "/etc/percona-toolkit/$program_name.conf",
79 "$home/.percona-toolkit.conf",
80 "$home/.$program_name.conf",
81 ],
82 types => {
83 string => 's', # standard Getopt type
84 int => 'i', # standard Getopt type
85 float => 'f', # standard Getopt type
86 Hash => 'H', # hash, formed from a comma-separated list
87 hash => 'h', # hash as above, but only if a value is given
88 Array => 'A', # array, similar to Hash
89 array => 'a', # array, similar to hash
90 DSN => 'd', # DSN
91 size => 'z', # size with kMG suffix (powers of 2^10)
92 time => 'm', # time, with an optional suffix of s/h/m/d
93 },
94 };
95
96 return bless $self, $class;
97}
98
99sub get_specs {
100 my ( $self, $file ) = @_;
101 $file ||= $self->{file} || __FILE__;
102 my @specs = $self->_pod_to_specs($file);
103 $self->_parse_specs(@specs);
104
105 open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
106 my $contents = do { local $/ = undef; <$fh> };
107 close $fh;
108 if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
109 PTDEBUG && _d('Parsing DSN OPTIONS');
110 my $dsn_attribs = {
111 dsn => 1,
112 copy => 1,
113 };
114 my $parse_dsn_attribs = sub {
115 my ( $self, $option, $attribs ) = @_;
116 map {
117 my $val = $attribs->{$_};
118 if ( $val ) {
119 $val = $val eq 'yes' ? 1
120 : $val eq 'no' ? 0
121 : $val;
122 $attribs->{$_} = $val;
123 }
124 } keys %$attribs;
125 return {
126 key => $option,
127 %$attribs,
128 };
129 };
130 my $dsn_o = new OptionParser(
131 description => 'DSN OPTIONS',
132 head1 => 'DSN OPTIONS',
133 dsn => 0, # XXX don't infinitely recurse!
134 item => '\* (.)', # key opts are a single character
135 skip_rules => 1, # no rules before opts
136 attributes => $dsn_attribs,
137 parse_attributes => $parse_dsn_attribs,
138 );
139 my @dsn_opts = map {
140 my $opts = {
141 key => $_->{spec}->{key},
142 dsn => $_->{spec}->{dsn},
143 copy => $_->{spec}->{copy},
144 desc => $_->{desc},
145 };
146 $opts;
147 } $dsn_o->_pod_to_specs($file);
148 $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
149 }
150
151 if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
152 $self->{version} = $1;
153 PTDEBUG && _d($self->{version});
154 }
155
156 return;
157}
158
159sub DSNParser {
160 my ( $self ) = @_;
161 return $self->{DSNParser};
162};
163
164sub get_defaults_files {
165 my ( $self ) = @_;
166 return @{$self->{default_files}};
167}
168
169sub _pod_to_specs {
170 my ( $self, $file ) = @_;
171 $file ||= $self->{file} || __FILE__;
172 open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
173
174 my @specs = ();
175 my @rules = ();
176 my $para;
177
178 local $INPUT_RECORD_SEPARATOR = '';
179 while ( $para = <$fh> ) {
180 next unless $para =~ m/^=head1 $self->{head1}/;
181 last;
182 }
183
184 while ( $para = <$fh> ) {
185 last if $para =~ m/^=over/;
186 next if $self->{skip_rules};
187 chomp $para;
188 $para =~ s/\s+/ /g;
189 $para =~ s/$POD_link_re/$1/go;
190 PTDEBUG && _d('Option rule:', $para);
191 push @rules, $para;
192 }
193
194 die "POD has no $self->{head1} section" unless $para;
195
196 do {
197 if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
198 chomp $para;
199 PTDEBUG && _d($para);
200 my %attribs;
201
202 $para = <$fh>; # read next paragraph, possibly attributes
203
204 if ( $para =~ m/: / ) { # attributes
205 $para =~ s/\s+\Z//g;
206 %attribs = map {
207 my ( $attrib, $val) = split(/: /, $_);
208 die "Unrecognized attribute for --$option: $attrib"
209 unless $self->{attributes}->{$attrib};
210 ($attrib, $val);
211 } split(/; /, $para);
212 if ( $attribs{'short form'} ) {
213 $attribs{'short form'} =~ s/-//;
214 }
215 $para = <$fh>; # read next paragraph, probably short help desc
216 }
217 else {
218 PTDEBUG && _d('Option has no attributes');
219 }
220
221 $para =~ s/\s+\Z//g;
222 $para =~ s/\s+/ /g;
223 $para =~ s/$POD_link_re/$1/go;
224
225 $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
226 PTDEBUG && _d('Short help:', $para);
227
228 die "No description after option spec $option" if $para =~ m/^=item/;
229
230 if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
231 $option = $base_option;
232 $attribs{'negatable'} = 1;
233 }
234
235 push @specs, {
236 spec => $self->{parse_attributes}->($self, $option, \%attribs),
237 desc => $para
238 . (defined $attribs{default} ? " (default $attribs{default})" : ''),
239 group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
240 };
241 }
242 while ( $para = <$fh> ) {
243 last unless $para;
244 if ( $para =~ m/^=head1/ ) {
245 $para = undef; # Can't 'last' out of a do {} block.
246 last;
247 }
248 last if $para =~ m/^=item /;
249 }
250 } while ( $para );
251
252 die "No valid specs in $self->{head1}" unless @specs;
253
254 close $fh;
255 return @specs, @rules;
256}
257
258sub _parse_specs {
259 my ( $self, @specs ) = @_;
260 my %disables; # special rule that requires deferred checking
261
262 foreach my $opt ( @specs ) {
263 if ( ref $opt ) { # It's an option spec, not a rule.
264 PTDEBUG && _d('Parsing opt spec:',
265 map { ($_, '=>', $opt->{$_}) } keys %$opt);
266
267 my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
268 if ( !$long ) {
269 die "Cannot parse long option from spec $opt->{spec}";
270 }
271 $opt->{long} = $long;
272
273 die "Duplicate long option --$long" if exists $self->{opts}->{$long};
274 $self->{opts}->{$long} = $opt;
275
276 if ( length $long == 1 ) {
277 PTDEBUG && _d('Long opt', $long, 'looks like short opt');
278 $self->{short_opts}->{$long} = $long;
279 }
280
281 if ( $short ) {
282 die "Duplicate short option -$short"
283 if exists $self->{short_opts}->{$short};
284 $self->{short_opts}->{$short} = $long;
285 $opt->{short} = $short;
286 }
287 else {
288 $opt->{short} = undef;
289 }
290
291 $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
292 $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
293 $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
294
295 $opt->{group} ||= 'default';
296 $self->{groups}->{ $opt->{group} }->{$long} = 1;
297
298 $opt->{value} = undef;
299 $opt->{got} = 0;
300
301 my ( $type ) = $opt->{spec} =~ m/=(.)/;
302 $opt->{type} = $type;
303 PTDEBUG && _d($long, 'type:', $type);
304
305
306 $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
307
308 if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
309 $self->{defaults}->{$long} = defined $def ? $def : 1;
310 PTDEBUG && _d($long, 'default:', $def);
311 }
312
313 if ( $long eq 'config' ) {
314 $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
315 }
316
317 if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
318 $disables{$long} = $dis;
319 PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
320 }
321
322 $self->{opts}->{$long} = $opt;
323 }
324 else { # It's an option rule, not a spec.
325 PTDEBUG && _d('Parsing rule:', $opt);
326 push @{$self->{rules}}, $opt;
327 my @participants = $self->_get_participants($opt);
328 my $rule_ok = 0;
329
330 if ( $opt =~ m/mutually exclusive|one and only one/ ) {
331 $rule_ok = 1;
332 push @{$self->{mutex}}, \@participants;
333 PTDEBUG && _d(@participants, 'are mutually exclusive');
334 }
335 if ( $opt =~ m/at least one|one and only one/ ) {
336 $rule_ok = 1;
337 push @{$self->{atleast1}}, \@participants;
338 PTDEBUG && _d(@participants, 'require at least one');
339 }
340 if ( $opt =~ m/default to/ ) {
341 $rule_ok = 1;
342 $self->{defaults_to}->{$participants[0]} = $participants[1];
343 PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
344 }
345 if ( $opt =~ m/restricted to option groups/ ) {
346 $rule_ok = 1;
347 my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
348 my @groups = split(',', $groups);
349 %{$self->{allowed_groups}->{$participants[0]}} = map {
350 s/\s+//;
351 $_ => 1;
352 } @groups;
353 }
354 if( $opt =~ m/accepts additional command-line arguments/ ) {
355 $rule_ok = 1;
356 $self->{strict} = 0;
357 PTDEBUG && _d("Strict mode disabled by rule");
358 }
359
360 die "Unrecognized option rule: $opt" unless $rule_ok;
361 }
362 }
363
364 foreach my $long ( keys %disables ) {
365 my @participants = $self->_get_participants($disables{$long});
366 $self->{disables}->{$long} = \@participants;
367 PTDEBUG && _d('Option', $long, 'disables', @participants);
368 }
369
370 return;
371}
372
373sub _get_participants {
374 my ( $self, $str ) = @_;
375 my @participants;
376 foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
377 die "Option --$long does not exist while processing rule $str"
378 unless exists $self->{opts}->{$long};
379 push @participants, $long;
380 }
381 PTDEBUG && _d('Participants for', $str, ':', @participants);
382 return @participants;
383}
384
385sub opts {
386 my ( $self ) = @_;
387 my %opts = %{$self->{opts}};
388 return %opts;
389}
390
391sub short_opts {
392 my ( $self ) = @_;
393 my %short_opts = %{$self->{short_opts}};
394 return %short_opts;
395}
396
397sub set_defaults {
398 my ( $self, %defaults ) = @_;
399 $self->{defaults} = {};
400 foreach my $long ( keys %defaults ) {
401 die "Cannot set default for nonexistent option $long"
402 unless exists $self->{opts}->{$long};
403 $self->{defaults}->{$long} = $defaults{$long};
404 PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
405 }
406 return;
407}
408
409sub get_defaults {
410 my ( $self ) = @_;
411 return $self->{defaults};
412}
413
414sub get_groups {
415 my ( $self ) = @_;
416 return $self->{groups};
417}
418
419sub _set_option {
420 my ( $self, $opt, $val ) = @_;
421 my $long = exists $self->{opts}->{$opt} ? $opt
422 : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
423 : die "Getopt::Long gave a nonexistent option: $opt";
424
425 $opt = $self->{opts}->{$long};
426 if ( $opt->{is_cumulative} ) {
427 $opt->{value}++;
428 }
429 else {
430 $opt->{value} = $val;
431 }
432 $opt->{got} = 1;
433 PTDEBUG && _d('Got option', $long, '=', $val);
434}
435
436sub get_opts {
437 my ( $self ) = @_;
438
439 foreach my $long ( keys %{$self->{opts}} ) {
440 $self->{opts}->{$long}->{got} = 0;
441 $self->{opts}->{$long}->{value}
442 = exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
443 : $self->{opts}->{$long}->{is_cumulative} ? 0
444 : undef;
445 }
446 $self->{got_opts} = 0;
447
448 $self->{errors} = [];
449
450 if ( @ARGV && $ARGV[0] eq "--config" ) {
451 shift @ARGV;
452 $self->_set_option('config', shift @ARGV);
453 }
454 if ( $self->has('config') ) {
455 my @extra_args;
456 foreach my $filename ( split(',', $self->get('config')) ) {
457 eval {
458 push @extra_args, $self->_read_config_file($filename);
459 };
460 if ( $EVAL_ERROR ) {
461 if ( $self->got('config') ) {
462 die $EVAL_ERROR;
463 }
464 elsif ( PTDEBUG ) {
465 _d($EVAL_ERROR);
466 }
467 }
468 }
469 unshift @ARGV, @extra_args;
470 }
471
472 Getopt::Long::Configure('no_ignore_case', 'bundling');
473 GetOptions(
474 map { $_->{spec} => sub { $self->_set_option(@_); } }
475 grep { $_->{long} ne 'config' } # --config is handled specially above.
476 values %{$self->{opts}}
477 ) or $self->save_error('Error parsing options');
478
479 if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
480 if ( $self->{version} ) {
481 print $self->{version}, "\n";
482 }
483 else {
484 print "Error parsing version. See the VERSION section of the tool's documentation.\n";
485 }
486 exit 0;
487 }
488
489 if ( @ARGV && $self->{strict} ) {
490 $self->save_error("Unrecognized command-line options @ARGV");
491 }
492
493 foreach my $mutex ( @{$self->{mutex}} ) {
494 my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
495 if ( @set > 1 ) {
496 my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
497 @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
498 . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
499 . ' are mutually exclusive.';
500 $self->save_error($err);
501 }
502 }
503
504 foreach my $required ( @{$self->{atleast1}} ) {
505 my @set = grep { $self->{opts}->{$_}->{got} } @$required;
506 if ( @set == 0 ) {
507 my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
508 @{$required}[ 0 .. scalar(@$required) - 2] )
509 .' or --'.$self->{opts}->{$required->[-1]}->{long};
510 $self->save_error("Specify at least one of $err");
511 }
512 }
513
514 $self->_check_opts( keys %{$self->{opts}} );
515 $self->{got_opts} = 1;
516 return;
517}
518
519sub _check_opts {
520 my ( $self, @long ) = @_;
521 my $long_last = scalar @long;
522 while ( @long ) {
523 foreach my $i ( 0..$#long ) {
524 my $long = $long[$i];
525 next unless $long;
526 my $opt = $self->{opts}->{$long};
527 if ( $opt->{got} ) {
528 if ( exists $self->{disables}->{$long} ) {
529 my @disable_opts = @{$self->{disables}->{$long}};
530 map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
531 PTDEBUG && _d('Unset options', @disable_opts,
532 'because', $long,'disables them');
533 }
534
535 if ( exists $self->{allowed_groups}->{$long} ) {
536
537 my @restricted_groups = grep {
538 !exists $self->{allowed_groups}->{$long}->{$_}
539 } keys %{$self->{groups}};
540
541 my @restricted_opts;
542 foreach my $restricted_group ( @restricted_groups ) {
543 RESTRICTED_OPT:
544 foreach my $restricted_opt (
545 keys %{$self->{groups}->{$restricted_group}} )
546 {
547 next RESTRICTED_OPT if $restricted_opt eq $long;
548 push @restricted_opts, $restricted_opt
549 if $self->{opts}->{$restricted_opt}->{got};
550 }
551 }
552
553 if ( @restricted_opts ) {
554 my $err;
555 if ( @restricted_opts == 1 ) {
556 $err = "--$restricted_opts[0]";
557 }
558 else {
559 $err = join(', ',
560 map { "--$self->{opts}->{$_}->{long}" }
561 grep { $_ }
562 @restricted_opts[0..scalar(@restricted_opts) - 2]
563 )
564 . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
565 }
566 $self->save_error("--$long is not allowed with $err");
567 }
568 }
569
570 }
571 elsif ( $opt->{is_required} ) {
572 $self->save_error("Required option --$long must be specified");
573 }
574
575 $self->_validate_type($opt);
576 if ( $opt->{parsed} ) {
577 delete $long[$i];
578 }
579 else {
580 PTDEBUG && _d('Temporarily failed to parse', $long);
581 }
582 }
583
584 die "Failed to parse options, possibly due to circular dependencies"
585 if @long == $long_last;
586 $long_last = @long;
587 }
588
589 return;
590}
591
592sub _validate_type {
593 my ( $self, $opt ) = @_;
594 return unless $opt;
595
596 if ( !$opt->{type} ) {
597 $opt->{parsed} = 1;
598 return;
599 }
600
601 my $val = $opt->{value};
602
603 if ( $val && $opt->{type} eq 'm' ) { # type time
604 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
605 my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
606 if ( !$suffix ) {
607 my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
608 $suffix = $s || 's';
609 PTDEBUG && _d('No suffix given; using', $suffix, 'for',
610 $opt->{long}, '(value:', $val, ')');
611 }
612 if ( $suffix =~ m/[smhd]/ ) {
613 $val = $suffix eq 's' ? $num # Seconds
614 : $suffix eq 'm' ? $num * 60 # Minutes
615 : $suffix eq 'h' ? $num * 3600 # Hours
616 : $num * 86400; # Days
617 $opt->{value} = ($prefix || '') . $val;
618 PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
619 }
620 else {
621 $self->save_error("Invalid time suffix for --$opt->{long}");
622 }
623 }
624 elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
625 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
626 my $prev = {};
627 my $from_key = $self->{defaults_to}->{ $opt->{long} };
628 if ( $from_key ) {
629 PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
630 if ( $self->{opts}->{$from_key}->{parsed} ) {
631 $prev = $self->{opts}->{$from_key}->{value};
632 }
633 else {
634 PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
635 $from_key, 'parsed');
636 return;
637 }
638 }
639 my $defaults = $self->{DSNParser}->parse_options($self);
640 $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
641 }
642 elsif ( $val && $opt->{type} eq 'z' ) { # type size
643 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
644 $self->_parse_size($opt, $val);
645 }
646 elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
647 $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
648 }
649 elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
650 $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
651 }
652 else {
653 PTDEBUG && _d('Nothing to validate for option',
654 $opt->{long}, 'type', $opt->{type}, 'value', $val);
655 }
656
657 $opt->{parsed} = 1;
658 return;
659}
660
661sub get {
662 my ( $self, $opt ) = @_;
663 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
664 die "Option $opt does not exist"
665 unless $long && exists $self->{opts}->{$long};
666 return $self->{opts}->{$long}->{value};
667}
668
669sub got {
670 my ( $self, $opt ) = @_;
671 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
672 die "Option $opt does not exist"
673 unless $long && exists $self->{opts}->{$long};
674 return $self->{opts}->{$long}->{got};
675}
676
677sub has {
678 my ( $self, $opt ) = @_;
679 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
680 return defined $long ? exists $self->{opts}->{$long} : 0;
681}
682
683sub set {
684 my ( $self, $opt, $val ) = @_;
685 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
686 die "Option $opt does not exist"
687 unless $long && exists $self->{opts}->{$long};
688 $self->{opts}->{$long}->{value} = $val;
689 return;
690}
691
692sub save_error {
693 my ( $self, $error ) = @_;
694 push @{$self->{errors}}, $error;
695 return;
696}
697
698sub errors {
699 my ( $self ) = @_;
700 return $self->{errors};
701}
702
703sub usage {
704 my ( $self ) = @_;
705 warn "No usage string is set" unless $self->{usage}; # XXX
706 return "Usage: " . ($self->{usage} || '') . "\n";
707}
708
709sub descr {
710 my ( $self ) = @_;
711 warn "No description string is set" unless $self->{description}; # XXX
712 my $descr = ($self->{description} || $self->{program_name} || '')
713 . " For more details, please use the --help option, "
714 . "or try 'perldoc $PROGRAM_NAME' "
715 . "for complete documentation.";
716 $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
717 unless $ENV{DONT_BREAK_LINES};
718 $descr =~ s/ +$//mg;
719 return $descr;
720}
721
722sub usage_or_errors {
723 my ( $self, $file, $return ) = @_;
724 $file ||= $self->{file} || __FILE__;
725
726 if ( !$self->{description} || !$self->{usage} ) {
727 PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
728 my %synop = $self->_parse_synopsis($file);
729 $self->{description} ||= $synop{description};
730 $self->{usage} ||= $synop{usage};
731 PTDEBUG && _d("Description:", $self->{description},
732 "\nUsage:", $self->{usage});
733 }
734
735 if ( $self->{opts}->{help}->{got} ) {
736 print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
737 exit 0 unless $return;
738 }
739 elsif ( scalar @{$self->{errors}} ) {
740 print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
741 exit 0 unless $return;
742 }
743
744 return;
745}
746
747sub print_errors {
748 my ( $self ) = @_;
749 my $usage = $self->usage() . "\n";
750 if ( (my @errors = @{$self->{errors}}) ) {
751 $usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
752 . "\n";
753 }
754 return $usage . "\n" . $self->descr();
755}
756
757sub print_usage {
758 my ( $self ) = @_;
759 die "Run get_opts() before print_usage()" unless $self->{got_opts};
760 my @opts = values %{$self->{opts}};
761
762 my $maxl = max(
763 map {
764 length($_->{long}) # option long name
765 + ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
766 + ($_->{type} ? 2 : 0) # "=x" where x is the opt type
767 }
768 @opts);
769
770 my $maxs = max(0,
771 map {
772 length($_)
773 + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
774 + ($self->{opts}->{$_}->{type} ? 2 : 0)
775 }
776 values %{$self->{short_opts}});
777
778 my $lcol = max($maxl, ($maxs + 3));
779 my $rcol = 80 - $lcol - 6;
780 my $rpad = ' ' x ( 80 - $rcol );
781
782 $maxs = max($lcol - 3, $maxs);
783
784 my $usage = $self->descr() . "\n" . $self->usage();
785
786 my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
787 push @groups, 'default';
788
789 foreach my $group ( reverse @groups ) {
790 $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
791 foreach my $opt (
792 sort { $a->{long} cmp $b->{long} }
793 grep { $_->{group} eq $group }
794 @opts )
795 {
796 my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
797 my $short = $opt->{short};
798 my $desc = $opt->{desc};
799
800 $long .= $opt->{type} ? "=$opt->{type}" : "";
801
802 if ( $opt->{type} && $opt->{type} eq 'm' ) {
803 my ($s) = $desc =~ m/\(suffix (.)\)/;
804 $s ||= 's';
805 $desc =~ s/\s+\(suffix .\)//;
806 $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
807 . "d=days; if no suffix, $s is used.";
808 }
809 $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
810 $desc =~ s/ +$//mg;
811 if ( $short ) {
812 $usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
813 }
814 else {
815 $usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
816 }
817 }
818 }
819
820 $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
821
822 if ( (my @rules = @{$self->{rules}}) ) {
823 $usage .= "\nRules:\n\n";
824 $usage .= join("\n", map { " $_" } @rules) . "\n";
825 }
826 if ( $self->{DSNParser} ) {
827 $usage .= "\n" . $self->{DSNParser}->usage();
828 }
829 $usage .= "\nOptions and values after processing arguments:\n\n";
830 foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
831 my $val = $opt->{value};
832 my $type = $opt->{type} || '';
833 my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
834 $val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
835 : !defined $val ? '(No value)'
836 : $type eq 'd' ? $self->{DSNParser}->as_string($val)
837 : $type =~ m/H|h/ ? join(',', sort keys %$val)
838 : $type =~ m/A|a/ ? join(',', @$val)
839 : $val;
840 $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
841 }
842 return $usage;
843}
844
845sub prompt_noecho {
846 shift @_ if ref $_[0] eq __PACKAGE__;
847 my ( $prompt ) = @_;
848 local $OUTPUT_AUTOFLUSH = 1;
849 print $prompt
850 or die "Cannot print: $OS_ERROR";
851 my $response;
852 eval {
853 require Term::ReadKey;
854 Term::ReadKey::ReadMode('noecho');
855 chomp($response = <STDIN>);
856 Term::ReadKey::ReadMode('normal');
857 print "\n"
858 or die "Cannot print: $OS_ERROR";
859 };
860 if ( $EVAL_ERROR ) {
861 die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
862 }
863 return $response;
864}
865
866sub _read_config_file {
867 my ( $self, $filename ) = @_;
868 open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
869 my @args;
870 my $prefix = '--';
871 my $parse = 1;
872
873 LINE:
874 while ( my $line = <$fh> ) {
875 chomp $line;
876 next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
877 $line =~ s/\s+#.*$//g;
878 $line =~ s/^\s+|\s+$//g;
879 if ( $line eq '--' ) {
880 $prefix = '';
881 $parse = 0;
882 next LINE;
883 }
884 if ( $parse
885 && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
886 ) {
887 push @args, grep { defined $_ } ("$prefix$opt", $arg);
888 }
889 elsif ( $line =~ m/./ ) {
890 push @args, $line;
891 }
892 else {
893 die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
894 }
895 }
896 close $fh;
897 return @args;
898}
899
900sub read_para_after {
901 my ( $self, $file, $regex ) = @_;
902 open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
903 local $INPUT_RECORD_SEPARATOR = '';
904 my $para;
905 while ( $para = <$fh> ) {
906 next unless $para =~ m/^=pod$/m;
907 last;
908 }
909 while ( $para = <$fh> ) {
910 next unless $para =~ m/$regex/;
911 last;
912 }
913 $para = <$fh>;
914 chomp($para);
915 close $fh or die "Can't close $file: $OS_ERROR";
916 return $para;
917}
918
919sub clone {
920 my ( $self ) = @_;
921
922 my %clone = map {
923 my $hashref = $self->{$_};
924 my $val_copy = {};
925 foreach my $key ( keys %$hashref ) {
926 my $ref = ref $hashref->{$key};
927 $val_copy->{$key} = !$ref ? $hashref->{$key}
928 : $ref eq 'HASH' ? { %{$hashref->{$key}} }
929 : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
930 : $hashref->{$key};
931 }
932 $_ => $val_copy;
933 } qw(opts short_opts defaults);
934
935 foreach my $scalar ( qw(got_opts) ) {
936 $clone{$scalar} = $self->{$scalar};
937 }
938
939 return bless \%clone;
940}
941
942sub _parse_size {
943 my ( $self, $opt, $val ) = @_;
944
945 if ( lc($val || '') eq 'null' ) {
946 PTDEBUG && _d('NULL size for', $opt->{long});
947 $opt->{value} = 'null';
948 return;
949 }
950
951 my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
952 my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
953 if ( defined $num ) {
954 if ( $factor ) {
955 $num *= $factor_for{$factor};
956 PTDEBUG && _d('Setting option', $opt->{y},
957 'to num', $num, '* factor', $factor);
958 }
959 $opt->{value} = ($pre || '') . $num;
960 }
961 else {
962 $self->save_error("Invalid size for --$opt->{long}: $val");
963 }
964 return;
965}
966
967sub _parse_attribs {
968 my ( $self, $option, $attribs ) = @_;
969 my $types = $self->{types};
970 return $option
971 . ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
972 . ($attribs->{'negatable'} ? '!' : '' )
973 . ($attribs->{'cumulative'} ? '+' : '' )
974 . ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
975}
976
977sub _parse_synopsis {
978 my ( $self, $file ) = @_;
979 $file ||= $self->{file} || __FILE__;
980 PTDEBUG && _d("Parsing SYNOPSIS in", $file);
981
982 local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
983 open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
984 my $para;
985 1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
986 die "$file does not contain a SYNOPSIS section" unless $para;
987 my @synop;
988 for ( 1..2 ) { # 1 for the usage, 2 for the description
989 my $para = <$fh>;
990 push @synop, $para;
991 }
992 close $fh;
993 PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
994 my ($usage, $desc) = @synop;
995 die "The SYNOPSIS section in $file is not formatted properly"
996 unless $usage && $desc;
997
998 $usage =~ s/^\s*Usage:\s+(.+)/$1/;
999 chomp $usage;
1000
1001 $desc =~ s/\n/ /g;
1002 $desc =~ s/\s{2,}/ /g;
1003 $desc =~ s/\. ([A-Z][a-z])/. $1/g;
1004 $desc =~ s/\s+$//;
1005
1006 return (
1007 description => $desc,
1008 usage => $usage,
1009 );
1010};
1011
1012sub _d {
1013 my ($package, undef, $line) = caller 0;
1014 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1015 map { defined $_ ? $_ : 'undef' }
1016 @_;
1017 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1018}
1019
1020if ( PTDEBUG ) {
1021 print '# ', $^X, ' ', $], "\n";
1022 if ( my $uname = `uname -a` ) {
1023 $uname =~ s/\s+/ /g;
1024 print "# $uname\n";
1025 }
1026 print '# Arguments: ',
1027 join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
1028}
1029
10301;
1031}
1032# ###########################################################################
1033# End OptionParser package
1034# ###########################################################################
1035
1036# ###########################################################################
1037# QueryParser package
1038# This package is a copy without comments from the original. The original
1039# with comments and its test file can be found in the Bazaar repository at,
1040# lib/QueryParser.pm
1041# t/lib/QueryParser.t
1042# See https://launchpad.net/percona-toolkit for more information.
1043# ###########################################################################
1044{
1045package QueryParser;
1046
1047use strict;
1048use warnings FATAL => 'all';
1049use English qw(-no_match_vars);
1050use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1051
1052our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
1053our $tbl_regex = qr{
1054 \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
1055 \b\s*
1056 \(? # Optional paren around tables
1057 ($tbl_ident
1058 (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
1059 )
1060 }xio;
1061our $has_derived = qr{
1062 \b(?:FROM|JOIN|,)
1063 \s*\(\s*SELECT
1064 }xi;
1065
1066our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
1067
1068our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
1069
1070sub new {
1071 my ( $class ) = @_;
1072 bless {}, $class;
1073}
1074
1075sub get_tables {
1076 my ( $self, $query ) = @_;
1077 return unless $query;
1078 PTDEBUG && _d('Getting tables for', $query);
1079
1080 my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
1081 if ( $ddl_stmt ) {
1082 PTDEBUG && _d('Special table type:', $ddl_stmt);
1083 $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
1084 if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
1085 PTDEBUG && _d('Query alters a database, not a table');
1086 return ();
1087 }
1088 if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
1089 my ($select) = $query =~ m/\b(SELECT\b.+)/is;
1090 PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
1091 return $self->get_tables($select);
1092 }
1093 my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
1094 PTDEBUG && _d('Matches table:', $tbl);
1095 return ($tbl);
1096 }
1097
1098 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1099
1100 if ( $query =~ /^\s*LOCK TABLES/i ) {
1101 PTDEBUG && _d('Special table type: LOCK TABLES');
1102 $query =~ s/^(\s*LOCK TABLES\s+)//;
1103 $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g;
1104 PTDEBUG && _d('Locked tables:', $query);
1105 $query = "FROM $query";
1106 }
1107
1108 $query =~ s/\\["']//g; # quoted strings
1109 $query =~ s/".*?"/?/sg; # quoted strings
1110 $query =~ s/'.*?'/?/sg; # quoted strings
1111
1112 my @tables;
1113 foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
1114 PTDEBUG && _d('Match tables:', $tbls);
1115
1116 next if $tbls =~ m/\ASELECT\b/i;
1117
1118 foreach my $tbl ( split(',', $tbls) ) {
1119 $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
1120
1121 if ( $tbl !~ m/[a-zA-Z]/ ) {
1122 PTDEBUG && _d('Skipping suspicious table name:', $tbl);
1123 next;
1124 }
1125
1126 push @tables, $tbl;
1127 }
1128 }
1129 return @tables;
1130}
1131
1132sub has_derived_table {
1133 my ( $self, $query ) = @_;
1134 my $match = $query =~ m/$has_derived/;
1135 PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
1136 return $match;
1137}
1138
1139sub get_aliases {
1140 my ( $self, $query, $list ) = @_;
1141
1142 my $result = {
1143 DATABASE => {},
1144 TABLE => {},
1145 };
1146 return $result unless $query;
1147
1148 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1149
1150 $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
1151
1152 my @tbl_refs;
1153 my ($tbl_refs, $from) = $query =~ m{
1154 (
1155 (FROM|INTO|UPDATE)\b\s* # Keyword before table refs
1156 .+? # Table refs
1157 )
1158 (?:\s+|\z) # If the query does not end with the table
1159 (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
1160 }ix;
1161
1162 if ( $tbl_refs ) {
1163
1164 if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1165 $tbl_refs =~ s/\([^\)]+\)\s*//;
1166 }
1167
1168 PTDEBUG && _d('tbl refs:', $tbl_refs);
1169
1170 my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
1171
1172 my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
1173
1174 $tbl_refs =~ s/ = /=/g;
1175
1176 while (
1177 $tbl_refs =~ m{
1178 $before_tbl\b\s*
1179 ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
1180 \s*$after_tbl
1181 }xgio )
1182 {
1183 my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
1184 PTDEBUG && _d('Match table:', $tbl_ref);
1185 push @tbl_refs, $tbl_ref;
1186 $alias = $self->trim_identifier($alias);
1187
1188 if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
1189 PTDEBUG && _d('Subquery', $tbl_ref);
1190 $result->{TABLE}->{$alias} = undef;
1191 next;
1192 }
1193
1194 my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
1195 $db = $self->trim_identifier($db);
1196 $tbl = $self->trim_identifier($tbl);
1197 $result->{TABLE}->{$alias || $tbl} = $tbl;
1198 $result->{DATABASE}->{$tbl} = $db if $db;
1199 }
1200 }
1201 else {
1202 PTDEBUG && _d("No tables ref in", $query);
1203 }
1204
1205 if ( $list ) {
1206 return \@tbl_refs;
1207 }
1208 else {
1209 return $result;
1210 }
1211}
1212
1213sub split {
1214 my ( $self, $query ) = @_;
1215 return unless $query;
1216 $query = $self->clean_query($query);
1217 PTDEBUG && _d('Splitting', $query);
1218
1219 my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
1220
1221 my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
1222
1223 my @statements;
1224 if ( @split_statements == 1 ) {
1225 push @statements, $query;
1226 }
1227 else {
1228 for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
1229 push @statements, $split_statements[$i].$split_statements[$i+1];
1230
1231 if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
1232 $statements[-2] .= pop @statements;
1233 }
1234 }
1235 }
1236
1237 PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
1238 return @statements;
1239}
1240
1241sub clean_query {
1242 my ( $self, $query ) = @_;
1243 return unless $query;
1244 $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
1245 $query =~ s/^\s+//; # Remove leading spaces
1246 $query =~ s/\s+$//; # Remove trailing spaces
1247 $query =~ s/\s{2,}/ /g; # Remove extra spaces
1248 return $query;
1249}
1250
1251sub split_subquery {
1252 my ( $self, $query ) = @_;
1253 return unless $query;
1254 $query = $self->clean_query($query);
1255 $query =~ s/;$//;
1256
1257 my @subqueries;
1258 my $sqno = 0; # subquery number
1259 my $pos = 0;
1260 while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
1261 $pos = pos($query);
1262 my $word = $1;
1263 PTDEBUG && _d($word, $sqno);
1264 if ( $word =~ m/^\(?SELECT\b/i ) {
1265 my $start_pos = $pos - length($word) - 1;
1266 if ( $start_pos ) {
1267 $sqno++;
1268 PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
1269 $subqueries[$sqno] = {
1270 start_pos => $start_pos,
1271 end_pos => 0,
1272 len => 0,
1273 words => [$word],
1274 lp => 1, # left parentheses
1275 rp => 0, # right parentheses
1276 done => 0,
1277 };
1278 }
1279 else {
1280 PTDEBUG && _d('Main SELECT at pos 0');
1281 }
1282 }
1283 else {
1284 next unless $sqno; # next unless we're in a subquery
1285 PTDEBUG && _d('In subquery', $sqno);
1286 my $sq = $subqueries[$sqno];
1287 if ( $sq->{done} ) {
1288 PTDEBUG && _d('This subquery is done; SQL is for',
1289 ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
1290 next;
1291 }
1292 push @{$sq->{words}}, $word;
1293 my $lp = ($word =~ tr/\(//) || 0;
1294 my $rp = ($word =~ tr/\)//) || 0;
1295 PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
1296 if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
1297 my $end_pos = $pos - 1;
1298 PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
1299 $sq->{end_pos} = $end_pos;
1300 $sq->{len} = $end_pos - $sq->{start_pos};
1301 }
1302 }
1303 }
1304
1305 for my $i ( 1..$#subqueries ) {
1306 my $sq = $subqueries[$i];
1307 next unless $sq;
1308 $sq->{sql} = join(' ', @{$sq->{words}});
1309 substr $query,
1310 $sq->{start_pos} + 1, # +1 for (
1311 $sq->{len} - 1, # -1 for )
1312 "__subquery_$i";
1313 }
1314
1315 return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
1316}
1317
1318sub query_type {
1319 my ( $self, $query, $qr ) = @_;
1320 my ($type, undef) = $qr->distill_verbs($query);
1321 my $rw;
1322 if ( $type =~ m/^SELECT\b/ ) {
1323 $rw = 'read';
1324 }
1325 elsif ( $type =~ m/^$data_manip_stmts\b/
1326 || $type =~ m/^$data_def_stmts\b/ ) {
1327 $rw = 'write'
1328 }
1329
1330 return {
1331 type => $type,
1332 rw => $rw,
1333 }
1334}
1335
1336sub get_columns {
1337 my ( $self, $query ) = @_;
1338 my $cols = [];
1339 return $cols unless $query;
1340 my $cols_def;
1341
1342 if ( $query =~ m/^SELECT/i ) {
1343 $query =~ s/
1344 ^SELECT\s+
1345 (?:ALL
1346 |DISTINCT
1347 |DISTINCTROW
1348 |HIGH_PRIORITY
1349 |STRAIGHT_JOIN
1350 |SQL_SMALL_RESULT
1351 |SQL_BIG_RESULT
1352 |SQL_BUFFER_RESULT
1353 |SQL_CACHE
1354 |SQL_NO_CACHE
1355 |SQL_CALC_FOUND_ROWS
1356 )\s+
1357 /SELECT /xgi;
1358 ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
1359 }
1360 elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1361 ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
1362 }
1363
1364 PTDEBUG && _d('Columns:', $cols_def);
1365 if ( $cols_def ) {
1366 @$cols = split(',', $cols_def);
1367 map {
1368 my $col = $_;
1369 $col = s/^\s+//g;
1370 $col = s/\s+$//g;
1371 $col;
1372 } @$cols;
1373 }
1374
1375 return $cols;
1376}
1377
1378sub parse {
1379 my ( $self, $query ) = @_;
1380 return unless $query;
1381 my $parsed = {};
1382
1383 $query =~ s/\n/ /g;
1384 $query = $self->clean_query($query);
1385
1386 $parsed->{query} = $query,
1387 $parsed->{tables} = $self->get_aliases($query, 1);
1388 $parsed->{columns} = $self->get_columns($query);
1389
1390 my ($type) = $query =~ m/^(\w+)/;
1391 $parsed->{type} = lc $type;
1392
1393
1394 $parsed->{sub_queries} = [];
1395
1396 return $parsed;
1397}
1398
1399sub extract_tables {
1400 my ( $self, %args ) = @_;
1401 my $query = $args{query};
1402 my $default_db = $args{default_db};
1403 my $q = $self->{Quoter} || $args{Quoter};
1404 return unless $query;
1405 PTDEBUG && _d('Extracting tables');
1406 my @tables;
1407 my %seen;
1408 foreach my $db_tbl ( $self->get_tables($query) ) {
1409 next unless $db_tbl;
1410 next if $seen{$db_tbl}++; # Unique-ify for issue 337.
1411 my ( $db, $tbl ) = $q->split_unquote($db_tbl);
1412 push @tables, [ $db || $default_db, $tbl ];
1413 }
1414 return @tables;
1415}
1416
1417sub trim_identifier {
1418 my ($self, $str) = @_;
1419 return unless defined $str;
1420 $str =~ s/`//g;
1421 $str =~ s/^\s+//;
1422 $str =~ s/\s+$//;
1423 return $str;
1424}
1425
1426sub _d {
1427 my ($package, undef, $line) = caller 0;
1428 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1429 map { defined $_ ? $_ : 'undef' }
1430 @_;
1431 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1432}
1433
14341;
1435}
1436# ###########################################################################
1437# End QueryParser package
1438# ###########################################################################
1439
1440# ###########################################################################
1441# QueryRewriter package
1442# This package is a copy without comments from the original. The original
1443# with comments and its test file can be found in the Bazaar repository at,
1444# lib/QueryRewriter.pm
1445# t/lib/QueryRewriter.t
1446# See https://launchpad.net/percona-toolkit for more information.
1447# ###########################################################################
1448{
1449package QueryRewriter;
1450
1451use strict;
1452use warnings FATAL => 'all';
1453use English qw(-no_match_vars);
1454use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1455
1456our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
1457 |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
1458my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
1459my $bal;
1460$bal = qr/
1461 \(
1462 (?:
1463 (?> [^()]+ ) # Non-parens without backtracking
1464 |
1465 (??{ $bal }) # Group with matching parens
1466 )*
1467 \)
1468 /x;
1469
1470my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
1471my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
1472my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
1473my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
1474
1475
1476sub new {
1477 my ( $class, %args ) = @_;
1478 my $self = { %args };
1479 return bless $self, $class;
1480}
1481
1482sub strip_comments {
1483 my ( $self, $query ) = @_;
1484 return unless $query;
1485 $query =~ s/$olc_re//go;
1486 $query =~ s/$mlc_re//go;
1487 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
1488 $query =~ s/$vlc_re//go;
1489 }
1490 return $query;
1491}
1492
1493sub shorten {
1494 my ( $self, $query, $length ) = @_;
1495 $query =~ s{
1496 \A(
1497 (?:INSERT|REPLACE)
1498 (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
1499 (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
1500 )
1501 \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
1502 {$1 /*... omitted ...*/$2}xsi;
1503
1504 return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
1505
1506 my $last_length = 0;
1507 my $query_length = length($query);
1508 while (
1509 $length > 0
1510 && $query_length > $length
1511 && $query_length < ( $last_length || $query_length + 1 )
1512 ) {
1513 $last_length = $query_length;
1514 $query =~ s{
1515 (\bIN\s*\() # The opening of an IN list
1516 ([^\)]+) # Contents of the list, assuming no item contains paren
1517 (?=\)) # Close of the list
1518 }
1519 {
1520 $1 . __shorten($2)
1521 }gexsi;
1522 }
1523
1524 return $query;
1525}
1526
1527sub __shorten {
1528 my ( $snippet ) = @_;
1529 my @vals = split(/,/, $snippet);
1530 return $snippet unless @vals > 20;
1531 my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
1532 return
1533 join(',', @keep)
1534 . "/*... omitted "
1535 . scalar(@vals)
1536 . " items ...*/";
1537}
1538
1539sub fingerprint {
1540 my ( $self, $query ) = @_;
1541
1542 $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
1543 && return 'mysqldump';
1544 $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query
1545 && return 'percona-toolkit';
1546 $query =~ m/\Aadministrator command: /
1547 && return $query;
1548 $query =~ m/\A\s*(call\s+\S+)\(/i
1549 && return lc($1); # Warning! $1 used, be careful.
1550 if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
1551 $query = $beginning; # Shorten multi-value INSERT statements ASAP
1552 }
1553
1554 $query =~ s/$olc_re//go;
1555 $query =~ s/$mlc_re//go;
1556 $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
1557 && return $query;
1558
1559 $query =~ s/\\["']//g; # quoted strings
1560 $query =~ s/".*?"/?/sg; # quoted strings
1561 $query =~ s/'.*?'/?/sg; # quoted strings
1562
1563 if ( $self->{match_md5_checksums} ) {
1564 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
1565 }
1566
1567 if ( !$self->{match_embedded_numbers} ) {
1568 $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
1569 }
1570 else {
1571 $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
1572 }
1573
1574 if ( $self->{match_md5_checksums} ) {
1575 $query =~ s/[xb+-]\?/?/g;
1576 }
1577 else {
1578 $query =~ s/[xb.+-]\?/?/g;
1579 }
1580
1581 $query =~ s/\A\s+//; # Chop off leading whitespace
1582 chomp $query; # Kill trailing whitespace
1583 $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
1584 $query = lc $query;
1585 $query =~ s/\bnull\b/?/g; # Get rid of NULLs
1586 $query =~ s{ # Collapse IN and VALUES lists
1587 \b(in|values?)(?:[\s,]*\([\s?,]*\))+
1588 }
1589 {$1(?+)}gx;
1590 $query =~ s{ # Collapse UNION
1591 \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
1592 }
1593 {$1 /*repeat$2*/}xg;
1594 $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
1595
1596 if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
1597 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
1598 }
1599
1600 return $query;
1601}
1602
1603sub distill_verbs {
1604 my ( $self, $query ) = @_;
1605
1606 $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
1607 $query =~ m/\A\s*use\s+/ && return "USE";
1608 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
1609 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
1610
1611 if ( $query =~ m/\Aadministrator command:/ ) {
1612 $query =~ s/administrator command:/ADMIN/;
1613 $query = uc $query;
1614 return $query;
1615 }
1616
1617 $query = $self->strip_comments($query);
1618
1619 if ( $query =~ m/\A\s*SHOW\s+/i ) {
1620 PTDEBUG && _d($query);
1621
1622 $query = uc $query;
1623 $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
1624 $query =~ s/\s+COUNT[^)]+\)//g;
1625
1626 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
1627
1628 $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
1629 $query =~ s/\s+/ /g;
1630 PTDEBUG && _d($query);
1631 return $query;
1632 }
1633
1634 eval $QueryParser::data_def_stmts;
1635 eval $QueryParser::tbl_ident;
1636 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
1637 if ( $dds) {
1638 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
1639 $obj = uc $obj if $obj;
1640 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
1641 my ($db_or_tbl)
1642 = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
1643 PTDEBUG && _d('Matches db or table:', $db_or_tbl);
1644 return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
1645 }
1646
1647 my @verbs = $query =~ m/\b($verbs)\b/gio;
1648 @verbs = do {
1649 my $last = '';
1650 grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
1651 };
1652
1653 if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
1654 PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
1655 my $union = grep { $_ eq 'UNION' } @verbs;
1656 @verbs = $union ? qw(SELECT UNION) : qw(SELECT);
1657 }
1658
1659 my $verb_str = join(q{ }, @verbs);
1660 return $verb_str;
1661}
1662
1663sub __distill_tables {
1664 my ( $self, $query, $table, %args ) = @_;
1665 my $qp = $args{QueryParser} || $self->{QueryParser};
1666 die "I need a QueryParser argument" unless $qp;
1667
1668 my @tables = map {
1669 $_ =~ s/`//g;
1670 $_ =~ s/(_?)[0-9]+/$1?/g;
1671 $_;
1672 } grep { defined $_ } $qp->get_tables($query);
1673
1674 push @tables, $table if $table;
1675
1676 @tables = do {
1677 my $last = '';
1678 grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
1679 };
1680
1681 return @tables;
1682}
1683
1684sub distill {
1685 my ( $self, $query, %args ) = @_;
1686
1687 if ( $args{generic} ) {
1688 my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
1689 return '' unless $cmd;
1690 $query = (uc $cmd) . ($arg ? " $arg" : '');
1691 }
1692 else {
1693 my ($verbs, $table) = $self->distill_verbs($query, %args);
1694
1695 if ( $verbs && $verbs =~ m/^SHOW/ ) {
1696 my %alias_for = qw(
1697 SCHEMA DATABASE
1698 KEYS INDEX
1699 INDEXES INDEX
1700 );
1701 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
1702 $query = $verbs;
1703 }
1704 else {
1705 my @tables = $self->__distill_tables($query, $table, %args);
1706 $query = join(q{ }, $verbs, @tables);
1707 }
1708 }
1709
1710 if ( $args{trf} ) {
1711 $query = $args{trf}->($query, %args);
1712 }
1713
1714 return $query;
1715}
1716
1717sub convert_to_select {
1718 my ( $self, $query ) = @_;
1719 return unless $query;
1720
1721 return if $query =~ m/=\s*\(\s*SELECT /i;
1722
1723 $query =~ s{
1724 \A.*?
1725 update(?:\s+(?:low_priority|ignore))?\s+(.*?)
1726 \s+set\b(.*?)
1727 (?:\s*where\b(.*?))?
1728 (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
1729 \Z
1730 }
1731 {__update_to_select($1, $2, $3, $4)}exsi
1732 || $query =~ s{
1733 \A.*?
1734 (?:insert(?:\s+ignore)?|replace)\s+
1735 .*?\binto\b(.*?)\(([^\)]+)\)\s*
1736 values?\s*(\(.*?\))\s*
1737 (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1738 \Z
1739 }
1740 {__insert_to_select($1, $2, $3)}exsi
1741 || $query =~ s{
1742 \A.*?
1743 (?:insert(?:\s+ignore)?|replace)\s+
1744 (?:.*?\binto)\b(.*?)\s*
1745 set\s+(.*?)\s*
1746 (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1747 \Z
1748 }
1749 {__insert_to_select_with_set($1, $2)}exsi
1750 || $query =~ s{
1751 \A.*?
1752 delete\s+(.*?)
1753 \bfrom\b(.*)
1754 \Z
1755 }
1756 {__delete_to_select($1, $2)}exsi;
1757 $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
1758 $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
1759 return $query;
1760}
1761
1762sub convert_select_list {
1763 my ( $self, $query ) = @_;
1764 $query =~ s{
1765 \A\s*select(.*?)\bfrom\b
1766 }
1767 {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
1768 return $query;
1769}
1770
1771sub __delete_to_select {
1772 my ( $delete, $join ) = @_;
1773 if ( $join =~ m/\bjoin\b/ ) {
1774 return "select 1 from $join";
1775 }
1776 return "select * from $join";
1777}
1778
1779sub __insert_to_select {
1780 my ( $tbl, $cols, $vals ) = @_;
1781 PTDEBUG && _d('Args:', @_);
1782 my @cols = split(/,/, $cols);
1783 PTDEBUG && _d('Cols:', @cols);
1784 $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
1785 my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
1786 PTDEBUG && _d('Vals:', @vals);
1787 if ( @cols == @vals ) {
1788 return "select * from $tbl where "
1789 . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
1790 }
1791 else {
1792 return "select * from $tbl limit 1";
1793 }
1794}
1795
1796sub __insert_to_select_with_set {
1797 my ( $from, $set ) = @_;
1798 $set =~ s/,/ and /g;
1799 return "select * from $from where $set ";
1800}
1801
1802sub __update_to_select {
1803 my ( $from, $set, $where, $limit ) = @_;
1804 return "select $set from $from "
1805 . ( $where ? "where $where" : '' )
1806 . ( $limit ? " $limit " : '' );
1807}
1808
1809sub wrap_in_derived {
1810 my ( $self, $query ) = @_;
1811 return unless $query;
1812 return $query =~ m/\A\s*select/i
1813 ? "select 1 from ($query) as x limit 1"
1814 : $query;
1815}
1816
1817sub _d {
1818 my ($package, undef, $line) = caller 0;
1819 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1820 map { defined $_ ? $_ : 'undef' }
1821 @_;
1822 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1823}
1824
18251;
1826}
1827# ###########################################################################
1828# End QueryRewriter package
1829# ###########################################################################
1830
1831# ###########################################################################
1832# This is a combination of modules and programs in one -- a runnable module.
1833# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
1834# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
1835#
1836# Check at the end of this package for the call to main() which actually runs
1837# the program.
1838# ###########################################################################
1839package pt_fingerprint;
1840
1841use English qw(-no_match_vars);
1842use Data::Dumper;
1843$Data::Dumper::Indent = 1;
1844$OUTPUT_AUTOFLUSH = 1;
1845
1846use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1847
1848sub main {
1849 @ARGV = @_; # set global ARGV for this package
1850
1851 # ##########################################################################
1852 # Get configuration information.
1853 # ##########################################################################
1854 my $o = new OptionParser();
1855 $o->get_specs();
1856 $o->get_opts();
1857 $o->usage_or_errors();
1858
1859 my $qp = new QueryParser();
1860 my $qr = new QueryRewriter(
1861 QueryParser => $qp,
1862 match_md5_checksums => $o->get('match-md5-checksums'),
1863 match_embedded_numbers => $o->get('match-embedded-numbers'),
1864 );
1865
1866 if ( $o->got('query') ) {
1867 print $qr->fingerprint($o->get('query')), "\n";
1868 }
1869 else {
1870 local $INPUT_RECORD_SEPARATOR = ";\n";
1871 while ( <> ) {
1872 my $query = $_;
1873 chomp $query;
1874 $query =~ s/^#.+$//mg;
1875 $query =~ s/^\s+//;
1876 next unless $query =~ m/^\w/;
1877 print $qr->fingerprint($query), "\n";
1878 }
1879 }
1880}
1881
1882# ############################################################################
1883# Run the program.
1884# ############################################################################
1885if ( !caller ) { exit main(@ARGV); }
1886
18871; # Because this is a module as well as a script.
1888
1889# #############################################################################
1890# Documentation.
1891# #############################################################################
1892
1893=pod
1894
1895=head1 NAME
1896
1897pt-fingerprint - Convert queries into fingerprints.
1898
1899=head1 SYNOPSIS
1900
1901Usage: pt-fingerprint [OPTIONS] [FILES]
1902
1903pt-fingerprint converts queries into fingerprints. With the --query
1904option, converts the option's value into a fingerprint. With no options, treats
1905command-line arguments as FILEs and reads and converts semicolon-separated
1906queries from the FILEs. When FILE is -, it read standard input.
1907
1908Convert a single query:
1909
1910 pt-fingerprint --query "select a, b, c from users where id = 500"
1911
1912Convert a file full of queries:
1913
1914 pt-fingerprint /path/to/file.txt
1915
1916=head1 RISKS
1917
1918The following section is included to inform users about the potential risks,
1919whether known or unknown, of using this tool. The two main categories of risks
1920are those created by the nature of the tool (e.g. read-only tools vs. read-write
1921tools) and those created by bugs.
1922
1923The pt-fingerprint tool simply reads data and transforms it, so risks are
1924minimal.
1925
1926See also L<"BUGS"> for more information on filing bugs and getting help.
1927
1928=head1 DESCRIPTION
1929
1930A query fingerprint is the abstracted form of a query, which makes it possible
1931to group similar queries together. Abstracting a query removes literal values,
1932normalizes whitespace, and so on. For example, consider these two queries:
1933
1934 SELECT name, password FROM user WHERE id='12823';
1935 select name, password from user
1936 where id=5;
1937
1938Both of those queries will fingerprint to
1939
1940 select name, password from user where id=?
1941
1942Once the query's fingerprint is known, we can then talk about a query as though
1943it represents all similar queries.
1944
1945Query fingerprinting accommodates a great many special cases, which have proven
1946necessary in the real world. For example, an IN list with 5 literals is really
1947equivalent to one with 4 literals, so lists of literals are collapsed to a
1948single one. If you want to understand more about how and why all of these cases
1949are handled, please review the test cases in the Subversion repository. If you
1950find something that is not fingerprinted properly, please submit a bug report
1951with a reproducible test case. Here is a list of transformations during
1952fingerprinting, which might not be exhaustive:
1953
1954=over
1955
1956=item *
1957
1958Group all SELECT queries from mysqldump together, even if they are against
1959different tables. Ditto for all of pt-table-checksum's checksum queries.
1960
1961=item *
1962
1963Shorten multi-value INSERT statements to a single VALUES() list.
1964
1965=item *
1966
1967Strip comments.
1968
1969=item *
1970
1971Abstract the databases in USE statements, so all USE statements are grouped
1972together.
1973
1974=item *
1975
1976Replace all literals, such as quoted strings. For efficiency, the code that
1977replaces literal numbers is somewhat non-selective, and might replace some
1978things as numbers when they really are not. Hexadecimal literals are also
1979replaced. NULL is treated as a literal. Numbers embedded in identifiers are
1980also replaced, so tables named similarly will be fingerprinted to the same
1981values (e.g. users_2009 and users_2010 will fingerprint identically).
1982
1983=item *
1984
1985Collapse all whitespace into a single space.
1986
1987=item *
1988
1989Lowercase the entire query.
1990
1991=item *
1992
1993Replace all literals inside of IN() and VALUES() lists with a single
1994placeholder, regardless of cardinality.
1995
1996=item *
1997
1998Collapse multiple identical UNION queries into a single one.
1999
2000=back
2001
2002=head1 OPTIONS
2003
2004This tool accepts additional command-line arguments. Refer to the
2005L<"SYNOPSIS"> and usage information for details.
2006
2007=over
2008
2009=item --config
2010
2011type: Array
2012
2013Read this comma-separated list of config files; if specified, this must be the
2014first option on the command line.
2015
2016=item --help
2017
2018Show help and exit.
2019
2020=item --match-embedded-numbers
2021
2022Match numbers embedded in words and replace as single values. This option
2023causes the tool to be more careful about matching numbers so that words
2024with numbers, like C<catch22> are matched and replaced as a single C<?>
2025placeholder. Otherwise the default number matching pattern will replace
2026C<catch22> as C<catch?>.
2027
2028This is helpful if database or table names contain numbers.
2029
2030=item --match-md5-checksums
2031
2032Match MD5 checksums and replace as single values. This option causes
2033the tool to be more careful about matching numbers so that MD5 checksums
2034like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a
2035single C<?> placeholder. Otherwise, the default number matching pattern will
2036replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>.
2037
2038=item --query
2039
2040type: string
2041
2042The query to convert into a fingerprint.
2043
2044=item --version
2045
2046Show version and exit.
2047
2048=back
2049
2050=head1 ENVIRONMENT
2051
2052The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
2053To enable debugging and capture all output to a file, run the tool like:
2054
2055 PTDEBUG=1 pt-fingerprint ... > FILE 2>&1
2056
2057Be careful: debugging output is voluminous and can generate several megabytes
2058of output.
2059
2060=head1 SYSTEM REQUIREMENTS
2061
2062You need Perl, DBI, DBD::mysql, and some core packages that ought to be
2063installed in any reasonably new version of Perl.
2064
2065=head1 BUGS
2066
2067For a list of known bugs, see L<http://www.percona.com/bugs/pt-fingerprint>.
2068
2069Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
2070Include the following information in your bug report:
2071
2072=over
2073
2074=item * Complete command-line used to run the tool
2075
2076=item * Tool L<"--version">
2077
2078=item * MySQL version of all servers involved
2079
2080=item * Output from the tool including STDERR
2081
2082=item * Input files (log/dump/config files, etc.)
2083
2084=back
2085
2086If possible, include debugging output by running the tool with C<PTDEBUG>;
2087see L<"ENVIRONMENT">.
2088
2089=head1 DOWNLOADING
2090
2091Visit L<http://www.percona.com/software/percona-toolkit/> to download the
2092latest release of Percona Toolkit. Or, get the latest release from the
2093command line:
2094
2095 wget percona.com/get/percona-toolkit.tar.gz
2096
2097 wget percona.com/get/percona-toolkit.rpm
2098
2099 wget percona.com/get/percona-toolkit.deb
2100
2101You can also get individual tools from the latest release:
2102
2103 wget percona.com/get/TOOL
2104
2105Replace C<TOOL> with the name of any tool.
2106
2107=head1 AUTHORS
2108
2109Baron Schwartz and Daniel Nichter
2110
2111=head1 ABOUT PERCONA TOOLKIT
2112
2113This tool is part of Percona Toolkit, a collection of advanced command-line
2114tools developed by Percona for MySQL support and consulting. Percona Toolkit
2115was forked from two projects in June, 2011: Maatkit and Aspersa. Those
2116projects were created by Baron Schwartz and developed primarily by him and
2117Daniel Nichter, both of whom are employed by Percona. Visit
2118L<http://www.percona.com/software/> for more software developed by Percona.
2119
2120=head1 COPYRIGHT, LICENSE, AND WARRANTY
2121
2122This program is copyright 2011-2012 Percona Inc.
2123Feedback and improvements are welcome.
2124
2125THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
2126WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
2127MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
2128
2129This program is free software; you can redistribute it and/or modify it under
2130the terms of the GNU General Public License as published by the Free Software
2131Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
2132systems, you can issue `man perlgpl' or `man perlartistic' to read these
2133licenses.
2134
2135You should have received a copy of the GNU General Public License along with
2136this program; if not, write to the Free Software Foundation, Inc., 59 Temple
2137Place, Suite 330, Boston, MA 02111-1307 USA.
2138
2139=head1 VERSION
2140
2141pt-fingerprint 2.0.0
2142
2143=cut
02144
=== modified file 'lib/QueryRewriter.pm'
--- lib/QueryRewriter.pm 2012-01-19 19:46:56 +0000
+++ lib/QueryRewriter.pm 2012-03-30 22:06:22 +0000
@@ -175,10 +175,30 @@
175 $query =~ s/\\["']//g; # quoted strings175 $query =~ s/\\["']//g; # quoted strings
176 $query =~ s/".*?"/?/sg; # quoted strings176 $query =~ s/".*?"/?/sg; # quoted strings
177 $query =~ s/'.*?'/?/sg; # quoted strings177 $query =~ s/'.*?'/?/sg; # quoted strings
178 # This regex is extremely broad in its definition of what looks like a178
179 # number. That is for speed.179 # MD5 checksums which are always 32 hex chars
180 $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers180 if ( $self->{match_md5_checksums} ) {
181 $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers181 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
182 }
183
184 # Things resembling numbers/hex.
185 if ( !$self->{match_embedded_numbers} ) {
186 # For speed, this regex is extremely broad in its definition
187 # of what looks like a number.
188 $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
189 }
190 else {
191 $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
192 }
193
194 # Clean up leftovers
195 if ( $self->{match_md5_checksums} ) {
196 $query =~ s/[xb+-]\?/?/g;
197 }
198 else {
199 $query =~ s/[xb.+-]\?/?/g;
200 }
201
182 $query =~ s/\A\s+//; # Chop off leading whitespace202 $query =~ s/\A\s+//; # Chop off leading whitespace
183 chomp $query; # Kill trailing whitespace203 chomp $query; # Kill trailing whitespace
184 $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace204 $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
185205
=== modified file 't/lib/QueryRewriter.t'
--- t/lib/QueryRewriter.t 2012-03-06 13:56:08 +0000
+++ t/lib/QueryRewriter.t 2012-03-30 22:06:22 +0000
@@ -10,7 +10,7 @@
10use strict;10use strict;
11use warnings FATAL => 'all';11use warnings FATAL => 'all';
12use English qw(-no_match_vars);12use English qw(-no_match_vars);
13use Test::More tests => 266;13use Test::More tests => 271;
1414
15use QueryRewriter;15use QueryRewriter;
16use QueryParser;16use QueryParser;
@@ -349,6 +349,64 @@
349 "Fingerprint LOAD DATA INFILE"349 "Fingerprint LOAD DATA INFILE"
350);350);
351351
352# fingerprint MD5 checksums, 32 char hex strings. This is a
353# special feature used by pt-fingerprint.
354$qr = new QueryRewriter(
355 QueryParser => $qp,
356 match_md5_checksums => 1,
357);
358
359is(
360 $qr->fingerprint(
361 "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
362 ),
363 "select * from db.?_temp where id=?",
364 "Fingerprint db.MD5_tbl"
365);
366
367is(
368 $qr->fingerprint(
369 "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
370 ),
371 "select * from db.temp_? where id=?",
372 "Fingerprint db.tbl_MD5"
373);
374
375$qr = new QueryRewriter(
376 QueryParser => $qp,
377 match_md5_checksums => 1,
378 match_embedded_numbers => 1,
379);
380
381is(
382 $qr->fingerprint(
383 "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
384 ),
385 "select * from db.?_temp where id=?",
386 "Fingerprint db.MD5_tbl (with match_embedded_numbers)"
387);
388
389is(
390 $qr->fingerprint(
391 "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
392 ),
393 "select * from db.temp_? where id=?",
394 "Fingerprint db.tbl_MD5 (with match_embedded_numbers)"
395);
396
397$qr = new QueryRewriter(
398 QueryParser => $qp,
399 match_embedded_numbers => 1,
400);
401
402is(
403 $qr->fingerprint(
404 "SELECT * FROM prices.rt_5min where id=1"
405 ),
406 "select * from prices.rt_5min where id=?",
407 "Fingerprint db.tbl<number>name (preserve number)"
408);
409
352# #############################################################################410# #############################################################################
353# convert_to_select()411# convert_to_select()
354# #############################################################################412# #############################################################################
355413
=== added directory 't/pt-fingerprint'
=== added file 't/pt-fingerprint/basics.t'
--- t/pt-fingerprint/basics.t 1970-01-01 00:00:00 +0000
+++ t/pt-fingerprint/basics.t 2012-03-30 22:06:22 +0000
@@ -0,0 +1,101 @@
1#!/usr/bin/env perl
2
3BEGIN {
4 die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
5 unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
6 unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
7};
8
9use strict;
10use warnings FATAL => 'all';
11use English qw(-no_match_vars);
12use Test::More tests => 7;
13
14use PerconaTest;
15require "$trunk/bin/pt-fingerprint";
16
17my @args = qw();
18my $output;
19my $sample = "$trunk/t/pt-fingerprint/samples";
20my $pqd = "$trunk/bin/pt-query-digest";
21
22$output = `$trunk/bin/pt-fingerprint --help`;
23like(
24 $output,
25 qr/--help/,
26 "It runs"
27);
28
29
30sub test_query_file {
31 my ($file) = @_;
32 if ( ! -f "$sample/$file.fingerprint" ) {
33 `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`;
34 diag("Created $sample/$file.fingerprint");
35 }
36 chomp(my $expect = `cat $sample/$file.fingerprint`);
37 my $got = output(
38 sub { pt_fingerprint::main("$sample/$file") }
39 );
40 chomp($got);
41 is(
42 $got,
43 $expect,
44 "$file fingerprint"
45 );
46};
47
48opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n";
49while (defined(my $file = readdir($dir))) {
50 next unless $file =~ m/^query\d+$/;
51 test_query_file($file);
52}
53closedir $dir;
54
55
56sub test_query {
57 my (%args) = @_;
58 my $query = $args{query};
59 my $expect = $args{expect};
60 my @ops = $args{ops} ? @{$args{ops}} : ();
61
62 $output = output(
63 sub { pt_fingerprint::main('--query', $query, @ops) }
64 );
65 chomp($output);
66 is(
67 $output,
68 $expect,
69 $args{name} ? $args{name} : "Fingerprint " . substr($query, 0, 70)
70 );
71}
72
73test_query(
74 query => 'select * from tbl where id=1',
75 expect => 'select * from tbl where id=?',
76);
77
78test_query(
79 name => "Fingerprint MD5_word",
80 query => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1",
81 expect => "select c from db.?_temp where id=?",
82 ops => [qw(--match-md5-checksums)],
83);
84
85test_query(
86 name => "Fingerprint word_MD5",
87 query => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1",
88 expect => "select c from db.temp_? where id=?",
89 ops => [qw(--match-md5-checksums)],
90);
91
92test_query(
93 name => "Fingerprint word<number>",
94 query => "SELECT c FROM db.catch22 WHERE id is null",
95 expect => "select c from db.catch22 where id is ?",
96 ops => [qw(--match-embedded-numbers)],
97);
98# #############################################################################
99# Done.
100# #############################################################################
101exit;
0102
=== added directory 't/pt-fingerprint/samples'
=== added file 't/pt-fingerprint/samples/query001'
--- t/pt-fingerprint/samples/query001 1970-01-01 00:00:00 +0000
+++ t/pt-fingerprint/samples/query001 2012-03-30 22:06:22 +0000
@@ -0,0 +1,2 @@
1# Query_time: 1
2select * from db.tbl where id=1 or foo='bar';
03
=== added file 't/pt-fingerprint/samples/query001.fingerprint'
--- t/pt-fingerprint/samples/query001.fingerprint 1970-01-01 00:00:00 +0000
+++ t/pt-fingerprint/samples/query001.fingerprint 2012-03-30 22:06:22 +0000
@@ -0,0 +1,1 @@
1select * from db.tbl where id=? or foo=?
02
=== added file 't/pt-fingerprint/samples/query002'
--- t/pt-fingerprint/samples/query002 1970-01-01 00:00:00 +0000
+++ t/pt-fingerprint/samples/query002 2012-03-30 22:06:22 +0000
@@ -0,0 +1,2 @@
1# Query_time: 1
2select col from db.tbl1 where id in (1, 2, 3);
03
=== added file 't/pt-fingerprint/samples/query002.fingerprint'
--- t/pt-fingerprint/samples/query002.fingerprint 1970-01-01 00:00:00 +0000
+++ t/pt-fingerprint/samples/query002.fingerprint 2012-03-30 22:06:22 +0000
@@ -0,0 +1,1 @@
1select col from db.tbl? where id in(?+)

Subscribers

People subscribed via source and target branches