Merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 224
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint
Merge into: lp:percona-toolkit/2.1
Diff against target: 2398 lines (+2333/-5)
8 files modified
bin/pt-fingerprint (+2143/-0)
lib/QueryRewriter.pm (+24/-4)
t/lib/QueryRewriter.t (+59/-1)
t/pt-fingerprint/basics.t (+101/-0)
t/pt-fingerprint/samples/query001 (+2/-0)
t/pt-fingerprint/samples/query001.fingerprint (+1/-0)
t/pt-fingerprint/samples/query002 (+2/-0)
t/pt-fingerprint/samples/query002.fingerprint (+1/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+100250@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'bin/pt-fingerprint'
2--- bin/pt-fingerprint 1970-01-01 00:00:00 +0000
3+++ bin/pt-fingerprint 2012-03-30 22:06:22 +0000
4@@ -0,0 +1,2143 @@
5+#!/usr/bin/env perl
6+
7+# This program is part of Percona Toolkit: http://www.percona.com/software/
8+# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
9+# notices and disclaimers.
10+
11+use strict;
12+use warnings FATAL => 'all';
13+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
14+
15+# ###########################################################################
16+# OptionParser package
17+# This package is a copy without comments from the original. The original
18+# with comments and its test file can be found in the Bazaar repository at,
19+# lib/OptionParser.pm
20+# t/lib/OptionParser.t
21+# See https://launchpad.net/percona-toolkit for more information.
22+# ###########################################################################
23+{
24+package OptionParser;
25+
26+use strict;
27+use warnings FATAL => 'all';
28+use English qw(-no_match_vars);
29+use constant PTDEBUG => $ENV{PTDEBUG} || 0;
30+
31+use List::Util qw(max);
32+use Getopt::Long;
33+
34+my $POD_link_re = '[LC]<"?([^">]+)"?>';
35+
36+sub new {
37+ my ( $class, %args ) = @_;
38+ my @required_args = qw();
39+ foreach my $arg ( @required_args ) {
40+ die "I need a $arg argument" unless $args{$arg};
41+ }
42+
43+ my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
44+ $program_name ||= $PROGRAM_NAME;
45+ my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
46+
47+ my %attributes = (
48+ 'type' => 1,
49+ 'short form' => 1,
50+ 'group' => 1,
51+ 'default' => 1,
52+ 'cumulative' => 1,
53+ 'negatable' => 1,
54+ );
55+
56+ my $self = {
57+ head1 => 'OPTIONS', # These args are used internally
58+ skip_rules => 0, # to instantiate another Option-
59+ item => '--(.*)', # Parser obj that parses the
60+ attributes => \%attributes, # DSN OPTIONS section. Tools
61+ parse_attributes => \&_parse_attribs, # don't tinker with these args.
62+
63+ %args,
64+
65+ strict => 1, # disabled by a special rule
66+ program_name => $program_name,
67+ opts => {},
68+ got_opts => 0,
69+ short_opts => {},
70+ defaults => {},
71+ groups => {},
72+ allowed_groups => {},
73+ errors => [],
74+ rules => [], # desc of rules for --help
75+ mutex => [], # rule: opts are mutually exclusive
76+ atleast1 => [], # rule: at least one opt is required
77+ disables => {}, # rule: opt disables other opts
78+ defaults_to => {}, # rule: opt defaults to value of other opt
79+ DSNParser => undef,
80+ default_files => [
81+ "/etc/percona-toolkit/percona-toolkit.conf",
82+ "/etc/percona-toolkit/$program_name.conf",
83+ "$home/.percona-toolkit.conf",
84+ "$home/.$program_name.conf",
85+ ],
86+ types => {
87+ string => 's', # standard Getopt type
88+ int => 'i', # standard Getopt type
89+ float => 'f', # standard Getopt type
90+ Hash => 'H', # hash, formed from a comma-separated list
91+ hash => 'h', # hash as above, but only if a value is given
92+ Array => 'A', # array, similar to Hash
93+ array => 'a', # array, similar to hash
94+ DSN => 'd', # DSN
95+ size => 'z', # size with kMG suffix (powers of 2^10)
96+ time => 'm', # time, with an optional suffix of s/h/m/d
97+ },
98+ };
99+
100+ return bless $self, $class;
101+}
102+
103+sub get_specs {
104+ my ( $self, $file ) = @_;
105+ $file ||= $self->{file} || __FILE__;
106+ my @specs = $self->_pod_to_specs($file);
107+ $self->_parse_specs(@specs);
108+
109+ open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
110+ my $contents = do { local $/ = undef; <$fh> };
111+ close $fh;
112+ if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
113+ PTDEBUG && _d('Parsing DSN OPTIONS');
114+ my $dsn_attribs = {
115+ dsn => 1,
116+ copy => 1,
117+ };
118+ my $parse_dsn_attribs = sub {
119+ my ( $self, $option, $attribs ) = @_;
120+ map {
121+ my $val = $attribs->{$_};
122+ if ( $val ) {
123+ $val = $val eq 'yes' ? 1
124+ : $val eq 'no' ? 0
125+ : $val;
126+ $attribs->{$_} = $val;
127+ }
128+ } keys %$attribs;
129+ return {
130+ key => $option,
131+ %$attribs,
132+ };
133+ };
134+ my $dsn_o = new OptionParser(
135+ description => 'DSN OPTIONS',
136+ head1 => 'DSN OPTIONS',
137+ dsn => 0, # XXX don't infinitely recurse!
138+ item => '\* (.)', # key opts are a single character
139+ skip_rules => 1, # no rules before opts
140+ attributes => $dsn_attribs,
141+ parse_attributes => $parse_dsn_attribs,
142+ );
143+ my @dsn_opts = map {
144+ my $opts = {
145+ key => $_->{spec}->{key},
146+ dsn => $_->{spec}->{dsn},
147+ copy => $_->{spec}->{copy},
148+ desc => $_->{desc},
149+ };
150+ $opts;
151+ } $dsn_o->_pod_to_specs($file);
152+ $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
153+ }
154+
155+ if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
156+ $self->{version} = $1;
157+ PTDEBUG && _d($self->{version});
158+ }
159+
160+ return;
161+}
162+
163+sub DSNParser {
164+ my ( $self ) = @_;
165+ return $self->{DSNParser};
166+};
167+
168+sub get_defaults_files {
169+ my ( $self ) = @_;
170+ return @{$self->{default_files}};
171+}
172+
173+sub _pod_to_specs {
174+ my ( $self, $file ) = @_;
175+ $file ||= $self->{file} || __FILE__;
176+ open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
177+
178+ my @specs = ();
179+ my @rules = ();
180+ my $para;
181+
182+ local $INPUT_RECORD_SEPARATOR = '';
183+ while ( $para = <$fh> ) {
184+ next unless $para =~ m/^=head1 $self->{head1}/;
185+ last;
186+ }
187+
188+ while ( $para = <$fh> ) {
189+ last if $para =~ m/^=over/;
190+ next if $self->{skip_rules};
191+ chomp $para;
192+ $para =~ s/\s+/ /g;
193+ $para =~ s/$POD_link_re/$1/go;
194+ PTDEBUG && _d('Option rule:', $para);
195+ push @rules, $para;
196+ }
197+
198+ die "POD has no $self->{head1} section" unless $para;
199+
200+ do {
201+ if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
202+ chomp $para;
203+ PTDEBUG && _d($para);
204+ my %attribs;
205+
206+ $para = <$fh>; # read next paragraph, possibly attributes
207+
208+ if ( $para =~ m/: / ) { # attributes
209+ $para =~ s/\s+\Z//g;
210+ %attribs = map {
211+ my ( $attrib, $val) = split(/: /, $_);
212+ die "Unrecognized attribute for --$option: $attrib"
213+ unless $self->{attributes}->{$attrib};
214+ ($attrib, $val);
215+ } split(/; /, $para);
216+ if ( $attribs{'short form'} ) {
217+ $attribs{'short form'} =~ s/-//;
218+ }
219+ $para = <$fh>; # read next paragraph, probably short help desc
220+ }
221+ else {
222+ PTDEBUG && _d('Option has no attributes');
223+ }
224+
225+ $para =~ s/\s+\Z//g;
226+ $para =~ s/\s+/ /g;
227+ $para =~ s/$POD_link_re/$1/go;
228+
229+ $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
230+ PTDEBUG && _d('Short help:', $para);
231+
232+ die "No description after option spec $option" if $para =~ m/^=item/;
233+
234+ if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
235+ $option = $base_option;
236+ $attribs{'negatable'} = 1;
237+ }
238+
239+ push @specs, {
240+ spec => $self->{parse_attributes}->($self, $option, \%attribs),
241+ desc => $para
242+ . (defined $attribs{default} ? " (default $attribs{default})" : ''),
243+ group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
244+ };
245+ }
246+ while ( $para = <$fh> ) {
247+ last unless $para;
248+ if ( $para =~ m/^=head1/ ) {
249+ $para = undef; # Can't 'last' out of a do {} block.
250+ last;
251+ }
252+ last if $para =~ m/^=item /;
253+ }
254+ } while ( $para );
255+
256+ die "No valid specs in $self->{head1}" unless @specs;
257+
258+ close $fh;
259+ return @specs, @rules;
260+}
261+
262+sub _parse_specs {
263+ my ( $self, @specs ) = @_;
264+ my %disables; # special rule that requires deferred checking
265+
266+ foreach my $opt ( @specs ) {
267+ if ( ref $opt ) { # It's an option spec, not a rule.
268+ PTDEBUG && _d('Parsing opt spec:',
269+ map { ($_, '=>', $opt->{$_}) } keys %$opt);
270+
271+ my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
272+ if ( !$long ) {
273+ die "Cannot parse long option from spec $opt->{spec}";
274+ }
275+ $opt->{long} = $long;
276+
277+ die "Duplicate long option --$long" if exists $self->{opts}->{$long};
278+ $self->{opts}->{$long} = $opt;
279+
280+ if ( length $long == 1 ) {
281+ PTDEBUG && _d('Long opt', $long, 'looks like short opt');
282+ $self->{short_opts}->{$long} = $long;
283+ }
284+
285+ if ( $short ) {
286+ die "Duplicate short option -$short"
287+ if exists $self->{short_opts}->{$short};
288+ $self->{short_opts}->{$short} = $long;
289+ $opt->{short} = $short;
290+ }
291+ else {
292+ $opt->{short} = undef;
293+ }
294+
295+ $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
296+ $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
297+ $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
298+
299+ $opt->{group} ||= 'default';
300+ $self->{groups}->{ $opt->{group} }->{$long} = 1;
301+
302+ $opt->{value} = undef;
303+ $opt->{got} = 0;
304+
305+ my ( $type ) = $opt->{spec} =~ m/=(.)/;
306+ $opt->{type} = $type;
307+ PTDEBUG && _d($long, 'type:', $type);
308+
309+
310+ $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
311+
312+ if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
313+ $self->{defaults}->{$long} = defined $def ? $def : 1;
314+ PTDEBUG && _d($long, 'default:', $def);
315+ }
316+
317+ if ( $long eq 'config' ) {
318+ $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
319+ }
320+
321+ if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
322+ $disables{$long} = $dis;
323+ PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
324+ }
325+
326+ $self->{opts}->{$long} = $opt;
327+ }
328+ else { # It's an option rule, not a spec.
329+ PTDEBUG && _d('Parsing rule:', $opt);
330+ push @{$self->{rules}}, $opt;
331+ my @participants = $self->_get_participants($opt);
332+ my $rule_ok = 0;
333+
334+ if ( $opt =~ m/mutually exclusive|one and only one/ ) {
335+ $rule_ok = 1;
336+ push @{$self->{mutex}}, \@participants;
337+ PTDEBUG && _d(@participants, 'are mutually exclusive');
338+ }
339+ if ( $opt =~ m/at least one|one and only one/ ) {
340+ $rule_ok = 1;
341+ push @{$self->{atleast1}}, \@participants;
342+ PTDEBUG && _d(@participants, 'require at least one');
343+ }
344+ if ( $opt =~ m/default to/ ) {
345+ $rule_ok = 1;
346+ $self->{defaults_to}->{$participants[0]} = $participants[1];
347+ PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
348+ }
349+ if ( $opt =~ m/restricted to option groups/ ) {
350+ $rule_ok = 1;
351+ my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
352+ my @groups = split(',', $groups);
353+ %{$self->{allowed_groups}->{$participants[0]}} = map {
354+ s/\s+//;
355+ $_ => 1;
356+ } @groups;
357+ }
358+ if( $opt =~ m/accepts additional command-line arguments/ ) {
359+ $rule_ok = 1;
360+ $self->{strict} = 0;
361+ PTDEBUG && _d("Strict mode disabled by rule");
362+ }
363+
364+ die "Unrecognized option rule: $opt" unless $rule_ok;
365+ }
366+ }
367+
368+ foreach my $long ( keys %disables ) {
369+ my @participants = $self->_get_participants($disables{$long});
370+ $self->{disables}->{$long} = \@participants;
371+ PTDEBUG && _d('Option', $long, 'disables', @participants);
372+ }
373+
374+ return;
375+}
376+
377+sub _get_participants {
378+ my ( $self, $str ) = @_;
379+ my @participants;
380+ foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
381+ die "Option --$long does not exist while processing rule $str"
382+ unless exists $self->{opts}->{$long};
383+ push @participants, $long;
384+ }
385+ PTDEBUG && _d('Participants for', $str, ':', @participants);
386+ return @participants;
387+}
388+
389+sub opts {
390+ my ( $self ) = @_;
391+ my %opts = %{$self->{opts}};
392+ return %opts;
393+}
394+
395+sub short_opts {
396+ my ( $self ) = @_;
397+ my %short_opts = %{$self->{short_opts}};
398+ return %short_opts;
399+}
400+
401+sub set_defaults {
402+ my ( $self, %defaults ) = @_;
403+ $self->{defaults} = {};
404+ foreach my $long ( keys %defaults ) {
405+ die "Cannot set default for nonexistent option $long"
406+ unless exists $self->{opts}->{$long};
407+ $self->{defaults}->{$long} = $defaults{$long};
408+ PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
409+ }
410+ return;
411+}
412+
413+sub get_defaults {
414+ my ( $self ) = @_;
415+ return $self->{defaults};
416+}
417+
418+sub get_groups {
419+ my ( $self ) = @_;
420+ return $self->{groups};
421+}
422+
423+sub _set_option {
424+ my ( $self, $opt, $val ) = @_;
425+ my $long = exists $self->{opts}->{$opt} ? $opt
426+ : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
427+ : die "Getopt::Long gave a nonexistent option: $opt";
428+
429+ $opt = $self->{opts}->{$long};
430+ if ( $opt->{is_cumulative} ) {
431+ $opt->{value}++;
432+ }
433+ else {
434+ $opt->{value} = $val;
435+ }
436+ $opt->{got} = 1;
437+ PTDEBUG && _d('Got option', $long, '=', $val);
438+}
439+
440+sub get_opts {
441+ my ( $self ) = @_;
442+
443+ foreach my $long ( keys %{$self->{opts}} ) {
444+ $self->{opts}->{$long}->{got} = 0;
445+ $self->{opts}->{$long}->{value}
446+ = exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
447+ : $self->{opts}->{$long}->{is_cumulative} ? 0
448+ : undef;
449+ }
450+ $self->{got_opts} = 0;
451+
452+ $self->{errors} = [];
453+
454+ if ( @ARGV && $ARGV[0] eq "--config" ) {
455+ shift @ARGV;
456+ $self->_set_option('config', shift @ARGV);
457+ }
458+ if ( $self->has('config') ) {
459+ my @extra_args;
460+ foreach my $filename ( split(',', $self->get('config')) ) {
461+ eval {
462+ push @extra_args, $self->_read_config_file($filename);
463+ };
464+ if ( $EVAL_ERROR ) {
465+ if ( $self->got('config') ) {
466+ die $EVAL_ERROR;
467+ }
468+ elsif ( PTDEBUG ) {
469+ _d($EVAL_ERROR);
470+ }
471+ }
472+ }
473+ unshift @ARGV, @extra_args;
474+ }
475+
476+ Getopt::Long::Configure('no_ignore_case', 'bundling');
477+ GetOptions(
478+ map { $_->{spec} => sub { $self->_set_option(@_); } }
479+ grep { $_->{long} ne 'config' } # --config is handled specially above.
480+ values %{$self->{opts}}
481+ ) or $self->save_error('Error parsing options');
482+
483+ if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
484+ if ( $self->{version} ) {
485+ print $self->{version}, "\n";
486+ }
487+ else {
488+ print "Error parsing version. See the VERSION section of the tool's documentation.\n";
489+ }
490+ exit 0;
491+ }
492+
493+ if ( @ARGV && $self->{strict} ) {
494+ $self->save_error("Unrecognized command-line options @ARGV");
495+ }
496+
497+ foreach my $mutex ( @{$self->{mutex}} ) {
498+ my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
499+ if ( @set > 1 ) {
500+ my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
501+ @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
502+ . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
503+ . ' are mutually exclusive.';
504+ $self->save_error($err);
505+ }
506+ }
507+
508+ foreach my $required ( @{$self->{atleast1}} ) {
509+ my @set = grep { $self->{opts}->{$_}->{got} } @$required;
510+ if ( @set == 0 ) {
511+ my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
512+ @{$required}[ 0 .. scalar(@$required) - 2] )
513+ .' or --'.$self->{opts}->{$required->[-1]}->{long};
514+ $self->save_error("Specify at least one of $err");
515+ }
516+ }
517+
518+ $self->_check_opts( keys %{$self->{opts}} );
519+ $self->{got_opts} = 1;
520+ return;
521+}
522+
523+sub _check_opts {
524+ my ( $self, @long ) = @_;
525+ my $long_last = scalar @long;
526+ while ( @long ) {
527+ foreach my $i ( 0..$#long ) {
528+ my $long = $long[$i];
529+ next unless $long;
530+ my $opt = $self->{opts}->{$long};
531+ if ( $opt->{got} ) {
532+ if ( exists $self->{disables}->{$long} ) {
533+ my @disable_opts = @{$self->{disables}->{$long}};
534+ map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
535+ PTDEBUG && _d('Unset options', @disable_opts,
536+ 'because', $long,'disables them');
537+ }
538+
539+ if ( exists $self->{allowed_groups}->{$long} ) {
540+
541+ my @restricted_groups = grep {
542+ !exists $self->{allowed_groups}->{$long}->{$_}
543+ } keys %{$self->{groups}};
544+
545+ my @restricted_opts;
546+ foreach my $restricted_group ( @restricted_groups ) {
547+ RESTRICTED_OPT:
548+ foreach my $restricted_opt (
549+ keys %{$self->{groups}->{$restricted_group}} )
550+ {
551+ next RESTRICTED_OPT if $restricted_opt eq $long;
552+ push @restricted_opts, $restricted_opt
553+ if $self->{opts}->{$restricted_opt}->{got};
554+ }
555+ }
556+
557+ if ( @restricted_opts ) {
558+ my $err;
559+ if ( @restricted_opts == 1 ) {
560+ $err = "--$restricted_opts[0]";
561+ }
562+ else {
563+ $err = join(', ',
564+ map { "--$self->{opts}->{$_}->{long}" }
565+ grep { $_ }
566+ @restricted_opts[0..scalar(@restricted_opts) - 2]
567+ )
568+ . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
569+ }
570+ $self->save_error("--$long is not allowed with $err");
571+ }
572+ }
573+
574+ }
575+ elsif ( $opt->{is_required} ) {
576+ $self->save_error("Required option --$long must be specified");
577+ }
578+
579+ $self->_validate_type($opt);
580+ if ( $opt->{parsed} ) {
581+ delete $long[$i];
582+ }
583+ else {
584+ PTDEBUG && _d('Temporarily failed to parse', $long);
585+ }
586+ }
587+
588+ die "Failed to parse options, possibly due to circular dependencies"
589+ if @long == $long_last;
590+ $long_last = @long;
591+ }
592+
593+ return;
594+}
595+
596+sub _validate_type {
597+ my ( $self, $opt ) = @_;
598+ return unless $opt;
599+
600+ if ( !$opt->{type} ) {
601+ $opt->{parsed} = 1;
602+ return;
603+ }
604+
605+ my $val = $opt->{value};
606+
607+ if ( $val && $opt->{type} eq 'm' ) { # type time
608+ PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
609+ my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
610+ if ( !$suffix ) {
611+ my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
612+ $suffix = $s || 's';
613+ PTDEBUG && _d('No suffix given; using', $suffix, 'for',
614+ $opt->{long}, '(value:', $val, ')');
615+ }
616+ if ( $suffix =~ m/[smhd]/ ) {
617+ $val = $suffix eq 's' ? $num # Seconds
618+ : $suffix eq 'm' ? $num * 60 # Minutes
619+ : $suffix eq 'h' ? $num * 3600 # Hours
620+ : $num * 86400; # Days
621+ $opt->{value} = ($prefix || '') . $val;
622+ PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
623+ }
624+ else {
625+ $self->save_error("Invalid time suffix for --$opt->{long}");
626+ }
627+ }
628+ elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
629+ PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
630+ my $prev = {};
631+ my $from_key = $self->{defaults_to}->{ $opt->{long} };
632+ if ( $from_key ) {
633+ PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
634+ if ( $self->{opts}->{$from_key}->{parsed} ) {
635+ $prev = $self->{opts}->{$from_key}->{value};
636+ }
637+ else {
638+ PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
639+ $from_key, 'parsed');
640+ return;
641+ }
642+ }
643+ my $defaults = $self->{DSNParser}->parse_options($self);
644+ $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
645+ }
646+ elsif ( $val && $opt->{type} eq 'z' ) { # type size
647+ PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
648+ $self->_parse_size($opt, $val);
649+ }
650+ elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
651+ $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
652+ }
653+ elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
654+ $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
655+ }
656+ else {
657+ PTDEBUG && _d('Nothing to validate for option',
658+ $opt->{long}, 'type', $opt->{type}, 'value', $val);
659+ }
660+
661+ $opt->{parsed} = 1;
662+ return;
663+}
664+
665+sub get {
666+ my ( $self, $opt ) = @_;
667+ my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
668+ die "Option $opt does not exist"
669+ unless $long && exists $self->{opts}->{$long};
670+ return $self->{opts}->{$long}->{value};
671+}
672+
673+sub got {
674+ my ( $self, $opt ) = @_;
675+ my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
676+ die "Option $opt does not exist"
677+ unless $long && exists $self->{opts}->{$long};
678+ return $self->{opts}->{$long}->{got};
679+}
680+
681+sub has {
682+ my ( $self, $opt ) = @_;
683+ my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
684+ return defined $long ? exists $self->{opts}->{$long} : 0;
685+}
686+
687+sub set {
688+ my ( $self, $opt, $val ) = @_;
689+ my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
690+ die "Option $opt does not exist"
691+ unless $long && exists $self->{opts}->{$long};
692+ $self->{opts}->{$long}->{value} = $val;
693+ return;
694+}
695+
696+sub save_error {
697+ my ( $self, $error ) = @_;
698+ push @{$self->{errors}}, $error;
699+ return;
700+}
701+
702+sub errors {
703+ my ( $self ) = @_;
704+ return $self->{errors};
705+}
706+
707+sub usage {
708+ my ( $self ) = @_;
709+ warn "No usage string is set" unless $self->{usage}; # XXX
710+ return "Usage: " . ($self->{usage} || '') . "\n";
711+}
712+
713+sub descr {
714+ my ( $self ) = @_;
715+ warn "No description string is set" unless $self->{description}; # XXX
716+ my $descr = ($self->{description} || $self->{program_name} || '')
717+ . " For more details, please use the --help option, "
718+ . "or try 'perldoc $PROGRAM_NAME' "
719+ . "for complete documentation.";
720+ $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
721+ unless $ENV{DONT_BREAK_LINES};
722+ $descr =~ s/ +$//mg;
723+ return $descr;
724+}
725+
726+sub usage_or_errors {
727+ my ( $self, $file, $return ) = @_;
728+ $file ||= $self->{file} || __FILE__;
729+
730+ if ( !$self->{description} || !$self->{usage} ) {
731+ PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
732+ my %synop = $self->_parse_synopsis($file);
733+ $self->{description} ||= $synop{description};
734+ $self->{usage} ||= $synop{usage};
735+ PTDEBUG && _d("Description:", $self->{description},
736+ "\nUsage:", $self->{usage});
737+ }
738+
739+ if ( $self->{opts}->{help}->{got} ) {
740+ print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
741+ exit 0 unless $return;
742+ }
743+ elsif ( scalar @{$self->{errors}} ) {
744+ print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
745+ exit 0 unless $return;
746+ }
747+
748+ return;
749+}
750+
751+sub print_errors {
752+ my ( $self ) = @_;
753+ my $usage = $self->usage() . "\n";
754+ if ( (my @errors = @{$self->{errors}}) ) {
755+ $usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
756+ . "\n";
757+ }
758+ return $usage . "\n" . $self->descr();
759+}
760+
761+sub print_usage {
762+ my ( $self ) = @_;
763+ die "Run get_opts() before print_usage()" unless $self->{got_opts};
764+ my @opts = values %{$self->{opts}};
765+
766+ my $maxl = max(
767+ map {
768+ length($_->{long}) # option long name
769+ + ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
770+ + ($_->{type} ? 2 : 0) # "=x" where x is the opt type
771+ }
772+ @opts);
773+
774+ my $maxs = max(0,
775+ map {
776+ length($_)
777+ + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
778+ + ($self->{opts}->{$_}->{type} ? 2 : 0)
779+ }
780+ values %{$self->{short_opts}});
781+
782+ my $lcol = max($maxl, ($maxs + 3));
783+ my $rcol = 80 - $lcol - 6;
784+ my $rpad = ' ' x ( 80 - $rcol );
785+
786+ $maxs = max($lcol - 3, $maxs);
787+
788+ my $usage = $self->descr() . "\n" . $self->usage();
789+
790+ my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
791+ push @groups, 'default';
792+
793+ foreach my $group ( reverse @groups ) {
794+ $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
795+ foreach my $opt (
796+ sort { $a->{long} cmp $b->{long} }
797+ grep { $_->{group} eq $group }
798+ @opts )
799+ {
800+ my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
801+ my $short = $opt->{short};
802+ my $desc = $opt->{desc};
803+
804+ $long .= $opt->{type} ? "=$opt->{type}" : "";
805+
806+ if ( $opt->{type} && $opt->{type} eq 'm' ) {
807+ my ($s) = $desc =~ m/\(suffix (.)\)/;
808+ $s ||= 's';
809+ $desc =~ s/\s+\(suffix .\)//;
810+ $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
811+ . "d=days; if no suffix, $s is used.";
812+ }
813+ $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
814+ $desc =~ s/ +$//mg;
815+ if ( $short ) {
816+ $usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
817+ }
818+ else {
819+ $usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
820+ }
821+ }
822+ }
823+
824+ $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
825+
826+ if ( (my @rules = @{$self->{rules}}) ) {
827+ $usage .= "\nRules:\n\n";
828+ $usage .= join("\n", map { " $_" } @rules) . "\n";
829+ }
830+ if ( $self->{DSNParser} ) {
831+ $usage .= "\n" . $self->{DSNParser}->usage();
832+ }
833+ $usage .= "\nOptions and values after processing arguments:\n\n";
834+ foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
835+ my $val = $opt->{value};
836+ my $type = $opt->{type} || '';
837+ my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
838+ $val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
839+ : !defined $val ? '(No value)'
840+ : $type eq 'd' ? $self->{DSNParser}->as_string($val)
841+ : $type =~ m/H|h/ ? join(',', sort keys %$val)
842+ : $type =~ m/A|a/ ? join(',', @$val)
843+ : $val;
844+ $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
845+ }
846+ return $usage;
847+}
848+
849+sub prompt_noecho {
850+ shift @_ if ref $_[0] eq __PACKAGE__;
851+ my ( $prompt ) = @_;
852+ local $OUTPUT_AUTOFLUSH = 1;
853+ print $prompt
854+ or die "Cannot print: $OS_ERROR";
855+ my $response;
856+ eval {
857+ require Term::ReadKey;
858+ Term::ReadKey::ReadMode('noecho');
859+ chomp($response = <STDIN>);
860+ Term::ReadKey::ReadMode('normal');
861+ print "\n"
862+ or die "Cannot print: $OS_ERROR";
863+ };
864+ if ( $EVAL_ERROR ) {
865+ die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
866+ }
867+ return $response;
868+}
869+
870+sub _read_config_file {
871+ my ( $self, $filename ) = @_;
872+ open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
873+ my @args;
874+ my $prefix = '--';
875+ my $parse = 1;
876+
877+ LINE:
878+ while ( my $line = <$fh> ) {
879+ chomp $line;
880+ next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
881+ $line =~ s/\s+#.*$//g;
882+ $line =~ s/^\s+|\s+$//g;
883+ if ( $line eq '--' ) {
884+ $prefix = '';
885+ $parse = 0;
886+ next LINE;
887+ }
888+ if ( $parse
889+ && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
890+ ) {
891+ push @args, grep { defined $_ } ("$prefix$opt", $arg);
892+ }
893+ elsif ( $line =~ m/./ ) {
894+ push @args, $line;
895+ }
896+ else {
897+ die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
898+ }
899+ }
900+ close $fh;
901+ return @args;
902+}
903+
904+sub read_para_after {
905+ my ( $self, $file, $regex ) = @_;
906+ open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
907+ local $INPUT_RECORD_SEPARATOR = '';
908+ my $para;
909+ while ( $para = <$fh> ) {
910+ next unless $para =~ m/^=pod$/m;
911+ last;
912+ }
913+ while ( $para = <$fh> ) {
914+ next unless $para =~ m/$regex/;
915+ last;
916+ }
917+ $para = <$fh>;
918+ chomp($para);
919+ close $fh or die "Can't close $file: $OS_ERROR";
920+ return $para;
921+}
922+
923+sub clone {
924+ my ( $self ) = @_;
925+
926+ my %clone = map {
927+ my $hashref = $self->{$_};
928+ my $val_copy = {};
929+ foreach my $key ( keys %$hashref ) {
930+ my $ref = ref $hashref->{$key};
931+ $val_copy->{$key} = !$ref ? $hashref->{$key}
932+ : $ref eq 'HASH' ? { %{$hashref->{$key}} }
933+ : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
934+ : $hashref->{$key};
935+ }
936+ $_ => $val_copy;
937+ } qw(opts short_opts defaults);
938+
939+ foreach my $scalar ( qw(got_opts) ) {
940+ $clone{$scalar} = $self->{$scalar};
941+ }
942+
943+ return bless \%clone;
944+}
945+
946+sub _parse_size {
947+ my ( $self, $opt, $val ) = @_;
948+
949+ if ( lc($val || '') eq 'null' ) {
950+ PTDEBUG && _d('NULL size for', $opt->{long});
951+ $opt->{value} = 'null';
952+ return;
953+ }
954+
955+ my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
956+ my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
957+ if ( defined $num ) {
958+ if ( $factor ) {
959+ $num *= $factor_for{$factor};
960+ PTDEBUG && _d('Setting option', $opt->{y},
961+ 'to num', $num, '* factor', $factor);
962+ }
963+ $opt->{value} = ($pre || '') . $num;
964+ }
965+ else {
966+ $self->save_error("Invalid size for --$opt->{long}: $val");
967+ }
968+ return;
969+}
970+
971+sub _parse_attribs {
972+ my ( $self, $option, $attribs ) = @_;
973+ my $types = $self->{types};
974+ return $option
975+ . ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
976+ . ($attribs->{'negatable'} ? '!' : '' )
977+ . ($attribs->{'cumulative'} ? '+' : '' )
978+ . ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
979+}
980+
981+sub _parse_synopsis {
982+ my ( $self, $file ) = @_;
983+ $file ||= $self->{file} || __FILE__;
984+ PTDEBUG && _d("Parsing SYNOPSIS in", $file);
985+
986+ local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
987+ open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
988+ my $para;
989+ 1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
990+ die "$file does not contain a SYNOPSIS section" unless $para;
991+ my @synop;
992+ for ( 1..2 ) { # 1 for the usage, 2 for the description
993+ my $para = <$fh>;
994+ push @synop, $para;
995+ }
996+ close $fh;
997+ PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
998+ my ($usage, $desc) = @synop;
999+ die "The SYNOPSIS section in $file is not formatted properly"
1000+ unless $usage && $desc;
1001+
1002+ $usage =~ s/^\s*Usage:\s+(.+)/$1/;
1003+ chomp $usage;
1004+
1005+ $desc =~ s/\n/ /g;
1006+ $desc =~ s/\s{2,}/ /g;
1007+ $desc =~ s/\. ([A-Z][a-z])/. $1/g;
1008+ $desc =~ s/\s+$//;
1009+
1010+ return (
1011+ description => $desc,
1012+ usage => $usage,
1013+ );
1014+};
1015+
1016+sub _d {
1017+ my ($package, undef, $line) = caller 0;
1018+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1019+ map { defined $_ ? $_ : 'undef' }
1020+ @_;
1021+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1022+}
1023+
1024+if ( PTDEBUG ) {
1025+ print '# ', $^X, ' ', $], "\n";
1026+ if ( my $uname = `uname -a` ) {
1027+ $uname =~ s/\s+/ /g;
1028+ print "# $uname\n";
1029+ }
1030+ print '# Arguments: ',
1031+ join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
1032+}
1033+
1034+1;
1035+}
1036+# ###########################################################################
1037+# End OptionParser package
1038+# ###########################################################################
1039+
1040+# ###########################################################################
1041+# QueryParser package
1042+# This package is a copy without comments from the original. The original
1043+# with comments and its test file can be found in the Bazaar repository at,
1044+# lib/QueryParser.pm
1045+# t/lib/QueryParser.t
1046+# See https://launchpad.net/percona-toolkit for more information.
1047+# ###########################################################################
1048+{
1049+package QueryParser;
1050+
1051+use strict;
1052+use warnings FATAL => 'all';
1053+use English qw(-no_match_vars);
1054+use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1055+
1056+our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
1057+our $tbl_regex = qr{
1058+ \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
1059+ \b\s*
1060+ \(? # Optional paren around tables
1061+ ($tbl_ident
1062+ (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
1063+ )
1064+ }xio;
1065+our $has_derived = qr{
1066+ \b(?:FROM|JOIN|,)
1067+ \s*\(\s*SELECT
1068+ }xi;
1069+
1070+our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
1071+
1072+our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
1073+
1074+sub new {
1075+ my ( $class ) = @_;
1076+ bless {}, $class;
1077+}
1078+
1079+sub get_tables {
1080+ my ( $self, $query ) = @_;
1081+ return unless $query;
1082+ PTDEBUG && _d('Getting tables for', $query);
1083+
1084+ my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
1085+ if ( $ddl_stmt ) {
1086+ PTDEBUG && _d('Special table type:', $ddl_stmt);
1087+ $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
1088+ if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
1089+ PTDEBUG && _d('Query alters a database, not a table');
1090+ return ();
1091+ }
1092+ if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
1093+ my ($select) = $query =~ m/\b(SELECT\b.+)/is;
1094+ PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
1095+ return $self->get_tables($select);
1096+ }
1097+ my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
1098+ PTDEBUG && _d('Matches table:', $tbl);
1099+ return ($tbl);
1100+ }
1101+
1102+ $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1103+
1104+ if ( $query =~ /^\s*LOCK TABLES/i ) {
1105+ PTDEBUG && _d('Special table type: LOCK TABLES');
1106+ $query =~ s/^(\s*LOCK TABLES\s+)//;
1107+ $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g;
1108+ PTDEBUG && _d('Locked tables:', $query);
1109+ $query = "FROM $query";
1110+ }
1111+
1112+ $query =~ s/\\["']//g; # quoted strings
1113+ $query =~ s/".*?"/?/sg; # quoted strings
1114+ $query =~ s/'.*?'/?/sg; # quoted strings
1115+
1116+ my @tables;
1117+ foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
1118+ PTDEBUG && _d('Match tables:', $tbls);
1119+
1120+ next if $tbls =~ m/\ASELECT\b/i;
1121+
1122+ foreach my $tbl ( split(',', $tbls) ) {
1123+ $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
1124+
1125+ if ( $tbl !~ m/[a-zA-Z]/ ) {
1126+ PTDEBUG && _d('Skipping suspicious table name:', $tbl);
1127+ next;
1128+ }
1129+
1130+ push @tables, $tbl;
1131+ }
1132+ }
1133+ return @tables;
1134+}
1135+
1136+sub has_derived_table {
1137+ my ( $self, $query ) = @_;
1138+ my $match = $query =~ m/$has_derived/;
1139+ PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
1140+ return $match;
1141+}
1142+
1143+sub get_aliases {
1144+ my ( $self, $query, $list ) = @_;
1145+
1146+ my $result = {
1147+ DATABASE => {},
1148+ TABLE => {},
1149+ };
1150+ return $result unless $query;
1151+
1152+ $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1153+
1154+ $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
1155+
1156+ my @tbl_refs;
1157+ my ($tbl_refs, $from) = $query =~ m{
1158+ (
1159+ (FROM|INTO|UPDATE)\b\s* # Keyword before table refs
1160+ .+? # Table refs
1161+ )
1162+ (?:\s+|\z) # If the query does not end with the table
1163+ (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
1164+ }ix;
1165+
1166+ if ( $tbl_refs ) {
1167+
1168+ if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1169+ $tbl_refs =~ s/\([^\)]+\)\s*//;
1170+ }
1171+
1172+ PTDEBUG && _d('tbl refs:', $tbl_refs);
1173+
1174+ my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
1175+
1176+ my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
1177+
1178+ $tbl_refs =~ s/ = /=/g;
1179+
1180+ while (
1181+ $tbl_refs =~ m{
1182+ $before_tbl\b\s*
1183+ ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
1184+ \s*$after_tbl
1185+ }xgio )
1186+ {
1187+ my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
1188+ PTDEBUG && _d('Match table:', $tbl_ref);
1189+ push @tbl_refs, $tbl_ref;
1190+ $alias = $self->trim_identifier($alias);
1191+
1192+ if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
1193+ PTDEBUG && _d('Subquery', $tbl_ref);
1194+ $result->{TABLE}->{$alias} = undef;
1195+ next;
1196+ }
1197+
1198+ my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
1199+ $db = $self->trim_identifier($db);
1200+ $tbl = $self->trim_identifier($tbl);
1201+ $result->{TABLE}->{$alias || $tbl} = $tbl;
1202+ $result->{DATABASE}->{$tbl} = $db if $db;
1203+ }
1204+ }
1205+ else {
1206+ PTDEBUG && _d("No tables ref in", $query);
1207+ }
1208+
1209+ if ( $list ) {
1210+ return \@tbl_refs;
1211+ }
1212+ else {
1213+ return $result;
1214+ }
1215+}
1216+
1217+sub split {
1218+ my ( $self, $query ) = @_;
1219+ return unless $query;
1220+ $query = $self->clean_query($query);
1221+ PTDEBUG && _d('Splitting', $query);
1222+
1223+ my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
1224+
1225+ my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
1226+
1227+ my @statements;
1228+ if ( @split_statements == 1 ) {
1229+ push @statements, $query;
1230+ }
1231+ else {
1232+ for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
1233+ push @statements, $split_statements[$i].$split_statements[$i+1];
1234+
1235+ if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
1236+ $statements[-2] .= pop @statements;
1237+ }
1238+ }
1239+ }
1240+
1241+ PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
1242+ return @statements;
1243+}
1244+
1245+sub clean_query {
1246+ my ( $self, $query ) = @_;
1247+ return unless $query;
1248+ $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
1249+ $query =~ s/^\s+//; # Remove leading spaces
1250+ $query =~ s/\s+$//; # Remove trailing spaces
1251+ $query =~ s/\s{2,}/ /g; # Remove extra spaces
1252+ return $query;
1253+}
1254+
1255+sub split_subquery {
1256+ my ( $self, $query ) = @_;
1257+ return unless $query;
1258+ $query = $self->clean_query($query);
1259+ $query =~ s/;$//;
1260+
1261+ my @subqueries;
1262+ my $sqno = 0; # subquery number
1263+ my $pos = 0;
1264+ while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
1265+ $pos = pos($query);
1266+ my $word = $1;
1267+ PTDEBUG && _d($word, $sqno);
1268+ if ( $word =~ m/^\(?SELECT\b/i ) {
1269+ my $start_pos = $pos - length($word) - 1;
1270+ if ( $start_pos ) {
1271+ $sqno++;
1272+ PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
1273+ $subqueries[$sqno] = {
1274+ start_pos => $start_pos,
1275+ end_pos => 0,
1276+ len => 0,
1277+ words => [$word],
1278+ lp => 1, # left parentheses
1279+ rp => 0, # right parentheses
1280+ done => 0,
1281+ };
1282+ }
1283+ else {
1284+ PTDEBUG && _d('Main SELECT at pos 0');
1285+ }
1286+ }
1287+ else {
1288+ next unless $sqno; # next unless we're in a subquery
1289+ PTDEBUG && _d('In subquery', $sqno);
1290+ my $sq = $subqueries[$sqno];
1291+ if ( $sq->{done} ) {
1292+ PTDEBUG && _d('This subquery is done; SQL is for',
1293+ ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
1294+ next;
1295+ }
1296+ push @{$sq->{words}}, $word;
1297+ my $lp = ($word =~ tr/\(//) || 0;
1298+ my $rp = ($word =~ tr/\)//) || 0;
1299+ PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
1300+ if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
1301+ my $end_pos = $pos - 1;
1302+ PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
1303+ $sq->{end_pos} = $end_pos;
1304+ $sq->{len} = $end_pos - $sq->{start_pos};
1305+ }
1306+ }
1307+ }
1308+
1309+ for my $i ( 1..$#subqueries ) {
1310+ my $sq = $subqueries[$i];
1311+ next unless $sq;
1312+ $sq->{sql} = join(' ', @{$sq->{words}});
1313+ substr $query,
1314+ $sq->{start_pos} + 1, # +1 for (
1315+ $sq->{len} - 1, # -1 for )
1316+ "__subquery_$i";
1317+ }
1318+
1319+ return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
1320+}
1321+
1322+sub query_type {
1323+ my ( $self, $query, $qr ) = @_;
1324+ my ($type, undef) = $qr->distill_verbs($query);
1325+ my $rw;
1326+ if ( $type =~ m/^SELECT\b/ ) {
1327+ $rw = 'read';
1328+ }
1329+ elsif ( $type =~ m/^$data_manip_stmts\b/
1330+ || $type =~ m/^$data_def_stmts\b/ ) {
1331+ $rw = 'write'
1332+ }
1333+
1334+ return {
1335+ type => $type,
1336+ rw => $rw,
1337+ }
1338+}
1339+
1340+sub get_columns {
1341+ my ( $self, $query ) = @_;
1342+ my $cols = [];
1343+ return $cols unless $query;
1344+ my $cols_def;
1345+
1346+ if ( $query =~ m/^SELECT/i ) {
1347+ $query =~ s/
1348+ ^SELECT\s+
1349+ (?:ALL
1350+ |DISTINCT
1351+ |DISTINCTROW
1352+ |HIGH_PRIORITY
1353+ |STRAIGHT_JOIN
1354+ |SQL_SMALL_RESULT
1355+ |SQL_BIG_RESULT
1356+ |SQL_BUFFER_RESULT
1357+ |SQL_CACHE
1358+ |SQL_NO_CACHE
1359+ |SQL_CALC_FOUND_ROWS
1360+ )\s+
1361+ /SELECT /xgi;
1362+ ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
1363+ }
1364+ elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1365+ ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
1366+ }
1367+
1368+ PTDEBUG && _d('Columns:', $cols_def);
1369+ if ( $cols_def ) {
1370+ @$cols = split(',', $cols_def);
1371+ map {
1372+ my $col = $_;
1373+ $col = s/^\s+//g;
1374+ $col = s/\s+$//g;
1375+ $col;
1376+ } @$cols;
1377+ }
1378+
1379+ return $cols;
1380+}
1381+
1382+sub parse {
1383+ my ( $self, $query ) = @_;
1384+ return unless $query;
1385+ my $parsed = {};
1386+
1387+ $query =~ s/\n/ /g;
1388+ $query = $self->clean_query($query);
1389+
1390+ $parsed->{query} = $query,
1391+ $parsed->{tables} = $self->get_aliases($query, 1);
1392+ $parsed->{columns} = $self->get_columns($query);
1393+
1394+ my ($type) = $query =~ m/^(\w+)/;
1395+ $parsed->{type} = lc $type;
1396+
1397+
1398+ $parsed->{sub_queries} = [];
1399+
1400+ return $parsed;
1401+}
1402+
1403+sub extract_tables {
1404+ my ( $self, %args ) = @_;
1405+ my $query = $args{query};
1406+ my $default_db = $args{default_db};
1407+ my $q = $self->{Quoter} || $args{Quoter};
1408+ return unless $query;
1409+ PTDEBUG && _d('Extracting tables');
1410+ my @tables;
1411+ my %seen;
1412+ foreach my $db_tbl ( $self->get_tables($query) ) {
1413+ next unless $db_tbl;
1414+ next if $seen{$db_tbl}++; # Unique-ify for issue 337.
1415+ my ( $db, $tbl ) = $q->split_unquote($db_tbl);
1416+ push @tables, [ $db || $default_db, $tbl ];
1417+ }
1418+ return @tables;
1419+}
1420+
1421+sub trim_identifier {
1422+ my ($self, $str) = @_;
1423+ return unless defined $str;
1424+ $str =~ s/`//g;
1425+ $str =~ s/^\s+//;
1426+ $str =~ s/\s+$//;
1427+ return $str;
1428+}
1429+
1430+sub _d {
1431+ my ($package, undef, $line) = caller 0;
1432+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1433+ map { defined $_ ? $_ : 'undef' }
1434+ @_;
1435+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1436+}
1437+
1438+1;
1439+}
1440+# ###########################################################################
1441+# End QueryParser package
1442+# ###########################################################################
1443+
1444+# ###########################################################################
1445+# QueryRewriter package
1446+# This package is a copy without comments from the original. The original
1447+# with comments and its test file can be found in the Bazaar repository at,
1448+# lib/QueryRewriter.pm
1449+# t/lib/QueryRewriter.t
1450+# See https://launchpad.net/percona-toolkit for more information.
1451+# ###########################################################################
1452+{
1453+package QueryRewriter;
1454+
1455+use strict;
1456+use warnings FATAL => 'all';
1457+use English qw(-no_match_vars);
1458+use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1459+
1460+our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
1461+ |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
1462+my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
1463+my $bal;
1464+$bal = qr/
1465+ \(
1466+ (?:
1467+ (?> [^()]+ ) # Non-parens without backtracking
1468+ |
1469+ (??{ $bal }) # Group with matching parens
1470+ )*
1471+ \)
1472+ /x;
1473+
1474+my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
1475+my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
1476+my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
1477+my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
1478+
1479+
1480+sub new {
1481+ my ( $class, %args ) = @_;
1482+ my $self = { %args };
1483+ return bless $self, $class;
1484+}
1485+
1486+sub strip_comments {
1487+ my ( $self, $query ) = @_;
1488+ return unless $query;
1489+ $query =~ s/$olc_re//go;
1490+ $query =~ s/$mlc_re//go;
1491+ if ( $query =~ m/$vlc_rf/i ) { # contains show + version
1492+ $query =~ s/$vlc_re//go;
1493+ }
1494+ return $query;
1495+}
1496+
1497+sub shorten {
1498+ my ( $self, $query, $length ) = @_;
1499+ $query =~ s{
1500+ \A(
1501+ (?:INSERT|REPLACE)
1502+ (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
1503+ (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
1504+ )
1505+ \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
1506+ {$1 /*... omitted ...*/$2}xsi;
1507+
1508+ return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
1509+
1510+ my $last_length = 0;
1511+ my $query_length = length($query);
1512+ while (
1513+ $length > 0
1514+ && $query_length > $length
1515+ && $query_length < ( $last_length || $query_length + 1 )
1516+ ) {
1517+ $last_length = $query_length;
1518+ $query =~ s{
1519+ (\bIN\s*\() # The opening of an IN list
1520+ ([^\)]+) # Contents of the list, assuming no item contains paren
1521+ (?=\)) # Close of the list
1522+ }
1523+ {
1524+ $1 . __shorten($2)
1525+ }gexsi;
1526+ }
1527+
1528+ return $query;
1529+}
1530+
1531+sub __shorten {
1532+ my ( $snippet ) = @_;
1533+ my @vals = split(/,/, $snippet);
1534+ return $snippet unless @vals > 20;
1535+ my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
1536+ return
1537+ join(',', @keep)
1538+ . "/*... omitted "
1539+ . scalar(@vals)
1540+ . " items ...*/";
1541+}
1542+
1543+sub fingerprint {
1544+ my ( $self, $query ) = @_;
1545+
1546+ $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
1547+ && return 'mysqldump';
1548+ $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query
1549+ && return 'percona-toolkit';
1550+ $query =~ m/\Aadministrator command: /
1551+ && return $query;
1552+ $query =~ m/\A\s*(call\s+\S+)\(/i
1553+ && return lc($1); # Warning! $1 used, be careful.
1554+ if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
1555+ $query = $beginning; # Shorten multi-value INSERT statements ASAP
1556+ }
1557+
1558+ $query =~ s/$olc_re//go;
1559+ $query =~ s/$mlc_re//go;
1560+ $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
1561+ && return $query;
1562+
1563+ $query =~ s/\\["']//g; # quoted strings
1564+ $query =~ s/".*?"/?/sg; # quoted strings
1565+ $query =~ s/'.*?'/?/sg; # quoted strings
1566+
1567+ if ( $self->{match_md5_checksums} ) {
1568+ $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
1569+ }
1570+
1571+ if ( !$self->{match_embedded_numbers} ) {
1572+ $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
1573+ }
1574+ else {
1575+ $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
1576+ }
1577+
1578+ if ( $self->{match_md5_checksums} ) {
1579+ $query =~ s/[xb+-]\?/?/g;
1580+ }
1581+ else {
1582+ $query =~ s/[xb.+-]\?/?/g;
1583+ }
1584+
1585+ $query =~ s/\A\s+//; # Chop off leading whitespace
1586+ chomp $query; # Kill trailing whitespace
1587+ $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
1588+ $query = lc $query;
1589+ $query =~ s/\bnull\b/?/g; # Get rid of NULLs
1590+ $query =~ s{ # Collapse IN and VALUES lists
1591+ \b(in|values?)(?:[\s,]*\([\s?,]*\))+
1592+ }
1593+ {$1(?+)}gx;
1594+ $query =~ s{ # Collapse UNION
1595+ \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
1596+ }
1597+ {$1 /*repeat$2*/}xg;
1598+ $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
1599+
1600+ if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
1601+ 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
1602+ }
1603+
1604+ return $query;
1605+}
1606+
1607+sub distill_verbs {
1608+ my ( $self, $query ) = @_;
1609+
1610+ $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
1611+ $query =~ m/\A\s*use\s+/ && return "USE";
1612+ $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
1613+ $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
1614+
1615+ if ( $query =~ m/\Aadministrator command:/ ) {
1616+ $query =~ s/administrator command:/ADMIN/;
1617+ $query = uc $query;
1618+ return $query;
1619+ }
1620+
1621+ $query = $self->strip_comments($query);
1622+
1623+ if ( $query =~ m/\A\s*SHOW\s+/i ) {
1624+ PTDEBUG && _d($query);
1625+
1626+ $query = uc $query;
1627+ $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
1628+ $query =~ s/\s+COUNT[^)]+\)//g;
1629+
1630+ $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
1631+
1632+ $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
1633+ $query =~ s/\s+/ /g;
1634+ PTDEBUG && _d($query);
1635+ return $query;
1636+ }
1637+
1638+ eval $QueryParser::data_def_stmts;
1639+ eval $QueryParser::tbl_ident;
1640+ my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
1641+ if ( $dds) {
1642+ my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
1643+ $obj = uc $obj if $obj;
1644+ PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
1645+ my ($db_or_tbl)
1646+ = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
1647+ PTDEBUG && _d('Matches db or table:', $db_or_tbl);
1648+ return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
1649+ }
1650+
1651+ my @verbs = $query =~ m/\b($verbs)\b/gio;
1652+ @verbs = do {
1653+ my $last = '';
1654+ grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
1655+ };
1656+
1657+ if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
1658+ PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
1659+ my $union = grep { $_ eq 'UNION' } @verbs;
1660+ @verbs = $union ? qw(SELECT UNION) : qw(SELECT);
1661+ }
1662+
1663+ my $verb_str = join(q{ }, @verbs);
1664+ return $verb_str;
1665+}
1666+
1667+sub __distill_tables {
1668+ my ( $self, $query, $table, %args ) = @_;
1669+ my $qp = $args{QueryParser} || $self->{QueryParser};
1670+ die "I need a QueryParser argument" unless $qp;
1671+
1672+ my @tables = map {
1673+ $_ =~ s/`//g;
1674+ $_ =~ s/(_?)[0-9]+/$1?/g;
1675+ $_;
1676+ } grep { defined $_ } $qp->get_tables($query);
1677+
1678+ push @tables, $table if $table;
1679+
1680+ @tables = do {
1681+ my $last = '';
1682+ grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
1683+ };
1684+
1685+ return @tables;
1686+}
1687+
1688+sub distill {
1689+ my ( $self, $query, %args ) = @_;
1690+
1691+ if ( $args{generic} ) {
1692+ my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
1693+ return '' unless $cmd;
1694+ $query = (uc $cmd) . ($arg ? " $arg" : '');
1695+ }
1696+ else {
1697+ my ($verbs, $table) = $self->distill_verbs($query, %args);
1698+
1699+ if ( $verbs && $verbs =~ m/^SHOW/ ) {
1700+ my %alias_for = qw(
1701+ SCHEMA DATABASE
1702+ KEYS INDEX
1703+ INDEXES INDEX
1704+ );
1705+ map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
1706+ $query = $verbs;
1707+ }
1708+ else {
1709+ my @tables = $self->__distill_tables($query, $table, %args);
1710+ $query = join(q{ }, $verbs, @tables);
1711+ }
1712+ }
1713+
1714+ if ( $args{trf} ) {
1715+ $query = $args{trf}->($query, %args);
1716+ }
1717+
1718+ return $query;
1719+}
1720+
1721+sub convert_to_select {
1722+ my ( $self, $query ) = @_;
1723+ return unless $query;
1724+
1725+ return if $query =~ m/=\s*\(\s*SELECT /i;
1726+
1727+ $query =~ s{
1728+ \A.*?
1729+ update(?:\s+(?:low_priority|ignore))?\s+(.*?)
1730+ \s+set\b(.*?)
1731+ (?:\s*where\b(.*?))?
1732+ (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
1733+ \Z
1734+ }
1735+ {__update_to_select($1, $2, $3, $4)}exsi
1736+ || $query =~ s{
1737+ \A.*?
1738+ (?:insert(?:\s+ignore)?|replace)\s+
1739+ .*?\binto\b(.*?)\(([^\)]+)\)\s*
1740+ values?\s*(\(.*?\))\s*
1741+ (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1742+ \Z
1743+ }
1744+ {__insert_to_select($1, $2, $3)}exsi
1745+ || $query =~ s{
1746+ \A.*?
1747+ (?:insert(?:\s+ignore)?|replace)\s+
1748+ (?:.*?\binto)\b(.*?)\s*
1749+ set\s+(.*?)\s*
1750+ (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1751+ \Z
1752+ }
1753+ {__insert_to_select_with_set($1, $2)}exsi
1754+ || $query =~ s{
1755+ \A.*?
1756+ delete\s+(.*?)
1757+ \bfrom\b(.*)
1758+ \Z
1759+ }
1760+ {__delete_to_select($1, $2)}exsi;
1761+ $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
1762+ $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
1763+ return $query;
1764+}
1765+
1766+sub convert_select_list {
1767+ my ( $self, $query ) = @_;
1768+ $query =~ s{
1769+ \A\s*select(.*?)\bfrom\b
1770+ }
1771+ {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
1772+ return $query;
1773+}
1774+
1775+sub __delete_to_select {
1776+ my ( $delete, $join ) = @_;
1777+ if ( $join =~ m/\bjoin\b/ ) {
1778+ return "select 1 from $join";
1779+ }
1780+ return "select * from $join";
1781+}
1782+
1783+sub __insert_to_select {
1784+ my ( $tbl, $cols, $vals ) = @_;
1785+ PTDEBUG && _d('Args:', @_);
1786+ my @cols = split(/,/, $cols);
1787+ PTDEBUG && _d('Cols:', @cols);
1788+ $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
1789+ my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
1790+ PTDEBUG && _d('Vals:', @vals);
1791+ if ( @cols == @vals ) {
1792+ return "select * from $tbl where "
1793+ . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
1794+ }
1795+ else {
1796+ return "select * from $tbl limit 1";
1797+ }
1798+}
1799+
1800+sub __insert_to_select_with_set {
1801+ my ( $from, $set ) = @_;
1802+ $set =~ s/,/ and /g;
1803+ return "select * from $from where $set ";
1804+}
1805+
1806+sub __update_to_select {
1807+ my ( $from, $set, $where, $limit ) = @_;
1808+ return "select $set from $from "
1809+ . ( $where ? "where $where" : '' )
1810+ . ( $limit ? " $limit " : '' );
1811+}
1812+
1813+sub wrap_in_derived {
1814+ my ( $self, $query ) = @_;
1815+ return unless $query;
1816+ return $query =~ m/\A\s*select/i
1817+ ? "select 1 from ($query) as x limit 1"
1818+ : $query;
1819+}
1820+
1821+sub _d {
1822+ my ($package, undef, $line) = caller 0;
1823+ @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1824+ map { defined $_ ? $_ : 'undef' }
1825+ @_;
1826+ print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1827+}
1828+
1829+1;
1830+}
1831+# ###########################################################################
1832+# End QueryRewriter package
1833+# ###########################################################################
1834+
1835+# ###########################################################################
1836+# This is a combination of modules and programs in one -- a runnable module.
1837+# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
1838+# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
1839+#
1840+# Check at the end of this package for the call to main() which actually runs
1841+# the program.
1842+# ###########################################################################
1843+package pt_fingerprint;
1844+
1845+use English qw(-no_match_vars);
1846+use Data::Dumper;
1847+$Data::Dumper::Indent = 1;
1848+$OUTPUT_AUTOFLUSH = 1;
1849+
1850+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1851+
1852+sub main {
1853+ @ARGV = @_; # set global ARGV for this package
1854+
1855+ # ##########################################################################
1856+ # Get configuration information.
1857+ # ##########################################################################
1858+ my $o = new OptionParser();
1859+ $o->get_specs();
1860+ $o->get_opts();
1861+ $o->usage_or_errors();
1862+
1863+ my $qp = new QueryParser();
1864+ my $qr = new QueryRewriter(
1865+ QueryParser => $qp,
1866+ match_md5_checksums => $o->get('match-md5-checksums'),
1867+ match_embedded_numbers => $o->get('match-embedded-numbers'),
1868+ );
1869+
1870+ if ( $o->got('query') ) {
1871+ print $qr->fingerprint($o->get('query')), "\n";
1872+ }
1873+ else {
1874+ local $INPUT_RECORD_SEPARATOR = ";\n";
1875+ while ( <> ) {
1876+ my $query = $_;
1877+ chomp $query;
1878+ $query =~ s/^#.+$//mg;
1879+ $query =~ s/^\s+//;
1880+ next unless $query =~ m/^\w/;
1881+ print $qr->fingerprint($query), "\n";
1882+ }
1883+ }
1884+}
1885+
1886+# ############################################################################
1887+# Run the program.
1888+# ############################################################################
1889+if ( !caller ) { exit main(@ARGV); }
1890+
1891+1; # Because this is a module as well as a script.
1892+
1893+# #############################################################################
1894+# Documentation.
1895+# #############################################################################
1896+
1897+=pod
1898+
1899+=head1 NAME
1900+
1901+pt-fingerprint - Convert queries into fingerprints.
1902+
1903+=head1 SYNOPSIS
1904+
1905+Usage: pt-fingerprint [OPTIONS] [FILES]
1906+
1907+pt-fingerprint converts queries into fingerprints. With the --query
1908+option, converts the option's value into a fingerprint. With no options, treats
1909+command-line arguments as FILEs and reads and converts semicolon-separated
1910+queries from the FILEs. When FILE is -, it read standard input.
1911+
1912+Convert a single query:
1913+
1914+ pt-fingerprint --query "select a, b, c from users where id = 500"
1915+
1916+Convert a file full of queries:
1917+
1918+ pt-fingerprint /path/to/file.txt
1919+
1920+=head1 RISKS
1921+
1922+The following section is included to inform users about the potential risks,
1923+whether known or unknown, of using this tool. The two main categories of risks
1924+are those created by the nature of the tool (e.g. read-only tools vs. read-write
1925+tools) and those created by bugs.
1926+
1927+The pt-fingerprint tool simply reads data and transforms it, so risks are
1928+minimal.
1929+
1930+See also L<"BUGS"> for more information on filing bugs and getting help.
1931+
1932+=head1 DESCRIPTION
1933+
1934+A query fingerprint is the abstracted form of a query, which makes it possible
1935+to group similar queries together. Abstracting a query removes literal values,
1936+normalizes whitespace, and so on. For example, consider these two queries:
1937+
1938+ SELECT name, password FROM user WHERE id='12823';
1939+ select name, password from user
1940+ where id=5;
1941+
1942+Both of those queries will fingerprint to
1943+
1944+ select name, password from user where id=?
1945+
1946+Once the query's fingerprint is known, we can then talk about a query as though
1947+it represents all similar queries.
1948+
1949+Query fingerprinting accommodates a great many special cases, which have proven
1950+necessary in the real world. For example, an IN list with 5 literals is really
1951+equivalent to one with 4 literals, so lists of literals are collapsed to a
1952+single one. If you want to understand more about how and why all of these cases
1953+are handled, please review the test cases in the Subversion repository. If you
1954+find something that is not fingerprinted properly, please submit a bug report
1955+with a reproducible test case. Here is a list of transformations during
1956+fingerprinting, which might not be exhaustive:
1957+
1958+=over
1959+
1960+=item *
1961+
1962+Group all SELECT queries from mysqldump together, even if they are against
1963+different tables. Ditto for all of pt-table-checksum's checksum queries.
1964+
1965+=item *
1966+
1967+Shorten multi-value INSERT statements to a single VALUES() list.
1968+
1969+=item *
1970+
1971+Strip comments.
1972+
1973+=item *
1974+
1975+Abstract the databases in USE statements, so all USE statements are grouped
1976+together.
1977+
1978+=item *
1979+
1980+Replace all literals, such as quoted strings. For efficiency, the code that
1981+replaces literal numbers is somewhat non-selective, and might replace some
1982+things as numbers when they really are not. Hexadecimal literals are also
1983+replaced. NULL is treated as a literal. Numbers embedded in identifiers are
1984+also replaced, so tables named similarly will be fingerprinted to the same
1985+values (e.g. users_2009 and users_2010 will fingerprint identically).
1986+
1987+=item *
1988+
1989+Collapse all whitespace into a single space.
1990+
1991+=item *
1992+
1993+Lowercase the entire query.
1994+
1995+=item *
1996+
1997+Replace all literals inside of IN() and VALUES() lists with a single
1998+placeholder, regardless of cardinality.
1999+
2000+=item *
2001+
2002+Collapse multiple identical UNION queries into a single one.
2003+
2004+=back
2005+
2006+=head1 OPTIONS
2007+
2008+This tool accepts additional command-line arguments. Refer to the
2009+L<"SYNOPSIS"> and usage information for details.
2010+
2011+=over
2012+
2013+=item --config
2014+
2015+type: Array
2016+
2017+Read this comma-separated list of config files; if specified, this must be the
2018+first option on the command line.
2019+
2020+=item --help
2021+
2022+Show help and exit.
2023+
2024+=item --match-embedded-numbers
2025+
2026+Match numbers embedded in words and replace as single values. This option
2027+causes the tool to be more careful about matching numbers so that words
2028+with numbers, like C<catch22> are matched and replaced as a single C<?>
2029+placeholder. Otherwise the default number matching pattern will replace
2030+C<catch22> as C<catch?>.
2031+
2032+This is helpful if database or table names contain numbers.
2033+
2034+=item --match-md5-checksums
2035+
2036+Match MD5 checksums and replace as single values. This option causes
2037+the tool to be more careful about matching numbers so that MD5 checksums
2038+like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a
2039+single C<?> placeholder. Otherwise, the default number matching pattern will
2040+replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>.
2041+
2042+=item --query
2043+
2044+type: string
2045+
2046+The query to convert into a fingerprint.
2047+
2048+=item --version
2049+
2050+Show version and exit.
2051+
2052+=back
2053+
2054+=head1 ENVIRONMENT
2055+
2056+The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
2057+To enable debugging and capture all output to a file, run the tool like:
2058+
2059+ PTDEBUG=1 pt-fingerprint ... > FILE 2>&1
2060+
2061+Be careful: debugging output is voluminous and can generate several megabytes
2062+of output.
2063+
2064+=head1 SYSTEM REQUIREMENTS
2065+
2066+You need Perl, DBI, DBD::mysql, and some core packages that ought to be
2067+installed in any reasonably new version of Perl.
2068+
2069+=head1 BUGS
2070+
2071+For a list of known bugs, see L<http://www.percona.com/bugs/pt-fingerprint>.
2072+
2073+Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
2074+Include the following information in your bug report:
2075+
2076+=over
2077+
2078+=item * Complete command-line used to run the tool
2079+
2080+=item * Tool L<"--version">
2081+
2082+=item * MySQL version of all servers involved
2083+
2084+=item * Output from the tool including STDERR
2085+
2086+=item * Input files (log/dump/config files, etc.)
2087+
2088+=back
2089+
2090+If possible, include debugging output by running the tool with C<PTDEBUG>;
2091+see L<"ENVIRONMENT">.
2092+
2093+=head1 DOWNLOADING
2094+
2095+Visit L<http://www.percona.com/software/percona-toolkit/> to download the
2096+latest release of Percona Toolkit. Or, get the latest release from the
2097+command line:
2098+
2099+ wget percona.com/get/percona-toolkit.tar.gz
2100+
2101+ wget percona.com/get/percona-toolkit.rpm
2102+
2103+ wget percona.com/get/percona-toolkit.deb
2104+
2105+You can also get individual tools from the latest release:
2106+
2107+ wget percona.com/get/TOOL
2108+
2109+Replace C<TOOL> with the name of any tool.
2110+
2111+=head1 AUTHORS
2112+
2113+Baron Schwartz and Daniel Nichter
2114+
2115+=head1 ABOUT PERCONA TOOLKIT
2116+
2117+This tool is part of Percona Toolkit, a collection of advanced command-line
2118+tools developed by Percona for MySQL support and consulting. Percona Toolkit
2119+was forked from two projects in June, 2011: Maatkit and Aspersa. Those
2120+projects were created by Baron Schwartz and developed primarily by him and
2121+Daniel Nichter, both of whom are employed by Percona. Visit
2122+L<http://www.percona.com/software/> for more software developed by Percona.
2123+
2124+=head1 COPYRIGHT, LICENSE, AND WARRANTY
2125+
2126+This program is copyright 2011-2012 Percona Inc.
2127+Feedback and improvements are welcome.
2128+
2129+THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
2130+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
2131+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
2132+
2133+This program is free software; you can redistribute it and/or modify it under
2134+the terms of the GNU General Public License as published by the Free Software
2135+Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
2136+systems, you can issue `man perlgpl' or `man perlartistic' to read these
2137+licenses.
2138+
2139+You should have received a copy of the GNU General Public License along with
2140+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
2141+Place, Suite 330, Boston, MA 02111-1307 USA.
2142+
2143+=head1 VERSION
2144+
2145+pt-fingerprint 2.0.0
2146+
2147+=cut
2148
2149=== modified file 'lib/QueryRewriter.pm'
2150--- lib/QueryRewriter.pm 2012-01-19 19:46:56 +0000
2151+++ lib/QueryRewriter.pm 2012-03-30 22:06:22 +0000
2152@@ -175,10 +175,30 @@
2153 $query =~ s/\\["']//g; # quoted strings
2154 $query =~ s/".*?"/?/sg; # quoted strings
2155 $query =~ s/'.*?'/?/sg; # quoted strings
2156- # This regex is extremely broad in its definition of what looks like a
2157- # number. That is for speed.
2158- $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers
2159- $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers
2160+
2161+ # MD5 checksums which are always 32 hex chars
2162+ if ( $self->{match_md5_checksums} ) {
2163+ $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
2164+ }
2165+
2166+ # Things resembling numbers/hex.
2167+ if ( !$self->{match_embedded_numbers} ) {
2168+ # For speed, this regex is extremely broad in its definition
2169+ # of what looks like a number.
2170+ $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
2171+ }
2172+ else {
2173+ $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
2174+ }
2175+
2176+ # Clean up leftovers
2177+ if ( $self->{match_md5_checksums} ) {
2178+ $query =~ s/[xb+-]\?/?/g;
2179+ }
2180+ else {
2181+ $query =~ s/[xb.+-]\?/?/g;
2182+ }
2183+
2184 $query =~ s/\A\s+//; # Chop off leading whitespace
2185 chomp $query; # Kill trailing whitespace
2186 $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
2187
2188=== modified file 't/lib/QueryRewriter.t'
2189--- t/lib/QueryRewriter.t 2012-03-06 13:56:08 +0000
2190+++ t/lib/QueryRewriter.t 2012-03-30 22:06:22 +0000
2191@@ -10,7 +10,7 @@
2192 use strict;
2193 use warnings FATAL => 'all';
2194 use English qw(-no_match_vars);
2195-use Test::More tests => 266;
2196+use Test::More tests => 271;
2197
2198 use QueryRewriter;
2199 use QueryParser;
2200@@ -349,6 +349,64 @@
2201 "Fingerprint LOAD DATA INFILE"
2202 );
2203
2204+# fingerprint MD5 checksums, 32 char hex strings. This is a
2205+# special feature used by pt-fingerprint.
2206+$qr = new QueryRewriter(
2207+ QueryParser => $qp,
2208+ match_md5_checksums => 1,
2209+);
2210+
2211+is(
2212+ $qr->fingerprint(
2213+ "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
2214+ ),
2215+ "select * from db.?_temp where id=?",
2216+ "Fingerprint db.MD5_tbl"
2217+);
2218+
2219+is(
2220+ $qr->fingerprint(
2221+ "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
2222+ ),
2223+ "select * from db.temp_? where id=?",
2224+ "Fingerprint db.tbl_MD5"
2225+);
2226+
2227+$qr = new QueryRewriter(
2228+ QueryParser => $qp,
2229+ match_md5_checksums => 1,
2230+ match_embedded_numbers => 1,
2231+);
2232+
2233+is(
2234+ $qr->fingerprint(
2235+ "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
2236+ ),
2237+ "select * from db.?_temp where id=?",
2238+ "Fingerprint db.MD5_tbl (with match_embedded_numbers)"
2239+);
2240+
2241+is(
2242+ $qr->fingerprint(
2243+ "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
2244+ ),
2245+ "select * from db.temp_? where id=?",
2246+ "Fingerprint db.tbl_MD5 (with match_embedded_numbers)"
2247+);
2248+
2249+$qr = new QueryRewriter(
2250+ QueryParser => $qp,
2251+ match_embedded_numbers => 1,
2252+);
2253+
2254+is(
2255+ $qr->fingerprint(
2256+ "SELECT * FROM prices.rt_5min where id=1"
2257+ ),
2258+ "select * from prices.rt_5min where id=?",
2259+ "Fingerprint db.tbl<number>name (preserve number)"
2260+);
2261+
2262 # #############################################################################
2263 # convert_to_select()
2264 # #############################################################################
2265
2266=== added directory 't/pt-fingerprint'
2267=== added file 't/pt-fingerprint/basics.t'
2268--- t/pt-fingerprint/basics.t 1970-01-01 00:00:00 +0000
2269+++ t/pt-fingerprint/basics.t 2012-03-30 22:06:22 +0000
2270@@ -0,0 +1,101 @@
2271+#!/usr/bin/env perl
2272+
2273+BEGIN {
2274+ die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
2275+ unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
2276+ unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
2277+};
2278+
2279+use strict;
2280+use warnings FATAL => 'all';
2281+use English qw(-no_match_vars);
2282+use Test::More tests => 7;
2283+
2284+use PerconaTest;
2285+require "$trunk/bin/pt-fingerprint";
2286+
2287+my @args = qw();
2288+my $output;
2289+my $sample = "$trunk/t/pt-fingerprint/samples";
2290+my $pqd = "$trunk/bin/pt-query-digest";
2291+
2292+$output = `$trunk/bin/pt-fingerprint --help`;
2293+like(
2294+ $output,
2295+ qr/--help/,
2296+ "It runs"
2297+);
2298+
2299+
2300+sub test_query_file {
2301+ my ($file) = @_;
2302+ if ( ! -f "$sample/$file.fingerprint" ) {
2303+ `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`;
2304+ diag("Created $sample/$file.fingerprint");
2305+ }
2306+ chomp(my $expect = `cat $sample/$file.fingerprint`);
2307+ my $got = output(
2308+ sub { pt_fingerprint::main("$sample/$file") }
2309+ );
2310+ chomp($got);
2311+ is(
2312+ $got,
2313+ $expect,
2314+ "$file fingerprint"
2315+ );
2316+};
2317+
2318+opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n";
2319+while (defined(my $file = readdir($dir))) {
2320+ next unless $file =~ m/^query\d+$/;
2321+ test_query_file($file);
2322+}
2323+closedir $dir;
2324+
2325+
2326+sub test_query {
2327+ my (%args) = @_;
2328+ my $query = $args{query};
2329+ my $expect = $args{expect};
2330+ my @ops = $args{ops} ? @{$args{ops}} : ();
2331+
2332+ $output = output(
2333+ sub { pt_fingerprint::main('--query', $query, @ops) }
2334+ );
2335+ chomp($output);
2336+ is(
2337+ $output,
2338+ $expect,
2339+ $args{name} ? $args{name} : "Fingerprint " . substr($query, 0, 70)
2340+ );
2341+}
2342+
2343+test_query(
2344+ query => 'select * from tbl where id=1',
2345+ expect => 'select * from tbl where id=?',
2346+);
2347+
2348+test_query(
2349+ name => "Fingerprint MD5_word",
2350+ query => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1",
2351+ expect => "select c from db.?_temp where id=?",
2352+ ops => [qw(--match-md5-checksums)],
2353+);
2354+
2355+test_query(
2356+ name => "Fingerprint word_MD5",
2357+ query => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1",
2358+ expect => "select c from db.temp_? where id=?",
2359+ ops => [qw(--match-md5-checksums)],
2360+);
2361+
2362+test_query(
2363+ name => "Fingerprint word<number>",
2364+ query => "SELECT c FROM db.catch22 WHERE id is null",
2365+ expect => "select c from db.catch22 where id is ?",
2366+ ops => [qw(--match-embedded-numbers)],
2367+);
2368+# #############################################################################
2369+# Done.
2370+# #############################################################################
2371+exit;
2372
2373=== added directory 't/pt-fingerprint/samples'
2374=== added file 't/pt-fingerprint/samples/query001'
2375--- t/pt-fingerprint/samples/query001 1970-01-01 00:00:00 +0000
2376+++ t/pt-fingerprint/samples/query001 2012-03-30 22:06:22 +0000
2377@@ -0,0 +1,2 @@
2378+# Query_time: 1
2379+select * from db.tbl where id=1 or foo='bar';
2380
2381=== added file 't/pt-fingerprint/samples/query001.fingerprint'
2382--- t/pt-fingerprint/samples/query001.fingerprint 1970-01-01 00:00:00 +0000
2383+++ t/pt-fingerprint/samples/query001.fingerprint 2012-03-30 22:06:22 +0000
2384@@ -0,0 +1,1 @@
2385+select * from db.tbl where id=? or foo=?
2386
2387=== added file 't/pt-fingerprint/samples/query002'
2388--- t/pt-fingerprint/samples/query002 1970-01-01 00:00:00 +0000
2389+++ t/pt-fingerprint/samples/query002 2012-03-30 22:06:22 +0000
2390@@ -0,0 +1,2 @@
2391+# Query_time: 1
2392+select col from db.tbl1 where id in (1, 2, 3);
2393
2394=== added file 't/pt-fingerprint/samples/query002.fingerprint'
2395--- t/pt-fingerprint/samples/query002.fingerprint 1970-01-01 00:00:00 +0000
2396+++ t/pt-fingerprint/samples/query002.fingerprint 2012-03-30 22:06:22 +0000
2397@@ -0,0 +1,1 @@
2398+select col from db.tbl? where id in(?+)

Subscribers

People subscribed via source and target branches