Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 225
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage
Merge into: lp:percona-toolkit/2.1
Diff against target: 9952 lines (+9746/-8)
22 files modified
bin/pt-table-usage (+7320/-0)
lib/SQLParser.pm (+98/-8)
lib/TableUsage.pm (+1060/-0)
t/lib/TableUsage.t (+817/-0)
t/pt-table-usage/basics.t (+138/-0)
t/pt-table-usage/create_table_definitions.t (+41/-0)
t/pt-table-usage/explain_extended.t (+79/-0)
t/pt-table-usage/samples/ee.out (+6/-0)
t/pt-table-usage/samples/ee.sql (+26/-0)
t/pt-table-usage/samples/in/slow001.txt (+24/-0)
t/pt-table-usage/samples/in/slow002.txt (+20/-0)
t/pt-table-usage/samples/in/slow003.txt (+3/-0)
t/pt-table-usage/samples/out/create-table-defs-001.txt (+4/-0)
t/pt-table-usage/samples/out/create001.txt (+5/-0)
t/pt-table-usage/samples/out/drop-table-if-exists.txt (+3/-0)
t/pt-table-usage/samples/out/query001.txt (+6/-0)
t/pt-table-usage/samples/out/query002.txt (+5/-0)
t/pt-table-usage/samples/out/slow001.txt (+31/-0)
t/pt-table-usage/samples/out/slow002.txt (+40/-0)
t/pt-table-usage/samples/out/slow003-001.txt (+6/-0)
t/pt-table-usage/samples/out/slow003-002.txt (+8/-0)
t/pt-table-usage/samples/out/slow003-003.txt (+6/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+100256@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added file 'bin/pt-table-usage'
--- bin/pt-table-usage 1970-01-01 00:00:00 +0000
+++ bin/pt-table-usage 2012-03-30 22:48:21 +0000
@@ -0,0 +1,7320 @@
1#!/usr/bin/env perl
2
3# This program is part of Percona Toolkit: http://www.percona.com/software/
4# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
5# notices and disclaimers.
6
7use strict;
8use warnings FATAL => 'all';
9use constant PTDEBUG => $ENV{PTDEBUG} || 0;
10
11# ###########################################################################
12# DSNParser package
13# This package is a copy without comments from the original. The original
14# with comments and its test file can be found in the Bazaar repository at,
15# lib/DSNParser.pm
16# t/lib/DSNParser.t
17# See https://launchpad.net/percona-toolkit for more information.
18# ###########################################################################
19{
20package DSNParser;
21
22use strict;
23use warnings FATAL => 'all';
24use English qw(-no_match_vars);
25use constant PTDEBUG => $ENV{PTDEBUG} || 0;
26
27use Data::Dumper;
28$Data::Dumper::Indent = 0;
29$Data::Dumper::Quotekeys = 0;
30
31eval {
32 require DBI;
33};
34my $have_dbi = $EVAL_ERROR ? 0 : 1;
35
36sub new {
37 my ( $class, %args ) = @_;
38 foreach my $arg ( qw(opts) ) {
39 die "I need a $arg argument" unless $args{$arg};
40 }
41 my $self = {
42 opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
43 };
44 foreach my $opt ( @{$args{opts}} ) {
45 if ( !$opt->{key} || !$opt->{desc} ) {
46 die "Invalid DSN option: ", Dumper($opt);
47 }
48 PTDEBUG && _d('DSN option:',
49 join(', ',
50 map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
51 keys %$opt
52 )
53 );
54 $self->{opts}->{$opt->{key}} = {
55 dsn => $opt->{dsn},
56 desc => $opt->{desc},
57 copy => $opt->{copy} || 0,
58 };
59 }
60 return bless $self, $class;
61}
62
63sub prop {
64 my ( $self, $prop, $value ) = @_;
65 if ( @_ > 2 ) {
66 PTDEBUG && _d('Setting', $prop, 'property');
67 $self->{$prop} = $value;
68 }
69 return $self->{$prop};
70}
71
72sub parse {
73 my ( $self, $dsn, $prev, $defaults ) = @_;
74 if ( !$dsn ) {
75 PTDEBUG && _d('No DSN to parse');
76 return;
77 }
78 PTDEBUG && _d('Parsing', $dsn);
79 $prev ||= {};
80 $defaults ||= {};
81 my %given_props;
82 my %final_props;
83 my $opts = $self->{opts};
84
85 foreach my $dsn_part ( split(/,/, $dsn) ) {
86 if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
87 $given_props{$prop_key} = $prop_val;
88 }
89 else {
90 PTDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
91 $given_props{h} = $dsn_part;
92 }
93 }
94
95 foreach my $key ( keys %$opts ) {
96 PTDEBUG && _d('Finding value for', $key);
97 $final_props{$key} = $given_props{$key};
98 if ( !defined $final_props{$key}
99 && defined $prev->{$key} && $opts->{$key}->{copy} )
100 {
101 $final_props{$key} = $prev->{$key};
102 PTDEBUG && _d('Copying value for', $key, 'from previous DSN');
103 }
104 if ( !defined $final_props{$key} ) {
105 $final_props{$key} = $defaults->{$key};
106 PTDEBUG && _d('Copying value for', $key, 'from defaults');
107 }
108 }
109
110 foreach my $key ( keys %given_props ) {
111 die "Unknown DSN option '$key' in '$dsn'. For more details, "
112 . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
113 . "for complete documentation."
114 unless exists $opts->{$key};
115 }
116 if ( (my $required = $self->prop('required')) ) {
117 foreach my $key ( keys %$required ) {
118 die "Missing required DSN option '$key' in '$dsn'. For more details, "
119 . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
120 . "for complete documentation."
121 unless $final_props{$key};
122 }
123 }
124
125 return \%final_props;
126}
127
128sub parse_options {
129 my ( $self, $o ) = @_;
130 die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
131 my $dsn_string
132 = join(',',
133 map { "$_=".$o->get($_); }
134 grep { $o->has($_) && $o->get($_) }
135 keys %{$self->{opts}}
136 );
137 PTDEBUG && _d('DSN string made from options:', $dsn_string);
138 return $self->parse($dsn_string);
139}
140
141sub as_string {
142 my ( $self, $dsn, $props ) = @_;
143 return $dsn unless ref $dsn;
144 my @keys = $props ? @$props : sort keys %$dsn;
145 return join(',',
146 map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
147 grep {
148 exists $self->{opts}->{$_}
149 && exists $dsn->{$_}
150 && defined $dsn->{$_}
151 } @keys);
152}
153
154sub usage {
155 my ( $self ) = @_;
156 my $usage
157 = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
158 . " KEY COPY MEANING\n"
159 . " === ==== =============================================\n";
160 my %opts = %{$self->{opts}};
161 foreach my $key ( sort keys %opts ) {
162 $usage .= " $key "
163 . ($opts{$key}->{copy} ? 'yes ' : 'no ')
164 . ($opts{$key}->{desc} || '[No description]')
165 . "\n";
166 }
167 $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
168 return $usage;
169}
170
171sub get_cxn_params {
172 my ( $self, $info ) = @_;
173 my $dsn;
174 my %opts = %{$self->{opts}};
175 my $driver = $self->prop('dbidriver') || '';
176 if ( $driver eq 'Pg' ) {
177 $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
178 . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
179 grep { defined $info->{$_} }
180 qw(h P));
181 }
182 else {
183 $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
184 . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
185 grep { defined $info->{$_} }
186 qw(F h P S A))
187 . ';mysql_read_default_group=client';
188 }
189 PTDEBUG && _d($dsn);
190 return ($dsn, $info->{u}, $info->{p});
191}
192
193sub fill_in_dsn {
194 my ( $self, $dbh, $dsn ) = @_;
195 my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
196 my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
197 $user =~ s/@.*//;
198 $dsn->{h} ||= $vars->{hostname}->{Value};
199 $dsn->{S} ||= $vars->{'socket'}->{Value};
200 $dsn->{P} ||= $vars->{port}->{Value};
201 $dsn->{u} ||= $user;
202 $dsn->{D} ||= $db;
203}
204
205sub get_dbh {
206 my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
207 $opts ||= {};
208 my $defaults = {
209 AutoCommit => 0,
210 RaiseError => 1,
211 PrintError => 0,
212 ShowErrorStatement => 1,
213 mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
214 };
215 @{$defaults}{ keys %$opts } = values %$opts;
216
217 if ( $opts->{mysql_use_result} ) {
218 $defaults->{mysql_use_result} = 1;
219 }
220
221 if ( !$have_dbi ) {
222 die "Cannot connect to MySQL because the Perl DBI module is not "
223 . "installed or not found. Run 'perl -MDBI' to see the directories "
224 . "that Perl searches for DBI. If DBI is not installed, try:\n"
225 . " Debian/Ubuntu apt-get install libdbi-perl\n"
226 . " RHEL/CentOS yum install perl-DBI\n"
227 . " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
228
229 }
230
231 my $dbh;
232 my $tries = 2;
233 while ( !$dbh && $tries-- ) {
234 PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
235 join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
236
237 eval {
238 $dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
239
240 if ( $cxn_string =~ m/mysql/i ) {
241 my $sql;
242
243 $sql = 'SELECT @@SQL_MODE';
244 PTDEBUG && _d($dbh, $sql);
245 my ($sql_mode) = $dbh->selectrow_array($sql);
246
247 $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
248 . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
249 . ($sql_mode ? ",$sql_mode" : '')
250 . '\'*/';
251 PTDEBUG && _d($dbh, $sql);
252 $dbh->do($sql);
253
254 if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
255 $sql = "/*!40101 SET NAMES $charset*/";
256 PTDEBUG && _d($dbh, ':', $sql);
257 $dbh->do($sql);
258 PTDEBUG && _d('Enabling charset for STDOUT');
259 if ( $charset eq 'utf8' ) {
260 binmode(STDOUT, ':utf8')
261 or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
262 }
263 else {
264 binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
265 }
266 }
267
268 if ( $self->prop('set-vars') ) {
269 $sql = "SET " . $self->prop('set-vars');
270 PTDEBUG && _d($dbh, ':', $sql);
271 $dbh->do($sql);
272 }
273 }
274 };
275 if ( !$dbh && $EVAL_ERROR ) {
276 PTDEBUG && _d($EVAL_ERROR);
277 if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
278 PTDEBUG && _d('Going to try again without utf8 support');
279 delete $defaults->{mysql_enable_utf8};
280 }
281 elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
282 die "Cannot connect to MySQL because the Perl DBD::mysql module is "
283 . "not installed or not found. Run 'perl -MDBD::mysql' to see "
284 . "the directories that Perl searches for DBD::mysql. If "
285 . "DBD::mysql is not installed, try:\n"
286 . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
287 . " RHEL/CentOS yum install perl-DBD-MySQL\n"
288 . " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
289 }
290 if ( !$tries ) {
291 die $EVAL_ERROR;
292 }
293 }
294 }
295
296 PTDEBUG && _d('DBH info: ',
297 $dbh,
298 Dumper($dbh->selectrow_hashref(
299 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
300 'Connection info:', $dbh->{mysql_hostinfo},
301 'Character set info:', Dumper($dbh->selectall_arrayref(
302 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
303 '$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
304 '$DBI::VERSION:', $DBI::VERSION,
305 );
306
307 return $dbh;
308}
309
310sub get_hostname {
311 my ( $self, $dbh ) = @_;
312 if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
313 return $host;
314 }
315 my ( $hostname, $one ) = $dbh->selectrow_array(
316 'SELECT /*!50038 @@hostname, */ 1');
317 return $hostname;
318}
319
320sub disconnect {
321 my ( $self, $dbh ) = @_;
322 PTDEBUG && $self->print_active_handles($dbh);
323 $dbh->disconnect;
324}
325
326sub print_active_handles {
327 my ( $self, $thing, $level ) = @_;
328 $level ||= 0;
329 printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
330 $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
331 or die "Cannot print: $OS_ERROR";
332 foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
333 $self->print_active_handles( $handle, $level + 1 );
334 }
335}
336
337sub copy {
338 my ( $self, $dsn_1, $dsn_2, %args ) = @_;
339 die 'I need a dsn_1 argument' unless $dsn_1;
340 die 'I need a dsn_2 argument' unless $dsn_2;
341 my %new_dsn = map {
342 my $key = $_;
343 my $val;
344 if ( $args{overwrite} ) {
345 $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
346 }
347 else {
348 $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
349 }
350 $key => $val;
351 } keys %{$self->{opts}};
352 return \%new_dsn;
353}
354
355sub _d {
356 my ($package, undef, $line) = caller 0;
357 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
358 map { defined $_ ? $_ : 'undef' }
359 @_;
360 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
361}
362
3631;
364}
365# ###########################################################################
366# End DSNParser package
367# ###########################################################################
368
369# ###########################################################################
370# OptionParser package
371# This package is a copy without comments from the original. The original
372# with comments and its test file can be found in the Bazaar repository at,
373# lib/OptionParser.pm
374# t/lib/OptionParser.t
375# See https://launchpad.net/percona-toolkit for more information.
376# ###########################################################################
377{
378package OptionParser;
379
380use strict;
381use warnings FATAL => 'all';
382use English qw(-no_match_vars);
383use constant PTDEBUG => $ENV{PTDEBUG} || 0;
384
385use List::Util qw(max);
386use Getopt::Long;
387
388my $POD_link_re = '[LC]<"?([^">]+)"?>';
389
390sub new {
391 my ( $class, %args ) = @_;
392 my @required_args = qw();
393 foreach my $arg ( @required_args ) {
394 die "I need a $arg argument" unless $args{$arg};
395 }
396
397 my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
398 $program_name ||= $PROGRAM_NAME;
399 my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
400
401 my %attributes = (
402 'type' => 1,
403 'short form' => 1,
404 'group' => 1,
405 'default' => 1,
406 'cumulative' => 1,
407 'negatable' => 1,
408 );
409
410 my $self = {
411 head1 => 'OPTIONS', # These args are used internally
412 skip_rules => 0, # to instantiate another Option-
413 item => '--(.*)', # Parser obj that parses the
414 attributes => \%attributes, # DSN OPTIONS section. Tools
415 parse_attributes => \&_parse_attribs, # don't tinker with these args.
416
417 %args,
418
419 strict => 1, # disabled by a special rule
420 program_name => $program_name,
421 opts => {},
422 got_opts => 0,
423 short_opts => {},
424 defaults => {},
425 groups => {},
426 allowed_groups => {},
427 errors => [],
428 rules => [], # desc of rules for --help
429 mutex => [], # rule: opts are mutually exclusive
430 atleast1 => [], # rule: at least one opt is required
431 disables => {}, # rule: opt disables other opts
432 defaults_to => {}, # rule: opt defaults to value of other opt
433 DSNParser => undef,
434 default_files => [
435 "/etc/percona-toolkit/percona-toolkit.conf",
436 "/etc/percona-toolkit/$program_name.conf",
437 "$home/.percona-toolkit.conf",
438 "$home/.$program_name.conf",
439 ],
440 types => {
441 string => 's', # standard Getopt type
442 int => 'i', # standard Getopt type
443 float => 'f', # standard Getopt type
444 Hash => 'H', # hash, formed from a comma-separated list
445 hash => 'h', # hash as above, but only if a value is given
446 Array => 'A', # array, similar to Hash
447 array => 'a', # array, similar to hash
448 DSN => 'd', # DSN
449 size => 'z', # size with kMG suffix (powers of 2^10)
450 time => 'm', # time, with an optional suffix of s/h/m/d
451 },
452 };
453
454 return bless $self, $class;
455}
456
457sub get_specs {
458 my ( $self, $file ) = @_;
459 $file ||= $self->{file} || __FILE__;
460 my @specs = $self->_pod_to_specs($file);
461 $self->_parse_specs(@specs);
462
463 open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
464 my $contents = do { local $/ = undef; <$fh> };
465 close $fh;
466 if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
467 PTDEBUG && _d('Parsing DSN OPTIONS');
468 my $dsn_attribs = {
469 dsn => 1,
470 copy => 1,
471 };
472 my $parse_dsn_attribs = sub {
473 my ( $self, $option, $attribs ) = @_;
474 map {
475 my $val = $attribs->{$_};
476 if ( $val ) {
477 $val = $val eq 'yes' ? 1
478 : $val eq 'no' ? 0
479 : $val;
480 $attribs->{$_} = $val;
481 }
482 } keys %$attribs;
483 return {
484 key => $option,
485 %$attribs,
486 };
487 };
488 my $dsn_o = new OptionParser(
489 description => 'DSN OPTIONS',
490 head1 => 'DSN OPTIONS',
491 dsn => 0, # XXX don't infinitely recurse!
492 item => '\* (.)', # key opts are a single character
493 skip_rules => 1, # no rules before opts
494 attributes => $dsn_attribs,
495 parse_attributes => $parse_dsn_attribs,
496 );
497 my @dsn_opts = map {
498 my $opts = {
499 key => $_->{spec}->{key},
500 dsn => $_->{spec}->{dsn},
501 copy => $_->{spec}->{copy},
502 desc => $_->{desc},
503 };
504 $opts;
505 } $dsn_o->_pod_to_specs($file);
506 $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
507 }
508
509 if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
510 $self->{version} = $1;
511 PTDEBUG && _d($self->{version});
512 }
513
514 return;
515}
516
517sub DSNParser {
518 my ( $self ) = @_;
519 return $self->{DSNParser};
520};
521
522sub get_defaults_files {
523 my ( $self ) = @_;
524 return @{$self->{default_files}};
525}
526
527sub _pod_to_specs {
528 my ( $self, $file ) = @_;
529 $file ||= $self->{file} || __FILE__;
530 open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
531
532 my @specs = ();
533 my @rules = ();
534 my $para;
535
536 local $INPUT_RECORD_SEPARATOR = '';
537 while ( $para = <$fh> ) {
538 next unless $para =~ m/^=head1 $self->{head1}/;
539 last;
540 }
541
542 while ( $para = <$fh> ) {
543 last if $para =~ m/^=over/;
544 next if $self->{skip_rules};
545 chomp $para;
546 $para =~ s/\s+/ /g;
547 $para =~ s/$POD_link_re/$1/go;
548 PTDEBUG && _d('Option rule:', $para);
549 push @rules, $para;
550 }
551
552 die "POD has no $self->{head1} section" unless $para;
553
554 do {
555 if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
556 chomp $para;
557 PTDEBUG && _d($para);
558 my %attribs;
559
560 $para = <$fh>; # read next paragraph, possibly attributes
561
562 if ( $para =~ m/: / ) { # attributes
563 $para =~ s/\s+\Z//g;
564 %attribs = map {
565 my ( $attrib, $val) = split(/: /, $_);
566 die "Unrecognized attribute for --$option: $attrib"
567 unless $self->{attributes}->{$attrib};
568 ($attrib, $val);
569 } split(/; /, $para);
570 if ( $attribs{'short form'} ) {
571 $attribs{'short form'} =~ s/-//;
572 }
573 $para = <$fh>; # read next paragraph, probably short help desc
574 }
575 else {
576 PTDEBUG && _d('Option has no attributes');
577 }
578
579 $para =~ s/\s+\Z//g;
580 $para =~ s/\s+/ /g;
581 $para =~ s/$POD_link_re/$1/go;
582
583 $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
584 PTDEBUG && _d('Short help:', $para);
585
586 die "No description after option spec $option" if $para =~ m/^=item/;
587
588 if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
589 $option = $base_option;
590 $attribs{'negatable'} = 1;
591 }
592
593 push @specs, {
594 spec => $self->{parse_attributes}->($self, $option, \%attribs),
595 desc => $para
596 . (defined $attribs{default} ? " (default $attribs{default})" : ''),
597 group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
598 };
599 }
600 while ( $para = <$fh> ) {
601 last unless $para;
602 if ( $para =~ m/^=head1/ ) {
603 $para = undef; # Can't 'last' out of a do {} block.
604 last;
605 }
606 last if $para =~ m/^=item /;
607 }
608 } while ( $para );
609
610 die "No valid specs in $self->{head1}" unless @specs;
611
612 close $fh;
613 return @specs, @rules;
614}
615
616sub _parse_specs {
617 my ( $self, @specs ) = @_;
618 my %disables; # special rule that requires deferred checking
619
620 foreach my $opt ( @specs ) {
621 if ( ref $opt ) { # It's an option spec, not a rule.
622 PTDEBUG && _d('Parsing opt spec:',
623 map { ($_, '=>', $opt->{$_}) } keys %$opt);
624
625 my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
626 if ( !$long ) {
627 die "Cannot parse long option from spec $opt->{spec}";
628 }
629 $opt->{long} = $long;
630
631 die "Duplicate long option --$long" if exists $self->{opts}->{$long};
632 $self->{opts}->{$long} = $opt;
633
634 if ( length $long == 1 ) {
635 PTDEBUG && _d('Long opt', $long, 'looks like short opt');
636 $self->{short_opts}->{$long} = $long;
637 }
638
639 if ( $short ) {
640 die "Duplicate short option -$short"
641 if exists $self->{short_opts}->{$short};
642 $self->{short_opts}->{$short} = $long;
643 $opt->{short} = $short;
644 }
645 else {
646 $opt->{short} = undef;
647 }
648
649 $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
650 $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
651 $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
652
653 $opt->{group} ||= 'default';
654 $self->{groups}->{ $opt->{group} }->{$long} = 1;
655
656 $opt->{value} = undef;
657 $opt->{got} = 0;
658
659 my ( $type ) = $opt->{spec} =~ m/=(.)/;
660 $opt->{type} = $type;
661 PTDEBUG && _d($long, 'type:', $type);
662
663
664 $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
665
666 if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
667 $self->{defaults}->{$long} = defined $def ? $def : 1;
668 PTDEBUG && _d($long, 'default:', $def);
669 }
670
671 if ( $long eq 'config' ) {
672 $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
673 }
674
675 if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
676 $disables{$long} = $dis;
677 PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
678 }
679
680 $self->{opts}->{$long} = $opt;
681 }
682 else { # It's an option rule, not a spec.
683 PTDEBUG && _d('Parsing rule:', $opt);
684 push @{$self->{rules}}, $opt;
685 my @participants = $self->_get_participants($opt);
686 my $rule_ok = 0;
687
688 if ( $opt =~ m/mutually exclusive|one and only one/ ) {
689 $rule_ok = 1;
690 push @{$self->{mutex}}, \@participants;
691 PTDEBUG && _d(@participants, 'are mutually exclusive');
692 }
693 if ( $opt =~ m/at least one|one and only one/ ) {
694 $rule_ok = 1;
695 push @{$self->{atleast1}}, \@participants;
696 PTDEBUG && _d(@participants, 'require at least one');
697 }
698 if ( $opt =~ m/default to/ ) {
699 $rule_ok = 1;
700 $self->{defaults_to}->{$participants[0]} = $participants[1];
701 PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
702 }
703 if ( $opt =~ m/restricted to option groups/ ) {
704 $rule_ok = 1;
705 my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
706 my @groups = split(',', $groups);
707 %{$self->{allowed_groups}->{$participants[0]}} = map {
708 s/\s+//;
709 $_ => 1;
710 } @groups;
711 }
712 if( $opt =~ m/accepts additional command-line arguments/ ) {
713 $rule_ok = 1;
714 $self->{strict} = 0;
715 PTDEBUG && _d("Strict mode disabled by rule");
716 }
717
718 die "Unrecognized option rule: $opt" unless $rule_ok;
719 }
720 }
721
722 foreach my $long ( keys %disables ) {
723 my @participants = $self->_get_participants($disables{$long});
724 $self->{disables}->{$long} = \@participants;
725 PTDEBUG && _d('Option', $long, 'disables', @participants);
726 }
727
728 return;
729}
730
731sub _get_participants {
732 my ( $self, $str ) = @_;
733 my @participants;
734 foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
735 die "Option --$long does not exist while processing rule $str"
736 unless exists $self->{opts}->{$long};
737 push @participants, $long;
738 }
739 PTDEBUG && _d('Participants for', $str, ':', @participants);
740 return @participants;
741}
742
743sub opts {
744 my ( $self ) = @_;
745 my %opts = %{$self->{opts}};
746 return %opts;
747}
748
749sub short_opts {
750 my ( $self ) = @_;
751 my %short_opts = %{$self->{short_opts}};
752 return %short_opts;
753}
754
755sub set_defaults {
756 my ( $self, %defaults ) = @_;
757 $self->{defaults} = {};
758 foreach my $long ( keys %defaults ) {
759 die "Cannot set default for nonexistent option $long"
760 unless exists $self->{opts}->{$long};
761 $self->{defaults}->{$long} = $defaults{$long};
762 PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
763 }
764 return;
765}
766
767sub get_defaults {
768 my ( $self ) = @_;
769 return $self->{defaults};
770}
771
772sub get_groups {
773 my ( $self ) = @_;
774 return $self->{groups};
775}
776
777sub _set_option {
778 my ( $self, $opt, $val ) = @_;
779 my $long = exists $self->{opts}->{$opt} ? $opt
780 : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
781 : die "Getopt::Long gave a nonexistent option: $opt";
782
783 $opt = $self->{opts}->{$long};
784 if ( $opt->{is_cumulative} ) {
785 $opt->{value}++;
786 }
787 else {
788 $opt->{value} = $val;
789 }
790 $opt->{got} = 1;
791 PTDEBUG && _d('Got option', $long, '=', $val);
792}
793
794sub get_opts {
795 my ( $self ) = @_;
796
797 foreach my $long ( keys %{$self->{opts}} ) {
798 $self->{opts}->{$long}->{got} = 0;
799 $self->{opts}->{$long}->{value}
800 = exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
801 : $self->{opts}->{$long}->{is_cumulative} ? 0
802 : undef;
803 }
804 $self->{got_opts} = 0;
805
806 $self->{errors} = [];
807
808 if ( @ARGV && $ARGV[0] eq "--config" ) {
809 shift @ARGV;
810 $self->_set_option('config', shift @ARGV);
811 }
812 if ( $self->has('config') ) {
813 my @extra_args;
814 foreach my $filename ( split(',', $self->get('config')) ) {
815 eval {
816 push @extra_args, $self->_read_config_file($filename);
817 };
818 if ( $EVAL_ERROR ) {
819 if ( $self->got('config') ) {
820 die $EVAL_ERROR;
821 }
822 elsif ( PTDEBUG ) {
823 _d($EVAL_ERROR);
824 }
825 }
826 }
827 unshift @ARGV, @extra_args;
828 }
829
830 Getopt::Long::Configure('no_ignore_case', 'bundling');
831 GetOptions(
832 map { $_->{spec} => sub { $self->_set_option(@_); } }
833 grep { $_->{long} ne 'config' } # --config is handled specially above.
834 values %{$self->{opts}}
835 ) or $self->save_error('Error parsing options');
836
837 if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
838 if ( $self->{version} ) {
839 print $self->{version}, "\n";
840 }
841 else {
842 print "Error parsing version. See the VERSION section of the tool's documentation.\n";
843 }
844 exit 0;
845 }
846
847 if ( @ARGV && $self->{strict} ) {
848 $self->save_error("Unrecognized command-line options @ARGV");
849 }
850
851 foreach my $mutex ( @{$self->{mutex}} ) {
852 my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
853 if ( @set > 1 ) {
854 my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
855 @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
856 . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
857 . ' are mutually exclusive.';
858 $self->save_error($err);
859 }
860 }
861
862 foreach my $required ( @{$self->{atleast1}} ) {
863 my @set = grep { $self->{opts}->{$_}->{got} } @$required;
864 if ( @set == 0 ) {
865 my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
866 @{$required}[ 0 .. scalar(@$required) - 2] )
867 .' or --'.$self->{opts}->{$required->[-1]}->{long};
868 $self->save_error("Specify at least one of $err");
869 }
870 }
871
872 $self->_check_opts( keys %{$self->{opts}} );
873 $self->{got_opts} = 1;
874 return;
875}
876
877sub _check_opts {
878 my ( $self, @long ) = @_;
879 my $long_last = scalar @long;
880 while ( @long ) {
881 foreach my $i ( 0..$#long ) {
882 my $long = $long[$i];
883 next unless $long;
884 my $opt = $self->{opts}->{$long};
885 if ( $opt->{got} ) {
886 if ( exists $self->{disables}->{$long} ) {
887 my @disable_opts = @{$self->{disables}->{$long}};
888 map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
889 PTDEBUG && _d('Unset options', @disable_opts,
890 'because', $long,'disables them');
891 }
892
893 if ( exists $self->{allowed_groups}->{$long} ) {
894
895 my @restricted_groups = grep {
896 !exists $self->{allowed_groups}->{$long}->{$_}
897 } keys %{$self->{groups}};
898
899 my @restricted_opts;
900 foreach my $restricted_group ( @restricted_groups ) {
901 RESTRICTED_OPT:
902 foreach my $restricted_opt (
903 keys %{$self->{groups}->{$restricted_group}} )
904 {
905 next RESTRICTED_OPT if $restricted_opt eq $long;
906 push @restricted_opts, $restricted_opt
907 if $self->{opts}->{$restricted_opt}->{got};
908 }
909 }
910
911 if ( @restricted_opts ) {
912 my $err;
913 if ( @restricted_opts == 1 ) {
914 $err = "--$restricted_opts[0]";
915 }
916 else {
917 $err = join(', ',
918 map { "--$self->{opts}->{$_}->{long}" }
919 grep { $_ }
920 @restricted_opts[0..scalar(@restricted_opts) - 2]
921 )
922 . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
923 }
924 $self->save_error("--$long is not allowed with $err");
925 }
926 }
927
928 }
929 elsif ( $opt->{is_required} ) {
930 $self->save_error("Required option --$long must be specified");
931 }
932
933 $self->_validate_type($opt);
934 if ( $opt->{parsed} ) {
935 delete $long[$i];
936 }
937 else {
938 PTDEBUG && _d('Temporarily failed to parse', $long);
939 }
940 }
941
942 die "Failed to parse options, possibly due to circular dependencies"
943 if @long == $long_last;
944 $long_last = @long;
945 }
946
947 return;
948}
949
950sub _validate_type {
951 my ( $self, $opt ) = @_;
952 return unless $opt;
953
954 if ( !$opt->{type} ) {
955 $opt->{parsed} = 1;
956 return;
957 }
958
959 my $val = $opt->{value};
960
961 if ( $val && $opt->{type} eq 'm' ) { # type time
962 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
963 my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
964 if ( !$suffix ) {
965 my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
966 $suffix = $s || 's';
967 PTDEBUG && _d('No suffix given; using', $suffix, 'for',
968 $opt->{long}, '(value:', $val, ')');
969 }
970 if ( $suffix =~ m/[smhd]/ ) {
971 $val = $suffix eq 's' ? $num # Seconds
972 : $suffix eq 'm' ? $num * 60 # Minutes
973 : $suffix eq 'h' ? $num * 3600 # Hours
974 : $num * 86400; # Days
975 $opt->{value} = ($prefix || '') . $val;
976 PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
977 }
978 else {
979 $self->save_error("Invalid time suffix for --$opt->{long}");
980 }
981 }
982 elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
983 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
984 my $prev = {};
985 my $from_key = $self->{defaults_to}->{ $opt->{long} };
986 if ( $from_key ) {
987 PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
988 if ( $self->{opts}->{$from_key}->{parsed} ) {
989 $prev = $self->{opts}->{$from_key}->{value};
990 }
991 else {
992 PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
993 $from_key, 'parsed');
994 return;
995 }
996 }
997 my $defaults = $self->{DSNParser}->parse_options($self);
998 $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
999 }
1000 elsif ( $val && $opt->{type} eq 'z' ) { # type size
1001 PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
1002 $self->_parse_size($opt, $val);
1003 }
1004 elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
1005 $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
1006 }
1007 elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
1008 $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
1009 }
1010 else {
1011 PTDEBUG && _d('Nothing to validate for option',
1012 $opt->{long}, 'type', $opt->{type}, 'value', $val);
1013 }
1014
1015 $opt->{parsed} = 1;
1016 return;
1017}
1018
1019sub get {
1020 my ( $self, $opt ) = @_;
1021 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1022 die "Option $opt does not exist"
1023 unless $long && exists $self->{opts}->{$long};
1024 return $self->{opts}->{$long}->{value};
1025}
1026
1027sub got {
1028 my ( $self, $opt ) = @_;
1029 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1030 die "Option $opt does not exist"
1031 unless $long && exists $self->{opts}->{$long};
1032 return $self->{opts}->{$long}->{got};
1033}
1034
1035sub has {
1036 my ( $self, $opt ) = @_;
1037 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1038 return defined $long ? exists $self->{opts}->{$long} : 0;
1039}
1040
1041sub set {
1042 my ( $self, $opt, $val ) = @_;
1043 my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1044 die "Option $opt does not exist"
1045 unless $long && exists $self->{opts}->{$long};
1046 $self->{opts}->{$long}->{value} = $val;
1047 return;
1048}
1049
1050sub save_error {
1051 my ( $self, $error ) = @_;
1052 push @{$self->{errors}}, $error;
1053 return;
1054}
1055
1056sub errors {
1057 my ( $self ) = @_;
1058 return $self->{errors};
1059}
1060
1061sub usage {
1062 my ( $self ) = @_;
1063 warn "No usage string is set" unless $self->{usage}; # XXX
1064 return "Usage: " . ($self->{usage} || '') . "\n";
1065}
1066
1067sub descr {
1068 my ( $self ) = @_;
1069 warn "No description string is set" unless $self->{description}; # XXX
1070 my $descr = ($self->{description} || $self->{program_name} || '')
1071 . " For more details, please use the --help option, "
1072 . "or try 'perldoc $PROGRAM_NAME' "
1073 . "for complete documentation.";
1074 $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
1075 unless $ENV{DONT_BREAK_LINES};
1076 $descr =~ s/ +$//mg;
1077 return $descr;
1078}
1079
1080sub usage_or_errors {
1081 my ( $self, $file, $return ) = @_;
1082 $file ||= $self->{file} || __FILE__;
1083
1084 if ( !$self->{description} || !$self->{usage} ) {
1085 PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
1086 my %synop = $self->_parse_synopsis($file);
1087 $self->{description} ||= $synop{description};
1088 $self->{usage} ||= $synop{usage};
1089 PTDEBUG && _d("Description:", $self->{description},
1090 "\nUsage:", $self->{usage});
1091 }
1092
1093 if ( $self->{opts}->{help}->{got} ) {
1094 print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
1095 exit 0 unless $return;
1096 }
1097 elsif ( scalar @{$self->{errors}} ) {
1098 print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
1099 exit 0 unless $return;
1100 }
1101
1102 return;
1103}
1104
1105sub print_errors {
1106 my ( $self ) = @_;
1107 my $usage = $self->usage() . "\n";
1108 if ( (my @errors = @{$self->{errors}}) ) {
1109 $usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
1110 . "\n";
1111 }
1112 return $usage . "\n" . $self->descr();
1113}
1114
1115sub print_usage {
1116 my ( $self ) = @_;
1117 die "Run get_opts() before print_usage()" unless $self->{got_opts};
1118 my @opts = values %{$self->{opts}};
1119
1120 my $maxl = max(
1121 map {
1122 length($_->{long}) # option long name
1123 + ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
1124 + ($_->{type} ? 2 : 0) # "=x" where x is the opt type
1125 }
1126 @opts);
1127
1128 my $maxs = max(0,
1129 map {
1130 length($_)
1131 + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
1132 + ($self->{opts}->{$_}->{type} ? 2 : 0)
1133 }
1134 values %{$self->{short_opts}});
1135
1136 my $lcol = max($maxl, ($maxs + 3));
1137 my $rcol = 80 - $lcol - 6;
1138 my $rpad = ' ' x ( 80 - $rcol );
1139
1140 $maxs = max($lcol - 3, $maxs);
1141
1142 my $usage = $self->descr() . "\n" . $self->usage();
1143
1144 my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
1145 push @groups, 'default';
1146
1147 foreach my $group ( reverse @groups ) {
1148 $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
1149 foreach my $opt (
1150 sort { $a->{long} cmp $b->{long} }
1151 grep { $_->{group} eq $group }
1152 @opts )
1153 {
1154 my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
1155 my $short = $opt->{short};
1156 my $desc = $opt->{desc};
1157
1158 $long .= $opt->{type} ? "=$opt->{type}" : "";
1159
1160 if ( $opt->{type} && $opt->{type} eq 'm' ) {
1161 my ($s) = $desc =~ m/\(suffix (.)\)/;
1162 $s ||= 's';
1163 $desc =~ s/\s+\(suffix .\)//;
1164 $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
1165 . "d=days; if no suffix, $s is used.";
1166 }
1167 $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
1168 $desc =~ s/ +$//mg;
1169 if ( $short ) {
1170 $usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
1171 }
1172 else {
1173 $usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
1174 }
1175 }
1176 }
1177
1178 $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
1179
1180 if ( (my @rules = @{$self->{rules}}) ) {
1181 $usage .= "\nRules:\n\n";
1182 $usage .= join("\n", map { " $_" } @rules) . "\n";
1183 }
1184 if ( $self->{DSNParser} ) {
1185 $usage .= "\n" . $self->{DSNParser}->usage();
1186 }
1187 $usage .= "\nOptions and values after processing arguments:\n\n";
1188 foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
1189 my $val = $opt->{value};
1190 my $type = $opt->{type} || '';
1191 my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
1192 $val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
1193 : !defined $val ? '(No value)'
1194 : $type eq 'd' ? $self->{DSNParser}->as_string($val)
1195 : $type =~ m/H|h/ ? join(',', sort keys %$val)
1196 : $type =~ m/A|a/ ? join(',', @$val)
1197 : $val;
1198 $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
1199 }
1200 return $usage;
1201}
1202
1203sub prompt_noecho {
1204 shift @_ if ref $_[0] eq __PACKAGE__;
1205 my ( $prompt ) = @_;
1206 local $OUTPUT_AUTOFLUSH = 1;
1207 print $prompt
1208 or die "Cannot print: $OS_ERROR";
1209 my $response;
1210 eval {
1211 require Term::ReadKey;
1212 Term::ReadKey::ReadMode('noecho');
1213 chomp($response = <STDIN>);
1214 Term::ReadKey::ReadMode('normal');
1215 print "\n"
1216 or die "Cannot print: $OS_ERROR";
1217 };
1218 if ( $EVAL_ERROR ) {
1219 die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
1220 }
1221 return $response;
1222}
1223
1224sub _read_config_file {
1225 my ( $self, $filename ) = @_;
1226 open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
1227 my @args;
1228 my $prefix = '--';
1229 my $parse = 1;
1230
1231 LINE:
1232 while ( my $line = <$fh> ) {
1233 chomp $line;
1234 next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
1235 $line =~ s/\s+#.*$//g;
1236 $line =~ s/^\s+|\s+$//g;
1237 if ( $line eq '--' ) {
1238 $prefix = '';
1239 $parse = 0;
1240 next LINE;
1241 }
1242 if ( $parse
1243 && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
1244 ) {
1245 push @args, grep { defined $_ } ("$prefix$opt", $arg);
1246 }
1247 elsif ( $line =~ m/./ ) {
1248 push @args, $line;
1249 }
1250 else {
1251 die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
1252 }
1253 }
1254 close $fh;
1255 return @args;
1256}
1257
1258sub read_para_after {
1259 my ( $self, $file, $regex ) = @_;
1260 open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
1261 local $INPUT_RECORD_SEPARATOR = '';
1262 my $para;
1263 while ( $para = <$fh> ) {
1264 next unless $para =~ m/^=pod$/m;
1265 last;
1266 }
1267 while ( $para = <$fh> ) {
1268 next unless $para =~ m/$regex/;
1269 last;
1270 }
1271 $para = <$fh>;
1272 chomp($para);
1273 close $fh or die "Can't close $file: $OS_ERROR";
1274 return $para;
1275}
1276
1277sub clone {
1278 my ( $self ) = @_;
1279
1280 my %clone = map {
1281 my $hashref = $self->{$_};
1282 my $val_copy = {};
1283 foreach my $key ( keys %$hashref ) {
1284 my $ref = ref $hashref->{$key};
1285 $val_copy->{$key} = !$ref ? $hashref->{$key}
1286 : $ref eq 'HASH' ? { %{$hashref->{$key}} }
1287 : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
1288 : $hashref->{$key};
1289 }
1290 $_ => $val_copy;
1291 } qw(opts short_opts defaults);
1292
1293 foreach my $scalar ( qw(got_opts) ) {
1294 $clone{$scalar} = $self->{$scalar};
1295 }
1296
1297 return bless \%clone;
1298}
1299
1300sub _parse_size {
1301 my ( $self, $opt, $val ) = @_;
1302
1303 if ( lc($val || '') eq 'null' ) {
1304 PTDEBUG && _d('NULL size for', $opt->{long});
1305 $opt->{value} = 'null';
1306 return;
1307 }
1308
1309 my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
1310 my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
1311 if ( defined $num ) {
1312 if ( $factor ) {
1313 $num *= $factor_for{$factor};
1314 PTDEBUG && _d('Setting option', $opt->{y},
1315 'to num', $num, '* factor', $factor);
1316 }
1317 $opt->{value} = ($pre || '') . $num;
1318 }
1319 else {
1320 $self->save_error("Invalid size for --$opt->{long}: $val");
1321 }
1322 return;
1323}
1324
1325sub _parse_attribs {
1326 my ( $self, $option, $attribs ) = @_;
1327 my $types = $self->{types};
1328 return $option
1329 . ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
1330 . ($attribs->{'negatable'} ? '!' : '' )
1331 . ($attribs->{'cumulative'} ? '+' : '' )
1332 . ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
1333}
1334
1335sub _parse_synopsis {
1336 my ( $self, $file ) = @_;
1337 $file ||= $self->{file} || __FILE__;
1338 PTDEBUG && _d("Parsing SYNOPSIS in", $file);
1339
1340 local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
1341 open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
1342 my $para;
1343 1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
1344 die "$file does not contain a SYNOPSIS section" unless $para;
1345 my @synop;
1346 for ( 1..2 ) { # 1 for the usage, 2 for the description
1347 my $para = <$fh>;
1348 push @synop, $para;
1349 }
1350 close $fh;
1351 PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
1352 my ($usage, $desc) = @synop;
1353 die "The SYNOPSIS section in $file is not formatted properly"
1354 unless $usage && $desc;
1355
1356 $usage =~ s/^\s*Usage:\s+(.+)/$1/;
1357 chomp $usage;
1358
1359 $desc =~ s/\n/ /g;
1360 $desc =~ s/\s{2,}/ /g;
1361 $desc =~ s/\. ([A-Z][a-z])/. $1/g;
1362 $desc =~ s/\s+$//;
1363
1364 return (
1365 description => $desc,
1366 usage => $usage,
1367 );
1368};
1369
1370sub _d {
1371 my ($package, undef, $line) = caller 0;
1372 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1373 map { defined $_ ? $_ : 'undef' }
1374 @_;
1375 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1376}
1377
1378if ( PTDEBUG ) {
1379 print '# ', $^X, ' ', $], "\n";
1380 if ( my $uname = `uname -a` ) {
1381 $uname =~ s/\s+/ /g;
1382 print "# $uname\n";
1383 }
1384 print '# Arguments: ',
1385 join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
1386}
1387
13881;
1389}
1390# ###########################################################################
1391# End OptionParser package
1392# ###########################################################################
1393
1394# ###########################################################################
1395# SlowLogParser package
1396# This package is a copy without comments from the original. The original
1397# with comments and its test file can be found in the Bazaar repository at,
1398# lib/SlowLogParser.pm
1399# t/lib/SlowLogParser.t
1400# See https://launchpad.net/percona-toolkit for more information.
1401# ###########################################################################
1402{
1403package SlowLogParser;
1404
1405use strict;
1406use warnings FATAL => 'all';
1407use English qw(-no_match_vars);
1408use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1409
1410use Data::Dumper;
1411$Data::Dumper::Indent = 1;
1412$Data::Dumper::Sortkeys = 1;
1413$Data::Dumper::Quotekeys = 0;
1414
1415sub new {
1416 my ( $class ) = @_;
1417 my $self = {
1418 pending => [],
1419 };
1420 return bless $self, $class;
1421}
1422
1423my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/;
1424my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;
1425my $slow_log_hd_line = qr{
1426 ^(?:
1427 T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix
1428 |
1429 [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
1430 |
1431 Time\s+Id\s+Command
1432 ).*\n
1433 }xm;
1434
1435sub parse_event {
1436 my ( $self, %args ) = @_;
1437 my @required_args = qw(next_event tell);
1438 foreach my $arg ( @required_args ) {
1439 die "I need a $arg argument" unless $args{$arg};
1440 }
1441 my ($next_event, $tell) = @args{@required_args};
1442
1443 my $pending = $self->{pending};
1444 local $INPUT_RECORD_SEPARATOR = ";\n#";
1445 my $trimlen = length($INPUT_RECORD_SEPARATOR);
1446 my $pos_in_log = $tell->();
1447 my $stmt;
1448
1449 EVENT:
1450 while (
1451 defined($stmt = shift @$pending)
1452 or defined($stmt = $next_event->())
1453 ) {
1454 my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
1455 $pos_in_log = $tell->();
1456
1457 if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
1458 my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
1459 if ( @chunks > 1 ) {
1460 PTDEBUG && _d("Found multiple chunks");
1461 $stmt = shift @chunks;
1462 unshift @$pending, @chunks;
1463 }
1464 }
1465
1466 $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
1467 $stmt =~ s/;\n#?\Z//;
1468
1469
1470 my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
1471 my $pos = 0;
1472 my $len = length($stmt);
1473 my $found_arg = 0;
1474 LINE:
1475 while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
1476 $pos = pos($stmt); # Be careful not to mess this up!
1477 my $line = $1; # Necessary for /g and pos() to work.
1478 PTDEBUG && _d($line);
1479
1480 if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {
1481
1482 if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
1483 PTDEBUG && _d("Got ts", $time);
1484 push @properties, 'ts', $time;
1485 ++$got_ts;
1486 if ( !$got_uh
1487 && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
1488 ) {
1489 PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
1490 push @properties, 'user', $user, 'host', $host, 'ip', $ip;
1491 ++$got_uh;
1492 }
1493 }
1494
1495 elsif ( !$got_uh
1496 && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
1497 ) {
1498 PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
1499 push @properties, 'user', $user, 'host', $host, 'ip', $ip;
1500 ++$got_uh;
1501 }
1502
1503 elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
1504 PTDEBUG && _d("Got admin command");
1505 $line =~ s/^#\s+//; # string leading "# ".
1506 push @properties, 'cmd', 'Admin', 'arg', $line;
1507 push @properties, 'bytes', length($properties[-1]);
1508 ++$found_arg;
1509 ++$got_ac;
1510 }
1511
1512 elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
1513 PTDEBUG && _d("Got some line with properties");
1514
1515 if ( $line =~ m/Schema:\s+\w+: / ) {
1516 PTDEBUG && _d('Removing empty Schema attrib');
1517 $line =~ s/Schema:\s+//;
1518 PTDEBUG && _d($line);
1519 }
1520
1521 my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
1522 push @properties, @temp;
1523 }
1524
1525 elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
1526 PTDEBUG && _d("Got a default database:", $db);
1527 push @properties, 'db', $db;
1528 ++$got_db;
1529 }
1530
1531 elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
1532 PTDEBUG && _d("Got some setting:", $setting);
1533 push @properties, split(/,|\s*=\s*/, $setting);
1534 ++$got_set;
1535 }
1536
1537 if ( !$found_arg && $pos == $len ) {
1538 PTDEBUG && _d("Did not find arg, looking for special cases");
1539 local $INPUT_RECORD_SEPARATOR = ";\n";
1540 if ( defined(my $l = $next_event->()) ) {
1541 chomp $l;
1542 $l =~ s/^\s+//;
1543 PTDEBUG && _d("Found admin statement", $l);
1544 push @properties, 'cmd', 'Admin', 'arg', $l;
1545 push @properties, 'bytes', length($properties[-1]);
1546 $found_arg++;
1547 }
1548 else {
1549 PTDEBUG && _d("I can't figure out what to do with this line");
1550 next EVENT;
1551 }
1552 }
1553 }
1554 else {
1555 PTDEBUG && _d("Got the query/arg line");
1556 my $arg = substr($stmt, $pos - length($line));
1557 push @properties, 'arg', $arg, 'bytes', length($arg);
1558 if ( $args{misc} && $args{misc}->{embed}
1559 && ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
1560 ) {
1561 push @properties, $e =~ m/$args{misc}->{capture}/g;
1562 }
1563 last LINE;
1564 }
1565 }
1566
1567 PTDEBUG && _d('Properties of event:', Dumper(\@properties));
1568 my $event = { @properties };
1569 if ( $args{stats} ) {
1570 $args{stats}->{events_read}++;
1571 $args{stats}->{events_parsed}++;
1572 }
1573 return $event;
1574 } # EVENT
1575
1576 @$pending = ();
1577 $args{oktorun}->(0) if $args{oktorun};
1578 return;
1579}
1580
1581sub _d {
1582 my ($package, undef, $line) = caller 0;
1583 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1584 map { defined $_ ? $_ : 'undef' }
1585 @_;
1586 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1587}
1588
15891;
1590}
1591# ###########################################################################
1592# End SlowLogParser package
1593# ###########################################################################
1594
1595# ###########################################################################
1596# Transformers package
1597# This package is a copy without comments from the original. The original
1598# with comments and its test file can be found in the Bazaar repository at,
1599# lib/Transformers.pm
1600# t/lib/Transformers.t
1601# See https://launchpad.net/percona-toolkit for more information.
1602# ###########################################################################
1603{
1604package Transformers;
1605
1606use strict;
1607use warnings FATAL => 'all';
1608use English qw(-no_match_vars);
1609use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1610
1611use Time::Local qw(timegm timelocal);
1612use Digest::MD5 qw(md5_hex);
1613
1614require Exporter;
1615our @ISA = qw(Exporter);
1616our %EXPORT_TAGS = ();
1617our @EXPORT = ();
1618our @EXPORT_OK = qw(
1619 micro_t
1620 percentage_of
1621 secs_to_time
1622 time_to_secs
1623 shorten
1624 ts
1625 parse_timestamp
1626 unix_timestamp
1627 any_unix_timestamp
1628 make_checksum
1629 crc32
1630);
1631
1632our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
1633our $proper_ts = qr/(\d\d\d\d)-(\d\d)-(\d\d)[T ](\d\d):(\d\d):(\d\d)(\.\d+)?/;
1634our $n_ts = qr/(\d{1,5})([shmd]?)/; # Limit \d{1,5} because \d{6} looks
1635
1636sub micro_t {
1637 my ( $t, %args ) = @_;
1638 my $p_ms = defined $args{p_ms} ? $args{p_ms} : 0; # precision for ms vals
1639 my $p_s = defined $args{p_s} ? $args{p_s} : 0; # precision for s vals
1640 my $f;
1641
1642 $t = 0 if $t < 0;
1643
1644 $t = sprintf('%.17f', $t) if $t =~ /e/;
1645
1646 $t =~ s/\.(\d{1,6})\d*/\.$1/;
1647
1648 if ($t > 0 && $t <= 0.000999) {
1649 $f = ($t * 1000000) . 'us';
1650 }
1651 elsif ($t >= 0.001000 && $t <= 0.999999) {
1652 $f = sprintf("%.${p_ms}f", $t * 1000);
1653 $f = ($f * 1) . 'ms'; # * 1 to remove insignificant zeros
1654 }
1655 elsif ($t >= 1) {
1656 $f = sprintf("%.${p_s}f", $t);
1657 $f = ($f * 1) . 's'; # * 1 to remove insignificant zeros
1658 }
1659 else {
1660 $f = 0; # $t should = 0 at this point
1661 }
1662
1663 return $f;
1664}
1665
1666sub percentage_of {
1667 my ( $is, $of, %args ) = @_;
1668 my $p = $args{p} || 0; # float precision
1669 my $fmt = $p ? "%.${p}f" : "%d";
1670 return sprintf $fmt, ($is * 100) / ($of ||= 1);
1671}
1672
1673sub secs_to_time {
1674 my ( $secs, $fmt ) = @_;
1675 $secs ||= 0;
1676 return '00:00' unless $secs;
1677
1678 $fmt ||= $secs >= 86_400 ? 'd'
1679 : $secs >= 3_600 ? 'h'
1680 : 'm';
1681
1682 return
1683 $fmt eq 'd' ? sprintf(
1684 "%d+%02d:%02d:%02d",
1685 int($secs / 86_400),
1686 int(($secs % 86_400) / 3_600),
1687 int(($secs % 3_600) / 60),
1688 $secs % 60)
1689 : $fmt eq 'h' ? sprintf(
1690 "%02d:%02d:%02d",
1691 int(($secs % 86_400) / 3_600),
1692 int(($secs % 3_600) / 60),
1693 $secs % 60)
1694 : sprintf(
1695 "%02d:%02d",
1696 int(($secs % 3_600) / 60),
1697 $secs % 60);
1698}
1699
1700sub time_to_secs {
1701 my ( $val, $default_suffix ) = @_;
1702 die "I need a val argument" unless defined $val;
1703 my $t = 0;
1704 my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
1705 $suffix = $suffix || $default_suffix || 's';
1706 if ( $suffix =~ m/[smhd]/ ) {
1707 $t = $suffix eq 's' ? $num * 1 # Seconds
1708 : $suffix eq 'm' ? $num * 60 # Minutes
1709 : $suffix eq 'h' ? $num * 3600 # Hours
1710 : $num * 86400; # Days
1711
1712 $t *= -1 if $prefix && $prefix eq '-';
1713 }
1714 else {
1715 die "Invalid suffix for $val: $suffix";
1716 }
1717 return $t;
1718}
1719
1720sub shorten {
1721 my ( $num, %args ) = @_;
1722 my $p = defined $args{p} ? $args{p} : 2; # float precision
1723 my $d = defined $args{d} ? $args{d} : 1_024; # divisor
1724 my $n = 0;
1725 my @units = ('', qw(k M G T P E Z Y));
1726 while ( $num >= $d && $n < @units - 1 ) {
1727 $num /= $d;
1728 ++$n;
1729 }
1730 return sprintf(
1731 $num =~ m/\./ || $n
1732 ? "%.${p}f%s"
1733 : '%d',
1734 $num, $units[$n]);
1735}
1736
1737sub ts {
1738 my ( $time, $gmt ) = @_;
1739 my ( $sec, $min, $hour, $mday, $mon, $year )
1740 = $gmt ? gmtime($time) : localtime($time);
1741 $mon += 1;
1742 $year += 1900;
1743 my $val = sprintf("%d-%02d-%02dT%02d:%02d:%02d",
1744 $year, $mon, $mday, $hour, $min, $sec);
1745 if ( my ($us) = $time =~ m/(\.\d+)$/ ) {
1746 $us = sprintf("%.6f", $us);
1747 $us =~ s/^0\././;
1748 $val .= $us;
1749 }
1750 return $val;
1751}
1752
1753sub parse_timestamp {
1754 my ( $val ) = @_;
1755 if ( my($y, $m, $d, $h, $i, $s, $f)
1756 = $val =~ m/^$mysql_ts$/ )
1757 {
1758 return sprintf "%d-%02d-%02d %02d:%02d:"
1759 . (defined $f ? '%09.6f' : '%02d'),
1760 $y + 2000, $m, $d, $h, $i, (defined $f ? $s + $f : $s);
1761 }
1762 return $val;
1763}
1764
1765sub unix_timestamp {
1766 my ( $val, $gmt ) = @_;
1767 if ( my($y, $m, $d, $h, $i, $s, $us) = $val =~ m/^$proper_ts$/ ) {
1768 $val = $gmt
1769 ? timegm($s, $i, $h, $d, $m - 1, $y)
1770 : timelocal($s, $i, $h, $d, $m - 1, $y);
1771 if ( defined $us ) {
1772 $us = sprintf('%.6f', $us);
1773 $us =~ s/^0\././;
1774 $val .= $us;
1775 }
1776 }
1777 return $val;
1778}
1779
1780sub any_unix_timestamp {
1781 my ( $val, $callback ) = @_;
1782
1783 if ( my ($n, $suffix) = $val =~ m/^$n_ts$/ ) {
1784 $n = $suffix eq 's' ? $n # Seconds
1785 : $suffix eq 'm' ? $n * 60 # Minutes
1786 : $suffix eq 'h' ? $n * 3600 # Hours
1787 : $suffix eq 'd' ? $n * 86400 # Days
1788 : $n; # default: Seconds
1789 PTDEBUG && _d('ts is now - N[shmd]:', $n);
1790 return time - $n;
1791 }
1792 elsif ( $val =~ m/^\d{9,}/ ) {
1793 PTDEBUG && _d('ts is already a unix timestamp');
1794 return $val;
1795 }
1796 elsif ( my ($ymd, $hms) = $val =~ m/^(\d{6})(?:\s+(\d+:\d+:\d+))?/ ) {
1797 PTDEBUG && _d('ts is MySQL slow log timestamp');
1798 $val .= ' 00:00:00' unless $hms;
1799 return unix_timestamp(parse_timestamp($val));
1800 }
1801 elsif ( ($ymd, $hms) = $val =~ m/^(\d{4}-\d\d-\d\d)(?:[T ](\d+:\d+:\d+))?/) {
1802 PTDEBUG && _d('ts is properly formatted timestamp');
1803 $val .= ' 00:00:00' unless $hms;
1804 return unix_timestamp($val);
1805 }
1806 else {
1807 PTDEBUG && _d('ts is MySQL expression');
1808 return $callback->($val) if $callback && ref $callback eq 'CODE';
1809 }
1810
1811 PTDEBUG && _d('Unknown ts type:', $val);
1812 return;
1813}
1814
1815sub make_checksum {
1816 my ( $val ) = @_;
1817 my $checksum = uc substr(md5_hex($val), -16);
1818 PTDEBUG && _d($checksum, 'checksum for', $val);
1819 return $checksum;
1820}
1821
1822sub crc32 {
1823 my ( $string ) = @_;
1824 return unless $string;
1825 my $poly = 0xEDB88320;
1826 my $crc = 0xFFFFFFFF;
1827 foreach my $char ( split(//, $string) ) {
1828 my $comp = ($crc ^ ord($char)) & 0xFF;
1829 for ( 1 .. 8 ) {
1830 $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
1831 }
1832 $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
1833 }
1834 return $crc ^ 0xFFFFFFFF;
1835}
1836
1837sub _d {
1838 my ($package, undef, $line) = caller 0;
1839 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1840 map { defined $_ ? $_ : 'undef' }
1841 @_;
1842 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1843}
1844
18451;
1846}
1847# ###########################################################################
1848# End Transformers package
1849# ###########################################################################
1850
1851# ###########################################################################
1852# QueryRewriter package
1853# This package is a copy without comments from the original. The original
1854# with comments and its test file can be found in the Bazaar repository at,
1855# lib/QueryRewriter.pm
1856# t/lib/QueryRewriter.t
1857# See https://launchpad.net/percona-toolkit for more information.
1858# ###########################################################################
1859{
1860package QueryRewriter;
1861
1862use strict;
1863use warnings FATAL => 'all';
1864use English qw(-no_match_vars);
1865use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1866
1867our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
1868 |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
1869my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
1870my $bal;
1871$bal = qr/
1872 \(
1873 (?:
1874 (?> [^()]+ ) # Non-parens without backtracking
1875 |
1876 (??{ $bal }) # Group with matching parens
1877 )*
1878 \)
1879 /x;
1880
1881my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
1882my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
1883my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
1884my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
1885
1886
1887sub new {
1888 my ( $class, %args ) = @_;
1889 my $self = { %args };
1890 return bless $self, $class;
1891}
1892
1893sub strip_comments {
1894 my ( $self, $query ) = @_;
1895 return unless $query;
1896 $query =~ s/$olc_re//go;
1897 $query =~ s/$mlc_re//go;
1898 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
1899 $query =~ s/$vlc_re//go;
1900 }
1901 return $query;
1902}
1903
1904sub shorten {
1905 my ( $self, $query, $length ) = @_;
1906 $query =~ s{
1907 \A(
1908 (?:INSERT|REPLACE)
1909 (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
1910 (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
1911 )
1912 \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
1913 {$1 /*... omitted ...*/$2}xsi;
1914
1915 return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
1916
1917 my $last_length = 0;
1918 my $query_length = length($query);
1919 while (
1920 $length > 0
1921 && $query_length > $length
1922 && $query_length < ( $last_length || $query_length + 1 )
1923 ) {
1924 $last_length = $query_length;
1925 $query =~ s{
1926 (\bIN\s*\() # The opening of an IN list
1927 ([^\)]+) # Contents of the list, assuming no item contains paren
1928 (?=\)) # Close of the list
1929 }
1930 {
1931 $1 . __shorten($2)
1932 }gexsi;
1933 }
1934
1935 return $query;
1936}
1937
1938sub __shorten {
1939 my ( $snippet ) = @_;
1940 my @vals = split(/,/, $snippet);
1941 return $snippet unless @vals > 20;
1942 my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
1943 return
1944 join(',', @keep)
1945 . "/*... omitted "
1946 . scalar(@vals)
1947 . " items ...*/";
1948}
1949
1950sub fingerprint {
1951 my ( $self, $query ) = @_;
1952
1953 $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
1954 && return 'mysqldump';
1955 $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query
1956 && return 'percona-toolkit';
1957 $query =~ m/\Aadministrator command: /
1958 && return $query;
1959 $query =~ m/\A\s*(call\s+\S+)\(/i
1960 && return lc($1); # Warning! $1 used, be careful.
1961 if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
1962 $query = $beginning; # Shorten multi-value INSERT statements ASAP
1963 }
1964
1965 $query =~ s/$olc_re//go;
1966 $query =~ s/$mlc_re//go;
1967 $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
1968 && return $query;
1969
1970 $query =~ s/\\["']//g; # quoted strings
1971 $query =~ s/".*?"/?/sg; # quoted strings
1972 $query =~ s/'.*?'/?/sg; # quoted strings
1973
1974 if ( $self->{match_md5_checksums} ) {
1975 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
1976 }
1977
1978 if ( !$self->{match_embedded_numbers} ) {
1979 $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
1980 }
1981 else {
1982 $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
1983 }
1984
1985 if ( $self->{match_md5_checksums} ) {
1986 $query =~ s/[xb+-]\?/?/g;
1987 }
1988 else {
1989 $query =~ s/[xb.+-]\?/?/g;
1990 }
1991
1992 $query =~ s/\A\s+//; # Chop off leading whitespace
1993 chomp $query; # Kill trailing whitespace
1994 $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
1995 $query = lc $query;
1996 $query =~ s/\bnull\b/?/g; # Get rid of NULLs
1997 $query =~ s{ # Collapse IN and VALUES lists
1998 \b(in|values?)(?:[\s,]*\([\s?,]*\))+
1999 }
2000 {$1(?+)}gx;
2001 $query =~ s{ # Collapse UNION
2002 \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
2003 }
2004 {$1 /*repeat$2*/}xg;
2005 $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
2006
2007 if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
2008 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
2009 }
2010
2011 return $query;
2012}
2013
2014sub distill_verbs {
2015 my ( $self, $query ) = @_;
2016
2017 $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
2018 $query =~ m/\A\s*use\s+/ && return "USE";
2019 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
2020 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
2021
2022 if ( $query =~ m/\Aadministrator command:/ ) {
2023 $query =~ s/administrator command:/ADMIN/;
2024 $query = uc $query;
2025 return $query;
2026 }
2027
2028 $query = $self->strip_comments($query);
2029
2030 if ( $query =~ m/\A\s*SHOW\s+/i ) {
2031 PTDEBUG && _d($query);
2032
2033 $query = uc $query;
2034 $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
2035 $query =~ s/\s+COUNT[^)]+\)//g;
2036
2037 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
2038
2039 $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
2040 $query =~ s/\s+/ /g;
2041 PTDEBUG && _d($query);
2042 return $query;
2043 }
2044
2045 eval $QueryParser::data_def_stmts;
2046 eval $QueryParser::tbl_ident;
2047 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
2048 if ( $dds) {
2049 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
2050 $obj = uc $obj if $obj;
2051 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
2052 my ($db_or_tbl)
2053 = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
2054 PTDEBUG && _d('Matches db or table:', $db_or_tbl);
2055 return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
2056 }
2057
2058 my @verbs = $query =~ m/\b($verbs)\b/gio;
2059 @verbs = do {
2060 my $last = '';
2061 grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
2062 };
2063
2064 if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
2065 PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
2066 my $union = grep { $_ eq 'UNION' } @verbs;
2067 @verbs = $union ? qw(SELECT UNION) : qw(SELECT);
2068 }
2069
2070 my $verb_str = join(q{ }, @verbs);
2071 return $verb_str;
2072}
2073
2074sub __distill_tables {
2075 my ( $self, $query, $table, %args ) = @_;
2076 my $qp = $args{QueryParser} || $self->{QueryParser};
2077 die "I need a QueryParser argument" unless $qp;
2078
2079 my @tables = map {
2080 $_ =~ s/`//g;
2081 $_ =~ s/(_?)[0-9]+/$1?/g;
2082 $_;
2083 } grep { defined $_ } $qp->get_tables($query);
2084
2085 push @tables, $table if $table;
2086
2087 @tables = do {
2088 my $last = '';
2089 grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
2090 };
2091
2092 return @tables;
2093}
2094
2095sub distill {
2096 my ( $self, $query, %args ) = @_;
2097
2098 if ( $args{generic} ) {
2099 my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
2100 return '' unless $cmd;
2101 $query = (uc $cmd) . ($arg ? " $arg" : '');
2102 }
2103 else {
2104 my ($verbs, $table) = $self->distill_verbs($query, %args);
2105
2106 if ( $verbs && $verbs =~ m/^SHOW/ ) {
2107 my %alias_for = qw(
2108 SCHEMA DATABASE
2109 KEYS INDEX
2110 INDEXES INDEX
2111 );
2112 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
2113 $query = $verbs;
2114 }
2115 else {
2116 my @tables = $self->__distill_tables($query, $table, %args);
2117 $query = join(q{ }, $verbs, @tables);
2118 }
2119 }
2120
2121 if ( $args{trf} ) {
2122 $query = $args{trf}->($query, %args);
2123 }
2124
2125 return $query;
2126}
2127
2128sub convert_to_select {
2129 my ( $self, $query ) = @_;
2130 return unless $query;
2131
2132 return if $query =~ m/=\s*\(\s*SELECT /i;
2133
2134 $query =~ s{
2135 \A.*?
2136 update(?:\s+(?:low_priority|ignore))?\s+(.*?)
2137 \s+set\b(.*?)
2138 (?:\s*where\b(.*?))?
2139 (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
2140 \Z
2141 }
2142 {__update_to_select($1, $2, $3, $4)}exsi
2143 || $query =~ s{
2144 \A.*?
2145 (?:insert(?:\s+ignore)?|replace)\s+
2146 .*?\binto\b(.*?)\(([^\)]+)\)\s*
2147 values?\s*(\(.*?\))\s*
2148 (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2149 \Z
2150 }
2151 {__insert_to_select($1, $2, $3)}exsi
2152 || $query =~ s{
2153 \A.*?
2154 (?:insert(?:\s+ignore)?|replace)\s+
2155 (?:.*?\binto)\b(.*?)\s*
2156 set\s+(.*?)\s*
2157 (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2158 \Z
2159 }
2160 {__insert_to_select_with_set($1, $2)}exsi
2161 || $query =~ s{
2162 \A.*?
2163 delete\s+(.*?)
2164 \bfrom\b(.*)
2165 \Z
2166 }
2167 {__delete_to_select($1, $2)}exsi;
2168 $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
2169 $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
2170 return $query;
2171}
2172
2173sub convert_select_list {
2174 my ( $self, $query ) = @_;
2175 $query =~ s{
2176 \A\s*select(.*?)\bfrom\b
2177 }
2178 {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
2179 return $query;
2180}
2181
2182sub __delete_to_select {
2183 my ( $delete, $join ) = @_;
2184 if ( $join =~ m/\bjoin\b/ ) {
2185 return "select 1 from $join";
2186 }
2187 return "select * from $join";
2188}
2189
2190sub __insert_to_select {
2191 my ( $tbl, $cols, $vals ) = @_;
2192 PTDEBUG && _d('Args:', @_);
2193 my @cols = split(/,/, $cols);
2194 PTDEBUG && _d('Cols:', @cols);
2195 $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
2196 my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
2197 PTDEBUG && _d('Vals:', @vals);
2198 if ( @cols == @vals ) {
2199 return "select * from $tbl where "
2200 . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
2201 }
2202 else {
2203 return "select * from $tbl limit 1";
2204 }
2205}
2206
2207sub __insert_to_select_with_set {
2208 my ( $from, $set ) = @_;
2209 $set =~ s/,/ and /g;
2210 return "select * from $from where $set ";
2211}
2212
2213sub __update_to_select {
2214 my ( $from, $set, $where, $limit ) = @_;
2215 return "select $set from $from "
2216 . ( $where ? "where $where" : '' )
2217 . ( $limit ? " $limit " : '' );
2218}
2219
2220sub wrap_in_derived {
2221 my ( $self, $query ) = @_;
2222 return unless $query;
2223 return $query =~ m/\A\s*select/i
2224 ? "select 1 from ($query) as x limit 1"
2225 : $query;
2226}
2227
2228sub _d {
2229 my ($package, undef, $line) = caller 0;
2230 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2231 map { defined $_ ? $_ : 'undef' }
2232 @_;
2233 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2234}
2235
22361;
2237}
2238# ###########################################################################
2239# End QueryRewriter package
2240# ###########################################################################
2241
2242# ###########################################################################
2243# QueryParser package
2244# This package is a copy without comments from the original. The original
2245# with comments and its test file can be found in the Bazaar repository at,
2246# lib/QueryParser.pm
2247# t/lib/QueryParser.t
2248# See https://launchpad.net/percona-toolkit for more information.
2249# ###########################################################################
2250{
2251package QueryParser;
2252
2253use strict;
2254use warnings FATAL => 'all';
2255use English qw(-no_match_vars);
2256use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2257
2258our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
2259our $tbl_regex = qr{
2260 \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
2261 \b\s*
2262 \(? # Optional paren around tables
2263 ($tbl_ident
2264 (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
2265 )
2266 }xio;
2267our $has_derived = qr{
2268 \b(?:FROM|JOIN|,)
2269 \s*\(\s*SELECT
2270 }xi;
2271
2272our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
2273
2274our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
2275
2276sub new {
2277 my ( $class ) = @_;
2278 bless {}, $class;
2279}
2280
2281sub get_tables {
2282 my ( $self, $query ) = @_;
2283 return unless $query;
2284 PTDEBUG && _d('Getting tables for', $query);
2285
2286 my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
2287 if ( $ddl_stmt ) {
2288 PTDEBUG && _d('Special table type:', $ddl_stmt);
2289 $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
2290 if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
2291 PTDEBUG && _d('Query alters a database, not a table');
2292 return ();
2293 }
2294 if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
2295 my ($select) = $query =~ m/\b(SELECT\b.+)/is;
2296 PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
2297 return $self->get_tables($select);
2298 }
2299 my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
2300 PTDEBUG && _d('Matches table:', $tbl);
2301 return ($tbl);
2302 }
2303
2304 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
2305
2306 if ( $query =~ /^\s*LOCK TABLES/i ) {
2307 PTDEBUG && _d('Special table type: LOCK TABLES');
2308 $query =~ s/^(\s*LOCK TABLES\s+)//;
2309 $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g;
2310 PTDEBUG && _d('Locked tables:', $query);
2311 $query = "FROM $query";
2312 }
2313
2314 $query =~ s/\\["']//g; # quoted strings
2315 $query =~ s/".*?"/?/sg; # quoted strings
2316 $query =~ s/'.*?'/?/sg; # quoted strings
2317
2318 my @tables;
2319 foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
2320 PTDEBUG && _d('Match tables:', $tbls);
2321
2322 next if $tbls =~ m/\ASELECT\b/i;
2323
2324 foreach my $tbl ( split(',', $tbls) ) {
2325 $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
2326
2327 if ( $tbl !~ m/[a-zA-Z]/ ) {
2328 PTDEBUG && _d('Skipping suspicious table name:', $tbl);
2329 next;
2330 }
2331
2332 push @tables, $tbl;
2333 }
2334 }
2335 return @tables;
2336}
2337
2338sub has_derived_table {
2339 my ( $self, $query ) = @_;
2340 my $match = $query =~ m/$has_derived/;
2341 PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
2342 return $match;
2343}
2344
2345sub get_aliases {
2346 my ( $self, $query, $list ) = @_;
2347
2348 my $result = {
2349 DATABASE => {},
2350 TABLE => {},
2351 };
2352 return $result unless $query;
2353
2354 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
2355
2356 $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
2357
2358 my @tbl_refs;
2359 my ($tbl_refs, $from) = $query =~ m{
2360 (
2361 (FROM|INTO|UPDATE)\b\s* # Keyword before table refs
2362 .+? # Table refs
2363 )
2364 (?:\s+|\z) # If the query does not end with the table
2365 (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
2366 }ix;
2367
2368 if ( $tbl_refs ) {
2369
2370 if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
2371 $tbl_refs =~ s/\([^\)]+\)\s*//;
2372 }
2373
2374 PTDEBUG && _d('tbl refs:', $tbl_refs);
2375
2376 my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
2377
2378 my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
2379
2380 $tbl_refs =~ s/ = /=/g;
2381
2382 while (
2383 $tbl_refs =~ m{
2384 $before_tbl\b\s*
2385 ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
2386 \s*$after_tbl
2387 }xgio )
2388 {
2389 my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
2390 PTDEBUG && _d('Match table:', $tbl_ref);
2391 push @tbl_refs, $tbl_ref;
2392 $alias = $self->trim_identifier($alias);
2393
2394 if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
2395 PTDEBUG && _d('Subquery', $tbl_ref);
2396 $result->{TABLE}->{$alias} = undef;
2397 next;
2398 }
2399
2400 my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
2401 $db = $self->trim_identifier($db);
2402 $tbl = $self->trim_identifier($tbl);
2403 $result->{TABLE}->{$alias || $tbl} = $tbl;
2404 $result->{DATABASE}->{$tbl} = $db if $db;
2405 }
2406 }
2407 else {
2408 PTDEBUG && _d("No tables ref in", $query);
2409 }
2410
2411 if ( $list ) {
2412 return \@tbl_refs;
2413 }
2414 else {
2415 return $result;
2416 }
2417}
2418
2419sub split {
2420 my ( $self, $query ) = @_;
2421 return unless $query;
2422 $query = $self->clean_query($query);
2423 PTDEBUG && _d('Splitting', $query);
2424
2425 my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
2426
2427 my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
2428
2429 my @statements;
2430 if ( @split_statements == 1 ) {
2431 push @statements, $query;
2432 }
2433 else {
2434 for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
2435 push @statements, $split_statements[$i].$split_statements[$i+1];
2436
2437 if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
2438 $statements[-2] .= pop @statements;
2439 }
2440 }
2441 }
2442
2443 PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
2444 return @statements;
2445}
2446
2447sub clean_query {
2448 my ( $self, $query ) = @_;
2449 return unless $query;
2450 $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
2451 $query =~ s/^\s+//; # Remove leading spaces
2452 $query =~ s/\s+$//; # Remove trailing spaces
2453 $query =~ s/\s{2,}/ /g; # Remove extra spaces
2454 return $query;
2455}
2456
2457sub split_subquery {
2458 my ( $self, $query ) = @_;
2459 return unless $query;
2460 $query = $self->clean_query($query);
2461 $query =~ s/;$//;
2462
2463 my @subqueries;
2464 my $sqno = 0; # subquery number
2465 my $pos = 0;
2466 while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
2467 $pos = pos($query);
2468 my $word = $1;
2469 PTDEBUG && _d($word, $sqno);
2470 if ( $word =~ m/^\(?SELECT\b/i ) {
2471 my $start_pos = $pos - length($word) - 1;
2472 if ( $start_pos ) {
2473 $sqno++;
2474 PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
2475 $subqueries[$sqno] = {
2476 start_pos => $start_pos,
2477 end_pos => 0,
2478 len => 0,
2479 words => [$word],
2480 lp => 1, # left parentheses
2481 rp => 0, # right parentheses
2482 done => 0,
2483 };
2484 }
2485 else {
2486 PTDEBUG && _d('Main SELECT at pos 0');
2487 }
2488 }
2489 else {
2490 next unless $sqno; # next unless we're in a subquery
2491 PTDEBUG && _d('In subquery', $sqno);
2492 my $sq = $subqueries[$sqno];
2493 if ( $sq->{done} ) {
2494 PTDEBUG && _d('This subquery is done; SQL is for',
2495 ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
2496 next;
2497 }
2498 push @{$sq->{words}}, $word;
2499 my $lp = ($word =~ tr/\(//) || 0;
2500 my $rp = ($word =~ tr/\)//) || 0;
2501 PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
2502 if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
2503 my $end_pos = $pos - 1;
2504 PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
2505 $sq->{end_pos} = $end_pos;
2506 $sq->{len} = $end_pos - $sq->{start_pos};
2507 }
2508 }
2509 }
2510
2511 for my $i ( 1..$#subqueries ) {
2512 my $sq = $subqueries[$i];
2513 next unless $sq;
2514 $sq->{sql} = join(' ', @{$sq->{words}});
2515 substr $query,
2516 $sq->{start_pos} + 1, # +1 for (
2517 $sq->{len} - 1, # -1 for )
2518 "__subquery_$i";
2519 }
2520
2521 return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
2522}
2523
2524sub query_type {
2525 my ( $self, $query, $qr ) = @_;
2526 my ($type, undef) = $qr->distill_verbs($query);
2527 my $rw;
2528 if ( $type =~ m/^SELECT\b/ ) {
2529 $rw = 'read';
2530 }
2531 elsif ( $type =~ m/^$data_manip_stmts\b/
2532 || $type =~ m/^$data_def_stmts\b/ ) {
2533 $rw = 'write'
2534 }
2535
2536 return {
2537 type => $type,
2538 rw => $rw,
2539 }
2540}
2541
2542sub get_columns {
2543 my ( $self, $query ) = @_;
2544 my $cols = [];
2545 return $cols unless $query;
2546 my $cols_def;
2547
2548 if ( $query =~ m/^SELECT/i ) {
2549 $query =~ s/
2550 ^SELECT\s+
2551 (?:ALL
2552 |DISTINCT
2553 |DISTINCTROW
2554 |HIGH_PRIORITY
2555 |STRAIGHT_JOIN
2556 |SQL_SMALL_RESULT
2557 |SQL_BIG_RESULT
2558 |SQL_BUFFER_RESULT
2559 |SQL_CACHE
2560 |SQL_NO_CACHE
2561 |SQL_CALC_FOUND_ROWS
2562 )\s+
2563 /SELECT /xgi;
2564 ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
2565 }
2566 elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
2567 ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
2568 }
2569
2570 PTDEBUG && _d('Columns:', $cols_def);
2571 if ( $cols_def ) {
2572 @$cols = split(',', $cols_def);
2573 map {
2574 my $col = $_;
2575 $col = s/^\s+//g;
2576 $col = s/\s+$//g;
2577 $col;
2578 } @$cols;
2579 }
2580
2581 return $cols;
2582}
2583
2584sub parse {
2585 my ( $self, $query ) = @_;
2586 return unless $query;
2587 my $parsed = {};
2588
2589 $query =~ s/\n/ /g;
2590 $query = $self->clean_query($query);
2591
2592 $parsed->{query} = $query,
2593 $parsed->{tables} = $self->get_aliases($query, 1);
2594 $parsed->{columns} = $self->get_columns($query);
2595
2596 my ($type) = $query =~ m/^(\w+)/;
2597 $parsed->{type} = lc $type;
2598
2599
2600 $parsed->{sub_queries} = [];
2601
2602 return $parsed;
2603}
2604
2605sub extract_tables {
2606 my ( $self, %args ) = @_;
2607 my $query = $args{query};
2608 my $default_db = $args{default_db};
2609 my $q = $self->{Quoter} || $args{Quoter};
2610 return unless $query;
2611 PTDEBUG && _d('Extracting tables');
2612 my @tables;
2613 my %seen;
2614 foreach my $db_tbl ( $self->get_tables($query) ) {
2615 next unless $db_tbl;
2616 next if $seen{$db_tbl}++; # Unique-ify for issue 337.
2617 my ( $db, $tbl ) = $q->split_unquote($db_tbl);
2618 push @tables, [ $db || $default_db, $tbl ];
2619 }
2620 return @tables;
2621}
2622
2623sub trim_identifier {
2624 my ($self, $str) = @_;
2625 return unless defined $str;
2626 $str =~ s/`//g;
2627 $str =~ s/^\s+//;
2628 $str =~ s/\s+$//;
2629 return $str;
2630}
2631
2632sub _d {
2633 my ($package, undef, $line) = caller 0;
2634 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2635 map { defined $_ ? $_ : 'undef' }
2636 @_;
2637 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2638}
2639
26401;
2641}
2642# ###########################################################################
2643# End QueryParser package
2644# ###########################################################################
2645
2646# ###########################################################################
2647# FileIterator package
2648# This package is a copy without comments from the original. The original
2649# with comments and its test file can be found in the Bazaar repository at,
2650# lib/FileIterator.pm
2651# t/lib/FileIterator.t
2652# See https://launchpad.net/percona-toolkit for more information.
2653# ###########################################################################
2654{
2655package FileIterator;
2656
2657use strict;
2658use warnings FATAL => 'all';
2659use English qw(-no_match_vars);
2660use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2661
2662sub new {
2663 my ( $class, %args ) = @_;
2664 my $self = {
2665 %args,
2666 };
2667 return bless $self, $class;
2668}
2669
2670sub get_file_itr {
2671 my ( $self, @filenames ) = @_;
2672
2673 my @final_filenames;
2674 FILENAME:
2675 foreach my $fn ( @filenames ) {
2676 if ( !defined $fn ) {
2677 warn "Skipping undefined filename";
2678 next FILENAME;
2679 }
2680 if ( $fn ne '-' ) {
2681 if ( !-e $fn || !-r $fn ) {
2682 warn "$fn does not exist or is not readable";
2683 next FILENAME;
2684 }
2685 }
2686 push @final_filenames, $fn;
2687 }
2688
2689 if ( !@filenames ) {
2690 push @final_filenames, '-';
2691 PTDEBUG && _d('Auto-adding "-" to the list of filenames');
2692 }
2693
2694 PTDEBUG && _d('Final filenames:', @final_filenames);
2695 return sub {
2696 while ( @final_filenames ) {
2697 my $fn = shift @final_filenames;
2698 PTDEBUG && _d('Filename:', $fn);
2699 if ( $fn eq '-' ) { # Magical STDIN filename.
2700 return (*STDIN, undef, undef);
2701 }
2702 open my $fh, '<', $fn or warn "Cannot open $fn: $OS_ERROR";
2703 if ( $fh ) {
2704 return ( $fh, $fn, -s $fn );
2705 }
2706 }
2707 return (); # Avoids $f being set to 0 in list context.
2708 };
2709}
2710
2711sub _d {
2712 my ($package, undef, $line) = caller 0;
2713 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2714 map { defined $_ ? $_ : 'undef' }
2715 @_;
2716 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2717}
2718
27191;
2720}
2721# ###########################################################################
2722# End FileIterator package
2723# ###########################################################################
2724
2725# ###########################################################################
2726# SQLParser r0
2727# Don't update this package!
2728# ###########################################################################
2729
2730package SQLParser;
2731
2732{ # package scope
2733use strict;
2734use warnings FATAL => 'all';
2735use English qw(-no_match_vars);
2736use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2737
2738use Data::Dumper;
2739$Data::Dumper::Indent = 1;
2740$Data::Dumper::Sortkeys = 1;
2741$Data::Dumper::Quotekeys = 0;
2742
2743my $quoted_ident = qr/`[^`]+`/;
2744my $unquoted_ident = qr/
2745 \@{0,2} # optional @ or @@ for variables
2746 \w+ # the ident name
2747 (?:\([^\)]*\))? # optional function params
2748/x;
2749
2750my $ident_alias = qr/
2751 \s+ # space before alias
2752 (?:(AS)\s+)? # optional AS keyword
2753 ((?>$quoted_ident|$unquoted_ident)) # alais
2754/xi;
2755
2756my $table_ident = qr/(?:
2757 ((?:(?>$quoted_ident|$unquoted_ident)\.?){1,2}) # table
2758 (?:$ident_alias)? # optional alias
2759)/xo;
2760
2761my $column_ident = qr/(?:
2762 ((?:(?>$quoted_ident|$unquoted_ident|\*)\.?){1,3}) # column
2763 (?:$ident_alias)? # optional alias
2764)/xo;
2765
2766my $function_ident = qr/
2767 \b
2768 (
2769 \w+ # function name
2770 \( # opening parenthesis
2771 [^\)]+ # function args, if any
2772 \) # closing parenthesis
2773 )
2774/x;
2775
2776my %ignore_function = (
2777 INDEX => 1,
2778 KEY => 1,
2779);
2780
2781sub new {
2782 my ( $class, %args ) = @_;
2783 my $self = {
2784 %args,
2785 };
2786 return bless $self, $class;
2787}
2788
2789sub parse {
2790 my ( $self, $query ) = @_;
2791 return unless $query;
2792
2793 my $allowed_types = qr/(?:
2794 DELETE
2795 |INSERT
2796 |REPLACE
2797 |SELECT
2798 |UPDATE
2799 |CREATE
2800 )/xi;
2801
2802 $query = $self->clean_query($query);
2803
2804 my $type;
2805 if ( $query =~ s/^(\w+)\s+// ) {
2806 $type = lc $1;
2807 MKDEBUG && _d('Query type:', $type);
2808 die "Cannot parse " . uc($type) . " queries"
2809 unless $type =~ m/$allowed_types/i;
2810 }
2811 else {
2812 die "Query does not begin with a word"; # shouldn't happen
2813 }
2814
2815 $query = $self->normalize_keyword_spaces($query);
2816
2817 my @subqueries;
2818 if ( $query =~ m/(\(SELECT )/i ) {
2819 MKDEBUG && _d('Removing subqueries');
2820 @subqueries = $self->remove_subqueries($query);
2821 $query = shift @subqueries;
2822 }
2823 elsif ( $type eq 'create' && $query =~ m/\s+SELECT/ ) {
2824 MKDEBUG && _d('CREATE..SELECT');
2825 ($subqueries[0]->{query}) = $query =~ m/\s+(SELECT .+)/;
2826 $query =~ s/\s+SELECT.+//;
2827 }
2828
2829 my $parse_func = "parse_$type";
2830 my $struct = $self->$parse_func($query);
2831 if ( !$struct ) {
2832 MKDEBUG && _d($parse_func, 'failed to parse query');
2833 return;
2834 }
2835 $struct->{type} = $type;
2836 $self->_parse_clauses($struct);
2837
2838 if ( @subqueries ) {
2839 MKDEBUG && _d('Parsing subqueries');
2840 foreach my $subquery ( @subqueries ) {
2841 my $subquery_struct = $self->parse($subquery->{query});
2842 @{$subquery_struct}{keys %$subquery} = values %$subquery;
2843 push @{$struct->{subqueries}}, $subquery_struct;
2844 }
2845 }
2846
2847 MKDEBUG && _d('Query struct:', Dumper($struct));
2848 return $struct;
2849}
2850
2851
2852sub _parse_clauses {
2853 my ( $self, $struct ) = @_;
2854 foreach my $clause ( keys %{$struct->{clauses}} ) {
2855 if ( $clause =~ m/ / ) {
2856 (my $clause_no_space = $clause) =~ s/ /_/g;
2857 $struct->{clauses}->{$clause_no_space} = $struct->{clauses}->{$clause};
2858 delete $struct->{clauses}->{$clause};
2859 $clause = $clause_no_space;
2860 }
2861
2862 my $parse_func = "parse_$clause";
2863 $struct->{$clause} = $self->$parse_func($struct->{clauses}->{$clause});
2864
2865 if ( $clause eq 'select' ) {
2866 MKDEBUG && _d('Parsing subquery clauses');
2867 $struct->{select}->{type} = 'select';
2868 $self->_parse_clauses($struct->{select});
2869 }
2870 }
2871 return;
2872}
2873
2874sub clean_query {
2875 my ( $self, $query ) = @_;
2876 return unless $query;
2877
2878 $query =~ s/^\s*--.*$//gm; # -- comments
2879 $query =~ s/\s+/ /g; # extra spaces/flatten
2880 $query =~ s!/\*.*?\*/!!g; # /* comments */
2881 $query =~ s/^\s+//; # leading spaces
2882 $query =~ s/\s+$//; # trailing spaces
2883
2884 return $query;
2885}
2886
2887sub normalize_keyword_spaces {
2888 my ( $self, $query ) = @_;
2889
2890 $query =~ s/\b(VALUE(?:S)?)\(/$1 (/i;
2891 $query =~ s/\bON\(/on (/gi;
2892 $query =~ s/\bUSING\(/using (/gi;
2893
2894 $query =~ s/\(\s+SELECT\s+/(SELECT /gi;
2895
2896 return $query;
2897}
2898
2899sub _parse_query {
2900 my ( $self, $query, $keywords, $first_clause, $clauses ) = @_;
2901 return unless $query;
2902 my $struct = {};
2903
2904 1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;
2905
2906 my @clause = grep { defined $_ }
2907 ($query =~ m/\G(.+?)(?:$clauses\s+|\Z)/gci);
2908
2909 my $clause = $first_clause,
2910 my $value = shift @clause;
2911 $struct->{clauses}->{$clause} = $value;
2912 MKDEBUG && _d('Clause:', $clause, $value);
2913
2914 while ( @clause ) {
2915 $clause = shift @clause;
2916 $value = shift @clause;
2917 $struct->{clauses}->{lc $clause} = $value;
2918 MKDEBUG && _d('Clause:', $clause, $value);
2919 }
2920
2921 ($struct->{unknown}) = ($query =~ m/\G(.+)/);
2922
2923 return $struct;
2924}
2925
2926sub parse_delete {
2927 my ( $self, $query ) = @_;
2928 if ( $query =~ s/FROM\s+//i ) {
2929 my $keywords = qr/(LOW_PRIORITY|QUICK|IGNORE)/i;
2930 my $clauses = qr/(FROM|WHERE|ORDER BY|LIMIT)/i;
2931 return $self->_parse_query($query, $keywords, 'from', $clauses);
2932 }
2933 else {
2934 die "DELETE without FROM: $query";
2935 }
2936}
2937
2938sub parse_insert {
2939 my ( $self, $query ) = @_;
2940 return unless $query;
2941 my $struct = {};
2942
2943 my $keywords = qr/(LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)/i;
2944 1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;
2945
2946 if ( $query =~ m/ON DUPLICATE KEY UPDATE (.+)/i ) {
2947 my $values = $1;
2948 die "No values after ON DUPLICATE KEY UPDATE: $query" unless $values;
2949 $struct->{clauses}->{on_duplicate} = $values;
2950 MKDEBUG && _d('Clause: on duplicate key update', $values);
2951
2952 $query =~ s/\s+ON DUPLICATE KEY UPDATE.+//;
2953 }
2954
2955 if ( my @into = ($query =~ m/
2956 (?:INTO\s+)? # INTO, optional
2957 (.+?)\s+ # table ref
2958 (\([^\)]+\)\s+)? # column list, optional
2959 (VALUE.?|SET|SELECT)\s+ # start of next caluse
2960 /xgci)
2961 ) {
2962 my $tbl = shift @into; # table ref
2963 $struct->{clauses}->{into} = $tbl;
2964 MKDEBUG && _d('Clause: into', $tbl);
2965
2966 my $cols = shift @into; # columns, maybe
2967 if ( $cols ) {
2968 $cols =~ s/[\(\)]//g;
2969 $struct->{clauses}->{columns} = $cols;
2970 MKDEBUG && _d('Clause: columns', $cols);
2971 }
2972
2973 my $next_clause = lc(shift @into); # VALUES, SET or SELECT
2974 die "INSERT/REPLACE without clause after table: $query"
2975 unless $next_clause;
2976 $next_clause = 'values' if $next_clause eq 'value';
2977 my ($values) = ($query =~ m/\G(.+)/gci);
2978 die "INSERT/REPLACE without values: $query" unless $values;
2979 $struct->{clauses}->{$next_clause} = $values;
2980 MKDEBUG && _d('Clause:', $next_clause, $values);
2981 }
2982
2983 ($struct->{unknown}) = ($query =~ m/\G(.+)/);
2984
2985 return $struct;
2986}
2987{
2988 no warnings;
2989 *parse_replace = \&parse_insert;
2990}
2991
2992sub parse_select {
2993 my ( $self, $query ) = @_;
2994
2995 my @keywords;
2996 my $final_keywords = qr/(FOR UPDATE|LOCK IN SHARE MODE)/i;
2997 1 while $query =~ s/\s+$final_keywords/(push @keywords, $1), ''/gie;
2998
2999 my $keywords = qr/(
3000 ALL
3001 |DISTINCT
3002 |DISTINCTROW
3003 |HIGH_PRIORITY
3004 |STRAIGHT_JOIN
3005 |SQL_SMALL_RESULT
3006 |SQL_BIG_RESULT
3007 |SQL_BUFFER_RESULT
3008 |SQL_CACHE
3009 |SQL_NO_CACHE
3010 |SQL_CALC_FOUND_ROWS
3011 )/xi;
3012 my $clauses = qr/(
3013 FROM
3014 |WHERE
3015 |GROUP\sBY
3016 |HAVING
3017 |ORDER\sBY
3018 |LIMIT
3019 |PROCEDURE
3020 |INTO OUTFILE
3021 )/xi;
3022 my $struct = $self->_parse_query($query, $keywords, 'columns', $clauses);
3023
3024 map { s/ /_/g; $struct->{keywords}->{lc $_} = 1; } @keywords;
3025
3026 return $struct;
3027}
3028
3029sub parse_update {
3030 my $keywords = qr/(LOW_PRIORITY|IGNORE)/i;
3031 my $clauses = qr/(SET|WHERE|ORDER BY|LIMIT)/i;
3032 return _parse_query(@_, $keywords, 'tables', $clauses);
3033
3034}
3035
3036sub parse_create {
3037 my ($self, $query) = @_;
3038 my ($obj, $name) = $query =~ m/
3039 (\S+)\s+
3040 (?:IF NOT EXISTS\s+)?
3041 (\S+)
3042 /xi;
3043 return {
3044 object => lc $obj,
3045 name => $name,
3046 unknown => undef,
3047 };
3048}
3049
3050sub parse_from {
3051 my ( $self, $from ) = @_;
3052 return unless $from;
3053 MKDEBUG && _d('Parsing FROM', $from);
3054
3055 my $using_cols;
3056 ($from, $using_cols) = $self->remove_using_columns($from);
3057
3058 my $funcs;
3059 ($from, $funcs) = $self->remove_functions($from);
3060
3061 my $comma_join = qr/(?>\s*,\s*)/;
3062 my $ansi_join = qr/(?>
3063 \s+
3064 (?:(?:INNER|CROSS|STRAIGHT_JOIN|LEFT|RIGHT|OUTER|NATURAL)\s+)*
3065 JOIN
3066 \s+
3067 )/xi;
3068
3069 my @tbls; # all table refs, a hashref for each
3070 my $tbl_ref; # current table ref hashref
3071 my $join; # join info hahsref for current table ref
3072 foreach my $thing ( split /($comma_join|$ansi_join)/io, $from ) {
3073 die "Error parsing FROM clause" unless $thing;
3074
3075 $thing =~ s/^\s+//;
3076 $thing =~ s/\s+$//;
3077 MKDEBUG && _d('Table thing:', $thing);
3078
3079 if ( $thing =~ m/\s+(?:ON|USING)\s+/i ) {
3080 MKDEBUG && _d("JOIN condition");
3081 my ($tbl_ref_txt, $join_condition_verb, $join_condition_value)
3082 = $thing =~ m/^(.+?)\s+(ON|USING)\s+(.+)/i;
3083
3084 $tbl_ref = $self->parse_table_reference($tbl_ref_txt);
3085
3086 $join->{condition} = lc $join_condition_verb;
3087 if ( $join->{condition} eq 'on' ) {
3088 $join->{where} = $self->parse_where($join_condition_value, $funcs);
3089 }
3090 else { # USING
3091 $join->{columns} = $self->_parse_csv(shift @$using_cols);
3092 }
3093 }
3094 elsif ( $thing =~ m/(?:,|JOIN)/i ) {
3095 if ( $join ) {
3096 $tbl_ref->{join} = $join;
3097 }
3098 push @tbls, $tbl_ref;
3099 MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
3100
3101 $tbl_ref = undef;
3102 $join = {};
3103
3104 $join->{to} = $tbls[-1]->{tbl};
3105 if ( $thing eq ',' ) {
3106 $join->{type} = 'inner';
3107 $join->{ansi} = 0;
3108 }
3109 else { # ansi join
3110 my $type = $thing =~ m/^(.+?)\s+JOIN$/i ? lc $1 : 'inner';
3111 $join->{type} = $type;
3112 $join->{ansi} = 1;
3113 }
3114 }
3115 else {
3116 $tbl_ref = $self->parse_table_reference($thing);
3117 MKDEBUG && _d('Table reference:', Dumper($tbl_ref));
3118 }
3119 }
3120
3121 if ( $tbl_ref ) {
3122 if ( $join ) {
3123 $tbl_ref->{join} = $join;
3124 }
3125 push @tbls, $tbl_ref;
3126 MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
3127 }
3128
3129 return \@tbls;
3130}
3131
3132sub parse_table_reference {
3133 my ( $self, $tbl_ref ) = @_;
3134 return unless $tbl_ref;
3135 MKDEBUG && _d('Parsing table reference:', $tbl_ref);
3136 my %tbl;
3137
3138 if ( $tbl_ref =~ s/
3139 \s+(
3140 (?:FORCE|USE|INGORE)\s
3141 (?:INDEX|KEY)
3142 \s*\([^\)]+\)\s*
3143 )//xi)
3144 {
3145 $tbl{index_hint} = $1;
3146 MKDEBUG && _d('Index hint:', $tbl{index_hint});
3147 }
3148
3149 if ( $tbl_ref =~ m/$table_ident/ ) {
3150 my ($db_tbl, $as, $alias) = ($1, $2, $3); # XXX
3151 my $ident_struct = $self->parse_identifier('table', $db_tbl);
3152 $alias =~ s/`//g if $alias;
3153 @tbl{keys %$ident_struct} = values %$ident_struct;
3154 $tbl{explicit_alias} = 1 if $as;
3155 $tbl{alias} = $alias if $alias;
3156 }
3157 else {
3158 die "Table ident match failed"; # shouldn't happen
3159 }
3160
3161 return \%tbl;
3162}
3163{
3164 no warnings; # Why? See same line above.
3165 *parse_into = \&parse_from;
3166 *parse_tables = \&parse_from;
3167}
3168
3169sub parse_where {
3170 my ( $self, $where, $functions ) = @_;
3171 return unless $where;
3172 MKDEBUG && _d("Parsing WHERE", $where);
3173
3174 my $op_symbol = qr/
3175 (?:
3176 <=(?:>)?
3177 |>=
3178 |<>
3179 |!=
3180 |<
3181 |>
3182 |=
3183 )/xi;
3184 my $op_verb = qr/
3185 (?:
3186 (?:(?:NOT\s)?LIKE)
3187 |(?:IS(?:\sNOT\s)?)
3188 |(?:(?:\sNOT\s)?BETWEEN)
3189 |(?:(?:NOT\s)?IN)
3190 )
3191 /xi;
3192 my $op_pat = qr/
3193 (
3194 (?>
3195 (?:$op_symbol) # don't need spaces around the symbols, e.g.: col=1
3196 |(?:\s+$op_verb) # must have space before verb op, e.g.: col LIKE ...
3197 )
3198 )/x;
3199
3200 my $offset = 0;
3201 my $pred = "";
3202 my @pred;
3203 my @has_op;
3204 while ( $where =~ m/\b(and|or)\b/gi ) {
3205 my $pos = (pos $where) - (length $1); # pos at and|or, not after
3206
3207 $pred = substr $where, $offset, ($pos-$offset);
3208 push @pred, $pred;
3209 push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
3210
3211 $offset = $pos;
3212 }
3213 $pred = substr $where, $offset;
3214 push @pred, $pred;
3215 push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
3216 MKDEBUG && _d("Predicate fragments:", Dumper(\@pred));
3217 MKDEBUG && _d("Predicate frags with operators:", @has_op);
3218
3219 my $n = scalar @pred - 1;
3220 for my $i ( 1..$n ) {
3221 $i *= -1;
3222 my $j = $i - 1; # preceding pred frag
3223
3224 next if $pred[$j] !~ m/\s+between\s+/i && $self->_is_constant($pred[$i]);
3225
3226 if ( !$has_op[$i] ) {
3227 $pred[$j] .= $pred[$i];
3228 $pred[$i] = undef;
3229 }
3230 }
3231 MKDEBUG && _d("Predicate fragments joined:", Dumper(\@pred));
3232
3233 for my $i ( 0..@pred ) {
3234 $pred = $pred[$i];
3235 next unless defined $pred;
3236 my $n_single_quotes = ($pred =~ tr/'//);
3237 my $n_double_quotes = ($pred =~ tr/"//);
3238 if ( ($n_single_quotes % 2) || ($n_double_quotes % 2) ) {
3239 $pred[$i] .= $pred[$i + 1];
3240 $pred[$i + 1] = undef;
3241 }
3242 }
3243 MKDEBUG && _d("Predicate fragments balanced:", Dumper(\@pred));
3244
3245 my @predicates;
3246 foreach my $pred ( @pred ) {
3247 next unless defined $pred;
3248 $pred =~ s/^\s+//;
3249 $pred =~ s/\s+$//;
3250 my $conj;
3251 if ( $pred =~ s/^(and|or)\s+//i ) {
3252 $conj = lc $1;
3253 }
3254 my ($col, $op, $val) = $pred =~ m/^(.+?)$op_pat(.+)$/o;
3255 if ( !$col || !$op ) {
3256 if ( $self->_is_constant($pred) ) {
3257 $val = lc $pred;
3258 }
3259 else {
3260 die "Failed to parse WHERE condition: $pred";
3261 }
3262 }
3263
3264 if ( $col ) {
3265 $col =~ s/\s+$//;
3266 $col =~ s/^\(+//; # no unquoted column name begins with (
3267 }
3268 if ( $op ) {
3269 $op = lc $op;
3270 $op =~ s/^\s+//;
3271 $op =~ s/\s+$//;
3272 }
3273 $val =~ s/^\s+//;
3274
3275 if ( ($op || '') !~ m/IN/i && $val !~ m/^\w+\([^\)]+\)$/ ) {
3276 $val =~ s/\)+$//;
3277 }
3278
3279 if ( $val =~ m/NULL|TRUE|FALSE/i ) {
3280 $val = lc $val;
3281 }
3282
3283 if ( $functions ) {
3284 $col = shift @$functions if $col =~ m/__FUNC\d+__/;
3285 $val = shift @$functions if $val =~ m/__FUNC\d+__/;
3286 }
3287
3288 push @predicates, {
3289 predicate => $conj,
3290 left_arg => $col,
3291 operator => $op,
3292 right_arg => $val,
3293 };
3294 }
3295
3296 return \@predicates;
3297}
3298
3299sub _is_constant {
3300 my ( $self, $val ) = @_;
3301 return 0 unless defined $val;
3302 $val =~ s/^\s*(?:and|or)\s+//;
3303 return
3304 $val =~ m/^\s*(?:TRUE|FALSE)\s*$/i || $val =~ m/^\s*-?\d+\s*$/ ? 1 : 0;
3305}
3306
3307sub parse_having {
3308 my ( $self, $having ) = @_;
3309 return $having;
3310}
3311
3312sub parse_group_by {
3313 my ( $self, $group_by ) = @_;
3314 return unless $group_by;
3315 MKDEBUG && _d('Parsing GROUP BY', $group_by);
3316
3317 my $with_rollup = $group_by =~ s/\s+WITH ROLLUP\s*//i;
3318
3319 my $idents = $self->parse_identifiers( $self->_parse_csv($group_by) );
3320
3321 $idents->{with_rollup} = 1 if $with_rollup;
3322
3323 return $idents;
3324}
3325
3326sub parse_order_by {
3327 my ( $self, $order_by ) = @_;
3328 return unless $order_by;
3329 MKDEBUG && _d('Parsing ORDER BY', $order_by);
3330 my $idents = $self->parse_identifiers( $self->_parse_csv($order_by) );
3331 return $idents;
3332}
3333
3334sub parse_limit {
3335 my ( $self, $limit ) = @_;
3336 return unless $limit;
3337 my $struct = {
3338 row_count => undef,
3339 };
3340 if ( $limit =~ m/(\S+)\s+OFFSET\s+(\S+)/i ) {
3341 $struct->{explicit_offset} = 1;
3342 $struct->{row_count} = $1;
3343 $struct->{offset} = $2;
3344 }
3345 else {
3346 my ($offset, $cnt) = $limit =~ m/(?:(\S+),\s+)?(\S+)/i;
3347 $struct->{row_count} = $cnt;
3348 $struct->{offset} = $offset if defined $offset;
3349 }
3350 return $struct;
3351}
3352
3353sub parse_values {
3354 my ( $self, $values ) = @_;
3355 return unless $values;
3356 $values =~ s/^\s*\(//;
3357 $values =~ s/\s*\)//;
3358 my $vals = $self->_parse_csv(
3359 $values,
3360 quoted_values => 1,
3361 remove_quotes => 0,
3362 );
3363 return $vals;
3364}
3365
3366sub parse_set {
3367 my ( $self, $set ) = @_;
3368 MKDEBUG && _d("Parse SET", $set);
3369 return unless $set;
3370 my $vals = $self->_parse_csv($set);
3371 return unless $vals && @$vals;
3372
3373 my @set;
3374 foreach my $col_val ( @$vals ) {
3375 my ($col, $val) = $col_val =~ m/^([^=]+)\s*=\s*(.+)/;
3376 my $ident_struct = $self->parse_identifier('column', $col);
3377 my $set_struct = {
3378 %$ident_struct,
3379 value => $val,
3380 };
3381 MKDEBUG && _d("SET:", Dumper($set_struct));
3382 push @set, $set_struct;
3383 }
3384 return \@set;
3385}
3386
3387sub _parse_csv {
3388 my ( $self, $vals, %args ) = @_;
3389 return unless $vals;
3390
3391 my @vals;
3392 if ( $args{quoted_values} ) {
3393 my $quote_char = '';
3394 VAL:
3395 foreach my $val ( split(',', $vals) ) {
3396 MKDEBUG && _d("Next value:", $val);
3397 if ( $quote_char ) {
3398 MKDEBUG && _d("Value is part of previous quoted value");
3399 $vals[-1] .= ",$val";
3400
3401 if ( $val =~ m/[^\\]*$quote_char$/ ) {
3402 if ( $args{remove_quotes} ) {
3403 $vals[-1] =~ s/^\s*$quote_char//;
3404 $vals[-1] =~ s/$quote_char\s*$//;
3405 }
3406 MKDEBUG && _d("Previous quoted value is complete:", $vals[-1]);
3407 $quote_char = '';
3408 }
3409
3410 next VAL;
3411 }
3412
3413 $val =~ s/^\s+//;
3414
3415 if ( $val =~ m/^(['"])/ ) {
3416 MKDEBUG && _d("Value is quoted");
3417 $quote_char = $1; # XXX
3418 if ( $val =~ m/.$quote_char$/ ) {
3419 MKDEBUG && _d("Value is complete");
3420 $quote_char = '';
3421 if ( $args{remove_quotes} ) {
3422 $vals[-1] =~ s/^\s*$quote_char//;
3423 $vals[-1] =~ s/$quote_char\s*$//;
3424 }
3425 }
3426 else {
3427 MKDEBUG && _d("Quoted value is not complete");
3428 }
3429 }
3430 else {
3431 $val =~ s/\s+$//;
3432 }
3433
3434 MKDEBUG && _d("Saving value", ($quote_char ? "fragment" : ""));
3435 push @vals, $val;
3436 }
3437 }
3438 else {
3439 @vals = map { s/^\s+//; s/\s+$//; $_ } split(',', $vals);
3440 }
3441
3442 return \@vals;
3443}
3444{
3445 no warnings; # Why? See same line above.
3446 *parse_on_duplicate = \&_parse_csv;
3447}
3448
3449sub parse_columns {
3450 my ( $self, $cols ) = @_;
3451 MKDEBUG && _d('Parsing columns list:', $cols);
3452
3453 my @cols;
3454 pos $cols = 0;
3455 while (pos $cols < length $cols) {
3456 if ($cols =~ m/\G\s*$column_ident\s*(?>,|\Z)/gcxo) {
3457 my ($db_tbl_col, $as, $alias) = ($1, $2, $3); # XXX
3458 my $ident_struct = $self->parse_identifier('column', $db_tbl_col);
3459 $alias =~ s/`//g if $alias;
3460 my $col_struct = {
3461 %$ident_struct,
3462 ($as ? (explicit_alias => 1) : ()),
3463 ($alias ? (alias => $alias) : ()),
3464 };
3465 push @cols, $col_struct;
3466 }
3467 else {
3468 die "Column ident match failed"; # shouldn't happen
3469 }
3470 }
3471
3472 return \@cols;
3473}
3474
3475sub remove_subqueries {
3476 my ( $self, $query ) = @_;
3477
3478 my @start_pos;
3479 while ( $query =~ m/(\(SELECT )/gi ) {
3480 my $pos = (pos $query) - (length $1);
3481 push @start_pos, $pos;
3482 }
3483
3484 @start_pos = reverse @start_pos;
3485 my @end_pos;
3486 for my $i ( 0..$#start_pos ) {
3487 my $closed = 0;
3488 pos $query = $start_pos[$i];
3489 while ( $query =~ m/([\(\)])/cg ) {
3490 my $c = $1;
3491 $closed += ($c eq '(' ? 1 : -1);
3492 last unless $closed;
3493 }
3494 push @end_pos, pos $query;
3495 }
3496
3497 my @subqueries;
3498 my $len_adj = 0;
3499 my $n = 0;
3500 for my $i ( 0..$#start_pos ) {
3501 MKDEBUG && _d('Query:', $query);
3502 my $offset = $start_pos[$i];
3503 my $len = $end_pos[$i] - $start_pos[$i] - $len_adj;
3504 MKDEBUG && _d("Subquery $n start", $start_pos[$i],
3505 'orig end', $end_pos[$i], 'adj', $len_adj, 'adj end',
3506 $offset + $len, 'len', $len);
3507
3508 my $struct = {};
3509 my $token = '__SQ' . $n . '__';
3510 my $subquery = substr($query, $offset, $len, $token);
3511 MKDEBUG && _d("Subquery $n:", $subquery);
3512
3513 my $outer_start = $start_pos[$i + 1];
3514 my $outer_end = $end_pos[$i + 1];
3515 if ( $outer_start && ($outer_start < $start_pos[$i])
3516 && $outer_end && ($outer_end > $end_pos[$i]) ) {
3517 MKDEBUG && _d("Subquery $n nested in next subquery");
3518 $len_adj += $len - length $token;
3519 $struct->{nested} = $i + 1;
3520 }
3521 else {
3522 MKDEBUG && _d("Subquery $n not nested");
3523 $len_adj = 0;
3524 if ( $subqueries[-1] && $subqueries[-1]->{nested} ) {
3525 MKDEBUG && _d("Outermost subquery");
3526 }
3527 }
3528
3529 if ( $query =~ m/(?:=|>|<|>=|<=|<>|!=|<=>)\s*$token/ ) {
3530 $struct->{context} = 'scalar';
3531 }
3532 elsif ( $query =~ m/\b(?:IN|ANY|SOME|ALL|EXISTS)\s*$token/i ) {
3533 if ( $query !~ m/\($token\)/ ) {
3534 $query =~ s/$token/\($token\)/;
3535 $len_adj -= 2 if $struct->{nested};
3536 }
3537 $struct->{context} = 'list';
3538 }
3539 else {
3540 $struct->{context} = 'identifier';
3541 }
3542 MKDEBUG && _d("Subquery $n context:", $struct->{context});
3543
3544 $subquery =~ s/^\s*\(//;
3545 $subquery =~ s/\s*\)\s*$//;
3546
3547 $struct->{query} = $subquery;
3548 push @subqueries, $struct;
3549 $n++;
3550 }
3551
3552 return $query, @subqueries;
3553}
3554
3555sub remove_using_columns {
3556 my ($self, $from) = @_;
3557 return unless $from;
3558 MKDEBUG && _d('Removing cols from USING clauses');
3559 my $using = qr/
3560 \bUSING
3561 \s*
3562 \(
3563 ([^\)]+)
3564 \)
3565 /xi;
3566 my @cols;
3567 $from =~ s/$using/push @cols, $1; "USING ($#cols)"/eg;
3568 MKDEBUG && _d('FROM:', $from, Dumper(\@cols));
3569 return $from, \@cols;
3570}
3571
3572sub replace_function {
3573 my ($func, $funcs) = @_;
3574 my ($func_name) = $func =~ m/^(\w+)/;
3575 if ( !$ignore_function{uc $func_name} ) {
3576 my $n = scalar @$funcs;
3577 push @$funcs, $func;
3578 return "__FUNC${n}__";
3579 }
3580 return $func;
3581}
3582
3583sub remove_functions {
3584 my ($self, $clause) = @_;
3585 return unless $clause;
3586 MKDEBUG && _d('Removing functions from clause:', $clause);
3587 my @funcs;
3588 $clause =~ s/$function_ident/replace_function($1, \@funcs)/eg;
3589 MKDEBUG && _d('Function-stripped clause:', $clause, Dumper(\@funcs));
3590 return $clause, \@funcs;
3591}
3592
3593sub parse_identifiers {
3594 my ( $self, $idents ) = @_;
3595 return unless $idents;
3596 MKDEBUG && _d("Parsing identifiers");
3597
3598 my @ident_parts;
3599 foreach my $ident ( @$idents ) {
3600 MKDEBUG && _d("Identifier:", $ident);
3601 my $parts = {};
3602
3603 if ( $ident =~ s/\s+(ASC|DESC)\s*$//i ) {
3604 $parts->{sort} = uc $1; # XXX
3605 }
3606
3607 if ( $ident =~ m/^\d+$/ ) { # Position like 5
3608 MKDEBUG && _d("Positional ident");
3609 $parts->{position} = $ident;
3610 }
3611 elsif ( $ident =~ m/^\w+\(/ ) { # Function like MIN(col)
3612 MKDEBUG && _d("Expression ident");
3613 my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
3614 $parts->{function} = uc $func;
3615 $parts->{expression} = $expr if $expr;
3616 }
3617 else { # Ref like (table.)column
3618 MKDEBUG && _d("Table/column ident");
3619 my ($tbl, $col) = $self->split_unquote($ident);
3620 $parts->{table} = $tbl if $tbl;
3621 $parts->{column} = $col;
3622 }
3623 push @ident_parts, $parts;
3624 }
3625
3626 return \@ident_parts;
3627}
3628
3629sub parse_identifier {
3630 my ( $self, $type, $ident ) = @_;
3631 return unless $type && $ident;
3632 MKDEBUG && _d("Parsing", $type, "identifier:", $ident);
3633
3634 if ( $ident =~ m/^\w+\(/ ) { # Function like MIN(col)
3635 my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
3636 MKDEBUG && _d('Function', $func, 'arg', $expr);
3637 return { col => $ident } unless $expr; # NOW()
3638 $ident = $expr; # col from MAX(col)
3639 }
3640
3641 my %ident_struct;
3642 my @ident_parts = map { s/`//g; $_; } split /[.]/, $ident;
3643 if ( @ident_parts == 3 ) {
3644 @ident_struct{qw(db tbl col)} = @ident_parts;
3645 }
3646 elsif ( @ident_parts == 2 ) {
3647 my @parts_for_type = $type eq 'column' ? qw(tbl col)
3648 : $type eq 'table' ? qw(db tbl)
3649 : die "Invalid identifier type: $type";
3650 @ident_struct{@parts_for_type} = @ident_parts;
3651 }
3652 elsif ( @ident_parts == 1 ) {
3653 my $part = $type eq 'column' ? 'col' : 'tbl';
3654 @ident_struct{($part)} = @ident_parts;
3655 }
3656 else {
3657 die "Invalid number of parts in $type reference: $ident";
3658 }
3659
3660 if ( $self->{SchemaQualifier} ) {
3661 if ( $type eq 'column' && !$ident_struct{tbl} ) {
3662 my $qcol = $self->{SchemaQualifier}->qualify_column(
3663 column => $ident_struct{col},
3664 );
3665 $ident_struct{db} = $qcol->{db} if $qcol->{db};
3666 $ident_struct{tbl} = $qcol->{tbl} if $qcol->{tbl};
3667 }
3668 elsif ( $type eq 'table' && !$ident_struct{db} ) {
3669 my $db = $self->{SchemaQualifier}->get_database_for_table(
3670 table => $ident_struct{tbl},
3671 );
3672 $ident_struct{db} = $db if $db;
3673 }
3674 }
3675
3676 MKDEBUG && _d($type, "identifier struct:", Dumper(\%ident_struct));
3677 return \%ident_struct;
3678}
3679
3680sub split_unquote {
3681 my ( $self, $db_tbl, $default_db ) = @_;
3682 $db_tbl =~ s/`//g;
3683 my ( $db, $tbl ) = split(/[.]/, $db_tbl);
3684 if ( !$tbl ) {
3685 $tbl = $db;
3686 $db = $default_db;
3687 }
3688 return ($db, $tbl);
3689}
3690
3691sub is_identifier {
3692 my ( $self, $thing ) = @_;
3693
3694 return 0 unless $thing;
3695
3696 return 0 if $thing =~ m/\s*['"]/;
3697
3698 return 0 if $thing =~ m/^\s*\d+(?:\.\d+)?\s*$/;
3699
3700 return 0 if $thing =~ m/^\s*(?>
3701 NULL
3702 |DUAL
3703 )\s*$/xi;
3704
3705 return 1 if $thing =~ m/^\s*$column_ident\s*$/;
3706
3707 return 0;
3708}
3709