Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage into lp:percona-toolkit/2.1
- pt-table-usage
- Merge into 2.1
Proposed by
Daniel Nichter
Status: | Merged |
---|---|
Merged at revision: | 225 |
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage |
Merge into: | lp:percona-toolkit/2.1 |
Diff against target: | 9952 lines (+9746/-8) 22 files modified: bin/pt-table-usage (+7320/-0) lib/SQLParser.pm (+98/-8) lib/TableUsage.pm (+1060/-0) t/lib/TableUsage.t (+817/-0) t/pt-table-usage/basics.t (+138/-0) t/pt-table-usage/create_table_definitions.t (+41/-0) t/pt-table-usage/explain_extended.t (+79/-0) t/pt-table-usage/samples/ee.out (+6/-0) t/pt-table-usage/samples/ee.sql (+26/-0) t/pt-table-usage/samples/in/slow001.txt (+24/-0) t/pt-table-usage/samples/in/slow002.txt (+20/-0) t/pt-table-usage/samples/in/slow003.txt (+3/-0) t/pt-table-usage/samples/out/create-table-defs-001.txt (+4/-0) t/pt-table-usage/samples/out/create001.txt (+5/-0) t/pt-table-usage/samples/out/drop-table-if-exists.txt (+3/-0) t/pt-table-usage/samples/out/query001.txt (+6/-0) t/pt-table-usage/samples/out/query002.txt (+5/-0) t/pt-table-usage/samples/out/slow001.txt (+31/-0) t/pt-table-usage/samples/out/slow002.txt (+40/-0) t/pt-table-usage/samples/out/slow003-001.txt (+6/-0) t/pt-table-usage/samples/out/slow003-002.txt (+8/-0) t/pt-table-usage/samples/out/slow003-003.txt (+6/-0) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-usage |
Related bugs: | |
Related blueprints: | Add pt-table-usage (Medium) |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Daniel Nichter | Approve | ||
Review via email: mp+100256@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'bin/pt-table-usage' | |||
2 | --- bin/pt-table-usage 1970-01-01 00:00:00 +0000 | |||
3 | +++ bin/pt-table-usage 2012-03-30 22:48:21 +0000 | |||
4 | @@ -0,0 +1,7320 @@ | |||
5 | 1 | #!/usr/bin/env perl | ||
6 | 2 | |||
7 | 3 | # This program is part of Percona Toolkit: http://www.percona.com/software/ | ||
8 | 4 | # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal | ||
9 | 5 | # notices and disclaimers. | ||
10 | 6 | |||
11 | 7 | use strict; | ||
12 | 8 | use warnings FATAL => 'all'; | ||
13 | 9 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
14 | 10 | |||
15 | 11 | # ########################################################################### | ||
16 | 12 | # DSNParser package | ||
17 | 13 | # This package is a copy without comments from the original. The original | ||
18 | 14 | # with comments and its test file can be found in the Bazaar repository at, | ||
19 | 15 | # lib/DSNParser.pm | ||
20 | 16 | # t/lib/DSNParser.t | ||
21 | 17 | # See https://launchpad.net/percona-toolkit for more information. | ||
22 | 18 | # ########################################################################### | ||
23 | 19 | { | ||
24 | 20 | package DSNParser; | ||
25 | 21 | |||
26 | 22 | use strict; | ||
27 | 23 | use warnings FATAL => 'all'; | ||
28 | 24 | use English qw(-no_match_vars); | ||
29 | 25 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
30 | 26 | |||
31 | 27 | use Data::Dumper; | ||
32 | 28 | $Data::Dumper::Indent = 0; | ||
33 | 29 | $Data::Dumper::Quotekeys = 0; | ||
34 | 30 | |||
35 | 31 | eval { | ||
36 | 32 | require DBI; | ||
37 | 33 | }; | ||
38 | 34 | my $have_dbi = $EVAL_ERROR ? 0 : 1; | ||
39 | 35 | |||
40 | 36 | sub new { | ||
41 | 37 | my ( $class, %args ) = @_; | ||
42 | 38 | foreach my $arg ( qw(opts) ) { | ||
43 | 39 | die "I need a $arg argument" unless $args{$arg}; | ||
44 | 40 | } | ||
45 | 41 | my $self = { | ||
46 | 42 | opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD. | ||
47 | 43 | }; | ||
48 | 44 | foreach my $opt ( @{$args{opts}} ) { | ||
49 | 45 | if ( !$opt->{key} || !$opt->{desc} ) { | ||
50 | 46 | die "Invalid DSN option: ", Dumper($opt); | ||
51 | 47 | } | ||
52 | 48 | PTDEBUG && _d('DSN option:', | ||
53 | 49 | join(', ', | ||
54 | 50 | map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') } | ||
55 | 51 | keys %$opt | ||
56 | 52 | ) | ||
57 | 53 | ); | ||
58 | 54 | $self->{opts}->{$opt->{key}} = { | ||
59 | 55 | dsn => $opt->{dsn}, | ||
60 | 56 | desc => $opt->{desc}, | ||
61 | 57 | copy => $opt->{copy} || 0, | ||
62 | 58 | }; | ||
63 | 59 | } | ||
64 | 60 | return bless $self, $class; | ||
65 | 61 | } | ||
66 | 62 | |||
67 | 63 | sub prop { | ||
68 | 64 | my ( $self, $prop, $value ) = @_; | ||
69 | 65 | if ( @_ > 2 ) { | ||
70 | 66 | PTDEBUG && _d('Setting', $prop, 'property'); | ||
71 | 67 | $self->{$prop} = $value; | ||
72 | 68 | } | ||
73 | 69 | return $self->{$prop}; | ||
74 | 70 | } | ||
75 | 71 | |||
76 | 72 | sub parse { | ||
77 | 73 | my ( $self, $dsn, $prev, $defaults ) = @_; | ||
78 | 74 | if ( !$dsn ) { | ||
79 | 75 | PTDEBUG && _d('No DSN to parse'); | ||
80 | 76 | return; | ||
81 | 77 | } | ||
82 | 78 | PTDEBUG && _d('Parsing', $dsn); | ||
83 | 79 | $prev ||= {}; | ||
84 | 80 | $defaults ||= {}; | ||
85 | 81 | my %given_props; | ||
86 | 82 | my %final_props; | ||
87 | 83 | my $opts = $self->{opts}; | ||
88 | 84 | |||
89 | 85 | foreach my $dsn_part ( split(/,/, $dsn) ) { | ||
90 | 86 | if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) { | ||
91 | 87 | $given_props{$prop_key} = $prop_val; | ||
92 | 88 | } | ||
93 | 89 | else { | ||
94 | 90 | PTDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part); | ||
95 | 91 | $given_props{h} = $dsn_part; | ||
96 | 92 | } | ||
97 | 93 | } | ||
98 | 94 | |||
99 | 95 | foreach my $key ( keys %$opts ) { | ||
100 | 96 | PTDEBUG && _d('Finding value for', $key); | ||
101 | 97 | $final_props{$key} = $given_props{$key}; | ||
102 | 98 | if ( !defined $final_props{$key} | ||
103 | 99 | && defined $prev->{$key} && $opts->{$key}->{copy} ) | ||
104 | 100 | { | ||
105 | 101 | $final_props{$key} = $prev->{$key}; | ||
106 | 102 | PTDEBUG && _d('Copying value for', $key, 'from previous DSN'); | ||
107 | 103 | } | ||
108 | 104 | if ( !defined $final_props{$key} ) { | ||
109 | 105 | $final_props{$key} = $defaults->{$key}; | ||
110 | 106 | PTDEBUG && _d('Copying value for', $key, 'from defaults'); | ||
111 | 107 | } | ||
112 | 108 | } | ||
113 | 109 | |||
114 | 110 | foreach my $key ( keys %given_props ) { | ||
115 | 111 | die "Unknown DSN option '$key' in '$dsn'. For more details, " | ||
116 | 112 | . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " | ||
117 | 113 | . "for complete documentation." | ||
118 | 114 | unless exists $opts->{$key}; | ||
119 | 115 | } | ||
120 | 116 | if ( (my $required = $self->prop('required')) ) { | ||
121 | 117 | foreach my $key ( keys %$required ) { | ||
122 | 118 | die "Missing required DSN option '$key' in '$dsn'. For more details, " | ||
123 | 119 | . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " | ||
124 | 120 | . "for complete documentation." | ||
125 | 121 | unless $final_props{$key}; | ||
126 | 122 | } | ||
127 | 123 | } | ||
128 | 124 | |||
129 | 125 | return \%final_props; | ||
130 | 126 | } | ||
131 | 127 | |||
132 | 128 | sub parse_options { | ||
133 | 129 | my ( $self, $o ) = @_; | ||
134 | 130 | die 'I need an OptionParser object' unless ref $o eq 'OptionParser'; | ||
135 | 131 | my $dsn_string | ||
136 | 132 | = join(',', | ||
137 | 133 | map { "$_=".$o->get($_); } | ||
138 | 134 | grep { $o->has($_) && $o->get($_) } | ||
139 | 135 | keys %{$self->{opts}} | ||
140 | 136 | ); | ||
141 | 137 | PTDEBUG && _d('DSN string made from options:', $dsn_string); | ||
142 | 138 | return $self->parse($dsn_string); | ||
143 | 139 | } | ||
144 | 140 | |||
145 | 141 | sub as_string { | ||
146 | 142 | my ( $self, $dsn, $props ) = @_; | ||
147 | 143 | return $dsn unless ref $dsn; | ||
148 | 144 | my @keys = $props ? @$props : sort keys %$dsn; | ||
149 | 145 | return join(',', | ||
150 | 146 | map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) } | ||
151 | 147 | grep { | ||
152 | 148 | exists $self->{opts}->{$_} | ||
153 | 149 | && exists $dsn->{$_} | ||
154 | 150 | && defined $dsn->{$_} | ||
155 | 151 | } @keys); | ||
156 | 152 | } | ||
157 | 153 | |||
158 | 154 | sub usage { | ||
159 | 155 | my ( $self ) = @_; | ||
160 | 156 | my $usage | ||
161 | 157 | = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n" | ||
162 | 158 | . " KEY COPY MEANING\n" | ||
163 | 159 | . " === ==== =============================================\n"; | ||
164 | 160 | my %opts = %{$self->{opts}}; | ||
165 | 161 | foreach my $key ( sort keys %opts ) { | ||
166 | 162 | $usage .= " $key " | ||
167 | 163 | . ($opts{$key}->{copy} ? 'yes ' : 'no ') | ||
168 | 164 | . ($opts{$key}->{desc} || '[No description]') | ||
169 | 165 | . "\n"; | ||
170 | 166 | } | ||
171 | 167 | $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n"; | ||
172 | 168 | return $usage; | ||
173 | 169 | } | ||
174 | 170 | |||
175 | 171 | sub get_cxn_params { | ||
176 | 172 | my ( $self, $info ) = @_; | ||
177 | 173 | my $dsn; | ||
178 | 174 | my %opts = %{$self->{opts}}; | ||
179 | 175 | my $driver = $self->prop('dbidriver') || ''; | ||
180 | 176 | if ( $driver eq 'Pg' ) { | ||
181 | 177 | $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';' | ||
182 | 178 | . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } | ||
183 | 179 | grep { defined $info->{$_} } | ||
184 | 180 | qw(h P)); | ||
185 | 181 | } | ||
186 | 182 | else { | ||
187 | 183 | $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';' | ||
188 | 184 | . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } | ||
189 | 185 | grep { defined $info->{$_} } | ||
190 | 186 | qw(F h P S A)) | ||
191 | 187 | . ';mysql_read_default_group=client'; | ||
192 | 188 | } | ||
193 | 189 | PTDEBUG && _d($dsn); | ||
194 | 190 | return ($dsn, $info->{u}, $info->{p}); | ||
195 | 191 | } | ||
196 | 192 | |||
197 | 193 | sub fill_in_dsn { | ||
198 | 194 | my ( $self, $dbh, $dsn ) = @_; | ||
199 | 195 | my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name'); | ||
200 | 196 | my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()'); | ||
201 | 197 | $user =~ s/@.*//; | ||
202 | 198 | $dsn->{h} ||= $vars->{hostname}->{Value}; | ||
203 | 199 | $dsn->{S} ||= $vars->{'socket'}->{Value}; | ||
204 | 200 | $dsn->{P} ||= $vars->{port}->{Value}; | ||
205 | 201 | $dsn->{u} ||= $user; | ||
206 | 202 | $dsn->{D} ||= $db; | ||
207 | 203 | } | ||
208 | 204 | |||
209 | 205 | sub get_dbh { | ||
210 | 206 | my ( $self, $cxn_string, $user, $pass, $opts ) = @_; | ||
211 | 207 | $opts ||= {}; | ||
212 | 208 | my $defaults = { | ||
213 | 209 | AutoCommit => 0, | ||
214 | 210 | RaiseError => 1, | ||
215 | 211 | PrintError => 0, | ||
216 | 212 | ShowErrorStatement => 1, | ||
217 | 213 | mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0), | ||
218 | 214 | }; | ||
219 | 215 | @{$defaults}{ keys %$opts } = values %$opts; | ||
220 | 216 | |||
221 | 217 | if ( $opts->{mysql_use_result} ) { | ||
222 | 218 | $defaults->{mysql_use_result} = 1; | ||
223 | 219 | } | ||
224 | 220 | |||
225 | 221 | if ( !$have_dbi ) { | ||
226 | 222 | die "Cannot connect to MySQL because the Perl DBI module is not " | ||
227 | 223 | . "installed or not found. Run 'perl -MDBI' to see the directories " | ||
228 | 224 | . "that Perl searches for DBI. If DBI is not installed, try:\n" | ||
229 | 225 | . " Debian/Ubuntu apt-get install libdbi-perl\n" | ||
230 | 226 | . " RHEL/CentOS yum install perl-DBI\n" | ||
231 | 227 | . " OpenSolaris pgk install pkg:/SUNWpmdbi\n"; | ||
232 | 228 | |||
233 | 229 | } | ||
234 | 230 | |||
235 | 231 | my $dbh; | ||
236 | 232 | my $tries = 2; | ||
237 | 233 | while ( !$dbh && $tries-- ) { | ||
238 | 234 | PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, | ||
239 | 235 | join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults )); | ||
240 | 236 | |||
241 | 237 | eval { | ||
242 | 238 | $dbh = DBI->connect($cxn_string, $user, $pass, $defaults); | ||
243 | 239 | |||
244 | 240 | if ( $cxn_string =~ m/mysql/i ) { | ||
245 | 241 | my $sql; | ||
246 | 242 | |||
247 | 243 | $sql = 'SELECT @@SQL_MODE'; | ||
248 | 244 | PTDEBUG && _d($dbh, $sql); | ||
249 | 245 | my ($sql_mode) = $dbh->selectrow_array($sql); | ||
250 | 246 | |||
251 | 247 | $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1' | ||
252 | 248 | . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO' | ||
253 | 249 | . ($sql_mode ? ",$sql_mode" : '') | ||
254 | 250 | . '\'*/'; | ||
255 | 251 | PTDEBUG && _d($dbh, $sql); | ||
256 | 252 | $dbh->do($sql); | ||
257 | 253 | |||
258 | 254 | if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) { | ||
259 | 255 | $sql = "/*!40101 SET NAMES $charset*/"; | ||
260 | 256 | PTDEBUG && _d($dbh, ':', $sql); | ||
261 | 257 | $dbh->do($sql); | ||
262 | 258 | PTDEBUG && _d('Enabling charset for STDOUT'); | ||
263 | 259 | if ( $charset eq 'utf8' ) { | ||
264 | 260 | binmode(STDOUT, ':utf8') | ||
265 | 261 | or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR"; | ||
266 | 262 | } | ||
267 | 263 | else { | ||
268 | 264 | binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR"; | ||
269 | 265 | } | ||
270 | 266 | } | ||
271 | 267 | |||
272 | 268 | if ( $self->prop('set-vars') ) { | ||
273 | 269 | $sql = "SET " . $self->prop('set-vars'); | ||
274 | 270 | PTDEBUG && _d($dbh, ':', $sql); | ||
275 | 271 | $dbh->do($sql); | ||
276 | 272 | } | ||
277 | 273 | } | ||
278 | 274 | }; | ||
279 | 275 | if ( !$dbh && $EVAL_ERROR ) { | ||
280 | 276 | PTDEBUG && _d($EVAL_ERROR); | ||
281 | 277 | if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) { | ||
282 | 278 | PTDEBUG && _d('Going to try again without utf8 support'); | ||
283 | 279 | delete $defaults->{mysql_enable_utf8}; | ||
284 | 280 | } | ||
285 | 281 | elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) { | ||
286 | 282 | die "Cannot connect to MySQL because the Perl DBD::mysql module is " | ||
287 | 283 | . "not installed or not found. Run 'perl -MDBD::mysql' to see " | ||
288 | 284 | . "the directories that Perl searches for DBD::mysql. If " | ||
289 | 285 | . "DBD::mysql is not installed, try:\n" | ||
290 | 286 | . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n" | ||
291 | 287 | . " RHEL/CentOS yum install perl-DBD-MySQL\n" | ||
292 | 288 | . " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n"; | ||
293 | 289 | } | ||
294 | 290 | if ( !$tries ) { | ||
295 | 291 | die $EVAL_ERROR; | ||
296 | 292 | } | ||
297 | 293 | } | ||
298 | 294 | } | ||
299 | 295 | |||
300 | 296 | PTDEBUG && _d('DBH info: ', | ||
301 | 297 | $dbh, | ||
302 | 298 | Dumper($dbh->selectrow_hashref( | ||
303 | 299 | 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')), | ||
304 | 300 | 'Connection info:', $dbh->{mysql_hostinfo}, | ||
305 | 301 | 'Character set info:', Dumper($dbh->selectall_arrayref( | ||
306 | 302 | 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})), | ||
307 | 303 | '$DBD::mysql::VERSION:', $DBD::mysql::VERSION, | ||
308 | 304 | '$DBI::VERSION:', $DBI::VERSION, | ||
309 | 305 | ); | ||
310 | 306 | |||
311 | 307 | return $dbh; | ||
312 | 308 | } | ||
313 | 309 | |||
314 | 310 | sub get_hostname { | ||
315 | 311 | my ( $self, $dbh ) = @_; | ||
316 | 312 | if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) { | ||
317 | 313 | return $host; | ||
318 | 314 | } | ||
319 | 315 | my ( $hostname, $one ) = $dbh->selectrow_array( | ||
320 | 316 | 'SELECT /*!50038 @@hostname, */ 1'); | ||
321 | 317 | return $hostname; | ||
322 | 318 | } | ||
323 | 319 | |||
324 | 320 | sub disconnect { | ||
325 | 321 | my ( $self, $dbh ) = @_; | ||
326 | 322 | PTDEBUG && $self->print_active_handles($dbh); | ||
327 | 323 | $dbh->disconnect; | ||
328 | 324 | } | ||
329 | 325 | |||
330 | 326 | sub print_active_handles { | ||
331 | 327 | my ( $self, $thing, $level ) = @_; | ||
332 | 328 | $level ||= 0; | ||
333 | 329 | printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level, | ||
334 | 330 | $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : '')) | ||
335 | 331 | or die "Cannot print: $OS_ERROR"; | ||
336 | 332 | foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) { | ||
337 | 333 | $self->print_active_handles( $handle, $level + 1 ); | ||
338 | 334 | } | ||
339 | 335 | } | ||
340 | 336 | |||
341 | 337 | sub copy { | ||
342 | 338 | my ( $self, $dsn_1, $dsn_2, %args ) = @_; | ||
343 | 339 | die 'I need a dsn_1 argument' unless $dsn_1; | ||
344 | 340 | die 'I need a dsn_2 argument' unless $dsn_2; | ||
345 | 341 | my %new_dsn = map { | ||
346 | 342 | my $key = $_; | ||
347 | 343 | my $val; | ||
348 | 344 | if ( $args{overwrite} ) { | ||
349 | 345 | $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key}; | ||
350 | 346 | } | ||
351 | 347 | else { | ||
352 | 348 | $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key}; | ||
353 | 349 | } | ||
354 | 350 | $key => $val; | ||
355 | 351 | } keys %{$self->{opts}}; | ||
356 | 352 | return \%new_dsn; | ||
357 | 353 | } | ||
358 | 354 | |||
359 | 355 | sub _d { | ||
360 | 356 | my ($package, undef, $line) = caller 0; | ||
361 | 357 | @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } | ||
362 | 358 | map { defined $_ ? $_ : 'undef' } | ||
363 | 359 | @_; | ||
364 | 360 | print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; | ||
365 | 361 | } | ||
366 | 362 | |||
367 | 363 | 1; | ||
368 | 364 | } | ||
369 | 365 | # ########################################################################### | ||
370 | 366 | # End DSNParser package | ||
371 | 367 | # ########################################################################### | ||
372 | 368 | |||
373 | 369 | # ########################################################################### | ||
374 | 370 | # OptionParser package | ||
375 | 371 | # This package is a copy without comments from the original. The original | ||
376 | 372 | # with comments and its test file can be found in the Bazaar repository at, | ||
377 | 373 | # lib/OptionParser.pm | ||
378 | 374 | # t/lib/OptionParser.t | ||
379 | 375 | # See https://launchpad.net/percona-toolkit for more information. | ||
380 | 376 | # ########################################################################### | ||
381 | 377 | { | ||
382 | 378 | package OptionParser; | ||
383 | 379 | |||
384 | 380 | use strict; | ||
385 | 381 | use warnings FATAL => 'all'; | ||
386 | 382 | use English qw(-no_match_vars); | ||
387 | 383 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
388 | 384 | |||
389 | 385 | use List::Util qw(max); | ||
390 | 386 | use Getopt::Long; | ||
391 | 387 | |||
392 | 388 | my $POD_link_re = '[LC]<"?([^">]+)"?>'; | ||
393 | 389 | |||
394 | 390 | sub new { | ||
395 | 391 | my ( $class, %args ) = @_; | ||
396 | 392 | my @required_args = qw(); | ||
397 | 393 | foreach my $arg ( @required_args ) { | ||
398 | 394 | die "I need a $arg argument" unless $args{$arg}; | ||
399 | 395 | } | ||
400 | 396 | |||
401 | 397 | my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/; | ||
402 | 398 | $program_name ||= $PROGRAM_NAME; | ||
403 | 399 | my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.'; | ||
404 | 400 | |||
405 | 401 | my %attributes = ( | ||
406 | 402 | 'type' => 1, | ||
407 | 403 | 'short form' => 1, | ||
408 | 404 | 'group' => 1, | ||
409 | 405 | 'default' => 1, | ||
410 | 406 | 'cumulative' => 1, | ||
411 | 407 | 'negatable' => 1, | ||
412 | 408 | ); | ||
413 | 409 | |||
414 | 410 | my $self = { | ||
415 | 411 | head1 => 'OPTIONS', # These args are used internally | ||
416 | 412 | skip_rules => 0, # to instantiate another Option- | ||
417 | 413 | item => '--(.*)', # Parser obj that parses the | ||
418 | 414 | attributes => \%attributes, # DSN OPTIONS section. Tools | ||
419 | 415 | parse_attributes => \&_parse_attribs, # don't tinker with these args. | ||
420 | 416 | |||
421 | 417 | %args, | ||
422 | 418 | |||
423 | 419 | strict => 1, # disabled by a special rule | ||
424 | 420 | program_name => $program_name, | ||
425 | 421 | opts => {}, | ||
426 | 422 | got_opts => 0, | ||
427 | 423 | short_opts => {}, | ||
428 | 424 | defaults => {}, | ||
429 | 425 | groups => {}, | ||
430 | 426 | allowed_groups => {}, | ||
431 | 427 | errors => [], | ||
432 | 428 | rules => [], # desc of rules for --help | ||
433 | 429 | mutex => [], # rule: opts are mutually exclusive | ||
434 | 430 | atleast1 => [], # rule: at least one opt is required | ||
435 | 431 | disables => {}, # rule: opt disables other opts | ||
436 | 432 | defaults_to => {}, # rule: opt defaults to value of other opt | ||
437 | 433 | DSNParser => undef, | ||
438 | 434 | default_files => [ | ||
439 | 435 | "/etc/percona-toolkit/percona-toolkit.conf", | ||
440 | 436 | "/etc/percona-toolkit/$program_name.conf", | ||
441 | 437 | "$home/.percona-toolkit.conf", | ||
442 | 438 | "$home/.$program_name.conf", | ||
443 | 439 | ], | ||
444 | 440 | types => { | ||
445 | 441 | string => 's', # standard Getopt type | ||
446 | 442 | int => 'i', # standard Getopt type | ||
447 | 443 | float => 'f', # standard Getopt type | ||
448 | 444 | Hash => 'H', # hash, formed from a comma-separated list | ||
449 | 445 | hash => 'h', # hash as above, but only if a value is given | ||
450 | 446 | Array => 'A', # array, similar to Hash | ||
451 | 447 | array => 'a', # array, similar to hash | ||
452 | 448 | DSN => 'd', # DSN | ||
453 | 449 | size => 'z', # size with kMG suffix (powers of 2^10) | ||
454 | 450 | time => 'm', # time, with an optional suffix of s/h/m/d | ||
455 | 451 | }, | ||
456 | 452 | }; | ||
457 | 453 | |||
458 | 454 | return bless $self, $class; | ||
459 | 455 | } | ||
460 | 456 | |||
461 | 457 | sub get_specs { | ||
462 | 458 | my ( $self, $file ) = @_; | ||
463 | 459 | $file ||= $self->{file} || __FILE__; | ||
464 | 460 | my @specs = $self->_pod_to_specs($file); | ||
465 | 461 | $self->_parse_specs(@specs); | ||
466 | 462 | |||
467 | 463 | open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; | ||
468 | 464 | my $contents = do { local $/ = undef; <$fh> }; | ||
469 | 465 | close $fh; | ||
470 | 466 | if ( $contents =~ m/^=head1 DSN OPTIONS/m ) { | ||
471 | 467 | PTDEBUG && _d('Parsing DSN OPTIONS'); | ||
472 | 468 | my $dsn_attribs = { | ||
473 | 469 | dsn => 1, | ||
474 | 470 | copy => 1, | ||
475 | 471 | }; | ||
476 | 472 | my $parse_dsn_attribs = sub { | ||
477 | 473 | my ( $self, $option, $attribs ) = @_; | ||
478 | 474 | map { | ||
479 | 475 | my $val = $attribs->{$_}; | ||
480 | 476 | if ( $val ) { | ||
481 | 477 | $val = $val eq 'yes' ? 1 | ||
482 | 478 | : $val eq 'no' ? 0 | ||
483 | 479 | : $val; | ||
484 | 480 | $attribs->{$_} = $val; | ||
485 | 481 | } | ||
486 | 482 | } keys %$attribs; | ||
487 | 483 | return { | ||
488 | 484 | key => $option, | ||
489 | 485 | %$attribs, | ||
490 | 486 | }; | ||
491 | 487 | }; | ||
492 | 488 | my $dsn_o = new OptionParser( | ||
493 | 489 | description => 'DSN OPTIONS', | ||
494 | 490 | head1 => 'DSN OPTIONS', | ||
495 | 491 | dsn => 0, # XXX don't infinitely recurse! | ||
496 | 492 | item => '\* (.)', # key opts are a single character | ||
497 | 493 | skip_rules => 1, # no rules before opts | ||
498 | 494 | attributes => $dsn_attribs, | ||
499 | 495 | parse_attributes => $parse_dsn_attribs, | ||
500 | 496 | ); | ||
501 | 497 | my @dsn_opts = map { | ||
502 | 498 | my $opts = { | ||
503 | 499 | key => $_->{spec}->{key}, | ||
504 | 500 | dsn => $_->{spec}->{dsn}, | ||
505 | 501 | copy => $_->{spec}->{copy}, | ||
506 | 502 | desc => $_->{desc}, | ||
507 | 503 | }; | ||
508 | 504 | $opts; | ||
509 | 505 | } $dsn_o->_pod_to_specs($file); | ||
510 | 506 | $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts); | ||
511 | 507 | } | ||
512 | 508 | |||
513 | 509 | if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) { | ||
514 | 510 | $self->{version} = $1; | ||
515 | 511 | PTDEBUG && _d($self->{version}); | ||
516 | 512 | } | ||
517 | 513 | |||
518 | 514 | return; | ||
519 | 515 | } | ||
520 | 516 | |||
521 | 517 | sub DSNParser { | ||
522 | 518 | my ( $self ) = @_; | ||
523 | 519 | return $self->{DSNParser}; | ||
524 | 520 | }; | ||
525 | 521 | |||
526 | 522 | sub get_defaults_files { | ||
527 | 523 | my ( $self ) = @_; | ||
528 | 524 | return @{$self->{default_files}}; | ||
529 | 525 | } | ||
530 | 526 | |||
531 | 527 | sub _pod_to_specs { | ||
532 | 528 | my ( $self, $file ) = @_; | ||
533 | 529 | $file ||= $self->{file} || __FILE__; | ||
534 | 530 | open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR"; | ||
535 | 531 | |||
536 | 532 | my @specs = (); | ||
537 | 533 | my @rules = (); | ||
538 | 534 | my $para; | ||
539 | 535 | |||
540 | 536 | local $INPUT_RECORD_SEPARATOR = ''; | ||
541 | 537 | while ( $para = <$fh> ) { | ||
542 | 538 | next unless $para =~ m/^=head1 $self->{head1}/; | ||
543 | 539 | last; | ||
544 | 540 | } | ||
545 | 541 | |||
546 | 542 | while ( $para = <$fh> ) { | ||
547 | 543 | last if $para =~ m/^=over/; | ||
548 | 544 | next if $self->{skip_rules}; | ||
549 | 545 | chomp $para; | ||
550 | 546 | $para =~ s/\s+/ /g; | ||
551 | 547 | $para =~ s/$POD_link_re/$1/go; | ||
552 | 548 | PTDEBUG && _d('Option rule:', $para); | ||
553 | 549 | push @rules, $para; | ||
554 | 550 | } | ||
555 | 551 | |||
556 | 552 | die "POD has no $self->{head1} section" unless $para; | ||
557 | 553 | |||
558 | 554 | do { | ||
559 | 555 | if ( my ($option) = $para =~ m/^=item $self->{item}/ ) { | ||
560 | 556 | chomp $para; | ||
561 | 557 | PTDEBUG && _d($para); | ||
562 | 558 | my %attribs; | ||
563 | 559 | |||
564 | 560 | $para = <$fh>; # read next paragraph, possibly attributes | ||
565 | 561 | |||
566 | 562 | if ( $para =~ m/: / ) { # attributes | ||
567 | 563 | $para =~ s/\s+\Z//g; | ||
568 | 564 | %attribs = map { | ||
569 | 565 | my ( $attrib, $val) = split(/: /, $_); | ||
570 | 566 | die "Unrecognized attribute for --$option: $attrib" | ||
571 | 567 | unless $self->{attributes}->{$attrib}; | ||
572 | 568 | ($attrib, $val); | ||
573 | 569 | } split(/; /, $para); | ||
574 | 570 | if ( $attribs{'short form'} ) { | ||
575 | 571 | $attribs{'short form'} =~ s/-//; | ||
576 | 572 | } | ||
577 | 573 | $para = <$fh>; # read next paragraph, probably short help desc | ||
578 | 574 | } | ||
579 | 575 | else { | ||
580 | 576 | PTDEBUG && _d('Option has no attributes'); | ||
581 | 577 | } | ||
582 | 578 | |||
583 | 579 | $para =~ s/\s+\Z//g; | ||
584 | 580 | $para =~ s/\s+/ /g; | ||
585 | 581 | $para =~ s/$POD_link_re/$1/go; | ||
586 | 582 | |||
587 | 583 | $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s; | ||
588 | 584 | PTDEBUG && _d('Short help:', $para); | ||
589 | 585 | |||
590 | 586 | die "No description after option spec $option" if $para =~ m/^=item/; | ||
591 | 587 | |||
592 | 588 | if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) { | ||
593 | 589 | $option = $base_option; | ||
594 | 590 | $attribs{'negatable'} = 1; | ||
595 | 591 | } | ||
596 | 592 | |||
597 | 593 | push @specs, { | ||
598 | 594 | spec => $self->{parse_attributes}->($self, $option, \%attribs), | ||
599 | 595 | desc => $para | ||
600 | 596 | . (defined $attribs{default} ? " (default $attribs{default})" : ''), | ||
601 | 597 | group => ($attribs{'group'} ? $attribs{'group'} : 'default'), | ||
602 | 598 | }; | ||
603 | 599 | } | ||
604 | 600 | while ( $para = <$fh> ) { | ||
605 | 601 | last unless $para; | ||
606 | 602 | if ( $para =~ m/^=head1/ ) { | ||
607 | 603 | $para = undef; # Can't 'last' out of a do {} block. | ||
608 | 604 | last; | ||
609 | 605 | } | ||
610 | 606 | last if $para =~ m/^=item /; | ||
611 | 607 | } | ||
612 | 608 | } while ( $para ); | ||
613 | 609 | |||
614 | 610 | die "No valid specs in $self->{head1}" unless @specs; | ||
615 | 611 | |||
616 | 612 | close $fh; | ||
617 | 613 | return @specs, @rules; | ||
618 | 614 | } | ||
619 | 615 | |||
620 | 616 | sub _parse_specs { | ||
621 | 617 | my ( $self, @specs ) = @_; | ||
622 | 618 | my %disables; # special rule that requires deferred checking | ||
623 | 619 | |||
624 | 620 | foreach my $opt ( @specs ) { | ||
625 | 621 | if ( ref $opt ) { # It's an option spec, not a rule. | ||
626 | 622 | PTDEBUG && _d('Parsing opt spec:', | ||
627 | 623 | map { ($_, '=>', $opt->{$_}) } keys %$opt); | ||
628 | 624 | |||
629 | 625 | my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/; | ||
630 | 626 | if ( !$long ) { | ||
631 | 627 | die "Cannot parse long option from spec $opt->{spec}"; | ||
632 | 628 | } | ||
633 | 629 | $opt->{long} = $long; | ||
634 | 630 | |||
635 | 631 | die "Duplicate long option --$long" if exists $self->{opts}->{$long}; | ||
636 | 632 | $self->{opts}->{$long} = $opt; | ||
637 | 633 | |||
638 | 634 | if ( length $long == 1 ) { | ||
639 | 635 | PTDEBUG && _d('Long opt', $long, 'looks like short opt'); | ||
640 | 636 | $self->{short_opts}->{$long} = $long; | ||
641 | 637 | } | ||
642 | 638 | |||
643 | 639 | if ( $short ) { | ||
644 | 640 | die "Duplicate short option -$short" | ||
645 | 641 | if exists $self->{short_opts}->{$short}; | ||
646 | 642 | $self->{short_opts}->{$short} = $long; | ||
647 | 643 | $opt->{short} = $short; | ||
648 | 644 | } | ||
649 | 645 | else { | ||
650 | 646 | $opt->{short} = undef; | ||
651 | 647 | } | ||
652 | 648 | |||
653 | 649 | $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0; | ||
654 | 650 | $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0; | ||
655 | 651 | $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0; | ||
656 | 652 | |||
657 | 653 | $opt->{group} ||= 'default'; | ||
658 | 654 | $self->{groups}->{ $opt->{group} }->{$long} = 1; | ||
659 | 655 | |||
660 | 656 | $opt->{value} = undef; | ||
661 | 657 | $opt->{got} = 0; | ||
662 | 658 | |||
663 | 659 | my ( $type ) = $opt->{spec} =~ m/=(.)/; | ||
664 | 660 | $opt->{type} = $type; | ||
665 | 661 | PTDEBUG && _d($long, 'type:', $type); | ||
666 | 662 | |||
667 | 663 | |||
668 | 664 | $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ ); | ||
669 | 665 | |||
670 | 666 | if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) { | ||
671 | 667 | $self->{defaults}->{$long} = defined $def ? $def : 1; | ||
672 | 668 | PTDEBUG && _d($long, 'default:', $def); | ||
673 | 669 | } | ||
674 | 670 | |||
675 | 671 | if ( $long eq 'config' ) { | ||
676 | 672 | $self->{defaults}->{$long} = join(',', $self->get_defaults_files()); | ||
677 | 673 | } | ||
678 | 674 | |||
679 | 675 | if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) { | ||
680 | 676 | $disables{$long} = $dis; | ||
681 | 677 | PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis); | ||
682 | 678 | } | ||
683 | 679 | |||
684 | 680 | $self->{opts}->{$long} = $opt; | ||
685 | 681 | } | ||
686 | 682 | else { # It's an option rule, not a spec. | ||
687 | 683 | PTDEBUG && _d('Parsing rule:', $opt); | ||
688 | 684 | push @{$self->{rules}}, $opt; | ||
689 | 685 | my @participants = $self->_get_participants($opt); | ||
690 | 686 | my $rule_ok = 0; | ||
691 | 687 | |||
692 | 688 | if ( $opt =~ m/mutually exclusive|one and only one/ ) { | ||
693 | 689 | $rule_ok = 1; | ||
694 | 690 | push @{$self->{mutex}}, \@participants; | ||
695 | 691 | PTDEBUG && _d(@participants, 'are mutually exclusive'); | ||
696 | 692 | } | ||
697 | 693 | if ( $opt =~ m/at least one|one and only one/ ) { | ||
698 | 694 | $rule_ok = 1; | ||
699 | 695 | push @{$self->{atleast1}}, \@participants; | ||
700 | 696 | PTDEBUG && _d(@participants, 'require at least one'); | ||
701 | 697 | } | ||
702 | 698 | if ( $opt =~ m/default to/ ) { | ||
703 | 699 | $rule_ok = 1; | ||
704 | 700 | $self->{defaults_to}->{$participants[0]} = $participants[1]; | ||
705 | 701 | PTDEBUG && _d($participants[0], 'defaults to', $participants[1]); | ||
706 | 702 | } | ||
707 | 703 | if ( $opt =~ m/restricted to option groups/ ) { | ||
708 | 704 | $rule_ok = 1; | ||
709 | 705 | my ($groups) = $opt =~ m/groups ([\w\s\,]+)/; | ||
710 | 706 | my @groups = split(',', $groups); | ||
711 | 707 | %{$self->{allowed_groups}->{$participants[0]}} = map { | ||
712 | 708 | s/\s+//; | ||
713 | 709 | $_ => 1; | ||
714 | 710 | } @groups; | ||
715 | 711 | } | ||
716 | 712 | if( $opt =~ m/accepts additional command-line arguments/ ) { | ||
717 | 713 | $rule_ok = 1; | ||
718 | 714 | $self->{strict} = 0; | ||
719 | 715 | PTDEBUG && _d("Strict mode disabled by rule"); | ||
720 | 716 | } | ||
721 | 717 | |||
722 | 718 | die "Unrecognized option rule: $opt" unless $rule_ok; | ||
723 | 719 | } | ||
724 | 720 | } | ||
725 | 721 | |||
726 | 722 | foreach my $long ( keys %disables ) { | ||
727 | 723 | my @participants = $self->_get_participants($disables{$long}); | ||
728 | 724 | $self->{disables}->{$long} = \@participants; | ||
729 | 725 | PTDEBUG && _d('Option', $long, 'disables', @participants); | ||
730 | 726 | } | ||
731 | 727 | |||
732 | 728 | return; | ||
733 | 729 | } | ||
734 | 730 | |||
# Return the long names of every option written as --name or --[no]name in
# $str, in the order they appear.  Dies if a mentioned option is not present
# in $self->{opts}.
sub _get_participants {
   my ( $self, $str ) = @_;
   my @names = $str =~ m/--(?:\[no\])?([\w-]+)/g;
   for my $name ( @names ) {
      next if exists $self->{opts}->{$name};
      die "Option --$name does not exist while processing rule $str";
   }
   PTDEBUG && _d('Participants for', $str, ':', @names);
   return @names;
}
746 | 742 | |||
# Return a shallow copy of the option table ($self->{opts}) as a key/value
# list, so callers cannot add or remove entries in the parser's own hash.
sub opts {
   my $self = shift;
   my %copy = %{ $self->{opts} };
   return %copy;
}
752 | 748 | |||
# Return a shallow copy of the short-option table ($self->{short_opts}) as a
# key/value list.
sub short_opts {
   my $self = shift;
   my %copy = %{ $self->{short_opts} };
   return %copy;
}
758 | 754 | |||
# Replace all stored defaults with the given long-name => value pairs.
# The previous defaults are discarded first.  Dies if a name is not a known
# option.
sub set_defaults {
   my ( $self, %defaults ) = @_;
   $self->{defaults} = {};
   for my $name ( keys %defaults ) {
      if ( !exists $self->{opts}->{$name} ) {
         die "Cannot set default for nonexistent option $name";
      }
      my $value = $defaults{$name};
      $self->{defaults}->{$name} = $value;
      PTDEBUG && _d('Default val for', $name, ':', $value);
   }
   return;
}
770 | 766 | |||
# Return the defaults hashref itself (not a copy).
sub get_defaults {
   my $self = shift;
   return $self->{defaults};
}
775 | 771 | |||
# Return the option-groups hashref itself (not a copy).
sub get_groups {
   my $self = shift;
   return $self->{groups};
}
780 | 776 | |||
# Record that an option was given.  $opt may be a long or a short name;
# dies if it is neither.  Cumulative options have their value incremented,
# all others take $val.  The option is marked as "got".
sub _set_option {
   my ( $self, $opt, $val ) = @_;

   my $long;
   if ( exists $self->{opts}->{$opt} ) {
      $long = $opt;
   }
   elsif ( exists $self->{short_opts}->{$opt} ) {
      $long = $self->{short_opts}->{$opt};
   }
   else {
      die "Getopt::Long gave a nonexistent option: $opt";
   }

   my $spec = $self->{opts}->{$long};
   if ( $spec->{is_cumulative} ) {
      $spec->{value}++;
   }
   else {
      $spec->{value} = $val;
   }
   $spec->{got} = 1;
   PTDEBUG && _d('Got option', $long, '=', $val);
}
797 | 793 | |||
# Parse @ARGV.
#
# Steps, in order:
#   1. Reset every option to its default (0 for cumulative options with no
#      default, undef otherwise) and clear the saved errors.
#   2. Handle --config, which is only honoured as the first argument, then
#      read each config file and put its arguments at the front of @ARGV.
#      A file that cannot be read is fatal only if --config was given
#      explicitly.
#   3. Run Getopt::Long over the remaining options.
#   4. If --version was given, print the version and exit 0.
#   5. Save errors for leftover arguments (strict mode), for mutually
#      exclusive options given together, and for "at least one" groups
#      with none given.
#   6. Run _check_opts() on every option and mark the parser as done.
sub get_opts {
   my ( $self ) = @_;

   for my $long ( keys %{$self->{opts}} ) {
      my $opt = $self->{opts}->{$long};
      $opt->{got} = 0;
      if ( exists $self->{defaults}->{$long} ) {
         $opt->{value} = $self->{defaults}->{$long};
      }
      elsif ( $opt->{is_cumulative} ) {
         $opt->{value} = 0;
      }
      else {
         $opt->{value} = undef;
      }
   }
   $self->{got_opts} = 0;
   $self->{errors}   = [];

   if ( @ARGV && $ARGV[0] eq "--config" ) {
      shift @ARGV;
      $self->_set_option('config', shift @ARGV);
   }
   if ( $self->has('config') ) {
      my @extra_args;
      for my $filename ( split(',', $self->get('config')) ) {
         my @file_args = eval { $self->_read_config_file($filename) };
         if ( $EVAL_ERROR ) {
            # Default config files are allowed to be missing; a file the
            # user asked for explicitly is not.
            die $EVAL_ERROR if $self->got('config');
            _d($EVAL_ERROR) if PTDEBUG;
         }
         push @extra_args, @file_args;
      }
      unshift @ARGV, @extra_args;
   }

   # --config is handled specially above, so it is not given to GetOptions.
   my @getopt_args;
   for my $opt ( values %{$self->{opts}} ) {
      next if $opt->{long} eq 'config';
      push @getopt_args, $opt->{spec}, sub { $self->_set_option(@_); };
   }
   Getopt::Long::Configure('no_ignore_case', 'bundling');
   if ( !GetOptions(@getopt_args) ) {
      $self->save_error('Error parsing options');
   }

   if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
      if ( $self->{version} ) {
         print $self->{version}, "\n";
      }
      else {
         print "Error parsing version. See the VERSION section of the tool's documentation.\n";
      }
      exit 0;
   }

   if ( @ARGV && $self->{strict} ) {
      $self->save_error("Unrecognized command-line options @ARGV");
   }

   for my $group ( @{$self->{mutex}} ) {
      my $n_given = grep { $self->{opts}->{$_}->{got} } @$group;
      next unless $n_given > 1;
      my @names = map { "--$self->{opts}->{$_}->{long}" } @$group;
      my $last  = pop @names;
      $self->save_error(
         join(', ', @names) . " and $last are mutually exclusive.");
   }

   for my $group ( @{$self->{atleast1}} ) {
      my $n_given = grep { $self->{opts}->{$_}->{got} } @$group;
      next if $n_given;
      my @names = map { "--$self->{opts}->{$_}->{long}" } @$group;
      my $last  = pop @names;
      my $err   = join(', ', @names) . " or $last";
      $self->save_error("Specify at least one of $err");
   }

   $self->_check_opts( keys %{$self->{opts}} );
   $self->{got_opts} = 1;
   return;
}
880 | 876 | |||
# Apply the post-parse rules to the named options and convert their values
# to their declared types.
#
# For each option that was given on the command line:
#   * the values of the options it disables are unset;
#   * if it is restricted to certain groups, an error is saved naming every
#     other given option that belongs to a group outside that set.
# An option that was not given but is required gets an error saved.
# Every option is then handed to _validate_type().  An option that comes
# back without its "parsed" flag set is retried on the next pass.
#
# Progress is measured by the number of options still unparsed after a
# pass.  (Counting the length of an array whose elements are removed with
# delete() is not reliable: the array only shrinks when trailing elements
# are deleted, so a pass that parsed only non-trailing options looked like
# no progress.)  Dies only when a whole pass parses nothing.
sub _check_opts {
   my ( $self, @long ) = @_;

   my @pending = grep { $_ } @long;
   while ( @pending ) {
      my @unparsed;

      foreach my $long ( @pending ) {
         my $opt = $self->{opts}->{$long};
         if ( $opt->{got} ) {
            if ( exists $self->{disables}->{$long} ) {
               my @disable_opts = @{$self->{disables}->{$long}};
               foreach my $disabled ( @disable_opts ) {
                  $self->{opts}->{$disabled}->{value} = undef;
               }
               PTDEBUG && _d('Unset options', @disable_opts,
                  'because', $long,'disables them');
            }

            if ( exists $self->{allowed_groups}->{$long} ) {
               # Groups this option may NOT be combined with.
               my @restricted_groups = grep {
                  !exists $self->{allowed_groups}->{$long}->{$_}
               } keys %{$self->{groups}};

               # Given options that live in those groups.
               my @restricted_opts;
               foreach my $restricted_group ( @restricted_groups ) {
                  RESTRICTED_OPT:
                  foreach my $restricted_opt (
                     keys %{$self->{groups}->{$restricted_group}} )
                  {
                     next RESTRICTED_OPT if $restricted_opt eq $long;
                     push @restricted_opts, $restricted_opt
                        if $self->{opts}->{$restricted_opt}->{got};
                  }
               }

               if ( @restricted_opts ) {
                  my $err;
                  if ( @restricted_opts == 1 ) {
                     $err = "--$restricted_opts[0]";
                  }
                  else {
                     $err = join(', ',
                               map { "--$self->{opts}->{$_}->{long}" }
                               grep { $_ }
                               @restricted_opts[0..scalar(@restricted_opts) - 2]
                            )
                          . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
                  }
                  $self->save_error("--$long is not allowed with $err");
               }
            }
         }
         elsif ( $opt->{is_required} ) {
            $self->save_error("Required option --$long must be specified");
         }

         $self->_validate_type($opt);
         if ( !$opt->{parsed} ) {
            PTDEBUG && _d('Temporarily failed to parse', $long);
            push @unparsed, $long;
         }
      }

      die "Failed to parse options, possibly due to circular dependencies"
         if @unparsed == @pending;
      @pending = @unparsed;
   }

   return;
}
953 | 949 | |||
# Convert an option's raw value according to its type and set its "parsed"
# flag.
#
# Types handled here:
#   m    time: optional sign, digits, optional suffix s/m/h/d; the value is
#        converted to seconds.  With no suffix, the suffix named in the
#        option description as "(suffix x)" is used, else "s".
#   d    DSN: parsed by $self->{DSNParser}.  If a "defaults to" rule names
#        another option, that option's parsed DSN is used as the base; if
#        it has not been parsed yet this sub returns WITHOUT setting
#        "parsed" so the caller can retry later.
#   z    size: delegated to _parse_size().
#   H/h  comma-separated list turned into a hashref of item => 1.
#   A/a  comma-separated list turned into an arrayref.
# (H and A are converted even when no value was given; h and a only when
# the value is defined.)  Commas preceded by a backslash do not split.
#
# Invalid time or size values are reported through save_error(), not by
# dying.  Returns nothing.
sub _validate_type {
   my ( $self, $opt ) = @_;
   return unless $opt;

   if ( !$opt->{type} ) {
      $opt->{parsed} = 1;
      return;
   }

   my $val = $opt->{value};

   if ( $val && $opt->{type} eq 'm' ) {  # type time
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
      my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
      if ( !defined $num ) {
         # The value has no digits where a number is expected.  Report it
         # instead of doing arithmetic on an undefined value.
         $self->save_error("Invalid time value for --$opt->{long}: $val");
      }
      else {
         if ( !$suffix ) {
            my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
            $suffix = $s || 's';
            PTDEBUG && _d('No suffix given; using', $suffix, 'for',
               $opt->{long}, '(value:', $val, ')');
         }
         if ( $suffix =~ m/[smhd]/ ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $opt->{value} = ($prefix || '') . $val;
            PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
         }
         else {
            $self->save_error("Invalid time suffix for --$opt->{long}");
         }
      }
   }
   elsif ( $val && $opt->{type} eq 'd' ) {  # type DSN
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
      my $prev = {};
      my $from_key = $self->{defaults_to}->{ $opt->{long} };
      if ( $from_key ) {
         PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
         if ( $self->{opts}->{$from_key}->{parsed} ) {
            $prev = $self->{opts}->{$from_key}->{value};
         }
         else {
            # Leave "parsed" unset so the caller retries this option after
            # the option it copies from has been parsed.
            PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
               $from_key, 'parsed');
            return;
         }
      }
      my $defaults = $self->{DSNParser}->parse_options($self);
      $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
   }
   elsif ( $val && $opt->{type} eq 'z' ) {  # type size
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
      $self->_parse_size($opt, $val);
   }
   elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
      $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
   }
   elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
      $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
   }
   else {
      PTDEBUG && _d('Nothing to validate for option',
         $opt->{long}, 'type', $opt->{type}, 'value', $val);
   }

   $opt->{parsed} = 1;
   return;
}
1022 | 1018 | |||
# Return the current value of an option.  A one-character name is treated
# as a short option and mapped to its long name.  Dies if the option does
# not exist.
sub get {
   my ( $self, $opt ) = @_;
   my $long = $opt;
   if ( length($opt) == 1 ) {
      $long = $self->{short_opts}->{$opt};
   }
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   return $self->{opts}->{$long}->{value};
}
1030 | 1026 | |||
# Return the "got" flag of an option (set when the option was given).
# A one-character name is treated as a short option.  Dies if the option
# does not exist.
sub got {
   my ( $self, $opt ) = @_;
   my $long = $opt;
   if ( length($opt) == 1 ) {
      $long = $self->{short_opts}->{$opt};
   }
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   return $self->{opts}->{$long}->{got};
}
1038 | 1034 | |||
# Return true if the option exists.  A one-character name is treated as a
# short option; an unknown short option yields 0.  Never dies.
sub has {
   my ( $self, $opt ) = @_;
   my $long = $opt;
   if ( length($opt) == 1 ) {
      $long = $self->{short_opts}->{$opt};
   }
   return 0 unless defined $long;
   return exists $self->{opts}->{$long};
}
1044 | 1040 | |||
# Set the value of an option.  A one-character name is treated as a short
# option.  Dies if the option does not exist.  The "got" flag is not
# changed.
sub set {
   my ( $self, $opt, $val ) = @_;
   my $long = $opt;
   if ( length($opt) == 1 ) {
      $long = $self->{short_opts}->{$opt};
   }
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   $self->{opts}->{$long}->{value} = $val;
   return;
}
1053 | 1049 | |||
# Append an error message to the list reported by print_errors().
sub save_error {
   my $self  = shift;
   my $error = shift;
   push @{ $self->{errors} }, $error;
   return;
}
1059 | 1055 | |||
# Return the arrayref of saved error messages (the list itself, not a copy).
sub errors {
   my $self = shift;
   return $self->{errors};
}
1064 | 1060 | |||
# Return the one-line "Usage: ..." string, newline-terminated.  Warns if no
# usage string has been set, in which case the text after "Usage: " is
# empty.
sub usage {
   my ( $self ) = @_;
   my $text = $self->{usage};
   if ( !$text ) {
      warn "No usage string is set";  # XXX
      $text = '';
   }
   return "Usage: " . $text . "\n";
}
1070 | 1066 | |||
# Return the tool description followed by a pointer to --help and perldoc.
# Falls back to the program name, then to an empty string, when no
# description is set (and warns in that case).  Unless the environment
# variable DONT_BREAK_LINES is true, the text is re-wrapped into chunks of
# at most 80 characters joined by newlines.  Trailing spaces are removed
# from every line.
sub descr {
   my ( $self ) = @_;
   if ( !$self->{description} ) {
      warn "No description string is set";  # XXX
   }
   my $lead = $self->{description} || $self->{program_name} || '';
   my $text = join('',
      $lead,
      " For more details, please use the --help option, ",
      "or try 'perldoc $PROGRAM_NAME' ",
      "for complete documentation.",
   );
   if ( !$ENV{DONT_BREAK_LINES} ) {
      my @chunks = $text =~ m/(.{0,80})(?:\s+|$)/g;
      $text = join("\n", @chunks);
   }
   $text =~ s/ +$//mg;
   return $text;
}
1083 | 1079 | |||
# Print the help text or the saved command-line errors, if either applies.
#
# If the description or usage string is not set yet, both are filled in
# from the SYNOPSIS section of $file (default: $self->{file}, else this
# file).  Then:
#   * if --help was given, the full usage is printed and the process exits
#     with status 0;
#   * otherwise, if any errors were saved, they are printed and the process
#     exits with status 1, so callers and shell scripts can tell a failed
#     invocation from a successful one.
# When $return is true the sub returns instead of exiting in either case.
# Dies if printing fails.
sub usage_or_errors {
   my ( $self, $file, $return ) = @_;
   $file ||= $self->{file} || __FILE__;

   if ( !$self->{description} || !$self->{usage} ) {
      PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
      my %synop = $self->_parse_synopsis($file);
      $self->{description} ||= $synop{description};
      $self->{usage}       ||= $synop{usage};
      PTDEBUG && _d("Description:", $self->{description},
         "\nUsage:", $self->{usage});
   }

   if ( $self->{opts}->{help}->{got} ) {
      print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
      exit 0 unless $return;
   }
   elsif ( scalar @{$self->{errors}} ) {
      print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
      # Bad command-line arguments are a failure: exit non-zero.
      exit 1 unless $return;
   }

   return;
}
1108 | 1104 | |||
# Build (and return, despite the name) the error report: the usage line, a
# bulleted list of saved errors if there are any, and the description.
sub print_errors {
   my ( $self ) = @_;
   my @parts = ( $self->usage(), "\n" );
   my @errors = @{$self->{errors}};
   if ( @errors ) {
      push @parts,
         join("\n * ", 'Errors in command-line arguments:', @errors),
         "\n";
   }
   push @parts, "\n", $self->descr();
   return join('', @parts);
}
1118 | 1114 | |||
# Build (and return, despite the name) the full --help text: description,
# usage line, options listed per group (the "default" group first, under
# the heading "Options", then the other groups in sorted order), the
# option-type legend, the rules, DSN usage if a DSNParser is set, and the
# value of every option after argument processing.
# Dies if get_opts() has not been run.
sub print_usage {
   my ( $self ) = @_;
   die "Run get_opts() before print_usage()" unless $self->{got_opts};
   my @opts = values %{$self->{opts}};

   # Extra width an option needs for "[no]" (negatable) and "=x" (typed).
   my $extra_width = sub {
      my ( $opt ) = @_;
      return ($opt->{is_negatable} ? 4 : 0) + ($opt->{type} ? 2 : 0);
   };

   my $maxl = max(
      map { length($_->{long}) + $extra_width->($_) } @opts
   );
   my $maxs = max(0,
      map { length($_) + $extra_width->($self->{opts}->{$_}) }
      values %{$self->{short_opts}}
   );

   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->usage();

   # Sorting once serves both the per-group listing and the final listing.
   my @sorted_opts = sort { $a->{long} cmp $b->{long} } @opts;

   my @groups = (
      'default',
      sort grep { $_ ne 'default'; } keys %{$self->{groups}},
   );

   for my $group ( @groups ) {
      my $heading = $group eq 'default' ? 'Options' : $group;
      $usage .= "\n" . $heading . ":\n\n";

      for my $opt ( grep { $_->{group} eq $group } @sorted_opts ) {
         my $long = $opt->{long};
         $long = "[no]$long" if $opt->{is_negatable};
         $long .= "=$opt->{type}" if $opt->{type};

         my $short = $opt->{short};
         my $desc  = $opt->{desc};

         if ( $opt->{type} && $opt->{type} eq 'm' ) {
            my ($s) = $desc =~ m/\(suffix (.)\)/;
            $s ||= 's';
            $desc =~ s/\s+\(suffix .\)//;
            $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
                   . "d=days; if no suffix, $s is used.";
         }

         # Wrap the description to the right-hand column.
         my @pieces = grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g;
         $desc = join("\n$rpad", @pieces);
         $desc =~ s/ +$//mg;

         $usage .= $short
            ? sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc)
            : sprintf(" --%-${lcol}s %s\n", $long, $desc);
      }
   }

   $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";

   my @rules = @{$self->{rules}};
   if ( @rules ) {
      $usage .= "\nRules:\n\n";
      $usage .= join("\n", map { " $_" } @rules) . "\n";
   }
   if ( $self->{DSNParser} ) {
      $usage .= "\n" . $self->{DSNParser}->usage();
   }

   $usage .= "\nOptions and values after processing arguments:\n\n";
   for my $opt ( @sorted_opts ) {
      my $val  = $opt->{value};
      my $type = $opt->{type} || '';
      # A spec with no "=type" part (optionally "|x" and "!") is a boolean.
      my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;

      my $shown;
      if ( $bool ) {
         $shown = $val ? 'TRUE' : 'FALSE';
      }
      elsif ( !defined $val ) {
         $shown = '(No value)';
      }
      elsif ( $type eq 'd' ) {
         $shown = $self->{DSNParser}->as_string($val);
      }
      elsif ( $type =~ m/H|h/ ) {
         $shown = join(',', sort keys %$val);
      }
      elsif ( $type =~ m/A|a/ ) {
         $shown = join(',', @$val);
      }
      else {
         $shown = $val;
      }
      $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $shown);
   }
   return $usage;
}
1206 | 1202 | |||
# Print $prompt and read one line from STDIN with terminal echo disabled
# (for passwords).  May be called as a plain function or as a method; a
# leading object of this package is discarded.
#
# Returns the line read, without its trailing newline.
# Dies if the prompt cannot be printed, or if Term::ReadKey cannot be
# loaded or reading fails.  If a failure happens after echo has been turned
# off, the terminal is put back into normal mode before dying so the user's
# shell is not left with echo disabled.
sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt
      or die "Cannot print: $OS_ERROR";
   my $response;
   my $echo_off = 0;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      $echo_off = 1;
      chomp($response = <STDIN>);
      Term::ReadKey::ReadMode('normal');
      $echo_off = 0;
      print "\n"
         or die "Cannot print: $OS_ERROR";
   };
   if ( my $error = $EVAL_ERROR ) {
      if ( $echo_off ) {
         # Best effort: a second failure here must not hide the first.
         eval { Term::ReadKey::ReadMode('normal'); };
      }
      die "Cannot read response; is Term::ReadKey installed? $error";
   }
   return $response;
}
1227 | 1223 | |||
# Read a config file and return its contents as a list of command-line
# arguments.
#
# Blank lines and lines starting with "#" or ";" are skipped.  A trailing
# " # comment" and surrounding whitespace are removed.  Before a line that
# is exactly "--", each "opt" or "opt=value" line becomes "--opt" followed
# by the value if there is one.  Lines after "--", and lines that do not
# look like "opt" or "opt=value", are passed through verbatim.
# Dies if the file cannot be opened.
sub _read_config_file {
   my ( $self, $filename ) = @_;
   open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";

   my @args;
   my $in_options = 1;  # false once the "--" separator has been seen

   while ( defined(my $line = <$fh>) ) {
      chomp $line;
      next if $line =~ m/^\s*(?:\#|\;|$)/;
      $line =~ s/\s+#.*$//g;
      $line =~ s/^\s+|\s+$//g;

      if ( $line eq '--' ) {
         $in_options = 0;
         next;
      }

      my ( $opt, $arg );
      if ( $in_options ) {
         ($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/;
      }

      if ( defined $opt ) {
         push @args, "--$opt";
         push @args, $arg if defined $arg;
      }
      elsif ( $line =~ m/./ ) {
         push @args, $line;
      }
      else {
         die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
      }
   }

   close $fh;
   return @args;
}
1261 | 1257 | |||
# Return the paragraph that follows the first paragraph matching $regex,
# searching only after the paragraph that contains a "=pod" line.  The file
# is read in paragraph mode and the result has its trailing newlines
# removed.  Dies if the file cannot be opened or closed.
sub read_para_after {
   my ( $self, $file, $regex ) = @_;
   open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
   local $INPUT_RECORD_SEPARATOR = '';

   # Skip ahead to the start of the POD.
   while ( defined(my $chunk = <$fh>) ) {
      last if $chunk =~ m/^=pod$/m;
   }
   # Skip ahead to the paragraph the caller asked for.
   while ( defined(my $chunk = <$fh>) ) {
      last if $chunk =~ m/$regex/;
   }

   my $para = <$fh>;
   chomp($para);
   close $fh or die "Can't close $file: $OS_ERROR";
   return $para;
}
1280 | 1276 | |||
# Return a new object of this package holding copies of the opts,
# short_opts and defaults tables plus the got_opts flag.  Each table is
# copied two levels deep: hash and array values are copied, anything nested
# further (or any other kind of reference) is shared with the original.
sub clone {
   my ( $self ) = @_;

   my %clone;
   for my $table ( qw(opts short_opts defaults) ) {
      my $source = $self->{$table};
      my %copy;
      for my $key ( keys %$source ) {
         my $value = $source->{$key};
         my $kind  = ref $value;
         if ( $kind eq 'HASH' ) {
            $copy{$key} = { %$value };
         }
         elsif ( $kind eq 'ARRAY' ) {
            $copy{$key} = [ @$value ];
         }
         else {
            $copy{$key} = $value;
         }
      }
      $clone{$table} = \%copy;
   }

   $clone{got_opts} = $self->{got_opts};

   return bless \%clone, __PACKAGE__;
}
1303 | 1299 | |||
# Convert a size value such as "10", "+4k", "-2M" or "1G" into a plain
# number (with its sign kept as a prefix) and store it in $opt->{value}.
# Suffixes are case-sensitive: k = 1_024, M = 1_048_576, G = 1_073_741_824.
# The word "null" (any case) is stored as the string 'null'.
# Anything else is reported through save_error().  Returns nothing.
sub _parse_size {
   my ( $self, $opt, $val ) = @_;

   if ( lc($val || '') eq 'null' ) {
      PTDEBUG && _d('NULL size for', $opt->{long});
      $opt->{value} = 'null';
      return;
   }

   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
   my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
   if ( defined $num ) {
      if ( $factor ) {
         $num *= $factor_for{$factor};
         # Report the option's name; the key "y" that was read here before
         # does not exist on option hashes.
         PTDEBUG && _d('Setting option', $opt->{long},
            'to num', $num, '* factor', $factor);
      }
      $opt->{value} = ($pre || '') . $num;
   }
   else {
      $self->save_error("Invalid size for --$opt->{long}: $val");
   }
   return;
}
1328 | 1324 | |||
# Build a Getopt::Long-style spec string for $option from its attributes:
# "|x" for a short form, "!" if negatable, "+" if cumulative, and "=t"
# where t is looked up in $self->{types} by the attribute's type name.
sub _parse_attribs {
   my ( $self, $option, $attribs ) = @_;
   my $types = $self->{types};

   my $spec = $option;
   $spec .= '|' . $attribs->{'short form'}       if $attribs->{'short form'};
   $spec .= '!'                                  if $attribs->{'negatable'};
   $spec .= '+'                                  if $attribs->{'cumulative'};
   $spec .= '=' . $types->{ $attribs->{type} }   if $attribs->{'type'};
   return $spec;
}
1338 | 1334 | |||
# Extract the usage line and description from the SYNOPSIS section of
# $file (default: $self->{file}, else this file).  The file is read in
# paragraph mode: the paragraph after "=head1 SYNOPSIS" is the usage (a
# leading "Usage:" label is stripped) and the one after that is the
# description (newlines and runs of whitespace are collapsed to single
# spaces).
# Returns the list (description => ..., usage => ...).
# Dies if the file cannot be opened, has no SYNOPSIS section, or either
# paragraph is missing.
sub _parse_synopsis {
   my ( $self, $file ) = @_;
   $file ||= $self->{file} || __FILE__;
   PTDEBUG && _d("Parsing SYNOPSIS in", $file);

   local $INPUT_RECORD_SEPARATOR = '';  # read paragraphs
   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";

   my $heading;
   while ( defined($heading = <$fh>) ) {
      last if $heading =~ m/^=head1 SYNOPSIS/;
   }
   die "$file does not contain a SYNOPSIS section" unless $heading;

   my $usage = <$fh>;  # first paragraph after the heading
   my $desc  = <$fh>;  # second paragraph after the heading
   close $fh;
   PTDEBUG && _d("Raw SYNOPSIS text:", $usage, $desc);
   die "The SYNOPSIS section in $file is not formatted properly"
      unless $usage && $desc;

   $usage =~ s/^\s*Usage:\s+(.+)/$1/;
   chomp $usage;

   $desc =~ s/\n/ /g;
   $desc =~ s/\s{2,}/ /g;
   $desc =~ s/\. ([A-Z][a-z])/. $1/g;
   $desc =~ s/\s+$//;

   return (
      description => $desc,
      usage       => $usage,
   );
}
1373 | 1369 | |||
# Debug printer: write the arguments to STDERR on one logical line,
# prefixed with "# package:line pid".  Undefined arguments are shown as
# "undef" and embedded newlines are followed by "# " so continuation lines
# stay commented.
sub _d {
   my ( $package, undef, $line ) = caller 0;
   my @words;
   for my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @words, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @words), "\n";
}
1381 | 1377 | |||
# When debugging is enabled, print the Perl binary and version, the output
# of `uname -a` (if any), and the command-line arguments, each as a comment
# line on STDOUT.
if ( PTDEBUG ) {
   print '# ', $^X, ' ', $], "\n";

   my $uname = `uname -a`;
   if ( $uname ) {
      $uname =~ s/\s+/ /g;
      print "# $uname\n";
   }

   my @shown_args;
   for my $arg ( @ARGV ) {
      my $text = "_[$arg]_";
      $text =~ s/\n/\n# /g;
      push @shown_args, $text;
   }
   print '# Arguments: ', join(' ', @shown_args), "\n";
}
1391 | 1387 | |||
1392 | 1388 | 1; | ||
1393 | 1389 | } | ||
1394 | 1390 | # ########################################################################### | ||
1395 | 1391 | # End OptionParser package | ||
1396 | 1392 | # ########################################################################### | ||
1397 | 1393 | |||
1398 | 1394 | # ########################################################################### | ||
1399 | 1395 | # SlowLogParser package | ||
1400 | 1396 | # This package is a copy without comments from the original. The original | ||
1401 | 1397 | # with comments and its test file can be found in the Bazaar repository at, | ||
1402 | 1398 | # lib/SlowLogParser.pm | ||
1403 | 1399 | # t/lib/SlowLogParser.t | ||
1404 | 1400 | # See https://launchpad.net/percona-toolkit for more information. | ||
1405 | 1401 | # ########################################################################### | ||
1406 | 1402 | { | ||
1407 | 1403 | package SlowLogParser; | ||
1408 | 1404 | |||
1409 | 1405 | use strict; | ||
1410 | 1406 | use warnings FATAL => 'all'; | ||
1411 | 1407 | use English qw(-no_match_vars); | ||
1412 | 1408 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
1413 | 1409 | |||
1414 | 1410 | use Data::Dumper; | ||
1415 | 1411 | $Data::Dumper::Indent = 1; | ||
1416 | 1412 | $Data::Dumper::Sortkeys = 1; | ||
1417 | 1413 | $Data::Dumper::Quotekeys = 0; | ||
1418 | 1414 | |||
# Construct a parser.  The object starts with an empty "pending" list.
sub new {
   my ( $class ) = @_;
   return bless { pending => [] }, $class;
}
1426 | 1422 | |||
# "# Time: ..." line; captures the 15 characters of digits, colons and
# spaces that follow.
my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/;

# "# User@Host: ..." line; captures the user part, the host and the
# bracketed text that follows the host.
my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;

# Whole header lines (including the newline) in one of three forms.
my $slow_log_hd_line = qr{
   ^(?:
      T[cC][pP]\s[pP]ort:\s+\d+   # case differs on windows/unix
   |
      [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
   |
      Time\s+Id\s+Command
   ).*\n
}xm;
1438 | 1434 | |||
# Parse the next slow-log event and return it as a hashref of properties.
#
# Required args:
#   next_event - coderef; each call returns the next chunk of log text, or
#                undef when there is no more input
#   tell       - coderef; returns the current position in the log, which is
#                stored in the event as pos_in_log
# Optional args:
#   misc    - hashref; if its "embed" pattern matches the query text, its
#             "capture" pattern is applied to that match and the captured
#             values are appended to the event's properties
#   stats   - hashref; its events_read and events_parsed counters are
#             incremented for each event returned
#   oktorun - coderef; called with 0 once the input is exhausted
#
# Returns the event hashref, or nothing when no more events can be read.
sub parse_event {
   my ( $self, %args ) = @_;
   my @required_args = qw(next_event tell);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($next_event, $tell) = @args{@required_args};

   my $pending = $self->{pending};
   # NOTE(review): presumably next_event reads from a filehandle and so
   # honors this record separator -- confirm against the caller.
   local $INPUT_RECORD_SEPARATOR = ";\n#";
   my $trimlen = length($INPUT_RECORD_SEPARATOR); # not used below
   my $pos_in_log = $tell->();
   my $stmt;

   # Chunks queued by an earlier call are consumed before new input is read.
   EVENT:
   while (
         defined($stmt = shift @$pending)
      or defined($stmt = $next_event->())
   ) {
      # Properties are gathered as a flat key/value list and turned into a
      # hash at the end, so a later value for a key (e.g. cmd => 'Admin')
      # replaces an earlier one.
      my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
      $pos_in_log = $tell->();

      if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
         # Only when a header line was removed: re-split the chunk and queue
         # every piece after the first for later calls.
         my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
         if ( @chunks > 1 ) {
            PTDEBUG && _d("Found multiple chunks");
            $stmt = shift @chunks;
            unshift @$pending, @chunks;
         }
      }

      # Make sure the chunk starts with '#' and drop a trailing ";\n" or
      # ";\n#" (the record separator).
      $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
      $stmt =~ s/;\n#?\Z//;


      # The $got_* flags ensure each kind of header line is recorded once
      # per event.  ($got_embed is declared but not used below.)
      my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
      my $pos = 0;
      my $len = length($stmt);
      my $found_arg = 0;
      LINE:
      while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
         $pos = pos($stmt); # Be careful not to mess this up!
         my $line = $1; # Necessary for /g and pos() to work.
         PTDEBUG && _d($line);

         # Header/meta lines: comments, "use db" and the SET statements that
         # precede the query.
         if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {

            if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
               PTDEBUG && _d("Got ts", $time);
               push @properties, 'ts', $time;
               ++$got_ts;
               # The same line may also carry the User@Host information.
               if ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
               ) {
                  PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
                  push @properties, 'user', $user, 'host', $host, 'ip', $ip;
                  ++$got_uh;
               }
            }

            elsif ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
            ) {
               PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
               push @properties, 'user', $user, 'host', $host, 'ip', $ip;
               ++$got_uh;
            }

            elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
               PTDEBUG && _d("Got admin command");
               $line =~ s/^#\s+//; # strip leading "# ".
               push @properties, 'cmd', 'Admin', 'arg', $line;
               # $properties[-1] is the arg value that was just pushed.
               push @properties, 'bytes', length($properties[-1]);
               ++$found_arg;
               ++$got_ac;
            }

            elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
               PTDEBUG && _d("Got some line with properties");

               # A "Schema:" with no value would otherwise pair up with the
               # next attribute name, so remove it first.
               if ( $line =~ m/Schema:\s+\w+: / ) {
                  PTDEBUG && _d('Removing empty Schema attrib');
                  $line =~ s/Schema:\s+//;
                  PTDEBUG && _d($line);
               }

               # Every "Name: value" pair on the line becomes a property.
               my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
               push @properties, @temp;
            }

            elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
               PTDEBUG && _d("Got a default database:", $db);
               push @properties, 'db', $db;
               ++$got_db;
            }

            elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
               PTDEBUG && _d("Got some setting:", $setting);
               # "a=1,b=2" is split into the key/value list a, 1, b, 2.
               push @properties, split(/,|\s*=\s*/, $setting);
               ++$got_set;
            }

            # The chunk ended on a header line without any query text: read
            # one more record, terminated by ";\n", and treat it as an admin
            # statement.
            if ( !$found_arg && $pos == $len ) {
               PTDEBUG && _d("Did not find arg, looking for special cases");
               local $INPUT_RECORD_SEPARATOR = ";\n";
               if ( defined(my $l = $next_event->()) ) {
                  chomp $l;
                  $l =~ s/^\s+//;
                  PTDEBUG && _d("Found admin statement", $l);
                  push @properties, 'cmd', 'Admin', 'arg', $l;
                  push @properties, 'bytes', length($properties[-1]);
                  $found_arg++;
               }
               else {
                  PTDEBUG && _d("I can't figure out what to do with this line");
                  next EVENT;
               }
            }
         }
         else {
            # First non-header line: everything from the start of this line
            # to the end of the chunk is the query text.
            PTDEBUG && _d("Got the query/arg line");
            my $arg = substr($stmt, $pos - length($line));
            push @properties, 'arg', $arg, 'bytes', length($arg);
            if ( $args{misc} && $args{misc}->{embed}
               && ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
            ) {
               push @properties, $e =~ m/$args{misc}->{capture}/g;
            }
            last LINE;
         }
      }

      PTDEBUG && _d('Properties of event:', Dumper(\@properties));
      my $event = { @properties };
      if ( $args{stats} ) {
         $args{stats}->{events_read}++;
         $args{stats}->{events_parsed}++;
      }
      return $event;
   } # EVENT

   # No more input: clear the queue and tell the caller to stop.
   @$pending = ();
   $args{oktorun}->(0) if $args{oktorun};
   return;
}
1584 | 1580 | |||
# Debug printer: writes its arguments to STDERR on one line, prefixed with
# "# <calling package>:<calling line> <PID>". Undefined arguments are shown
# as "undef" and embedded newlines are followed by "# " so that continuation
# lines stay commented.
sub _d {
   my ( $caller_pkg, undef, $caller_line ) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $caller_pkg:$caller_line $PID ", join(' ', @parts), "\n";
}
1592 | 1588 | |||
1593 | 1589 | 1; | ||
1594 | 1590 | } | ||
1595 | 1591 | # ########################################################################### | ||
1596 | 1592 | # End SlowLogParser package | ||
1597 | 1593 | # ########################################################################### | ||
1598 | 1594 | |||
1599 | 1595 | # ########################################################################### | ||
1600 | 1596 | # Transformers package | ||
1601 | 1597 | # This package is a copy without comments from the original. The original | ||
1602 | 1598 | # with comments and its test file can be found in the Bazaar repository at, | ||
1603 | 1599 | # lib/Transformers.pm | ||
1604 | 1600 | # t/lib/Transformers.t | ||
1605 | 1601 | # See https://launchpad.net/percona-toolkit for more information. | ||
1606 | 1602 | # ########################################################################### | ||
1607 | 1603 | { | ||
1608 | 1604 | package Transformers; | ||
1609 | 1605 | |||
1610 | 1606 | use strict; | ||
1611 | 1607 | use warnings FATAL => 'all'; | ||
1612 | 1608 | use English qw(-no_match_vars); | ||
1613 | 1609 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
1614 | 1610 | |||
1615 | 1611 | use Time::Local qw(timegm timelocal); | ||
1616 | 1612 | use Digest::MD5 qw(md5_hex); | ||
1617 | 1613 | |||
1618 | 1614 | require Exporter; | ||
1619 | 1615 | our @ISA = qw(Exporter); | ||
1620 | 1616 | our %EXPORT_TAGS = (); | ||
1621 | 1617 | our @EXPORT = (); | ||
1622 | 1618 | our @EXPORT_OK = qw( | ||
1623 | 1619 | micro_t | ||
1624 | 1620 | percentage_of | ||
1625 | 1621 | secs_to_time | ||
1626 | 1622 | time_to_secs | ||
1627 | 1623 | shorten | ||
1628 | 1624 | ts | ||
1629 | 1625 | parse_timestamp | ||
1630 | 1626 | unix_timestamp | ||
1631 | 1627 | any_unix_timestamp | ||
1632 | 1628 | make_checksum | ||
1633 | 1629 | crc32 | ||
1634 | 1630 | ); | ||
1635 | 1631 | |||
# Timestamp in MySQL log form: YYMMDD, spaces, H:M:S and an optional
# fractional part.
our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
# Timestamp in YYYY-MM-DD HH:MM:SS form ("T" or a space between date and
# time) with an optional fractional part.
our $proper_ts = qr/(\d\d\d\d)-(\d\d)-(\d\d)[T ](\d\d):(\d\d):(\d\d)(\.\d+)?/;
# A number with an optional unit suffix: s, h, m or d.
our $n_ts = qr/(\d{1,5})([shmd]?)/; # Limit \d{1,5} because \d{6} looks
                                    # like a MySQL YYMMDD date, which
                                    # any_unix_timestamp() handles separately.
1639 | 1635 | |||
# Format a time in seconds as a short string with a unit: microseconds
# ("us"), milliseconds ("ms") or seconds ("s"). Zero and negative times
# yield 0.
#
# Optional args:
#   p_ms - decimal places kept for millisecond values (default 0)
#   p_s  - decimal places kept for second values (default 0)
sub micro_t {
   my ( $t, %args ) = @_;
   my $ms_places = defined $args{p_ms} ? $args{p_ms} : 0;
   my $s_places  = defined $args{p_s}  ? $args{p_s}  : 0;

   # Negative times are treated as zero.
   $t = 0 if $t < 0;

   # Expand exponent notation (e.g. 1e-05) into plain decimal notation.
   $t = sprintf('%.17f', $t) if $t =~ /e/;

   # Keep at most six decimal places (truncated, not rounded).
   $t =~ s/\.(\d{1,6})\d*/\.$1/;

   if ( $t >= 1 ) {
      my $secs = sprintf("%.${s_places}f", $t);
      return ($secs * 1) . 's';    # * 1 removes insignificant zeros
   }
   if ( $t >= 0.001000 && $t <= 0.999999 ) {
      my $msecs = sprintf("%.${ms_places}f", $t * 1000);
      return ($msecs * 1) . 'ms';  # * 1 removes insignificant zeros
   }
   if ( $t > 0 && $t <= 0.000999 ) {
      return ($t * 1000000) . 'us';
   }

   return 0;  # $t is zero at this point
}
1669 | 1665 | |||
# Return $is as a percentage of $of. A false $of (0, undef) is treated as 1
# so the division cannot fail.
#
# Optional args:
#   p - number of decimal places; when 0 or absent the result is formatted
#       with %d
sub percentage_of {
   my ( $is, $of, %args ) = @_;
   my $places = $args{p} || 0;
   my $pct    = ($is * 100) / ($of || 1);
   return $places ? sprintf("%.${places}f", $pct)
                  : sprintf("%d", $pct);
}
1676 | 1672 | |||
# Format a number of seconds as a clock-style string.
#
# $fmt selects the layout: 'd' gives D+HH:MM:SS, 'h' gives HH:MM:SS and
# anything else gives MM:SS. When $fmt is not given it is chosen from the
# size of $secs. A false $secs yields '00:00'.
sub secs_to_time {
   my ( $secs, $fmt ) = @_;
   $secs ||= 0;
   return '00:00' unless $secs;

   if ( !$fmt ) {
      $fmt = $secs >= 86_400 ? 'd'
           : $secs >= 3_600  ? 'h'
           :                   'm';
   }

   my $days    = int($secs / 86_400);
   my $hours   = int(($secs % 86_400) / 3_600);
   my $minutes = int(($secs % 3_600) / 60);
   my $seconds = $secs % 60;

   if ( $fmt eq 'd' ) {
      return sprintf("%d+%02d:%02d:%02d", $days, $hours, $minutes, $seconds);
   }
   if ( $fmt eq 'h' ) {
      return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
   }
   return sprintf("%02d:%02d", $minutes, $seconds);
}
1703 | 1699 | |||
# Convert a time value such as "30", "5m", "-2h" or "1d" to seconds.
#
# $val is an optionally signed integer with an optional one-letter unit:
# s (seconds), m (minutes), h (hours) or d (days). $default_suffix is the
# unit used when $val has none; it defaults to 's'.
#
# Dies if $val is undefined, contains no number, or the unit is not one of
# s, m, h, d.
sub time_to_secs {
   my ( $val, $default_suffix ) = @_;
   die "I need a val argument" unless defined $val;
   my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;

   # Without this check an unparsable value leaves $num undefined and the
   # arithmetic below fails with an "uninitialized value" warning instead
   # of a useful message.
   die "Invalid time value: $val" unless defined $num;

   $suffix = $suffix || $default_suffix || 's';

   # Exact lookup: a multi-character default suffix such as "ms" must be
   # rejected rather than silently treated as days.
   my %secs_per = (
      s => 1,        # Seconds
      m => 60,       # Minutes
      h => 3600,     # Hours
      d => 86400,    # Days
   );
   die "Invalid suffix for $val: $suffix" unless exists $secs_per{$suffix};

   my $t = $num * $secs_per{$suffix};
   $t *= -1 if $prefix && $prefix eq '-';
   return $t;
}
1723 | 1719 | |||
# Scale a number down by repeated division and append the matching unit
# letter (k, M, G, T, P, E, Z, Y).
#
# Optional args:
#   p - decimal places in the result (default 2)
#   d - divisor between units (default 1024)
#
# A number that needs no scaling and has no fractional part is returned as
# a plain integer.
sub shorten {
   my ( $num, %args ) = @_;
   my $p = defined $args{p} ? $args{p} : 2;     # float precision
   my $d = defined $args{d} ? $args{d} : 1_024; # divisor
   my $n = 0;
   my @units = ('', qw(k M G T P E Z Y));
   while ( $num >= $d && $n < @units - 1 ) {
      $num /= $d;
      ++$n;
   }

   # Pass the unit only to the format that consumes it. Giving '%d' a second
   # argument raises "Redundant argument in sprintf" on Perl 5.22 and newer,
   # which is fatal under "use warnings FATAL => 'all'".
   return $num =~ m/\./ || $n
      ? sprintf("%.${p}f%s", $num, $units[$n])
      : sprintf('%d', $num);
}
1740 | 1736 | |||
# Format a Unix timestamp as YYYY-MM-DDTHH:MM:SS. The time is broken down
# with gmtime() when $gmt is true, otherwise with localtime(). If $time has
# a fractional part it is appended with six decimal places.
sub ts {
   my ( $time, $gmt ) = @_;
   my @broken_down = $gmt ? gmtime($time) : localtime($time);
   my ( $sec, $min, $hour, $mday, $mon, $year ) = @broken_down;

   my $stamp = sprintf("%d-%02d-%02dT%02d:%02d:%02d",
      $year + 1900, $mon + 1, $mday, $hour, $min, $sec);

   if ( $time =~ m/(\.\d+)$/ ) {
      my $fraction = sprintf("%.6f", $1);
      $fraction =~ s/^0\././;   # keep only ".dddddd"
      $stamp .= $fraction;
   }
   return $stamp;
}
1756 | 1752 | |||
# Convert a MySQL log timestamp (matched by $mysql_ts) to
# "YYYY-MM-DD HH:MM:SS", keeping six decimal places of the seconds when the
# input has a fractional part. The two-digit year is taken to be 20YY.
# A value in any other format is returned unchanged.
sub parse_timestamp {
   my ( $val ) = @_;
   my @fields = $val =~ m/^$mysql_ts$/;
   return $val unless @fields;

   my ( $y, $m, $d, $h, $i, $s, $f ) = @fields;
   if ( defined $f ) {
      return sprintf("%d-%02d-%02d %02d:%02d:%09.6f",
         $y + 2000, $m, $d, $h, $i, $s + $f);
   }
   return sprintf("%d-%02d-%02d %02d:%02d:%02d",
      $y + 2000, $m, $d, $h, $i, $s);
}
1768 | 1764 | |||
# Convert a timestamp matched by $proper_ts to a Unix timestamp, using
# timegm() when $gmt is true and timelocal() otherwise. A fractional part is
# carried over with six decimal places. A value in any other format is
# returned unchanged.
sub unix_timestamp {
   my ( $val, $gmt ) = @_;
   my @fields = $val =~ m/^$proper_ts$/;
   return $val unless @fields;

   my ( $y, $m, $d, $h, $i, $s, $us ) = @fields;
   my @time_parts = ( $s, $i, $h, $d, $m - 1, $y );
   my $epoch = $gmt ? timegm(@time_parts) : timelocal(@time_parts);
   return $epoch unless defined $us;

   my $fraction = sprintf('%.6f', $us);
   $fraction =~ s/^0\././;   # keep only ".dddddd"
   return $epoch . $fraction;
}
1783 | 1779 | |||
# Convert a time value in any of several formats to a Unix timestamp.
# Formats are tried in this order:
#   1. N[shmd]        - that many seconds/hours/minutes/days before now
#   2. 9+ digits      - already a Unix timestamp, returned as-is
#   3. YYMMDD [H:M:S] - MySQL slow log timestamp
#   4. YYYY-MM-DD [H:M:S]
#   5. anything else  - handed to $callback when it is a code reference
# Returns nothing when the value matches none of these and there is no
# usable callback.
sub any_unix_timestamp {
   my ( $val, $callback ) = @_;

   if ( my ($n, $suffix) = $val =~ m/^$n_ts$/ ) {
      # No suffix, or 's', means the number is already in seconds.
      my %secs_per = (
         m => 60,      # Minutes
         h => 3600,    # Hours
         d => 86400,   # Days
      );
      $n = $n * $secs_per{$suffix} if exists $secs_per{$suffix};
      PTDEBUG && _d('ts is now - N[shmd]:', $n);
      return time - $n;
   }

   if ( $val =~ m/^\d{9,}/ ) {
      PTDEBUG && _d('ts is already a unix timestamp');
      return $val;
   }

   my ( $ymd, $hms );
   if ( ($ymd, $hms) = $val =~ m/^(\d{6})(?:\s+(\d+:\d+:\d+))?/ ) {
      PTDEBUG && _d('ts is MySQL slow log timestamp');
      $val .= ' 00:00:00' unless $hms;
      return unix_timestamp(parse_timestamp($val));
   }

   if ( ($ymd, $hms) = $val =~ m/^(\d{4}-\d\d-\d\d)(?:[T ](\d+:\d+:\d+))?/) {
      PTDEBUG && _d('ts is properly formatted timestamp');
      $val .= ' 00:00:00' unless $hms;
      return unix_timestamp($val);
   }

   PTDEBUG && _d('ts is MySQL expression');
   return $callback->($val) if $callback && ref $callback eq 'CODE';

   PTDEBUG && _d('Unknown ts type:', $val);
   return;
}
1818 | 1814 | |||
# Return a 16-character checksum for $val: the last 16 hex digits of its
# MD5 digest, in upper case.
sub make_checksum {
   my ( $val ) = @_;
   my $digest   = md5_hex($val);
   my $checksum = uc substr($digest, -16);
   PTDEBUG && _d($checksum, 'checksum for', $val);
   return $checksum;
}
1825 | 1821 | |||
# Compute a CRC-32 checksum of $string, one character at a time, using the
# polynomial 0xEDB88320. Only the low 8 bits of each character's ordinal
# take part in the calculation.
#
# Returns the checksum as an unsigned integer, or nothing when $string is
# undefined or empty.
sub crc32 {
   my ( $string ) = @_;
   # Test for definedness and length rather than truth so that the valid
   # string "0" is checksummed instead of being rejected.
   return unless defined $string && length $string;
   my $poly = 0xEDB88320;
   my $crc  = 0xFFFFFFFF;
   foreach my $char ( split(//, $string) ) {
      my $comp = ($crc ^ ord($char)) & 0xFF;
      for ( 1 .. 8 ) {
         $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
      }
      $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
   }
   return $crc ^ 0xFFFFFFFF;
}
1840 | 1836 | |||
# Debug printer: writes its arguments to STDERR on one line, prefixed with
# "# <calling package>:<calling line> <PID>". Undefined arguments are shown
# as "undef" and embedded newlines are followed by "# ".
sub _d {
   my ( $pkg, undef, $lineno ) = caller 0;
   my @msgs = map { defined $_ ? $_ : 'undef' } @_;
   s/\n/\n# /g for @msgs;
   print STDERR "# $pkg:$lineno $PID ", join(' ', @msgs), "\n";
}
1848 | 1844 | |||
1849 | 1845 | 1; | ||
1850 | 1846 | } | ||
1851 | 1847 | # ########################################################################### | ||
1852 | 1848 | # End Transformers package | ||
1853 | 1849 | # ########################################################################### | ||
1854 | 1850 | |||
1855 | 1851 | # ########################################################################### | ||
1856 | 1852 | # QueryRewriter package | ||
1857 | 1853 | # This package is a copy without comments from the original. The original | ||
1858 | 1854 | # with comments and its test file can be found in the Bazaar repository at, | ||
1859 | 1855 | # lib/QueryRewriter.pm | ||
1860 | 1856 | # t/lib/QueryRewriter.t | ||
1861 | 1857 | # See https://launchpad.net/percona-toolkit for more information. | ||
1862 | 1858 | # ########################################################################### | ||
1863 | 1859 | { | ||
1864 | 1860 | package QueryRewriter; | ||
1865 | 1861 | |||
1866 | 1862 | use strict; | ||
1867 | 1863 | use warnings FATAL => 'all'; | ||
1868 | 1864 | use English qw(-no_match_vars); | ||
1869 | 1865 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
1870 | 1866 | |||
# SQL verbs that distill_verbs() looks for. Those prefixed with ^ are only
# recognized at the start of the query.
our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
                  |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
# A double- or single-quoted string; a quote preceded by a backslash does
# not end the string.
my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
# A parenthesized group with balanced, possibly nested, parentheses. The
# pattern refers to itself, so the variable is declared before assignment.
my $bal;
$bal = qr/
          \(
          (?:
             (?> [^()]+ )    # Non-parens without backtracking
             |
             (??{ $bal })    # Group with matching parens
          )*
          \)
         /x;

my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
1889 | 1885 | |||
1890 | 1886 | |||
# Construct a QueryRewriter. All named arguments are copied into the object
# unchanged; methods in this package read match_md5_checksums,
# match_embedded_numbers and QueryParser from it.
sub new {
   my ( $class, %args ) = @_;
   return bless { %args }, $class;
}
1896 | 1892 | |||
# Remove comments from a query: one-line comments, then multi-line comments
# other than /*! ... */ ones. For SHOW statements that contain a /*! ... */
# comment, the comments matched by $vlc_re are removed as well.
# Returns the stripped query, or nothing if $query is false.
sub strip_comments {
   my ( $self, $query ) = @_;
   return unless $query;

   $query =~ s/$olc_re//go;
   $query =~ s/$mlc_re//go;

   # contains show + version
   $query =~ s/$vlc_re//go if $query =~ m/$vlc_rf/i;

   return $query;
}
1907 | 1903 | |||
# Shorten a query for display.
#
# A multi-row INSERT/REPLACE ... VALUES (...),(...) keeps only its first row;
# the rest is replaced with "/*... omitted ...*/" (an ON DUPLICATE clause is
# kept). In addition, when $length is positive and the query is longer than
# $length, every IN (...) list with more than 20 items is cut down to its
# first 20 items. An undefined or non-positive $length leaves IN lists
# untouched.
#
# Returns the shortened query.
sub shorten {
   my ( $self, $query, $length ) = @_;
   $query =~ s{
      \A(
         (?:INSERT|REPLACE)
         (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
         (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
      )
      \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
      {$1 /*... omitted ...*/$2}xsi;

   return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;

   # Guard against an omitted $length: comparing undef numerically is a
   # fatal warning in this package.
   return $query unless defined $length && $length > 0;

   # One pass is enough: a list that has been shortened splits into at most
   # 20 items, so __shorten() would leave it alone on a second pass.
   if ( length($query) > $length ) {
      $query =~ s{
         (\bIN\s*\()    # The opening of an IN list
         ([^\)]+)       # Contents of the list, assuming no item contains paren
         (?=\))         # Close of the list
      }
      {
         $1 . __shorten($2)
      }gexsi;
   }

   return $query;
}

# Helper for shorten(): given the contents of an IN list, keep the first 20
# comma-separated items and replace the rest with a comment stating how many
# were omitted. Lists of 20 or fewer items are returned unchanged.
sub __shorten {
   my ( $snippet ) = @_;
   my @vals = split(/,/, $snippet);
   return $snippet unless @vals > 20;
   my @keep = splice(@vals, 0, 20);  # Remove and save the first 20 items
   return
      join(',', @keep)
      . "/*... omitted "
      . scalar(@vals)
      . " items ...*/";
}
1953 | 1949 | |||
# Reduce a query to its fingerprint: a lowercased, whitespace-collapsed form
# in which literal values are replaced with "?" so that queries differing
# only in their values produce the same string.
#
# Object settings read from $self:
#   match_md5_checksums    - also replace 32-hex-digit strings that follow
#                            ".", "_" or "-"
#   match_embedded_numbers - only replace numbers that start at a word
#                            boundary, leaving digits inside identifiers
#
# Returns the fingerprint. Some queries are short-circuited: mysqldump and
# percona-toolkit queries return a fixed label, administrator commands are
# returned unchanged, and CALL statements return "call <name>".
sub fingerprint {
   my ( $self, $query ) = @_;

   $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
      && return 'mysqldump';
   $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/#     # pt-table-checksum, etc query
      && return 'percona-toolkit';
   $query =~ m/\Aadministrator command: /
      && return $query;
   $query =~ m/\A\s*(call\s+\S+)\(/i
      && return lc($1); # Warning!  $1 used, be careful.
   if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
      $query = $beginning; # Shorten multi-value INSERT statements ASAP
   }

   # Strip one-line and multi-line comments.
   $query =~ s/$olc_re//go;
   $query =~ s/$mlc_re//go;
   $query =~ s/\Ause \S+\Z/use ?/i       # Abstract the DB in USE
      && return $query;

   # Remove escaped quotes first so that the non-greedy matches below stop
   # at the real end of each string.
   $query =~ s/\\["']//g;                # quoted strings
   $query =~ s/".*?"/?/sg;               # quoted strings
   $query =~ s/'.*?'/?/sg;               # quoted strings

   if ( $self->{match_md5_checksums} ) {
      $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
   }

   # Replace numbers (including hex and binary notation and signs).
   if ( !$self->{match_embedded_numbers} ) {
      $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
   }
   else {
      $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
   }

   # Clean up a prefix character left in front of a "?". With
   # match_md5_checksums a "." is kept.
   if ( $self->{match_md5_checksums} ) {
      $query =~ s/[xb+-]\?/?/g;
   }
   else {
      $query =~ s/[xb.+-]\?/?/g;
   }

   $query =~ s/\A\s+//;                  # Chop off leading whitespace
   chomp $query;                         # Remove a trailing record separator
   $query =~ tr[ \n\t\r\f][ ]s;          # Collapse whitespace
   $query = lc $query;
   $query =~ s/\bnull\b/?/g;             # Get rid of NULLs
   $query =~ s{                          # Collapse IN and VALUES lists
               \b(in|values?)(?:[\s,]*\([\s?,]*\))+
              }
              {$1(?+)}gx;
   $query =~ s{                          # Collapse UNION
               \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
              }
              {$1 /*repeat$2*/}xg;
   $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT

   # ASC is removed after ORDER BY; \G continues from where the ORDER BY
   # match ended.
   if ( $query =~ m/\bORDER BY /gi ) {  # Find, anchor on ORDER BY clause
      1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
   }

   return $query;
}
2017 | 2013 | |||
# Return the verbs of a query as an upper-case string, e.g. "SELECT" or
# "INSERT SELECT".
#
# Special cases handled before general verb extraction: CALL, USE,
# UNLOCK TABLES, XA, administrator commands and SHOW. For data definition
# statements (as matched by $QueryParser::data_def_stmts) a two-element list
# is returned instead: the verb string and the database or table name.
sub distill_verbs {
   my ( $self, $query ) = @_;

   $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
   # NOTE(review): unlike the neighbouring tests this one has no /i, so an
   # upper-case "USE db" is not caught here -- confirm whether intentional.
   $query =~ m/\A\s*use\s+/          && return "USE";
   $query =~ m/\A\s*UNLOCK TABLES/i  && return "UNLOCK";
   $query =~ m/\A\s*xa\s+(\S+)/i     && return "XA_$1";

   if ( $query =~ m/\Aadministrator command:/ ) {
      $query =~ s/administrator command:/ADMIN/;
      $query = uc $query;
      return $query;
   }

   $query = $self->strip_comments($query);

   if ( $query =~ m/\A\s*SHOW\s+/i ) {
      PTDEBUG && _d($query);

      # Reduce SHOW to its essential words: drop modifiers and COUNT(...),
      # cut everything from the first clause keyword on, then keep at most
      # two words after SHOW.
      $query = uc $query;
      $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
      $query =~ s/\s+COUNT[^)]+\)//g;

      $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;

      $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
      $query =~ s/\s+/ /g;
      PTDEBUG && _d($query);
      return $query;
   }

   # NOTE(review): the results of these string evals are discarded;
   # presumably they exist only to reference the QueryParser package
   # variables -- confirm before changing.
   eval $QueryParser::data_def_stmts;
   eval $QueryParser::tbl_ident;
   my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
   if ( $dds) {
      my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
      $obj = uc $obj if $obj;
      PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
      my ($db_or_tbl)
         = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
      PTDEBUG && _d('Matches db or table:', $db_or_tbl);
      return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
   }

   # Collect every verb in the query, upper-cased, dropping a verb that
   # merely repeats the one before it.
   my @verbs = $query =~ m/\b($verbs)\b/gio;
   @verbs    = do {
      my $last = '';
      grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
   };

   # Anything found after a leading SELECT is treated as a false positive,
   # except UNION.
   if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
      PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
      my $union = grep { $_ eq 'UNION' } @verbs;
      @verbs    = $union ? qw(SELECT UNION) : qw(SELECT);
   }

   my $verb_str = join(q{ }, @verbs);
   return $verb_str;
}
2077 | 2073 | |||
# Return the table names used by a query, as reported by the QueryParser's
# get_tables(), with backticks removed and each run of digits replaced by
# "?". $table, when true, is appended to the list. A name identical to the
# one immediately before it is dropped.
#
# The QueryParser object comes from the QueryParser argument or, failing
# that, from $self->{QueryParser}; dies if neither is set.
sub __distill_tables {
   my ( $self, $query, $table, %args ) = @_;
   my $qp = $args{QueryParser} || $self->{QueryParser};
   die "I need a QueryParser argument" unless $qp;

   my @names;
   foreach my $found ( $qp->get_tables($query) ) {
      next unless defined $found;
      (my $name = $found) =~ s/`//g;
      $name =~ s/(_?)[0-9]+/$1?/g;
      push @names, $name;
   }

   push @names, $table if $table;

   # Drop consecutive duplicates.
   my @tables;
   my $previous = '';
   foreach my $name ( @names ) {
      push @tables, $name if $name ne $previous;
      $previous = $name;
   }

   return @tables;
}
2098 | 2094 | |||
# Distill a query to a short summary string.
#
# Args:
#   generic - when true the query is treated as "<command> <argument>": the
#             first word is upper-cased and followed by the second word;
#             returns '' if the query does not have that shape
#   trf     - optional coderef called with the distilled string and %args;
#             its return value becomes the result
#
# Without "generic" the summary is the verbs from distill_verbs() followed
# by the tables from __distill_tables(). SHOW statements are summarized by
# their verbs only, with SCHEMA, KEYS and INDEXES replaced by their aliases.
sub distill {
   my ( $self, $query, %args ) = @_;

   my $distilled;
   if ( $args{generic} ) {
      my ( $cmd, $arg ) = $query =~ m/^(\S+)\s+(\S+)/;
      return '' unless $cmd;
      $distilled = uc $cmd;
      $distilled .= " $arg" if $arg;
   }
   else {
      my ( $verbs, $table ) = $self->distill_verbs($query, %args);

      if ( $verbs && $verbs =~ m/^SHOW/ ) {
         my %alias_for = qw(
            SCHEMA   DATABASE
            KEYS     INDEX
            INDEXES  INDEX
         );
         foreach my $word ( keys %alias_for ) {
            $verbs =~ s/$word/$alias_for{$word}/;
         }
         $distilled = $verbs;
      }
      else {
         my @tables = $self->__distill_tables($query, $table, %args);
         $distilled = join(q{ }, $verbs, @tables);
      }
   }

   $distilled = $args{trf}->($distilled, %args) if $args{trf};

   return $distilled;
}
2131 | 2127 | |||
# Rewrite an UPDATE, INSERT, REPLACE or DELETE statement as a SELECT built
# from the same tables and conditions. The four substitutions below are
# tried in order and the first one that matches wins.
#
# Returns the rewritten query, or nothing when $query is false or contains
# "= (SELECT" (a subquery used as a value). A query that matches none of the
# patterns is returned with only the final two clean-ups applied.
sub convert_to_select {
   my ( $self, $query ) = @_;
   return unless $query;

   return if $query =~ m/=\s*\(\s*SELECT /i;

   # 1. UPDATE tbl SET ... [WHERE ...] [LIMIT ...]
   $query =~ s{
                 \A.*?
                 update(?:\s+(?:low_priority|ignore))?\s+(.*?)
                 \s+set\b(.*?)
                 (?:\s*where\b(.*?))?
                 (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
                 \Z
              }
              {__update_to_select($1, $2, $3, $4)}exsi
      # 2. INSERT/REPLACE ... INTO tbl (cols) VALUES (vals)
      || $query =~ s{
                    \A.*?
                    (?:insert(?:\s+ignore)?|replace)\s+
                    .*?\binto\b(.*?)\(([^\)]+)\)\s*
                    values?\s*(\(.*?\))\s*
                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
                    \Z
                 }
                 {__insert_to_select($1, $2, $3)}exsi
      # 3. INSERT/REPLACE ... INTO tbl SET col=val, ...
      || $query =~ s{
                    \A.*?
                    (?:insert(?:\s+ignore)?|replace)\s+
                    (?:.*?\binto)\b(.*?)\s*
                    set\s+(.*?)\s*
                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
                    \Z
                 }
                 {__insert_to_select_with_set($1, $2)}exsi
      # 4. DELETE ... FROM ...
      || $query =~ s{
                    \A.*?
                    delete\s+(.*?)
                    \bfrom\b(.*)
                    \Z
                 }
                 {__delete_to_select($1, $2)}exsi;
   # Drop any remaining ON DUPLICATE KEY UPDATE clause and anything that
   # precedes the first SELECT.
   $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
   $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
   return $query;
}
2176 | 2172 | |||
# Replace the column list of a leading SELECT ... FROM. A list containing
# "*" becomes "1"; any other list is wrapped as isnull(coalesce(<list>)).
# Returns the query, unchanged if it does not start with SELECT ... FROM.
sub convert_select_list {
   my ( $self, $query ) = @_;
   $query =~ s{
      \A\s*select(.*?)\bfrom\b
   }
   {
      my $select_list = $1;
      index($select_list, '*') >= 0
         ? "select 1 from"
         : "select isnull(coalesce($select_list)) from";
   }exi;
   return $query;
}
2185 | 2181 | |||
# Build the SELECT equivalent of a DELETE. $join is the text after FROM;
# when it contains a (lower-case) "join" the select list is "1", otherwise
# "*". $delete, the text between DELETE and FROM, is not used.
sub __delete_to_select {
   my ( $delete, $join ) = @_;
   my $select_list = $join =~ m/\bjoin\b/ ? '1' : '*';
   return "select $select_list from $join";
}
2193 | 2189 | |||
# Build the SELECT equivalent of INSERT ... (cols) VALUES (vals). When the
# number of columns equals the number of values the result selects the rows
# where every column equals its value; otherwise it falls back to
# "select * from <tbl> limit 1".
sub __insert_to_select {
   my ( $tbl, $cols, $vals ) = @_;
   PTDEBUG && _d('Args:', @_);
   my @cols = split(/,/, $cols);
   PTDEBUG && _d('Cols:', @cols);
   $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
   # A value is a quoted string, text containing a balanced parenthesized
   # group, or any run of non-comma characters.
   my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
   PTDEBUG && _d('Vals:', @vals);

   return "select * from $tbl limit 1" unless @cols == @vals;

   my @conditions = map { "$cols[$_]=$vals[$_]" } (0..$#cols);
   return "select * from $tbl where " . join(' and ', @conditions);
}
2210 | 2206 | |||
# Build the SELECT equivalent of INSERT ... SET a=1,b=2: every comma in
# the SET list becomes " and " so the assignments read as predicates.
sub __insert_to_select_with_set {
   my ( $from, $set ) = @_;
   # Limit -1 keeps empty trailing fields, so every comma is replaced.
   my $where = join(' and ', split(/,/, $set, -1));
   return "select * from $from where $where ";
}
2216 | 2212 | |||
# Build the SELECT equivalent of an UPDATE: the SET list becomes the
# select list, and the WHERE / LIMIT parts are appended when present.
sub __update_to_select {
   my ( $from, $set, $where, $limit ) = @_;
   my $select = "select $set from $from ";
   $select .= "where $where" if $where;
   $select .= " $limit "     if $limit;
   return $select;
}
2223 | 2219 | |||
# Wrap a SELECT in a derived table ("select 1 from (...) as x limit 1").
# Queries that do not start with SELECT are returned unchanged; an empty
# or false query returns nothing.
sub wrap_in_derived {
   my ( $self, $query ) = @_;
   return unless $query;
   if ( $query =~ m/\A\s*select/i ) {
      return "select 1 from ($query) as x limit 1";
   }
   return $query;
}
2231 | 2227 | |||
# Debug printer: writes "# Package:line PID args..." to STDERR.
# Undefined args are shown as 'undef'; embedded newlines are followed by
# "# " so multi-line values stay commented.
sub _d {
   my ( $pkg, undef, $lineno ) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $pkg:$lineno $PID ", join(' ', @parts), "\n";
}
2239 | 2235 | |||
2240 | 2236 | 1; | ||
2241 | 2237 | } | ||
2242 | 2238 | # ########################################################################### | ||
2243 | 2239 | # End QueryRewriter package | ||
2244 | 2240 | # ########################################################################### | ||
2245 | 2241 | |||
2246 | 2242 | # ########################################################################### | ||
2247 | 2243 | # QueryParser package | ||
2248 | 2244 | # This package is a copy without comments from the original. The original | ||
2249 | 2245 | # with comments and its test file can be found in the Bazaar repository at, | ||
2250 | 2246 | # lib/QueryParser.pm | ||
2251 | 2247 | # t/lib/QueryParser.t | ||
2252 | 2248 | # See https://launchpad.net/percona-toolkit for more information. | ||
2253 | 2249 | # ########################################################################### | ||
2254 | 2250 | { | ||
2255 | 2251 | package QueryParser; | ||
2256 | 2252 | |||
2257 | 2253 | use strict; | ||
2258 | 2254 | use warnings FATAL => 'all'; | ||
2259 | 2255 | use English qw(-no_match_vars); | ||
2260 | 2256 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
2261 | 2257 | |||
# A table identifier: a backtick-quoted name or a bare word, optionally
# followed by ".name" (i.e. tbl or db.tbl).
our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
# Captures the comma-separated table list that follows FROM, JOIN, INTO
# or UPDATE (the lookbehind excludes the UPDATE of "... KEY UPDATE").
our $tbl_regex = qr{
         \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
         \b\s*
         \(?                                   # Optional paren around tables
         ($tbl_ident
            (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
         )
      }xio;
# Matches "(SELECT" directly after FROM, JOIN or a comma, i.e. a derived
# table in the table references.
our $has_derived = qr{
      \b(?:FROM|JOIN|,)
      \s*\(\s*SELECT
   }xi;

# Leading verbs of data definition statements.
our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;

# Leading verbs of data manipulation statements.
our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
2279 | 2275 | |||
# Constructor: returns an empty hash blessed into $class.
sub new {
   my ( $class ) = @_;
   my $self = {};
   return bless $self, $class;
}
2284 | 2280 | |||
# Return the list of table names (possibly db-qualified, possibly
# backtick-quoted) referenced by $query.
#   * DDL statements (CREATE/ALTER/TRUNCATE/DROP/RENAME) return the single
#     table after the TABLE keyword, nothing for ... DATABASE statements,
#     and the tables of the SELECT part for CREATE ... SELECT.
#   * LOCK TABLES is rewritten to "FROM <tables>" so the generic table
#     regex can handle it.
#   * Everything else is scanned with $tbl_regex after quoted strings have
#     been replaced by "?".
# Returns nothing for an empty query.
sub get_tables {
   my ( $self, $query ) = @_;
   return unless $query;
   PTDEBUG && _d('Getting tables for', $query);

   my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
   if ( $ddl_stmt ) {
      PTDEBUG && _d('Special table type:', $ddl_stmt);
      $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
      if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
         PTDEBUG && _d('Query alters a database, not a table');
         return ();
      }
      if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
         my ($select) = $query =~ m/\b(SELECT\b.+)/is;
         PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
         return $self->get_tables($select);
      }
      my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
      PTDEBUG && _d('Matches table:', $tbl);
      return ($tbl);
   }

   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;

   if ( $query =~ /^\s*LOCK TABLES/i ) {
      PTDEBUG && _d('Special table type: LOCK TABLES');
      # Detection above is case-insensitive, so the stripping must be too;
      # otherwise "lock tables t read" would report "lock" as a table.
      $query =~ s/^(\s*LOCK TABLES\s+)//i;
      # Remove whole lock-type words only (\b keeps table names such as
      # "readme" intact) and consecutive ones together ("READ LOCAL").
      $query =~ s/(?:\s+(?:READ|WRITE|LOCAL)\b)+\s*//gi;
      PTDEBUG && _d('Locked tables:', $query);
      $query = "FROM $query";
   }

   $query =~ s/\\["']//g;   # quoted strings
   $query =~ s/".*?"/?/sg;  # quoted strings
   $query =~ s/'.*?'/?/sg;  # quoted strings

   my @tables;
   foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
      PTDEBUG && _d('Match tables:', $tbls);

      # "FROM (SELECT ..." captures the SELECT keyword, not a table.
      next if $tbls =~ m/\ASELECT\b/i;

      foreach my $tbl ( split(',', $tbls) ) {
         # Keep only the identifier, dropping any alias after it.
         $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;

         if ( $tbl !~ m/[a-zA-Z]/ ) {
            PTDEBUG && _d('Skipping suspicious table name:', $tbl);
            next;
         }

         push @tables, $tbl;
      }
   }
   return @tables;
}
2341 | 2337 | |||
# True (1) if $query matches $has_derived, i.e. a "(SELECT" follows
# FROM, JOIN or a comma; the empty string otherwise.
sub has_derived_table {
   my ( $self, $query ) = @_;
   my $is_derived = ( $query =~ m/$has_derived/ );
   if ( PTDEBUG ) {
      my $article = $is_derived ? 'a' : 'no';
      _d($query, 'has ' . $article . ' derived table');
   }
   return $is_derived;
}
2348 | 2344 | |||
# Parse the table references of $query.
#   $list false: returns { DATABASE => { tbl => db }, TABLE => { alias_or_tbl => tbl } }
#   $list true : returns an arrayref of the raw table reference strings
# An empty query returns the empty result hashref.
sub get_aliases {
   my ( $self, $query, $list ) = @_;

   my $result = {
      DATABASE => {},
      TABLE    => {},
   };
   return $result unless $query;

   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;

   $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;

   my @tbl_refs;
   my ($tbl_refs, $from) = $query =~ m{
      (
         (FROM|INTO|UPDATE)\b\s*   # Keyword before table refs
         .+?                       # Table refs
      )
      (?:\s+|\z)                   # If the query does not end with the table
      (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
   }ix;

   if ( $tbl_refs ) {

      if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
         # Drop the column list of INSERT/REPLACE INTO tbl (cols).
         $tbl_refs =~ s/\([^\)]+\)\s*//;
      }

      PTDEBUG && _d('tbl refs:', $tbl_refs);

      my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;

      my $after_tbl  = qr/(?:,|JOIN|ON|USING|\z)/i;

      $tbl_refs =~ s/ = /=/g;

      # No /o here: $before_tbl embeds this call's $from keyword, and /o
      # would freeze the pattern built by the very first call.
      while (
         $tbl_refs =~ m{
            $before_tbl\b\s*
               ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
            \s*$after_tbl
         }xgi )
      {
         my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
         PTDEBUG && _d('Match table:', $tbl_ref);
         push @tbl_refs, $tbl_ref;
         $alias = $self->trim_identifier($alias);

         # "AS x" with no table in front is the alias of a subquery.
         if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
            PTDEBUG && _d('Subquery', $tbl_ref);
            $result->{TABLE}->{$alias} = undef;
            next;
         }

         my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
         $db  = $self->trim_identifier($db);
         $tbl = $self->trim_identifier($tbl);
         $result->{TABLE}->{$alias || $tbl} = $tbl;
         $result->{DATABASE}->{$tbl}        = $db if $db;
      }
   }
   else {
      PTDEBUG && _d("No tables ref in", $query);
   }

   if ( $list ) {
      return \@tbl_refs;
   }
   else {
      return $result;
   }
}
2422 | 2418 | |||
# Split a compound query into its statements.  The cleaned query is split
# on the statement verbs (keeping each verb), then every verb is glued
# back onto the text that follows it.  A verb that directly follows
# "on duplicate key" is merged into the previous statement.
# Returns the list of statements, or nothing for an empty query.
sub split {
   my ( $self, $query ) = @_;
   return unless $query;
   $query = $self->clean_query($query);
   PTDEBUG && _d('Splitting', $query);

   my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;

   # A verb immediately followed by "(" is not a split point.
   my @pieces = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);

   my @statements;
   if ( @pieces == 1 ) {
      # Nothing to split on: the query is a single statement.
      push @statements, $query;
   }
   else {
      my $idx = 0;
      while ( $idx <= $#pieces ) {
         push @statements, $pieces[$idx].$pieces[$idx+1];

         if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
            $statements[-2] .= pop @statements;
         }
         $idx += 2;
      }
   }

   PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
   return @statements;
}
2450 | 2446 | |||
# Normalize a query: replace /* ... */ comment blocks with a space, strip
# leading/trailing whitespace and collapse whitespace runs to one space.
# Returns nothing for an empty query.
sub clean_query {
   my ( $self, $query ) = @_;
   return unless $query;
   # /s lets "." cross newlines so multi-line comment blocks are removed too.
   $query =~ s!/\*.*?\*/! !gs;  # Remove /* comment blocks */
   $query =~ s/^\s+//;          # Remove leading spaces
   $query =~ s/\s+$//;          # Remove trailing spaces
   $query =~ s/\s{2,}/ /g;      # Remove extra spaces
   return $query;
}
2460 | 2456 | |||
# Pull subqueries out of $query.
# The cleaned query is walked one whitespace-delimited word at a time.
# A word that starts with an optional "(" followed by SELECT, and that is
# not at position 0, opens a new subquery record; following words are
# appended to the most recently opened record.  After the walk each
# recorded subquery's text inside $query is replaced by "__subquery_N".
# Returns the rewritten query followed by the SQL of every subquery.
# Returns nothing for an empty query.
sub split_subquery {
   my ( $self, $query ) = @_;
   return unless $query;
   $query = $self->clean_query($query);
   $query =~ s/;$//;

   my @subqueries;
   my $sqno = 0;  # subquery number
   my $pos  = 0;
   while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
      $pos  = pos($query);  # offset just past this word and its whitespace
      my $word = $1;
      PTDEBUG && _d($word, $sqno);
      if ( $word =~ m/^\(?SELECT\b/i ) {
         # NOTE(review): the -1 presumably accounts for one trailing
         # whitespace character after the word -- confirm.
         my $start_pos = $pos - length($word) - 1;
         if ( $start_pos ) {
            $sqno++;
            PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
            $subqueries[$sqno] = {
               start_pos => $start_pos,
               end_pos   => 0,
               len       => 0,
               words     => [$word],
               lp        => 1, # left parentheses
               rp        => 0, # right parentheses
               done      => 0,
            };
         }
         else {
            PTDEBUG && _d('Main SELECT at pos 0');
         }
      }
      else {
         next unless $sqno;  # next unless we're in a subquery
         PTDEBUG && _d('In subquery', $sqno);
         my $sq = $subqueries[$sqno];
         # NOTE(review): nothing in this sub ever sets {done} to true, so
         # this branch is not taken as written.
         if ( $sq->{done} ) {
            PTDEBUG && _d('This subquery is done; SQL is for',
               ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
            next;
         }
         push @{$sq->{words}}, $word;
         my $lp = ($word =~ tr/\(//) || 0;
         my $rp = ($word =~ tr/\)//) || 0;
         PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
         # The stored lp/rp are never updated, so the balance is judged
         # from this word's parentheses plus the initial lp => 1.
         if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
            my $end_pos = $pos - 1;
            PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
            $sq->{end_pos} = $end_pos;
            $sq->{len}     = $end_pos - $sq->{start_pos};
         }
      }
   }

   # Index 0 is never populated ($sqno is incremented before first use).
   for my $i ( 1..$#subqueries ) {
      my $sq = $subqueries[$i];
      next unless $sq;
      $sq->{sql} = join(' ', @{$sq->{words}});
      # 4-arg substr replaces the subquery text in place.
      substr $query,
         $sq->{start_pos} + 1,  # +1 for (
         $sq->{len} - 1,        # -1 for )
         "__subquery_$i";
   }

   return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
}
2527 | 2523 | |||
# Classify $query using the first value returned by
# $qr->distill_verbs($query).
# Returns { type => <verb>, rw => 'read' | 'write' | undef }: SELECT is a
# read; data manipulation and data definition verbs are writes.
sub query_type {
   my ( $self, $query, $qr ) = @_;
   my ( $verb ) = $qr->distill_verbs($query);

   my $rw = $verb =~ m/^SELECT\b/            ? 'read'
          : $verb =~ m/^$data_manip_stmts\b/ ? 'write'
          : $verb =~ m/^$data_def_stmts\b/   ? 'write'
          :                                    undef;

   return {
      type => $verb,
      rw   => $rw,
   };
}
2545 | 2541 | |||
# Return an arrayref of the column expressions of $query:
#   SELECT          - the select list between SELECT and FROM, after any
#                     leading modifiers (DISTINCT, SQL_NO_CACHE, ...) are
#                     removed
#   INSERT/REPLACE  - the parenthesized column list before VALUE(S)
# Each entry has surrounding whitespace trimmed.  Returns an empty
# arrayref for an empty query or when no column list is found.
sub get_columns {
   my ( $self, $query ) = @_;
   my $cols = [];
   return $cols unless $query;
   my $cols_def;

   if ( $query =~ m/^SELECT/i ) {
      # Strip every leading modifier, not just the first one, so that
      # "SELECT DISTINCT SQL_NO_CACHE a" yields column "a".
      $query =~ s/
         ^SELECT\s+
           (?:
              (?:ALL
              |DISTINCT
              |DISTINCTROW
              |HIGH_PRIORITY
              |STRAIGHT_JOIN
              |SQL_SMALL_RESULT
              |SQL_BIG_RESULT
              |SQL_BUFFER_RESULT
              |SQL_CACHE
              |SQL_NO_CACHE
              |SQL_CALC_FOUND_ROWS
              )\s+
           )+
      /SELECT /xi;
      ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
   }
   elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
      ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
   }

   PTDEBUG && _d('Columns:', $cols_def);
   if ( $cols_def ) {
      @$cols = split(',', $cols_def);
      # Trim each column in place (the loop variable aliases the element).
      foreach my $col ( @$cols ) {
         $col =~ s/^\s+//;
         $col =~ s/\s+$//;
      }
   }

   return $cols;
}
2587 | 2583 | |||
# Parse $query into a hashref with the keys:
#   query       - the cleaned, single-line query
#   tables      - arrayref of table references (get_aliases in list mode)
#   columns     - arrayref of column expressions (get_columns)
#   type        - lower-cased first word of the query, undef if the
#                 cleaned query does not start with a word
#   sub_queries - always an empty arrayref
# Returns nothing for an empty query.
sub parse {
   my ( $self, $query ) = @_;
   return unless $query;
   my $parsed = {};

   $query =~ s/\n/ /g;
   $query = $self->clean_query($query);

   $parsed->{query}   = $query;
   $parsed->{tables}  = $self->get_aliases($query, 1);
   $parsed->{columns} = $self->get_columns($query);

   # Guard the lc: an unmatched query would otherwise die on an undefined
   # value under "use warnings FATAL".
   my ($type) = $query =~ m/^(\w+)/;
   $parsed->{type} = defined $type ? lc $type : undef;

   $parsed->{sub_queries} = [];

   return $parsed;
}
2608 | 2604 | |||
# Return a list of [ db, tbl ] pairs for the tables used by a query.
# Named args:
#   query      - the SQL text (nothing is returned when it is empty)
#   default_db - database used for tables without a db qualifier
#   Quoter     - object providing split_unquote(); $self->{Quoter} wins
# Each distinct table name is reported once.
sub extract_tables {
   my ( $self, %args ) = @_;
   my ( $query, $default_db ) = @args{qw(query default_db)};
   my $quoter = $self->{Quoter} || $args{Quoter};
   return unless $query;
   PTDEBUG && _d('Extracting tables');

   # Unique-ify for issue 337.
   my %seen;
   my @uniq_tbls = grep { $_ && !$seen{$_}++ } $self->get_tables($query);

   my @tables;
   foreach my $db_tbl ( @uniq_tbls ) {
      my ( $db, $tbl ) = $quoter->split_unquote($db_tbl);
      push @tables, [ $db || $default_db, $tbl ];
   }
   return @tables;
}
2626 | 2622 | |||
# Remove all backticks and the surrounding whitespace from an identifier.
# Returns nothing when $str is undefined.
sub trim_identifier {
   my ($self, $str) = @_;
   return unless defined $str;
   $str =~ tr/`//d;
   for ( $str ) {
      s/^\s+//;
      s/\s+$//;
   }
   return $str;
}
2635 | 2631 | |||
# Debug printer: writes "# Package:line PID args..." to STDERR.
# Undefined args are shown as 'undef'; embedded newlines are followed by
# "# " so multi-line values stay commented.
sub _d {
   my ( $pkg, undef, $lineno ) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $pkg:$lineno $PID ", join(' ', @parts), "\n";
}
2643 | 2639 | |||
2644 | 2640 | 1; | ||
2645 | 2641 | } | ||
2646 | 2642 | # ########################################################################### | ||
2647 | 2643 | # End QueryParser package | ||
2648 | 2644 | # ########################################################################### | ||
2649 | 2645 | |||
2650 | 2646 | # ########################################################################### | ||
2651 | 2647 | # FileIterator package | ||
2652 | 2648 | # This package is a copy without comments from the original. The original | ||
2653 | 2649 | # with comments and its test file can be found in the Bazaar repository at, | ||
2654 | 2650 | # lib/FileIterator.pm | ||
2655 | 2651 | # t/lib/FileIterator.t | ||
2656 | 2652 | # See https://launchpad.net/percona-toolkit for more information. | ||
2657 | 2653 | # ########################################################################### | ||
2658 | 2654 | { | ||
2659 | 2655 | package FileIterator; | ||
2660 | 2656 | |||
2661 | 2657 | use strict; | ||
2662 | 2658 | use warnings FATAL => 'all'; | ||
2663 | 2659 | use English qw(-no_match_vars); | ||
2664 | 2660 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
2665 | 2661 | |||
# Constructor: stores the given named args in a hash blessed into $class.
sub new {
   my ( $class, %args ) = @_;
   return bless { %args }, $class;
}
2673 | 2669 | |||
# Return an iterator (code ref) over the given filenames.
# Undefined names and files that do not exist or are not readable are
# skipped with a warning; "-" means STDIN and is also used when no
# filenames are given at all.
# Each call of the iterator returns ( $fh, $filename, $size ) for the next
# file that can be opened, ( *STDIN, undef, undef ) for "-", or the empty
# list when the files are exhausted.
sub get_file_itr {
   my ( $self, @filenames ) = @_;

   my @final_filenames;
   FILENAME:
   foreach my $fn ( @filenames ) {
      if ( !defined $fn ) {
         warn "Skipping undefined filename";
         next FILENAME;
      }
      if ( $fn ne '-' ) {
         if ( !-e $fn || !-r $fn ) {
            warn "$fn does not exist or is not readable";
            next FILENAME;
         }
      }
      push @final_filenames, $fn;
   }

   if ( !@filenames ) {
      push @final_filenames, '-';
      PTDEBUG && _d('Auto-adding "-" to the list of filenames');
   }

   PTDEBUG && _d('Final filenames:', @final_filenames);
   return sub {
      while ( @final_filenames ) {
         my $fn = shift @final_filenames;
         PTDEBUG && _d('Filename:', $fn);
         if ( $fn eq '-' ) { # Magical STDIN filename.
            return (*STDIN, undef, undef);
         }
         # Decide on open()'s return value, not on $fh: "open my $fh"
         # populates $fh even when the open fails.
         if ( open my $fh, '<', $fn ) {
            return ( $fh, $fn, -s $fn );
         }
         warn "Cannot open $fn: $OS_ERROR";
      }
      return (); # Avoids $f being set to 0 in list context.
   };
}
2714 | 2710 | |||
# Debug printer: writes "# Package:line PID args..." to STDERR.
# Undefined args are shown as 'undef'; embedded newlines are followed by
# "# " so multi-line values stay commented.
sub _d {
   my ( $pkg, undef, $lineno ) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $pkg:$lineno $PID ", join(' ', @parts), "\n";
}
2722 | 2718 | |||
2723 | 2719 | 1; | ||
2724 | 2720 | } | ||
2725 | 2721 | # ########################################################################### | ||
2726 | 2722 | # End FileIterator package | ||
2727 | 2723 | # ########################################################################### | ||
2728 | 2724 | |||
2729 | 2725 | # ########################################################################### | ||
2730 | 2726 | # SQLParser r0 | ||
2731 | 2727 | # Don't update this package! | ||
2732 | 2728 | # ########################################################################### | ||
2733 | 2729 | |||
2734 | 2730 | package SQLParser; | ||
2735 | 2731 | |||
2736 | 2732 | { # package scope | ||
2737 | 2733 | use strict; | ||
2738 | 2734 | use warnings FATAL => 'all'; | ||
2739 | 2735 | use English qw(-no_match_vars); | ||
2740 | 2736 | use constant MKDEBUG => $ENV{MKDEBUG} || 0; | ||
2741 | 2737 | |||
2742 | 2738 | use Data::Dumper; | ||
2743 | 2739 | $Data::Dumper::Indent = 1; | ||
2744 | 2740 | $Data::Dumper::Sortkeys = 1; | ||
2745 | 2741 | $Data::Dumper::Quotekeys = 0; | ||
2746 | 2742 | |||
# Building-block patterns for identifiers, shared by the subs of this
# package.

# A backtick-quoted identifier.
my $quoted_ident   = qr/`[^`]+`/;
# A bare identifier, optionally a variable or a function call.
my $unquoted_ident = qr/
   \@{0,2}         # optional @ or @@ for variables
   \w+             # the ident name
   (?:\([^\)]*\))? # optional function params
/x;

# Captures the optional AS keyword and the alias that follows an
# identifier.
my $ident_alias = qr/
  \s+                                 # space before alias
  (?:(AS)\s+)?                        # optional AS keyword
  ((?>$quoted_ident|$unquoted_ident)) # alais
/xi;

# A table reference: one or two dot-separated identifiers plus an
# optional alias.
my $table_ident = qr/(?:
   ((?:(?>$quoted_ident|$unquoted_ident)\.?){1,2}) # table
   (?:$ident_alias)?                               # optional alias
)/xo;

# A column reference: up to three dot-separated identifiers (or *) plus
# an optional alias.
my $column_ident = qr/(?:
   ((?:(?>$quoted_ident|$unquoted_ident|\*)\.?){1,3}) # column
   (?:$ident_alias)?                                  # optional alias
)/xo;

# Captures a function call: name, opening paren, at least one non-")"
# character, closing paren.
my $function_ident = qr/
   \b
   (
      \w+      # function name
      \(       # opening parenthesis
      [^\)]+   # function args, if any
      \)       # closing parenthesis
   )
/x;

# NOTE(review): presumably words that look like function calls but are
# not (e.g. INDEX(...), KEY(...)); the code using this hash is not in
# view -- confirm.
my %ignore_function = (
   INDEX => 1,
   KEY   => 1,
);
2784 | 2780 | |||
# Constructor: stores the given named args in a hash blessed into $class.
sub new {
   my ( $class, %args ) = @_;
   return bless { %args }, $class;
}
2792 | 2788 | |||
# Parse $query into a struct (hashref).
# The first word selects the statement type, which must match one of
# DELETE, INSERT, REPLACE, SELECT, UPDATE or CREATE; the rest of the
# query is handed to the matching parse_<type> method and its clauses
# are then parsed by _parse_clauses.  Subqueries, and the SELECT part of
# CREATE ... SELECT, are split off first, parsed recursively and stored
# under {subqueries}.
# Returns nothing for an empty query or when parse_<type> returns false.
# Dies if the query does not begin with a word or the type is not allowed.
sub parse {
   my ( $self, $query ) = @_;
   return unless $query;

   my $allowed_types = qr/(?:
       DELETE
      |INSERT
      |REPLACE
      |SELECT
      |UPDATE
      |CREATE
   )/xi;

   $query = $self->clean_query($query);

   # Chop the statement verb off the front of the query.
   $query =~ s/^(\w+)\s+//
      or die "Query does not begin with a word";  # shouldn't happen
   my $type = lc $1;
   MKDEBUG && _d('Query type:', $type);
   die "Cannot parse " . uc($type) . " queries"
      if $type !~ m/$allowed_types/i;

   $query = $self->normalize_keyword_spaces($query);

   my @subqueries;
   if ( $query =~ m/(\(SELECT )/i ) {
      MKDEBUG && _d('Removing subqueries');
      # First element returned is the query itself, minus its subqueries.
      @subqueries = $self->remove_subqueries($query);
      $query      = shift @subqueries;
   }
   elsif ( $type eq 'create' && $query =~ m/\s+SELECT/ ) {
      MKDEBUG && _d('CREATE..SELECT');
      ($subqueries[0]->{query}) = $query =~ m/\s+(SELECT .+)/;
      $query =~ s/\s+SELECT.+//;
   }

   my $parse_func = "parse_$type";
   my $struct     = $self->$parse_func($query);
   if ( !$struct ) {
      MKDEBUG && _d($parse_func, 'failed to parse query');
      return;
   }
   $struct->{type} = $type;
   $self->_parse_clauses($struct);

   if ( @subqueries ) {
      MKDEBUG && _d('Parsing subqueries');
      foreach my $subquery ( @subqueries ) {
         my $subquery_struct = $self->parse($subquery->{query});
         # Copy the subquery's own attributes onto its parsed struct.
         $subquery_struct->{$_} = $subquery->{$_} for keys %$subquery;
         push @{$struct->{subqueries}}, $subquery_struct;
      }
   }

   MKDEBUG && _d('Query struct:', Dumper($struct));
   return $struct;
}
2854 | 2850 | |||
2855 | 2851 | |||
# Parse every raw clause in $struct->{clauses} with its parse_<clause>
# method and store the result in $struct->{<clause>}.  Clause names that
# contain spaces are renamed with underscores first.  A parsed "select"
# clause is itself a struct, so its clauses are parsed recursively.
# Returns nothing.
sub _parse_clauses {
   my ( $self, $struct ) = @_;
   CLAUSE:
   foreach my $name ( keys %{$struct->{clauses}} ) {
      my $clauses = $struct->{clauses};
      my $clause  = $name;

      if ( index($clause, ' ') >= 0 ) {
         (my $underscored = $clause) =~ tr/ /_/;
         $clauses->{$underscored} = delete $clauses->{$clause};
         $clause = $underscored;
      }

      my $parser = "parse_$clause";
      $struct->{$clause} = $self->$parser($clauses->{$clause});

      next CLAUSE unless $clause eq 'select';

      MKDEBUG && _d('Parsing subquery clauses');
      $struct->{select}->{type} = 'select';
      $self->_parse_clauses($struct->{select});
   }
   return;
}
2877 | 2873 | |||
# Normalize a query: drop "--" comment lines, flatten all whitespace to
# single spaces, remove /* ... */ comments and trim both ends.
# Returns nothing for an empty query.
sub clean_query {
   my ( $self, $query ) = @_;
   return unless $query;

   for ( $query ) {
      s/^\s*--.*$//gm;   # -- comments
      s/\s+/ /g;         # extra spaces/flatten
      s!/\*.*?\*/!!g;    # /* comments */
      s/^\s+//;          # leading spaces
      s/\s+$//;          # trailing spaces
   }

   return $query;
}
2890 | 2886 | |||
# Make keyword spacing uniform so later regexes can rely on it:
#   * first "VALUE(" or "VALUES(" gets a space before the paren
#   * every "ON(" / "USING(" becomes "on (" / "using ("
#   * every "(  SELECT  " becomes "(SELECT "
sub normalize_keyword_spaces {
   my ( $self, $query ) = @_;

   for ( $query ) {
      s/\b(VALUE(?:S)?)\(/$1 (/i;
      s/\bON\(/on (/gi;
      s/\bUSING\(/using (/gi;

      s/\(\s+SELECT\s+/(SELECT /gi;
   }

   return $query;
}
2902 | 2898 | |||
# Generic statement splitter used by the parse_<type> subs.
#   $query        - statement text without its leading verb
#   $keywords     - regex with one capture group matching modifier
#                   keywords; each match is removed from the query and
#                   recorded as $struct->{keywords}->{<lc keyword>} = 1
#   $first_clause - name given to the text before the first clause keyword
#   $clauses      - regex with one capture group matching clause keywords
# Returns a struct with {keywords}, {clauses} (lc clause name => raw text)
# and {unknown} (any unparsed remainder, else undef).
# Returns nothing for an empty query.
sub _parse_query {
   my ( $self, $query, $keywords, $first_clause, $clauses ) = @_;
   return unless $query;
   my $struct = {};

   1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;

   # Yields: first-clause text, then alternating clause keyword / text.
   # /c keeps pos() after the last match for the \G check further down.
   my @clause = grep { defined $_ }
      ($query =~ m/\G(.+?)(?:$clauses\s+|\Z)/gci);

   my $clause = $first_clause;
   my $value  = shift @clause;
   $struct->{clauses}->{$clause} = $value;
   MKDEBUG && _d('Clause:', $clause, $value);

   while ( @clause ) {
      $clause = shift @clause;
      $value  = shift @clause;
      $struct->{clauses}->{lc $clause} = $value;
      MKDEBUG && _d('Clause:', $clause, $value);
   }

   # Whatever follows the last parsed clause could not be classified.
   ($struct->{unknown}) = ($query =~ m/\G(.+)/);

   return $struct;
}
2929 | 2925 | |||
# Parse a DELETE statement (text after the DELETE verb).  The first
# "FROM " is removed and the remainder is split by _parse_query with
# 'from' as the name of the first clause.
# Dies if the statement has no FROM.
sub parse_delete {
   my ( $self, $query ) = @_;

   die "DELETE without FROM: $query"
      unless $query =~ s/FROM\s+//i;

   my $keywords = qr/(LOW_PRIORITY|QUICK|IGNORE)/i;
   my $clauses  = qr/(FROM|WHERE|ORDER BY|LIMIT)/i;
   return $self->_parse_query($query, $keywords, 'from', $clauses);
}
2941 | 2937 | |||
# Parse an INSERT or REPLACE statement (text after the verb).
# Returns a struct with:
#   {keywords} - modifier keywords found (lower-cased name => 1)
#   {clauses}  - raw text of: on_duplicate, into, columns, and one of
#                values / set / select
#   {unknown}  - any unparsed remainder, else undef
# Returns nothing for an empty query.  Dies when the clause after the
# table has no values.
sub parse_insert {
   my ( $self, $query ) = @_;
   return unless $query;
   my $struct = {};

   my $keywords   = qr/(LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)/i;
   1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;

   if ( $query =~ m/ON DUPLICATE KEY UPDATE (.+)/i ) {
      my $values = $1;
      die "No values after ON DUPLICATE KEY UPDATE: $query" unless $values;
      $struct->{clauses}->{on_duplicate} = $values;
      MKDEBUG && _d('Clause: on duplicate key update', $values);

      # Strip with the same case-insensitivity as the match above so a
      # lower-case clause does not leak into the values/set clause.
      $query =~ s/\s+ON DUPLICATE KEY UPDATE.+//i;
   }

   if ( my @into = ($query =~ m/
            (?:INTO\s+)?            # INTO, optional
            (.+?)\s+                # table ref
            (\([^\)]+\)\s+)?        # column list, optional
            (VALUE.?|SET|SELECT)\s+ # start of next caluse
         /xgci)
   ) {
      my $tbl = shift @into;  # table ref
      $struct->{clauses}->{into} = $tbl;
      MKDEBUG && _d('Clause: into', $tbl);

      my $cols = shift @into; # columns, maybe
      if ( $cols ) {
         $cols =~ s/[\(\)]//g;
         $struct->{clauses}->{columns} = $cols;
         MKDEBUG && _d('Clause: columns', $cols);
      }

      my $next_clause = lc(shift @into);  # VALUES, SET or SELECT
      die "INSERT/REPLACE without clause after table: $query"
         unless $next_clause;
      $next_clause = 'values' if $next_clause eq 'value';
      # \G resumes right after the clause keyword matched above (/c).
      my ($values) = ($query =~ m/\G(.+)/gci);
      die "INSERT/REPLACE without values: $query" unless $values;
      $struct->{clauses}->{$next_clause} = $values;
      MKDEBUG && _d('Clause:', $next_clause, $values);
   }

   ($struct->{unknown}) = ($query =~ m/\G(.+)/);

   return $struct;
}
{
   # REPLACE has the same syntax as INSERT, so it shares the parser.
   no warnings;
   *parse_replace = \&parse_insert;
}
2995 | 2991 | |||
# Parse a SELECT statement.
#
# Trailing locking modifiers (FOR UPDATE, LOCK IN SHARE MODE) are removed
# from the query first, the rest is handed to _parse_query with 'columns'
# as the first clause, and the removed modifiers are then recorded in the
# returned struct's keywords hash as for_update / lock_in_share_mode.
#
# Returns whatever _parse_query returns, with those keywords added.
sub parse_select {
   my ( $self, $query ) = @_;

   my @trailing_keywords;
   my $final_keywords = qr/(FOR UPDATE|LOCK IN SHARE MODE)/i;
   1 while $query =~ s/\s+$final_keywords/(push @trailing_keywords, $1), ''/gie;

   my $keywords = qr/(
       ALL
      |DISTINCT
      |DISTINCTROW
      |HIGH_PRIORITY
      |STRAIGHT_JOIN
      |SQL_SMALL_RESULT
      |SQL_BIG_RESULT
      |SQL_BUFFER_RESULT
      |SQL_CACHE
      |SQL_NO_CACHE
      |SQL_CALC_FOUND_ROWS
   )/xi;

   # NOTE(review): this pattern is compiled with the x flag, so the literal
   # space in the last alternative is ignored and it only matches the two
   # words written without a space between them. The other two-word clauses
   # escape their space with \s. Confirm whether the same was intended here.
   my $clauses = qr/(
       FROM
      |WHERE
      |GROUP\sBY
      |HAVING
      |ORDER\sBY
      |LIMIT
      |PROCEDURE
      |INTO OUTFILE
   )/xi;

   my $struct = $self->_parse_query($query, $keywords, 'columns', $clauses);

   # Record each trailing modifier under a lowercased, underscored key.
   # The substitution runs on the array element itself, as before.
   foreach my $keyword ( @trailing_keywords ) {
      $keyword =~ s/ /_/g;
      $struct->{keywords}->{lc $keyword} = 1;
   }

   return $struct;
}
3032 | 3028 | |||
# Parse an UPDATE statement by delegating to _parse_query, with 'tables'
# as the first clause. All arguments the caller passed are forwarded
# unchanged, ahead of the keyword and clause patterns.
sub parse_update {
   my @caller_args = @_;
   my $clause_pat  = qr/(SET|WHERE|ORDER BY|LIMIT)/i;
   my $keyword_pat = qr/(LOW_PRIORITY|IGNORE)/i;
   return _parse_query(@caller_args, $keyword_pat, 'tables', $clause_pat);
}
3039 | 3035 | |||
# Parse the text of a CREATE statement into object type and name.
#
# The pattern takes the first word as the object type and the next word
# (after an optional IF NOT EXISTS) as the object name.
#
# Returns:
#   Hashref with keys: object (lowercased type), name, unknown (undef).
sub parse_create {
   my ($self, $query) = @_;
   # The pattern uses /x, which ignores literal whitespace, so the words of
   # IF NOT EXISTS must be separated with \s+ to match real SQL.
   my ($obj, $name) = $query =~ m/
      (\S+)\s+
      (?:IF\s+NOT\s+EXISTS\s+)?
      (\S+)
   /xi;
   return {
      object  => lc $obj,
      name    => $name,
      unknown => undef,
   };
}
3053 | 3049 | |||
# Parse a FROM clause (also used for INTO and table lists via aliases)
# into a list of table references.
#
# USING column lists and function calls are removed first so their commas
# and parentheses do not interfere with splitting. The clause is then
# split on comma joins and ANSI joins; the separators are kept and used to
# describe how each table is joined to the one before it.
#
# Returns:
#   Arrayref of table-reference hashrefs from parse_table_reference. Every
#   table after the first also has a join hashref with keys to, type, ansi
#   and, when given, condition plus where (ON) or columns (USING).
#   Returns nothing if $from is false.
#
# Dies with "Error parsing FROM clause" if the split yields an empty piece.
sub parse_from {
   my ( $self, $from ) = @_;
   return unless $from;
   MKDEBUG && _d('Parsing FROM', $from);

   my $using_cols;
   ($from, $using_cols) = $self->remove_using_columns($from);

   my $funcs;
   ($from, $funcs) = $self->remove_functions($from);

   my $comma_join = qr/(?>\s*,\s*)/;
   my $ansi_join  = qr/(?>
     \s+
     (?:(?:INNER|CROSS|STRAIGHT_JOIN|LEFT|RIGHT|OUTER|NATURAL)\s+)*
     JOIN
     \s+
   )/xi;

   # After trimming, a separator produced by the split is exactly "," or
   # optional join modifiers followed by JOIN. Matching the whole piece
   # keeps table names that merely contain "join" (e.g. user_joins) from
   # being mistaken for a separator.
   my $join_separator = qr/\A
     (?:(?:INNER|CROSS|STRAIGHT_JOIN|LEFT|RIGHT|OUTER|NATURAL)\s+)*
     JOIN
   \z/xi;

   my @tbls;     # all table refs, a hashref for each
   my $tbl_ref;  # current table ref hashref
   my $join;     # join info hashref for current table ref
   foreach my $thing ( split /($comma_join|$ansi_join)/io, $from ) {
      die "Error parsing FROM clause" unless $thing;

      $thing =~ s/^\s+//;
      $thing =~ s/\s+$//;
      MKDEBUG && _d('Table thing:', $thing);

      if ( $thing =~ m/\s+(?:ON|USING)\s+/i ) {
         # Table reference followed by its join condition.
         MKDEBUG && _d("JOIN condition");
         my ($tbl_ref_txt, $join_condition_verb, $join_condition_value)
            = $thing =~ m/^(.+?)\s+(ON|USING)\s+(.+)/i;

         $tbl_ref = $self->parse_table_reference($tbl_ref_txt);

         $join->{condition} = lc $join_condition_verb;
         if ( $join->{condition} eq 'on' ) {
            $join->{where} = $self->parse_where($join_condition_value, $funcs);
         }
         else { # USING
            # The column list was removed earlier; take the next saved one.
            $join->{columns} = $self->_parse_csv(shift @$using_cols);
         }
      }
      elsif ( $thing eq ',' || $thing =~ m/$join_separator/ ) {
         # A separator completes the table reference collected so far and
         # starts the join description for the next one.
         if ( $join ) {
            $tbl_ref->{join} = $join;
         }
         push @tbls, $tbl_ref;
         MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));

         $tbl_ref = undef;
         $join    = {};

         $join->{to} = $tbls[-1]->{tbl};
         if ( $thing eq ',' ) {
            $join->{type} = 'inner';
            $join->{ansi} = 0;
         }
         else { # ansi join
            my $type = $thing =~ m/^(.+?)\s+JOIN$/i ? lc $1 : 'inner';
            $join->{type} = $type;
            $join->{ansi} = 1;
         }
      }
      else {
         $tbl_ref = $self->parse_table_reference($thing);
         MKDEBUG && _d('Table reference:', Dumper($tbl_ref));
      }
   }

   # Save the last table reference.
   if ( $tbl_ref ) {
      if ( $join ) {
         $tbl_ref->{join} = $join;
      }
      push @tbls, $tbl_ref;
      MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
   }

   return \@tbls;
}
3135 | 3131 | |||
# Parse one table reference, such as "db.tbl AS t FORCE INDEX (idx)".
#
# Returns:
#   Hashref holding the identifier parts from parse_identifier('table', ...)
#   plus, when present: index_hint (raw hint text), alias (backticks
#   removed) and explicit_alias (true when AS was used).
#   Returns nothing if $tbl_ref is false.
#
# Dies with "Table ident match failed" if the text does not match the
# file-level $table_ident pattern.
sub parse_table_reference {
   my ( $self, $tbl_ref ) = @_;
   return unless $tbl_ref;
   MKDEBUG && _d('Parsing table reference:', $tbl_ref);
   my %tbl;

   # Strip and save an index hint. IGNORE was previously misspelled here,
   # so IGNORE INDEX hints were never recognized.
   if ( $tbl_ref =~ s/
         \s+(
            (?:FORCE|USE|IGNORE)\s
            (?:INDEX|KEY)
            \s*\([^\)]+\)\s*
         )//xi)
   {
      $tbl{index_hint} = $1;
      MKDEBUG && _d('Index hint:', $tbl{index_hint});
   }

   if ( $tbl_ref =~ m/$table_ident/ ) {
      # Copy the captures before any other match can overwrite them.
      my ($db_tbl, $as, $alias) = ($1, $2, $3); # XXX
      my $ident_struct = $self->parse_identifier('table', $db_tbl);
      $alias =~ s/`//g if $alias;
      @tbl{keys %$ident_struct} = values %$ident_struct;
      $tbl{explicit_alias} = 1 if $as;
      $tbl{alias}          = $alias if $alias;
   }
   else {
      die "Table ident match failed";  # shouldn't happen
   }

   return \%tbl;
}
{
   # Presumably silences the warning a one-off glob assignment can raise.
   no warnings;
   *parse_into   = \&parse_from;
   *parse_tables = \&parse_from;
}
3172 | 3168 | |||
# Parse a WHERE (or JOIN ... ON) condition into a list of predicates.
#
# The text is cut at every AND/OR, then fragments that are not predicates
# of their own (no operator, e.g. the "AND y" of "BETWEEN x AND y") are
# glued back onto the preceding fragment, as are fragments split in the
# middle of a quoted string.
#
# Parameters:
#   $where     - condition text
#   $functions - optional arrayref of function calls previously replaced
#                by __FUNCn__ tokens; they are shifted back in as the
#                tokens are encountered
#
# Returns:
#   Arrayref of hashrefs with keys predicate (and/or/undef), left_arg,
#   operator and right_arg. Returns nothing if $where is false.
#
# Dies with "Failed to parse WHERE condition" for a fragment that is
# neither "col op val" nor a constant.
sub parse_where {
   my ( $self, $where, $functions ) = @_;
   return unless $where;
   MKDEBUG && _d("Parsing WHERE", $where);

   my $op_symbol = qr/
      (?:
       <=(?:>)?
      |>=
      |<>
      |!=
      |<
      |>
      |=
   )/xi;
   my $op_verb = qr/
      (?:
          (?:(?:NOT\s)?LIKE)
         |(?:IS(?:\sNOT\s)?)
         |(?:(?:\sNOT\s)?BETWEEN)
         |(?:(?:NOT\s)?IN)
      )
   /xi;
   my $op_pat = qr/
   (
      (?>
          (?:$op_symbol)  # don't need spaces around the symbols, e.g.: col=1
         |(?:\s+$op_verb) # must have space before verb op, e.g.: col LIKE ...
      )
   )/x;

   # Cut the condition at each AND/OR. Every fragment after the first
   # starts with its conjunction.
   my $offset = 0;
   my $pred   = "";
   my @pred;
   my @has_op;
   while ( $where =~ m/\b(and|or)\b/gi ) {
      my $pos = (pos $where) - (length $1);  # pos at and|or, not after

      $pred = substr $where, $offset, ($pos-$offset);
      push @pred, $pred;
      push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;

      $offset = $pos;
   }
   $pred = substr $where, $offset;
   push @pred, $pred;
   push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
   MKDEBUG && _d("Predicate fragments:", Dumper(\@pred));
   MKDEBUG && _d("Predicate frags with operators:", @has_op);

   # Walk backwards, gluing operator-less fragments onto the one before.
   my $n = scalar @pred - 1;
   for my $i ( 1..$n ) {
      $i   *= -1;
      my $j = $i - 1;  # preceding pred frag

      # A bare constant stands alone unless it completes a BETWEEN.
      next if $pred[$j] !~ m/\s+between\s+/i  && $self->_is_constant($pred[$i]);

      if ( !$has_op[$i] ) {
         $pred[$j] .= $pred[$i];
         $pred[$i]  = undef;
      }
   }
   MKDEBUG && _d("Predicate fragments joined:", Dumper(\@pred));

   # Rejoin fragments that were cut inside a quoted string. Only valid
   # indexes are visited, and a fragment is merged only if there really is
   # a following fragment to take.
   for my $i ( 0..$#pred ) {
      $pred = $pred[$i];
      next unless defined $pred;
      my $n_single_quotes = ($pred =~ tr/'//);
      my $n_double_quotes = ($pred =~ tr/"//);
      if ( ($n_single_quotes % 2) || ($n_double_quotes % 2) ) {
         next unless defined $pred[$i + 1];
         $pred[$i]     .= $pred[$i + 1];
         $pred[$i + 1]  = undef;
      }
   }
   MKDEBUG && _d("Predicate fragments balanced:", Dumper(\@pred));

   my @predicates;
   foreach my $pred ( @pred ) {
      next unless defined $pred;
      $pred =~ s/^\s+//;
      $pred =~ s/\s+$//;
      my $conj;
      if ( $pred =~ s/^(and|or)\s+//i ) {
         $conj = lc $1;
      }
      my ($col, $op, $val) = $pred =~ m/^(.+?)$op_pat(.+)$/o;
      if ( !$col || !$op ) {
         if ( $self->_is_constant($pred) ) {
            $val = lc $pred;
         }
         else {
            die "Failed to parse WHERE condition: $pred";
         }
      }

      if ( $col ) {
         $col =~ s/\s+$//;
         $col =~ s/^\(+//;  # no unquoted column name begins with (
      }
      if ( $op ) {
         $op  =  lc $op;
         $op  =~ s/^\s+//;
         $op  =~ s/\s+$//;
      }
      $val =~ s/^\s+//;

      # Drop stray closing parens unless the value is an IN list or a
      # function call.
      if ( ($op || '') !~ m/IN/i && $val !~ m/^\w+\([^\)]+\)$/ ) {
         $val =~ s/\)+$//;
      }

      if ( $val =~ m/NULL|TRUE|FALSE/i ) {
         $val = lc $val;
      }

      if ( $functions ) {
         # $col is undefined for a constant predicate, so test it first.
         $col = shift @$functions if defined $col && $col =~ m/__FUNC\d+__/;
         $val = shift @$functions if $val =~ m/__FUNC\d+__/;
      }

      push @predicates, {
         predicate => $conj,
         left_arg  => $col,
         operator  => $op,
         right_arg => $val,
      };
   }

   return \@predicates;
}
3302 | 3298 | |||
# Report whether a predicate fragment is a bare constant: TRUE, FALSE or
# an integer, optionally preceded by an AND/OR conjunction.
#
# Returns 1 if so, 0 otherwise (including for an undefined value).
sub _is_constant {
   my ( $self, $val ) = @_;
   return 0 unless defined $val;
   # SQL keywords are case-insensitive, so strip "AND"/"OR" as well as
   # "and"/"or"; otherwise an uppercase conjunction hides the constant.
   $val =~ s/^\s*(?:and|or)\s+//i;
   return
      $val =~ m/^\s*(?:TRUE|FALSE)\s*$/i || $val =~ m/^\s*-?\d+\s*$/ ? 1 : 0;
}
3310 | 3306 | |||
# HAVING clauses are not parsed; the clause text is returned unchanged.
sub parse_having {
   my ( $self, $clause_text ) = @_;
   return $clause_text;
}
3315 | 3311 | |||
# Parse a GROUP BY clause into identifier structs.
#
# A trailing WITH ROLLUP modifier is removed before the comma-separated
# list is handed to parse_identifiers.
#
# Returns the value from parse_identifiers, or nothing if $group_by is
# false.
sub parse_group_by {
   my ( $self, $group_by ) = @_;
   return unless $group_by;
   MKDEBUG && _d('Parsing GROUP BY', $group_by);

   my $with_rollup = $group_by =~ s/\s+WITH ROLLUP\s*//i;

   my $idents = $self->parse_identifiers( $self->_parse_csv($group_by) );

   # parse_identifiers returns an array ref of identifier hashes, and
   # using an array ref as a hash is a fatal error, so GROUP BY ... WITH
   # ROLLUP used to die here. The flag is only stored when the value can
   # take a hash key; for an array ref the modifier is stripped and not
   # reported.
   # NOTE(review): decide where with_rollup belongs in the array-shaped
   # result if callers need it.
   if ( $with_rollup && ref $idents ne 'ARRAY' ) {
      $idents->{with_rollup} = 1;
   }

   return $idents;
}
3329 | 3325 | |||
# Parse an ORDER BY clause: split it on commas with _parse_csv and turn
# each item into an identifier struct with parse_identifiers.
#
# Returns the value from parse_identifiers, or nothing if $order_by is
# false.
sub parse_order_by {
   my ( $self, $order_by ) = @_;
   if ( !$order_by ) {
      return;
   }
   MKDEBUG && _d('Parsing ORDER BY', $order_by);
   my $sort_idents
      = $self->parse_identifiers( $self->_parse_csv($order_by) );
   return $sort_idents;
}
3337 | 3333 | |||
# Parse the text of a LIMIT clause.
#
# Accepted forms: "row_count", "offset, row_count" (with or without
# whitespace around the comma) and "row_count OFFSET offset".
#
# Returns:
#   Hashref with row_count and, when given, offset. explicit_offset is set
#   for the OFFSET keyword form. Returns nothing if $limit is undefined or
#   an empty string.
sub parse_limit {
   my ( $self, $limit ) = @_;
   # Test for an empty value rather than truth, so LIMIT 0 is parsed.
   return unless defined $limit && length $limit;
   my $struct = {
      row_count => undef,
   };
   if ( $limit =~ m/(\S+)\s+OFFSET\s+(\S+)/i ) {
      $struct->{explicit_offset} = 1;
      $struct->{row_count}       = $1;
      $struct->{offset}          = $2;
   }
   else {
      # Whitespace around the comma is optional: "5,10" and "5, 10" both
      # mean offset 5, row count 10.
      my ($offset, $cnt) = $limit =~ m/(?:(\S+?)\s*,\s*)?(\S+)/i;
      $struct->{row_count} = $cnt;
      $struct->{offset}    = $offset if defined $offset;
   }
   return $struct;
}
3356 | 3352 | |||
# Parse the text of a VALUES clause, e.g. "(1, 'two', NOW())".
#
# The enclosing parentheses are removed and the rest is split with
# _parse_csv in quoted-values mode, keeping the quotes on each value.
#
# Returns the arrayref from _parse_csv, or nothing if $values is false.
sub parse_values {
   my ( $self, $values ) = @_;
   return unless $values;
   $values =~ s/^\s*\(//;
   # Remove the last closing paren, not the first one found; otherwise a
   # value such as "(NOW(), 1)" loses the paren that closes NOW(.
   $values =~ s/\s*\)([^\)]*)$/$1/;
   my $vals = $self->_parse_csv(
      $values,
      quoted_values => 1,
      remove_quotes => 0,
   );
   return $vals;
}
3369 | 3365 | |||
# Parse the text of a SET clause, e.g. "a = 1, t.b = 'x'".
#
# Returns:
#   Arrayref with one hashref per assignment: the identifier parts from
#   parse_identifier('column', ...) plus value (text right of the "=").
#   Returns nothing if $set is false or yields no items.
sub parse_set {
   my ( $self, $set ) = @_;
   MKDEBUG && _d("Parse SET", $set);
   return unless $set;
   my $vals = $self->_parse_csv($set);
   return unless $vals && @$vals;

   my @set;
   foreach my $col_val ( @$vals ) {
      # The column part is matched non-greedily so whitespace before the
      # "=" is not captured as part of the column name.
      my ($col, $val)  = $col_val =~ m/^([^=]+?)\s*=\s*(.+)/;
      my $ident_struct = $self->parse_identifier('column', $col);
      my $set_struct   = {
         %$ident_struct,
         value => $val,
      };
      MKDEBUG && _d("SET:", Dumper($set_struct));
      push @set, $set_struct;
   }
   return \@set;
}
3390 | 3386 | |||
# Split a comma-separated list into values (parse_on_duplicate is an
# alias).
#
# Parameters:
#   $vals - text to split
#   %args - optional:
#             quoted_values - values may be quoted with ' or " and a
#                             quoted value may itself contain commas
#             remove_quotes - with quoted_values, strip the enclosing
#                             quotes from quoted values
#
# Returns:
#   Arrayref of values. Without quoted_values every value is trimmed on
#   both sides. Returns nothing if $vals is false.
sub _parse_csv {
   my ( $self, $vals, %args ) = @_;
   return unless $vals;

   my @vals;
   if ( $args{quoted_values} ) {
      # Non-empty while a quoted value is still open, i.e. its text was
      # cut by the split below at an embedded comma.
      my $quote_char = '';
      VAL:
      foreach my $val ( split(',', $vals) ) {
         MKDEBUG && _d("Next value:", $val);
         if ( $quote_char ) {
            MKDEBUG && _d("Value is part of previous quoted value");
            $vals[-1] .= ",$val";

            if ( $val =~ m/[^\\]*$quote_char$/ ) {
               if ( $args{remove_quotes} ) {
                  $vals[-1] =~ s/^\s*$quote_char//;
                  $vals[-1] =~ s/$quote_char\s*$//;
               }
               MKDEBUG && _d("Previous quoted value is complete:", $vals[-1]);
               $quote_char = '';
            }

            next VAL;
         }

         $val =~ s/^\s+//;

         if ( $val =~ m/^(['"])/ ) {
            MKDEBUG && _d("Value is quoted");
            $quote_char = $1;  # XXX
            if ( $val =~ m/.$quote_char$/ ) {
               MKDEBUG && _d("Value is complete");
               # Strip the quotes from this value (it has not been pushed
               # yet) while the quote character is still known, then mark
               # the quoted value as closed.
               if ( $args{remove_quotes} ) {
                  $val =~ s/^\s*$quote_char//;
                  $val =~ s/$quote_char\s*$//;
               }
               $quote_char = '';
            }
            else {
               MKDEBUG && _d("Quoted value is not complete");
            }
         }
         else {
            $val =~ s/\s+$//;
         }

         MKDEBUG && _d("Saving value", ($quote_char ? "fragment" : ""));
         push @vals, $val;
      }
   }
   else {
      @vals = map { s/^\s+//; s/\s+$//; $_ } split(',', $vals);
   }

   return \@vals;
}
{
   # Presumably silences the warning a one-off glob assignment can raise.
   no warnings;
   *parse_on_duplicate = \&_parse_csv;
}
3452 | 3448 | |||
# Parse a SELECT column list into column structs.
#
# The list is consumed from left to right, one column per match of the
# file-level $column_ident pattern followed by a comma or the end of the
# string.
#
# Returns:
#   Arrayref of hashrefs: the identifier parts from
#   parse_identifier('column', ...) plus, when present, alias (backticks
#   removed) and explicit_alias (true when AS was used).
#
# Dies with "Column ident match failed" if a column cannot be matched.
sub parse_columns {
   my ( $self, $cols ) = @_;
   MKDEBUG && _d('Parsing columns list:', $cols);

   my @col_structs;
   pos $cols = 0;
   while (pos $cols < length $cols) {
      # /g and /c keep pos() where the last match ended, so each pass
      # resumes right after the previous column.
      if ($cols !~ m/\G\s*$column_ident\s*(?>,|\Z)/gcxo) {
         die "Column ident match failed";  # shouldn't happen
      }

      my ($db_tbl_col, $as, $alias) = ($1, $2, $3); # XXX
      my $ident_struct = $self->parse_identifier('column', $db_tbl_col);
      $alias =~ s/`//g if $alias;

      my %col_struct = %$ident_struct;
      $col_struct{explicit_alias} = 1      if $as;
      $col_struct{alias}          = $alias if $alias;
      push @col_structs, \%col_struct;
   }

   return \@col_structs;
}
3478 | 3474 | |||
# Replace every "(SELECT ...)" subquery in a query with a __SQn__ token.
#
# Subqueries are processed from the last one in the text to the first, so
# an inner subquery is replaced before the one that encloses it.
#
# Returns a list: the rewritten query followed by one hashref per
# subquery, in processing order, with keys:
#   query   - subquery text without its enclosing parentheses
#   context - 'scalar' (token follows a comparison operator), 'list'
#             (token follows IN/ANY/SOME/ALL/EXISTS; the token is wrapped
#             in parentheses in the query) or 'identifier'
#   nested  - index of the enclosing subquery, when nested
sub remove_subqueries {
   my ( $self, $query ) = @_;

   # Offsets of each "(SELECT ", last occurrence first.
   my @start_pos;
   while ( $query =~ m/(\(SELECT )/gi ) {
      push @start_pos, (pos $query) - (length $1);
   }
   @start_pos = reverse @start_pos;

   # For each start, scan forward counting parens until they balance.
   my @end_pos;
   foreach my $start ( @start_pos ) {
      my $depth = 0;
      pos $query = $start;
      while ( $query =~ m/([\(\)])/cg ) {
         $depth += ($1 eq '(' ? 1 : -1);
         last unless $depth;
      }
      push @end_pos, pos $query;
   }

   my @subqueries;
   my $len_adj = 0;  # chars already removed from inside the next subquery
   for my $n ( 0..$#start_pos ) {
      MKDEBUG && _d('Query:', $query);
      my $start = $start_pos[$n];
      my $end   = $end_pos[$n];
      my $len   = $end - $start - $len_adj;
      MKDEBUG && _d("Subquery $n start", $start,
         'orig end', $end, 'adj', $len_adj, 'adj end',
         $start + $len, 'len', $len);

      my %struct;
      my $token    = '__SQ' . $n . '__';
      my $subquery = substr($query, $start, $len, $token);
      MKDEBUG && _d("Subquery $n:", $subquery);

      # The next entry, if it starts before and ends after this one,
      # encloses it.
      my $outer_start = $start_pos[$n + 1];
      my $outer_end   = $end_pos[$n + 1];
      my $is_nested   =    $outer_start && ($outer_start < $start)
                        && $outer_end   && ($outer_end   > $end);
      if ( $is_nested ) {
         MKDEBUG && _d("Subquery $n nested in next subquery");
         $len_adj += $len - length $token;
         $struct{nested} = $n + 1;
      }
      else {
         MKDEBUG && _d("Subquery $n not nested");
         $len_adj = 0;
         if ( $subqueries[-1] && $subqueries[-1]->{nested} ) {
            MKDEBUG && _d("Outermost subquery");
         }
      }

      if ( $query =~ m/(?:=|>|<|>=|<=|<>|!=|<=>)\s*$token/ ) {
         $struct{context} = 'scalar';
      }
      elsif ( $query =~ m/\b(?:IN|ANY|SOME|ALL|EXISTS)\s*$token/i ) {
         # Keep list context visible in the query by wrapping the token.
         if ( $query !~ m/\($token\)/ ) {
            $query =~ s/$token/\($token\)/;
            $len_adj -= 2 if $struct{nested};
         }
         $struct{context} = 'list';
      }
      else {
         $struct{context} = 'identifier';
      }
      MKDEBUG && _d("Subquery $n context:", $struct{context});

      $subquery =~ s/^\s*\(//;
      $subquery =~ s/\s*\)\s*$//;

      $struct{query} = $subquery;
      push @subqueries, \%struct;
   }

   return $query, @subqueries;
}
3558 | 3554 | |||
# Take the column lists out of USING (...) clauses so their commas do not
# interfere with later splitting of the FROM clause.
#
# Each "USING (cols)" is rewritten as "USING (n)", where n is the index of
# the saved column list.
#
# Returns a list: the rewritten clause and an arrayref of the removed
# column-list strings. Returns nothing if $from is false.
sub remove_using_columns {
   my ($self, $from) = @_;
   return unless $from;
   MKDEBUG && _d('Removing cols from USING clauses');

   my @saved_cols;
   my $using_clause = qr/\bUSING\s*\(([^\)]+)\)/i;
   $from =~ s{$using_clause}{
      push @saved_cols, $1;
      "USING ($#saved_cols)";
   }eg;

   MKDEBUG && _d('FROM:', $from, Dumper(\@saved_cols));
   return $from, \@saved_cols;
}
3575 | 3571 | |||
# Helper for remove_functions: decide what a matched function call is
# replaced with.
#
# A function whose uppercased name is in the file-level %ignore_function
# hash is returned unchanged. Any other function is appended to @$funcs
# and a __FUNCn__ token is returned, n being its index in @$funcs.
sub replace_function {
   my ($func, $funcs) = @_;
   my ($func_name) = $func =~ m/^(\w+)/;
   return $func if $ignore_function{uc $func_name};

   push @$funcs, $func;
   my $idx = $#{$funcs};
   return "__FUNC${idx}__";
}
3586 | 3582 | |||
# Replace function calls in a clause using replace_function, for every
# match of the file-level $function_ident pattern.
#
# Returns a list: the rewritten clause and an arrayref of the function
# calls that replace_function saved. Returns nothing if $clause is false.
sub remove_functions {
   my ($self, $clause) = @_;
   return unless $clause;
   MKDEBUG && _d('Removing functions from clause:', $clause);

   my $saved_funcs = [];
   $clause =~ s/$function_ident/replace_function($1, $saved_funcs)/eg;

   MKDEBUG && _d('Function-stripped clause:', $clause, Dumper($saved_funcs));
   return $clause, $saved_funcs;
}
3596 | 3592 | |||
# Parse a list of GROUP BY / ORDER BY items into identifier structs.
#
# Parameters:
#   $idents - arrayref of item strings; a trailing ASC/DESC is stripped
#             from the strings in this array as they are parsed
#
# Returns:
#   Arrayref with one hashref per item, holding sort (ASC or DESC, when
#   given) and one of:
#     position              - the item is all digits, like 5
#     function, expression  - the item looks like FUNC(...)
#     table, column         - anything else, split with split_unquote
#   Returns nothing if $idents is false.
sub parse_identifiers {
   my ( $self, $idents ) = @_;
   return unless $idents;
   MKDEBUG && _d("Parsing identifiers");

   my @parsed;
   foreach my $ident ( @$idents ) {
      MKDEBUG && _d("Identifier:", $ident);
      my %parts;

      # Sort direction, if any, is removed before classifying the item.
      $parts{sort} = uc $1 if $ident =~ s/\s+(ASC|DESC)\s*$//i;  # XXX

      if ( $ident =~ m/^\d+$/ ) {      # Position like 5
         MKDEBUG && _d("Positional ident");
         $parts{position} = $ident;
      }
      elsif ( $ident =~ m/^\w+\(/ ) {  # Function like MIN(col)
         MKDEBUG && _d("Expression ident");
         my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
         $parts{function}   = uc $func;
         $parts{expression} = $expr if $expr;
      }
      else {                           # Ref like (table.)column
         MKDEBUG && _d("Table/column ident");
         my ($tbl, $col) = $self->split_unquote($ident);
         $parts{table}  = $tbl if $tbl;
         $parts{column} = $col;
      }

      push @parsed, \%parts;
   }

   return \@parsed;
}
3632 | 3628 | |||
# Split one table or column identifier into its db/tbl/col parts.
#
# Parameters:
#   $type  - 'column' or 'table'; decides how one- and two-part names are
#            labelled
#   $ident - identifier text; backticks are removed. For a function call
#            such as MAX(col) the argument is parsed instead; a call with
#            no argument (NOW()) is returned whole as col.
#
# Returns:
#   Hashref with some of db, tbl, col. If a SchemaQualifier object is set,
#   it is asked to fill in a missing tbl/db (columns) or db (tables).
#   Returns nothing if $type or $ident is false.
#
# Dies on a two-part name with an unknown $type, or when the name does not
# have one to three parts.
sub parse_identifier {
   my ( $self, $type, $ident ) = @_;
   return unless $type && $ident;
   MKDEBUG && _d("Parsing", $type, "identifier:", $ident);

   if ( $ident =~ m/^\w+\(/ ) {  # Function like MIN(col)
      my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
      MKDEBUG && _d('Function', $func, 'arg', $expr);
      return { col => $ident } unless $expr;  # NOW()
      $ident = $expr;  # col from MAX(col)
   }

   my @ident_parts = split /[.]/, $ident;
   s/`//g for @ident_parts;

   # Choose the labels for the parts from how many there are.
   my $n_parts = scalar @ident_parts;
   my @part_names;
   if ( $n_parts == 3 ) {
      @part_names = qw(db tbl col);
   }
   elsif ( $n_parts == 2 ) {
      if ( $type eq 'column' ) {
         @part_names = qw(tbl col);
      }
      elsif ( $type eq 'table' ) {
         @part_names = qw(db tbl);
      }
      else {
         die "Invalid identifier type: $type";
      }
   }
   elsif ( $n_parts == 1 ) {
      @part_names = ( $type eq 'column' ? 'col' : 'tbl' );
   }
   else {
      die "Invalid number of parts in $type reference: $ident";
   }

   my %ident_struct;
   @ident_struct{@part_names} = @ident_parts;

   my $qualifier = $self->{SchemaQualifier};
   if ( $qualifier ) {
      if ( $type eq 'column' && !$ident_struct{tbl} ) {
         my $qcol = $qualifier->qualify_column(
            column => $ident_struct{col},
         );
         $ident_struct{db}  = $qcol->{db}  if $qcol->{db};
         $ident_struct{tbl} = $qcol->{tbl} if $qcol->{tbl};
      }
      elsif ( $type eq 'table' && !$ident_struct{db} ) {
         my $db = $qualifier->get_database_for_table(
            table => $ident_struct{tbl},
         );
         $ident_struct{db} = $db if $db;
      }
   }

   MKDEBUG && _d($type, "identifier struct:", Dumper(\%ident_struct));
   return \%ident_struct;
}
3683 | 3679 | |||
# Remove backticks from a "db.tbl" name and split it at the dot.
#
# Returns the list (db, tbl). When the name has no second part, the single
# name is returned as tbl and $default_db (possibly undef) as db.
sub split_unquote {
   my ( $self, $db_tbl, $default_db ) = @_;
   $db_tbl =~ s/`//g;
   my ( $first, $second ) = split(/[.]/, $db_tbl);
   return $second ? ($first, $second) : ($default_db, $first);
}
3694 | 3690 | |||
# Report whether a piece of SQL text looks like an identifier.
#
# Returns 0 for a false value, anything containing a quote character, a
# plain number, and the words NULL and DUAL. Otherwise returns 1 only if
# the whole text matches the file-level $column_ident pattern.
sub is_identifier {
   my ( $self, $thing ) = @_;

   my $is_not_ident
      =  !$thing
      || $thing =~ m/\s*['"]/                   # has a quote
      || $thing =~ m/^\s*\d+(?:\.\d+)?\s*$/     # number
      || $thing =~ m/^\s*(?>
            NULL
           |DUAL
         )\s*$/xi;
   return 0 if $is_not_ident;

   return $thing =~ m/^\s*$column_ident\s*$/ ? 1 : 0;
}
3713 | 3709 | |||
# Store the object that parse_identifier consults to fill in missing
# database and table names. Returns nothing.
sub set_SchemaQualifier {
   my ( $self, $qualifier ) = @_;
   $self->{SchemaQualifier} = $qualifier;
   return;
}
3719 | 3715 | |||
# Print a debug line to STDERR: "# package:line PID " followed by the
# arguments joined with spaces. Undefined arguments are shown as "undef"
# and embedded newlines are followed by "# " so continuation lines stay
# commented.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
3727 | 3723 | |||
3728 | 3724 | } # package scope | ||
3729 | 3725 | 1; | ||
3730 | 3726 | |||
3731 | 3727 | # ########################################################################### | ||
3732 | 3728 | # End SQLParser package | ||
3733 | 3729 | # ########################################################################### | ||
3734 | 3730 | |||
3735 | 3731 | # ########################################################################### | ||
3736 | 3732 | # TableUsage package | ||
3737 | 3733 | # This package is a copy without comments from the original. The original | ||
3738 | 3734 | # with comments and its test file can be found in the Bazaar repository at, | ||
3739 | 3735 | # lib/TableUsage.pm | ||
3740 | 3736 | # t/lib/TableUsage.t | ||
3741 | 3737 | # See https://launchpad.net/percona-toolkit for more information. | ||
3742 | 3738 | # ########################################################################### | ||
3743 | 3739 | { | ||
3744 | 3740 | package TableUsage; | ||
3745 | 3741 | |||
3746 | 3742 | { # package scope | ||
3747 | 3743 | use strict; | ||
3748 | 3744 | use warnings FATAL => 'all'; | ||
3749 | 3745 | use English qw(-no_match_vars); | ||
3750 | 3746 | |||
3751 | 3747 | use Data::Dumper; | ||
3752 | 3748 | $Data::Dumper::Indent = 1; | ||
3753 | 3749 | $Data::Dumper::Sortkeys = 1; | ||
3754 | 3750 | $Data::Dumper::Quotekeys = 0; | ||
3755 | 3751 | |||
3756 | 3752 | use constant MKDEBUG => $ENV{MKDEBUG} || 0; | ||
3757 | 3753 | |||
# Construct a TableUsage object.
#
# Required arguments (each must be true): QueryParser, SQLParser.
# All arguments are stored on the object. constant_data_value defaults to
# 'DUAL' and can be overridden by passing it.
#
# Dies with "I need a <name> argument" if a required argument is missing.
sub new {
   my ( $class, %args ) = @_;
   for my $required ( qw(QueryParser SQLParser) ) {
      die "I need a $required argument" unless $args{$required};
   }

   my %self = (
      constant_data_value => 'DUAL',
      %args,
   );
   return bless \%self, $class;
}
3773 | 3769 | |||
# Public entry point: determine the tables used by a query.
#
# Required args:
#   query - the SQL text to analyze
#
# The query is first parsed with $self->{SQLParser}.  On success the parsed
# struct is handed to _get_tables_used_from_query_struct().  If parsing dies
# with an error matching /Cannot parse/, _get_tables_used_from_query_parser()
# is used instead; any other parse error is re-thrown.
#
# Returns whatever the chosen helper returns.  Per-query state on $self
# (errors, query_reparsed, ex_query_struct, schemas, table_for) is reset at
# the start of every call.
sub get_table_usage {
   my ( $self, %args ) = @_;
   for my $needed ( qw(query) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $query = $args{query};
   MKDEBUG && _d('Getting table access for',
      substr($query, 0, 100), (length($query) > 100 ? '...' : ''));

   # Forget everything learned from the previous query.
   $self->{errors}          = [];
   $self->{query_reparsed}  = 0;      # only explain extended once
   $self->{ex_query_struct} = undef;  # EXplain EXtended query struct
   $self->{schemas}         = undef;  # db->tbl->cols from ^
   $self->{table_for}       = undef;  # table alias from ^

   my $query_struct = eval { $self->{SQLParser}->parse($query) };
   my $parse_error  = $EVAL_ERROR;

   my $tables;
   if ( !$parse_error ) {
      $tables = $self->_get_tables_used_from_query_struct(
         query_struct => $query_struct,
         %args,
      );
   }
   else {
      MKDEBUG && _d('Failed to parse query with SQLParser:', $parse_error);
      # Only "Cannot parse" errors fall back to the simpler parser.
      die $parse_error unless $parse_error =~ m/Cannot parse/;
      $tables = $self->_get_tables_used_from_query_parser(%args);
   }

   MKDEBUG && _d('Query table usage:', Dumper($tables));
   return $tables;
}
3814 | 3810 | |||
# Accessor: returns the value stored in $self->{errors} (the same
# reference, not a copy).
sub errors {
   my $self = shift;
   return $self->{errors};
}
3819 | 3815 | |||
# Fallback used when SQLParser cannot parse a query: derive table usage from
# $self->{QueryParser} instead.
#
# Required args:
#   query - the SQL text
#
# The query is cleaned with QueryParser->clean_query(); its first word,
# uppercased, becomes the context of every table.  For DROP queries the
# dropped object type is appended (e.g. DROP_TABLE).  Backticks are removed
# from the table names returned by QueryParser->get_tables().
#
# Returns an arrayref holding at most one list of
# { table => ..., context => ... } hashrefs; the outer list is empty when
# QueryParser finds no tables.
#
# Dies if the cleaned query does not begin with a word, or if a DROP query
# has no object type.
sub _get_tables_used_from_query_parser {
   my ( $self, %args ) = @_;
   my @required_args = qw(query);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($query) = @args{@required_args};
   MKDEBUG && _d('Getting tables used from query parser');

   $query = $self->{QueryParser}->clean_query($query);
   my ($query_type) = $query =~ m/^\s*(\w+)\s+/;
   # Check before uppercasing: uc(undef) raises an "uninitialized" warning,
   # which is fatal in this package and would mask the message below.
   die "Query does not begin with a word" unless $query_type; # shouldn't happen
   $query_type = uc $query_type;

   if ( $query_type eq 'DROP' ) {
      my ($drop_what) = $query =~ m/^\s*DROP\s+(\w+)\s+/i;
      die "Invalid DROP query: $query" unless $drop_what;
      # Distinguish e.g. DROP_TABLE from DROP_DATABASE.
      $query_type .= '_' . uc($drop_what);
   }

   my @tables_used;
   foreach my $table ( $self->{QueryParser}->get_tables($query) ) {
      $table =~ s/`//g;
      push @{$tables_used[0]}, {
         table   => $table,
         context => $query_type,
      };
   }

   return \@tables_used;
}
3851 | 3847 | |||
# Work out which tables a parsed query uses, and in which context.
#
# Required args:
#   query_struct - parsed query (hashref) to analyze
#   query        - the SQL text (passed through to the helpers)
#
# Returns an arrayref of lists of { context => ..., table => ... } hashrefs.
# All query types produce a single inner list, except a multi-table UPDATE,
# which produces one inner list per entry returned by
# _get_tables_used_in_set().  A query that references no tables is reported
# as using $self->{constant_data_value}.
#
# Whenever a clause contains columns whose table cannot be determined, a
# dbh is available and the query has not been reparsed yet,
# _reparse_query() is tried; if it succeeds the analysis is restarted from
# scratch with the same args.
sub _get_tables_used_from_query_struct {
   my ( $self, %args ) = @_;
   for my $needed ( qw(query_struct query) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $query_struct = $args{query_struct};

   MKDEBUG && _d('Getting table used from query struct');

   my $query_type = uc $query_struct->{type};

   # CREATE: the created table, plus the tables of a trailing SELECT if any.
   if ( $query_type eq 'CREATE' ) {
      MKDEBUG && _d('CREATE query');
      my $select_usage;
      my $sq_struct = $query_struct->{subqueries}->[0];
      if ( $sq_struct ) {
         MKDEBUG && _d('CREATE query with SELECT');
         $select_usage = $self->_get_tables_used_from_query_struct(
            %args,
            query        => $sq_struct->{query},
            query_struct => $sq_struct,
         );
      }
      return [
         [
            { context => 'CREATE', table => $query_struct->{name} },
            ( $select_usage ? @{$select_usage->[0]} : () ),
         ],
      ];
   }

   my $tables = $self->_get_tables($query_struct);
   unless ( $tables && @$tables ) {
      MKDEBUG && _d("Query does not use any tables");
      my $constant_usage = {
         context => $query_type,
         table   => $self->{constant_data_value},
      };
      return [ [ $constant_usage ] ];
   }

   # The WHERE clause is analyzed first because both branches below need it.
   my ($where, $where_ambig);
   if ( $query_struct->{where} ) {
      ($where, $where_ambig) = $self->_get_tables_used_in_where(
         %args,
         tables => $tables,
         where  => $query_struct->{where},
      );

      if ( $where_ambig && $self->{dbh} && !$self->{query_reparsed} ) {
         MKDEBUG && _d("Using EXPLAIN EXTENDED to disambiguate columns");
         return $self->_get_tables_used_from_query_struct(%args)
            if $self->_reparse_query(%args);
         MKDEBUG && _d('Failed to disambiguate columns');
      }
   }

   my @tables_used;

   if ( $query_type eq 'UPDATE' && @{$query_struct->{tables}} > 1 ) {
      MKDEBUG && _d("Multi-table UPDATE");

      # Every table in the table list is reported as joined...
      my @join_tables;
      foreach my $tbl_ref ( @$tables ) {
         my $qualified_table = $self->_qualify_table_name(
            %args,
            tables => $tables,
            db     => $tbl_ref->{db},
            tbl    => $tbl_ref->{tbl},
         );
         my $table_usage = {
            context => 'JOIN',
            table   => $qualified_table,
         };
         MKDEBUG && _d("Table usage from TLIST:", Dumper($table_usage));
         push @join_tables, $table_usage;
      }
      # ...followed by the tables joined implicitly in the WHERE clause.
      if ( $where && $where->{joined_tables} ) {
         foreach my $joined_table ( @{$where->{joined_tables}} ) {
            my $table_usage = {
               context => $query_type,
               table   => $joined_table,
            };
            MKDEBUG && _d("Table usage from WHERE (implicit join):",
               Dumper($table_usage));
            push @join_tables, $table_usage;
         }
      }

      my @where_tables;
      if ( $where && $where->{filter_tables} ) {
         foreach my $filter_table ( @{$where->{filter_tables}} ) {
            my $table_usage = {
               context => 'WHERE',
               table   => $filter_table,
            };
            MKDEBUG && _d("Table usage from WHERE:", Dumper($table_usage));
            push @where_tables, $table_usage;
         }
      }

      my $set_tables = $self->_get_tables_used_in_set(
         %args,
         tables => $tables,
         set    => $query_struct->{set},
      );
      # One usage list per SET entry; the join and where usages are shared
      # by all of them.
      foreach my $set_table ( @$set_tables ) {
         my @table_usage = (
            {  # the written table
               context => 'UPDATE',
               table   => $set_table->{table},
            },
            {  # source of data written to the written table
               context => 'SELECT',
               table   => $set_table->{value},
            },
         );
         MKDEBUG && _d("Table usage from UPDATE SET:", Dumper(\@table_usage));
         push @tables_used, [
            @table_usage,
            @join_tables,
            @where_tables,
         ];
      }

      return \@tables_used;
   }  # multi-table UPDATE

   # All other queries produce a single usage list, $tables_used[0], which
   # is only created when the first usage is pushed onto it.

   if ( $query_type eq 'SELECT' ) {
      my ($clist_tables, $clist_ambig) = $self->_get_tables_used_in_columns(
         %args,
         tables  => $tables,
         columns => $query_struct->{columns},
      );

      if ( $clist_ambig && $self->{dbh} && !$self->{query_reparsed} ) {
         MKDEBUG && _d("Using EXPLAIN EXTENDED to disambiguate columns");
         return $self->_get_tables_used_from_query_struct(%args)
            if $self->_reparse_query(%args);
         MKDEBUG && _d('Failed to disambiguate columns');
      }

      foreach my $clist_table ( @$clist_tables ) {
         my $table_usage = {
            context => 'SELECT',
            table   => $clist_table,
         };
         MKDEBUG && _d("Table usage from CLIST:", Dumper($table_usage));
         push @{$tables_used[0]}, $table_usage;
      }
   }

   if ( @$tables > 1 || $query_type ne 'SELECT' ) {
      my $default_context = @$tables > 1 ? 'TLIST' : $query_type;
      foreach my $table ( @$tables ) {
         my $qualified_table = $self->_qualify_table_name(
            %args,
            tables => $tables,
            db     => $table->{db},
            tbl    => $table->{tbl},
         );

         my $context   = $default_context;
         my $join_type = $table->{join} ? $table->{join}->{condition} : undef;
         if ( $join_type ) {
            $context = 'JOIN';
            if ( $join_type eq 'using' ) {
               MKDEBUG && _d("Table joined with USING condition");
               my $joined_table = $self->_qualify_table_name(
                  %args,
                  tables => $tables,
                  tbl    => $table->{join}->{to},
               );
               # The table joined to was already recorded as TLIST.
               $self->_change_context(
                  tables      => $tables,
                  table       => $joined_table,
                  tables_used => $tables_used[0],
                  old_context => 'TLIST',
                  new_context => 'JOIN',
               );
            }
            elsif ( $join_type eq 'on' ) {
               MKDEBUG && _d("Table joined with ON condition");
               my ($on_tables, $on_ambig) = $self->_get_tables_used_in_where(
                  %args,
                  tables => $tables,
                  where  => $table->{join}->{where},
                  clause => 'JOIN condition',  # just for debugging
               );
               MKDEBUG && _d("JOIN ON tables:", Dumper($on_tables));

               if ( $on_ambig && $self->{dbh} && !$self->{query_reparsed} ) {
                  MKDEBUG && _d("Using EXPLAIN EXTENDED",
                     "to disambiguate columns");
                  return $self->_get_tables_used_from_query_struct(%args)
                     if $self->_reparse_query(%args);
                  MKDEBUG && _d('Failed to disambiguate columns');
               }

               foreach my $joined_table ( @{$on_tables->{joined_tables}} ) {
                  $self->_change_context(
                     tables      => $tables,
                     table       => $joined_table,
                     tables_used => $tables_used[0],
                     old_context => 'TLIST',
                     new_context => 'JOIN',
                  );
               }
            }
            else {
               warn "Unknown JOIN condition: $table->{join}->{condition}";
            }
         }

         my $table_usage = {
            context => $context,
            table   => $qualified_table,
         };
         MKDEBUG && _d("Table usage from TLIST:", Dumper($table_usage));
         push @{$tables_used[0]}, $table_usage;
      }
   }

   # Tables joined implicitly in the WHERE clause become JOINs too.
   if ( $where && $where->{joined_tables} ) {
      foreach my $joined_table ( @{$where->{joined_tables}} ) {
         MKDEBUG && _d("Table joined implicitly in WHERE:", $joined_table);
         $self->_change_context(
            tables      => $tables,
            table       => $joined_table,
            tables_used => $tables_used[0],
            old_context => 'TLIST',
            new_context => 'JOIN',
         );
      }
   }

   # Where does the written data come from?
   if ( $query_type =~ m/(?:INSERT|REPLACE)/ ) {
      if ( $query_struct->{select} ) {
         MKDEBUG && _d("Getting tables used in INSERT-SELECT");
         my $select_tables = $self->_get_tables_used_from_query_struct(
            %args,
            query_struct => $query_struct->{select},
         );
         push @{$tables_used[0]}, @{$select_tables->[0]};
      }
      else {
         my $table_usage = {
            context => 'SELECT',
            table   => $self->{constant_data_value},
         };
         MKDEBUG && _d("Table usage from SET/VALUES:", Dumper($table_usage));
         push @{$tables_used[0]}, $table_usage;
      }
   }
   elsif ( $query_type eq 'UPDATE' ) {
      my $set_tables = $self->_get_tables_used_in_set(
         %args,
         tables => $tables,
         set    => $query_struct->{set},
      );
      foreach my $set_table ( @$set_tables ) {
         my $source = $set_table->{value_is_table}
                    ? $set_table->{table}
                    : $self->{constant_data_value};
         my $table_usage = {
            context => 'SELECT',
            table   => $source,
         };
         MKDEBUG && _d("Table usage from SET:", Dumper($table_usage));
         push @{$tables_used[0]}, $table_usage;
      }
   }

   if ( $where && $where->{filter_tables} ) {
      foreach my $filter_table ( @{$where->{filter_tables}} ) {
         my $table_usage = {
            context => 'WHERE',
            table   => $filter_table,
         };
         MKDEBUG && _d("Table usage from WHERE:", Dumper($table_usage));
         push @{$tables_used[0]}, $table_usage;
      }
   }

   return \@tables_used;
}
4137 | 4133 | |||
# Determine the tables referenced by a SELECT's column list (CLIST).
#
# Required args:
#   tables  - arrayref of the query's table structs
#   columns - arrayref of the query's column structs
#
# Returns a two-element list: an arrayref of qualified table names, and the
# number of columns whose table could not be determined.
#
# Three cases are handled:
#   * one table in the query: every column belongs to it;
#   * a lone "*" or "tbl.*": that table, or all tables of the query;
#   * otherwise each column is resolved individually (duplicates are
#     reported once), after giving _ex_qualify_column() a chance to
#     qualify it.
#
# Note: in the last case each element of @$columns is replaced in place by
# the value _ex_qualify_column() returns for it.
sub _get_tables_used_in_columns {
   my ( $self, %args ) = @_;
   my @required_args = qw(tables columns);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tables, $columns) = @args{@required_args};

   MKDEBUG && _d("Getting tables used in CLIST");
   my @tables;
   my $ambig = 0;  # found any ambiguous columns?
   if ( @$tables == 1 ) {
      MKDEBUG && _d("Single table SELECT:", $tables->[0]->{tbl});
      my $table = $self->_qualify_table_name(
         %args,
         db  => $tables->[0]->{db},
         tbl => $tables->[0]->{tbl},
      );
      @tables = ($table);
   }
   elsif ( @$columns == 1 && $columns->[0]->{col} eq '*' ) {
      if ( $columns->[0]->{tbl} ) {
         MKDEBUG && _d("SELECT all columns from one table");
         my $table = $self->_qualify_table_name(
            %args,
            db  => $columns->[0]->{db},
            tbl => $columns->[0]->{tbl},
         );
         @tables = ($table);
      }
      else {
         MKDEBUG && _d("SELECT all columns from all tables");
         foreach my $tbl_ref ( @$tables ) {
            my $table = $self->_qualify_table_name(
               %args,
               tables => $tables,
               db     => $tbl_ref->{db},
               tbl    => $tbl_ref->{tbl},
            );
            push @tables, $table;
         }
      }
   }
   else {
      MKDEBUG && _d(scalar @$tables, "table SELECT");
      my %seen;
      my $colno = 0;  # position of the current column in the CLIST
      COLUMN:
      foreach my $column ( @$columns ) {
         MKDEBUG && _d('Getting table for column', Dumper($column));
         if ( $column->{col} eq '*' && !$column->{tbl} ) {
            MKDEBUG && _d('Ignoring FUNC(*) column');
            next COLUMN;
         }
         $column = $self->_ex_qualify_column(
            col    => $column,
            colno  => $colno,
            n_cols => scalar @$columns,
         );
         if ( !$column->{tbl} ) {
            MKDEBUG && _d("Column", $column->{col}, "is not table-qualified;",
               "and query has multiple tables; cannot determine its table");
            $ambig++;
            next COLUMN;
         }
         my $table = $self->_qualify_table_name(
            %args,
            db  => $column->{db},
            tbl => $column->{tbl},
         );
         push @tables, $table if $table && !$seen{$table}++;
      }
      continue {
         # Runs for skipped columns as well, so $colno always matches the
         # column's position.  Previously an ambiguous column left $colno
         # unchanged and every later column was passed a stale position.
         $colno++;
      }
   }

   return (\@tables, $ambig);
}
4216 | 4212 | |||
# Classify the tables referenced by the conditions of a WHERE clause (or of
# a JOIN ... ON clause).
#
# Required args:
#   tables - arrayref of the query's table structs
#   where  - arrayref of condition structs with left_arg and right_arg
# Optional args:
#   clause - clause name, used in debug output only
#
# Returns a two-element list:
#   * a hashref with sorted lists filter_tables (tables compared against
#     values) and joined_tables (tables compared against another column);
#   * the number of condition columns whose table could not be determined.
#
# A condition with an undefined argument, or with two plain values, is
# recorded as filtering $self->{constant_data_value}.
sub _get_tables_used_in_where {
   my ( $self, %args ) = @_;
   for my $needed ( qw(tables where) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $tables     = $args{tables};
   my $where      = $args{where};
   my $sql_parser = $self->{SQLParser};

   MKDEBUG && _d("Getting tables used in", $args{clause} || 'WHERE');

   my (%filter_tables, %join_tables);
   my $ambig = 0;  # found any ambiguous tables?

   CONDITION:
   foreach my $cond ( @$where ) {
      MKDEBUG && _d("Condition:", Dumper($cond));
      my @cond_tables;         # tables used in this condition
      my $n_vals        = 0;   # args that are plain values
      my $is_constant   = 0;   # an arg is undefined
      my $unknown_table = 0;   # an arg is a column of an unknown table

      ARG:
      foreach my $arg ( qw(left_arg right_arg) ) {
         if ( !defined $cond->{$arg} ) {
            MKDEBUG && _d($arg, "is a constant value");
            $is_constant = 1;
            next ARG;
         }

         if ( !$sql_parser->is_identifier($cond->{$arg}) ) {
            MKDEBUG && _d($arg, "is a value");
            $n_vals++;
            next ARG;
         }

         MKDEBUG && _d($arg, "is an identifier");
         my $ident_struct = $sql_parser->parse_identifier(
            'column',
            $cond->{$arg}
         );
         $ident_struct = $self->_ex_qualify_column(
            col       => $ident_struct,
            where_arg => $arg,
         );

         if ( !$ident_struct->{tbl} ) {
            if ( @$tables != 1 ) {
               MKDEBUG && _d("Condition column is not table-qualified and",
                  "query has multiple tables; cannot determine its table");
               # Functions and numbers are not columns of some table.
               unless (    $cond->{$arg} =~ m/\w+\(/        # a function
                        || $cond->{$arg} =~ m/^[\d.]+$/ ) { # a number
                  $unknown_table = 1;
               }
               $ambig++;
               next ARG;
            }
            MKDEBUG && _d("Condition column is not table-qualified; ",
               "using query's only table:", $tables->[0]->{tbl});
            $ident_struct->{tbl} = $tables->[0]->{tbl};
         }

         if ( !$ident_struct->{db} && @$tables == 1 && $tables->[0]->{db} ) {
            MKDEBUG && _d("Condition column is not database-qualified; ",
               "using its table's database:", $tables->[0]->{db});
            $ident_struct->{db} = $tables->[0]->{db};
         }

         my $table = $self->_qualify_table_name(
            %args,
            %$ident_struct,
         );
         push @cond_tables, $table if $table;
      }  # ARG

      if ( $is_constant || $n_vals == 2 ) {
         MKDEBUG && _d("Condition is a constant or two values");
         $filter_tables{$self->{constant_data_value}} = undef;
      }
      elsif ( @cond_tables == 1 ) {
         if ( $unknown_table ) {
            MKDEBUG && _d("Condition joins table",
               $cond_tables[0], "to column from unknown table");
            $join_tables{$cond_tables[0]} = undef;
         }
         else {
            MKDEBUG && _d("Condition filters table", $cond_tables[0]);
            $filter_tables{$cond_tables[0]} = undef;
         }
      }
      elsif ( @cond_tables == 2 ) {
         MKDEBUG && _d("Condition joins tables",
            $cond_tables[0], "and", $cond_tables[1]);
         $join_tables{$cond_tables[0]} = undef;
         $join_tables{$cond_tables[1]} = undef;
      }
   }  # CONDITION

   my %used = (
      filter_tables => [ sort keys %filter_tables ],
      joined_tables => [ sort keys %join_tables   ],
   );
   return ( \%used, $ambig );
}
4327 | 4323 | |||
# Determine which tables are written, and where the written data comes
# from, for the SET clause of an UPDATE.
#
# Required args:
#   tables - arrayref of the query's table structs
#   set    - arrayref of SET structs (tbl, db, value)
#
# Returns an arrayref of hashrefs:
#   * single-table query: exactly one { table, value } entry whose value is
#     $self->{constant_data_value}; the SET structs are not inspected;
#   * multi-table query: one { table, value, value_is_table } entry per SET
#     struct that has a tbl.  value is the qualified table of the assigned
#     column when the assigned value is an identifier, else the constant
#     data value.
#
# NOTE(review): in the multi-table case an identifier value without a table
# part reaches _qualify_table_name() with an undefined tbl, which that
# method rejects -- confirm whether such values can occur.
sub _get_tables_used_in_set {
   my ( $self, %args ) = @_;
   for my $needed ( qw(tables set) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $tables     = $args{tables};
   my $set        = $args{set};
   my $sql_parser = $self->{SQLParser};

   MKDEBUG && _d("Getting tables used in SET");

   if ( @$tables == 1 ) {
      my $only_table = $self->_qualify_table_name(
         %args,
         db  => $tables->[0]->{db},
         tbl => $tables->[0]->{tbl},
      );
      return [
         {
            table => $only_table,
            value => $self->{constant_data_value},
         },
      ];
   }

   my @set_tables;
   SET_COND:
   foreach my $cond ( @$set ) {
      next SET_COND unless $cond->{tbl};
      my $written_table = $self->_qualify_table_name(
         %args,
         db  => $cond->{db},
         tbl => $cond->{tbl},
      );

      # Assume constant data until the value proves to be a column.
      my $value          = $self->{constant_data_value};
      my $value_is_table = 0;
      if ( $sql_parser->is_identifier($cond->{value}) ) {
         my $ident_struct = $sql_parser->parse_identifier(
            'column',
            $cond->{value},
         );
         $value_is_table = 1;
         $value          = $self->_qualify_table_name(
            %args,
            db  => $ident_struct->{db},
            tbl => $ident_struct->{tbl},
         );
      }

      push @set_tables, {
         table          => $written_table,
         value          => $value,
         value_is_table => $value_is_table,
      };
   }

   return \@set_tables;
}
4385 | 4381 | |||
# Translate a table name or alias into the table's real name.
#
# Required args:
#   tables - arrayref of table structs (tbl, optional alias)
#   name   - table name or alias to look up; compared case-insensitively
#
# Returns the tbl of the first table struct whose tbl or alias matches, or
# nothing if no table matches.
sub _get_real_table_name {
   my ( $self, %args ) = @_;
   for my $needed ( qw(tables name) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $tables = $args{tables};
   my $wanted = lc $args{name};

   TABLE:
   foreach my $candidate ( @$tables ) {
      my $is_match = lc($candidate->{tbl}) eq $wanted
                  || lc($candidate->{alias} || "") eq $wanted;
      next TABLE unless $is_match;
      MKDEBUG && _d("Real table name for", $wanted, "is", $candidate->{tbl});
      return $candidate->{tbl};
   }

   MKDEBUG && _d("Table", $wanted, "does not exist in query");
   return;
}
4405 | 4401 | |||
# Resolve a table name or alias to its real name and prefix it with a
# database when one can be determined.
#
# Required args:
#   tables - arrayref of the query's table structs
#   tbl    - table name or alias, optionally written as "db.tbl"
# Optional args:
#   db         - database to use when tbl carries none
#   default_db - last-resort database
#
# The database is taken from, in order: the "db." part of tbl, the db arg,
# the first table struct with the same tbl and a db, the default_db arg.
# If $self->{ex_query_struct} is set, its from list replaces the tables arg.
#
# Returns "db.tbl", or just "tbl" if no database is known, or nothing if
# the table is not found among the tables.
sub _qualify_table_name {
   my ( $self, %args) = @_;
   for my $needed ( qw(tables tbl) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my $tables = $args{tables};
   my $table  = $args{tbl};

   MKDEBUG && _d("Qualifying table with database:", $table);

   # "db.tbl" => ($tbl, $db); plain "tbl" leaves $db undefined.
   my ($tbl, $db) = reverse split /[.]/, $table;

   $tables = $self->{ex_query_struct}->{from} if $self->{ex_query_struct};

   $tbl = $self->_get_real_table_name(tables => $tables, name => $tbl);
   return unless $tbl;  # shouldn't happen

   my $db_tbl;
   if ( my $explicit_db = $db || $args{db} ) {
      $db_tbl = "$explicit_db.$tbl";
   }
   else {
      TABLE:
      foreach my $tbl_info ( @$tables ) {
         next TABLE unless ($tbl_info->{tbl} eq $tbl) && $tbl_info->{db};
         $db_tbl = "$tbl_info->{db}.$tbl";
         last TABLE;
      }

      if ( !$db_tbl ) {
         if ( $args{default_db} ) {
            $db_tbl = "$args{default_db}.$tbl";
         }
         else {
            MKDEBUG && _d("Cannot determine database for table", $tbl);
            $db_tbl = $tbl;
         }
      }
   }

   MKDEBUG && _d("Table qualified with database:", $db_tbl);
   return $db_tbl;
}
4454 | 4450 | |||
# Change the context of the first matching entry in a table-usage list.
#
# Required args:
#   tables_used - arrayref of { table, context } hashrefs, modified in place
#   table       - table name to look for
#   old_context - context the entry must currently have
#   new_context - context to give it
#   tables      - required, but not otherwise used by this method
#
# Only the first entry matching both table and old_context is changed.
# Returns nothing.
sub _change_context {
   my ( $self, %args) = @_;
   for my $needed ( qw(tables_used table old_context new_context tables) ) {
      die "I need a $needed argument" unless $args{$needed};
   }
   my ($tables_used, $table, $old_context, $new_context)
      = @args{qw(tables_used table old_context new_context)};
   MKDEBUG && _d("Change context of table", $table, "from", $old_context,
      "to", $new_context);

   USED_TABLE:
   foreach my $used_table ( @$tables_used ) {
      next USED_TABLE if $used_table->{table} ne $table;
      next USED_TABLE if $used_table->{context} ne $old_context;
      $used_table->{context} = $new_context;
      return;
   }

   MKDEBUG && _d("Table", $table, "is not used; cannot set its context");
   return;
}
4474 | 4470 | |||
# Run EXPLAIN EXTENDED for a query and return the rewritten query text that
# the server reports via SHOW WARNINGS.
#
# Args (positional):
#   $query - the SQL text to explain
#   $db    - optional; if true, a USE for this database is executed first
#
# Returns the message of the first SHOW WARNINGS row when that row is a
# Note with code 1003.  If EXPLAIN EXTENDED fails with an error matching
# /No database/, 'NO_DB_SELECTED' is appended to $self->{errors} and nothing
# is returned.  Dies on any other EXPLAIN error, or if the first warning is
# not the expected note.
#
# NOTE(review): the lowercase level/code/message keys presumably rely on the
# dbh returning lowercased column names -- confirm against the code that
# creates the dbh.
sub _explain_query {
   my ($self, $query, $db) = @_;
   my $dbh = $self->{dbh};

   if ( $db ) {
      my $use_sql = "USE `$db`";
      MKDEBUG && _d($dbh, $use_sql);
      $dbh->do($use_sql);
   }

   my $explain_sql = "EXPLAIN EXTENDED $query";
   MKDEBUG && _d($dbh, $explain_sql);
   eval {
      $dbh->do($explain_sql);  # don't need the result
   };
   if ( $EVAL_ERROR ) {
      die $EVAL_ERROR unless $EVAL_ERROR =~ m/No database/i;
      MKDEBUG && _d($EVAL_ERROR);
      push @{$self->{errors}}, 'NO_DB_SELECTED';
      return;
   }

   my $warnings_sql = "SHOW WARNINGS";
   MKDEBUG && _d($dbh, $warnings_sql);
   my $warning = $dbh->selectrow_hashref($warnings_sql);
   MKDEBUG && _d(Dumper($warning));

   # The rewritten query is delivered as a Note with code 1003.
   my $is_rewrite_note = ($warning->{level} || "") =~ m/Note/i
                      && ($warning->{code}  || 0) == 1003;
   if ( !$is_rewrite_note ) {
      die join('',
         "EXPLAIN EXTENDED failed:\n",
         " Level: ",  ($warning->{level}   || ""), "\n",
         " Code: ",   ($warning->{code}    || ""), "\n",
         "Message: ", ($warning->{message} || ""), "\n",
      );
   }

   return $warning->{message};
}
4514 | 4510 | |||
# Return the table-reference list of a parsed query.
#
# The list lives under a different key depending on the query type:
# 'from' for SELECT and DELETE, 'into' for INSERT and REPLACE, 'tables' for
# UPDATE.  Dies for any other query type.
sub _get_tables {
   my ( $self, $query_struct ) = @_;

   my $query_type = uc $query_struct->{type};

   my $tbl_refs;
   if ( $query_type =~ m/(?:SELECT|DELETE)/ ) {
      $tbl_refs = 'from';
   }
   elsif ( $query_type =~ m/(?:INSERT|REPLACE)/ ) {
      $tbl_refs = 'into';
   }
   elsif ( $query_type =~ m/UPDATE/ ) {
      $tbl_refs = 'tables';
   }
   else {
      die "Cannot find table references for $query_type queries";
   }

   return $query_struct->{$tbl_refs};
}
4526 | 4522 | |||
# Re-parse a SELECT through EXPLAIN EXTENDED and record what it reveals.
#
# The rewritten query returned by _explain_query() is parsed and used to
# build:
#   $self->{schemas}          - db => tbl => col => 1, all lowercased
#   $self->{ex_query_struct}  - the parsed struct of the rewritten query
# Aliases found in its FROM list are also added to the hash referenced by
# $self->{table_for}.
#
# Args (named):
#   query        - original query text, passed to _explain_query()
#   query_struct - parsed struct of the original query; only its 'type'
#                  is read here
# Returns: 1 on success; nothing if the query is not a SELECT or if
#          _explain_query() returned nothing.
# Side effect: $self->{query_reparsed} is set to 1 even when the method
#              returns early.
sub _reparse_query {
   my ($self, %args) = @_;
   my @required_args = qw(query query_struct);
   my ($query, $query_struct) = @args{@required_args};
   MKDEBUG && _d("Reparsing query with EXPLAIN EXTENDED");

   $self->{query_reparsed} = 1;

   return unless uc($query_struct->{type}) eq 'SELECT';

   my $new_query = $self->_explain_query($query);
   return unless $new_query;  # failure

   my $schemas         = {};
   my $table_for       = $self->{table_for};
   my $ex_query_struct = $self->{SQLParser}->parse($new_query);

   # Plain loops instead of map in void context: these passes exist only
   # for their side effects.  "|| []" keeps a struct without a FROM or
   # column list from being dereferenced (or autovivified).
   TABLE:
   foreach my $tbl ( @{ $ex_query_struct->{from} || [] } ) {
      next TABLE unless $tbl->{db} && $tbl->{tbl};
      my ($db_name, $tbl_name) = (lc $tbl->{db}, lc $tbl->{tbl});
      $schemas->{$db_name}->{$tbl_name} ||= {};
      if ( $tbl->{alias} ) {
         $table_for->{lc $tbl->{alias}} = {
            db  => $db_name,
            tbl => $tbl_name,
         };
      }
   }

   COLUMN:
   foreach my $col ( @{ $ex_query_struct->{columns} || [] } ) {
      next COLUMN unless $col->{db} && $col->{tbl};
      $schemas->{lc $col->{db}}->{lc $col->{tbl}}->{lc $col->{col}} = 1;
   }

   $self->{schemas}         = $schemas;
   $self->{ex_query_struct} = $ex_query_struct;

   return 1;  # success
}
4567 | 4563 | |||
# Fill in the missing db and/or tbl of a column using the query that was
# re-parsed from EXPLAIN EXTENDED (see $self->{ex_query_struct}).
#
# Args (named):
#   col       - column struct (hashref with col and optionally db, tbl)
#   colno     - optional position of the column in the column list
#   n_cols    - number of columns in the original column list; only read
#               when colno is defined and the name at colno did not match
#   where_arg - optional key to inspect in each WHERE condition of the
#               re-parsed query (the identifier stored under that key is
#               compared with the column name)
# Returns: $col unchanged if there is no re-parsed query or it is already
#          fully qualified; nothing if $col is false; otherwise a column
#          struct with db/tbl filled in when they could be determined
#          (this may be a struct taken from the re-parsed query rather
#          than the one passed in).
sub _ex_qualify_column {
   my ($self, %args) = @_;
   my ($col, $colno, $n_cols, $where_arg) = @args{qw(col colno n_cols where_arg)};

   return $col unless $self->{ex_query_struct};
   my $ex = $self->{ex_query_struct};

   # This guard must precede the debug line below: with debugging enabled,
   # reading $col->{col} from an undefined $col would autovivify it into
   # an empty hashref and the guard would never fire.
   return unless $col;

   MKDEBUG && _d('Qualifying column',$col->{col},'with EXPLAIN EXTENDED query');

   return $col if $col->{db} && $col->{tbl};

   my $colname = lc $col->{col};

   if ( !$col->{tbl} ) {
      if ( $where_arg ) {
         # Look for an identifier with the same column name in the WHERE
         # conditions of the re-parsed query.
         MKDEBUG && _d('Searching WHERE conditions for column');
         CONDITION:
         foreach my $cond ( @{$ex->{where}} ) {
            if ( defined $cond->{$where_arg}
                 && $self->{SQLParser}->is_identifier($cond->{$where_arg}) ) {
               # Parsed identifiers are cached on the condition itself so
               # each one is parsed at most once.
               my $ident_struct = $cond->{"${where_arg}_ident_struct"};
               if ( !$ident_struct ) {
                  $ident_struct = $self->{SQLParser}->parse_identifier(
                     'column',
                     $cond->{$where_arg},
                  );
                  $cond->{"${where_arg}_ident_struct"} = $ident_struct;
               }
               if ( lc($ident_struct->{col}) eq $colname ) {
                  $col = $ident_struct;
                  last CONDITION;
               }
            }
         }
      }
      elsif ( defined $colno
              && $ex->{columns}->[$colno]
              && lc($ex->{columns}->[$colno]->{col}) eq $colname ) {
         MKDEBUG && _d('Exact match by col name and number');
         $col = $ex->{columns}->[$colno];
      }
      elsif ( defined $colno
              && scalar @{$ex->{columns}} == $n_cols ) {
         # Same number of columns in both lists: trust the position even
         # though the names differ.
         MKDEBUG && _d('Match by column number in CLIST');
         $col = $ex->{columns}->[$colno];
      }
      else {
         # Last resort: accept the column only if exactly one known
         # db.tbl has a column with this name.
         MKDEBUG && _d('Searching for unique column in every db.tbl');
         my ($uniq_db, $uniq_tbl);
         my $colcnt  = 0;
         my $schemas = $self->{schemas};
         DATABASE:
         foreach my $db ( keys %$schemas ) {
            TABLE:
            foreach my $tbl ( keys %{$schemas->{$db}} ) {
               if ( $schemas->{$db}->{$tbl}->{$colname} ) {
                  $uniq_db  = $db;
                  $uniq_tbl = $tbl;
                  last DATABASE if ++$colcnt > 1;  # ambiguous, stop early
               }
            }
         }
         if ( $colcnt == 1 ) {
            $col->{db}  = $uniq_db;
            $col->{tbl} = $uniq_tbl;
         }
      }
   }

   if ( !$col->{db} && $col->{tbl} ) {
      MKDEBUG && _d('Column has table, needs db');
      if ( my $real_tbl = $self->{table_for}->{lc $col->{tbl}} ) {
         MKDEBUG && _d('Table is an alias');
         $col->{db}  = $real_tbl->{db};
         $col->{tbl} = $real_tbl->{tbl};
      }
      else {
         MKDEBUG && _d('Searching for unique table in every db');
         my $real_tbl = $self->_get_real_table_name(
            tables => $ex->{from},
            name   => $col->{tbl},
         );
         if ( $real_tbl ) {
            $real_tbl = lc $real_tbl;
            my $uniq_db;
            my $dbcnt   = 0;
            my $schemas = $self->{schemas};
            DATABASE:
            foreach my $db ( keys %$schemas ) {
               if ( exists $schemas->{$db}->{$real_tbl} ) {
                  $uniq_db = $db;
                  last DATABASE if ++$dbcnt > 1;  # ambiguous, stop early
               }
            }
            if ( $dbcnt == 1 ) {
               $col->{db}  = $uniq_db;
               $col->{tbl} = $real_tbl;
            }
         }
      }
   }

   MKDEBUG && _d('Qualified column:', Dumper($col));
   return $col;
}
4675 | 4671 | |||
# Debug printer: writes one "# package:line PID message" line to STDERR.
# Undefined arguments are shown as the word 'undef', and embedded newlines
# are followed by "# " so continuation lines stay commented.
sub _d {
   my ($package, undef, $line) = caller 0;

   my @words;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @words, $text;
   }

   print STDERR "# $package:$line $PID ", join(' ', @words), "\n";
}
4683 | 4679 | |||
4684 | 4680 | } # package scope | ||
4685 | 4681 | 1; | ||
4686 | 4682 | } | ||
4687 | 4683 | # ########################################################################### | ||
4688 | 4684 | # End TableUsage package | ||
4689 | 4685 | # ########################################################################### | ||
4690 | 4686 | |||
4691 | 4687 | # ########################################################################### | ||
4692 | 4688 | # Daemon package | ||
4693 | 4689 | # This package is a copy without comments from the original. The original | ||
4694 | 4690 | # with comments and its test file can be found in the Bazaar repository at, | ||
4695 | 4691 | # lib/Daemon.pm | ||
4696 | 4692 | # t/lib/Daemon.t | ||
4697 | 4693 | # See https://launchpad.net/percona-toolkit for more information. | ||
4698 | 4694 | # ########################################################################### | ||
{
# Daemon: forks the program into the background, redirects its standard
# streams and manages an optional PID file.
package Daemon;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use POSIX qw(setsid);

# Constructor.
# Args (named):
#   o - required; object answering has($name) and get($name).  Its 'log'
#       and 'pid' options, when it has them, give the log file and the
#       PID file.
# Dies if 'o' is missing, or if the PID file already exists and either
# holds the PID of a running process or holds no PID (see check_PID_file).
sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(o) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $o = $args{o};
   my $self = {
      o        => $o,
      log_file => $o->has('log') ? $o->get('log') : undef,
      PID_file => $o->has('pid') ? $o->get('pid') : undef,
   };

   # Called as a plain function because $self is not blessed yet.
   check_PID_file(undef, $self->{PID_file});

   PTDEBUG && _d('Daemonized child will log to', $self->{log_file});
   return bless $self, $class;
}

# Fork; the parent exits and the child becomes a session leader, changes
# directory to /, writes the PID file and redirects STDIN/STDOUT/STDERR.
# Without a log file, STDOUT and STDERR are sent to /dev/null only if they
# are terminals.  Dies on any failure.
sub daemonize {
   my ( $self ) = @_;

   PTDEBUG && _d('About to fork and daemonize');
   defined (my $pid = fork()) or die "Cannot fork: $OS_ERROR";
   if ( $pid ) {
      PTDEBUG && _d('Parent PID', $PID, 'exiting after forking child PID',$pid);
      exit;
   }

   PTDEBUG && _d('Daemonizing child PID', $PID);
   $self->{PID_owner} = $PID;
   $self->{child}     = 1;

   POSIX::setsid() or die "Cannot start a new session: $OS_ERROR";
   chdir '/'       or die "Cannot chdir to /: $OS_ERROR";

   $self->_make_PID_file();

   $OUTPUT_AUTOFLUSH = 1;

   PTDEBUG && _d('Redirecting STDIN to /dev/null');
   close STDIN;
   open  STDIN, '<', '/dev/null'
      or die "Cannot reopen STDIN to /dev/null: $OS_ERROR";

   if ( $self->{log_file} ) {
      PTDEBUG && _d('Redirecting STDOUT and STDERR to', $self->{log_file});
      close STDOUT;
      open  STDOUT, '>>', $self->{log_file}
         or die "Cannot open log file $self->{log_file}: $OS_ERROR";

      close STDERR;
      open  STDERR, '>&', \*STDOUT
         or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
   }
   else {
      if ( -t STDOUT ) {
         PTDEBUG && _d('No log file and STDOUT is a terminal;',
            'redirecting to /dev/null');
         close STDOUT;
         open  STDOUT, '>', '/dev/null'
            or die "Cannot reopen STDOUT to /dev/null: $OS_ERROR";
      }
      if ( -t STDERR ) {
         PTDEBUG && _d('No log file and STDERR is a terminal;',
            'redirecting to /dev/null');
         close STDERR;
         open  STDERR, '>', '/dev/null'
            or die "Cannot reopen STDERR to /dev/null: $OS_ERROR";
      }
   }

   return;
}

# Return the contents of the given PID file with one trailing newline
# removed ('' for an empty file).  The file is read directly instead of
# through the shell, so any file name is safe.  Dies if it cannot be opened.
sub _read_PID_file {
   my ( $PID_file ) = @_;
   open my $fh, '<', $PID_file
      or die "Cannot read PID file $PID_file: $OS_ERROR";
   my $contents = do { local $INPUT_RECORD_SEPARATOR = undef; <$fh> };
   close $fh;
   $contents = '' unless defined $contents;
   chomp $contents;
   return $contents;
}

# Check whether the PID file may be (over)written.
# Callable as a method, or as a function with (undef, $file).
# Dies if the file exists and holds the PID of a process that answers
# signal 0, or if it exists but holds no PID; warns (and returns) if the
# PID it holds is not running.  Does nothing if there is no PID file.
sub check_PID_file {
   my ( $self, $file ) = @_;
   my $PID_file = $self ? $self->{PID_file} : $file;
   PTDEBUG && _d('Checking PID file', $PID_file);
   if ( $PID_file && -f $PID_file ) {
      my $pid = _read_PID_file($PID_file);
      PTDEBUG && _d('PID file exists; it contains PID', $pid);
      if ( $pid ) {
         my $pid_is_alive = kill 0, $pid;
         if ( $pid_is_alive ) {
            die "The PID file $PID_file already exists "
               . " and the PID that it contains, $pid, is running";
         }
         else {
            warn "Overwriting PID file $PID_file because the PID that it "
               . "contains, $pid, is not running";
         }
      }
      else {
         die "The PID file $PID_file already exists but it does not "
            . "contain a PID";
      }
   }
   else {
      PTDEBUG && _d('No PID file');
   }
   return;
}

# Create the PID file for a program that does not daemonize, and record
# this process as its owner so DESTROY removes it.
# Dies if called after daemonize().
sub make_PID_file {
   my ( $self ) = @_;
   if ( exists $self->{child} ) {
      die "Do not call Daemon::make_PID_file() for daemonized scripts";
   }
   $self->_make_PID_file();
   $self->{PID_owner} = $PID;
   return;
}

# Write the current PID to the PID file, if one is configured.  The file
# is re-checked first; dies if it cannot be opened, written or closed.
sub _make_PID_file {
   my ( $self ) = @_;

   my $PID_file = $self->{PID_file};
   if ( !$PID_file ) {
      PTDEBUG && _d('No PID file to create');
      return;
   }

   $self->check_PID_file();

   open my $PID_FH, '>', $PID_file
      or die "Cannot open PID file $PID_file: $OS_ERROR";
   print $PID_FH $PID
      or die "Cannot print to PID file $PID_file: $OS_ERROR";
   close $PID_FH
      or die "Cannot close PID file $PID_file: $OS_ERROR";

   PTDEBUG && _d('Created PID file:', $self->{PID_file});
   return;
}

# Delete the PID file if it exists; a failed unlink only warns.
sub _remove_PID_file {
   my ( $self ) = @_;
   if ( $self->{PID_file} && -f $self->{PID_file} ) {
      unlink $self->{PID_file}
         or warn "Cannot remove PID file $self->{PID_file}: $OS_ERROR";
      PTDEBUG && _d('Removed PID file');
   }
   else {
      PTDEBUG && _d('No PID to remove');
   }
   return;
}

# Remove the PID file, but only in the process recorded as its owner, so
# other processes holding a copy of this object leave the file alone.
sub DESTROY {
   my ( $self ) = @_;

   $self->_remove_PID_file() if ($self->{PID_owner} || 0) == $PID;

   return;
}

# Debug printer: "# package:line PID message" on STDERR.
sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
4877 | 4873 | # ########################################################################### | ||
4878 | 4874 | # End Daemon package | ||
4879 | 4875 | # ########################################################################### | ||
4880 | 4876 | |||
4881 | 4877 | # ########################################################################### | ||
4882 | 4878 | # Runtime package | ||
4883 | 4879 | # This package is a copy without comments from the original. The original | ||
4884 | 4880 | # with comments and its test file can be found in the Bazaar repository at, | ||
4885 | 4881 | # lib/Runtime.pm | ||
4886 | 4882 | # t/lib/Runtime.t | ||
4887 | 4883 | # See https://launchpad.net/percona-toolkit for more information. | ||
4888 | 4884 | # ########################################################################### | ||
4889 | 4885 | { | ||
4890 | 4886 | package Runtime; | ||
4891 | 4887 | |||
4892 | 4888 | use strict; | ||
4893 | 4889 | use warnings FATAL => 'all'; | ||
4894 | 4890 | use English qw(-no_match_vars); | ||
4895 | 4891 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
4896 | 4892 | |||
# Constructor.
# Args (named):
#   now     - required; stored and later called to obtain the current time
#   runtime - optional; a negative value is rejected
# Any other named arguments are stored in the object as given.
# The object starts with no start/end time, no time left, and stop off.
sub new {
   my ( $class, %args ) = @_;

   for my $required ( qw(now) ) {
      die "I need a $required argument" unless $args{$required};
   }

   my $runtime = $args{runtime} || 0;
   die "runtime argument must be greater than zero" if $runtime < 0;

   my %state = (
      %args,
      start_time => undef,
      end_time   => undef,
      time_left  => undef,
      stop       => 0,
   );

   return bless \%state, $class;
}
4918 | 4914 | |||
# Return how much run time remains.
#
# Any arguments are passed through to the 'now' callback.
# Returns 0 once stop() has been called; nothing if the callback returned
# an undefined time or no runtime was configured; otherwise end time minus
# current time (zero or negative once the runtime has elapsed).
# The first call records the start time, and the end time is computed from
# the first defined current time plus the runtime.
sub time_left {
   my ( $self, %args ) = @_;

   if ( $self->{stop} ) {
      PTDEBUG && _d("No time left because stop was called");
      return 0;
   }

   my $now = $self->{now}->(%args);
   PTDEBUG && _d("Current time:", $now);

   $self->{start_time} = $now unless defined $self->{start_time};

   return unless defined $now;
   return unless defined( my $runtime = $self->{runtime} );

   unless ( $self->{end_time} ) {
      $self->{end_time} = $now + $runtime;
      PTDEBUG && _d("End time:", $self->{end_time});
   }

   my $remaining = $self->{end_time} - $now;
   $self->{time_left} = $remaining;
   PTDEBUG && _d("Time left:", $self->{time_left});
   return $remaining;
}
4948 | 4944 | |||
# Return 1 if there is still time to run, 0 if not.
# Arguments are passed through to time_left().  An undefined time left
# means there is no limit, so the answer is always 1 in that case.
sub have_time {
   my ( $self, %args ) = @_;

   my $remaining = $self->time_left(%args);

   # No limit known: run forever.
   if ( !defined $remaining ) {
      return 1;
   }

   # Zero or less means the runtime has elapsed.
   return 0 if $remaining <= 0;
   return 1;
}
4955 | 4951 | |||
# Return the time elapsed since the recorded start time.
#
# Any arguments are passed through to the 'now' callback.
# Returns 0 if no start time has been recorded yet, otherwise current time
# minus start time.  Warns if the result is negative.
sub time_elapsed {
   my ( $self, %args ) = @_;

   # Test definedness, not truth: a start time of 0 is a valid start time
   # (time_left() also uses defined to decide whether it has been set).
   my $start_time = $self->{start_time};
   return 0 unless defined $start_time;

   my $now = $self->{now}->(%args);
   PTDEBUG && _d("Current time:", $now);

   my $time_elapsed = $now - $start_time;
   PTDEBUG && _d("Time elapsed:", $time_elapsed);
   if ( $time_elapsed < 0 ) {
      warn "Current time $now is earlier than start time $start_time";
   }
   return $time_elapsed;
}
4972 | 4968 | |||
# Forget the recorded start time, end time and time left, and clear the
# stop flag.  Other fields of the object are left as they are.
sub reset {
   my ( $self ) = @_;

   @{$self}{qw(start_time end_time time_left)} = (undef, undef, undef);
   $self->{stop} = 0;

   PTDEBUG && _d("Reset runtime");
   return;
}
4982 | 4978 | |||
# Raise the stop flag; time_left() returns 0 while it is set.
sub stop {
   my $self = shift;
   $self->{stop} = 1;
   return;
}
4988 | 4984 | |||
# Clear the stop flag.  Recorded times are not touched.
sub start {
   my $self = shift;
   $self->{stop} = 0;
   return;
}
4994 | 4990 | |||
4995 | 4991 | sub _d { | ||
4996 | 4992 | my ($package, undef, $line) = caller 0; | ||
4997 | 4993 | @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } | ||
4998 | 4994 | map { defined $_ ? $_ : 'undef' } | ||
4999 | 4995 | @_; | ||
5000 | 4996 | print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; |
The diff has been truncated for viewing.