Merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint into lp:percona-toolkit/2.1
- pt-fingerprint
- Merge into 2.1
Proposed by
Daniel Nichter
Status: | Merged |
---|---|
Merged at revision: | 224 |
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint |
Merge into: | lp:percona-toolkit/2.1 |
Diff against target: |
2398 lines (+2333/-5) 8 files modified
bin/pt-fingerprint (+2143/-0) lib/QueryRewriter.pm (+24/-4) t/lib/QueryRewriter.t (+59/-1) t/pt-fingerprint/basics.t (+101/-0) t/pt-fingerprint/samples/query001 (+2/-0) t/pt-fingerprint/samples/query001.fingerprint (+1/-0) t/pt-fingerprint/samples/query002 (+2/-0) t/pt-fingerprint/samples/query002.fingerprint (+1/-0) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-fingerprint |
Related bugs: | |
Related blueprints: |
Add pt-fingerprint
(Medium)
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Daniel Nichter | Approve | ||
Review via email: mp+100250@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'bin/pt-fingerprint' | |||
2 | --- bin/pt-fingerprint 1970-01-01 00:00:00 +0000 | |||
3 | +++ bin/pt-fingerprint 2012-03-30 22:06:22 +0000 | |||
4 | @@ -0,0 +1,2143 @@ | |||
5 | 1 | #!/usr/bin/env perl | ||
6 | 2 | |||
7 | 3 | # This program is part of Percona Toolkit: http://www.percona.com/software/ | ||
8 | 4 | # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal | ||
9 | 5 | # notices and disclaimers. | ||
10 | 6 | |||
11 | 7 | use strict; | ||
12 | 8 | use warnings FATAL => 'all'; | ||
13 | 9 | use constant MKDEBUG => $ENV{MKDEBUG} || 0; | ||
14 | 10 | |||
15 | 11 | # ########################################################################### | ||
16 | 12 | # OptionParser package | ||
17 | 13 | # This package is a copy without comments from the original. The original | ||
18 | 14 | # with comments and its test file can be found in the Bazaar repository at, | ||
19 | 15 | # lib/OptionParser.pm | ||
20 | 16 | # t/lib/OptionParser.t | ||
21 | 17 | # See https://launchpad.net/percona-toolkit for more information. | ||
22 | 18 | # ########################################################################### | ||
23 | 19 | { | ||
24 | 20 | package OptionParser; | ||
25 | 21 | |||
26 | 22 | use strict; | ||
27 | 23 | use warnings FATAL => 'all'; | ||
28 | 24 | use English qw(-no_match_vars); | ||
29 | 25 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
30 | 26 | |||
31 | 27 | use List::Util qw(max); | ||
32 | 28 | use Getopt::Long; | ||
33 | 29 | |||
34 | 30 | my $POD_link_re = '[LC]<"?([^">]+)"?>'; | ||
35 | 31 | |||
# Constructor.  All arguments are optional named args.  Caller-supplied args
# may override the "overridable" settings below and may add extra keys (e.g.
# file, description, usage), but they can never override the internal state,
# which is always initialized last.
#
# Returns a new OptionParser object.
sub new {
   my ( $class, %args ) = @_;

   # No argument is mandatory right now, so this check never fires.
   for my $required ( qw() ) {
      die "I need a $required argument" unless $args{$required};
   }

   my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
   $program_name = $PROGRAM_NAME unless $program_name;

   my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';

   # These settings are used internally to instantiate a second OptionParser
   # that parses the DSN OPTIONS section.  Tools don't tinker with them.
   my %overridable = (
      head1            => 'OPTIONS',
      skip_rules       => 0,
      item             => '--(.*)',
      attributes       => {
         'type'       => 1,
         'short form' => 1,
         'group'      => 1,
         'default'    => 1,
         'cumulative' => 1,
         'negatable'  => 1,
      },
      parse_attributes => \&_parse_attribs,
   );

   # Internal state; always wins over anything passed in %args.
   my %internal = (
      strict         => 1,   # disabled by a special rule
      program_name   => $program_name,
      opts           => {},
      got_opts       => 0,
      short_opts     => {},
      defaults       => {},
      groups         => {},
      allowed_groups => {},
      errors         => [],
      rules          => [],  # desc of rules for --help
      mutex          => [],  # rule: opts are mutually exclusive
      atleast1       => [],  # rule: at least one opt is required
      disables       => {},  # rule: opt disables other opts
      defaults_to    => {},  # rule: opt defaults to value of other opt
      DSNParser      => undef,
      default_files  => [
         "/etc/percona-toolkit/percona-toolkit.conf",
         "/etc/percona-toolkit/$program_name.conf",
         "$home/.percona-toolkit.conf",
         "$home/.$program_name.conf",
      ],
      types          => {
         string => 's', # standard Getopt type
         int    => 'i', # standard Getopt type
         float  => 'f', # standard Getopt type
         Hash   => 'H', # hash, formed from a comma-separated list
         hash   => 'h', # hash as above, but only if a value is given
         Array  => 'A', # array, similar to Hash
         array  => 'a', # array, similar to hash
         DSN    => 'd', # DSN
         size   => 'z', # size with kMG suffix (powers of 2^10)
         time   => 'm', # time, with an optional suffix of s/h/m/d
      },
   );

   # Later keys override earlier ones: defaults < caller args < internal.
   my $self = { %overridable, %args, %internal };

   return bless $self, $class;
}
102 | 98 | |||
# Read the option specs and rules from the POD in $file (defaults to
# $self->{file}, then this file) and load them into the object.  If the
# file also has a "=head1 DSN OPTIONS" section, a second OptionParser is
# used to parse it and the result is used to create $self->{DSNParser}.
# The paragraph following "=head1 VERSION", if any, is saved as
# $self->{version}.
#
# Dies if the file cannot be opened or if the POD cannot be parsed.
sub get_specs {
   my ( $self, $file ) = @_;
   $file ||= $self->{file} || __FILE__;
   my @specs = $self->_pod_to_specs($file);
   $self->_parse_specs(@specs);

   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
   my $contents = do { local $/ = undef; <$fh> };
   close $fh;
   if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
      PTDEBUG && _d('Parsing DSN OPTIONS');
      my $dsn_attribs = {
         dsn  => 1,
         copy => 1,
      };
      # DSN attribute values of "yes"/"no" are converted to 1/0; other
      # true values are kept as-is and false values are left untouched.
      my $parse_dsn_attribs = sub {
         my ( $self, $option, $attribs ) = @_;
         foreach my $attrib ( keys %$attribs ) {
            my $val = $attribs->{$attrib};
            next unless $val;
            $attribs->{$attrib} = $val eq 'yes' ? 1
                                : $val eq 'no'  ? 0
                                :                 $val;
         }
         return {
            key => $option,
            %$attribs,
         };
      };
      # Direct method call instead of indirect object syntax
      # ("new OptionParser(...)"), which is ambiguous to the parser.
      my $dsn_o = OptionParser->new(
         description      => 'DSN OPTIONS',
         head1            => 'DSN OPTIONS',
         dsn              => 0,         # XXX don't infinitely recurse!
         item             => '\* (.)',  # key opts are a single character
         skip_rules       => 1,         # no rules before opts
         attributes       => $dsn_attribs,
         parse_attributes => $parse_dsn_attribs,
      );
      my @dsn_opts = map {
         my $opts = {
            key  => $_->{spec}->{key},
            dsn  => $_->{spec}->{dsn},
            copy => $_->{spec}->{copy},
            desc => $_->{desc},
         };
         $opts;
      } $dsn_o->_pod_to_specs($file);
      $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
   }

   if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
      $self->{version} = $1;
      PTDEBUG && _d($self->{version});
   }

   return;
}
162 | 158 | |||
# Accessor: returns the object stored in $self->{DSNParser} (undef if none
# has been set).
sub DSNParser {
   my $self = shift;
   return $self->{DSNParser};
}
167 | 163 | |||
# Returns the list of default config file paths stored in
# $self->{default_files}.
sub get_defaults_files {
   my $self  = shift;
   my $files = $self->{default_files};
   return @$files;
}
172 | 168 | |||
# Parse the POD in $file (defaults to $self->{file}, then this file) and
# extract option specs and option rules from the "=head1 $self->{head1}"
# section.  The file is read in paragraph mode.
#
# Returns a flat list: first one hashref per option (keys spec, desc,
# group), then the rule paragraphs as plain strings.
#
# Dies if the file cannot be opened, the section is missing, an option has
# an unrecognized attribute or no description, or no specs are found.
sub _pod_to_specs {
   my ( $self, $file ) = @_;
   $file ||= $self->{file} || __FILE__;
   open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";

   my @specs = ();
   my @rules = ();
   my $para;

   # Paragraph mode: each read returns one blank-line-delimited paragraph.
   local $INPUT_RECORD_SEPARATOR = '';
   # Skip ahead to the paragraph that starts the wanted =head1 section.
   while ( $para = <$fh> ) {
      next unless $para =~ m/^=head1 $self->{head1}/;
      last;
   }

   # Paragraphs between the heading and =over are rules, unless skip_rules
   # is set, in which case they are discarded.
   while ( $para = <$fh> ) {
      last if $para =~ m/^=over/;
      next if $self->{skip_rules};
      chomp $para;
      $para =~ s/\s+/ /g;
      $para =~ s/$POD_link_re/$1/go;
      PTDEBUG && _d('Option rule:', $para);
      push @rules, $para;
   }

   # $para is false here if EOF was reached before finding =over.
   die "POD has no $self->{head1} section" unless $para;

   # Each pass handles one =item (if $para is one) and then advances $para
   # to the next =item, or to undef at the next =head1 or at EOF.
   do {
      if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
         chomp $para;
         PTDEBUG && _d($para);
         my %attribs;

         $para = <$fh>; # read next paragraph, possibly attributes

         if ( $para =~ m/: / ) { # attributes
            $para =~ s/\s+\Z//g;
            # Attributes look like "name: value; name: value".
            %attribs = map {
                  my ( $attrib, $val) = split(/: /, $_);
                  die "Unrecognized attribute for --$option: $attrib"
                     unless $self->{attributes}->{$attrib};
                  ($attrib, $val);
               } split(/; /, $para);
            if ( $attribs{'short form'} ) {
               # Strip the leading dash from the short form.
               $attribs{'short form'} =~ s/-//;
            }
            $para = <$fh>; # read next paragraph, probably short help desc
         }
         else {
            PTDEBUG && _d('Option has no attributes');
         }

         $para =~ s/\s+\Z//g;
         $para =~ s/\s+/ /g;
         $para =~ s/$POD_link_re/$1/go;

         # Keep only the first sentence as the short help: cut at the first
         # period that is followed by a newline, by a space and a capital
         # letter, or by the end of the string.
         $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
         PTDEBUG && _d('Short help:', $para);

         die "No description after option spec $option" if $para =~ m/^=item/;

         # "--[no]foo" means option foo is negatable.
         if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
            $option = $base_option;
            $attribs{'negatable'} = 1;
         }

         push @specs, {
            spec => $self->{parse_attributes}->($self, $option, \%attribs),
            desc => $para
               . (defined $attribs{default} ? " (default $attribs{default})" : ''),
            group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
         };
      }
      # Skip the rest of this option's paragraphs.
      while ( $para = <$fh> ) {
         last unless $para;
         if ( $para =~ m/^=head1/ ) {
            $para = undef; # Can't 'last' out of a do {} block.
            last;
         }
         last if $para =~ m/^=item /;
      }
   } while ( $para );

   die "No valid specs in $self->{head1}" unless @specs;

   close $fh;
   return @specs, @rules;
}
261 | 257 | |||
# Load option specs and rules into the object.  @specs is a mixed list:
# hashrefs are option specs (keys spec, desc, group) and plain strings are
# rules.  Specs populate $self->{opts}, {short_opts}, {groups} and
# {defaults}; rules populate {rules}, {mutex}, {atleast1}, {defaults_to},
# {allowed_groups} and may clear {strict}.  "disables" rules found in option
# descriptions are resolved last, after all options are known.
#
# Dies on an unparsable spec, a duplicate long or short option, a rule that
# names a nonexistent option, or an unrecognized rule.
sub _parse_specs {
   my ( $self, @specs ) = @_;
   my %disables; # special rule that requires deferred checking

   foreach my $opt ( @specs ) {
      if ( ref $opt ) { # It's an option spec, not a rule.
         PTDEBUG && _d('Parsing opt spec:',
            map { ($_, '=>', $opt->{$_}) } keys %$opt);

         # The spec starts with the long name, optionally followed by
         # "|short".
         my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         if ( !$long ) {
            die "Cannot parse long option from spec $opt->{spec}";
         }
         $opt->{long} = $long;

         die "Duplicate long option --$long" if exists $self->{opts}->{$long};
         $self->{opts}->{$long} = $opt;

         if ( length $long == 1 ) {
            PTDEBUG && _d('Long opt', $long, 'looks like short opt');
            $self->{short_opts}->{$long} = $long;
         }

         if ( $short ) {
            die "Duplicate short option -$short"
               if exists $self->{short_opts}->{$short};
            $self->{short_opts}->{$short} = $long;
            $opt->{short} = $short;
         }
         else {
            $opt->{short} = undef;
         }

         # "!" and "+" in the spec mark negatable and cumulative options;
         # the word "required" in the description marks a required one.
         $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
         $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
         $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;

         $opt->{group} ||= 'default';
         $self->{groups}->{ $opt->{group} }->{$long} = 1;

         $opt->{value} = undef;
         $opt->{got} = 0;

         # The type is the single character after "=" in the spec.
         my ( $type ) = $opt->{spec} =~ m/=(.)/;
         $opt->{type} = $type;
         PTDEBUG && _d($long, 'type:', $type);


         # The custom types (H h A a d z m) are rewritten to "=s" in the
         # spec; the original type letter stays in $opt->{type}.
         $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );

         # A default is taken from "default VALUE" in the description; a
         # bare "default" with no value means 1.
         if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
            $self->{defaults}->{$long} = defined $def ? $def : 1;
            PTDEBUG && _d($long, 'default:', $def);
         }

         # --config always defaults to the list of default config files.
         if ( $long eq 'config' ) {
            $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
         }

         if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
            $disables{$long} = $dis;
            PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
         }

         $self->{opts}->{$long} = $opt;
      }
      else { # It's an option rule, not a spec.
         PTDEBUG && _d('Parsing rule:', $opt);
         push @{$self->{rules}}, $opt;
         my @participants = $self->_get_participants($opt);
         my $rule_ok = 0;

         # A rule may match more than one of the patterns below; e.g.
         # "one and only one" is both a mutex and an at-least-one rule.
         if ( $opt =~ m/mutually exclusive|one and only one/ ) {
            $rule_ok = 1;
            push @{$self->{mutex}}, \@participants;
            PTDEBUG && _d(@participants, 'are mutually exclusive');
         }
         if ( $opt =~ m/at least one|one and only one/ ) {
            $rule_ok = 1;
            push @{$self->{atleast1}}, \@participants;
            PTDEBUG && _d(@participants, 'require at least one');
         }
         if ( $opt =~ m/default to/ ) {
            $rule_ok = 1;
            $self->{defaults_to}->{$participants[0]} = $participants[1];
            PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
         }
         if ( $opt =~ m/restricted to option groups/ ) {
            $rule_ok = 1;
            my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
            my @groups = split(',', $groups);
            # Note: s/\s+// removes only the first run of whitespace from
            # each group name.
            %{$self->{allowed_groups}->{$participants[0]}} = map {
               s/\s+//;
               $_ => 1;
            } @groups;
         }
         if( $opt =~ m/accepts additional command-line arguments/ ) {
            $rule_ok = 1;
            $self->{strict} = 0;
            PTDEBUG && _d("Strict mode disabled by rule");
         }

         die "Unrecognized option rule: $opt" unless $rule_ok;
      }
   }

   # Resolve the deferred "disables" rules now that every option exists.
   foreach my $long ( keys %disables ) {
      my @participants = $self->_get_participants($disables{$long});
      $self->{disables}->{$long} = \@participants;
      PTDEBUG && _d('Option', $long, 'disables', @participants);
   }

   return;
}
376 | 372 | |||
# Extract the long names of all options mentioned as --name or --[no]name
# in $str, in the order they appear.  Dies if any mentioned option is not
# in $self->{opts}.
sub _get_participants {
   my ( $self, $str ) = @_;
   my @mentioned = $str =~ m/--(?:\[no\])?([\w-]+)/g;
   my @participants;
   for my $name ( @mentioned ) {
      if ( !exists $self->{opts}->{$name} ) {
         die "Option --$name does not exist while processing rule $str";
      }
      push @participants, $name;
   }
   PTDEBUG && _d('Participants for', $str, ':', @participants);
   return @participants;
}
388 | 384 | |||
# Returns a shallow copy of the opts hash (long name => option hashref) as
# a list of key/value pairs.
sub opts {
   my $self     = shift;
   my %all_opts = %{ $self->{opts} };
   return %all_opts;
}
394 | 390 | |||
# Returns a shallow copy of the short_opts hash (short name => long name)
# as a list of key/value pairs.
sub short_opts {
   my $self      = shift;
   my %short_map = %{ $self->{short_opts} };
   return %short_map;
}
400 | 396 | |||
# Replace all option defaults with %defaults (long name => value).  The
# existing defaults are discarded first.  Dies on the first name that is
# not a known option; defaults set before that point remain in place.
sub set_defaults {
   my ( $self, %defaults ) = @_;
   my $fresh = $self->{defaults} = {};
   for my $name ( keys %defaults ) {
      exists $self->{opts}->{$name}
         or die "Cannot set default for nonexistent option $name";
      $fresh->{$name} = $defaults{$name};
      PTDEBUG && _d('Default val for', $name, ':', $defaults{$name});
   }
   return;
}
412 | 408 | |||
# Returns the defaults hashref itself (not a copy).
sub get_defaults {
   my $self = shift;
   return $self->{defaults};
}
417 | 413 | |||
# Returns the groups hashref itself (not a copy).
sub get_groups {
   my $self = shift;
   return $self->{groups};
}
422 | 418 | |||
# Record a value for an option.  $opt may be a long or a short option name.
# Cumulative options are incremented and $val is ignored; all other options
# are set to $val.  The option is marked as got.  Dies if $opt is neither a
# known long nor a known short option.
sub _set_option {
   my ( $self, $opt, $val ) = @_;

   my $long;
   if ( exists $self->{opts}->{$opt} ) {
      $long = $opt;
   }
   elsif ( exists $self->{short_opts}->{$opt} ) {
      $long = $self->{short_opts}->{$opt};
   }
   else {
      die "Getopt::Long gave a nonexistent option: $opt";
   }

   my $spec = $self->{opts}->{$long};
   if ( $spec->{is_cumulative} ) {
      $spec->{value}++;
   }
   else {
      $spec->{value} = $val;
   }
   $spec->{got} = 1;
   PTDEBUG && _d('Got option', $long, '=', $val);
}
439 | 435 | |||
# Parse @ARGV (and any config files) into the option values.  Steps:
#   1. Reset every option to its default (cumulative options to 0) and
#      clear previously saved errors.
#   2. Handle --config, which is only recognized as the first command-line
#      argument, given either as "--config FILES" or "--config=FILES".
#   3. Prepend the options read from the config files to @ARGV.
#   4. Run Getopt::Long on @ARGV.
#   5. Print the version and exit 0 if --version was given.
#   6. Check strict mode, mutex rules and at-least-one rules, then run
#      _check_opts on every option.
#
# Problems are recorded with save_error() rather than raised, except that
# an error reading a config file explicitly given with --config is fatal.
# Errors reading the default config files are ignored.
sub get_opts {
   my ( $self ) = @_;

   foreach my $long ( keys %{$self->{opts}} ) {
      $self->{opts}->{$long}->{got} = 0;
      $self->{opts}->{$long}->{value}
         = exists $self->{defaults}->{$long}       ? $self->{defaults}->{$long}
         : $self->{opts}->{$long}->{is_cumulative} ? 0
         : undef;
   }
   $self->{got_opts} = 0;

   $self->{errors} = [];

   if ( @ARGV && (my ($config_files) = $ARGV[0] =~ m/\A--config=(.*)\z/s) ) {
      # --config=FILES form.  --config is excluded from the GetOptions()
      # call below, so it must be consumed here.
      shift @ARGV;
      $self->_set_option('config', $config_files);
   }
   elsif ( @ARGV && $ARGV[0] eq "--config" ) {
      shift @ARGV;
      if ( @ARGV ) {
         $self->_set_option('config', shift @ARGV);
      }
      else {
         # Without this check, an undefined value would reach split()
         # below and die with an uninitialized-value warning.
         $self->save_error('--config requires a value');
      }
   }
   if ( $self->has('config') ) {
      my @extra_args;
      foreach my $filename ( split(',', $self->get('config')) ) {
         eval {
            push @extra_args, $self->_read_config_file($filename);
         };
         if ( $EVAL_ERROR ) {
            # Only a config file that the user asked for must be readable.
            if ( $self->got('config') ) {
               die $EVAL_ERROR;
            }
            elsif ( PTDEBUG ) {
               _d($EVAL_ERROR);
            }
         }
      }
      # Config file options go first so the command line overrides them.
      unshift @ARGV, @extra_args;
   }

   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions(
      map    { $_->{spec} => sub { $self->_set_option(@_); } }
      grep   { $_->{long} ne 'config' } # --config is handled specially above.
      values %{$self->{opts}}
   ) or $self->save_error('Error parsing options');

   if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
      if ( $self->{version} ) {
         print $self->{version}, "\n";
      }
      else {
         print "Error parsing version.  See the VERSION section of the tool's documentation.\n";
      }
      exit 0;
   }

   # Anything left in @ARGV was not recognized as an option.
   if ( @ARGV && $self->{strict} ) {
      $self->save_error("Unrecognized command-line options @ARGV");
   }

   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
      if ( @set > 1 ) {
         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
                      @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
                 . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
                 . ' are mutually exclusive.';
         $self->save_error($err);
      }
   }

   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { $self->{opts}->{$_}->{got} } @$required;
      if ( @set == 0 ) {
         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
                      @{$required}[ 0 .. scalar(@$required) - 2] )
                 .' or --'.$self->{opts}->{$required->[-1]}->{long};
         $self->save_error("Specify at least one of $err");
      }
   }

   $self->_check_opts( keys %{$self->{opts}} );
   $self->{got_opts} = 1;
   return;
}
522 | 518 | |||
# Apply the "disables" and "restricted to option groups" rules, check
# required options, and validate the type of each option named in @long.
# An option whose type cannot be validated yet (_validate_type returns
# without setting {parsed}) stays in the list and is retried on the next
# pass.
#
# Problems are recorded with save_error().  Dies if a pass leaves the list
# the same length as before it, which is treated as a circular dependency.
sub _check_opts {
   my ( $self, @long ) = @_;
   my $long_last = scalar @long;
   while ( @long ) {
      foreach my $i ( 0..$#long ) {
         my $long = $long[$i];
         # Slots deleted on an earlier pass are skipped.
         next unless $long;
         my $opt = $self->{opts}->{$long};
         if ( $opt->{got} ) {
            # An option given on the command line unsets the values of the
            # options it disables.
            if ( exists $self->{disables}->{$long} ) {
               my @disable_opts = @{$self->{disables}->{$long}};
               map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
               PTDEBUG && _d('Unset options', @disable_opts,
                  'because', $long,'disables them');
            }

            if ( exists $self->{allowed_groups}->{$long} ) {

               # Every group that is not explicitly allowed is restricted.
               my @restricted_groups = grep {
                  !exists $self->{allowed_groups}->{$long}->{$_}
               } keys %{$self->{groups}};

               # Collect the options in restricted groups that were given.
               my @restricted_opts;
               foreach my $restricted_group ( @restricted_groups ) {
                  RESTRICTED_OPT:
                  foreach my $restricted_opt (
                     keys %{$self->{groups}->{$restricted_group}} )
                  {
                     next RESTRICTED_OPT if $restricted_opt eq $long;
                     push @restricted_opts, $restricted_opt
                        if $self->{opts}->{$restricted_opt}->{got};
                  }
               }

               if ( @restricted_opts ) {
                  my $err;
                  if ( @restricted_opts == 1 ) {
                     $err = "--$restricted_opts[0]";
                  }
                  else {
                     $err = join(', ',
                               map { "--$self->{opts}->{$_}->{long}" }
                               grep { $_ }
                               @restricted_opts[0..scalar(@restricted_opts) - 2]
                            )
                          . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
                  }
                  $self->save_error("--$long is not allowed with $err");
               }
            }

         }
         elsif ( $opt->{is_required} ) {
            $self->save_error("Required option --$long must be specified");
         }

         $self->_validate_type($opt);
         if ( $opt->{parsed} ) {
            # NOTE(review): this relies on delete() shrinking the array
            # when trailing elements are removed, so that @long eventually
            # becomes empty -- confirm against perlfunc "delete".
            delete $long[$i];
         }
         else {
            PTDEBUG && _d('Temporarily failed to parse', $long);
         }
      }

      # No progress in this pass: give up rather than loop forever.
      die "Failed to parse options, possibly due to circular dependencies"
         if @long == $long_last;
      $long_last = @long;
   }

   return;
}
595 | 591 | |||
# Convert the raw value of option $opt according to its type and mark the
# option as parsed:
#   m (time)   value with an optional s/m/h/d suffix, converted to seconds;
#              with no suffix, the suffix from "(suffix X)" in the option's
#              description is used, else seconds
#   d (DSN)    parsed by $self->{DSNParser}; if a "defaults to" rule names
#              another option, that option must be parsed first, otherwise
#              this sub returns WITHOUT setting {parsed} so that the caller
#              can retry later
#   z (size)   handled by _parse_size()
#   H, h       comma-separated list converted to a hashref (h only if a
#              value is defined)
#   A, a       comma-separated list converted to an arrayref (a only if a
#              value is defined)
# Options with no type, or any other type, are marked parsed unchanged.
#
# Invalid time values and suffixes are recorded with save_error().
sub _validate_type {
   my ( $self, $opt ) = @_;
   return unless $opt;

   if ( !$opt->{type} ) {
      $opt->{parsed} = 1;
      return;
   }

   my $val = $opt->{value};

   if ( $val && $opt->{type} eq 'm' ) {  # type time
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
      my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
      if ( !defined $num ) {
         # The value has no digits (e.g. "abc").  Report it as a usage
         # error instead of dying later on an uninitialized value.
         $self->save_error("Invalid time value for --$opt->{long}: $val");
      }
      else {
         if ( !$suffix ) {
            my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
            $suffix = $s || 's';
            PTDEBUG && _d('No suffix given; using', $suffix, 'for',
               $opt->{long}, '(value:', $val, ')');
         }
         if ( $suffix =~ m/[smhd]/ ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $opt->{value} = ($prefix || '') . $val;
            PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
         }
         else {
            $self->save_error("Invalid time suffix for --$opt->{long}");
         }
      }
   }
   elsif ( $val && $opt->{type} eq 'd' ) {  # type DSN
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
      my $prev = {};
      my $from_key = $self->{defaults_to}->{ $opt->{long} };
      if ( $from_key ) {
         PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
         if ( $self->{opts}->{$from_key}->{parsed} ) {
            $prev = $self->{opts}->{$from_key}->{value};
         }
         else {
            # Not marked parsed on purpose: the caller retries this option
            # after the option it copies from has been parsed.
            PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
               $from_key, 'parsed');
            return;
         }
      }
      my $defaults = $self->{DSNParser}->parse_options($self);
      $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
   }
   elsif ( $val && $opt->{type} eq 'z' ) {  # type size
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
      $self->_parse_size($opt, $val);
   }
   elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
      # Split on commas that are not escaped with a backslash.
      $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
   }
   elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
      $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
   }
   else {
      PTDEBUG && _d('Nothing to validate for option',
         $opt->{long}, 'type', $opt->{type}, 'value', $val);
   }

   $opt->{parsed} = 1;
   return;
}
664 | 660 | |||
# Returns the current value of option $opt.  A one-character $opt is looked
# up as a short option name; anything else is taken as a long name.  Dies
# if the option does not exist.
sub get {
   my ( $self, $opt ) = @_;
   my $long = $opt;
   $long = $self->{short_opts}->{$opt} if length($opt) == 1;
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   return $self->{opts}->{$long}->{value};
}
672 | 668 | |||
# Returns the "got" flag of option $opt.  A one-character $opt is looked up
# as a short option name; anything else is taken as a long name.  Dies if
# the option does not exist.
sub got {
   my ( $self, $opt ) = @_;
   my $long = $opt;
   $long = $self->{short_opts}->{$opt} if length($opt) == 1;
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   return $self->{opts}->{$long}->{got};
}
680 | 676 | |||
# Returns true if option $opt exists.  A one-character $opt is looked up as
# a short option name; anything else is taken as a long name.  Returns 0 if
# a short name has no mapping.  Never dies.
sub has {
   my ( $self, $opt ) = @_;
   my $long = length($opt) == 1 ? $self->{short_opts}->{$opt} : $opt;
   return 0 unless defined $long;
   return exists $self->{opts}->{$long};
}
686 | 682 | |||
# Set the value of option $opt to $val.  A one-character $opt is looked up
# as a short option name; anything else is taken as a long name.  The "got"
# flag is not changed.  Dies if the option does not exist.
sub set {
   my ( $self, $opt, $val ) = @_;
   my $long = $opt;
   $long = $self->{short_opts}->{$opt} if length($opt) == 1;
   if ( !$long || !exists $self->{opts}->{$long} ) {
      die "Option $opt does not exist";
   }
   $self->{opts}->{$long}->{value} = $val;
   return;
}
695 | 691 | |||
# Append $error to the list of saved errors.  Returns nothing.
sub save_error {
   my $self  = shift;
   my $error = shift;
   push @{ $self->{errors} }, $error;
   return;
}
701 | 697 | |||
# Returns the arrayref of saved errors itself (not a copy).
sub errors {
   my $self = shift;
   return $self->{errors};
}
706 | 702 | |||
# Returns the "Usage: ..." line, newline-terminated.  Warns if no usage
# string is set, in which case the line is just "Usage: ".
sub usage {
   my $self  = shift;
   my $usage = $self->{usage};
   warn "No usage string is set" unless $usage; # XXX
   return "Usage: " . ($usage || '') . "\n";
}
712 | 708 | |||
# Returns the tool description (or the program name if no description is
# set) followed by a pointer to --help and perldoc.  Unless the
# DONT_BREAK_LINES environment variable is true, the text is broken into
# lines of at most 80 characters.  Trailing spaces are removed from every
# line.  Warns if no description is set.
sub descr {
   my ( $self ) = @_;
   warn "No description string is set" unless $self->{description}; # XXX
   my $descr = join('',
      ($self->{description} || $self->{program_name} || ''),
      " For more details, please use the --help option, ",
      "or try 'perldoc $PROGRAM_NAME' ",
      "for complete documentation.",
   );
   if ( !$ENV{DONT_BREAK_LINES} ) {
      my @chunks = $descr =~ m/(.{0,80})(?:\s+|$)/g;
      $descr = join("\n", @chunks);
   }
   $descr =~ s/ +$//mg;
   return $descr;
}
725 | 721 | |||
# If --help was given, print the full usage and exit 0.  Otherwise, if
# option errors were saved, print them and exit 1 so that callers and
# shell scripts can detect the failure.  If neither applies, print nothing.
#
# $file is the file whose SYNOPSIS is used to fill in a missing description
# or usage string (defaults to $self->{file}, then this file).  If $return
# is true, the sub returns instead of exiting.
sub usage_or_errors {
   my ( $self, $file, $return ) = @_;
   $file ||= $self->{file} || __FILE__;

   if ( !$self->{description} || !$self->{usage} ) {
      PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
      my %synop = $self->_parse_synopsis($file);
      $self->{description} ||= $synop{description};
      $self->{usage}       ||= $synop{usage};
      PTDEBUG && _d("Description:", $self->{description},
         "\nUsage:", $self->{usage});
   }

   # Check that the help option exists before reading it, so that a tool
   # without --help does not get an empty "help" entry autovivified in
   # $self->{opts}.
   if ( exists $self->{opts}->{help} && $self->{opts}->{help}->{got} ) {
      print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
      exit 0 unless $return;
   }
   elsif ( scalar @{$self->{errors}} ) {
      print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
      # Bad command-line options are a failure, so exit non-zero.
      exit 1 unless $return;
   }

   return;
}
750 | 746 | |||
# Builds (does not print) the error report: the usage banner, a bulleted
# list of the saved command-line errors if there are any, and the
# description text.
sub print_errors {
   my ( $self ) = @_;
   my $report = $self->usage() . "\n";
   my @errors = @{$self->{errors}};
   if ( @errors ) {
      $report .= join("\n * ", 'Errors in command-line arguments:', @errors)
               . "\n";
   }
   $report .= "\n" . $self->descr();
   return $report;
}
760 | 756 | |||
# Builds (does not print) the full --help text: description, usage banner,
# options listed per group (the 'default' group first, shown as "Options",
# then the other groups in sorted order), the option-type legend, any
# rules, the DSN parser's usage if one is attached, and finally the value
# of every option after argument processing.
# Dies unless get_opts() has already been run.
sub print_usage {
   my ( $self ) = @_;
   die "Run get_opts() before print_usage()" unless $self->{got_opts};
   my @opts = values %{$self->{opts}};

   # Widest long-option column entry.
   my $maxl = max(
      map {
         length($_->{long})               # option long name
         + ($_->{is_negatable} ? 4 : 0)   # "[no]" if opt is negatable
         + ($_->{type} ? 2 : 0)           # "=x" where x is the opt type
      }
      @opts);

   # Same measure, restricted to options that also have a short form.
   my $maxs = max(0,
      map {
         length($_)
         + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
         + ($self->{opts}->{$_}->{type} ? 2 : 0)
      }
      values %{$self->{short_opts}});

   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->usage();

   # 'default' first, then every other group alphabetically.
   my @groups = ( 'default',
                  sort grep { $_ ne 'default'; } keys %{$self->{groups}} );

   foreach my $group ( @groups ) {
      my $title = $group eq 'default' ? 'Options' : $group;
      $usage .= "\n" . $title . ":\n\n";

      my @group_opts = sort { $a->{long} cmp $b->{long} }
                       grep { $_->{group} eq $group }
                       @opts;
      foreach my $opt ( @group_opts ) {
         my $long  = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
         my $short = $opt->{short};
         my $desc  = $opt->{desc};

         $long .= "=$opt->{type}" if $opt->{type};

         if ( $opt->{type} && $opt->{type} eq 'm' ) {
            # Time options advertise their default suffix in the description.
            my ($s) = $desc =~ m/\(suffix (.)\)/;
            $s    ||= 's';
            $desc =~ s/\s+\(suffix .\)//;
            $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
                   . "d=days; if no suffix, $s is used.";
         }
         my @desc_lines = grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g;
         $desc = join("\n$rpad", @desc_lines);
         $desc =~ s/ +$//mg;

         $usage .= $short
                 ? sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc)
                 : sprintf(" --%-${lcol}s %s\n", $long, $desc);
      }
   }

   $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";

   if ( (my @rules = @{$self->{rules}}) ) {
      $usage .= "\nRules:\n\n";
      $usage .= join("\n", map { " $_" } @rules) . "\n";
   }
   if ( $self->{DSNParser} ) {
      $usage .= "\n" . $self->{DSNParser}->usage();
   }

   $usage .= "\nOptions and values after processing arguments:\n\n";
   foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
      my $val  = $opt->{value};
      my $type = $opt->{type} || '';
      my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      if ( $bool ) {
         $val = $val ? 'TRUE' : 'FALSE';
      }
      elsif ( !defined $val ) {
         $val = '(No value)';
      }
      elsif ( $type eq 'd' ) {
         $val = $self->{DSNParser}->as_string($val);
      }
      elsif ( $type =~ m/H|h/ ) {
         $val = join(',', sort keys %$val);
      }
      elsif ( $type =~ m/A|a/ ) {
         $val = join(',', @$val);
      }
      $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
   }
   return $usage;
}
848 | 844 | |||
# Prints $prompt and reads one line from STDIN with terminal echo turned
# off (for passwords).  May be called as a function or as a method.
# Returns the chomped response; dies if Term::ReadKey cannot be loaded or
# the read/print fails.
#
# The terminal is always put back into 'normal' mode, even when reading
# fails part-way, so an error never leaves the user's shell without echo.
sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt
      or die "Cannot print: $OS_ERROR";
   my $response;
   my $echo_off = 0;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      $echo_off = 1;
      chomp($response = <STDIN>);
      Term::ReadKey::ReadMode('normal');
      $echo_off = 0;
      print "\n"
         or die "Cannot print: $OS_ERROR";
   };
   my $error = $EVAL_ERROR;  # save before the cleanup eval can clobber it
   if ( $echo_off ) {
      eval { Term::ReadKey::ReadMode('normal') };
   }
   if ( $error ) {
      die "Cannot read response; is Term::ReadKey installed? $error";
   }
   return $response;
}
869 | 865 | |||
# Reads a config file and returns its contents as a list of command-line
# style arguments.
#
#   * blank lines and lines starting with # or ; are ignored
#   * trailing " # comment" text and surrounding whitespace are removed
#   * "opt" becomes "--opt"; "opt=val" becomes ("--opt", "val")
#   * after a line that is exactly "--", lines are passed through verbatim
#
# Dies if the file cannot be opened.
sub _read_config_file {
   my ( $self, $filename ) = @_;
   open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
   my @args;
   my $prefix  = '--';
   my $in_opts = 1;   # false once the "--" separator has been seen

   LINE:
   while ( my $line = <$fh> ) {
      chomp $line;
      next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
      $line =~ s/\s+#.*$//g;
      $line =~ s/^\s+|\s+$//g;
      if ( $line eq '--' ) {
         ( $prefix, $in_opts ) = ( '', 0 );
         next LINE;
      }

      my ( $opt, $arg );
      # List assignment in scalar context: true only when the regex matched.
      my $is_opt = $in_opts
         && ( ($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/ );
      if ( $is_opt ) {
         push @args, "$prefix$opt";
         push @args, $arg if defined $arg;
      }
      elsif ( $line =~ m/./ ) {
         push @args, $line;
      }
      else {
         die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
      }
   }
   close $fh;
   return @args;
}
903 | 899 | |||
# Returns the paragraph that follows the first paragraph matching $regex
# in the POD of $file (searching only after the "=pod" paragraph), with
# trailing newlines removed.  Returns undef when there is no such
# paragraph instead of tripping over chomp(undef).
# Dies if the file cannot be opened or closed.
sub read_para_after {
   my ( $self, $file, $regex ) = @_;
   open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
   local $INPUT_RECORD_SEPARATOR = '';   # paragraph mode
   my $para;
   # Skip ahead to the start of the POD...
   while ( $para = <$fh> ) {
      last if $para =~ m/^=pod$/m;
   }
   # ...then to the paragraph the caller is looking for.
   while ( $para = <$fh> ) {
      last if $para =~ m/$regex/;
   }
   $para = <$fh>;
   chomp($para) if defined $para;
   close $fh or die "Can't close $file: $OS_ERROR";
   return $para;
}
922 | 918 | |||
# Returns a copy of this object holding copies of the opts, short_opts and
# defaults hashes plus the got_opts flag.  Hash and array values inside
# those hashes are copied one level deep; anything nested deeper, and any
# other kind of reference, is shared with the original.
# The copy is blessed into the same class as the object being cloned
# (two-argument bless), so clones of a subclass remain that subclass.
sub clone {
   my ( $self ) = @_;

   my %clone;
   foreach my $attrib ( qw(opts short_opts defaults) ) {
      my $hashref = $self->{$attrib};
      my %val_copy;
      foreach my $key ( keys %$hashref ) {
         my $val = $hashref->{$key};
         my $ref = ref $val;
         $val_copy{$key} = $ref eq 'HASH'  ? { %$val }
                         : $ref eq 'ARRAY' ? [ @$val ]
                         :                   $val;
      }
      $clone{$attrib} = \%val_copy;
   }

   foreach my $scalar ( qw(got_opts) ) {
      $clone{$scalar} = $self->{$scalar};
   }

   return bless \%clone, ref $self;
}
945 | 941 | |||
# Parses a size value such as "128", "+64k" or "2M" and stores the number
# of bytes (keeping any leading sign) in $opt->{value}.  The literal
# "null" (any case) is stored as 'null'.  An unparsable value is recorded
# with save_error().  Returns nothing.
sub _parse_size {
   my ( $self, $opt, $val ) = @_;

   if ( lc($val || '') eq 'null' ) {
      PTDEBUG && _d('NULL size for', $opt->{long});
      $opt->{value} = 'null';
      return;
   }

   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
   my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
   if ( defined $num ) {
      if ( $factor ) {
         $num *= $factor_for{$factor};
         # Report the option by its long name, like every other message here.
         PTDEBUG && _d('Setting option', $opt->{long},
            'to num', $num, '* factor', $factor);
      }
      $opt->{value} = ($pre || '') . $num;
   }
   else {
      $self->save_error("Invalid size for --$opt->{long}: $val");
   }
   return;
}
970 | 966 | |||
# Builds an option spec string from an option name and its attributes:
# name, then "|short", "!" (negatable), "+" (cumulative) and "=type" for
# whichever attributes are set.  The type letter is looked up in
# $self->{types}.
sub _parse_attribs {
   my ( $self, $option, $attribs ) = @_;
   my $types = $self->{types};
   my $spec  = $option;
   $spec .= '|' . $attribs->{'short form'}     if $attribs->{'short form'};
   $spec .= '!'                                if $attribs->{'negatable'};
   $spec .= '+'                                if $attribs->{'cumulative'};
   $spec .= '=' . $types->{$attribs->{type}}   if $attribs->{'type'};
   return $spec;
}
980 | 976 | |||
# Extracts the usage line and the description from the POD SYNOPSIS
# section of $file (defaults to $self->{file}, then this file).  The two
# paragraphs after "=head1 SYNOPSIS" are taken as usage and description.
# Returns the list ( description => ..., usage => ... ).
# Dies if the file cannot be opened, has no SYNOPSIS section, or the
# section does not contain both paragraphs.
sub _parse_synopsis {
   my ( $self, $file ) = @_;
   $file ||= $self->{file} || __FILE__;
   PTDEBUG && _d("Parsing SYNOPSIS in", $file);

   local $INPUT_RECORD_SEPARATOR = '';  # read paragraphs
   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";

   my $heading;
   while ( defined(my $chunk = <$fh>) ) {
      if ( $chunk =~ m/^=head1 SYNOPSIS/ ) {
         $heading = $chunk;
         last;
      }
   }
   die "$file does not contain a SYNOPSIS section" unless $heading;

   # First paragraph is the usage, second is the description.
   my @synop = map { scalar <$fh> } 1..2;
   close $fh;
   PTDEBUG && _d("Raw SYNOPSIS text:", @synop);

   my ($usage, $desc) = @synop;
   die "The SYNOPSIS section in $file is not formatted properly"
      unless $usage && $desc;

   $usage =~ s/^\s*Usage:\s+(.+)/$1/;
   chomp $usage;

   for ( $desc ) {
      s/\n/ /g;
      s/\s{2,}/ /g;
      s/\. ([A-Z][a-z])/. $1/g;
      s/\s+$//;
   }

   return (
      description => $desc,
      usage       => $usage,
   );
};
1015 | 1011 | |||
# Debug printer: writes one line to STDERR prefixed with the caller's
# package, line number and the process ID.  Undefined arguments print as
# "undef"; newlines inside an argument are continued with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
1023 | 1019 | |||
# When debugging is enabled, print the perl binary and version, the
# system's uname (if available) and the command-line arguments once at
# load time.
if ( PTDEBUG ) {
   print '# ', $^X, ' ', $], "\n";
   my $uname = `uname -a`;
   if ( $uname ) {
      $uname =~ s/\s+/ /g;
      print "# $uname\n";
   }
   # Each argument is wrapped as _[arg]_ so empty ones stay visible.
   my @shown = map { (my $arg = "_[$_]_") =~ s/\n/\n# /g; $arg; } @ARGV;
   print '# Arguments: ', join(' ', @shown), "\n";
}
1033 | 1029 | |||
1034 | 1030 | 1; | ||
1035 | 1031 | } | ||
1036 | 1032 | # ########################################################################### | ||
1037 | 1033 | # End OptionParser package | ||
1038 | 1034 | # ########################################################################### | ||
1039 | 1035 | |||
1040 | 1036 | # ########################################################################### | ||
1041 | 1037 | # QueryParser package | ||
1042 | 1038 | # This package is a copy without comments from the original. The original | ||
1043 | 1039 | # with comments and its test file can be found in the Bazaar repository at, | ||
1044 | 1040 | # lib/QueryParser.pm | ||
1045 | 1041 | # t/lib/QueryParser.t | ||
1046 | 1042 | # See https://launchpad.net/percona-toolkit for more information. | ||
1047 | 1043 | # ########################################################################### | ||
1048 | 1044 | { | ||
1049 | 1045 | package QueryParser; | ||
1050 | 1046 | |||
1051 | 1047 | use strict; | ||
1052 | 1048 | use warnings FATAL => 'all'; | ||
1053 | 1049 | use English qw(-no_match_vars); | ||
1054 | 1050 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
1055 | 1051 | |||
# A table identifier: "tbl" or "db.tbl", each part bare or backtick-quoted.
our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;

# A keyword that introduces table names, followed by a comma-separated
# list of table identifiers (each optionally aliased).  The list is
# captured in $1.
our $tbl_regex = qr{
         \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
         \b\s*
         \(?                                   # Optional paren around tables
         ($tbl_ident
            (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
         )
      }xio;

# Matches a derived table, i.e. "(SELECT" right after FROM, JOIN or a comma.
our $has_derived = qr{
      \b(?:FROM|JOIN|,)
      \s*\(\s*SELECT
   }xi;

# Leading verbs of data definition statements.
our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;

# Leading verbs of data manipulation statements.
our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
1073 | 1069 | |||
# Constructor: takes no arguments besides the class and returns an empty
# object.
sub new {
   my ( $class ) = @_;
   my $self = {};
   return bless $self, $class;
}
1078 | 1074 | |||
# Returns the list of table names (as written, possibly "db.tbl" and
# possibly backtick-quoted) referenced by $query; returns nothing for an
# empty query.
#
#   * DDL (CREATE/ALTER/TRUNCATE/DROP/RENAME): the table after TABLE, or
#     nothing when a DATABASE is the target; CREATE ... SELECT is handled
#     by extracting the tables of the SELECT part.
#   * LOCK TABLES: the locked tables.  The statement is recognized and
#     stripped case-insensitively, and the lock types READ, READ LOCAL and
#     WRITE are removed as whole words.
#   * Anything else: tables following FROM, JOIN, UPDATE or INTO, after
#     quoted strings have been replaced so their contents cannot match.
sub get_tables {
   my ( $self, $query ) = @_;
   return unless $query;
   PTDEBUG && _d('Getting tables for', $query);

   my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
   if ( $ddl_stmt ) {
      PTDEBUG && _d('Special table type:', $ddl_stmt);
      $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
      if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
         PTDEBUG && _d('Query alters a database, not a table');
         return ();
      }
      if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
         my ($select) = $query =~ m/\b(SELECT\b.+)/is;
         PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
         return $self->get_tables($select);
      }
      my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
      PTDEBUG && _d('Matches table:', $tbl);
      return ($tbl);
   }

   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;

   # Detect and strip the LOCK TABLES prefix in one case-insensitive step;
   # otherwise "lock tables t1 read" would report a table named "lock".
   if ( $query =~ s/^\s*LOCK TABLES\s+//i ) {
      PTDEBUG && _d('Special table type: LOCK TABLES');
      $query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\b\s*//gi;
      PTDEBUG && _d('Locked tables:', $query);
      $query = "FROM $query";
   }

   $query =~ s/\\["']//g;   # escaped quotes
   $query =~ s/".*?"/?/sg;  # double-quoted strings
   $query =~ s/'.*?'/?/sg;  # single-quoted strings

   my @tables;
   foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
      PTDEBUG && _d('Match tables:', $tbls);

      # "FROM (SELECT ..." is a derived table, not a table named SELECT.
      next if $tbls =~ m/\ASELECT\b/i;

      foreach my $tbl ( split(',', $tbls) ) {
         $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;

         if ( $tbl !~ m/[a-zA-Z]/ ) {
            PTDEBUG && _d('Skipping suspicious table name:', $tbl);
            next;
         }

         push @tables, $tbl;
      }
   }
   return @tables;
}
1135 | 1131 | |||
# Returns true if $query matches the $has_derived pattern (a "(SELECT"
# directly after FROM, JOIN or a comma), false otherwise.
sub has_derived_table {
   my ( $self, $query ) = @_;
   my $match = $query =~ m/$has_derived/;
   if ( PTDEBUG ) {
      my $article = $match ? 'a' : 'no';
      _d($query, 'has ' . $article . ' derived table');
   }
   return $match;
}
1142 | 1138 | |||
# Finds the table references in $query.
#
# Without $list, returns a hashref:
#    TABLE    => { alias_or_table => table }   (undef value for a subquery alias)
#    DATABASE => { table => database }         (only for db-qualified tables)
# With a true $list, returns an arrayref of the table references as
# written (e.g. "db.tbl AS t").  Note that an empty $query returns the
# empty result hashref even when $list is true.
#
# The table-reference pattern interpolates $before_tbl, which is built
# from the keyword found in this particular query, so it must not use /o:
# that modifier would freeze the first call's keyword for the life of the
# process.
sub get_aliases {
   my ( $self, $query, $list ) = @_;

   my $result = {
      DATABASE => {},
      TABLE    => {},
   };
   return $result unless $query;

   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;

   $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;

   my @tbl_refs;
   my ($tbl_refs, $from) = $query =~ m{
      (
         (FROM|INTO|UPDATE)\b\s*   # Keyword before table refs
         .+?                       # Table refs
      )
      (?:\s+|\z)                   # If the query does not end with the table
      (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
   }ix;

   if ( $tbl_refs ) {

      if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
         # Drop the column list of INSERT/REPLACE INTO tbl (cols).
         $tbl_refs =~ s/\([^\)]+\)\s*//;
      }

      PTDEBUG && _d('tbl refs:', $tbl_refs);

      my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;

      my $after_tbl  = qr/(?:,|JOIN|ON|USING|\z)/i;

      # Keep "a = b" join conditions from being read as "a AS =".
      $tbl_refs =~ s/ = /=/g;

      while (
         $tbl_refs =~ m{
            $before_tbl\b\s*
               ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
            \s*$after_tbl
         }xgi )
      {
         my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
         PTDEBUG && _d('Match table:', $tbl_ref);
         push @tbl_refs, $tbl_ref;
         $alias = $self->trim_identifier($alias);

         if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
            PTDEBUG && _d('Subquery', $tbl_ref);
            $result->{TABLE}->{$alias} = undef;
            next;
         }

         my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
         $db  = $self->trim_identifier($db);
         $tbl = $self->trim_identifier($tbl);
         $result->{TABLE}->{$alias || $tbl} = $tbl;
         $result->{DATABASE}->{$tbl}        = $db if $db;
      }
   }
   else {
      PTDEBUG && _d("No tables ref in", $query);
   }

   if ( $list ) {
      return \@tbl_refs;
   }
   else {
      return $result;
   }
}
1216 | 1212 | |||
# Splits $query into its component statements at the verbs SELECT, INSERT,
# UPDATE, DELETE, REPLACE, UNION and CREATE (a verb directly followed by
# "(" is not a split point).  Returns the list of statements, or nothing
# for an empty query.
#
#   * "... ON DUPLICATE KEY UPDATE ..." stays attached to its INSERT.
#   * Text before the first verb (e.g. "EXPLAIN ") is kept as a prefix of
#     the first statement, and a trailing verb with nothing after it
#     becomes a statement of its own; neither case concatenates an
#     undefined value any more.
sub split {
   my ( $self, $query ) = @_;
   return unless $query;
   $query = $self->clean_query($query);
   PTDEBUG && _d('Splitting', $query);

   my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;

   # Pieces alternate verb, body, verb, body...; the empty leading piece
   # produced when the query starts with a verb is dropped by grep.
   my @split_statements
      = grep { $_ } CORE::split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);

   my @statements;
   if ( @split_statements == 1 ) {
      push @statements, $query;
   }
   else {
      # Anything before the first verb belongs to the first statement.
      my $prefix = '';
      if ( $split_statements[0] !~ m/\A$verbs\z/ ) {
         $prefix = shift @split_statements;
      }

      for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
         my $verb = $split_statements[$i];
         my $body = $i + 1 <= $#split_statements ? $split_statements[$i+1]
                  :                                 '';
         push @statements, $prefix . $verb . $body;
         $prefix = '';

         if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
            $statements[-2] .= pop @statements;
         }
      }
   }

   PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
   return @statements;
}
1244 | 1240 | |||
# Returns $query with /* ... */ comment blocks replaced by a space,
# leading and trailing whitespace removed, and runs of whitespace
# collapsed to a single space.  Returns nothing for an empty query.
sub clean_query {
   my ( $self, $query ) = @_;
   return unless $query;
   for ( $query ) {
      s!/\*.*?\*/! !g;   # Remove /* comment blocks */
      s/^\s+//;          # Remove leading spaces
      s/\s+$//;          # Remove trailing spaces
      s/\s{2,}/ /g;      # Remove extra spaces
   }
   return $query;
}
1254 | 1250 | |||
# Walks $query word by word looking for SELECTs that do not start at
# position 0, records each as a numbered subquery, and replaces the
# subquery text inside the query with "__subquery_N".
# Returns the rewritten query followed by the SQL of each subquery, or
# nothing for an empty query.
sub split_subquery {
   my ( $self, $query ) = @_;
   return unless $query;
   $query = $self->clean_query($query);
   $query =~ s/;$//;

   my @subqueries;
   my $sqno = 0;  # subquery number
   my $pos  = 0;

   WORD:
   while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
      $pos = pos($query);
      my $word = $1;
      PTDEBUG && _d($word, $sqno);

      if ( $word =~ m/^\(?SELECT\b/i ) {
         my $start_pos = $pos - length($word) - 1;
         if ( !$start_pos ) {
            PTDEBUG && _d('Main SELECT at pos 0');
            next WORD;
         }
         $sqno++;
         PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
         $subqueries[$sqno] = {
            start_pos => $start_pos,
            end_pos   => 0,
            len       => 0,
            words     => [$word],
            lp        => 1, # left parentheses
            rp        => 0, # right parentheses
            done      => 0,
         };
         next WORD;
      }

      next WORD unless $sqno;  # next unless we're in a subquery
      PTDEBUG && _d('In subquery', $sqno);
      my $sq = $subqueries[$sqno];
      if ( $sq->{done} ) {
         PTDEBUG && _d('This subquery is done; SQL is for',
            ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
         next WORD;
      }

      push @{$sq->{words}}, $word;
      my $lp = ($word =~ tr/\(//) || 0;
      my $rp = ($word =~ tr/\)//) || 0;
      PTDEBUG && _d('parentheses left', $lp, 'right', $rp);

      my $opened = $sq->{lp} + $lp;
      my $closed = $sq->{rp} + $rp;
      if ( $opened - $closed == 0 ) {
         my $end_pos = $pos - 1;
         PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
         $sq->{end_pos} = $end_pos;
         $sq->{len}     = $end_pos - $sq->{start_pos};
      }
   }

   for my $i ( 1..$#subqueries ) {
      my $sq = $subqueries[$i];
      next unless $sq;
      $sq->{sql} = join(' ', @{$sq->{words}});
      substr $query,
         $sq->{start_pos} + 1,  # +1 for (
         $sq->{len} - 1,        # -1 for )
         "__subquery_$i";
   }

   return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
}
1321 | 1317 | |||
# Classifies $query using the first value returned by
# $qr->distill_verbs($query).
# Returns a hashref { type => <that value>, rw => 'read' | 'write' | undef }:
# 'read' when the type starts with SELECT, 'write' when it starts with a
# data manipulation or data definition verb, undef otherwise.
sub query_type {
   my ( $self, $query, $qr ) = @_;
   my ($type, undef) = $qr->distill_verbs($query);

   my $rw;
   if ( $type =~ m/^SELECT\b/ ) {
      $rw = 'read';
   }
   elsif (    $type =~ m/^$data_manip_stmts\b/
           || $type =~ m/^$data_def_stmts\b/ ) {
      $rw = 'write';
   }

   my %info = (
      type => $type,
      rw   => $rw,
   );
   return \%info;
}
1339 | 1335 | |||
# Returns an arrayref of the column expressions in $query, with
# surrounding whitespace removed:
#   * SELECT: everything between SELECT (after skipping any number of
#     modifiers such as DISTINCT or SQL_NO_CACHE) and FROM, split on commas
#   * INSERT/REPLACE: the parenthesized column list before VALUE(S)
# Returns an empty arrayref for an empty query or when nothing matches.
sub get_columns {
   my ( $self, $query ) = @_;
   my $cols = [];
   return $cols unless $query;
   my $cols_def;

   if ( $query =~ m/^SELECT/i ) {
      # Strip every leading modifier in one pass; the pattern is anchored
      # with ^, so stripping them one at a time with /g removed only one.
      $query =~ s/
         ^SELECT\s+
           (?:
              (?:ALL
              |DISTINCT
              |DISTINCTROW
              |HIGH_PRIORITY
              |STRAIGHT_JOIN
              |SQL_SMALL_RESULT
              |SQL_BIG_RESULT
              |SQL_BUFFER_RESULT
              |SQL_CACHE
              |SQL_NO_CACHE
              |SQL_CALC_FOUND_ROWS
              )\s+
           )+
      /SELECT /xi;
      ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
   }
   elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
      ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
   }

   PTDEBUG && _d('Columns:', $cols_def);
   if ( $cols_def ) {
      @$cols = split(',', $cols_def);
      # Trim each column in place.
      foreach my $col ( @$cols ) {
         $col =~ s/^\s+//;
         $col =~ s/\s+$//;
      }
   }

   return $cols;
}
1381 | 1377 | |||
# Parses $query into a hashref with these keys:
#   query       - the query with newlines and comment blocks removed and
#                 whitespace normalized
#   tables      - arrayref of table references (get_aliases in list mode)
#   columns     - arrayref of columns (get_columns)
#   type        - lowercased first word of the query, or undef when the
#                 query does not start with a word
#   sub_queries - always an empty arrayref
# Returns nothing for an empty query.
sub parse {
   my ( $self, $query ) = @_;
   return unless $query;
   my $parsed = {};

   $query =~ s/\n/ /g;
   $query = $self->clean_query($query);

   $parsed->{query}   = $query;
   $parsed->{tables}  = $self->get_aliases($query, 1);
   $parsed->{columns} = $self->get_columns($query);

   my ($type) = $query =~ m/^(\w+)/;
   $parsed->{type} = defined $type ? lc $type : undef;

   $parsed->{sub_queries} = [];

   return $parsed;
}
1402 | 1398 | |||
# Returns a list of [ db, tbl ] pairs, one per distinct table reference
# found by get_tables() in $args{query}.
#
# Arguments (named):
#   query      - SQL to inspect; nothing is returned when it is empty
#   default_db - database used for tables that are not db-qualified
#   Quoter     - object providing split_unquote(); used only when the
#                parser object has no Quoter of its own
sub extract_tables {
   my ( $self, %args ) = @_;
   my ( $query, $default_db ) = @args{qw(query default_db)};
   my $q = $self->{Quoter} || $args{Quoter};
   return unless $query;
   PTDEBUG && _d('Extracting tables');

   my ( @tables, %seen );
   DB_TBL:
   foreach my $db_tbl ( $self->get_tables($query) ) {
      # Skip empty matches and repeats (unique-ify for issue 337).
      next DB_TBL if !$db_tbl || $seen{$db_tbl}++;
      my ( $db, $tbl ) = $q->split_unquote($db_tbl);
      push @tables, [ $db || $default_db, $tbl ];
   }
   return @tables;
}
1420 | 1416 | |||
# Returns $str without backticks and without leading or trailing
# whitespace.  Returns nothing when $str is undefined.
sub trim_identifier {
   my ($self, $str) = @_;
   return unless defined $str;
   $str =~ tr/`//d;
   $str =~ s/^\s+|\s+$//g;
   return $str;
}
1429 | 1425 | |||
# Debug printer: writes one line to STDERR prefixed with the caller's
# package, line number and the process ID.  Undefined arguments print as
# "undef"; newlines inside an argument are continued with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
1437 | 1433 | |||
1438 | 1434 | 1; | ||
1439 | 1435 | } | ||
1440 | 1436 | # ########################################################################### | ||
1441 | 1437 | # End QueryParser package | ||
1442 | 1438 | # ########################################################################### | ||
1443 | 1439 | |||
1444 | 1440 | # ########################################################################### | ||
1445 | 1441 | # QueryRewriter package | ||
1446 | 1442 | # This package is a copy without comments from the original. The original | ||
1447 | 1443 | # with comments and its test file can be found in the Bazaar repository at, | ||
1448 | 1444 | # lib/QueryRewriter.pm | ||
1449 | 1445 | # t/lib/QueryRewriter.t | ||
1450 | 1446 | # See https://launchpad.net/percona-toolkit for more information. | ||
1451 | 1447 | # ########################################################################### | ||
1452 | 1448 | { | ||
1453 | 1449 | package QueryRewriter; | ||
1454 | 1450 | |||
1455 | 1451 | use strict; | ||
1456 | 1452 | use warnings FATAL => 'all'; | ||
1457 | 1453 | use English qw(-no_match_vars); | ||
1458 | 1454 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | ||
1459 | 1455 | |||
# Statement verbs; the ones prefixed with ^ only count at the start of
# the query.
our $verbs   = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
                  |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;

# A double- or single-quoted string that may contain backslash-escaped
# quotes.
my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!

# A balanced, possibly nested, parenthesized group.  The pattern refers to
# itself, so the variable has to be declared before it is assigned.
my $bal;
$bal         = qr/
                  \(
                  (?:
                     (?> [^()]+ )    # Non-parens without backtracking
                     |
                     (??{ $bal })    # Group with matching parens
                  )*
                  \)
                 /x;

my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/;  # One-line comments
my $mlc_re = qr#/\*[^!].*?\*/#sm;                  # But not /*!version */
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm;             # For SHOW + /*!version */
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm;     # Variation for SHOW
1478 | 1474 | |||
1479 | 1475 | |||
# Constructor: every named argument becomes an attribute of the object.
sub new {
   my ( $class, %args ) = @_;
   my %attribs = %args;
   return bless \%attribs, $class;
}
1485 | 1481 | |||
# Returns $query with one-line comments and /* ... */ comment blocks
# removed.  /*!version ... */ comments are kept, except in queries that
# start with SHOW, where they are removed as well.
# Returns nothing for an empty query.
sub strip_comments {
   my ( $self, $query ) = @_;
   return unless $query;
   for ( $query ) {
      s/$olc_re//go;
      s/$mlc_re//go;
      s/$vlc_re//go if m/$vlc_rf/i;   # contains show + version
   }
   return $query;
}
1496 | 1492 | |||
# Shortens long queries for display.
#
#   * Multi-row INSERT/REPLACE ... VALUES (...),(...): only the first row
#     is kept, followed by " /*... omitted ...*/" (and any ON DUPLICATE
#     clause marker).
#   * IN (...) lists: while the query is longer than $length, lists are
#     reduced by __shorten().  $length is optional; when it is omitted or
#     not positive, IN lists are left alone.
#
# Returns the (possibly shortened) query.
sub shorten {
   my ( $self, $query, $length ) = @_;
   $length = 0 unless defined $length;

   # The modifier group needs its own inner group so that the leading
   # whitespace applies to every keyword, not just LOW_PRIORITY.
   $query =~ s{
      \A(
         (?:INSERT|REPLACE)
         (?:\s+(?:LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE))?
         (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
      )
      \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
      {$1 /*... omitted ...*/$2}xsi;

   return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;

   my $last_length  = 0;
   my $query_length = length($query);
   # Stop as soon as a pass no longer makes the query shorter.
   while (
      $length          > 0
      && $query_length > $length
      && $query_length < ( $last_length || $query_length + 1 )
   ) {
      $last_length = $query_length;
      $query =~ s{
         (\bIN\s*\()    # The opening of an IN list
         ([^\)]+)       # Contents of the list, assuming no item contains paren
         (?=\))         # Close of the list
      }
      {
         $1 . __shorten($2)
      }gexsi;
   }

   return $query;
}
1530 | 1526 | |||
1531 | 1527 | sub __shorten { | ||
1532 | 1528 | my ( $snippet ) = @_; | ||
1533 | 1529 | my @vals = split(/,/, $snippet); | ||
1534 | 1530 | return $snippet unless @vals > 20; | ||
1535 | 1531 | my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items | ||
1536 | 1532 | return | ||
1537 | 1533 | join(',', @keep) | ||
1538 | 1534 | . "/*... omitted " | ||
1539 | 1535 | . scalar(@vals) | ||
1540 | 1536 | . " items ...*/"; | ||
1541 | 1537 | } | ||
1542 | 1538 | |||
1543 | 1539 | sub fingerprint { | ||
1544 | 1540 | my ( $self, $query ) = @_; | ||
1545 | 1541 | |||
1546 | 1542 | $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query | ||
1547 | 1543 | && return 'mysqldump'; | ||
1548 | 1544 | $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query | ||
1549 | 1545 | && return 'percona-toolkit'; | ||
1550 | 1546 | $query =~ m/\Aadministrator command: / | ||
1551 | 1547 | && return $query; | ||
1552 | 1548 | $query =~ m/\A\s*(call\s+\S+)\(/i | ||
1553 | 1549 | && return lc($1); # Warning! $1 used, be careful. | ||
1554 | 1550 | if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) { | ||
1555 | 1551 | $query = $beginning; # Shorten multi-value INSERT statements ASAP | ||
1556 | 1552 | } | ||
1557 | 1553 | |||
1558 | 1554 | $query =~ s/$olc_re//go; | ||
1559 | 1555 | $query =~ s/$mlc_re//go; | ||
1560 | 1556 | $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE | ||
1561 | 1557 | && return $query; | ||
1562 | 1558 | |||
1563 | 1559 | $query =~ s/\\["']//g; # quoted strings | ||
1564 | 1560 | $query =~ s/".*?"/?/sg; # quoted strings | ||
1565 | 1561 | $query =~ s/'.*?'/?/sg; # quoted strings | ||
1566 | 1562 | |||
1567 | 1563 | if ( $self->{match_md5_checksums} ) { | ||
1568 | 1564 | $query =~ s/([._-])[a-f0-9]{32}/$1?/g; | ||
1569 | 1565 | } | ||
1570 | 1566 | |||
1571 | 1567 | if ( !$self->{match_embedded_numbers} ) { | ||
1572 | 1568 | $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; | ||
1573 | 1569 | } | ||
1574 | 1570 | else { | ||
1575 | 1571 | $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; | ||
1576 | 1572 | } | ||
1577 | 1573 | |||
1578 | 1574 | if ( $self->{match_md5_checksums} ) { | ||
1579 | 1575 | $query =~ s/[xb+-]\?/?/g; | ||
1580 | 1576 | } | ||
1581 | 1577 | else { | ||
1582 | 1578 | $query =~ s/[xb.+-]\?/?/g; | ||
1583 | 1579 | } | ||
1584 | 1580 | |||
1585 | 1581 | $query =~ s/\A\s+//; # Chop off leading whitespace | ||
1586 | 1582 | chomp $query; # Kill trailing whitespace | ||
1587 | 1583 | $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace | ||
1588 | 1584 | $query = lc $query; | ||
1589 | 1585 | $query =~ s/\bnull\b/?/g; # Get rid of NULLs | ||
1590 | 1586 | $query =~ s{ # Collapse IN and VALUES lists | ||
1591 | 1587 | \b(in|values?)(?:[\s,]*\([\s?,]*\))+ | ||
1592 | 1588 | } | ||
1593 | 1589 | {$1(?+)}gx; | ||
1594 | 1590 | $query =~ s{ # Collapse UNION | ||
1595 | 1591 | \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+ | ||
1596 | 1592 | } | ||
1597 | 1593 | {$1 /*repeat$2*/}xg; | ||
1598 | 1594 | $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT | ||
1599 | 1595 | |||
1600 | 1596 | if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause | ||
1601 | 1597 | 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query; | ||
1602 | 1598 | } | ||
1603 | 1599 | |||
1604 | 1600 | return $query; | ||
1605 | 1601 | } | ||
1606 | 1602 | |||
1607 | 1603 | sub distill_verbs { | ||
1608 | 1604 | my ( $self, $query ) = @_; | ||
1609 | 1605 | |||
1610 | 1606 | $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1"; | ||
1611 | 1607 | $query =~ m/\A\s*use\s+/ && return "USE"; | ||
1612 | 1608 | $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; | ||
1613 | 1609 | $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; | ||
1614 | 1610 | |||
1615 | 1611 | if ( $query =~ m/\Aadministrator command:/ ) { | ||
1616 | 1612 | $query =~ s/administrator command:/ADMIN/; | ||
1617 | 1613 | $query = uc $query; | ||
1618 | 1614 | return $query; | ||
1619 | 1615 | } | ||
1620 | 1616 | |||
1621 | 1617 | $query = $self->strip_comments($query); | ||
1622 | 1618 | |||
1623 | 1619 | if ( $query =~ m/\A\s*SHOW\s+/i ) { | ||
1624 | 1620 | PTDEBUG && _d($query); | ||
1625 | 1621 | |||
1626 | 1622 | $query = uc $query; | ||
1627 | 1623 | $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; | ||
1628 | 1624 | $query =~ s/\s+COUNT[^)]+\)//g; | ||
1629 | 1625 | |||
1630 | 1626 | $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; | ||
1631 | 1627 | |||
1632 | 1628 | $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s; | ||
1633 | 1629 | $query =~ s/\s+/ /g; | ||
1634 | 1630 | PTDEBUG && _d($query); | ||
1635 | 1631 | return $query; | ||
1636 | 1632 | } | ||
1637 | 1633 | |||
1638 | 1634 | eval $QueryParser::data_def_stmts; | ||
1639 | 1635 | eval $QueryParser::tbl_ident; | ||
1640 | 1636 | my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; | ||
1641 | 1637 | if ( $dds) { | ||
1642 | 1638 | my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; | ||
1643 | 1639 | $obj = uc $obj if $obj; | ||
1644 | 1640 | PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); | ||
1645 | 1641 | my ($db_or_tbl) | ||
1646 | 1642 | = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i; | ||
1647 | 1643 | PTDEBUG && _d('Matches db or table:', $db_or_tbl); | ||
1648 | 1644 | return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl; | ||
1649 | 1645 | } | ||
1650 | 1646 | |||
1651 | 1647 | my @verbs = $query =~ m/\b($verbs)\b/gio; | ||
1652 | 1648 | @verbs = do { | ||
1653 | 1649 | my $last = ''; | ||
1654 | 1650 | grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs; | ||
1655 | 1651 | }; | ||
1656 | 1652 | |||
1657 | 1653 | if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) { | ||
1658 | 1654 | PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]); | ||
1659 | 1655 | my $union = grep { $_ eq 'UNION' } @verbs; | ||
1660 | 1656 | @verbs = $union ? qw(SELECT UNION) : qw(SELECT); | ||
1661 | 1657 | } | ||
1662 | 1658 | |||
1663 | 1659 | my $verb_str = join(q{ }, @verbs); | ||
1664 | 1660 | return $verb_str; | ||
1665 | 1661 | } | ||
1666 | 1662 | |||
1667 | 1663 | sub __distill_tables { | ||
1668 | 1664 | my ( $self, $query, $table, %args ) = @_; | ||
1669 | 1665 | my $qp = $args{QueryParser} || $self->{QueryParser}; | ||
1670 | 1666 | die "I need a QueryParser argument" unless $qp; | ||
1671 | 1667 | |||
1672 | 1668 | my @tables = map { | ||
1673 | 1669 | $_ =~ s/`//g; | ||
1674 | 1670 | $_ =~ s/(_?)[0-9]+/$1?/g; | ||
1675 | 1671 | $_; | ||
1676 | 1672 | } grep { defined $_ } $qp->get_tables($query); | ||
1677 | 1673 | |||
1678 | 1674 | push @tables, $table if $table; | ||
1679 | 1675 | |||
1680 | 1676 | @tables = do { | ||
1681 | 1677 | my $last = ''; | ||
1682 | 1678 | grep { my $pass = $_ ne $last; $last = $_; $pass } @tables; | ||
1683 | 1679 | }; | ||
1684 | 1680 | |||
1685 | 1681 | return @tables; | ||
1686 | 1682 | } | ||
1687 | 1683 | |||
1688 | 1684 | sub distill { | ||
1689 | 1685 | my ( $self, $query, %args ) = @_; | ||
1690 | 1686 | |||
1691 | 1687 | if ( $args{generic} ) { | ||
1692 | 1688 | my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/; | ||
1693 | 1689 | return '' unless $cmd; | ||
1694 | 1690 | $query = (uc $cmd) . ($arg ? " $arg" : ''); | ||
1695 | 1691 | } | ||
1696 | 1692 | else { | ||
1697 | 1693 | my ($verbs, $table) = $self->distill_verbs($query, %args); | ||
1698 | 1694 | |||
1699 | 1695 | if ( $verbs && $verbs =~ m/^SHOW/ ) { | ||
1700 | 1696 | my %alias_for = qw( | ||
1701 | 1697 | SCHEMA DATABASE | ||
1702 | 1698 | KEYS INDEX | ||
1703 | 1699 | INDEXES INDEX | ||
1704 | 1700 | ); | ||
1705 | 1701 | map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; | ||
1706 | 1702 | $query = $verbs; | ||
1707 | 1703 | } | ||
1708 | 1704 | else { | ||
1709 | 1705 | my @tables = $self->__distill_tables($query, $table, %args); | ||
1710 | 1706 | $query = join(q{ }, $verbs, @tables); | ||
1711 | 1707 | } | ||
1712 | 1708 | } | ||
1713 | 1709 | |||
1714 | 1710 | if ( $args{trf} ) { | ||
1715 | 1711 | $query = $args{trf}->($query, %args); | ||
1716 | 1712 | } | ||
1717 | 1713 | |||
1718 | 1714 | return $query; | ||
1719 | 1715 | } | ||
1720 | 1716 | |||
1721 | 1717 | sub convert_to_select { | ||
1722 | 1718 | my ( $self, $query ) = @_; | ||
1723 | 1719 | return unless $query; | ||
1724 | 1720 | |||
1725 | 1721 | return if $query =~ m/=\s*\(\s*SELECT /i; | ||
1726 | 1722 | |||
1727 | 1723 | $query =~ s{ | ||
1728 | 1724 | \A.*? | ||
1729 | 1725 | update(?:\s+(?:low_priority|ignore))?\s+(.*?) | ||
1730 | 1726 | \s+set\b(.*?) | ||
1731 | 1727 | (?:\s*where\b(.*?))? | ||
1732 | 1728 | (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)? | ||
1733 | 1729 | \Z | ||
1734 | 1730 | } | ||
1735 | 1731 | {__update_to_select($1, $2, $3, $4)}exsi | ||
1736 | 1732 | || $query =~ s{ | ||
1737 | 1733 | \A.*? | ||
1738 | 1734 | (?:insert(?:\s+ignore)?|replace)\s+ | ||
1739 | 1735 | .*?\binto\b(.*?)\(([^\)]+)\)\s* | ||
1740 | 1736 | values?\s*(\(.*?\))\s* | ||
1741 | 1737 | (?:\blimit\b|on\s+duplicate\s+key.*)?\s* | ||
1742 | 1738 | \Z | ||
1743 | 1739 | } | ||
1744 | 1740 | {__insert_to_select($1, $2, $3)}exsi | ||
1745 | 1741 | || $query =~ s{ | ||
1746 | 1742 | \A.*? | ||
1747 | 1743 | (?:insert(?:\s+ignore)?|replace)\s+ | ||
1748 | 1744 | (?:.*?\binto)\b(.*?)\s* | ||
1749 | 1745 | set\s+(.*?)\s* | ||
1750 | 1746 | (?:\blimit\b|on\s+duplicate\s+key.*)?\s* | ||
1751 | 1747 | \Z | ||
1752 | 1748 | } | ||
1753 | 1749 | {__insert_to_select_with_set($1, $2)}exsi | ||
1754 | 1750 | || $query =~ s{ | ||
1755 | 1751 | \A.*? | ||
1756 | 1752 | delete\s+(.*?) | ||
1757 | 1753 | \bfrom\b(.*) | ||
1758 | 1754 | \Z | ||
1759 | 1755 | } | ||
1760 | 1756 | {__delete_to_select($1, $2)}exsi; | ||
1761 | 1757 | $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si; | ||
1762 | 1758 | $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism; | ||
1763 | 1759 | return $query; | ||
1764 | 1760 | } | ||
1765 | 1761 | |||
1766 | 1762 | sub convert_select_list { | ||
1767 | 1763 | my ( $self, $query ) = @_; | ||
1768 | 1764 | $query =~ s{ | ||
1769 | 1765 | \A\s*select(.*?)\bfrom\b | ||
1770 | 1766 | } | ||
1771 | 1767 | {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi; | ||
1772 | 1768 | return $query; | ||
1773 | 1769 | } | ||
1774 | 1770 | |||
1775 | 1771 | sub __delete_to_select { | ||
1776 | 1772 | my ( $delete, $join ) = @_; | ||
1777 | 1773 | if ( $join =~ m/\bjoin\b/ ) { | ||
1778 | 1774 | return "select 1 from $join"; | ||
1779 | 1775 | } | ||
1780 | 1776 | return "select * from $join"; | ||
1781 | 1777 | } | ||
1782 | 1778 | |||
1783 | 1779 | sub __insert_to_select { | ||
1784 | 1780 | my ( $tbl, $cols, $vals ) = @_; | ||
1785 | 1781 | PTDEBUG && _d('Args:', @_); | ||
1786 | 1782 | my @cols = split(/,/, $cols); | ||
1787 | 1783 | PTDEBUG && _d('Cols:', @cols); | ||
1788 | 1784 | $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens | ||
1789 | 1785 | my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g; | ||
1790 | 1786 | PTDEBUG && _d('Vals:', @vals); | ||
1791 | 1787 | if ( @cols == @vals ) { | ||
1792 | 1788 | return "select * from $tbl where " | ||
1793 | 1789 | . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols)); | ||
1794 | 1790 | } | ||
1795 | 1791 | else { | ||
1796 | 1792 | return "select * from $tbl limit 1"; | ||
1797 | 1793 | } | ||
1798 | 1794 | } | ||
1799 | 1795 | |||
1800 | 1796 | sub __insert_to_select_with_set { | ||
1801 | 1797 | my ( $from, $set ) = @_; | ||
1802 | 1798 | $set =~ s/,/ and /g; | ||
1803 | 1799 | return "select * from $from where $set "; | ||
1804 | 1800 | } | ||
1805 | 1801 | |||
1806 | 1802 | sub __update_to_select { | ||
1807 | 1803 | my ( $from, $set, $where, $limit ) = @_; | ||
1808 | 1804 | return "select $set from $from " | ||
1809 | 1805 | . ( $where ? "where $where" : '' ) | ||
1810 | 1806 | . ( $limit ? " $limit " : '' ); | ||
1811 | 1807 | } | ||
1812 | 1808 | |||
1813 | 1809 | sub wrap_in_derived { | ||
1814 | 1810 | my ( $self, $query ) = @_; | ||
1815 | 1811 | return unless $query; | ||
1816 | 1812 | return $query =~ m/\A\s*select/i | ||
1817 | 1813 | ? "select 1 from ($query) as x limit 1" | ||
1818 | 1814 | : $query; | ||
1819 | 1815 | } | ||
1820 | 1816 | |||
1821 | 1817 | sub _d { | ||
1822 | 1818 | my ($package, undef, $line) = caller 0; | ||
1823 | 1819 | @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } | ||
1824 | 1820 | map { defined $_ ? $_ : 'undef' } | ||
1825 | 1821 | @_; | ||
1826 | 1822 | print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; | ||
1827 | 1823 | } | ||
1828 | 1824 | |||
1829 | 1825 | 1; | ||
1830 | 1826 | } | ||
1831 | 1827 | # ########################################################################### | ||
1832 | 1828 | # End QueryRewriter package | ||
1833 | 1829 | # ########################################################################### | ||
1834 | 1830 | |||
1835 | 1831 | # ########################################################################### | ||
1836 | 1832 | # This is a combination of modules and programs in one -- a runnable module. | ||
1837 | 1833 | # http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last | ||
1838 | 1834 | # Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition. | ||
1839 | 1835 | # | ||
1840 | 1836 | # Check at the end of this package for the call to main() which actually runs | ||
1841 | 1837 | # the program. | ||
1842 | 1838 | # ########################################################################### | ||
1843 | 1839 | package pt_fingerprint; | ||
1844 | 1840 | |||
1845 | 1841 | use English qw(-no_match_vars); | ||
1846 | 1842 | use Data::Dumper; | ||
1847 | 1843 | $Data::Dumper::Indent = 1; | ||
1848 | 1844 | $OUTPUT_AUTOFLUSH = 1; | ||
1849 | 1845 | |||
1850 | 1846 | use constant MKDEBUG => $ENV{MKDEBUG} || 0; | ||
1851 | 1847 | |||
1852 | 1848 | sub main { | ||
1853 | 1849 | @ARGV = @_; # set global ARGV for this package | ||
1854 | 1850 | |||
1855 | 1851 | # ########################################################################## | ||
1856 | 1852 | # Get configuration information. | ||
1857 | 1853 | # ########################################################################## | ||
1858 | 1854 | my $o = new OptionParser(); | ||
1859 | 1855 | $o->get_specs(); | ||
1860 | 1856 | $o->get_opts(); | ||
1861 | 1857 | $o->usage_or_errors(); | ||
1862 | 1858 | |||
1863 | 1859 | my $qp = new QueryParser(); | ||
1864 | 1860 | my $qr = new QueryRewriter( | ||
1865 | 1861 | QueryParser => $qp, | ||
1866 | 1862 | match_md5_checksums => $o->get('match-md5-checksums'), | ||
1867 | 1863 | match_embedded_numbers => $o->get('match-embedded-numbers'), | ||
1868 | 1864 | ); | ||
1869 | 1865 | |||
1870 | 1866 | if ( $o->got('query') ) { | ||
1871 | 1867 | print $qr->fingerprint($o->get('query')), "\n"; | ||
1872 | 1868 | } | ||
1873 | 1869 | else { | ||
1874 | 1870 | local $INPUT_RECORD_SEPARATOR = ";\n"; | ||
1875 | 1871 | while ( <> ) { | ||
1876 | 1872 | my $query = $_; | ||
1877 | 1873 | chomp $query; | ||
1878 | 1874 | $query =~ s/^#.+$//mg; | ||
1879 | 1875 | $query =~ s/^\s+//; | ||
1880 | 1876 | next unless $query =~ m/^\w/; | ||
1881 | 1877 | print $qr->fingerprint($query), "\n"; | ||
1882 | 1878 | } | ||
1883 | 1879 | } | ||
1884 | 1880 | } | ||
1885 | 1881 | |||
1886 | 1882 | # ############################################################################ | ||
1887 | 1883 | # Run the program. | ||
1888 | 1884 | # ############################################################################ | ||
1889 | 1885 | if ( !caller ) { exit main(@ARGV); } | ||
1890 | 1886 | |||
1891 | 1887 | 1; # Because this is a module as well as a script. | ||
1892 | 1888 | |||
1893 | 1889 | # ############################################################################# | ||
1894 | 1890 | # Documentation. | ||
1895 | 1891 | # ############################################################################# | ||
1896 | 1892 | |||
1897 | 1893 | =pod | ||
1898 | 1894 | |||
1899 | 1895 | =head1 NAME | ||
1900 | 1896 | |||
1901 | 1897 | pt-fingerprint - Convert queries into fingerprints. | ||
1902 | 1898 | |||
1903 | 1899 | =head1 SYNOPSIS | ||
1904 | 1900 | |||
1905 | 1901 | Usage: pt-fingerprint [OPTIONS] [FILES] | ||
1906 | 1902 | |||
1907 | 1903 | pt-fingerprint converts queries into fingerprints. With the --query | ||
1908 | 1904 | option, converts the option's value into a fingerprint. With no options, treats | ||
1909 | 1905 | command-line arguments as FILEs and reads and converts semicolon-separated | ||
1910 | 1906 | queries from the FILEs. When FILE is -, it reads standard input. | ||
1911 | 1907 | |||
1912 | 1908 | Convert a single query: | ||
1913 | 1909 | |||
1914 | 1910 | pt-fingerprint --query "select a, b, c from users where id = 500" | ||
1915 | 1911 | |||
1916 | 1912 | Convert a file full of queries: | ||
1917 | 1913 | |||
1918 | 1914 | pt-fingerprint /path/to/file.txt | ||
1919 | 1915 | |||
1920 | 1916 | =head1 RISKS | ||
1921 | 1917 | |||
1922 | 1918 | The following section is included to inform users about the potential risks, | ||
1923 | 1919 | whether known or unknown, of using this tool. The two main categories of risks | ||
1924 | 1920 | are those created by the nature of the tool (e.g. read-only tools vs. read-write | ||
1925 | 1921 | tools) and those created by bugs. | ||
1926 | 1922 | |||
1927 | 1923 | The pt-fingerprint tool simply reads data and transforms it, so risks are | ||
1928 | 1924 | minimal. | ||
1929 | 1925 | |||
1930 | 1926 | See also L<"BUGS"> for more information on filing bugs and getting help. | ||
1931 | 1927 | |||
1932 | 1928 | =head1 DESCRIPTION | ||
1933 | 1929 | |||
1934 | 1930 | A query fingerprint is the abstracted form of a query, which makes it possible | ||
1935 | 1931 | to group similar queries together. Abstracting a query removes literal values, | ||
1936 | 1932 | normalizes whitespace, and so on. For example, consider these two queries: | ||
1937 | 1933 | |||
1938 | 1934 | SELECT name, password FROM user WHERE id='12823'; | ||
1939 | 1935 | select name, password from user | ||
1940 | 1936 | where id=5; | ||
1941 | 1937 | |||
1942 | 1938 | Both of those queries will fingerprint to | ||
1943 | 1939 | |||
1944 | 1940 | select name, password from user where id=? | ||
1945 | 1941 | |||
1946 | 1942 | Once the query's fingerprint is known, we can then talk about a query as though | ||
1947 | 1943 | it represents all similar queries. | ||
1948 | 1944 | |||
1949 | 1945 | Query fingerprinting accommodates a great many special cases, which have proven | ||
1950 | 1946 | necessary in the real world. For example, an IN list with 5 literals is really | ||
1951 | 1947 | equivalent to one with 4 literals, so lists of literals are collapsed to a | ||
1952 | 1948 | single one. If you want to understand more about how and why all of these cases | ||
1953 | 1949 | are handled, please review the test cases in the source repository. If you | ||
1954 | 1950 | find something that is not fingerprinted properly, please submit a bug report | ||
1955 | 1951 | with a reproducible test case. Here is a list of transformations during | ||
1956 | 1952 | fingerprinting, which might not be exhaustive: | ||
1957 | 1953 | |||
1958 | 1954 | =over | ||
1959 | 1955 | |||
1960 | 1956 | =item * | ||
1961 | 1957 | |||
1962 | 1958 | Group all SELECT queries from mysqldump together, even if they are against | ||
1963 | 1959 | different tables. Ditto for all of pt-table-checksum's checksum queries. | ||
1964 | 1960 | |||
1965 | 1961 | =item * | ||
1966 | 1962 | |||
1967 | 1963 | Shorten multi-value INSERT statements to a single VALUES() list. | ||
1968 | 1964 | |||
1969 | 1965 | =item * | ||
1970 | 1966 | |||
1971 | 1967 | Strip comments. | ||
1972 | 1968 | |||
1973 | 1969 | =item * | ||
1974 | 1970 | |||
1975 | 1971 | Abstract the databases in USE statements, so all USE statements are grouped | ||
1976 | 1972 | together. | ||
1977 | 1973 | |||
1978 | 1974 | =item * | ||
1979 | 1975 | |||
1980 | 1976 | Replace all literals, such as quoted strings. For efficiency, the code that | ||
1981 | 1977 | replaces literal numbers is somewhat non-selective, and might replace some | ||
1982 | 1978 | things as numbers when they really are not. Hexadecimal literals are also | ||
1983 | 1979 | replaced. NULL is treated as a literal. Numbers embedded in identifiers are | ||
1984 | 1980 | also replaced, so tables named similarly will be fingerprinted to the same | ||
1985 | 1981 | values (e.g. users_2009 and users_2010 will fingerprint identically). | ||
1986 | 1982 | |||
1987 | 1983 | =item * | ||
1988 | 1984 | |||
1989 | 1985 | Collapse all whitespace into a single space. | ||
1990 | 1986 | |||
1991 | 1987 | =item * | ||
1992 | 1988 | |||
1993 | 1989 | Lowercase the entire query. | ||
1994 | 1990 | |||
1995 | 1991 | =item * | ||
1996 | 1992 | |||
1997 | 1993 | Replace all literals inside of IN() and VALUES() lists with a single | ||
1998 | 1994 | placeholder, regardless of cardinality. | ||
1999 | 1995 | |||
2000 | 1996 | =item * | ||
2001 | 1997 | |||
2002 | 1998 | Collapse multiple identical UNION queries into a single one. | ||
2003 | 1999 | |||
2004 | 2000 | =back | ||
2005 | 2001 | |||
2006 | 2002 | =head1 OPTIONS | ||
2007 | 2003 | |||
2008 | 2004 | This tool accepts additional command-line arguments. Refer to the | ||
2009 | 2005 | L<"SYNOPSIS"> and usage information for details. | ||
2010 | 2006 | |||
2011 | 2007 | =over | ||
2012 | 2008 | |||
2013 | 2009 | =item --config | ||
2014 | 2010 | |||
2015 | 2011 | type: Array | ||
2016 | 2012 | |||
2017 | 2013 | Read this comma-separated list of config files; if specified, this must be the | ||
2018 | 2014 | first option on the command line. | ||
2019 | 2015 | |||
2020 | 2016 | =item --help | ||
2021 | 2017 | |||
2022 | 2018 | Show help and exit. | ||
2023 | 2019 | |||
2024 | 2020 | =item --match-embedded-numbers | ||
2025 | 2021 | |||
2026 | 2022 | Do not replace numbers that are embedded in words. This option causes the | ||
2027 | 2023 | tool to be more careful about matching numbers: a number is replaced only | ||
2028 | 2024 | when it begins at a word boundary, so words with numbers, like C<catch22>, | ||
2029 | 2025 | are left unchanged. Otherwise, the default number matching pattern will | ||
2030 | 2026 | replace C<catch22> as C<catch?>. | ||
2031 | 2027 | |||
2032 | 2028 | This is helpful if database or table names contain numbers. | ||
2033 | 2029 | |||
2034 | 2030 | =item --match-md5-checksums | ||
2035 | 2031 | |||
2036 | 2032 | Match MD5 checksums and replace as single values. This option causes | ||
2037 | 2033 | the tool to be more careful about matching numbers so that MD5 checksums | ||
2038 | 2034 | like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a | ||
2039 | 2035 | single C<?> placeholder. Otherwise, the default number matching pattern will | ||
2040 | 2036 | replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>. | ||
2041 | 2037 | |||
2042 | 2038 | =item --query | ||
2043 | 2039 | |||
2044 | 2040 | type: string | ||
2045 | 2041 | |||
2046 | 2042 | The query to convert into a fingerprint. | ||
2047 | 2043 | |||
2048 | 2044 | =item --version | ||
2049 | 2045 | |||
2050 | 2046 | Show version and exit. | ||
2051 | 2047 | |||
2052 | 2048 | =back | ||
2053 | 2049 | |||
2054 | 2050 | =head1 ENVIRONMENT | ||
2055 | 2051 | |||
2056 | 2052 | The environment variable C<PTDEBUG> enables verbose debugging output to STDERR. | ||
2057 | 2053 | To enable debugging and capture all output to a file, run the tool like: | ||
2058 | 2054 | |||
2059 | 2055 | PTDEBUG=1 pt-fingerprint ... > FILE 2>&1 | ||
2060 | 2056 | |||
2061 | 2057 | Be careful: debugging output is voluminous and can generate several megabytes | ||
2062 | 2058 | of output. | ||
2063 | 2059 | |||
2064 | 2060 | =head1 SYSTEM REQUIREMENTS | ||
2065 | 2061 | |||
2066 | 2062 | You need Perl, DBI, DBD::mysql, and some core packages that ought to be | ||
2067 | 2063 | installed in any reasonably new version of Perl. | ||
2068 | 2064 | |||
2069 | 2065 | =head1 BUGS | ||
2070 | 2066 | |||
2071 | 2067 | For a list of known bugs, see L<http://www.percona.com/bugs/pt-fingerprint>. | ||
2072 | 2068 | |||
2073 | 2069 | Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>. | ||
2074 | 2070 | Include the following information in your bug report: | ||
2075 | 2071 | |||
2076 | 2072 | =over | ||
2077 | 2073 | |||
2078 | 2074 | =item * Complete command-line used to run the tool | ||
2079 | 2075 | |||
2080 | 2076 | =item * Tool L<"--version"> | ||
2081 | 2077 | |||
2082 | 2078 | =item * MySQL version of all servers involved | ||
2083 | 2079 | |||
2084 | 2080 | =item * Output from the tool including STDERR | ||
2085 | 2081 | |||
2086 | 2082 | =item * Input files (log/dump/config files, etc.) | ||
2087 | 2083 | |||
2088 | 2084 | =back | ||
2089 | 2085 | |||
2090 | 2086 | If possible, include debugging output by running the tool with C<PTDEBUG>; | ||
2091 | 2087 | see L<"ENVIRONMENT">. | ||
2092 | 2088 | |||
2093 | 2089 | =head1 DOWNLOADING | ||
2094 | 2090 | |||
2095 | 2091 | Visit L<http://www.percona.com/software/percona-toolkit/> to download the | ||
2096 | 2092 | latest release of Percona Toolkit. Or, get the latest release from the | ||
2097 | 2093 | command line: | ||
2098 | 2094 | |||
2099 | 2095 | wget percona.com/get/percona-toolkit.tar.gz | ||
2100 | 2096 | |||
2101 | 2097 | wget percona.com/get/percona-toolkit.rpm | ||
2102 | 2098 | |||
2103 | 2099 | wget percona.com/get/percona-toolkit.deb | ||
2104 | 2100 | |||
2105 | 2101 | You can also get individual tools from the latest release: | ||
2106 | 2102 | |||
2107 | 2103 | wget percona.com/get/TOOL | ||
2108 | 2104 | |||
2109 | 2105 | Replace C<TOOL> with the name of any tool. | ||
2110 | 2106 | |||
2111 | 2107 | =head1 AUTHORS | ||
2112 | 2108 | |||
2113 | 2109 | Baron Schwartz and Daniel Nichter | ||
2114 | 2110 | |||
2115 | 2111 | =head1 ABOUT PERCONA TOOLKIT | ||
2116 | 2112 | |||
2117 | 2113 | This tool is part of Percona Toolkit, a collection of advanced command-line | ||
2118 | 2114 | tools developed by Percona for MySQL support and consulting. Percona Toolkit | ||
2119 | 2115 | was forked from two projects in June, 2011: Maatkit and Aspersa. Those | ||
2120 | 2116 | projects were created by Baron Schwartz and developed primarily by him and | ||
2121 | 2117 | Daniel Nichter, both of whom are employed by Percona. Visit | ||
2122 | 2118 | L<http://www.percona.com/software/> for more software developed by Percona. | ||
2123 | 2119 | |||
2124 | 2120 | =head1 COPYRIGHT, LICENSE, AND WARRANTY | ||
2125 | 2121 | |||
2126 | 2122 | This program is copyright 2011-2012 Percona Inc. | ||
2127 | 2123 | Feedback and improvements are welcome. | ||
2128 | 2124 | |||
2129 | 2125 | THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED | ||
2130 | 2126 | WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | ||
2131 | 2127 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
2132 | 2128 | |||
2133 | 2129 | This program is free software; you can redistribute it and/or modify it under | ||
2134 | 2130 | the terms of the GNU General Public License as published by the Free Software | ||
2135 | 2131 | Foundation, version 2; OR the Perl Artistic License. On UNIX and similar | ||
2136 | 2132 | systems, you can issue `man perlgpl' or `man perlartistic' to read these | ||
2137 | 2133 | licenses. | ||
2138 | 2134 | |||
2139 | 2135 | You should have received a copy of the GNU General Public License along with | ||
2140 | 2136 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
2141 | 2137 | Place, Suite 330, Boston, MA 02111-1307 USA. | ||
2142 | 2138 | |||
2143 | 2139 | =head1 VERSION | ||
2144 | 2140 | |||
2145 | 2141 | pt-fingerprint 2.0.0 | ||
2146 | 2142 | |||
2147 | 2143 | =cut | ||
2148 | 0 | 2144 | ||
2149 | === modified file 'lib/QueryRewriter.pm' | |||
2150 | --- lib/QueryRewriter.pm 2012-01-19 19:46:56 +0000 | |||
2151 | +++ lib/QueryRewriter.pm 2012-03-30 22:06:22 +0000 | |||
2152 | @@ -175,10 +175,30 @@ | |||
2153 | 175 | $query =~ s/\\["']//g; # quoted strings | 175 | $query =~ s/\\["']//g; # quoted strings |
2154 | 176 | $query =~ s/".*?"/?/sg; # quoted strings | 176 | $query =~ s/".*?"/?/sg; # quoted strings |
2155 | 177 | $query =~ s/'.*?'/?/sg; # quoted strings | 177 | $query =~ s/'.*?'/?/sg; # quoted strings |
2160 | 178 | # This regex is extremely broad in its definition of what looks like a | 178 | |
2161 | 179 | # number. That is for speed. | 179 | # MD5 checksums which are always 32 hex chars |
2162 | 180 | $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers | 180 | if ( $self->{match_md5_checksums} ) { |
2163 | 181 | $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers | 181 | $query =~ s/([._-])[a-f0-9]{32}/$1?/g; |
2164 | 182 | } | ||
2165 | 183 | |||
2166 | 184 | # Things resembling numbers/hex. | ||
2167 | 185 | if ( !$self->{match_embedded_numbers} ) { | ||
2168 | 186 | # For speed, this regex is extremely broad in its definition | ||
2169 | 187 | # of what looks like a number. | ||
2170 | 188 | $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; | ||
2171 | 189 | } | ||
2172 | 190 | else { | ||
2173 | 191 | $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; | ||
2174 | 192 | } | ||
2175 | 193 | |||
2176 | 194 | # Clean up leftovers | ||
2177 | 195 | if ( $self->{match_md5_checksums} ) { | ||
2178 | 196 | $query =~ s/[xb+-]\?/?/g; | ||
2179 | 197 | } | ||
2180 | 198 | else { | ||
2181 | 199 | $query =~ s/[xb.+-]\?/?/g; | ||
2182 | 200 | } | ||
2183 | 201 | |||
2184 | 182 | $query =~ s/\A\s+//; # Chop off leading whitespace | 202 | $query =~ s/\A\s+//; # Chop off leading whitespace |
2185 | 183 | chomp $query; # Kill trailing whitespace | 203 | chomp $query; # Kill trailing whitespace |
2186 | 184 | $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace | 204 | $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace |
2187 | 185 | 205 | ||
2188 | === modified file 't/lib/QueryRewriter.t' | |||
2189 | --- t/lib/QueryRewriter.t 2012-03-06 13:56:08 +0000 | |||
2190 | +++ t/lib/QueryRewriter.t 2012-03-30 22:06:22 +0000 | |||
2191 | @@ -10,7 +10,7 @@ | |||
2192 | 10 | use strict; | 10 | use strict; |
2193 | 11 | use warnings FATAL => 'all'; | 11 | use warnings FATAL => 'all'; |
2194 | 12 | use English qw(-no_match_vars); | 12 | use English qw(-no_match_vars); |
2196 | 13 | use Test::More tests => 266; | 13 | use Test::More tests => 271; |
2197 | 14 | 14 | ||
2198 | 15 | use QueryRewriter; | 15 | use QueryRewriter; |
2199 | 16 | use QueryParser; | 16 | use QueryParser; |
2200 | @@ -349,6 +349,64 @@ | |||
2201 | 349 | "Fingerprint LOAD DATA INFILE" | 349 | "Fingerprint LOAD DATA INFILE" |
2202 | 350 | ); | 350 | ); |
2203 | 351 | 351 | ||
2204 | 352 | # fingerprint MD5 checksums, 32 char hex strings. This is a | ||
2205 | 353 | # special feature used by pt-fingerprint. | ||
2206 | 354 | $qr = new QueryRewriter( | ||
2207 | 355 | QueryParser => $qp, | ||
2208 | 356 | match_md5_checksums => 1, | ||
2209 | 357 | ); | ||
2210 | 358 | |||
2211 | 359 | is( | ||
2212 | 360 | $qr->fingerprint( | ||
2213 | 361 | "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" | ||
2214 | 362 | ), | ||
2215 | 363 | "select * from db.?_temp where id=?", | ||
2216 | 364 | "Fingerprint db.MD5_tbl" | ||
2217 | 365 | ); | ||
2218 | 366 | |||
2219 | 367 | is( | ||
2220 | 368 | $qr->fingerprint( | ||
2221 | 369 | "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" | ||
2222 | 370 | ), | ||
2223 | 371 | "select * from db.temp_? where id=?", | ||
2224 | 372 | "Fingerprint db.tbl_MD5" | ||
2225 | 373 | ); | ||
2226 | 374 | |||
2227 | 375 | $qr = new QueryRewriter( | ||
2228 | 376 | QueryParser => $qp, | ||
2229 | 377 | match_md5_checksums => 1, | ||
2230 | 378 | match_embedded_numbers => 1, | ||
2231 | 379 | ); | ||
2232 | 380 | |||
2233 | 381 | is( | ||
2234 | 382 | $qr->fingerprint( | ||
2235 | 383 | "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" | ||
2236 | 384 | ), | ||
2237 | 385 | "select * from db.?_temp where id=?", | ||
2238 | 386 | "Fingerprint db.MD5_tbl (with match_embedded_numbers)" | ||
2239 | 387 | ); | ||
2240 | 388 | |||
2241 | 389 | is( | ||
2242 | 390 | $qr->fingerprint( | ||
2243 | 391 | "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" | ||
2244 | 392 | ), | ||
2245 | 393 | "select * from db.temp_? where id=?", | ||
2246 | 394 | "Fingerprint db.tbl_MD5 (with match_embedded_numbers)" | ||
2247 | 395 | ); | ||
2248 | 396 | |||
2249 | 397 | $qr = new QueryRewriter( | ||
2250 | 398 | QueryParser => $qp, | ||
2251 | 399 | match_embedded_numbers => 1, | ||
2252 | 400 | ); | ||
2253 | 401 | |||
2254 | 402 | is( | ||
2255 | 403 | $qr->fingerprint( | ||
2256 | 404 | "SELECT * FROM prices.rt_5min where id=1" | ||
2257 | 405 | ), | ||
2258 | 406 | "select * from prices.rt_5min where id=?", | ||
2259 | 407 | "Fingerprint db.tbl<number>name (preserve number)" | ||
2260 | 408 | ); | ||
2261 | 409 | |||
2262 | 352 | # ############################################################################# | 410 | # ############################################################################# |
2263 | 353 | # convert_to_select() | 411 | # convert_to_select() |
2264 | 354 | # ############################################################################# | 412 | # ############################################################################# |
2265 | 355 | 413 | ||
2266 | === added directory 't/pt-fingerprint' | |||
2267 | === added file 't/pt-fingerprint/basics.t' | |||
2268 | --- t/pt-fingerprint/basics.t 1970-01-01 00:00:00 +0000 | |||
2269 | +++ t/pt-fingerprint/basics.t 2012-03-30 22:06:22 +0000 | |||
2270 | @@ -0,0 +1,101 @@ | |||
2271 | 1 | #!/usr/bin/env perl | ||
2272 | 2 | |||
2273 | 3 | BEGIN { | ||
2274 | 4 | die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" | ||
2275 | 5 | unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; | ||
2276 | 6 | unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; | ||
2277 | 7 | }; | ||
2278 | 8 | |||
2279 | 9 | use strict; | ||
2280 | 10 | use warnings FATAL => 'all'; | ||
2281 | 11 | use English qw(-no_match_vars); | ||
2282 | 12 | use Test::More tests => 7; | ||
2283 | 13 | |||
2284 | 14 | use PerconaTest; | ||
2285 | 15 | require "$trunk/bin/pt-fingerprint"; | ||
2286 | 16 | |||
2287 | 17 | my @args = qw(); | ||
2288 | 18 | my $output; | ||
2289 | 19 | my $sample = "$trunk/t/pt-fingerprint/samples"; | ||
2290 | 20 | my $pqd = "$trunk/bin/pt-query-digest"; | ||
2291 | 21 | |||
2292 | 22 | $output = `$trunk/bin/pt-fingerprint --help`; | ||
2293 | 23 | like( | ||
2294 | 24 | $output, | ||
2295 | 25 | qr/--help/, | ||
2296 | 26 | "It runs" | ||
2297 | 27 | ); | ||
2298 | 28 | |||
2299 | 29 | |||
2300 | 30 | sub test_query_file { | ||
2301 | 31 | my ($file) = @_; | ||
2302 | 32 | if ( ! -f "$sample/$file.fingerprint" ) { | ||
2303 | 33 | `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`; | ||
2304 | 34 | diag("Created $sample/$file.fingerprint"); | ||
2305 | 35 | } | ||
2306 | 36 | chomp(my $expect = `cat $sample/$file.fingerprint`); | ||
2307 | 37 | my $got = output( | ||
2308 | 38 | sub { pt_fingerprint::main("$sample/$file") } | ||
2309 | 39 | ); | ||
2310 | 40 | chomp($got); | ||
2311 | 41 | is( | ||
2312 | 42 | $got, | ||
2313 | 43 | $expect, | ||
2314 | 44 | "$file fingerprint" | ||
2315 | 45 | ); | ||
2316 | 46 | }; | ||
2317 | 47 | |||
2318 | 48 | opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n"; | ||
2319 | 49 | while (defined(my $file = readdir($dir))) { | ||
2320 | 50 | next unless $file =~ m/^query\d+$/; | ||
2321 | 51 | test_query_file($file); | ||
2322 | 52 | } | ||
2323 | 53 | closedir $dir; | ||
2324 | 54 | |||
2325 | 55 | |||
2326 | 56 | sub test_query { | ||
2327 | 57 | my (%args) = @_; | ||
2328 | 58 | my $query = $args{query}; | ||
2329 | 59 | my $expect = $args{expect}; | ||
2330 | 60 | my @ops = $args{ops} ? @{$args{ops}} : (); | ||
2331 | 61 | |||
2332 | 62 | $output = output( | ||
2333 | 63 | sub { pt_fingerprint::main('--query', $query, @ops) } | ||
2334 | 64 | ); | ||
2335 | 65 | chomp($output); | ||
2336 | 66 | is( | ||
2337 | 67 | $output, | ||
2338 | 68 | $expect, | ||
2339 | 69 | $args{name} ? $args{name} : "Fingerprint " . substr($query, 0, 70) | ||
2340 | 70 | ); | ||
2341 | 71 | } | ||
2342 | 72 | |||
2343 | 73 | test_query( | ||
2344 | 74 | query => 'select * from tbl where id=1', | ||
2345 | 75 | expect => 'select * from tbl where id=?', | ||
2346 | 76 | ); | ||
2347 | 77 | |||
2348 | 78 | test_query( | ||
2349 | 79 | name => "Fingerprint MD5_word", | ||
2350 | 80 | query => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1", | ||
2351 | 81 | expect => "select c from db.?_temp where id=?", | ||
2352 | 82 | ops => [qw(--match-md5-checksums)], | ||
2353 | 83 | ); | ||
2354 | 84 | |||
2355 | 85 | test_query( | ||
2356 | 86 | name => "Fingerprint word_MD5", | ||
2357 | 87 | query => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1", | ||
2358 | 88 | expect => "select c from db.temp_? where id=?", | ||
2359 | 89 | ops => [qw(--match-md5-checksums)], | ||
2360 | 90 | ); | ||
2361 | 91 | |||
2362 | 92 | test_query( | ||
2363 | 93 | name => "Fingerprint word<number>", | ||
2364 | 94 | query => "SELECT c FROM db.catch22 WHERE id is null", | ||
2365 | 95 | expect => "select c from db.catch22 where id is ?", | ||
2366 | 96 | ops => [qw(--match-embedded-numbers)], | ||
2367 | 97 | ); | ||
2368 | 98 | # ############################################################################# | ||
2369 | 99 | # Done. | ||
2370 | 100 | # ############################################################################# | ||
2371 | 101 | exit; | ||
2372 | 0 | 102 | ||
2373 | === added directory 't/pt-fingerprint/samples' | |||
2374 | === added file 't/pt-fingerprint/samples/query001' | |||
2375 | --- t/pt-fingerprint/samples/query001 1970-01-01 00:00:00 +0000 | |||
2376 | +++ t/pt-fingerprint/samples/query001 2012-03-30 22:06:22 +0000 | |||
2377 | @@ -0,0 +1,2 @@ | |||
2378 | 1 | # Query_time: 1 | ||
2379 | 2 | select * from db.tbl where id=1 or foo='bar'; | ||
2380 | 0 | 3 | ||
2381 | === added file 't/pt-fingerprint/samples/query001.fingerprint' | |||
2382 | --- t/pt-fingerprint/samples/query001.fingerprint 1970-01-01 00:00:00 +0000 | |||
2383 | +++ t/pt-fingerprint/samples/query001.fingerprint 2012-03-30 22:06:22 +0000 | |||
2384 | @@ -0,0 +1,1 @@ | |||
2385 | 1 | select * from db.tbl where id=? or foo=? | ||
2386 | 0 | 2 | ||
2387 | === added file 't/pt-fingerprint/samples/query002' | |||
2388 | --- t/pt-fingerprint/samples/query002 1970-01-01 00:00:00 +0000 | |||
2389 | +++ t/pt-fingerprint/samples/query002 2012-03-30 22:06:22 +0000 | |||
2390 | @@ -0,0 +1,2 @@ | |||
2391 | 1 | # Query_time: 1 | ||
2392 | 2 | select col from db.tbl1 where id in (1, 2, 3); | ||
2393 | 0 | 3 | ||
2394 | === added file 't/pt-fingerprint/samples/query002.fingerprint' | |||
2395 | --- t/pt-fingerprint/samples/query002.fingerprint 1970-01-01 00:00:00 +0000 | |||
2396 | +++ t/pt-fingerprint/samples/query002.fingerprint 2012-03-30 22:06:22 +0000 | |||
2397 | @@ -0,0 +1,1 @@ | |||
2398 | 1 | select col from db.tbl? where id in(?+) |