Merge lp:~percona-toolkit-dev/percona-toolkit/pt-query-digest-does-not-fingerprint-true-false-literals-correctly-965553 into lp:percona-toolkit/2.2

Proposed by Frank Cizmich
Status: Merged
Approved by: Daniel Nichter
Approved revision: 597
Merged at revision: 603
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-query-digest-does-not-fingerprint-true-false-literals-correctly-965553
Merge into: lp:percona-toolkit/2.2
Diff against target: 364 lines (+89/-17)
7 files modified
bin/pt-index-usage (+18/-4)
bin/pt-kill (+18/-4)
bin/pt-query-digest (+3/-1)
bin/pt-table-usage (+18/-4)
bin/pt-upgrade (+18/-4)
lib/QueryRewriter.pm (+2/-0)
t/lib/QueryRewriter.t (+12/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-query-digest-does-not-fingerprint-true-false-literals-correctly-965553
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+220867@code.launchpad.net

Description of the change

Modified QueryRewriter to correctly abstract queries that contain true false literals.
eg:
SELECT * FROM tbl WHERE id=1 AND flag=true AND trueflag=FALSE
=>
select * from tbl where id=? and flag=? and trueflag=?

To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) wrote :

What are the other changes for and are they tested? E.g.:

- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
+ $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;

+ $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;

Revision history for this message
Frank Cizmich (frank-cizmich) wrote :

These are changes that were already in QueryRewriter.pm from before, but for some reason where not updated into the rest of the tools.

Before pushing I ran all tests on the affected tools before and after the update to make sure the tests passed or at least gave identical results.

I did this for all fixes that involved modules.

> What are the other changes for and are they tested? E.g.:
>
> - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
> + $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
>
> + $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;

Revision history for this message
Daniel Nichter (daniel-nichter) wrote :

Ok good to know.

Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-index-usage'
2--- bin/pt-index-usage 2014-02-20 08:10:16 +0000
3+++ bin/pt-index-usage 2014-05-24 22:59:27 +0000
4@@ -2374,8 +2374,8 @@
5
6 my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
7 my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
8-my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
9-my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
10+my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
11+my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
12
13
14 sub new {
15@@ -2390,7 +2390,8 @@
16 $query =~ s/$mlc_re//go;
17 $query =~ s/$olc_re//go;
18 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
19- $query =~ s/$vlc_re//go;
20+ my $qualifier = $1 || '';
21+ $query =~ s/$vlc_re/$qualifier/go;
22 }
23 return $query;
24 }
25@@ -2465,6 +2466,8 @@
26 $query =~ s/".*?"/?/sg; # quoted strings
27 $query =~ s/'.*?'/?/sg; # quoted strings
28
29+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
30+
31 if ( $self->{match_md5_checksums} ) {
32 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
33 }
34@@ -2513,6 +2516,13 @@
35 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
36 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
37
38+ if ( $query =~ m/\A\s*LOAD/i ) {
39+ my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
40+ $tbl ||= '';
41+ $tbl =~ s/`//g;
42+ return "LOAD DATA $tbl";
43+ }
44+
45 if ( $query =~ m/\Aadministrator command:/ ) {
46 $query =~ s/administrator command:/ADMIN/;
47 $query = uc $query;
48@@ -2525,7 +2535,7 @@
49 PTDEBUG && _d($query);
50
51 $query = uc $query;
52- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
53+ $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
54 $query =~ s/\s+COUNT[^)]+\)//g;
55
56 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
57@@ -2540,6 +2550,7 @@
58 eval $QueryParser::tbl_ident;
59 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
60 if ( $dds) {
61+ $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
62 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
63 $obj = uc $obj if $obj;
64 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
65@@ -2606,6 +2617,9 @@
66 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
67 $query = $verbs;
68 }
69+ elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
70+ return $verbs;
71+ }
72 else {
73 my @tables = $self->__distill_tables($query, $table, %args);
74 $query = join(q{ }, $verbs, @tables);
75
76=== modified file 'bin/pt-kill'
77--- bin/pt-kill 2014-02-20 08:10:16 +0000
78+++ bin/pt-kill 2014-05-24 22:59:27 +0000
79@@ -4652,8 +4652,8 @@
80
81 my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
82 my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
83-my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
84-my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
85+my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
86+my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
87
88
89 sub new {
90@@ -4668,7 +4668,8 @@
91 $query =~ s/$mlc_re//go;
92 $query =~ s/$olc_re//go;
93 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
94- $query =~ s/$vlc_re//go;
95+ my $qualifier = $1 || '';
96+ $query =~ s/$vlc_re/$qualifier/go;
97 }
98 return $query;
99 }
100@@ -4743,6 +4744,8 @@
101 $query =~ s/".*?"/?/sg; # quoted strings
102 $query =~ s/'.*?'/?/sg; # quoted strings
103
104+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
105+
106 if ( $self->{match_md5_checksums} ) {
107 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
108 }
109@@ -4791,6 +4794,13 @@
110 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
111 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
112
113+ if ( $query =~ m/\A\s*LOAD/i ) {
114+ my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
115+ $tbl ||= '';
116+ $tbl =~ s/`//g;
117+ return "LOAD DATA $tbl";
118+ }
119+
120 if ( $query =~ m/\Aadministrator command:/ ) {
121 $query =~ s/administrator command:/ADMIN/;
122 $query = uc $query;
123@@ -4803,7 +4813,7 @@
124 PTDEBUG && _d($query);
125
126 $query = uc $query;
127- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
128+ $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
129 $query =~ s/\s+COUNT[^)]+\)//g;
130
131 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
132@@ -4818,6 +4828,7 @@
133 eval $QueryParser::tbl_ident;
134 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
135 if ( $dds) {
136+ $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
137 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
138 $obj = uc $obj if $obj;
139 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
140@@ -4884,6 +4895,9 @@
141 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
142 $query = $verbs;
143 }
144+ elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
145+ return $verbs;
146+ }
147 else {
148 my @tables = $self->__distill_tables($query, $table, %args);
149 $query = join(q{ }, $verbs, @tables);
150
151=== modified file 'bin/pt-query-digest'
152--- bin/pt-query-digest 2014-02-20 08:10:16 +0000
153+++ bin/pt-query-digest 2014-05-24 22:59:27 +0000
154@@ -2891,6 +2891,8 @@
155 $query =~ s/".*?"/?/sg; # quoted strings
156 $query =~ s/'.*?'/?/sg; # quoted strings
157
158+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
159+
160 if ( $self->{match_md5_checksums} ) {
161 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
162 }
163@@ -5797,7 +5799,7 @@
164 <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}}
165 } grep {
166 defined $classes->{$_}->{$args{attrib}}->{$args{orderby}}
167- } keys %$classes;
168+ } keys %$classes; # this should first be sorted for test consistency, but many tests already in place would fail
169 my @chosen; # top events
170 my @other; # other events (< top)
171 my ($total, $count) = (0, 0);
172
173=== modified file 'bin/pt-table-usage'
174--- bin/pt-table-usage 2014-02-20 08:10:16 +0000
175+++ bin/pt-table-usage 2014-05-24 22:59:27 +0000
176@@ -2112,8 +2112,8 @@
177
178 my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
179 my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
180-my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
181-my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
182+my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
183+my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
184
185
186 sub new {
187@@ -2128,7 +2128,8 @@
188 $query =~ s/$mlc_re//go;
189 $query =~ s/$olc_re//go;
190 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
191- $query =~ s/$vlc_re//go;
192+ my $qualifier = $1 || '';
193+ $query =~ s/$vlc_re/$qualifier/go;
194 }
195 return $query;
196 }
197@@ -2203,6 +2204,8 @@
198 $query =~ s/".*?"/?/sg; # quoted strings
199 $query =~ s/'.*?'/?/sg; # quoted strings
200
201+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
202+
203 if ( $self->{match_md5_checksums} ) {
204 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
205 }
206@@ -2251,6 +2254,13 @@
207 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
208 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
209
210+ if ( $query =~ m/\A\s*LOAD/i ) {
211+ my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
212+ $tbl ||= '';
213+ $tbl =~ s/`//g;
214+ return "LOAD DATA $tbl";
215+ }
216+
217 if ( $query =~ m/\Aadministrator command:/ ) {
218 $query =~ s/administrator command:/ADMIN/;
219 $query = uc $query;
220@@ -2263,7 +2273,7 @@
221 PTDEBUG && _d($query);
222
223 $query = uc $query;
224- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
225+ $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
226 $query =~ s/\s+COUNT[^)]+\)//g;
227
228 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
229@@ -2278,6 +2288,7 @@
230 eval $QueryParser::tbl_ident;
231 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
232 if ( $dds) {
233+ $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
234 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
235 $obj = uc $obj if $obj;
236 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
237@@ -2344,6 +2355,9 @@
238 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
239 $query = $verbs;
240 }
241+ elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
242+ return $verbs;
243+ }
244 else {
245 my @tables = $self->__distill_tables($query, $table, %args);
246 $query = join(q{ }, $verbs, @tables);
247
248=== modified file 'bin/pt-upgrade'
249--- bin/pt-upgrade 2014-02-20 08:10:16 +0000
250+++ bin/pt-upgrade 2014-05-24 22:59:27 +0000
251@@ -4559,8 +4559,8 @@
252
253 my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
254 my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
255-my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
256-my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
257+my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
258+my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
259
260
261 sub new {
262@@ -4575,7 +4575,8 @@
263 $query =~ s/$mlc_re//go;
264 $query =~ s/$olc_re//go;
265 if ( $query =~ m/$vlc_rf/i ) { # contains show + version
266- $query =~ s/$vlc_re//go;
267+ my $qualifier = $1 || '';
268+ $query =~ s/$vlc_re/$qualifier/go;
269 }
270 return $query;
271 }
272@@ -4650,6 +4651,8 @@
273 $query =~ s/".*?"/?/sg; # quoted strings
274 $query =~ s/'.*?'/?/sg; # quoted strings
275
276+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
277+
278 if ( $self->{match_md5_checksums} ) {
279 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
280 }
281@@ -4698,6 +4701,13 @@
282 $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
283 $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
284
285+ if ( $query =~ m/\A\s*LOAD/i ) {
286+ my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
287+ $tbl ||= '';
288+ $tbl =~ s/`//g;
289+ return "LOAD DATA $tbl";
290+ }
291+
292 if ( $query =~ m/\Aadministrator command:/ ) {
293 $query =~ s/administrator command:/ADMIN/;
294 $query = uc $query;
295@@ -4710,7 +4720,7 @@
296 PTDEBUG && _d($query);
297
298 $query = uc $query;
299- $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
300+ $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
301 $query =~ s/\s+COUNT[^)]+\)//g;
302
303 $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
304@@ -4725,6 +4735,7 @@
305 eval $QueryParser::tbl_ident;
306 my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
307 if ( $dds) {
308+ $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
309 my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
310 $obj = uc $obj if $obj;
311 PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
312@@ -4791,6 +4802,9 @@
313 map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
314 $query = $verbs;
315 }
316+ elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
317+ return $verbs;
318+ }
319 else {
320 my @tables = $self->__distill_tables($query, $table, %args);
321 $query = join(q{ }, $verbs, @tables);
322
323=== modified file 'lib/QueryRewriter.pm'
324--- lib/QueryRewriter.pm 2013-09-27 02:16:19 +0000
325+++ lib/QueryRewriter.pm 2014-05-24 22:59:27 +0000
326@@ -177,6 +177,8 @@
327 $query =~ s/".*?"/?/sg; # quoted strings
328 $query =~ s/'.*?'/?/sg; # quoted strings
329
330+ $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
331+
332 # MD5 checksums which are always 32 hex chars
333 if ( $self->{match_md5_checksums} ) {
334 $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
335
336=== modified file 't/lib/QueryRewriter.t'
337--- t/lib/QueryRewriter.t 2013-09-27 02:16:19 +0000
338+++ t/lib/QueryRewriter.t 2014-05-24 22:59:27 +0000
339@@ -416,6 +416,16 @@
340 "Fingerprint /* -- comment */ SELECT (bug 1174956)"
341 );
342
343+
344+# issue 965553
345+
346+is(
347+ $qr->fingerprint('SELECT * FROM tbl WHERE id=1 AND flag=true AND trueflag=FALSE'),
348+ 'select * from tbl where id=? and flag=? and trueflag=?',
349+ 'boolean values abstracted correctly',
350+);
351+
352+
353 # #############################################################################
354 # convert_to_select()
355 # #############################################################################
356@@ -1454,6 +1464,8 @@
357 "distill CREATE TABLE IF NOT EXISTS foo",
358 );
359
360+
361+
362 # #############################################################################
363 # Done.
364 # #############################################################################

Subscribers

People subscribed via source and target branches