Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2 into lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0

Proposed by Daniel Nichter
Status: Merged
Approved by: Daniel Nichter
Approved revision: 263
Merged at revision: 260
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2
Merge into: lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0
Diff against target: 166 lines (+139/-1)
2 files modified
lib/Quoter.pm (+61/-0)
t/lib/Quoter.t (+78/-1)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2
Reviewer Review Type Date Requested Status
Brian Fraser (community) Approve
Daniel Nichter Approve
Review via email: mp+87050@code.launchpad.net

This proposal supersedes a proposal from 2011-12-28.

To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Daniel Nichter (daniel-nichter) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Brian Fraser (fraserbn) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve
Revision history for this message
Brian Fraser (fraserbn) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/Quoter.pm'
--- lib/Quoter.pm 2011-07-12 21:47:07 +0000
+++ lib/Quoter.pm 2011-12-28 23:20:29 +0000
@@ -144,6 +144,67 @@
144 return $db ? "$db.$tbl" : $tbl;144 return $db ? "$db.$tbl" : $tbl;
145}145}
146146
147# Return the list passed in, with the elements passed through quotemeta,
148# and the results concatenated with ','.
149sub serialize_list {
150 my ( $self, @args ) = @_;
151 if ( @args && $args[-1] eq '' ) {
152 # If the last element is an empty string, it conflicts
153 # with the assumptions of the somewhat lax regex below,
154 # which always leaves an empty element in the end.
155 # We could fix the regex, but it's a lot of extra
156 # complexity for little gain, or we could add a
157 # special-case here. Just by tagging another empty
158 # string, we get the desired result.
159 push @args, '';
160 }
161 return join ',', map { quotemeta } @args;
162}
163
164sub deserialize_list {
165 my ( $self, $string ) = @_;
166 my @escaped_parts = $string =~ /
167 \G # Start of string, or end of previous match.
168 ( # Each of these is an element in the original list.
169 [^\\,]* # Anything not a backslash or a comma
170 (?: # When we get here, we found one of the above.
171 \\. # A backslash followed by something so we can continue
172 [^\\,]* # Same as above.
173 )* # Repeat zero of more times.
174 )
175 ,? # Comma dividing elements or absolute end of the string.
176 /sxg;
177
178 # Last element will always be empty. Flaw in the regex.
179 # But easier to fix this way. Faster, too.
180 pop @escaped_parts;
181
182 # Undo the quotemeta().
183 my @unescaped_parts = map {
184 my $part = $_;
185 # Here be weirdness. Unfortunately quotemeta() is broken, and exposes
186 # the internal representation of scalars. Namely, the latin-1 range,
187 # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
188 # strings, but left alone in UTF-8 strings. Thus, this.
189
190 # TODO: quotemeta() might change in 5.16 to mean
191 # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
192 # And also fix this whole weird behavior under
193 # use feature 'unicode_strings' -- If/once that's
194 # implemented, this will have to change.
195 my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string,
196 ? qr/(?=\p{ASCII})\W/ # We only care about non-word
197 # characters in the ASCII range
198 : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
199 # same as above, but also
200 # unescape the latin-1 range.
201 $part =~ s/\\($char_class)/$1/g;
202 $part;
203 } @escaped_parts;
204
205 return @unescaped_parts;
206}
207
1471;2081;
148}209}
149# ###########################################################################210# ###########################################################################
150211
=== modified file 't/lib/Quoter.t'
--- t/lib/Quoter.t 2011-07-12 21:47:07 +0000
+++ t/lib/Quoter.t 2011-12-28 23:20:29 +0000
@@ -9,7 +9,7 @@
9use strict;9use strict;
10use warnings FATAL => 'all';10use warnings FATAL => 'all';
11use English qw(-no_match_vars);11use English qw(-no_match_vars);
12use Test::More tests => 31;12use Test::More tests => 47;
1313
14use Quoter;14use Quoter;
15use PerconaTest;15use PerconaTest;
@@ -99,4 +99,81 @@
99is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' );99is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' );
100is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );100is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
101101
102# ###########################################################################
103# (de)serialize_list
104# ###########################################################################
105
106my @serialize_tests = (
107 [ 'a', 'b', ],
108 [ 'a,', 'b', ],
109 [ "a,\\\nc\nas", 'b', ],
110 [ 'a\\\,a', 'c', ],
111 [ 'a\\\\,a', 'c', ],
112 [ 'a\\\\\,aa', 'c', ],
113 [ 'a\\\\\\,aa', 'c', ],
114 [ 'a\\\,a,a', 'c,d,e,d,', ],
115 [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
116 [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
117 [ ",,,,,,,,,,,,,,", ",", ],
118 [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
119 [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
120 [ 1, 2 ],
121 [ 7, 9 ],
122 [ '', '', '', ],
123);
124
125use DSNParser;
126use Sandbox;
127my $dp = new DSNParser(opts=>$dsn_opts);
128my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
129my $dbh = $sb->get_dbh_for('master');
130SKIP: {
131 skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh;
132
133 # Prevent "Wide character in print at Test/Builder.pm" warnings.
134 binmode Test::More->builder->$_(), ':encoding(UTF-8)'
135 for qw(output failure_output);
136
137 $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
138 $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
139 $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');
140
141 my $sth = $dbh->prepare(
142 "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
143 );
144 my $selsth = $dbh->prepare(
145 "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
146 );
147
148 for my $test_index ( 0..$#serialize_tests ) {
149 my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} );
150
151 # Bit of a hack, but we want to test both of Perl's internal encodings
152 # for correctness.
153 local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);
154
155 $sth->execute($test_index, $ser);
156 $selsth->execute($test_index);
157
158 my $flat_string = "[" . join("][", @{$serialize_tests[$test_index]}) . "]";
159 $flat_string =~ s/\n/\\n/g;
160
161 is_deeply(
162 [ $q->deserialize_list($selsth->fetchrow_array()) ],
163 $serialize_tests[$test_index],
164 "Serialize $flat_string"
165 );
166 }
167
168 $sth->finish();
169 $selsth->finish();
170
171 $dbh->do("DROP DATABASE serialize_test");
172
173 $dbh->disconnect();
174};
175
176# ###########################################################################
177# Done.
178# ###########################################################################
102exit;179exit;

Subscribers

People subscribed via source and target branches

to all changes: