Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2 into lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0

Proposed by Daniel Nichter
Status: Merged
Approved by: Daniel Nichter
Approved revision: 263
Merged at revision: 260
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2
Merge into: lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0
Diff against target: 166 lines (+139/-1)
2 files modified
lib/Quoter.pm (+61/-0)
t/lib/Quoter.t (+78/-1)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2
Reviewer Review Type Date Requested Status
Brian Fraser (community) Approve
Daniel Nichter Approve
Review via email: mp+87050@code.launchpad.net

This proposal supersedes a proposal from 2011-12-28.

To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Daniel Nichter (daniel-nichter) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Brian Fraser (fraserbn) : Posted in a previous version of this proposal
review: Approve
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve
Revision history for this message
Brian Fraser (fraserbn) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/Quoter.pm'
2--- lib/Quoter.pm 2011-07-12 21:47:07 +0000
3+++ lib/Quoter.pm 2011-12-28 23:20:29 +0000
4@@ -144,6 +144,67 @@
5 return $db ? "$db.$tbl" : $tbl;
6 }
7
8+# Return the list passed in, with the elements passed through quotemeta,
9+# and the results concatenated with ','.
10+sub serialize_list {
11+ my ( $self, @args ) = @_;
12+ if ( @args && $args[-1] eq '' ) {
13+ # If the last element is an empty string, it conflicts
14+ # with the assumptions of the somewhat lax regex below,
15+ # which always leaves an empty element in the end.
16+ # We could fix the regex, but it's a lot of extra
17+ # complexity for little gain, or we could add a
18+ # special-case here. Just by tagging another empty
19+ # string, we get the desired result.
20+ push @args, '';
21+ }
22+ return join ',', map { quotemeta } @args;
23+}
24+
25+sub deserialize_list {
26+ my ( $self, $string ) = @_;
27+ my @escaped_parts = $string =~ /
28+ \G # Start of string, or end of previous match.
29+ ( # Each of these is an element in the original list.
30+ [^\\,]* # Anything not a backslash or a comma
31+ (?: # When we get here, we found one of the above.
32+ \\. # A backslash followed by something so we can continue
33+ [^\\,]* # Same as above.
34+ )* # Repeat zero of more times.
35+ )
36+ ,? # Comma dividing elements or absolute end of the string.
37+ /sxg;
38+
39+ # Last element will always be empty. Flaw in the regex.
40+ # But easier to fix this way. Faster, too.
41+ pop @escaped_parts;
42+
43+ # Undo the quotemeta().
44+ my @unescaped_parts = map {
45+ my $part = $_;
46+ # Here be weirdness. Unfortunately quotemeta() is broken, and exposes
47+ # the internal representation of scalars. Namely, the latin-1 range,
48+ # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
49+ # strings, but left alone in UTF-8 strings. Thus, this.
50+
51+ # TODO: quotemeta() might change in 5.16 to mean
52+ # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
53+ # And also fix this whole weird behavior under
54+ # use feature 'unicode_strings' -- If/once that's
55+ # implemented, this will have to change.
56+ my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string,
57+ ? qr/(?=\p{ASCII})\W/ # We only care about non-word
58+ # characters in the ASCII range
59+ : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
60+ # same as above, but also
61+ # unescape the latin-1 range.
62+ $part =~ s/\\($char_class)/$1/g;
63+ $part;
64+ } @escaped_parts;
65+
66+ return @unescaped_parts;
67+}
68+
69 1;
70 }
71 # ###########################################################################
72
73=== modified file 't/lib/Quoter.t'
74--- t/lib/Quoter.t 2011-07-12 21:47:07 +0000
75+++ t/lib/Quoter.t 2011-12-28 23:20:29 +0000
76@@ -9,7 +9,7 @@
77 use strict;
78 use warnings FATAL => 'all';
79 use English qw(-no_match_vars);
80-use Test::More tests => 31;
81+use Test::More tests => 47;
82
83 use Quoter;
84 use PerconaTest;
85@@ -99,4 +99,81 @@
86 is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' );
87 is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
88
89+# ###########################################################################
90+# (de)serialize_list
91+# ###########################################################################
92+
93+my @serialize_tests = (
94+ [ 'a', 'b', ],
95+ [ 'a,', 'b', ],
96+ [ "a,\\\nc\nas", 'b', ],
97+ [ 'a\\\,a', 'c', ],
98+ [ 'a\\\\,a', 'c', ],
99+ [ 'a\\\\\,aa', 'c', ],
100+ [ 'a\\\\\\,aa', 'c', ],
101+ [ 'a\\\,a,a', 'c,d,e,d,', ],
102+ [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
103+ [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
104+ [ ",,,,,,,,,,,,,,", ",", ],
105+ [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
106+ [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
107+ [ 1, 2 ],
108+ [ 7, 9 ],
109+ [ '', '', '', ],
110+);
111+
112+use DSNParser;
113+use Sandbox;
114+my $dp = new DSNParser(opts=>$dsn_opts);
115+my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
116+my $dbh = $sb->get_dbh_for('master');
117+SKIP: {
118+ skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh;
119+
120+ # Prevent "Wide character in print at Test/Builder.pm" warnings.
121+ binmode Test::More->builder->$_(), ':encoding(UTF-8)'
122+ for qw(output failure_output);
123+
124+ $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
125+ $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
126+ $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');
127+
128+ my $sth = $dbh->prepare(
129+ "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
130+ );
131+ my $selsth = $dbh->prepare(
132+ "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
133+ );
134+
135+ for my $test_index ( 0..$#serialize_tests ) {
136+ my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} );
137+
138+ # Bit of a hack, but we want to test both of Perl's internal encodings
139+ # for correctness.
140+ local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);
141+
142+ $sth->execute($test_index, $ser);
143+ $selsth->execute($test_index);
144+
145+ my $flat_string = "[" . join("][", @{$serialize_tests[$test_index]}) . "]";
146+ $flat_string =~ s/\n/\\n/g;
147+
148+ is_deeply(
149+ [ $q->deserialize_list($selsth->fetchrow_array()) ],
150+ $serialize_tests[$test_index],
151+ "Serialize $flat_string"
152+ );
153+ }
154+
155+ $sth->finish();
156+ $selsth->finish();
157+
158+ $dbh->do("DROP DATABASE serialize_test");
159+
160+ $dbh->disconnect();
161+};
162+
163+# ###########################################################################
164+# Done.
165+# ###########################################################################
166 exit;

Subscribers

People subscribed via source and target branches

to all changes: