Merge lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes into lp:percona-server/5.5

Proposed by Laurynas Biveinis on 2014-10-02
Status: Merged
Approved by: Sergei Glushchenko on 2014-10-12
Approved revision: 699
Merged at revision: 706
Proposed branch: lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes
Merge into: lp:percona-server/5.5
Diff against target: 377 lines (+238/-8)
8 files modified
mysql-test/r/csv.result (+44/-0)
mysql-test/r/mysqld--help-notwin.result (+3/-0)
mysql-test/r/percona_server_variables_debug.result (+1/-0)
mysql-test/r/percona_server_variables_release.result (+1/-0)
mysql-test/suite/sys_vars/r/csv_mode_basic.result (+55/-0)
mysql-test/suite/sys_vars/t/csv_mode_basic.test (+47/-0)
mysql-test/t/csv.test (+44/-0)
storage/csv/ha_tina.cc (+43/-8)
To merge this branch: bzr merge lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes
Reviewer Review Type Date Requested Status
Sergei Glushchenko (community) g2 2014-10-02 Approve on 2014-10-12
Review via email: mp+236838@code.launchpad.net

Description of the change

Implement IETF-compatible parsing mode of embedded quote and comma
characters for the CSV storage engines, implementing
https://blueprints.launchpad.net/percona-server/+spec/csv-engine-ietf-quotes
blueprint and fixing bug 1316042 (CSV engine does not properly process
"" (in quotes)).

Introduce new dynamic global/session variable csv_mode with a possible
value IETF_QUOTES, which enables the standard-compliant quote parsing:
commas are accepted in quoted fields as-is, and quoting of " is
changed from \" to "". If csv_mode is set to empty value (the
default), then the old parsing behavior is kept.

Add a testcase to csv.test, a sys_vars testcase, and update
mysqld--help-notwin, percona_server_variables_debug, and
percona_server_variables_release.

    http://jenkins.percona.com/job/percona-server-5.5-param/1027/

To post a comment you must log in.

Approve

review: Approve (g2)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'mysql-test/r/csv.result'
2--- mysql-test/r/csv.result 2010-08-16 06:46:21 +0000
3+++ mysql-test/r/csv.result 2014-10-02 09:28:25 +0000
4@@ -5470,3 +5470,47 @@
5 ERROR HY000: Table 't1' is marked as crashed and should be repaired
6 DROP TABLE t1;
7 End of 5.1 tests
8+SET @old_csv_mode = @@SESSION.csv_mode;
9+SET @@SESSION.csv_mode='ietf_quotes';
10+CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
11+INSERT INTO t1 VALUES("a\"b,c","d");
12+INSERT INTO t1 VALUES("d","a\"b,c");
13+INSERT INTO t1 VALUES(",\"a","e");
14+INSERT INTO t1 VALUES("e",",\"a");
15+INSERT INTO t1 VALUES("\"","f");
16+INSERT INTO t1 VALUES("f","\"");
17+INSERT INTO t1 VALUES(",","g");
18+INSERT INTO t1 VALUES("g",",");
19+SELECT * FROM t1;
20+c1 c2
21+a"b,c d
22+d a"b,c
23+,"a e
24+e ,"a
25+" f
26+f "
27+, g
28+g ,
29+CSV file contents:
30+"a""b,c","d"
31+"d","a""b,c"
32+",""a","e"
33+"e",",""a"
34+"""","f"
35+"f",""""
36+",","g"
37+"g",","
38+DROP TABLE t1;
39+CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
40+Replacing t1.CSV
41+SELECT * FROM t1;
42+c1 c2
43+a b
44+a b
45+a"b,c d
46+d ,"a
47+a", e
48+e "
49+, f
50+DROP TABLE t1;
51+SET @@SESSION.csv_mode=@old_csv_mode;
52
53=== modified file 'mysql-test/r/mysqld--help-notwin.result'
54--- mysql-test/r/mysqld--help-notwin.result 2014-08-22 09:58:37 +0000
55+++ mysql-test/r/mysqld--help-notwin.result 2014-10-02 09:28:25 +0000
56@@ -100,6 +100,8 @@
57 --console Write error output on screen; don't remove the console
58 window on windows.
59 --core-file Write core on errors.
60+ --csv-mode=name Control CSV parser mode: []: default, ietf_quotes:
61+ standards-compatible embedded quote and comma parsing
62 -h, --datadir=name Path to the database root directory
63 --date-format=name The DATE format (ignored)
64 --datetime-format=name
65@@ -865,6 +867,7 @@
66 concurrent-insert AUTO
67 connect-timeout 10
68 console FALSE
69+csv-mode
70 date-format %Y-%m-%d
71 datetime-format %Y-%m-%d %H:%i:%s
72 default-storage-engine InnoDB
73
74=== modified file 'mysql-test/r/percona_server_variables_debug.result'
75--- mysql-test/r/percona_server_variables_debug.result 2014-01-23 05:14:14 +0000
76+++ mysql-test/r/percona_server_variables_debug.result 2014-10-02 09:28:25 +0000
77@@ -26,6 +26,7 @@
78 COMPLETION_TYPE
79 CONCURRENT_INSERT
80 CONNECT_TIMEOUT
81+CSV_MODE
82 DATADIR
83 DATETIME_FORMAT
84 DATE_FORMAT
85
86=== modified file 'mysql-test/r/percona_server_variables_release.result'
87--- mysql-test/r/percona_server_variables_release.result 2014-01-23 05:14:14 +0000
88+++ mysql-test/r/percona_server_variables_release.result 2014-10-02 09:28:25 +0000
89@@ -26,6 +26,7 @@
90 COMPLETION_TYPE
91 CONCURRENT_INSERT
92 CONNECT_TIMEOUT
93+CSV_MODE
94 DATADIR
95 DATETIME_FORMAT
96 DATE_FORMAT
97
98=== added file 'mysql-test/suite/sys_vars/r/csv_mode_basic.result'
99--- mysql-test/suite/sys_vars/r/csv_mode_basic.result 1970-01-01 00:00:00 +0000
100+++ mysql-test/suite/sys_vars/r/csv_mode_basic.result 2014-10-02 09:28:25 +0000
101@@ -0,0 +1,55 @@
102+SET @saved_csv_mode = @@SESSION.csv_mode;
103+SET @@SESSION.csv_mode = 'ietf_quotes';
104+SELECT @@SESSION.csv_mode;
105+@@SESSION.csv_mode
106+IETF_QUOTES
107+SET @@SESSION.csv_mode = 'IETF_QUOTES';
108+SELECT @@SESSION.csv_mode;
109+@@SESSION.csv_mode
110+IETF_QUOTES
111+SET @@SESSION.csv_mode = IETF_QUOTES;
112+SELECT @@SESSION.csv_mode;
113+@@SESSION.csv_mode
114+IETF_QUOTES
115+SET @@SESSION.csv_mode = '';
116+SELECT @@SESSION.csv_mode;
117+@@SESSION.csv_mode
118+
119+SET @@SESSION.csv_mode = 0;
120+SELECT @@SESSION.csv_mode;
121+@@SESSION.csv_mode
122+
123+SET @@SESSION.csv_mode = 1;
124+SELECT @@SESSION.csv_mode;
125+@@SESSION.csv_mode
126+IETF_QUOTES
127+SET @@SESSION.csv_mode = NULL;
128+ERROR 42000: Variable 'csv_mode' can't be set to the value of 'NULL'
129+SET @@SESSION.csv_mode = FOO;
130+ERROR 42000: Variable 'csv_mode' can't be set to the value of 'FOO'
131+SET @@SESSION.csv_mode = 'foo';
132+ERROR 42000: Variable 'csv_mode' can't be set to the value of 'foo'
133+SELECT @@SESSION.csv_mode;
134+@@SESSION.csv_mode
135+IETF_QUOTES
136+SET @@SESSION.csv_mode = 0;
137+SELECT @@SESSION.csv_mode;
138+@@SESSION.csv_mode
139+
140+SET @@SESSION.csv_mode = -1;
141+ERROR 42000: Variable 'csv_mode' can't be set to the value of '-1'
142+SET @@SESSION.csv_mode = 2;
143+ERROR 42000: Variable 'csv_mode' can't be set to the value of '2'
144+SET @@SESSION.csv_mode = @saved_csv_mode;
145+SET @saved_csv_mode = @@GLOBAL.csv_mode;
146+SET @@GLOBAL.csv_mode = '';
147+SELECT @@GLOBAL.csv_mode;
148+@@GLOBAL.csv_mode
149+
150+SET @@GLOBAL.csv_mode = IETF_QUOTES;
151+SELECT @@GLOBAL.csv_mode;
152+@@GLOBAL.csv_mode
153+IETF_QUOTES
154+SET @@GLOBAL.csv_mode = FOO;
155+ERROR 42000: Variable 'csv_mode' can't be set to the value of 'FOO'
156+SET @@GLOBAL.csv_mode = @saved_csv_mode;
157
158=== added file 'mysql-test/suite/sys_vars/t/csv_mode_basic.test'
159--- mysql-test/suite/sys_vars/t/csv_mode_basic.test 1970-01-01 00:00:00 +0000
160+++ mysql-test/suite/sys_vars/t/csv_mode_basic.test 2014-10-02 09:28:25 +0000
161@@ -0,0 +1,47 @@
162+# Basic sys_vars test for csv_mode variable
163+
164+SET @saved_csv_mode = @@SESSION.csv_mode;
165+
166+# Valid values
167+SET @@SESSION.csv_mode = 'ietf_quotes';
168+SELECT @@SESSION.csv_mode;
169+SET @@SESSION.csv_mode = 'IETF_QUOTES';
170+SELECT @@SESSION.csv_mode;
171+SET @@SESSION.csv_mode = IETF_QUOTES;
172+SELECT @@SESSION.csv_mode;
173+SET @@SESSION.csv_mode = '';
174+SELECT @@SESSION.csv_mode;
175+SET @@SESSION.csv_mode = 0;
176+SELECT @@SESSION.csv_mode;
177+SET @@SESSION.csv_mode = 1;
178+SELECT @@SESSION.csv_mode;
179+
180+# Invalid values
181+--error ER_WRONG_VALUE_FOR_VAR
182+SET @@SESSION.csv_mode = NULL;
183+--error ER_WRONG_VALUE_FOR_VAR
184+SET @@SESSION.csv_mode = FOO;
185+--error ER_WRONG_VALUE_FOR_VAR
186+SET @@SESSION.csv_mode = 'foo';
187+SELECT @@SESSION.csv_mode;
188+SET @@SESSION.csv_mode = 0;
189+SELECT @@SESSION.csv_mode;
190+--error ER_WRONG_VALUE_FOR_VAR
191+SET @@SESSION.csv_mode = -1;
192+--error ER_WRONG_VALUE_FOR_VAR
193+SET @@SESSION.csv_mode = 2;
194+
195+SET @@SESSION.csv_mode = @saved_csv_mode;
196+
197+# Test GLOBAL variable
198+
199+SET @saved_csv_mode = @@GLOBAL.csv_mode;
200+
201+SET @@GLOBAL.csv_mode = '';
202+SELECT @@GLOBAL.csv_mode;
203+SET @@GLOBAL.csv_mode = IETF_QUOTES;
204+SELECT @@GLOBAL.csv_mode;
205+--error ER_WRONG_VALUE_FOR_VAR
206+SET @@GLOBAL.csv_mode = FOO;
207+
208+SET @@GLOBAL.csv_mode = @saved_csv_mode;
209
210=== modified file 'mysql-test/t/csv.test'
211--- mysql-test/t/csv.test 2010-08-16 06:46:21 +0000
212+++ mysql-test/t/csv.test 2014-10-02 09:28:25 +0000
213@@ -1899,3 +1899,47 @@
214
215 DROP TABLE t1;
216 --echo End of 5.1 tests
217+
218+#
219+# Bug #71091 CSV engine does not properly process "", in quotes
220+#
221+
222+SET @old_csv_mode = @@SESSION.csv_mode;
223+SET @@SESSION.csv_mode='ietf_quotes';
224+CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
225+
226+INSERT INTO t1 VALUES("a\"b,c","d");
227+INSERT INTO t1 VALUES("d","a\"b,c");
228+INSERT INTO t1 VALUES(",\"a","e");
229+INSERT INTO t1 VALUES("e",",\"a");
230+INSERT INTO t1 VALUES("\"","f");
231+INSERT INTO t1 VALUES("f","\"");
232+INSERT INTO t1 VALUES(",","g");
233+INSERT INTO t1 VALUES("g",",");
234+
235+SELECT * FROM t1;
236+
237+--echo CSV file contents:
238+--cat_file $MYSQLD_DATADIR/test/t1.CSV
239+
240+DROP TABLE t1;
241+
242+CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
243+
244+--echo Replacing t1.CSV
245+--remove_file $MYSQLD_DATADIR/test/t1.CSV
246+--write_file $MYSQLD_DATADIR/test/t1.CSV
247+a,b
248+"a","b"
249+"a""b,c","d"
250+"d",",""a"
251+"a"",",e
252+e,""""
253+",",f
254+EOF
255+
256+SELECT * FROM t1;
257+
258+DROP TABLE t1;
259+
260+SET @@SESSION.csv_mode=@old_csv_mode;
261
262=== modified file 'storage/csv/ha_tina.cc'
263--- storage/csv/ha_tina.cc 2012-02-16 09:48:16 +0000
264+++ storage/csv/ha_tina.cc 2014-10-02 09:28:25 +0000
265@@ -67,6 +67,23 @@
266 #define CSN_EXT ".CSN" // Files used during repair and update
267 #define CSM_EXT ".CSM" // Meta file
268
269+enum csv_mode_enum { csv_mode_none= 0, csv_mode_ietf_quotes= 1 };
270+
271+static const char *csv_mode_names[]=
272+{
273+ "IETF_QUOTES", NullS
274+};
275+
276+static TYPELIB csv_mode_typelib=
277+{
278+ array_elements(csv_mode_names) - 1, "",
279+ csv_mode_names, NULL
280+};
281+
282+static MYSQL_THDVAR_SET(mode, PLUGIN_VAR_RQCMDARG,
283+ "Control CSV parser mode: []: default, ietf_quotes: "
284+ "standards-compatible embedded quote and comma parsing",
285+ NULL, NULL, 0, &csv_mode_typelib);
286
287 static TINA_SHARE *get_share(const char *table_name, TABLE *table);
288 static int free_share(TINA_SHARE *share);
289@@ -510,7 +527,7 @@
290 char attribute_buffer[1024];
291 String attribute(attribute_buffer, sizeof(attribute_buffer),
292 &my_charset_bin);
293-
294+ bool ietf_quotes= THDVAR(current_thd, mode) & csv_mode_ietf_quotes;
295 my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set);
296 buffer.length(0);
297
298@@ -546,7 +563,7 @@
299 {
300 if (*ptr == '"')
301 {
302- buffer.append('\\');
303+ buffer.append(ietf_quotes ? '"' : '\\');
304 buffer.append('"');
305 }
306 else if (*ptr == '\r')
307@@ -638,6 +655,7 @@
308 my_bitmap_map *org_bitmap;
309 int error;
310 bool read_all;
311+ bool ietf_quotes= THDVAR(current_thd, mode) & csv_mode_ietf_quotes;
312 DBUG_ENTER("ha_tina::find_current_row");
313
314 free_root(&blobroot, MYF(0));
315@@ -671,8 +689,10 @@
316 a) If end of current field is reached, move
317 to next field and jump to step 2.3
318 b) If current character is a \\ handle
319- \\n, \\r, \\, \\"
320- c) else append the current character into the buffer
321+ \\n, \\r, \\, and \\" if not in ietf_quotes mode
322+ c) if in ietf_quotes mode and the current character is
323+ a ", handle ""
324+ d) else append the current character into the buffer
325 before checking that EOL has not been reached.
326 2.2) If the current character does not begin with a quote
327 2.2.1) Until EOL has not been reached
328@@ -713,15 +733,25 @@
329 curr_offset+= 2;
330 break;
331 }
332- if (curr_char == '\\' && curr_offset != (end_offset - 1))
333- {
334+ if (ietf_quotes && curr_char == '"'
335+ && file_buff->get_value(curr_offset + 1) == '"')
336+ {
337+ /* Embedded IETF quote */
338+ curr_offset++;
339+ buffer.append('"');
340+ }
341+ else if (curr_char == '\\' && curr_offset != (end_offset - 1))
342+ {
343+ /* A quote followed by something else than a comma, end of line, or
344+ (in IETF mode) another quote will be handled as a regular
345+ character. */
346 curr_offset++;
347 curr_char= file_buff->get_value(curr_offset);
348 if (curr_char == 'r')
349 buffer.append('\r');
350 else if (curr_char == 'n' )
351 buffer.append('\n');
352- else if (curr_char == '\\' || curr_char == '"')
353+ else if (curr_char == '\\' || (!ietf_quotes && curr_char == '"'))
354 buffer.append(curr_char);
355 else /* This could only happed with an externally created file */
356 {
357@@ -1750,6 +1780,11 @@
358 return COMPATIBLE_DATA_YES;
359 }
360
361+static struct st_mysql_sys_var* csv_system_variables[]= {
362+ MYSQL_SYSVAR(mode),
363+ NULL
364+};
365+
366 struct st_mysql_storage_engine csv_storage_engine=
367 { MYSQL_HANDLERTON_INTERFACE_VERSION };
368
369@@ -1765,7 +1800,7 @@
370 tina_done_func, /* Plugin Deinit */
371 0x0100 /* 1.0 */,
372 NULL, /* status variables */
373- NULL, /* system variables */
374+ csv_system_variables,
375 NULL, /* config options */
376 0, /* flags */
377 }

Subscribers

People subscribed via source and target branches