Merge lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes into lp:percona-server/5.5

Proposed by Laurynas Biveinis
Status: Merged
Approved by: Sergei Glushchenko
Approved revision: no longer in the source branch.
Merged at revision: 706
Proposed branch: lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes
Merge into: lp:percona-server/5.5
Diff against target: 377 lines (+238/-8)
8 files modified
mysql-test/r/csv.result (+44/-0)
mysql-test/r/mysqld--help-notwin.result (+3/-0)
mysql-test/r/percona_server_variables_debug.result (+1/-0)
mysql-test/r/percona_server_variables_release.result (+1/-0)
mysql-test/suite/sys_vars/r/csv_mode_basic.result (+55/-0)
mysql-test/suite/sys_vars/t/csv_mode_basic.test (+47/-0)
mysql-test/t/csv.test (+44/-0)
storage/csv/ha_tina.cc (+43/-8)
To merge this branch: bzr merge lp:~laurynas-biveinis/percona-server/csv-engine-ietf-quotes
Reviewer Review Type Date Requested Status
Sergei Glushchenko (community) g2 Approve
Review via email: mp+236838@code.launchpad.net

Description of the change

Implement IETF-compatible parsing mode of embedded quote and comma
characters for the CSV storage engines, implementing
https://blueprints.launchpad.net/percona-server/+spec/csv-engine-ietf-quotes
blueprint and fixing bug 1316042 (CSV engine does not properly process
"" (in quotes)).

Introduce new dynamic global/session variable csv_mode with a possible
value IETF_QUOTES, which enables the standard-compliant quote parsing:
commas are accepted in quoted fields as-is, and quoting of " is
changed from \" to "". If csv_mode is set to empty value (the
default), then the old parsing behavior is kept.

Add a testcase to csv.test, a sys_vars testcase, and update
mysqld--help-notwin, percona_server_variables_debug, and
percona_server_variables_release.

    http://jenkins.percona.com/job/percona-server-5.5-param/1027/

To post a comment you must log in.
Revision history for this message
Sergei Glushchenko (sergei.glushchenko) wrote :

Approve

review: Approve (g2)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'mysql-test/r/csv.result'
--- mysql-test/r/csv.result 2010-08-16 06:46:21 +0000
+++ mysql-test/r/csv.result 2014-10-02 09:28:25 +0000
@@ -5470,3 +5470,47 @@
5470ERROR HY000: Table 't1' is marked as crashed and should be repaired5470ERROR HY000: Table 't1' is marked as crashed and should be repaired
5471DROP TABLE t1;5471DROP TABLE t1;
5472End of 5.1 tests5472End of 5.1 tests
5473SET @old_csv_mode = @@SESSION.csv_mode;
5474SET @@SESSION.csv_mode='ietf_quotes';
5475CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
5476INSERT INTO t1 VALUES("a\"b,c","d");
5477INSERT INTO t1 VALUES("d","a\"b,c");
5478INSERT INTO t1 VALUES(",\"a","e");
5479INSERT INTO t1 VALUES("e",",\"a");
5480INSERT INTO t1 VALUES("\"","f");
5481INSERT INTO t1 VALUES("f","\"");
5482INSERT INTO t1 VALUES(",","g");
5483INSERT INTO t1 VALUES("g",",");
5484SELECT * FROM t1;
5485c1 c2
5486a"b,c d
5487d a"b,c
5488,"a e
5489e ,"a
5490" f
5491f "
5492, g
5493g ,
5494CSV file contents:
5495"a""b,c","d"
5496"d","a""b,c"
5497",""a","e"
5498"e",",""a"
5499"""","f"
5500"f",""""
5501",","g"
5502"g",","
5503DROP TABLE t1;
5504CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
5505Replacing t1.CSV
5506SELECT * FROM t1;
5507c1 c2
5508a b
5509a b
5510a"b,c d
5511d ,"a
5512a", e
5513e "
5514, f
5515DROP TABLE t1;
5516SET @@SESSION.csv_mode=@old_csv_mode;
54735517
=== modified file 'mysql-test/r/mysqld--help-notwin.result'
--- mysql-test/r/mysqld--help-notwin.result 2014-08-22 09:58:37 +0000
+++ mysql-test/r/mysqld--help-notwin.result 2014-10-02 09:28:25 +0000
@@ -100,6 +100,8 @@
100 --console Write error output on screen; don't remove the console100 --console Write error output on screen; don't remove the console
101 window on windows.101 window on windows.
102 --core-file Write core on errors.102 --core-file Write core on errors.
103 --csv-mode=name Control CSV parser mode: []: default, ietf_quotes:
104 standards-compatible embedded quote and comma parsing
103 -h, --datadir=name Path to the database root directory105 -h, --datadir=name Path to the database root directory
104 --date-format=name The DATE format (ignored)106 --date-format=name The DATE format (ignored)
105 --datetime-format=name 107 --datetime-format=name
@@ -865,6 +867,7 @@
865concurrent-insert AUTO867concurrent-insert AUTO
866connect-timeout 10868connect-timeout 10
867console FALSE869console FALSE
870csv-mode
868date-format %Y-%m-%d871date-format %Y-%m-%d
869datetime-format %Y-%m-%d %H:%i:%s872datetime-format %Y-%m-%d %H:%i:%s
870default-storage-engine InnoDB873default-storage-engine InnoDB
871874
=== modified file 'mysql-test/r/percona_server_variables_debug.result'
--- mysql-test/r/percona_server_variables_debug.result 2014-01-23 05:14:14 +0000
+++ mysql-test/r/percona_server_variables_debug.result 2014-10-02 09:28:25 +0000
@@ -26,6 +26,7 @@
26COMPLETION_TYPE26COMPLETION_TYPE
27CONCURRENT_INSERT27CONCURRENT_INSERT
28CONNECT_TIMEOUT28CONNECT_TIMEOUT
29CSV_MODE
29DATADIR30DATADIR
30DATETIME_FORMAT31DATETIME_FORMAT
31DATE_FORMAT32DATE_FORMAT
3233
=== modified file 'mysql-test/r/percona_server_variables_release.result'
--- mysql-test/r/percona_server_variables_release.result 2014-01-23 05:14:14 +0000
+++ mysql-test/r/percona_server_variables_release.result 2014-10-02 09:28:25 +0000
@@ -26,6 +26,7 @@
26COMPLETION_TYPE26COMPLETION_TYPE
27CONCURRENT_INSERT27CONCURRENT_INSERT
28CONNECT_TIMEOUT28CONNECT_TIMEOUT
29CSV_MODE
29DATADIR30DATADIR
30DATETIME_FORMAT31DATETIME_FORMAT
31DATE_FORMAT32DATE_FORMAT
3233
=== added file 'mysql-test/suite/sys_vars/r/csv_mode_basic.result'
--- mysql-test/suite/sys_vars/r/csv_mode_basic.result 1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/r/csv_mode_basic.result 2014-10-02 09:28:25 +0000
@@ -0,0 +1,55 @@
1SET @saved_csv_mode = @@SESSION.csv_mode;
2SET @@SESSION.csv_mode = 'ietf_quotes';
3SELECT @@SESSION.csv_mode;
4@@SESSION.csv_mode
5IETF_QUOTES
6SET @@SESSION.csv_mode = 'IETF_QUOTES';
7SELECT @@SESSION.csv_mode;
8@@SESSION.csv_mode
9IETF_QUOTES
10SET @@SESSION.csv_mode = IETF_QUOTES;
11SELECT @@SESSION.csv_mode;
12@@SESSION.csv_mode
13IETF_QUOTES
14SET @@SESSION.csv_mode = '';
15SELECT @@SESSION.csv_mode;
16@@SESSION.csv_mode
17
18SET @@SESSION.csv_mode = 0;
19SELECT @@SESSION.csv_mode;
20@@SESSION.csv_mode
21
22SET @@SESSION.csv_mode = 1;
23SELECT @@SESSION.csv_mode;
24@@SESSION.csv_mode
25IETF_QUOTES
26SET @@SESSION.csv_mode = NULL;
27ERROR 42000: Variable 'csv_mode' can't be set to the value of 'NULL'
28SET @@SESSION.csv_mode = FOO;
29ERROR 42000: Variable 'csv_mode' can't be set to the value of 'FOO'
30SET @@SESSION.csv_mode = 'foo';
31ERROR 42000: Variable 'csv_mode' can't be set to the value of 'foo'
32SELECT @@SESSION.csv_mode;
33@@SESSION.csv_mode
34IETF_QUOTES
35SET @@SESSION.csv_mode = 0;
36SELECT @@SESSION.csv_mode;
37@@SESSION.csv_mode
38
39SET @@SESSION.csv_mode = -1;
40ERROR 42000: Variable 'csv_mode' can't be set to the value of '-1'
41SET @@SESSION.csv_mode = 2;
42ERROR 42000: Variable 'csv_mode' can't be set to the value of '2'
43SET @@SESSION.csv_mode = @saved_csv_mode;
44SET @saved_csv_mode = @@GLOBAL.csv_mode;
45SET @@GLOBAL.csv_mode = '';
46SELECT @@GLOBAL.csv_mode;
47@@GLOBAL.csv_mode
48
49SET @@GLOBAL.csv_mode = IETF_QUOTES;
50SELECT @@GLOBAL.csv_mode;
51@@GLOBAL.csv_mode
52IETF_QUOTES
53SET @@GLOBAL.csv_mode = FOO;
54ERROR 42000: Variable 'csv_mode' can't be set to the value of 'FOO'
55SET @@GLOBAL.csv_mode = @saved_csv_mode;
056
=== added file 'mysql-test/suite/sys_vars/t/csv_mode_basic.test'
--- mysql-test/suite/sys_vars/t/csv_mode_basic.test 1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/t/csv_mode_basic.test 2014-10-02 09:28:25 +0000
@@ -0,0 +1,47 @@
1# Basic sys_vars test for csv_mode variable
2
3SET @saved_csv_mode = @@SESSION.csv_mode;
4
5# Valid values
6SET @@SESSION.csv_mode = 'ietf_quotes';
7SELECT @@SESSION.csv_mode;
8SET @@SESSION.csv_mode = 'IETF_QUOTES';
9SELECT @@SESSION.csv_mode;
10SET @@SESSION.csv_mode = IETF_QUOTES;
11SELECT @@SESSION.csv_mode;
12SET @@SESSION.csv_mode = '';
13SELECT @@SESSION.csv_mode;
14SET @@SESSION.csv_mode = 0;
15SELECT @@SESSION.csv_mode;
16SET @@SESSION.csv_mode = 1;
17SELECT @@SESSION.csv_mode;
18
19# Invalid values
20--error ER_WRONG_VALUE_FOR_VAR
21SET @@SESSION.csv_mode = NULL;
22--error ER_WRONG_VALUE_FOR_VAR
23SET @@SESSION.csv_mode = FOO;
24--error ER_WRONG_VALUE_FOR_VAR
25SET @@SESSION.csv_mode = 'foo';
26SELECT @@SESSION.csv_mode;
27SET @@SESSION.csv_mode = 0;
28SELECT @@SESSION.csv_mode;
29--error ER_WRONG_VALUE_FOR_VAR
30SET @@SESSION.csv_mode = -1;
31--error ER_WRONG_VALUE_FOR_VAR
32SET @@SESSION.csv_mode = 2;
33
34SET @@SESSION.csv_mode = @saved_csv_mode;
35
36# Test GLOBAL variable
37
38SET @saved_csv_mode = @@GLOBAL.csv_mode;
39
40SET @@GLOBAL.csv_mode = '';
41SELECT @@GLOBAL.csv_mode;
42SET @@GLOBAL.csv_mode = IETF_QUOTES;
43SELECT @@GLOBAL.csv_mode;
44--error ER_WRONG_VALUE_FOR_VAR
45SET @@GLOBAL.csv_mode = FOO;
46
47SET @@GLOBAL.csv_mode = @saved_csv_mode;
048
=== modified file 'mysql-test/t/csv.test'
--- mysql-test/t/csv.test 2010-08-16 06:46:21 +0000
+++ mysql-test/t/csv.test 2014-10-02 09:28:25 +0000
@@ -1899,3 +1899,47 @@
18991899
1900DROP TABLE t1;1900DROP TABLE t1;
1901--echo End of 5.1 tests1901--echo End of 5.1 tests
1902
1903#
1904# Bug #71091 CSV engine does not properly process "", in quotes
1905#
1906
1907SET @old_csv_mode = @@SESSION.csv_mode;
1908SET @@SESSION.csv_mode='ietf_quotes';
1909CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
1910
1911INSERT INTO t1 VALUES("a\"b,c","d");
1912INSERT INTO t1 VALUES("d","a\"b,c");
1913INSERT INTO t1 VALUES(",\"a","e");
1914INSERT INTO t1 VALUES("e",",\"a");
1915INSERT INTO t1 VALUES("\"","f");
1916INSERT INTO t1 VALUES("f","\"");
1917INSERT INTO t1 VALUES(",","g");
1918INSERT INTO t1 VALUES("g",",");
1919
1920SELECT * FROM t1;
1921
1922--echo CSV file contents:
1923--cat_file $MYSQLD_DATADIR/test/t1.CSV
1924
1925DROP TABLE t1;
1926
1927CREATE TABLE t1(c1 TEXT NOT NULL, c2 TEXT NOT NULL) ENGINE=CSV;
1928
1929--echo Replacing t1.CSV
1930--remove_file $MYSQLD_DATADIR/test/t1.CSV
1931--write_file $MYSQLD_DATADIR/test/t1.CSV
1932a,b
1933"a","b"
1934"a""b,c","d"
1935"d",",""a"
1936"a"",",e
1937e,""""
1938",",f
1939EOF
1940
1941SELECT * FROM t1;
1942
1943DROP TABLE t1;
1944
1945SET @@SESSION.csv_mode=@old_csv_mode;
19021946
=== modified file 'storage/csv/ha_tina.cc'
--- storage/csv/ha_tina.cc 2012-02-16 09:48:16 +0000
+++ storage/csv/ha_tina.cc 2014-10-02 09:28:25 +0000
@@ -67,6 +67,23 @@
67#define CSN_EXT ".CSN" // Files used during repair and update67#define CSN_EXT ".CSN" // Files used during repair and update
68#define CSM_EXT ".CSM" // Meta file68#define CSM_EXT ".CSM" // Meta file
6969
70enum csv_mode_enum { csv_mode_none= 0, csv_mode_ietf_quotes= 1 };
71
72static const char *csv_mode_names[]=
73{
74 "IETF_QUOTES", NullS
75};
76
77static TYPELIB csv_mode_typelib=
78{
79 array_elements(csv_mode_names) - 1, "",
80 csv_mode_names, NULL
81};
82
83static MYSQL_THDVAR_SET(mode, PLUGIN_VAR_RQCMDARG,
84 "Control CSV parser mode: []: default, ietf_quotes: "
85 "standards-compatible embedded quote and comma parsing",
86 NULL, NULL, 0, &csv_mode_typelib);
7087
71static TINA_SHARE *get_share(const char *table_name, TABLE *table);88static TINA_SHARE *get_share(const char *table_name, TABLE *table);
72static int free_share(TINA_SHARE *share);89static int free_share(TINA_SHARE *share);
@@ -510,7 +527,7 @@
510 char attribute_buffer[1024];527 char attribute_buffer[1024];
511 String attribute(attribute_buffer, sizeof(attribute_buffer),528 String attribute(attribute_buffer, sizeof(attribute_buffer),
512 &my_charset_bin);529 &my_charset_bin);
513530 bool ietf_quotes= THDVAR(current_thd, mode) & csv_mode_ietf_quotes;
514 my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set);531 my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set);
515 buffer.length(0);532 buffer.length(0);
516533
@@ -546,7 +563,7 @@
546 {563 {
547 if (*ptr == '"')564 if (*ptr == '"')
548 {565 {
549 buffer.append('\\');566 buffer.append(ietf_quotes ? '"' : '\\');
550 buffer.append('"');567 buffer.append('"');
551 }568 }
552 else if (*ptr == '\r')569 else if (*ptr == '\r')
@@ -638,6 +655,7 @@
638 my_bitmap_map *org_bitmap;655 my_bitmap_map *org_bitmap;
639 int error;656 int error;
640 bool read_all;657 bool read_all;
658 bool ietf_quotes= THDVAR(current_thd, mode) & csv_mode_ietf_quotes;
641 DBUG_ENTER("ha_tina::find_current_row");659 DBUG_ENTER("ha_tina::find_current_row");
642660
643 free_root(&blobroot, MYF(0));661 free_root(&blobroot, MYF(0));
@@ -671,8 +689,10 @@
671 a) If end of current field is reached, move689 a) If end of current field is reached, move
672 to next field and jump to step 2.3690 to next field and jump to step 2.3
673 b) If current character is a \\ handle691 b) If current character is a \\ handle
674 \\n, \\r, \\, \\"692 \\n, \\r, \\, and \\" if not in ietf_quotes mode
675 c) else append the current character into the buffer693 c) if in ietf_quotes mode and the current character is
694 a ", handle ""
695 d) else append the current character into the buffer
676 before checking that EOL has not been reached.696 before checking that EOL has not been reached.
677 2.2) If the current character does not begin with a quote697 2.2) If the current character does not begin with a quote
678 2.2.1) Until EOL has not been reached698 2.2.1) Until EOL has not been reached
@@ -713,15 +733,25 @@
713 curr_offset+= 2;733 curr_offset+= 2;
714 break;734 break;
715 }735 }
716 if (curr_char == '\\' && curr_offset != (end_offset - 1))736 if (ietf_quotes && curr_char == '"'
717 {737 && file_buff->get_value(curr_offset + 1) == '"')
738 {
739 /* Embedded IETF quote */
740 curr_offset++;
741 buffer.append('"');
742 }
743 else if (curr_char == '\\' && curr_offset != (end_offset - 1))
744 {
745 /* A quote followed by something else than a comma, end of line, or
746 (in IETF mode) another quote will be handled as a regular
747 character. */
718 curr_offset++;748 curr_offset++;
719 curr_char= file_buff->get_value(curr_offset);749 curr_char= file_buff->get_value(curr_offset);
720 if (curr_char == 'r')750 if (curr_char == 'r')
721 buffer.append('\r');751 buffer.append('\r');
722 else if (curr_char == 'n' )752 else if (curr_char == 'n' )
723 buffer.append('\n');753 buffer.append('\n');
724 else if (curr_char == '\\' || curr_char == '"')754 else if (curr_char == '\\' || (!ietf_quotes && curr_char == '"'))
725 buffer.append(curr_char);755 buffer.append(curr_char);
726 else /* This could only happed with an externally created file */756 else /* This could only happed with an externally created file */
727 {757 {
@@ -1750,6 +1780,11 @@
1750 return COMPATIBLE_DATA_YES;1780 return COMPATIBLE_DATA_YES;
1751}1781}
17521782
1783static struct st_mysql_sys_var* csv_system_variables[]= {
1784 MYSQL_SYSVAR(mode),
1785 NULL
1786};
1787
1753struct st_mysql_storage_engine csv_storage_engine=1788struct st_mysql_storage_engine csv_storage_engine=
1754{ MYSQL_HANDLERTON_INTERFACE_VERSION };1789{ MYSQL_HANDLERTON_INTERFACE_VERSION };
17551790
@@ -1765,7 +1800,7 @@
1765 tina_done_func, /* Plugin Deinit */1800 tina_done_func, /* Plugin Deinit */
1766 0x0100 /* 1.0 */,1801 0x0100 /* 1.0 */,
1767 NULL, /* status variables */1802 NULL, /* status variables */
1768 NULL, /* system variables */1803 csv_system_variables,
1769 NULL, /* config options */1804 NULL, /* config options */
1770 0, /* flags */1805 0, /* flags */
1771}1806}

Subscribers

People subscribed via source and target branches