Merge lp:~paul-lucas/zorba/bug-994610 into lp:zorba

Proposed by Paul J. Lucas
Status: Merged
Approved by: Paul J. Lucas
Approved revision: 10820
Merged at revision: 10829
Proposed branch: lp:~paul-lucas/zorba/bug-994610
Merge into: lp:zorba
Diff against target: 197 lines (+103/-71)
2 files modified
src/util/regex.cpp (+98/-68)
test/rbkt/Queries/CMakeLists.txt (+5/-3)
To merge this branch: bzr merge lp:~paul-lucas/zorba/bug-994610
Reviewer | Review Type | Date Requested | Status
Ghislain Fourny | | | Approve
Paul J. Lucas | | | Approve
Review via email: mp+104769@code.launchpad.net

Commit message

1. Added fix for [\s] -- should now always throw FORX0002.
2. I think all regex tests "pass" using both pre- and post-4.0 versions of ICU.

Description of the change

1. Added fix for [\s] -- should now always throw FORX0002.
2. I think all regex tests "pass" using both pre- and post-4.0 versions of ICU.

To post a comment you must log in.
Revision history for this message
Paul J. Lucas (paul-lucas) :
review: Approve
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue job bug-994610-2012-05-04T23-48-03.876Z is finished. The final status was:

All tests succeeded!

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Voting does not meet specified criteria. Required: Approve > 1, Disapprove < 1, Needs Fixing < 1, Pending < 1. Got: 1 Approve, 1 Pending.

Revision history for this message
Ghislain Fourny (gislenius) wrote :

The regex tests seem to now pass on my side.

review: Approve
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue job bug-994610-2012-05-08T14-42-54.852Z is finished. The final status was:

All tests succeeded!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/util/regex.cpp'
2--- src/util/regex.cpp 2012-05-03 12:31:51 +0000
3+++ src/util/regex.cpp 2012-05-04 16:10:25 +0000
4@@ -123,74 +123,104 @@
5 continue;
6 }
7 got_backslash = false;
8- switch ( *xq_c ) {
9- case 'c': // NameChar
10- *icu_re += "[" bs_c "]";
11- continue;
12- case 'C': // [^\c]
13- *icu_re += "[^" bs_c "]";
14- continue;
15- case 'i': // initial NameChar
16- *icu_re += "[" bs_i "]";
17- continue;
18- case 'I': // [^\i]
19- *icu_re += "[^" bs_i "]";
20- continue;
21- case '0':
22- case '1':
23- case '2':
24- case '3':
25- case '4':
26- case '5':
27- case '6':
28- case '7':
29- case '8':
30- case '9':
31- backref_no = *xq_c - '0';
32- if ( !backref_no ) // \0 is illegal
33- throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
34- if ( in_char_class ) {
35- //
36- // XQuery 3.0 F&O 5.6.1: Within a character class expression,
37- // \ followed by a digit is invalid.
38- //
39- throw INVALID_RE_EXCEPTION(
40- xq_re, ZED( BackRefIllegalInCharClass )
41- );
42- }
43- in_backref = true;
44- // no break;
45- case '$':
46- case '(':
47- case ')':
48- case '*':
49- case '+':
50- case '-':
51- case '.':
52- case '?':
53- case 'd': // [0-9]
54- case 'D': // [^\d]
55- case 'n': // newline
56- case 'p': // category escape
57- case 'P': // [^\p]
58- case 'r': // carriage return
59- case 's': // whitespace
60- case 'S': // [^\s]
61- case 't': // tab
62- case 'w': // word char
63- case 'W': // [^\w]
64- case '[':
65- case '\\':
66- case ']':
67- case '^':
68- case '{':
69- case '|':
70- case '}':
71- *icu_re += '\\';
72- break;
73- default:
74- throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
75- }
76+
77+ if ( in_char_class ) {
78+ //
79+ // When in a character class, only single-character escapes are
80+ // permitted.
81+ //
82+ switch ( *xq_c ) {
83+ case '(':
84+ case ')':
85+ case '*':
86+ case '+':
87+ case '-':
88+ case '.':
89+ case 'n': // newline
90+ case 'r': // carriage return
91+ case 't': // tab
92+ case '[':
93+ case '\\':
94+ case ']':
95+ case '^':
96+ case '{':
97+ case '|':
98+ case '}':
99+ *icu_re += '\\';
100+ break;
101+ default:
102+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
103+ }
104+ } else {
105+ switch ( *xq_c ) {
106+ case 'c': // NameChar
107+ *icu_re += "[" bs_c "]";
108+ continue;
109+ case 'C': // [^\c]
110+ *icu_re += "[^" bs_c "]";
111+ continue;
112+ case 'i': // initial NameChar
113+ *icu_re += "[" bs_i "]";
114+ continue;
115+ case 'I': // [^\i]
116+ *icu_re += "[^" bs_i "]";
117+ continue;
118+ case '0':
119+ case '1':
120+ case '2':
121+ case '3':
122+ case '4':
123+ case '5':
124+ case '6':
125+ case '7':
126+ case '8':
127+ case '9':
128+ backref_no = *xq_c - '0';
129+ if ( !backref_no ) // \0 is illegal
130+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
131+ if ( in_char_class ) {
132+ //
133+ // XQuery 3.0 F&O 5.6.1: Within a character class expression,
134+ // \ followed by a digit is invalid.
135+ //
136+ throw INVALID_RE_EXCEPTION(
137+ xq_re, ZED( BackRefIllegalInCharClass )
138+ );
139+ }
140+ in_backref = true;
141+ // no break;
142+ case '$':
143+ case '(':
144+ case ')':
145+ case '*':
146+ case '+':
147+ case '-':
148+ case '.':
149+ case '?':
150+ case 'd': // [0-9]
151+ case 'D': // [^\d]
152+ case 'n': // newline
153+ case 'p': // category escape
154+ case 'P': // [^\p]
155+ case 'r': // carriage return
156+ case 's': // whitespace
157+ case 'S': // [^\s]
158+ case 't': // tab
159+ case 'w': // word char
160+ case 'W': // [^\w]
161+ case '[':
162+ case '\\':
163+ case ']':
164+ case '^':
165+ case '{':
166+ case '|':
167+ case '}':
168+ *icu_re += '\\';
169+ break;
170+ default:
171+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
172+ }
173+ } // if ( in_char_class )
174 } else {
175 if ( in_backref ) {
176 //
177
178=== modified file 'test/rbkt/Queries/CMakeLists.txt'
179--- test/rbkt/Queries/CMakeLists.txt 2012-05-03 12:31:51 +0000
180+++ test/rbkt/Queries/CMakeLists.txt 2012-05-04 16:10:25 +0000
181@@ -534,11 +534,13 @@
182 EXPECTED_FAILURE(test/rbkt/zorba/http-client/post/post3_binary_element 3391756)
183 IF(NOT ZORBA_NO_ICU)
184 IF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
185- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err17 974477)
186+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
187+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
188+ ELSE ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
189+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err10 994610)
190+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err15 866874)
191 ENDIF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
192 EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m11 866874)
193- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
194- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
195 ENDIF(NOT ZORBA_NO_ICU)
196
197 IF(ZORBA_NO_ICU)

Subscribers

People subscribed via source and target branches