1
=== modified file 'CMakeConfiguration.txt'
2
--- CMakeConfiguration.txt	2012-03-28 05:19:57 +0000
3
+++ CMakeConfiguration.txt	2012-04-07 00:45:26 +0000
4
@@ -135,14 +135,14 @@
5
135
SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings")
135
SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings")
6
136
MESSAGE (STATUS "ZORBA_DEBUG_STRING:                   " ${ZORBA_DEBUG_STRING})
136
MESSAGE (STATUS "ZORBA_DEBUG_STRING:                   " ${ZORBA_DEBUG_STRING})
7
137
137
10
138
SET(ZORBA_NO_UNICODE OFF CACHE BOOL "disable ICU")
138
SET(ZORBA_NO_ICU OFF CACHE BOOL "disable ICU")
11
139
MESSAGE(STATUS "ZORBA_NO_UNICODE:                     " ${ZORBA_NO_UNICODE})
139
MESSAGE(STATUS "ZORBA_NO_ICU:                         " ${ZORBA_NO_ICU})
12
140
140
14
141
IF (ZORBA_NO_UNICODE)
141
IF (ZORBA_NO_ICU)
15
142
  SET (no_full_text ON)
142
  SET (no_full_text ON)
17
143
ELSE (ZORBA_NO_UNICODE)
143
ELSE (ZORBA_NO_ICU)
18
144
  SET (no_full_text OFF)
144
  SET (no_full_text OFF)
20
145
ENDIF (ZORBA_NO_UNICODE)
145
ENDIF (ZORBA_NO_ICU)
21
146
SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support")
146
SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support")
22
147
MESSAGE(STATUS "ZORBA_NO_FULL_TEXT:                   " ${ZORBA_NO_FULL_TEXT})
147
MESSAGE(STATUS "ZORBA_NO_FULL_TEXT:                   " ${ZORBA_NO_FULL_TEXT})
23
148
148
24
149
149
25
=== modified file 'CMakeLists.txt'
26
--- CMakeLists.txt	2012-03-28 05:19:57 +0000
27
+++ CMakeLists.txt	2012-04-07 00:45:26 +0000
28
@@ -123,10 +123,14 @@
29
123
CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T) 
123
CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T) 
30
124
124
31
125
CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF)
125
CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF)
34
126
CHECK_CXX_SOURCE_COMPILES ("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
126
SET(CMAKE_EXTRA_INCLUDE_FILES wchar.h)
35
127
CHECK_CXX_SOURCE_COMPILES ("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
127
CHECK_TYPE_SIZE("wchar_t" ZORBA_SIZEOF_WCHAR_T)
36
128
SET(CMAKE_EXTRA_INCLUDE_FILES)
37
128
CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR)
129
CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR)
38
129
130
39
131
CHECK_CXX_SOURCE_COMPILES("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
40
132
CHECK_CXX_SOURCE_COMPILES("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
41
133
42
130
################################################################################
134
################################################################################
43
131
# Various cmake macros
135
# Various cmake macros
44
132
136
45
133
137
46
=== modified file 'ChangeLog'
47
--- ChangeLog	2012-04-04 15:59:01 +0000
48
+++ ChangeLog	2012-04-07 00:45:26 +0000
49
@@ -4,6 +4,7 @@
50
4
4
51
5
New Features:
5
New Features:
52
6
  * Extended API for Python, Java, PHP and Ruby.
6
  * Extended API for Python, Java, PHP and Ruby.
53
7
  * Added support for NO_ICU (to not use ICU for Unicode processing)
54
7
8
55
8
Bug Fixes/Other Changes:
9
Bug Fixes/Other Changes:
56
9
  * Fixed bug #967864 (var substitution did not update theFreeVars property)
10
  * Fixed bug #967864 (var substitution did not update theFreeVars property)
57
@@ -148,7 +149,9 @@
58
148
  * Fixed bug when parsing a document with a base-uri attribute.
149
  * Fixed bug when parsing a document with a base-uri attribute.
59
149
  * Fixed bug #863320 (Sentence is incorrectly incremented when token characters end without sentence terminator)
150
  * Fixed bug #863320 (Sentence is incorrectly incremented when token characters end without sentence terminator)
60
150
  * Fixed bug #863730 (static delete-node* functions don't raise ZDDY0012)
151
  * Fixed bug #863730 (static delete-node* functions don't raise ZDDY0012)
61
152
  * Implemented the probe-index-range-value for general indexes
62
151
  * Removed ZSTR0005 and ZSTR0006 error codes
153
  * Removed ZSTR0005 and ZSTR0006 error codes
63
154
  * Fixed bug #867662 ("nullptr" warning)
64
152
  * Fixed bug #868258 (Assertion failure with two delete collection)
155
  * Fixed bug #868258 (Assertion failure with two delete collection)
65
153
  * Fixed bug #871623 and #871629 (assertion failures with insertions in dynamic collections)
156
  * Fixed bug #871623 and #871629 (assertion failures with insertions in dynamic collections)
66
154
  * Fixed bug #867262 (allow reuse of iterator over ExtFuncArgItemSequence)
157
  * Fixed bug #867262 (allow reuse of iterator over ExtFuncArgItemSequence)
67
@@ -157,6 +160,8 @@
68
157
  * New node-reference module. References can be obtained for any node, and
160
  * New node-reference module. References can be obtained for any node, and
69
158
	different nodes cannot have the same identifier.
161
	different nodes cannot have the same identifier.
70
159
  * Fixed bug #872697  (segmentation fault with validation of NMTOKENS)
162
  * Fixed bug #872697  (segmentation fault with validation of NMTOKENS)
71
163
  * General index cannot be declared as unique if the type of its key is
72
164
    xs:anyAtomicType or xs:untypedAtomic.
73
160
  * Added undo for node revalidation
165
  * Added undo for node revalidation
74
161
  * Optimization for count(collection()) expressions
166
  * Optimization for count(collection()) expressions
75
162
  * Fixed bug #872796  (validate-in-place can interfere with other update primitives)
167
  * Fixed bug #872796  (validate-in-place can interfere with other update primitives)
76
@@ -175,6 +180,8 @@
77
175
  * Fixed bug #855715 (Invalid escaped characters in regex not caught)
180
  * Fixed bug #855715 (Invalid escaped characters in regex not caught)
78
176
  * Fixed bug #862089 (Split binary/xq install directories for modules) by
181
  * Fixed bug #862089 (Split binary/xq install directories for modules) by
79
177
  splitting "module path" into separate URI and Library paths
182
  splitting "module path" into separate URI and Library paths
80
183
  * New node-position module. This module makes it possible to obtain a representation of a node position, which
81
184
    can be used to assess structural relationships with other nodes.   
82
178
  * Fixed bug #872502 (validation of the JSON module xqdoc fails)
185
  * Fixed bug #872502 (validation of the JSON module xqdoc fails)
83
179
  * Fixed bug #897619 (testdriver_mt can not run the XQueryX tests)
186
  * Fixed bug #897619 (testdriver_mt can not run the XQueryX tests)
84
180
  * Fixed bug #867107 (xqdoc dependency to zorba is wrong)
187
  * Fixed bug #867107 (xqdoc dependency to zorba is wrong)
85
181
188
86
=== modified file 'KNOWN_ISSUES.txt'
87
--- KNOWN_ISSUES.txt	2012-03-28 05:19:57 +0000
88
+++ KNOWN_ISSUES.txt	2012-04-07 00:45:26 +0000
89
@@ -37,7 +37,7 @@
90
37
* The serializer currently doesn't implement character maps as specified
37
* The serializer currently doesn't implement character maps as specified
91
38
  (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps)
38
  (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps)
92
39
39
94
40
* In the 2.0 release, setting the CMake variables ZORBA_NO_UNICODE to
40
* In the 2.0 release, setting the CMake variable ZORBA_NO_ICU to
95
41
  ON is not supported.
41
  ON is not supported.
96
42
42
97
43
* The PHP language binding is not supported on Mac OS X. For details,
43
* The PHP language binding is not supported on Mac OS X. For details,
98
44
44
99
=== modified file 'doc/cxx/examples/context.cpp'
100
--- doc/cxx/examples/context.cpp	2012-03-28 05:19:57 +0000
101
+++ doc/cxx/examples/context.cpp	2012-04-07 00:45:26 +0000
102
@@ -149,7 +149,11 @@
103
149
    outStream2 << lQuery << std::endl;
149
    outStream2 << lQuery << std::endl;
104
150
    std::cout << outStream2.str() << std::endl;
150
    std::cout << outStream2.str() << std::endl;
105
151
151
106
152
#ifndef ZORBA_NO_ICU
107
152
    if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n")
153
    if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n")
108
154
#else
109
155
    if (outStream2.str() != "<?xml version=\"1.0\"?>\nBook 1.1\n")
110
156
#endif /* ZORBA_NO_ICU */
111
153
    {
157
    {
112
154
      std::cerr << "Test 4 failed with a wrong result : " << std::endl
158
      std::cerr << "Test 4 failed with a wrong result : " << std::endl
113
155
                << outStream2.str() << std::endl;
159
                << outStream2.str() << std::endl;
114
156
160
115
=== modified file 'include/zorba/config.h.cmake'
116
--- include/zorba/config.h.cmake	2012-03-28 05:19:57 +0000
117
+++ include/zorba/config.h.cmake	2012-04-07 00:45:26 +0000
118
@@ -96,6 +96,8 @@
119
96
typedef __int64 int64_t;
96
typedef __int64 int64_t;
120
97
#endif /* ZORBA_HAVE_INT64_T */
97
#endif /* ZORBA_HAVE_INT64_T */
121
98
98
122
99
#cmakedefine ZORBA_SIZEOF_WCHAR_T @ZORBA_SIZEOF_WCHAR_T@
123
100
124
99
// Compiler
101
// Compiler
125
100
#cmakedefine CLANG
102
#cmakedefine CLANG
126
101
#cmakedefine MSVC
103
#cmakedefine MSVC
127
@@ -148,7 +150,7 @@
128
148
150
129
149
// Zorba features
151
// Zorba features
130
150
#cmakedefine ZORBA_NO_FULL_TEXT
152
#cmakedefine ZORBA_NO_FULL_TEXT
132
151
#cmakedefine ZORBA_NO_UNICODE
153
#cmakedefine ZORBA_NO_ICU
133
152
#cmakedefine ZORBA_NO_XMLSCHEMA
154
#cmakedefine ZORBA_NO_XMLSCHEMA
134
153
#cmakedefine ZORBA_NUMERIC_OPTIMIZATION
155
#cmakedefine ZORBA_NUMERIC_OPTIMIZATION
135
154
#cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE
156
#cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE
136
155
157
137
=== modified file 'include/zorba/static_context.h'
138
--- include/zorba/static_context.h	2012-03-28 05:19:57 +0000
139
+++ include/zorba/static_context.h	2012-04-07 00:45:26 +0000
140
@@ -26,9 +26,13 @@
141
26
#include <zorba/function.h>
26
#include <zorba/function.h>
142
27
#include <zorba/annotation.h>
27
#include <zorba/annotation.h>
143
28
#include <zorba/smart_ptr.h>
28
#include <zorba/smart_ptr.h>
144
29
#include <zorba/smart_ptr.h>
145
29
#ifndef ZORBA_NO_FULL_TEXT
30
#ifndef ZORBA_NO_FULL_TEXT
146
30
#include <zorba/thesaurus.h>
31
#include <zorba/thesaurus.h>
147
31
#endif /* ZORBA_NO_FULL_TEXT */
32
#endif /* ZORBA_NO_FULL_TEXT */
148
33
#include <zorba/zorba.h>
149
34
#include <zorba/store_manager.h>
150
35
#include <zorba/zorba_exception.h>
151
32
36
152
33
namespace zorba {
37
namespace zorba {
153
34
38
154
35
39
155
=== modified file 'include/zorba/util/time.h'
156
--- include/zorba/util/time.h	2012-03-28 05:19:57 +0000
157
+++ include/zorba/util/time.h	2012-04-07 00:45:26 +0000
158
@@ -178,7 +178,7 @@
159
178
	
178
	
160
179
    inline long get_walltime_in_millis(const walltime& t)
179
    inline long get_walltime_in_millis(const walltime& t)
161
180
    {
180
    {
163
181
      return t.time * 1000 + t.millitm;
181
      return (long)(t.time * 1000 + t.millitm);
164
182
    }
182
    }
165
183
183
166
184
#else /* not Windows, and no clock_gettime() */
184
#else /* not Windows, and no clock_gettime() */
167
185
185
168
=== modified file 'src/CMakeLists.txt'
169
--- src/CMakeLists.txt	2012-03-28 05:19:57 +0000
170
+++ src/CMakeLists.txt	2012-04-07 00:45:26 +0000
171
@@ -59,7 +59,10 @@
172
59
#
59
#
173
60
# Next, add the files to be compiled into the library
60
# Next, add the files to be compiled into the library
174
61
#
61
#
175
62
176
63
MESSAGE(STATUS  "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
177
62
SET(ZORBA_PRECOMPILED_HEADERS OFF CACHE BOOL "Activate Zorba precompiled headers.")
64
SET(ZORBA_PRECOMPILED_HEADERS OFF CACHE BOOL "Activate Zorba precompiled headers.")
178
65
MESSAGE(STATUS  "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
179
63
66
180
64
SET(ZORBA_SRCS)
67
SET(ZORBA_SRCS)
181
65
ADD_SRC_SUBFOLDER(ZORBA_SRCS api API_SRCS)
68
ADD_SRC_SUBFOLDER(ZORBA_SRCS api API_SRCS)
182
@@ -97,6 +100,7 @@
183
97
ENDIF(ZORBA_WITH_DEBUGGER)
100
ENDIF(ZORBA_WITH_DEBUGGER)
184
98
ADD_SRC_SUBFOLDER(ZORBA_SRCS unit_tests UNIT_TEST_SRCS)
101
ADD_SRC_SUBFOLDER(ZORBA_SRCS unit_tests UNIT_TEST_SRCS)
185
99
102
186
103
MESSAGE(STATUS  "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
187
100
IF(ZORBA_PRECOMPILED_HEADERS)
104
IF(ZORBA_PRECOMPILED_HEADERS)
188
101
  ADD_SRC_SUBFOLDER(ZORBA_SRCS precompiled ZORBAMISC_SRCS)
105
  ADD_SRC_SUBFOLDER(ZORBA_SRCS precompiled ZORBAMISC_SRCS)
189
102
  INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src/precompiled")
106
  INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src/precompiled")
190
103
107
191
=== modified file 'src/api/serialization/serializer.cpp'
192
--- src/api/serialization/serializer.cpp	2012-03-28 05:19:57 +0000
193
+++ src/api/serialization/serializer.cpp	2012-04-07 00:45:26 +0000
194
@@ -180,7 +180,6 @@
195
180
  for (; chars < chars_end; chars++ )
180
  for (; chars < chars_end; chars++ )
196
181
  {
181
  {
197
182
182
198
183
#ifndef ZORBA_NO_UNICODE
199
184
    // the input string is UTF-8
183
    // the input string is UTF-8
200
185
    int char_length = utf8::char_length(*chars);
184
    int char_length = utf8::char_length(*chars);
201
186
    if (char_length == 0)
185
    if (char_length == 0)
202
@@ -217,7 +216,6 @@
203
217
216
204
218
      continue;
217
      continue;
205
219
    }
218
    }
206
220
#endif//ZORBA_NO_UNICODE
207
221
219
208
222
    // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character
220
    // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character
209
223
    if (ser && ser->method == PARAMETER_VALUE_XML &&
221
    if (ser && ser->method == PARAMETER_VALUE_XML &&
210
@@ -332,14 +330,12 @@
211
332
    {
330
    {
212
333
      tr << (char)0xEF << (char)0xBB << (char)0xBF;
331
      tr << (char)0xEF << (char)0xBB << (char)0xBF;
213
334
    }
332
    }
214
335
#ifndef ZORBA_NO_UNICODE
215
336
    else if (ser->encoding == PARAMETER_VALUE_UTF_16)
333
    else if (ser->encoding == PARAMETER_VALUE_UTF_16)
216
337
    {
334
    {
217
338
      // Little-endian
335
      // Little-endian
218
339
      tr.verbatim((char)0xFF);
336
      tr.verbatim((char)0xFF);
219
340
      tr.verbatim((char)0xFE);
337
      tr.verbatim((char)0xFE);
220
341
    }
338
    }
221
342
#endif
222
343
  }
339
  }
223
344
}
340
}
224
345
341
225
@@ -862,13 +858,17 @@
226
862
  emitter::emit_declaration();
858
  emitter::emit_declaration();
227
863
859
228
864
  if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) {
860
  if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) {
236
865
    tr << "<?xml version=\"" << ser->version << "\" encoding=\"";
861
    tr << "<?xml version=\"" << ser->version;
237
866
    if (ser->encoding == PARAMETER_VALUE_UTF_8) {
862
    switch (ser->encoding) {
238
867
      tr << "UTF-8";
863
      case PARAMETER_VALUE_UTF_8:
239
868
#ifndef ZORBA_NO_UNICODE
864
      case PARAMETER_VALUE_UTF_16:
240
869
    } else if (ser->encoding == PARAMETER_VALUE_UTF_16) {
865
        tr << "\" encoding=\"";
241
870
      tr << "UTF-16";
866
        switch (ser->encoding) {
242
871
#endif
867
          case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
243
868
          case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
244
869
          default                    : ZORBA_ASSERT(false);
245
870
        }
246
871
        break;
247
872
    }
872
    }
248
873
    tr << "\"";
873
    tr << "\"";
249
874
874
250
@@ -1174,14 +1174,18 @@
251
1174
      }
1174
      }
252
1175
1175
253
1176
      tr << "<meta http-equiv=\"content-type\" content=\""
1176
      tr << "<meta http-equiv=\"content-type\" content=\""
262
1177
         << ser->media_type << "; charset=";
1177
         << ser->media_type;
263
1178
1178
      switch (ser->encoding) {
264
1179
      if (ser->encoding == PARAMETER_VALUE_UTF_8)
1179
        case PARAMETER_VALUE_UTF_8:
265
1180
        tr << "UTF-8";
1180
        case PARAMETER_VALUE_UTF_16:
266
1181
#ifndef ZORBA_NO_UNICODE
1181
          tr << "\" charset=\"";
267
1182
      else if (ser->encoding == PARAMETER_VALUE_UTF_16)
1182
          switch (ser->encoding) {
268
1183
        tr << "UTF-16";
1183
            case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
269
1184
#endif
1184
            case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
270
1185
            default                    : ZORBA_ASSERT(false);
271
1186
          }
272
1187
          break;
273
1188
      }
274
1185
      tr << "\"";
1189
      tr << "\"";
275
1186
      // closed_parent_tag = 1;
1190
      // closed_parent_tag = 1;
276
1187
    }
1191
    }
277
@@ -1371,14 +1375,18 @@
278
1371
        }
1375
        }
279
1372
1376
280
1373
        tr << "<meta http-equiv=\"content-type\" content=\""
1377
        tr << "<meta http-equiv=\"content-type\" content=\""
289
1374
           << ser->media_type << "; charset=";
1378
           << ser->media_type;
290
1375
1379
        switch (ser->encoding) {
291
1376
        if (ser->encoding == PARAMETER_VALUE_UTF_8)
1380
          case PARAMETER_VALUE_UTF_8:
292
1377
          tr << "UTF-8";
1381
          case PARAMETER_VALUE_UTF_16:
293
1378
#ifndef ZORBA_NO_UNICODE
1382
            tr << "\" charset=\"";
294
1379
        else if (ser->encoding == PARAMETER_VALUE_UTF_16)
1383
            switch (ser->encoding) {
295
1380
          tr << "UTF-16";
1384
              case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
296
1381
#endif
1385
              case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
297
1386
              default                    : ZORBA_ASSERT(false);
298
1387
            }
299
1388
            break;
300
1389
        }
301
1382
        tr << "\"/";
1390
        tr << "\"/";
302
1383
        //closed_parent_tag = 1;
1391
        //closed_parent_tag = 1;
303
1384
      }
1392
      }
304
@@ -2098,10 +2106,8 @@
305
2098
  {
2106
  {
306
2099
    if (!strcmp(aValue, "UTF-8"))
2107
    if (!strcmp(aValue, "UTF-8"))
307
2100
      encoding = PARAMETER_VALUE_UTF_8;
2108
      encoding = PARAMETER_VALUE_UTF_8;
308
2101
#ifndef ZORBA_NO_UNICODE
309
2102
    else if (!strcmp(aValue, "UTF-16"))
2109
    else if (!strcmp(aValue, "UTF-16"))
310
2103
      encoding = PARAMETER_VALUE_UTF_16;
2110
      encoding = PARAMETER_VALUE_UTF_16;
311
2104
#endif
312
2105
    else
2111
    else
313
2106
      throw XQUERY_EXCEPTION(
2112
      throw XQUERY_EXCEPTION(
314
2107
        err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) )
2113
        err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) )
315
@@ -2210,16 +2216,13 @@
316
2210
  {
2216
  {
317
2211
    tr = new transcoder(os, false);
2217
    tr = new transcoder(os, false);
318
2212
  }
2218
  }
319
2213
#ifndef ZORBA_NO_UNICODE
320
2214
  else if (encoding == PARAMETER_VALUE_UTF_16)
2219
  else if (encoding == PARAMETER_VALUE_UTF_16)
321
2215
  {
2220
  {
322
2216
    tr = new transcoder(os, true);
2221
    tr = new transcoder(os, true);
323
2217
  }
2222
  }
324
2218
#endif
325
2219
  else
2223
  else
326
2220
  {
2224
  {
329
2221
    ZORBA_ASSERT(0);
2225
    ZORBA_ASSERT(false);
328
2222
    return false;
330
2223
  }
2226
  }
331
2224
2227
332
2225
  if (method == PARAMETER_VALUE_XML)
2228
  if (method == PARAMETER_VALUE_XML)
333
2226
2229
334
=== modified file 'src/api/serialization/serializer.h'
335
--- src/api/serialization/serializer.h	2012-03-28 05:19:57 +0000
336
+++ src/api/serialization/serializer.h	2012-04-07 00:45:26 +0000
337
@@ -70,10 +70,8 @@
338
70
    PARAMETER_VALUE_TEXT,
70
    PARAMETER_VALUE_TEXT,
339
71
    PARAMETER_VALUE_BINARY,
71
    PARAMETER_VALUE_BINARY,
340
72
72
345
73
    PARAMETER_VALUE_UTF_8
73
    PARAMETER_VALUE_UTF_8,
346
74
#ifndef ZORBA_NO_UNICODE
74
    PARAMETER_VALUE_UTF_16
343
75
    ,PARAMETER_VALUE_UTF_16
344
76
#endif
347
77
  } PARAMETER_VALUE_TYPE;
75
  } PARAMETER_VALUE_TYPE;
348
78
76
349
79
protected:
77
protected:
350
80
78
351
=== modified file 'src/diagnostics/diagnostic_en.xml'
352
--- src/diagnostics/diagnostic_en.xml	2012-03-28 05:19:57 +0000
353
+++ src/diagnostics/diagnostic_en.xml	2012-04-07 00:45:26 +0000
354
@@ -2517,11 +2517,11 @@
355
2517
      <value>attribute node</value>
2517
      <value>attribute node</value>
356
2518
    </entry>
2518
    </entry>
357
2519
2519
359
2520
    <entry key="BackRef0Illegal">
2520
    <entry key="BackRef0Illegal" if="!defined(ZORBA_NO_ICU)">
360
2521
      <value>"0": illegal backreference</value>
2521
      <value>"0": illegal backreference</value>
361
2522
    </entry>
2522
    </entry>
362
2523
2523
364
2524
    <entry key="BackRefIllegalInCharClass">
2524
    <entry key="BackRefIllegalInCharClass" if="!defined(ZORBA_NO_ICU)">
365
2525
      <value>backreference illegal in character class</value>
2525
      <value>backreference illegal in character class</value>
366
2526
    </entry>
2526
    </entry>
367
2527
2527
368
@@ -2569,7 +2569,7 @@
369
2569
      <value>invalid library module</value>
2569
      <value>invalid library module</value>
370
2570
    </entry>
2570
    </entry>
371
2571
2571
373
2572
    <entry key="BadRegexEscape_3">
2572
    <entry key="BadRegexEscape_3" if="!defined(ZORBA_NO_ICU)">
374
2573
      <value>"$3": illegal escape character</value>
2573
      <value>"$3": illegal escape character</value>
375
2574
    </entry>
2574
    </entry>
376
2575
2575
377
@@ -3029,7 +3029,7 @@
378
3029
      <value>nodeid component too big for encoding</value>
3029
      <value>nodeid component too big for encoding</value>
379
3030
    </entry>
3030
    </entry>
380
3031
3031
382
3032
    <entry key="NonClosedBackRef_3">
3032
    <entry key="NonClosedBackRef_3" if="!defined(ZORBA_NO_ICU)">
383
3033
      <value>'$$3': non-closed backreference</value>
3033
      <value>'$$3': non-closed backreference</value>
384
3034
    </entry>
3034
    </entry>
385
3035
3035
386
@@ -3041,7 +3041,7 @@
387
3041
      <value>non-localhost authority</value>
3041
      <value>non-localhost authority</value>
388
3042
    </entry>
3042
    </entry>
389
3043
3043
391
3044
    <entry key="NonexistentBackRef_3">
3044
    <entry key="NonexistentBackRef_3" if="!defined(ZORBA_NO_ICU)">
392
3045
      <value>'$$3': non-existent backreference</value>
3045
      <value>'$$3': non-existent backreference</value>
393
3046
    </entry>
3046
    </entry>
394
3047
3047
395
@@ -3193,94 +3193,183 @@
396
3193
      <value>item type is not a subtype of "$3"</value>
3193
      <value>item type is not a subtype of "$3"</value>
397
3194
    </entry>
3194
    </entry>
398
3195
3195
400
3196
    <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_UNICODE)">
3196
    <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_ICU)">
401
3197
      <value>unrecognized backslash escape sequence</value>
3197
      <value>unrecognized backslash escape sequence</value>
402
3198
    </entry>
3198
    </entry>
403
3199
3199
405
3200
    <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_UNICODE)">
3200
    <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_ICU)">
406
3201
      <value>error in {min,max} interval</value>
3201
      <value>error in {min,max} interval</value>
407
3202
    </entry>
3202
    </entry>
408
3203
3203
410
3204
    <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_UNICODE)">
3204
    <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_ICU)">
411
3205
      <value>an internal ICU error (bug) was detected</value>
3205
      <value>an internal ICU error (bug) was detected</value>
412
3206
    </entry>
3206
    </entry>
413
3207
3207
415
3208
    <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_UNICODE)">
3208
    <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_ICU)">
416
3209
      <value>backreference to a non-existent capture group</value>
3209
      <value>backreference to a non-existent capture group</value>
417
3210
    </entry>
3210
    </entry>
418
3211
3211
420
3212
    <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_UNICODE)">
3212
    <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_ICU)">
421
3213
      <value>invalid value for match mode flags</value>
3213
      <value>invalid value for match mode flags</value>
422
3214
    </entry>
3214
    </entry>
423
3215
3215
425
3216
    <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_UNICODE)">
3216
    <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_ICU)">
426
3217
      <value>in character range [x-y], x is greater than y</value>
3217
      <value>in character range [x-y], x is greater than y</value>
427
3218
    </entry>
3218
    </entry>
428
3219
3219
430
3220
    <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_UNICODE)">
3220
    <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_ICU)">
431
3221
      <value>RegexMatcher in invalid state for requested operation</value>
3221
      <value>RegexMatcher in invalid state for requested operation</value>
432
3222
    </entry>
3222
    </entry>
433
3223
3223
435
3224
    <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_UNICODE)">
3224
    <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_ICU)">
436
3225
      <value>look-behind pattern matches must have a bounded maximum length</value>
3225
      <value>look-behind pattern matches must have a bounded maximum length</value>
437
3226
    </entry>
3226
    </entry>
438
3227
3227
440
3228
    <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_UNICODE)">
3228
    <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_ICU)">
441
3229
      <value>in {min,max}, max is less than min</value>
3229
      <value>in {min,max}, max is less than min</value>
442
3230
    </entry>
3230
    </entry>
443
3231
3231
445
3232
    <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_UNICODE)">
3232
    <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_ICU)">
446
3233
      <value>incorrectly nested parentheses</value>
3233
      <value>incorrectly nested parentheses</value>
447
3234
    </entry>
3234
    </entry>
448
3235
3235
450
3236
    <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_UNICODE)">
3236
    <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_ICU)">
451
3237
      <value>missing ']'</value>
3237
      <value>missing ']'</value>
452
3238
    </entry>
3238
    </entry>
453
3239
3239
455
3240
    <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
3240
    <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
456
3241
      <value>decimal number is too large</value>
3241
      <value>decimal number is too large</value>
457
3242
    </entry>
3242
    </entry>
458
3243
3243
460
3244
    <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
3244
    <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
461
3245
      <value>octal character constants must be &lt;= 0377</value>
3245
      <value>octal character constants must be &lt;= 0377</value>
462
3246
    </entry>
3246
    </entry>
463
3247
3247
465
3248
    <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
3248
    <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_ICU)">
466
3249
      <value>incorrect Unicode property</value>
3249
      <value>incorrect Unicode property</value>
467
3250
    </entry>
3250
    </entry>
468
3251
3251
470
3252
    <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
3252
    <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_ICU)">
471
3253
      <value>syntax error</value>
3253
      <value>syntax error</value>
472
3254
    </entry>
3254
    </entry>
473
3255
3255
475
3256
    <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_UNICODE)">
3256
    <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_ICU)">
476
3257
      <value>can not have UnicodeSets containing strings</value>
3257
      <value>can not have UnicodeSets containing strings</value>
477
3258
    </entry>
3258
    </entry>
478
3259
3259
480
3260
    <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_UNICODE)">
3260
    <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_ICU)">
481
3261
      <value>backtrack stack overflow</value>
3261
      <value>backtrack stack overflow</value>
482
3262
    </entry>
3262
    </entry>
483
3263
3263
485
3264
    <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_UNICODE)">
3264
    <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_ICU)">
486
3265
      <value>matching operation aborted by user callback fn</value>
3265
      <value>matching operation aborted by user callback fn</value>
487
3266
    </entry>
3266
    </entry>
488
3267
3267
490
3268
    <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_UNICODE)">
3268
    <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_ICU)">
491
3269
      <value>maximum allowed match time exceeded</value>
3269
      <value>maximum allowed match time exceeded</value>
492
3270
    </entry>
3270
    </entry>
493
3271
3271
496
3272
    <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_UNICODE)">
3272
    <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_ICU)">
497
3273
      <value>use of regular expression feature that is not yet implemented</value>
3273
      <value>use of regular expression feature that is not yet implemented</value>
498
3274
    </entry>
499
3275
500
3276
    <!-- Regex Ascii error messages-->
501
3277
    <entry key="REGEX_UNIMPLEMENTED" if="defined(ZORBA_NO_ICU)">
502
3278
      <value>use of regular expression feature that is not yet implemented</value>
503
3279
    </entry>
504
3280
505
3281
    <entry key="REGEX_MISMATCHED_PAREN" if="defined(ZORBA_NO_ICU)">
506
3282
      <value>incorrectly nested parentheses</value>
507
3283
    </entry>
508
3284
509
3285
    <entry key="REGEX_BROKEN_P_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
510
3286
      <value>broken \\p construct</value>
511
3287
    </entry>
512
3288
513
3289
    <entry key="REGEX_UNKNOWN_PL_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
514
3290
      <value>unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo</value>
515
3291
    </entry>
516
3292
517
3293
    <entry key="REGEX_UNKNOWN_PM_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
518
3294
      <value>unknown \\p{M?} category; supported categories: M, Mn, Mc, Me</value>
519
3295
    </entry>
520
3296
521
3297
    <entry key="REGEX_UNKNOWN_PN_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
522
3298
      <value>unknown \\p{N?} category; supported categories: N, Nd, Nl, No</value>
523
3299
    </entry>
524
3300
525
3301
    <entry key="REGEX_UNKNOWN_PP_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
526
3302
      <value>unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po</value>
527
3303
    </entry>
528
3304
529
3305
    <entry key="REGEX_UNKNOWN_PZ_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
530
3306
      <value>unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp</value>
531
3307
    </entry>
532
3308
533
3309
    <entry key="REGEX_UNKNOWN_PS_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
534
3310
      <value>unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So</value>
535
3311
    </entry>
536
3312
537
3313
    <entry key="REGEX_UNKNOWN_PC_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
538
3314
      <value>unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)</value>
539
3315
    </entry>
540
3316
541
3317
    <entry key="REGEX_BROKEN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
542
3318
      <value>broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]</value>
543
3319
    </entry>
544
3320
545
3321
    <entry key="REGEX_UNKNOWN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
546
3322
      <value>unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes</value>
547
3323
    </entry>
548
3324
549
3325
    <entry key="REGEX_INVALID_UNICODE_CODEPOINT_u" if="defined(ZORBA_NO_ICU)">
550
3326
      <value>invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX</value>
551
3327
    </entry>
552
3328
553
3329
    <entry key="REGEX_UNKNOWN_ESC_CHAR" if="defined(ZORBA_NO_ICU)">
554
3330
      <value>unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups</value>
555
3331
    </entry>
556
3332
557
3333
    <entry key="REGEX_INVALID_BACK_REF" if="defined(ZORBA_NO_ICU)">
558
3334
      <value>\\$3 backreference to a non-existent capture group ($4 groups so far)</value>
559
3335
    </entry>
560
3336
561
3337
    <entry key="REGEX_INVALID_ATOM_CHAR" if="defined(ZORBA_NO_ICU)">
562
3338
      <value>'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]</value>
563
3339
    </entry>
564
3340
565
3341
    <entry key="REGEX_INVALID_SUBCLASS" if="defined(ZORBA_NO_ICU)">
566
3342
      <value>malformed class subtraction</value>
567
3343
    </entry>
568
3344
569
3345
    <entry key="REGEX_INVALID_USE_OF_SUBCLASS" if="defined(ZORBA_NO_ICU)">
570
3346
      <value>improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]</value>
571
3347
    </entry>
572
3348
573
3349
    <entry key="REGEX_MULTICHAR_IN_CHAR_RANGE" if="defined(ZORBA_NO_ICU)">
574
3350
      <value>multichars or char categories cannot be part of a char range</value>
575
3351
    </entry>
576
3352
577
3353
    <entry key="REGEX_MISSING_CLOSE_BRACKET" if="defined(ZORBA_NO_ICU)">
578
3354
      <value>missing ']' in character group</value>
579
3355
    </entry>
580
3356
581
3357
    <entry key="REGEX_MAX_LT_MIN" if="defined(ZORBA_NO_ICU)">
582
3358
      <value>in {min,max}, max is less than min</value>
583
3274
    </entry>
3359
    </entry>
584
3275
3360
585
3276
    <entry key="UnaryArithOp">
3361
    <entry key="UnaryArithOp">
586
3277
      <value>unary arithmetic operator</value>
3362
      <value>unary arithmetic operator</value>
587
3278
    </entry>
3363
    </entry>
588
3279
3364
590
3280
    <entry key="UnbalancedChar_3">
3365
    <entry key="UnbalancedChar_3" if="!defined(ZORBA_NO_ICU)">
591
3281
      <value>missing '$3'</value>
3366
      <value>missing '$3'</value>
592
3282
    </entry>
3367
    </entry>
593
3283
3368
594
3369
    <entry key="UnescapedChar_3" if="!defined(ZORBA_NO_ICU)">
595
3370
      <value>character '$3' must be escaped here</value>
596
3371
    </entry>
597
3372
598
3284
    <entry key="UnexpectedElement">
3373
    <entry key="UnexpectedElement">
599
3285
      <value>unexpected element</value>
3374
      <value>unexpected element</value>
600
3286
    </entry>
3375
    </entry>
601
3287
3376
602
=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
603
--- src/diagnostics/pregenerated/dict_en.cpp	2012-03-28 05:19:57 +0000
604
+++ src/diagnostics/pregenerated/dict_en.cpp	2012-04-07 00:45:26 +0000
605
@@ -437,8 +437,12 @@
606
437
  { "~AtomizationOfGroupByMakesMoreThanOneItem", "atomization of groupby variable produces more than one item" },
437
  { "~AtomizationOfGroupByMakesMoreThanOneItem", "atomization of groupby variable produces more than one item" },
607
438
  { "~AttributeName", "attribute name" },
438
  { "~AttributeName", "attribute name" },
608
439
  { "~AttributeNode", "attribute node" },
439
  { "~AttributeNode", "attribute node" },
609
440
#if !defined(ZORBA_NO_ICU)
610
440
  { "~BackRef0Illegal", "\"0\": illegal backreference" },
441
  { "~BackRef0Illegal", "\"0\": illegal backreference" },
611
442
#endif
612
443
#if !defined(ZORBA_NO_ICU)
613
441
  { "~BackRefIllegalInCharClass", "backreference illegal in character class" },
444
  { "~BackRefIllegalInCharClass", "backreference illegal in character class" },
614
445
#endif
615
442
  { "~BadAnyURI", "invalid xs:anyURI" },
446
  { "~BadAnyURI", "invalid xs:anyURI" },
616
443
  { "~BadArgTypeForFn_2o34o", "${\"2\": }invalid argument type for function $3()${: 4}" },
447
  { "~BadArgTypeForFn_2o34o", "${\"2\": }invalid argument type for function $3()${: 4}" },
617
444
  { "~BadCharAfter_34", "'$3': illegal character after '$4'" },
448
  { "~BadCharAfter_34", "'$3': illegal character after '$4'" },
618
@@ -451,7 +455,9 @@
619
451
  { "~BadIterator", "invalid iterator" },
455
  { "~BadIterator", "invalid iterator" },
620
452
  { "~BadLibraryModule", "invalid library module" },
456
  { "~BadLibraryModule", "invalid library module" },
621
453
  { "~BadPath", "invalid path" },
457
  { "~BadPath", "invalid path" },
622
458
#if !defined(ZORBA_NO_ICU)
623
454
  { "~BadRegexEscape_3", "\"$3\": illegal escape character" },
459
  { "~BadRegexEscape_3", "\"$3\": illegal escape character" },
624
460
#endif
625
455
  { "~BadStreamState", "bad I/O stream state" },
461
  { "~BadStreamState", "bad I/O stream state" },
626
456
  { "~BadTokenInBraces_3", "\"$3\": illegal token within { }" },
462
  { "~BadTokenInBraces_3", "\"$3\": illegal token within { }" },
627
457
  { "~BadTraceStream", "trace stream not retrievable using SerializationCallback" },
463
  { "~BadTraceStream", "trace stream not retrievable using SerializationCallback" },
628
@@ -567,10 +573,14 @@
629
567
  { "~NoUntypedKeyNodeValue_2", "node with untyped key value found during probe on index \"$2\"" },
573
  { "~NoUntypedKeyNodeValue_2", "node with untyped key value found during probe on index \"$2\"" },
630
568
  { "~NodeIDNeedsBytes_2", "nodeid requires more than $2 bytes" },
574
  { "~NodeIDNeedsBytes_2", "nodeid requires more than $2 bytes" },
631
569
  { "~NodeIDTooBig", "nodeid component too big for encoding" },
575
  { "~NodeIDTooBig", "nodeid component too big for encoding" },
632
576
#if !defined(ZORBA_NO_ICU)
633
570
  { "~NonClosedBackRef_3", "'$$3': non-closed backreference" },
577
  { "~NonClosedBackRef_3", "'$$3': non-closed backreference" },
634
578
#endif
635
571
  { "~NonFileThesaurusURI", "non-file thesaurus URI" },
579
  { "~NonFileThesaurusURI", "non-file thesaurus URI" },
636
572
  { "~NonLocalhostAuthority", "non-localhost authority" },
580
  { "~NonLocalhostAuthority", "non-localhost authority" },
637
581
#if !defined(ZORBA_NO_ICU)
638
573
  { "~NonexistentBackRef_3", "'$$3': non-existent backreference" },
582
  { "~NonexistentBackRef_3", "'$$3': non-existent backreference" },
639
583
#endif
640
574
  { "~NotAllowedForTypeName", "not allowed for typeName (use xsd:untyped instead)" },
584
  { "~NotAllowedForTypeName", "not allowed for typeName (use xsd:untyped instead)" },
641
575
  { "~NotAmongInScopeSchemaTypes", "not among in-scope schema types" },
585
  { "~NotAmongInScopeSchemaTypes", "not among in-scope schema types" },
642
576
  { "~NotDefInDynamicCtx", "not defined in dynamic context" },
586
  { "~NotDefInDynamicCtx", "not defined in dynamic context" },
643
@@ -589,6 +599,69 @@
644
589
  { "~ParserNoCreateTree", "XML tree creation failed" },
599
  { "~ParserNoCreateTree", "XML tree creation failed" },
645
590
  { "~PromotionImpossible", "promotion not possible" },
600
  { "~PromotionImpossible", "promotion not possible" },
646
591
  { "~QuotedColon_23", "\"$2\": $3" },
601
  { "~QuotedColon_23", "\"$2\": $3" },
647
602
#if defined(ZORBA_NO_ICU)
648
603
  { "~REGEX_BROKEN_PIs_CONSTRUCT", "broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]" },
649
604
#endif
650
605
#if defined(ZORBA_NO_ICU)
651
606
  { "~REGEX_BROKEN_P_CONSTRUCT", "broken \\p construct" },
652
607
#endif
653
608
#if defined(ZORBA_NO_ICU)
654
609
  { "~REGEX_INVALID_ATOM_CHAR", "'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]" },
655
610
#endif
656
611
#if defined(ZORBA_NO_ICU)
657
612
  { "~REGEX_INVALID_BACK_REF", "\\$3 backreference to a non-existent capture group ($4 groups so far)" },
658
613
#endif
659
614
#if defined(ZORBA_NO_ICU)
660
615
  { "~REGEX_INVALID_SUBCLASS", "malformed class subtraction" },
661
616
#endif
662
617
#if defined(ZORBA_NO_ICU)
663
618
  { "~REGEX_INVALID_UNICODE_CODEPOINT_u", "invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX" },
664
619
#endif
665
620
#if defined(ZORBA_NO_ICU)
666
621
  { "~REGEX_INVALID_USE_OF_SUBCLASS", "improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]" },
667
622
#endif
668
623
#if defined(ZORBA_NO_ICU)
669
624
  { "~REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
670
625
#endif
671
626
#if defined(ZORBA_NO_ICU)
672
627
  { "~REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
673
628
#endif
674
629
#if defined(ZORBA_NO_ICU)
675
630
  { "~REGEX_MISSING_CLOSE_BRACKET", "missing ']' in character group" },
676
631
#endif
677
632
#if defined(ZORBA_NO_ICU)
678
633
  { "~REGEX_MULTICHAR_IN_CHAR_RANGE", "multichars or char categories cannot be part of a char range" },
679
634
#endif
680
635
#if defined(ZORBA_NO_ICU)
681
636
  { "~REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
682
637
#endif
683
638
#if defined(ZORBA_NO_ICU)
684
639
  { "~REGEX_UNKNOWN_ESC_CHAR", "unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups" },
685
640
#endif
686
641
#if defined(ZORBA_NO_ICU)
687
642
  { "~REGEX_UNKNOWN_PC_CONSTRUCT", "unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)" },
688
643
#endif
689
644
#if defined(ZORBA_NO_ICU)
690
645
  { "~REGEX_UNKNOWN_PIs_CONSTRUCT", "unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes" },
691
646
#endif
692
647
#if defined(ZORBA_NO_ICU)
693
648
  { "~REGEX_UNKNOWN_PL_CONSTRUCT", "unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo" },
694
649
#endif
695
650
#if defined(ZORBA_NO_ICU)
696
651
  { "~REGEX_UNKNOWN_PM_CONSTRUCT", "unknown \\p{M?} category; supported categories: M, Mn, Mc, Me" },
697
652
#endif
698
653
#if defined(ZORBA_NO_ICU)
699
654
  { "~REGEX_UNKNOWN_PN_CONSTRUCT", "unknown \\p{N?} category; supported categories: N, Nd, Nl, No" },
700
655
#endif
701
656
#if defined(ZORBA_NO_ICU)
702
657
  { "~REGEX_UNKNOWN_PP_CONSTRUCT", "unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po" },
703
658
#endif
704
659
#if defined(ZORBA_NO_ICU)
705
660
  { "~REGEX_UNKNOWN_PS_CONSTRUCT", "unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So" },
706
661
#endif
707
662
#if defined(ZORBA_NO_ICU)
708
663
  { "~REGEX_UNKNOWN_PZ_CONSTRUCT", "unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp" },
709
664
#endif
710
592
  { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" },
665
  { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" },
711
593
  { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" },
666
  { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" },
712
594
  { "~SchemaAttributeName", "schema-attribute name" },
667
  { "~SchemaAttributeName", "schema-attribute name" },
713
@@ -610,68 +683,73 @@
714
610
  { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" },
683
  { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" },
715
611
  { "~TwoDefaultDecimalFormats", "two default decimal formats" },
684
  { "~TwoDefaultDecimalFormats", "two default decimal formats" },
716
612
  { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" },
685
  { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" },
718
613
#if !defined(ZORBA_NO_UNICODE)
686
#if !defined(ZORBA_NO_ICU)
719
614
  { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" },
687
  { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" },
720
615
#endif
688
#endif
722
616
#if !defined(ZORBA_NO_UNICODE)
689
#if !defined(ZORBA_NO_ICU)
723
617
  { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" },
690
  { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" },
724
618
#endif
691
#endif
726
619
#if !defined(ZORBA_NO_UNICODE)
692
#if !defined(ZORBA_NO_ICU)
727
620
  { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" },
693
  { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" },
728
621
#endif
694
#endif
730
622
#if !defined(ZORBA_NO_UNICODE)
695
#if !defined(ZORBA_NO_ICU)
731
623
  { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" },
696
  { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" },
732
624
#endif
697
#endif
734
625
#if !defined(ZORBA_NO_UNICODE)
698
#if !defined(ZORBA_NO_ICU)
735
626
  { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" },
699
  { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" },
736
627
#endif
700
#endif
738
628
#if !defined(ZORBA_NO_UNICODE)
701
#if !defined(ZORBA_NO_ICU)
739
629
  { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" },
702
  { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" },
740
630
#endif
703
#endif
742
631
#if !defined(ZORBA_NO_UNICODE)
704
#if !defined(ZORBA_NO_ICU)
743
632
  { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" },
705
  { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" },
744
633
#endif
706
#endif
746
634
#if !defined(ZORBA_NO_UNICODE)
707
#if !defined(ZORBA_NO_ICU)
747
635
  { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" },
708
  { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" },
748
636
#endif
709
#endif
750
637
#if !defined(ZORBA_NO_UNICODE)
710
#if !defined(ZORBA_NO_ICU)
751
638
  { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
711
  { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
752
639
#endif
712
#endif
754
640
#if !defined(ZORBA_NO_UNICODE)
713
#if !defined(ZORBA_NO_ICU)
755
641
  { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
714
  { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
756
642
#endif
715
#endif
758
643
#if !defined(ZORBA_NO_UNICODE)
716
#if !defined(ZORBA_NO_ICU)
759
644
  { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" },
717
  { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" },
760
645
#endif
718
#endif
762
646
#if !defined(ZORBA_NO_UNICODE)
719
#if !defined(ZORBA_NO_ICU)
763
647
  { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" },
720
  { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" },
764
648
#endif
721
#endif
766
649
#if !defined(ZORBA_NO_UNICODE)
722
#if !defined(ZORBA_NO_ICU)
767
650
  { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" },
723
  { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" },
768
651
#endif
724
#endif
770
652
#if !defined(ZORBA_NO_UNICODE)
725
#if !defined(ZORBA_NO_ICU)
771
653
  { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" },
726
  { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" },
772
654
#endif
727
#endif
774
655
#if !defined(ZORBA_NO_UNICODE)
728
#if !defined(ZORBA_NO_ICU)
775
656
  { "~U_REGEX_RULE_SYNTAX", "syntax error" },
729
  { "~U_REGEX_RULE_SYNTAX", "syntax error" },
776
657
#endif
730
#endif
778
658
#if !defined(ZORBA_NO_UNICODE)
731
#if !defined(ZORBA_NO_ICU)
779
659
  { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" },
732
  { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" },
780
660
#endif
733
#endif
782
661
#if !defined(ZORBA_NO_UNICODE)
734
#if !defined(ZORBA_NO_ICU)
783
662
  { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" },
735
  { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" },
784
663
#endif
736
#endif
786
664
#if !defined(ZORBA_NO_UNICODE)
737
#if !defined(ZORBA_NO_ICU)
787
665
  { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" },
738
  { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" },
788
666
#endif
739
#endif
790
667
#if !defined(ZORBA_NO_UNICODE)
740
#if !defined(ZORBA_NO_ICU)
791
668
  { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" },
741
  { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" },
792
669
#endif
742
#endif
794
670
#if !defined(ZORBA_NO_UNICODE)
743
#if !defined(ZORBA_NO_ICU)
795
671
  { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
744
  { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
796
672
#endif
745
#endif
797
673
  { "~UnaryArithOp", "unary arithmetic operator" },
746
  { "~UnaryArithOp", "unary arithmetic operator" },
798
747
#if !defined(ZORBA_NO_ICU)
799
674
  { "~UnbalancedChar_3", "missing '$3'" },
748
  { "~UnbalancedChar_3", "missing '$3'" },
800
749
#endif
801
750
#if !defined(ZORBA_NO_ICU)
802
751
  { "~UnescapedChar_3", "character '$3' must be escaped here" },
803
752
#endif
804
675
  { "~UnexpectedElement", "unexpected element" },
753
  { "~UnexpectedElement", "unexpected element" },
805
676
  { "~VarValMustBeSingleItem_2", "\"$2\": variable value must be single item" },
754
  { "~VarValMustBeSingleItem_2", "\"$2\": variable value must be single item" },
806
677
  { "~Variable", "variable" },
755
  { "~Variable", "variable" },
807
678
756
808
=== modified file 'src/precompiled/stdafx.h'
809
--- src/precompiled/stdafx.h	2012-03-28 05:19:57 +0000
810
+++ src/precompiled/stdafx.h	2012-04-07 00:45:26 +0000
811
@@ -15,363 +15,81 @@
812
15
15
813
16
 */
16
 */
814
17
 
17
 
880
18
#if defined STDAFX
18
#ifdef STDAFX
881
19
#include <iostream>
19
882
20
#include <stdexcept>
20
  #include <fstream>
883
21
#include <cassert>
21
  #include <iostream>
884
22
#include <cstring>
22
  #include <stdexcept>
885
23
#include <memory>
23
  #include <cassert>
886
24
24
  #include <cstring>
887
25
#include <sstream>
25
  #include <memory>
888
26
#include <xfwrap>
26
889
27
#include <xfwrap1>
27
  #include <sstream>
890
28
#include <istream>
28
  #include <xfwrap>
891
29
#include <cstdio>
29
  #include <xfwrap1>
892
30
#include <xxshared>
30
  #include <istream>
893
31
#include <crtdefs.h>
31
  #include <cstdio>
894
32
#include <map>
32
  #include <xxshared>
895
33
#include <set>
33
  #include <crtdefs.h>
896
34
//#include <poppack.h>
34
  #include <map>
897
35
//#include <xxtype_traits>
35
  #include <set>
898
36
//#include <xxcallwrap>
36
899
37
37
  #include "runtime/sequences/sequences.h"
900
38
// #include <xxcallpmf>
38
  #include "diagnostics/xquery_diagnostics.h"
901
39
// //#include <xxbind0>
39
  #include "xercesc/util/xercesdefs.hpp"
902
40
// //#include <xxbind1>
40
  #include "runtime/collections/collections.h"
903
41
// //#include <xxresult>
41
  #include "unicode/utypes.h"
904
42
// #include <zorba/audit.h>
42
  #include "zorba/config.h"
905
43
// #include "api/auditimpl.h"
43
  #include "store/api/store.h"
906
44
// #include <zorba/audit.h>
44
  #include "zorba/zorba.h"
907
45
45
  #include "zorba/api_shared_types.h"
908
46
 //#include "unicode/unistr.h"
46
  #include "compiler/parsetree/parsenodes.h"
909
47
 #include "runtime/sequences/sequences.h"
47
  #include "compiler/parser/parse_constants.h"
910
48
 #include "diagnostics/xquery_diagnostics.h"
48
  #include "zorbautils/checked_vector.h"
911
49
 #include "xercesc/util/xercesdefs.hpp"
49
  #include "compiler/parser/xquery_driver.h"
912
50
 #include "runtime/collections/collections.h"
50
  #include "util/sorter.h"
913
51
 #include "unicode/utypes.h"
51
  #include "compiler/xqueryx/xqueryx_to_xquery.h"
914
52
 #include "zorba/config.h"
52
  #include <zorba/store_manager.h>
915
53
 #include "store/api/store.h"
53
  #include <zorba/xquery.h>
916
54
 #include "zorba/zorba.h"
54
  #include <zorba/xquery_exception.h>
852
55
 #include "zorba/api_shared_types.h"
853
56
 #include "compiler/parsetree/parsenodes.h"
854
57
 #include "compiler/parser/parse_constants.h"
855
58
 //#include "compiler/api/compilercb.h"
856
59
 #include "zorbautils/checked_vector.h"
857
60
 #include "compiler/parser/xquery_driver.h"
858
61
 #include "util/sorter.h"
859
62
 #include "compiler/xqueryx/xqueryx_to_xquery.h"
860
63
// #include "compiler/xqueryx/xqueryx_xslt.h"
861
64
//#include "compiler/parser/xquery_scanner.h"
862
65
//#include "compiler/parsetree/parsenode_base.h"
863
66
//#include "compiler/parsetree/parsenode_visitor.h"
864
67
// #include "runtime/core/flwor_iterator.h"
865
68
// #include "context/static_context.h"
866
69
// #include "zorbautils/fatal.h"
867
70
// #include "runtime/base/unarybase.h"
868
71
// #include "compiler/expression/expr_consts.h"
869
72
// #include "api/iterator_singleton.h"
870
73
// #include "runtime/visitors/printer_visitor_api.h"
871
74
// //#include "compiler/parsetree/parsenode_print_dot_visitor.h"
872
75
// //#include "compiler/parsetree/parsenode_print_dot_visitor.h"
873
76
// //#include "runtime/visitors/planiter_visitor_impl_code.h"
874
77
// //#include "runtime/visitors/planiter_visitor_impl_include.h"
875
78
// //#include "runtime/visitors/printer_visitor_impl.h"
876
79
// //#include "runtime/core/path.h"
877
80
// #include "compiler/expression/ft_expr.h"
878
81
// #include "compiler/expression/ftnode.h"
879
82
// #include "compiler/parser/query_loc.h"
917
83
  #include "util/cxx_util.h"
55
  #include "util/cxx_util.h"
922
84
// #include "util/indent.h"
56
  #include "diagnostics/assert.h"
923
85
// #include "util/stl_util.h"
57
  #include "zorbatypes/mapm/m_apm_lc.h"
924
86
// #include "diagnostics/xquery_diagnostics.h"
58
  #include "zorbatypes/datetime/parse.h"
925
87
// #include "zorbatypes/numconversions.h"
59
  #include "zorbatypes/chartype.h"
926
60
  #include "zorbatypes/collation_manager.h"
927
61
  #include "zorbatypes/ft_token.h"
928
62
  #include "zorbatypes/m_apm.h"
929
63
  #include "zorbatypes/rclock.h"
930
64
  #include "zorbatypes/schema_types.h"
931
65
  #include "zorbatypes/timezone.h"
932
66
  #include "zorbatypes/transcoder.h"
933
67
  #include "zorbatypes/URI.h"
934
68
  #include "zorbatypes/xerces_xmlcharray.h"
935
69
  #include "zorbatypes/zorbatypes_decl.h"
936
70
  #include "zorbatypes/zstring.h"
937
71
  #include "zorbautils/condition.h"
938
72
  #include "zorbautils/hashfun.h"
939
73
  #include "zorbautils/hashmap.h"
940
74
  #include "zorbautils/hashmap_itemp.h"
941
75
  #include "zorbautils/hashmap_str_obj.h"
942
76
  #include "zorbautils/hashmap_zstring.h"
943
77
  #include "zorbautils/hashset.h"
944
78
  #include "zorbautils/hashset_itemh.h"
945
79
  #include "zorbautils/latch.h"
946
80
  #include "zorbautils/locale.h"
947
81
  #include "zorbautils/lock.h"
948
82
  #include "zorbautils/mutex.h"
949
83
  #include "zorbautils/runnable.h"
950
84
  #include "zorbautils/SAXParser.h"
951
85
  #include "zorbautils/stack.h"
952
86
  #include "zorbautils/string_util.h"
953
87
  #include "unit_tests/unit_test_list.h"
954
88
  #include "zorba/diagnostic_handler.h"
955
89
  #include "zorba/xquery_warning.h"
956
90
  #include "runtime/full_text/ftcontains_visitor.h"
957
91
  #include "store/api/ft_token_iterator.h"
958
92
  #include "store/naive/ft_token_store.h"
959
88
93
960
89
// #include "api/serialization/serializable.h"
961
90
// #include "api/serialization/serializer.h"
962
91
// #include "api/collectionimpl.h"
963
92
// #include "api/dynamiccontextimpl.h"
964
93
// #include "api/fileimpl.h"
965
94
// #include "api/functionimpl.h"
966
95
// #include "api/invoke_item_sequence.h"
967
96
// #include "api/itemfactoryimpl.h"
968
97
// #include "api/resultiteratorchainer.h"
969
98
// #include "api/resultiteratorimpl.h"
970
99
// #include "api/sax2impl.h"
971
100
// #include "api/serializerimpl.h"
972
101
// #include "api/staticcontextimpl.h"
973
102
// #include "api/storeiteratorimpl.h"
974
103
// #include "api/unmarshaller.h"
975
104
// #include "api/uri_resolver_wrappers.h"
976
105
// #include "api/vectoriterator.h"
977
106
// #include "api/xmldatamanagerimpl.h"
978
107
// //#include "api/xqueryimpl.h"
979
108
// #include "api/zorbaimpl.h"
980
109
// #include "capi/cdynamic_context.h"
981
110
// #include "capi/cexpression.h"
982
111
// #include "capi/cexternal_function.h"
983
112
// #include "capi/cimplementation.h"
984
113
// #include "capi/csequence.h"
985
114
// #include "capi/cstatic_context.h"
986
115
// #include "capi/error.h"
987
116
// #include "capi/external_module.h"
988
117
// #include "capi/single_item_sequence.h"
989
118
// #include "capi/user_item_sequence.h"
990
119
// #include "compiler/parser/flexlexer.h"
991
120
// #include "compiler/parser/ft_types.h"
992
121
// #include "compiler/parser/symbol_table.h"
993
122
// #include "compiler/parser/xqdoc_comment.h"
994
123
// #include "compiler/parsetree/parsenode_print_xml_visitor.h"
995
124
// #include "compiler/parsetree/parsenode_print_xqdoc_visitor.h"
996
125
// #include "compiler/parsetree/parsenode_print_xquery_visitor.h"
997
126
// #include "compiler/parsetree/parsenode_xqdoc_visitor.h"
998
127
// #include "compiler/translator/prolog_graph.h"
999
128
// #include "compiler/translator/translator.h"
1000
129
// #include "compiler/codegen/plan_visitor.h"
1001
130
// #include "compiler/expression/abstract_expr_visitor.h"
1002
131
// #include "compiler/expression/expr.h"
1003
132
// #include "compiler/expression/expr_annotations.h"
1004
133
// #include "compiler/expression/expr_base.h"
1005
134
// #include "compiler/expression/expr_classes.h"
1006
135
// #include "compiler/expression/expr_iter.h"
1007
136
// #include "compiler/expression/expr_utils.h"
1008
137
// #include "compiler/expression/expr_visitor.h"
1009
138
// #include "compiler/expression/flwor_expr.h"
1010
139
// //#include "compiler/expression/fo_expr.h"
1011
140
// #include "compiler/expression/ftnode_classes.h"
1012
141
// #include "compiler/expression/ftnode_visitor.h"
1013
142
// #include "compiler/expression/function_item_expr.h"
1014
143
// #include "compiler/expression/path_expr.h"
1015
144
// #include "compiler/expression/script_exprs.h"
1016
145
// #include "compiler/expression/update_exprs.h"
1017
146
// #include "compiler/expression/var_expr.h"
1018
147
// #include "compiler/rewriter/framework/rewriter.h"
1019
148
// #include "compiler/rewriter/framework/rewriter_context.h"
1020
149
// #include "compiler/rewriter/framework/rule_driver.h"
1021
150
// #include "compiler/rewriter/framework/sequential_rewriter.h"
1022
151
// #include "compiler/rewriter/rewriters/common_rewriter.h"
1023
152
// #include "compiler/rewriter/rewriters/default_optimizer.h"
1024
153
// #include "compiler/rewriter/rewriters/phase1_rewriter.h"
1025
154
// #include "compiler/rewriter/rules/ruleset.h"
1026
155
// #include "compiler/rewriter/rules/rule_base.h"
1027
156
// #include "compiler/rewriter/rules/type_rules.h"
1028
157
// #include "compiler/rewriter/tools/dataflow_annotations.h"
1029
158
// #include "compiler/rewriter/tools/expr_tools.h"
1030
159
// #include "compiler/rewriter/tools/udf_graph.h"
1031
160
// #include "compiler/xqddf/collection_decl.h"
1032
161
// #include "compiler/xqddf/value_ic.h"
1033
162
// #include "compiler/xqddf/value_index.h"
1034
163
// #include "compiler/semantic_annotations/annotations.h"
1035
164
// #include "compiler/semantic_annotations/annotation_holder.h"
1036
165
// #include "compiler/semantic_annotations/annotation_keys.h"
1037
166
// #include "compiler/api/compiler_api.h"
1038
167
// #include "compiler/api/compiler_api_impl.h"
1039
168
// #include "system/globalenv.h"
1040
169
// #include "system/properties.h"
1041
170
// #include "system/zorba_properties.h"
1042
171
// #include "context/decimal_format.h"
1043
172
// #include "context/default_uri_mappers.h"
1044
173
// #include "context/default_url_resolvers.h"
1045
174
// #include "context/dynamic_context.h"
1046
175
// #include "context/dynamic_loader.h"
1047
176
// #include "context/internal_uri_resolvers.h"
1048
177
// //#include "context/namespace_context.h"
1049
178
// #include "context/root_static_context.h"
1050
179
// #include "context/sctx_map_iterator.h"
1051
180
// #include "context/standard_uri_resolvers.h"
1052
181
// #include "context/static_context_consts.h"
1053
182
// #include "context/stemmer_wrappers.h"
1054
183
// #include "context/uri_resolver.h"
1055
184
// #include "context/uri_resolver_wrapper.h"
1056
185
#include "diagnostics/assert.h"
1057
186
// #include "diagnostics/diagnostic.h"
1058
187
// #include "diagnostics/dict.h"
1059
188
// #include "diagnostics/dict_impl.h"
1060
189
// #include "diagnostics/StackWalker.h"
1061
190
// #include "diagnostics/user_error.h"
1062
191
// #include "diagnostics/user_exception.h"
1063
192
// #include "diagnostics/xquery_exception.h"
1064
193
// #include "diagnostics/xquery_stack_trace.h"
1065
194
// #include "diagnostics/xquery_warning.h"
1066
195
// #include "diagnostics/zorba_exception.h"
1067
196
// //#include "functions/annotation.h"
1068
197
// #include "functions/external_function.h"
1069
198
// #include "functions/function.h"
1070
199
// #include "functions/function_consts.h"
1071
200
// #include "functions/function_impl.h"
1072
201
// #include "functions/func_accessors_impl.h"
1073
202
// #include "functions/func_apply.h"
1074
203
// #include "functions/func_arithmetic.h"
1075
204
// #include "functions/func_booleans_impl.h"
1076
205
// #include "functions/func_durations_dates_times_impl.h"
1077
206
// #include "functions/func_enclosed.h"
1078
207
// #include "functions/func_eval.h"
1079
208
// #include "functions/func_hoist.h"
1080
209
// #include "functions/func_index_ddl.h"
1081
210
// #include "functions/func_node_sort_distinct.h"
1082
211
// #include "functions/func_numerics_impl.h"
1083
212
// #include "functions/func_reflection.h"
1084
213
// #include "functions/func_sequences_impl.h"
1085
214
// #include "functions/func_var_decl.h"
1086
215
// #include "functions/library.h"
1087
216
// #include "functions/signature.h"
1088
217
// #include "functions/udf.h"
1089
218
// #include "runtime/full_text/thesauri/decode_base128.h"
1090
219
// #include "runtime/full_text/thesauri/encoded_list.h"
1091
220
// #include "runtime/full_text/thesauri/iso2788.h"
1092
221
// #include "runtime/full_text/thesauri/wn_db_segment.h"
1093
222
// #include "runtime/full_text/thesauri/wn_synset.h"
1094
223
// #include "runtime/full_text/thesauri/wn_thesaurus.h"
1095
224
// #include "runtime/full_text/thesauri/wn_types.h"
1096
225
// #include "runtime/full_text/thesauri/xqftts_relationship.h"
1097
226
// #include "runtime/full_text/thesauri/xqftts_thesaurus.h"
1098
227
// #include "runtime/full_text/ft_match.h"
1099
228
// #include "runtime/full_text/ft_query_item.h"
1100
229
// #include "runtime/full_text/ft_single_token_iterator.h"
1101
230
// #include "runtime/full_text/ft_stop_words_set.h"
1102
231
// #include "runtime/full_text/ft_thesaurus.h"
1103
232
// #include "runtime/full_text/ft_token_matcher.h"
1104
233
// #include "runtime/full_text/ft_token_seq_iterator.h"
1105
234
// #include "runtime/full_text/ft_token_span.h"
1106
235
// #include "runtime/full_text/ft_wildcard.h"
1107
236
// #include "runtime/full_text/full_text.h"
1108
237
// #include "runtime/full_text/apply.h"
1109
238
// #include "runtime/full_text/ft_util.h"
1110
239
// #include "runtime/collections/collections_base.h"
1111
240
// #include "runtime/core/apply_updates.h"
1112
241
// #include "runtime/core/arithmetic_impl.h"
1113
242
// #include "runtime/core/constructors.h"
1114
243
// #include "runtime/core/fncall_iterator.h"
1115
244
// #include "runtime/core/internal_operators.h"
1116
245
// #include "runtime/core/item_iterator.h"
1117
246
// #include "runtime/core/nodeid_iterators.h"
1118
247
// #include "runtime/core/path_iterators.h"
1119
248
// #include "runtime/core/sequencetypes.h"
1120
249
// #include "runtime/core/trycatch.h"
1121
250
// #include "runtime/core/var_iterators.h"
1122
251
// #include "runtime/numerics/NumericsImpl.h"
1123
252
// #include "runtime/booleans/BooleanImpl.h"
1124
253
// #include "runtime/base/binarybase.h"
1125
254
// #include "runtime/base/narybase.h"
1126
255
// #include "runtime/base/noarybase.h"
1127
256
// #include "runtime/base/plan_iterator.h"
1128
257
// #include "runtime/sequences/SequencesImpl.h"
1129
258
// #include "runtime/visitors/iterprinter.h"
1130
259
// #include "runtime/misc/materialize.h"
1131
260
// #include "runtime/scripting/scripting.h"
1132
261
// #include "types/schema/EventSchemaValidator.h"
1133
262
// #include "types/schema/LoadSchemaErrorHandler.h"
1134
263
// #include "types/schema/PrintSchema.h"
1135
264
// #include "types/schema/revalidateUtils.h"
1136
265
// #include "types/schema/schema.h"
1137
266
// #include "types/schema/SchemaValidatorFilter.h"
1138
267
// #include "types/schema/StrX.h"
1139
268
// #include "types/schema/validate.h"
1140
269
// #include "types/schema/ValidationEventHandler.h"
1141
270
// #include "types/schema/xercesIncludes.h"
1142
271
// #include "types/schema/XercesParseUtils.h"
1143
272
// #include "types/schema/XercSchemaValidator.h"
1144
273
// #include "types/casting.h"
1145
274
// #include "types/collation.h"
1146
275
// #include "types/node_test.h"
1147
276
// #include "types/root_typemanager.h"
1148
277
// #include "types/typeconstants.h"
1149
278
// #include "types/typeimpl.h"
1150
279
// #include "types/typemanager.h"
1151
280
// #include "types/typemanagerimpl.h"
1152
281
// #include "types/typeops.h"
1153
282
// #include "util/fx/fxarray.h"
1154
283
// #include "util/fx/fxcharheap.h"
1155
284
// #include "util/ascii_util.h"
1156
285
// #include "util/atomic_int.h"
1157
286
// #include "util/auto_vector.h"
1158
287
// #include "util/curl_util.h"
1159
288
// #include "util/dir.h"
1160
289
// #include "util/dynamic_bitset.h"
1161
290
// #include "util/empty.h"
1162
291
// #include "util/error_util.h"
1163
292
// #include "util/fs_util.h"
1164
293
// #include "util/hashmap.h"
1165
294
// //#include "util/hashmap32.h"
1166
295
// #include "util/less.h"
1167
296
// #include "util/mmap_file.h"
1168
297
// #include "util/nonatomic_int.h"
1169
298
// #include "util/omanip.h"
1170
299
// #include "util/oseparator.h"
1171
300
// #include "util/regex.h"
1172
301
// #include "util/singleton.h"
1173
302
// #include "util/string_util.h"
1174
303
// #include "util/threads.h"
1175
304
// #include "util/tokenbuf.h"
1176
305
// #include "util/tracer.h"
1177
306
// #include "util/triple.h"
1178
307
// #include "util/unicode_categories.h"
1179
308
// #include "util/unicode_util.h"
1180
309
// #include "util/uri_util.h"
1181
310
// #include "util/utf8_string.h"
1182
311
// #include "util/utf8_util.h"
1183
312
// #include "util/utf8_util_base.h"
1184
313
// #include "util/void_int.h"
1185
314
// #include "util/xml_util.h"
1186
315
// #include "zorbamisc/config/platform.h"
1187
316
// //#include "zorbaserialization/archiver.h"
1188
317
// #include "zorbaserialization/base64impl.h"
1189
318
// #include "zorbaserialization/bin_archiver.h"
1190
319
// //#include "zorbaserialization/class_serializer.h"
1191
320
// #include "zorbaserialization/mem_archiver.h"
1192
321
// #include "zorbaserialization/serialization_engine.h"
1193
322
// #include "zorbaserialization/template_serializer.h"
1194
323
// #include "zorbaserialization/xml_archiver.h"
1195
324
// #include "zorbaserialization/zorba_class_serializer.h"
1196
325
 #include "zorbatypes/mapm/m_apm_lc.h"
1197
326
 #include "zorbatypes/datetime/parse.h"
1198
327
 //#include "zorbatypes/binary.h"
1199
328
 #include "zorbatypes/chartype.h"
1200
329
 #include "zorbatypes/collation_manager.h"
1201
330
 //#include "zorbatypes/datetime.h"
1202
331
 //#include "zorbatypes/decimal.h"
1203
332
 //#include "zorbatypes/duration.h"
1204
333
 //#include "zorbatypes/floatimpl.h"
1205
334
 #include "zorbatypes/ft_token.h"
1206
335
 //#include "zorbatypes/integer.h"
1207
336
 #include "zorbatypes/libicu.h"
1208
337
 #include "zorbatypes/m_apm.h"
1209
338
 //#include "zorbatypes/rchandle.h"
1210
339
 #include "zorbatypes/rclock.h"
1211
340
 //#include "zorbatypes/regex_ascii.h"
1212
341
 #include "zorbatypes/schema_types.h"
1213
342
 #include "zorbatypes/timezone.h"
1214
343
 #include "zorbatypes/transcoder.h"
1215
344
 #include "zorbatypes/URI.h"
1216
345
 #include "zorbatypes/xerces_xmlcharray.h"
1217
346
 #include "zorbatypes/zorbatypes_decl.h"
1218
347
 #include "zorbatypes/zstring.h"
1219
348
 //#include "zorbautils/stemmer/sb_stemmer.h"
1220
349
 #include "zorbautils/condition.h"
1221
350
 #include "zorbautils/hashfun.h"
1222
351
 #include "zorbautils/hashmap.h"
1223
352
 #include "zorbautils/hashmap_itemp.h"
1224
353
 #include "zorbautils/hashmap_str_obj.h"
1225
354
 #include "zorbautils/hashmap_zstring.h"
1226
355
 #include "zorbautils/hashset.h"
1227
356
 #include "zorbautils/hashset_itemh.h"
1228
357
 //#include "zorbautils/icu_tokenizer.h"
1229
358
 #include "zorbautils/latch.h"
1230
359
 #include "zorbautils/locale.h"
1231
360
 #include "zorbautils/lock.h"
1232
361
 #include "zorbautils/mutex.h"
1233
362
 #include "zorbautils/runnable.h"
1234
363
 #include "zorbautils/SAXParser.h"
1235
364
 #include "zorbautils/stack.h"
1236
365
// #include "zorbautils/stemmer.h"
1237
366
 #include "zorbautils/string_util.h"
1238
367
 //#include "zorbautils/synchronous_logger.h"
1239
368
 //#include "zorbautils/tokenizer.h"
1240
369
 #include "unit_tests/unit_test_list.h"
1241
370
 #include "zorba/diagnostic_handler.h"
1242
371
 #include "zorba/xquery_warning.h"
1243
372
 #include "runtime/full_text/ftcontains_visitor.h"
1244
373
 #include "store/naive/naive_ft_token_iterator.h"
1245
374
 #include "store/api/ft_token_iterator.h"
1246
375
 #include "store/naive/ft_token_store.h"
1247
376
#endif
94
#endif
1248
377
/* vim:set et sw=2 ts=2: */
95
/* vim:set et sw=2 ts=2: */
1249
378
96
1250
=== modified file 'src/runtime/full_text/CMakeLists.txt'
1251
--- src/runtime/full_text/CMakeLists.txt	2012-03-28 05:19:57 +0000
1252
+++ src/runtime/full_text/CMakeLists.txt	2012-04-07 00:45:26 +0000
1253
@@ -42,11 +42,11 @@
1254
42
    default_tokenizer.cpp
42
    default_tokenizer.cpp
1255
43
    )
43
    )
1256
44
44
1258
45
IF (ZORBA_NO_UNICODE)
45
IF (ZORBA_NO_ICU)
1259
46
  LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp)
46
  LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp)
1261
47
ELSE (ZORBA_NO_UNICODE)
47
ELSE (ZORBA_NO_ICU)
1262
48
  LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp)
48
  LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp)
1264
49
ENDIF (ZORBA_NO_UNICODE)
49
ENDIF (ZORBA_NO_ICU)
1265
50
50
1266
51
ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
51
ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
1267
52
52
1268
53
53
1269
=== modified file 'src/runtime/full_text/default_tokenizer.cpp'
1270
--- src/runtime/full_text/default_tokenizer.cpp	2012-03-28 05:19:57 +0000
1271
+++ src/runtime/full_text/default_tokenizer.cpp	2012-04-07 00:45:26 +0000
1272
@@ -19,22 +19,22 @@
1273
19
#include <zorba/config.h>
19
#include <zorba/config.h>
1274
20
20
1275
21
#include "default_tokenizer.h"
21
#include "default_tokenizer.h"
1277
22
#ifdef ZORBA_NO_UNICODE
22
#ifdef ZORBA_NO_ICU
1278
23
# include "latin_tokenizer.h"
23
# include "latin_tokenizer.h"
1279
24
#else
24
#else
1280
25
# include "icu_tokenizer.h"
25
# include "icu_tokenizer.h"
1282
26
#endif /* ZORBA_NO_UNICODE */
26
#endif /* ZORBA_NO_ICU */
1283
27
27
1284
28
namespace zorba {
28
namespace zorba {
1285
29
29
1286
30
///////////////////////////////////////////////////////////////////////////////
30
///////////////////////////////////////////////////////////////////////////////
1287
31
31
1288
32
TokenizerProvider const& default_tokenizer_provider() {
32
TokenizerProvider const& default_tokenizer_provider() {
1290
33
#ifdef ZORBA_NO_UNICODE
33
#ifdef ZORBA_NO_ICU
1291
34
  static LatinTokenizerProvider const instance;
34
  static LatinTokenizerProvider const instance;
1292
35
#else
35
#else
1293
36
  static ICU_TokenizerProvider const instance;
36
  static ICU_TokenizerProvider const instance;
1295
37
#endif /* ZORBA_NO_UNICODE */
37
#endif /* ZORBA_NO_ICU */
1296
38
  return instance;
38
  return instance;
1297
39
};
39
};
1298
40
40
1299
41
41
1300
=== modified file 'src/runtime/full_text/latin_tokenizer.cpp'
1301
--- src/runtime/full_text/latin_tokenizer.cpp	2012-03-28 05:19:57 +0000
1302
+++ src/runtime/full_text/latin_tokenizer.cpp	2012-04-07 00:45:26 +0000
1303
@@ -18,8 +18,9 @@
1304
18
#include <functional>
18
#include <functional>
1305
19
19
1306
20
#include <zorba/diagnostic_list.h>
20
#include <zorba/diagnostic_list.h>
1309
21
#include <zorba/xquery_exception.h>
21
1310
22
#include <zorba/zorba.h>
22
#include "diagnostics/dict.h"
1311
23
#include "diagnostics/xquery_exception.h"
1312
23
24
1313
24
#include "latin_tokenizer.h"
25
#include "latin_tokenizer.h"
1314
25
26
1315
26
27
1316
=== modified file 'src/runtime/full_text/latin_tokenizer.h'
1317
--- src/runtime/full_text/latin_tokenizer.h	2012-03-28 05:19:57 +0000
1318
+++ src/runtime/full_text/latin_tokenizer.h	2012-04-07 00:45:26 +0000
1319
@@ -14,12 +14,12 @@
1320
14
 * limitations under the License.
14
 * limitations under the License.
1321
15
 */
15
 */
1322
16
16
1325
17
#ifndef ZORBA_WESTERN_TOKENIZER_H
17
#ifndef ZORBA_LATIN_TOKENIZER_H
1326
18
#define ZORBA_WESTERN_TOKENIZER_H
18
#define ZORBA_LATIN_TOKENIZER_H
1327
19
19
1328
20
#include <zorba/config.h>
20
#include <zorba/config.h>
1329
21
21
1331
22
#ifdef ZORBA_NO_FULL_TEXT
22
#ifdef ZORBA_NO_ICU
1332
23
23
1333
24
#include <zorba/tokenizer.h>
24
#include <zorba/tokenizer.h>
1334
25
#include "zorbatypes/zstring.h"
25
#include "zorbatypes/zstring.h"
1335
@@ -38,8 +38,8 @@
1336
38
38
1337
39
  // inherited
39
  // inherited
1338
40
  void destroy() const;
40
  void destroy() const;
1341
41
  void tokenize( char const*, size_type, iso639_1::type, bool, Callback&,
41
  void tokenize( char const*, size_type, locale::iso639_1::type, bool,
1342
42
                 void* );
42
                 Callback&, void* );
1343
43
43
1344
44
private:
44
private:
1345
45
  typedef zstring string_type;
45
  typedef zstring string_type;
1346
@@ -64,13 +64,14 @@
1347
64
class LatinTokenizerProvider : public TokenizerProvider {
64
class LatinTokenizerProvider : public TokenizerProvider {
1348
65
public:
65
public:
1349
66
  // inherited
66
  // inherited
1351
67
  Tokenizer::ptr getTokenizer( iso639_1::type, Tokenizer::Numbers& ) const;
67
  Tokenizer::ptr getTokenizer( locale::iso639_1::type,
1352
68
                               Tokenizer::Numbers& ) const;
1353
68
};
69
};
1354
69
70
1355
70
///////////////////////////////////////////////////////////////////////////////
71
///////////////////////////////////////////////////////////////////////////////
1356
71
72
1357
72
} // namespace zorba
73
} // namespace zorba
1358
73
74
1361
74
#endif /* ZORBA_NO_FULL_TEXT */
75
#endif /* ZORBA_NO_ICU */
1362
75
#endif /* ZORBA_WESTERN_TOKENIZER_H */
76
#endif /* ZORBA_LATIN_TOKENIZER_H */
1363
76
/* vim:set et sw=2 ts=2: */
77
/* vim:set et sw=2 ts=2: */
1364
77
78
1365
=== modified file 'src/runtime/numerics/format_integer_impl.cpp'
1366
--- src/runtime/numerics/format_integer_impl.cpp	2012-03-28 05:19:57 +0000
1367
+++ src/runtime/numerics/format_integer_impl.cpp	2012-04-07 00:45:26 +0000
1368
@@ -881,7 +881,7 @@
1369
881
            utf8_result += (*valueit);
881
            utf8_result += (*valueit);
1370
882
          }
882
          }
1371
883
          else
883
          else
1373
884
            utf8_result += (0x2080 + *valueit - '0');
884
            utf8_result += (unicode::code_point)(0x2080 + *valueit - '0');
1374
885
        }
885
        }
1375
886
      }
886
      }
1376
887
      else if((c0 == 0x2460) || //CIRCLED DIGIT ONE  (1-20)
887
      else if((c0 == 0x2460) || //CIRCLED DIGIT ONE  (1-20)
1377
888
888
1378
=== modified file 'src/runtime/numerics/numerics_impl.cpp'
1379
--- src/runtime/numerics/numerics_impl.cpp	2012-03-28 05:19:57 +0000
1380
+++ src/runtime/numerics/numerics_impl.cpp	2012-04-07 00:45:26 +0000
1381
@@ -462,7 +462,7 @@
1382
462
    minus( "-" )
462
    minus( "-" )
1383
463
  {
463
  {
1384
464
    utf8_string<zstring> u_per_mille( per_mille );
464
    utf8_string<zstring> u_per_mille( per_mille );
1386
465
    u_per_mille = 0x2030;
465
    u_per_mille = (unicode::code_point)0x2030;
1387
466
  }
466
  }
1388
467
467
1389
468
  void readFormat(const DecimalFormat_t& df_t)
468
  void readFormat(const DecimalFormat_t& df_t)
1390
469
469
1391
=== modified file 'src/runtime/strings/strings_impl.cpp'
1392
--- src/runtime/strings/strings_impl.cpp	2012-03-28 05:19:57 +0000
1393
+++ src/runtime/strings/strings_impl.cpp	2012-04-07 00:45:26 +0000
1394
@@ -810,7 +810,9 @@
1395
810
  zstring normForm;
810
  zstring normForm;
1396
811
  zstring resStr;
811
  zstring resStr;
1397
812
  unicode::normalization::type normType;
812
  unicode::normalization::type normType;
1398
813
#ifndef ZORBA_NO_ICU
1399
813
  bool success;
814
  bool success;
1400
815
#endif /* ZORBA_NO_ICU */
1401
814
816
1402
815
  PlanIteratorState* state;
817
  PlanIteratorState* state;
1403
816
  DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
818
  DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1404
@@ -860,10 +862,10 @@
1405
860
    }
862
    }
1406
861
863
1407
862
    item0->getStringValue2(resStr);
864
    item0->getStringValue2(resStr);
1409
863
#ifndef ZORBA_NO_UNICODE
865
#ifndef ZORBA_NO_ICU
1410
864
    success = utf8::normalize(resStr, normType, &resStr);
866
    success = utf8::normalize(resStr, normType, &resStr);
1411
865
    ZORBA_ASSERT(success);
867
    ZORBA_ASSERT(success);
1413
866
#endif//#ifndef ZORBA_NO_UNICODE
868
#endif//#ifndef ZORBA_NO_ICU
1414
867
    STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
869
    STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
1415
868
  }
870
  }
1416
869
  else
871
  else
1417
@@ -992,7 +994,7 @@
1418
992
        trans_map[ *map_i ] = *trans_i;
994
        trans_map[ *map_i ] = *trans_i;
1419
993
995
1420
994
      for ( ; map_i != map_end; ++map_i )
996
      for ( ; map_i != map_end; ++map_i )
1422
995
        trans_map[ *map_i ] = ~0;
997
        trans_map[ *map_i ] = static_cast<unicode::code_point>( ~0 );
1423
996
    }
998
    }
1424
997
999
1425
998
    utf8_string<zstring> u_result_string( result_string );
1000
    utf8_string<zstring> u_result_string( result_string );
1426
@@ -1007,7 +1009,7 @@
1427
1007
      cp_map_type::const_iterator const found_i = trans_map.find( cp );
1009
      cp_map_type::const_iterator const found_i = trans_map.find( cp );
1428
1008
      if ( found_i != trans_map.end() ) {
1010
      if ( found_i != trans_map.end() ) {
1429
1009
        cp = found_i->second;
1011
        cp = found_i->second;
1431
1010
        if ( cp == ~0 )
1012
        if ( cp == static_cast<unicode::code_point>( ~0 ) )
1432
1011
          continue;
1013
          continue;
1433
1012
      }
1014
      }
1434
1013
      u_result_string += cp;
1015
      u_result_string += cp;
1435
@@ -1795,16 +1797,33 @@
1436
1795
                          int &utf8start,
1797
                          int &utf8start,
1437
1796
                          unsigned int &bytestart,
1798
                          unsigned int &bytestart,
1438
1797
                          int utf8end,
1799
                          int utf8end,
1439
1800
                          unsigned int byteend,
1440
1798
                          zstring &out)
1801
                          zstring &out)
1441
1799
{
1802
{
1442
1803
#ifndef ZORBA_NO_ICU
1443
1800
  utf8::size_type clen;
1804
  utf8::size_type clen;
1451
1801
  while(utf8start < utf8end)
1805
  if(utf8end)
1452
1802
  {
1806
  {
1453
1803
    clen = utf8::char_length(*sin);
1807
    while(utf8start < utf8end)
1454
1804
    out.append(sin, clen);
1808
    {
1455
1805
    utf8start++;
1809
      clen = utf8::char_length(*sin);
1456
1806
    bytestart += clen;
1810
      if(clen == 0)
1457
1807
    sin += clen;
1811
        clen = 1;
1458
1812
      out.append(sin, clen);
1459
1813
      utf8start++;
1460
1814
      bytestart += clen;
1461
1815
      sin += clen;
1462
1816
    }
1463
1817
  }
1464
1818
  else
1465
1819
#endif
1466
1820
  {
1467
1821
    if(!utf8end)
1468
1822
      utf8end = byteend;
1469
1823
    out.append(sin, utf8end-bytestart);
1470
1824
    sin += utf8end-bytestart;
1471
1825
    utf8start = utf8end;
1472
1826
    bytestart = utf8end;
1473
1808
  }
1827
  }
1474
1809
}
1828
}
1475
1810
1829
1476
@@ -1812,6 +1831,7 @@
1477
1812
                               int &match_end1,
1831
                               int &match_end1,
1478
1813
                               unsigned int &match_end1_bytes,
1832
                               unsigned int &match_end1_bytes,
1479
1814
                               int match_start2,
1833
                               int match_start2,
1480
1834
                               unsigned int match_start2_bytes,
1481
1815
                               const char *&strin)
1835
                               const char *&strin)
1482
1816
{
1836
{
1483
1817
  store::Item_t non_match_elem;
1837
  store::Item_t non_match_elem;
1484
@@ -1833,7 +1853,7 @@
1485
1833
  //  utf8_it++;
1853
  //  utf8_it++;
1486
1834
  //  match_end1++;
1854
  //  match_end1++;
1487
1835
  //}
1855
  //}
1489
1836
  copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, non_match_str);
1856
  copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, match_start2_bytes, non_match_str);
1490
1837
  store::Item_t non_match_text_item;
1857
  store::Item_t non_match_text_item;
1491
1838
  GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
1858
  GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
1492
1839
}
1859
}
1493
@@ -1864,19 +1884,31 @@
1494
1864
      i--;
1884
      i--;
1495
1865
      break;
1885
      break;
1496
1866
    }
1886
    }
1497
1887
#ifndef ZORBA_NO_ICU
1498
1867
    match_startg = rx.get_match_start(i+1);
1888
    match_startg = rx.get_match_start(i+1);
1499
1868
    if((match_startg < 0) && (gparent < 0))
1889
    if((match_startg < 0) && (gparent < 0))
1500
1869
      continue;
1890
      continue;
1501
1891
#else
1502
1892
    int temp_endg;
1503
1893
    match_startg = -1;
1504
1894
    temp_endg = -1;
1505
1895
    if(!rx.get_match_start_end_bytes(i+1, &match_startg, &temp_endg) && (gparent < 0))
1506
1896
      continue;
1507
1897
#endif
1508
1870
    if(match_endgood < match_startg)
1898
    if(match_endgood < match_startg)
1509
1871
    {
1899
    {
1510
1872
      //add non-group match text
1900
      //add non-group match text
1511
1873
      zstring                non_group_str;
1901
      zstring                non_group_str;
1512
1874
1902
1514
1875
      copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, non_group_str);
1903
      copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, 0, non_group_str);
1515
1876
      store::Item_t non_group_text_item;
1904
      store::Item_t non_group_text_item;
1516
1877
      GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str);
1905
      GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str);
1517
1878
    }
1906
    }
1518
1907
#ifndef ZORBA_NO_ICU
1519
1879
    match_endg = rx.get_match_end(i+1);
1908
    match_endg = rx.get_match_end(i+1);
1520
1909
#else
1521
1910
    match_endg = temp_endg;
1522
1911
#endif
1523
1880
    //add group match text
1912
    //add group match text
1524
1881
    GENV_ITEMFACTORY->createQName(group_element_name,
1913
    GENV_ITEMFACTORY->createQName(group_element_name,
1525
1882
                                  static_context::W3C_FN_NS, "fn", "group");
1914
                                  static_context::W3C_FN_NS, "fn", "group");
1526
@@ -1907,7 +1939,7 @@
1527
1907
    }
1939
    }
1528
1908
    zstring                group_str;
1940
    zstring                group_str;
1529
1909
1941
1531
1910
    copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, group_str);
1942
    copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, 0, group_str);
1532
1911
    store::Item_t group_text_item;
1943
    store::Item_t group_text_item;
1533
1912
    GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str);
1944
    GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str);
1534
1913
  }
1945
  }
1535
@@ -1916,7 +1948,7 @@
1536
1916
  {
1948
  {
1537
1917
    zstring                non_group_str;
1949
    zstring                non_group_str;
1538
1918
1950
1540
1919
    copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, non_group_str);
1951
    copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, 0, non_group_str);
1541
1920
    store::Item_t non_group_text_item;
1952
    store::Item_t non_group_text_item;
1542
1921
    GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str);
1953
    GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str);
1543
1922
  }
1954
  }
1544
@@ -2144,8 +2176,14 @@
1545
2144
      reachedEnd = false;
2176
      reachedEnd = false;
1546
2145
      while(rx.find_next_match(&reachedEnd))
2177
      while(rx.find_next_match(&reachedEnd))
1547
2146
      {
2178
      {
1550
2147
        int    match_start2 = rx.get_match_start();
2179
        int    match_start2;
1551
2148
        int    match_end2 = rx.get_match_end();
2180
        int    match_end2;
1552
2181
#ifndef ZORBA_NO_ICU
1553
2182
        match_start2 = rx.get_match_start();
1554
2183
        match_end2 = rx.get_match_end();
1555
2184
#else
1556
2185
        rx.get_match_start_end_bytes(0, &match_start2, &match_end2);
1557
2186
#endif
1558
2149
        ZORBA_ASSERT(match_start2 >= 0);
2187
        ZORBA_ASSERT(match_start2 >= 0);
1559
2150
2188
1560
2151
        if(is_input_stream && reachedEnd && !instream->eof())
2189
        if(is_input_stream && reachedEnd && !instream->eof())
1561
@@ -2157,7 +2195,7 @@
1562
2157
        //construct the fn:non-match
2195
        //construct the fn:non-match
1563
2158
        if(match_start2 > match_end1)
2196
        if(match_start2 > match_end1)
1564
2159
        {
2197
        {
1566
2160
          addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, instr);
2198
          addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, 0, instr);
1567
2161
        }
2199
        }
1568
2162
2200
1569
2163
        //construct the fn:match
2201
        //construct the fn:match
1570
@@ -2165,7 +2203,7 @@
1571
2165
        match_end1 = match_end2;
2203
        match_end1 = match_end2;
1572
2166
      }
2204
      }
1573
2167
2205
1575
2168
      if(is_input_stream && reachedEnd && !instream->eof())
2206
      if(is_input_stream && !instream->eof())
1576
2169
      {
2207
      {
1577
2170
        //load some more data, maybe the match will be different
2208
        //load some more data, maybe the match will be different
1578
2171
        if(match_end1_bytes)
2209
        if(match_end1_bytes)
1579
@@ -2213,7 +2251,7 @@
1580
2213
      else
2251
      else
1581
2214
      {
2252
      {
1582
2215
        if(match_end1_bytes < streambuf_read)
2253
        if(match_end1_bytes < streambuf_read)
1584
2216
          addNonMatchElement(result, match_end1, match_end1_bytes, streambuf_read, instr);
2254
          addNonMatchElement(result, match_end1, match_end1_bytes, 0, streambuf_read, instr);
1585
2217
        if(is_input_stream && instream->eof())
2255
        if(is_input_stream && instream->eof())
1586
2218
          reachedEnd = true;
2256
          reachedEnd = true;
1587
2219
      }
2257
      }
1588
2220
2258
1589
=== modified file 'src/store/api/store.h'
1590
--- src/store/api/store.h	2012-03-28 05:19:57 +0000
1591
+++ src/store/api/store.h	2012-04-07 00:45:26 +0000
1592
@@ -16,7 +16,7 @@
1593
16
#ifndef ZORBA_STORE_STORE_H
16
#ifndef ZORBA_STORE_STORE_H
1594
17
#define ZORBA_STORE_STORE_H
17
#define ZORBA_STORE_STORE_H
1595
18
18
1597
19
#include <zorba/config.h>
19
#include "zorba/config.h"
1598
20
#include "zorbatypes/schema_types.h"
20
#include "zorbatypes/schema_types.h"
1599
21
21
1600
22
#include "store/api/shared_types.h"
22
#include "store/api/shared_types.h"
1601
23
23
1602
=== modified file 'src/store/naive/simple_store.h'
1603
--- src/store/naive/simple_store.h	2012-03-28 23:58:23 +0000
1604
+++ src/store/naive/simple_store.h	2012-04-07 00:45:26 +0000
1605
@@ -16,7 +16,11 @@
1606
16
#ifndef ZORBA_SIMPLE_STORE
16
#ifndef ZORBA_SIMPLE_STORE
1607
17
#define ZORBA_SIMPLE_STORE
17
#define ZORBA_SIMPLE_STORE
1608
18
18
1610
19
#include "store.h"
19
#include "store/naive/store.h"
1611
20
1612
21
#include "store/naive/node_factory.h"
1613
22
#include "store/naive/pul_primitive_factory.h"
1614
23
#include "store/naive/tree_id_generator.h"
1615
20
24
1616
21
namespace zorba {
25
namespace zorba {
1617
22
namespace simplestore {
26
namespace simplestore {
1618
@@ -72,7 +76,7 @@
1619
72
76
1620
73
  NodeFactory* createNodeFactory() const;
77
  NodeFactory* createNodeFactory() const;
1621
74
78
1623
75
  void destroyNodeFactory(NodeFactory*) const;
79
  void destroyNodeFactory(zorba::simplestore::NodeFactory*) const;
1624
76
80
1625
77
  store::ItemFactory* createItemFactory() const;
81
  store::ItemFactory* createItemFactory() const;
1626
78
82
1627
@@ -84,7 +88,7 @@
1628
84
88
1629
85
  PULPrimitiveFactory* createPULFactory() const;
89
  PULPrimitiveFactory* createPULFactory() const;
1630
86
90
1632
87
  void destroyPULFactory(PULPrimitiveFactory*) const;
91
  void destroyPULFactory(zorba::simplestore::PULPrimitiveFactory*) const;
1633
88
92
1634
89
  CollectionSet* createCollectionSet() const;
93
  CollectionSet* createCollectionSet() const;
1635
90
94
1636
91
95
1637
=== modified file 'src/store/naive/store.cpp'
1638
--- src/store/naive/store.cpp	2012-03-28 22:09:36 +0000
1639
+++ src/store/naive/store.cpp	2012-04-07 00:45:26 +0000
1640
@@ -33,7 +33,7 @@
1641
33
33
1642
34
#include "properties.h"
34
#include "properties.h"
1643
35
#include "string_pool.h"
35
#include "string_pool.h"
1645
36
#include "store.h"
36
#include "simple_store.h"
1646
37
#include "simple_temp_seq.h"
37
#include "simple_temp_seq.h"
1647
38
#include "simple_lazy_temp_seq.h"
38
#include "simple_lazy_temp_seq.h"
1648
39
#include "collection.h"
39
#include "collection.h"
1649
40
40
1650
=== modified file 'src/store/naive/store.h'
1651
--- src/store/naive/store.h	2012-03-28 22:09:36 +0000
1652
+++ src/store/naive/store.h	2012-04-07 00:45:26 +0000
1653
@@ -16,10 +16,18 @@
1654
16
#ifndef ZORBA_SIMPLESTORE_STORE_H
16
#ifndef ZORBA_SIMPLESTORE_STORE_H
1655
17
#define ZORBA_SIMPLESTORE_STORE_H
17
#define ZORBA_SIMPLESTORE_STORE_H
1656
18
18
1657
19
#include "store/api/store.h"
1658
20
1659
19
#include "shared_types.h"
21
#include "shared_types.h"
1660
20
#include "store_defs.h"
22
#include "store_defs.h"
1661
21
#include "hashmap_nodep.h"
23
#include "hashmap_nodep.h"
1662
22
#include "tree_id.h"
24
#include "tree_id.h"
1663
25
#include "store/util/hashmap_stringbuf.h"
1664
26
#include "zorbautils/mutex.h"
1665
27
#include "zorbautils/lock.h"
1666
28
#include "zorbautils/hashmap.h"
1667
29
#include "zorbautils/hashmap_itemp.h"
1668
30
#include "zorbautils/hashmap_zstring_nonserializable.h"
1669
23
31
1670
24
#if (defined (WIN32) || defined (WINCE))
32
#if (defined (WIN32) || defined (WINCE))
1671
25
#include "node_items.h"
33
#include "node_items.h"
1672
@@ -28,14 +36,7 @@
1673
28
#include "store/api/ic.h"
36
#include "store/api/ic.h"
1674
29
#endif
37
#endif
1675
30
38
1684
31
#include "store/api/store.h"
39
using namespace zorba;
1677
32
1678
33
#include "store/util/hashmap_stringbuf.h"
1679
34
1680
35
#include "zorbautils/mutex.h"
1681
36
#include "zorbautils/lock.h"
1682
37
#include "zorbautils/hashmap_itemp.h"
1683
38
#include "zorbautils/hashmap_zstring_nonserializable.h"
1685
39
40
1686
40
namespace zorba
41
namespace zorba
1687
41
{
42
{
1688
@@ -63,9 +64,9 @@
1689
63
class TreeIdGeneratorFactory;
64
class TreeIdGeneratorFactory;
1690
64
class TreeIdGenerator;
65
class TreeIdGenerator;
1691
65
66
1695
66
typedef zorba::HashMapZString<XmlNode_t> DocumentSet;
67
typedef HashMapZString<XmlNode_t> DocumentSet;
1696
67
typedef ItemPointerHashMap<store::Index_t> IndexSet;
68
typedef zorba::ItemPointerHashMap<store::Index_t> IndexSet;
1697
68
typedef ItemPointerHashMap<store::IC_t> ICSet;
69
typedef zorba::ItemPointerHashMap<store::IC_t> ICSet;
1698
69
70
1699
70
71
1700
71
72
1701
72
73
1702
=== modified file 'src/system/globalenv.cpp'
1703
--- src/system/globalenv.cpp	2012-03-28 05:19:57 +0000
1704
+++ src/system/globalenv.cpp	2012-04-07 00:45:26 +0000
1705
@@ -17,11 +17,11 @@
1706
17
17
1707
18
#include "common/common.h"
18
#include "common/common.h"
1708
19
19
1710
20
#ifndef ZORBA_NO_UNICODE
20
#ifndef ZORBA_NO_ICU
1711
21
# include <unicode/uclean.h>
21
# include <unicode/uclean.h>
1712
22
# include <unicode/utypes.h>
22
# include <unicode/utypes.h>
1713
23
# include <unicode/udata.h>
23
# include <unicode/udata.h>
1715
24
#endif /* ZORBA_NO_UNICODE */
24
#endif /* ZORBA_NO_ICU */
1716
25
25
1717
26
#ifdef ZORBA_WITH_BIG_INTEGER
26
#ifdef ZORBA_WITH_BIG_INTEGER
1718
27
# include "zorbatypes/m_apm.h"
27
# include "zorbatypes/m_apm.h"
1719
@@ -208,7 +208,7 @@
1720
208
  // from one thread only
208
  // from one thread only
1721
209
  // see http://www.icu-project.org/userguide/design.html#Init_and_Termination
209
  // see http://www.icu-project.org/userguide/design.html#Init_and_Termination
1722
210
  // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html
210
  // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html
1724
211
#ifndef ZORBA_NO_UNICODE
211
#ifndef ZORBA_NO_ICU
1725
212
#  if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
212
#  if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
1726
213
  {
213
  {
1727
214
    TCHAR    self_path[1024];
214
    TCHAR    self_path[1024];
1728
@@ -238,13 +238,13 @@
1729
238
    udata_setCommonData(icu_appdata, &data_err);
238
    udata_setCommonData(icu_appdata, &data_err);
1730
239
    ZORBA_ASSERT(data_err == U_ZERO_ERROR);
239
    ZORBA_ASSERT(data_err == U_ZERO_ERROR);
1731
240
  
240
  
1733
241
      //  u_setDataDirectory(self_path);
241
    // u_setDataDirectory(self_path);
1734
242
  }
242
  }
1735
243
#  endif
243
#  endif
1736
244
  UErrorCode lICUInitStatus = U_ZERO_ERROR;
244
  UErrorCode lICUInitStatus = U_ZERO_ERROR;
1737
245
  u_init(&lICUInitStatus);
245
  u_init(&lICUInitStatus);
1738
246
  ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR);
246
  ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR);
1740
247
#endif//ifndef ZORBA_NO_UNICODE
247
#endif /* ZORBA_NO_ICU */
1741
248
}
248
}
1742
249
249
1743
250
250
1744
@@ -256,12 +256,12 @@
1745
256
  // releases statically initialized memory and prevents
256
  // releases statically initialized memory and prevents
1746
257
  // valgrind from reporting those problems at the end
257
  // valgrind from reporting those problems at the end
1747
258
  // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920
258
  // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920
1749
259
#ifndef ZORBA_NO_UNICODE
259
#ifndef ZORBA_NO_ICU
1750
260
  u_cleanup();
260
  u_cleanup();
1751
261
# if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
261
# if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
1752
262
  delete[] icu_appdata;
262
  delete[] icu_appdata;
1753
263
# endif
263
# endif
1755
264
#endif//ifndef ZORBA_NO_UNICODE
264
#endif /* ZORBA_NO_ICU */
1756
265
}
265
}
1757
266
266
1758
267
267
1759
268
268
1760
=== modified file 'src/unit_tests/CMakeLists.txt'
1761
--- src/unit_tests/CMakeLists.txt	2012-03-28 05:19:57 +0000
1762
+++ src/unit_tests/CMakeLists.txt	2012-04-07 00:45:26 +0000
1763
@@ -29,9 +29,9 @@
1764
29
    tokenizer.cpp)
29
    tokenizer.cpp)
1765
30
ENDIF (NOT ZORBA_NO_FULL_TEXT)
30
ENDIF (NOT ZORBA_NO_FULL_TEXT)
1766
31
31
1768
32
IF (NOT ZORBA_NO_UNICODE)
32
IF (NOT ZORBA_NO_ICU)
1769
33
  LIST (APPEND UNIT_TEST_SRCS
33
  LIST (APPEND UNIT_TEST_SRCS
1770
34
    test_icu_streambuf.cpp)
34
    test_icu_streambuf.cpp)
1772
35
ENDIF (NOT ZORBA_NO_UNICODE)
35
ENDIF (NOT ZORBA_NO_ICU)
1773
36
36
1774
37
# vim:set et sw=2 tw=2:
37
# vim:set et sw=2 tw=2:
1775
38
38
1776
=== modified file 'src/unit_tests/string.cpp'
1777
--- src/unit_tests/string.cpp	2012-03-28 05:19:57 +0000
1778
+++ src/unit_tests/string.cpp	2012-04-07 00:45:26 +0000
1779
@@ -569,6 +569,7 @@
1780
569
  ASSERT_TRUE( t == s );
569
  ASSERT_TRUE( t == s );
1781
570
}
570
}
1782
571
571
1783
572
#ifndef ZORBA_NO_ICU
1784
572
template<class StringType>
573
template<class StringType>
1785
573
static void test_to_string_from_wchar_t() {
574
static void test_to_string_from_wchar_t() {
1786
574
  wchar_t const w[] = L"hello";
575
  wchar_t const w[] = L"hello";
1787
@@ -578,6 +579,7 @@
1788
578
  for ( string::size_type i = 0; i < s.length(); ++i )
579
  for ( string::size_type i = 0; i < s.length(); ++i )
1789
579
    ASSERT_TRUE( s[i] == w[i] );
580
    ASSERT_TRUE( s[i] == w[i] );
1790
580
}
581
}
1791
582
#endif /* ZORBA_NO_ICU */
1792
581
583
1793
582
template<class StringType>
584
template<class StringType>
1794
583
static void test_to_upper() {
585
static void test_to_upper() {
1795
@@ -605,6 +607,7 @@
1796
605
  }
607
  }
1797
606
}
608
}
1798
607
609
1799
610
#ifndef ZORBA_NO_ICU
1800
608
static void test_to_wchar_t() {
611
static void test_to_wchar_t() {
1801
609
  string const s = "hello";
612
  string const s = "hello";
1802
610
  wchar_t *w;
613
  wchar_t *w;
1803
@@ -616,6 +619,7 @@
1804
616
    ASSERT_TRUE( w[i] == s[i] );
619
    ASSERT_TRUE( w[i] == s[i] );
1805
617
  delete[] w;
620
  delete[] w;
1806
618
}
621
}
1807
622
#endif /* ZORBA_NO_ICU */
1808
619
623
1809
620
static void test_trim_start() {
624
static void test_trim_start() {
1810
621
  char const *s;
625
  char const *s;
1811
@@ -873,16 +877,20 @@
1812
873
  test_to_string_from_utf8<zstring>();
877
  test_to_string_from_utf8<zstring>();
1813
874
  test_to_string_from_utf8<zstring_p>();
878
  test_to_string_from_utf8<zstring_p>();
1814
875
879
1815
880
#ifndef ZORBA_NO_ICU
1816
876
  test_to_string_from_wchar_t<string>();
881
  test_to_string_from_wchar_t<string>();
1817
877
  test_to_string_from_wchar_t<zstring>();
882
  test_to_string_from_wchar_t<zstring>();
1818
878
  test_to_string_from_wchar_t<zstring_p>();
883
  test_to_string_from_wchar_t<zstring_p>();
1819
884
#endif /* ZORBA_NO_ICU */
1820
879
885
1821
880
  test_to_upper<string>();
886
  test_to_upper<string>();
1822
881
  test_to_upper<zstring>();
887
  test_to_upper<zstring>();
1823
882
  test_to_upper<zstring_p>();
888
  test_to_upper<zstring_p>();
1824
883
  test_to_upper<String>();
889
  test_to_upper<String>();
1825
884
890
1826
891
#ifndef ZORBA_NO_ICU
1827
885
  test_to_wchar_t();
892
  test_to_wchar_t();
1828
893
#endif /* ZORBA_NO_ICU */
1829
886
894
1830
887
  test_trim_start();
895
  test_trim_start();
1831
888
  test_trim_end();
896
  test_trim_end();
1832
889
897
1833
=== modified file 'src/unit_tests/unit_test_list.h'
1834
--- src/unit_tests/unit_test_list.h	2012-03-28 05:19:57 +0000
1835
+++ src/unit_tests/unit_test_list.h	2012-04-07 00:45:26 +0000
1836
@@ -36,9 +36,9 @@
1837
36
    /**
36
    /**
1838
37
     * ADD NEW UNIT TESTS HERE
37
     * ADD NEW UNIT TESTS HERE
1839
38
     */
38
     */
1841
39
#ifndef ZORBA_NO_UNICODE
39
#ifndef ZORBA_NO_ICU
1842
40
    int test_icu_streambuf( int, char*[] );
40
    int test_icu_streambuf( int, char*[] );
1844
41
#endif /* ZORBA_NO_UNICODE */
41
#endif /* ZORBA_NO_ICU */
1845
42
    int json_parser( int, char*[] );
42
    int json_parser( int, char*[] );
1846
43
43
1847
44
    void initializeTestList();
44
    void initializeTestList();
1848
45
45
1849
=== modified file 'src/unit_tests/unit_tests.cpp'
1850
--- src/unit_tests/unit_tests.cpp	2012-03-28 05:19:57 +0000
1851
+++ src/unit_tests/unit_tests.cpp	2012-04-07 00:45:26 +0000
1852
@@ -39,9 +39,9 @@
1853
39
    void initializeTestList() {
39
    void initializeTestList() {
1854
40
      libunittests["string"] = test_string;
40
      libunittests["string"] = test_string;
1855
41
      libunittests["uri"] = runUriTest;
41
      libunittests["uri"] = runUriTest;
1857
42
#ifndef ZORBA_NO_UNICODE
42
#ifndef ZORBA_NO_ICU
1858
43
      libunittests["icu_streambuf"] = test_icu_streambuf;
43
      libunittests["icu_streambuf"] = test_icu_streambuf;
1860
44
#endif /* ZORBA_NO_UNICODE */
44
#endif /* ZORBA_NO_ICU */
1861
45
      libunittests["json_parser"] = json_parser;
45
      libunittests["json_parser"] = json_parser;
1862
46
      libunittests["unique_ptr"] = test_unique_ptr;
46
      libunittests["unique_ptr"] = test_unique_ptr;
1863
47
#ifndef ZORBA_NO_FULL_TEXT
47
#ifndef ZORBA_NO_FULL_TEXT
1864
48
48
1865
=== modified file 'src/util/CMakeLists.txt'
1866
--- src/util/CMakeLists.txt	2012-03-28 05:19:57 +0000
1867
+++ src/util/CMakeLists.txt	2012-04-07 00:45:26 +0000
1868
@@ -40,14 +40,14 @@
1869
40
  LIST(APPEND UTIL_SRCS mmap_file.cpp)
40
  LIST(APPEND UTIL_SRCS mmap_file.cpp)
1870
41
ENDIF(ZORBA_WITH_FILE_ACCESS)
41
ENDIF(ZORBA_WITH_FILE_ACCESS)
1871
42
42
1873
43
IF(ZORBA_NO_UNICODE)
43
IF(ZORBA_NO_ICU)
1874
44
  LIST(APPEND UTIL_SRCS
44
  LIST(APPEND UTIL_SRCS
1876
45
    regex_ascii.cpp
45
    regex_xquery.cpp
1877
46
    passthru_streambuf.cpp)
46
    passthru_streambuf.cpp)
1879
47
ELSE(ZORBA_NO_UNICODE)
47
ELSE(ZORBA_NO_ICU)
1880
48
  LIST(APPEND UTIL_SRCS
48
  LIST(APPEND UTIL_SRCS
1881
49
    icu_streambuf.cpp)
49
    icu_streambuf.cpp)
1883
50
ENDIF(ZORBA_NO_UNICODE)
50
ENDIF(ZORBA_NO_ICU)
1884
51
51
1885
52
HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)
52
HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)
1886
53
HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32)
53
HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32)
1887
54
54
1888
=== modified file 'src/util/icu_streambuf.h'
1889
--- src/util/icu_streambuf.h	2012-02-04 01:26:18 +0000
1890
+++ src/util/icu_streambuf.h	2012-04-07 00:45:26 +0000
1891
@@ -17,6 +17,7 @@
1892
17
#ifndef ZORBA_ICU_STREAMBUF_H
17
#ifndef ZORBA_ICU_STREAMBUF_H
1893
18
#define ZORBA_ICU_STREAMBUF_H
18
#define ZORBA_ICU_STREAMBUF_H
1894
19
19
1895
20
#include <unicode/ucnv.h>
1896
20
#include <zorba/transcode_stream.h>
21
#include <zorba/transcode_stream.h>
1897
21
22
1898
22
#include "util/utf8_util.h"
23
#include "util/utf8_util.h"
1899
23
24
1900
=== modified file 'src/util/passthru_streambuf.cpp'
1901
--- src/util/passthru_streambuf.cpp	2012-02-04 01:26:18 +0000
1902
+++ src/util/passthru_streambuf.cpp	2012-04-07 00:45:26 +0000
1903
@@ -14,8 +14,8 @@
1904
14
 * limitations under the License.
14
 * limitations under the License.
1905
15
 */
15
 */
1906
16
16
1907
17
#include "stdafx.h"
1908
17
#include "passthru_streambuf.h"
18
#include "passthru_streambuf.h"
1909
18
1910
19
using namespace std;
19
using namespace std;
1911
20
20
1912
21
namespace zorba {
21
namespace zorba {
1913
@@ -47,7 +47,7 @@
1914
47
}
47
}
1915
48
48
1916
49
bool passthru_streambuf::is_supported( char const *cc_charset ) {
49
bool passthru_streambuf::is_supported( char const *cc_charset ) {
1918
50
  return !is_necessary( charset );
50
  return !is_necessary( cc_charset );
1919
51
}
51
}
1920
52
52
1921
53
passthru_streambuf::pos_type
53
passthru_streambuf::pos_type
1922
54
54
1923
=== modified file 'src/util/passthru_streambuf.h'
1924
--- src/util/passthru_streambuf.h	2012-02-02 18:37:24 +0000
1925
+++ src/util/passthru_streambuf.h	2012-04-07 00:45:26 +0000
1926
@@ -17,8 +17,9 @@
1927
17
#ifndef ZORBA_PASSTHRU_STREAMBUF_H
17
#ifndef ZORBA_PASSTHRU_STREAMBUF_H
1928
18
#define ZORBA_PASSTHRU_STREAMBUF_H
18
#define ZORBA_PASSTHRU_STREAMBUF_H
1929
19
19
1932
20
#include <zorba/transcode_streambuf.h>
20
#include <zorba/transcode_stream.h>
1933
21
21
#include "zorbatypes/zstring.h"
1934
22
#include "util/ascii_util.h"
1935
22
namespace zorba {
23
namespace zorba {
1936
23
24
1937
24
///////////////////////////////////////////////////////////////////////////////
25
///////////////////////////////////////////////////////////////////////////////
1938
@@ -48,6 +49,13 @@
1939
48
   * @return \c true only if the character encoding is supported.
49
   * @return \c true only if the character encoding is supported.
1940
49
   */
50
   */
1941
50
  static bool is_supported( char const *charset );
51
  static bool is_supported( char const *charset );
1942
52
  static bool is_necessary( char const *cc_charset );
1943
53
1944
54
  typedef std::streambuf::char_type char_type;
1945
55
  typedef std::streambuf::int_type int_type;
1946
56
  typedef std::streambuf::off_type off_type;
1947
57
  typedef std::streambuf::pos_type pos_type;
1948
58
  typedef std::streambuf::traits_type traits_type;
1949
51
59
1950
52
protected:
60
protected:
1951
53
  void imbue( std::locale const& );
61
  void imbue( std::locale const& );
1952
54
62
1953
=== modified file 'src/util/regex.cpp'
1954
--- src/util/regex.cpp	2012-03-28 05:19:57 +0000
1955
+++ src/util/regex.cpp	2012-04-07 00:45:26 +0000
1956
@@ -15,8 +15,6 @@
1957
15
 */
15
 */
1958
16
#include "stdafx.h"
16
#include "stdafx.h"
1959
17
17
1960
18
#include "regex.h"
1961
19
1962
20
#include <cstring>
18
#include <cstring>
1963
21
#include <vector>
19
#include <vector>
1964
22
20
1965
@@ -28,13 +26,13 @@
1966
28
26
1967
29
#include "ascii_util.h"
27
#include "ascii_util.h"
1968
30
#include "cxx_util.h"
28
#include "cxx_util.h"
1969
29
#include "regex.h"
1970
31
#include "stl_util.h"
30
#include "stl_util.h"
1971
32
31
1972
33
#define INVALID_RE_EXCEPTION(...) \
32
#define INVALID_RE_EXCEPTION(...) \
1973
34
  XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) )
33
  XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) )
1974
35
34
1977
36
35
#ifndef ZORBA_NO_ICU
1976
37
#ifndef ZORBA_NO_UNICODE
1978
38
# include <unicode/uversion.h>
36
# include <unicode/uversion.h>
1979
39
U_NAMESPACE_USE
37
U_NAMESPACE_USE
1980
40
38
1981
@@ -103,6 +101,7 @@
1982
103
101
1983
104
  bool got_backslash = false;
102
  bool got_backslash = false;
1984
105
  bool in_char_class = false;           // within [...]
103
  bool in_char_class = false;           // within [...]
1985
104
  bool is_first_char = true;            // to check ^ placement
1986
106
105
1987
107
  bool in_backref = false;              // '\'[1-9][0-9]*
106
  bool in_backref = false;              // '\'[1-9][0-9]*
1988
108
  unsigned backref_no = 0;              // 1-based
107
  unsigned backref_no = 0;              // 1-based
1989
@@ -231,6 +230,8 @@
1990
231
            ++open_cap_subs;
230
            ++open_cap_subs;
1991
232
            cap_sub.push_back( true );
231
            cap_sub.push_back( true );
1992
233
            cur_cap_sub = cap_sub.size();
232
            cur_cap_sub = cap_sub.size();
1993
233
            is_first_char = true;
1994
234
            goto append;
1995
234
          }
235
          }
1996
235
          break;
236
          break;
1997
236
        case ')':
237
        case ')':
1998
@@ -245,8 +246,10 @@
1999
245
        case '[':
246
        case '[':
2000
246
          if ( q_flag )
247
          if ( q_flag )
2001
247
            *icu_re += '\\';
248
            *icu_re += '\\';
2003
248
          else
249
          else {
2004
249
            in_char_class = true;
250
            in_char_class = true;
2005
251
            goto append;
2006
252
          }
2007
250
          break;
253
          break;
2008
251
        case ']':
254
        case ']':
2009
252
          if ( q_flag )
255
          if ( q_flag )
2010
@@ -254,6 +257,19 @@
2011
254
          else
257
          else
2012
255
            in_char_class = false;
258
            in_char_class = false;
2013
256
          break;
259
          break;
2014
260
        case '^':
2015
261
          if ( q_flag )
2016
262
            *icu_re += '\\';
2017
263
          else if ( !is_first_char && !in_char_class )
2018
264
            throw INVALID_RE_EXCEPTION( xq_re, ZED( UnescapedChar_3 ), *xq_c );
2019
265
          break;
2020
266
        case '|':
2021
267
          if ( q_flag )
2022
268
            *icu_re += '\\';
2023
269
          else {
2024
270
            is_first_char = true;
2025
271
            goto append;
2026
272
          }
2027
257
        default:
273
        default:
2028
258
          if ( x_flag && ascii::is_space( *xq_c ) ) {
274
          if ( x_flag && ascii::is_space( *xq_c ) ) {
2029
259
            if ( !in_char_class )
275
            if ( !in_char_class )
2030
@@ -265,37 +281,42 @@
2031
265
            //
281
            //
2032
266
            *icu_re += '\\';
282
            *icu_re += '\\';
2033
267
          }
283
          }
2036
268
      }
284
      } // switch
2037
269
    }
285
    } // else
2038
286
    is_first_char = false;
2039
287
append:
2040
270
    *icu_re += *xq_c;
288
    *icu_re += *xq_c;
2041
271
  } // FOR_EACH
289
  } // FOR_EACH
2042
272
290
2056
273
  if ( i_flag ) {
291
  if ( !q_flag ) {
2057
274
    //
292
    if ( i_flag ) {
2058
275
    // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i"
293
      //
2059
276
    // flag.  For example, "\p{Lu}" continues to match upper-case letters only.
294
      // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i"
2060
277
    //
295
      // flag.  For example, "\p{Lu}" continues to match upper-case letters
2061
278
    // However, ICU lower-cases everything for the 'i' flag; hence we have to
296
      // only.
2062
279
    // turn off the 'i' flag for just the \p{Lu}.
297
      //
2063
280
    //
298
      // However, ICU lower-cases everything for the 'i' flag; hence we have to
2064
281
    // Note that the "6" and "12" below are correct since "\\" represents a
299
      // turn off the 'i' flag for just the \p{Lu}.
2065
282
    // single '\'.
300
      //
2066
283
    //
301
      // Note that the "6" and "12" below are correct since "\\" represents a
2067
284
    ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 );
302
      // single '\'.
2068
285
  }
303
      //
2069
304
      ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 );
2070
305
    }
2071
286
306
2084
287
  //
307
    //
2085
288
  // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a
308
    // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a
2086
289
  // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement,
309
    // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement,
2087
290
  // Hangul Jamo, CJK Compatibility, etc. The set containing all characters
310
    // Hangul Jamo, CJK Compatibility, etc. The set containing all characters
2088
291
  // that have block name X (with all white space stripped out), can be
311
    // that have block name X (with all white space stripped out), can be
2089
292
  // identified with a block escape \p{IsX}.
312
    // identified with a block escape \p{IsX}.
2090
293
  //
313
    //
2091
294
  // However, ICU uses \p{InX} rather than \p{IsX}.
314
    // However, ICU uses \p{InX} rather than \p{IsX}.
2092
295
  //
315
    //
2093
296
  // Note that the "5" below is correct since "\\" represents a single '\'.
316
    // Note that the "5" below is correct since "\\" represents a single '\'.
2094
297
  //
317
    //
2095
298
  ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 );
318
    ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 );
2096
319
  } // q_flag
2097
299
}
320
}
2098
300
321
2099
301
///////////////////////////////////////////////////////////////////////////////
322
///////////////////////////////////////////////////////////////////////////////
2100
@@ -442,11 +463,11 @@
2101
442
}
463
}
2102
443
464
2103
444
} // namespace unicode
465
} // namespace unicode
2109
445
466
} // namespace zorba
2110
446
}//namespace zorba
467
2111
447
468
///////////////////////////////////////////////////////////////////////////////
2112
448
469
2113
449
#else /* ZORBA_NO_UNICODE */
470
#else /* ZORBA_NO_ICU */
2114
450
471
2115
451
#include "zorbatypes/zstring.h"
472
#include "zorbatypes/zstring.h"
2116
452
473
2117
@@ -470,7 +491,7 @@
2118
470
    case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break;
491
    case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break;
2119
471
    case 's': flags |= REGEX_ASCII_DOTALL; break;
492
    case 's': flags |= REGEX_ASCII_DOTALL; break;
2120
472
    case 'm': flags |= REGEX_ASCII_MULTILINE; break;
493
    case 'm': flags |= REGEX_ASCII_MULTILINE; break;
2122
473
    case 'x': flags |= REGEX_ASCII_COMMENTS; break;
494
    case 'x': flags |= REGEX_ASCII_NO_WHITESPACE; break;
2123
474
    case 'q': flags |= REGEX_ASCII_LITERAL; break;
495
    case 'q': flags |= REGEX_ASCII_LITERAL; break;
2124
475
    default:
496
    default:
2125
476
      throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) );
497
      throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) );
2126
@@ -483,6 +504,7 @@
2127
483
void regex::compile( char const *pattern, char const *flags)
504
void regex::compile( char const *pattern, char const *flags)
2128
484
{
505
{
2129
485
  parsed_flags = parse_regex_flags(flags);
506
  parsed_flags = parse_regex_flags(flags);
2130
507
  regex_xquery::CRegexXQuery_parser regex_parser;
2131
486
  regex_matcher = regex_parser.parse(pattern, parsed_flags);
508
  regex_matcher = regex_parser.parse(pattern, parsed_flags);
2132
487
  if(!regex_matcher)
509
  if(!regex_matcher)
2133
488
    throw INVALID_RE_EXCEPTION(pattern);
510
    throw INVALID_RE_EXCEPTION(pattern);
2134
@@ -517,6 +539,8 @@
2135
517
bool regex::next_token( char const *s, size_type *pos, zstring *token,
539
bool regex::next_token( char const *s, size_type *pos, zstring *token,
2136
518
                  bool *matched)
540
                  bool *matched)
2137
519
{
541
{
2138
542
  if(!s[*pos])
2139
543
    return false;
2140
520
  bool  retval;
544
  bool  retval;
2141
521
  int   match_pos;
545
  int   match_pos;
2142
522
  int   matched_len;
546
  int   matched_len;
2143
@@ -528,14 +552,8 @@
2144
528
      token->assign(s+*pos, match_pos);
552
      token->assign(s+*pos, match_pos);
2145
529
    *pos += match_pos + matched_len;
553
    *pos += match_pos + matched_len;
2146
530
    if(matched)
554
    if(matched)
2155
531
      if(match_pos)
555
      *matched = true;
2156
532
        *matched = true;
556
    return true;
2149
533
      else
2150
534
        *matched = false;
2151
535
    if(match_pos)
2152
536
      return true;
2153
537
    else
2154
538
      return false;
2157
539
  }
557
  }
2158
540
  else
558
  else
2159
541
  {
559
  {
2160
@@ -544,7 +562,7 @@
2161
544
    *pos += strlen(s+*pos);
562
    *pos += strlen(s+*pos);
2162
545
    if(matched)
563
    if(matched)
2163
546
      *matched = false;
564
      *matched = false;
2165
547
    return s[*pos] != 0;
565
    return true;
2166
548
  }
566
  }
2167
549
}
567
}
2168
550
568
2169
@@ -554,13 +572,9 @@
2170
554
  int   matched_pos;
572
  int   matched_pos;
2171
555
  int   matched_len;
573
  int   matched_len;
2172
556
574
2176
557
  bool prev_align = regex_matcher->set_align_begin(true);
575
  retval = regex_matcher->match_anywhere(s, parsed_flags|REGEX_ASCII_WHOLE_MATCH, &matched_pos, &matched_len);
2174
558
  retval = regex_matcher->match_from(s, parsed_flags, &matched_pos, &matched_len);
2175
559
  regex_matcher->set_align_begin(prev_align);
2177
560
  if(!retval)
576
  if(!retval)
2178
561
    return false;
577
    return false;
2179
562
  if(matched_len != strlen(s))
2180
563
    return false;
2181
564
  return true;
578
  return true;
2182
565
}
579
}
2183
566
580
2184
@@ -587,14 +601,19 @@
2185
587
      //look for dollars
601
      //look for dollars
2186
588
      if(*temprepl == '\\')
602
      if(*temprepl == '\\')
2187
589
      {
603
      {
2191
590
        temprepl++;
604
        if(!(parsed_flags & REGEX_ASCII_LITERAL))
2192
591
        if(!*temprepl || (*temprepl != '\\') || (*temprepl != '$'))//Invalid replacement string.
605
        {
2193
592
          throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
606
          temprepl++;
2194
607
          if(!*temprepl) 
2195
608
            temprepl--;
2196
609
          else if((*temprepl != '\\') && (*temprepl != '$'))//Invalid replacement string.
2197
610
            throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
2198
611
        }
2199
593
        result->append(1, *temprepl);
612
        result->append(1, *temprepl);
2200
594
        temprepl++;
613
        temprepl++;
2201
595
        continue;
614
        continue;
2202
596
      }
615
      }
2204
597
      if(*temprepl == '$')
616
      if((*temprepl == '$') && !(parsed_flags & REGEX_ASCII_LITERAL))
2205
598
      {
617
      {
2206
599
        temprepl++;
618
        temprepl++;
2207
600
        index = 0;
619
        index = 0;
2208
@@ -648,7 +667,7 @@
2209
648
  if(retval)
667
  if(retval)
2210
649
  {
668
  {
2211
650
    m_match_pos += m_pos;
669
    m_match_pos += m_pos;
2213
651
    m_pos = m_match_pos = m_matched_len;
670
    m_pos = m_match_pos + m_matched_len;
2214
652
  }
671
  }
2215
653
  else
672
  else
2216
654
  {
673
  {
2217
@@ -666,35 +685,30 @@
2218
666
  return (int)regex_matcher->get_indexed_regex_count();
685
  return (int)regex_matcher->get_indexed_regex_count();
2219
667
}
686
}
2220
668
687
2245
669
int regex::get_match_start( int groupId )
688
bool regex::get_match_start_end_bytes( int groupId, int *start, int *end )
2246
670
{
689
{
2247
671
  if(groupId == 0)
690
  *start = -1;
2248
672
    return m_match_pos;
691
  *end = -1;
2249
673
  if(groupId > (int)regex_matcher->get_indexed_regex_count())
692
  if(groupId == 0)
2250
674
    return -1;
693
  {
2251
675
  const char *submatched_source;
694
    *start = m_match_pos;
2252
676
  int   submatched_len;
695
    *end = m_match_pos + m_matched_len;
2253
677
  if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
696
    return true;
2254
678
    return -1;
697
  }
2255
679
  return submatched_source - s_in_.c_str();
698
  if(groupId > (int)regex_matcher->get_indexed_regex_count())
2256
680
}
699
    return false;
2257
681
700
  const char *submatched_source;
2258
682
int regex::get_match_end( int groupId )
701
  int   submatched_len;
2259
683
{
702
  if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
2260
684
  if(groupId == 0)
703
    return false;
2261
685
    return m_match_pos + m_matched_len;
704
  *start = submatched_source - s_in_.c_str();
2262
686
  if(groupId > (int)regex_matcher->get_indexed_regex_count())
705
  *end = *start + submatched_len;
2263
687
    return -1;
706
  return true;
2240
688
  const char *submatched_source;
2241
689
  int   submatched_len;
2242
690
  if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
2243
691
    return -1;
2244
692
  return submatched_source - s_in_.c_str() + submatched_len;
2264
693
}
707
}
2265
694
708
2266
695
} // namespace unicode
709
} // namespace unicode
2267
696
} // namespace zorba
710
} // namespace zorba
2269
697
#endif /* ZORBA_NO_UNICODE */
711
#endif /* ZORBA_NO_ICU */
2270
698
712
2271
699
///////////////////////////////////////////////////////////////////////////////
713
///////////////////////////////////////////////////////////////////////////////
2272
700
714
2273
701
715
2274
=== modified file 'src/util/regex.h'
2275
--- src/util/regex.h	2012-03-28 05:19:57 +0000
2276
+++ src/util/regex.h	2012-04-07 00:45:26 +0000
2277
@@ -17,15 +17,13 @@
2278
17
#ifndef ZORBA_REGEX_H
17
#ifndef ZORBA_REGEX_H
2279
18
#define ZORBA_REGEX_H
18
#define ZORBA_REGEX_H
2280
19
19
2281
20
#ifndef ZORBA_NO_UNICODE
2282
21
#include <unicode/regex.h>
2283
22
#endif
2284
23
2285
24
#include "cxx_util.h"
20
#include "cxx_util.h"
2286
25
#include "unicode_util.h"
21
#include "unicode_util.h"
2287
26
#include "zorbatypes/zstring.h"
22
#include "zorbatypes/zstring.h"
2288
27
23
2290
28
#ifndef ZORBA_NO_UNICODE
24
#ifndef ZORBA_NO_ICU
2291
25
2292
26
#include <unicode/regex.h>
2293
29
27
2294
30
namespace zorba {
28
namespace zorba {
2295
31
29
2296
@@ -496,15 +494,17 @@
2297
496
} // namespace unicode
494
} // namespace unicode
2298
497
} // namespace zorba
495
} // namespace zorba
2299
498
496
2303
499
#else ///ZORBA_NO_UNICODE (ascii part:)
497
///////////////////////////////////////////////////////////////////////////////
2304
500
498
2305
501
#include "util/regex_ascii.h"
499
#else /* ZORBA_NO_ICU */
2306
500
2307
501
#include "util/regex_xquery.h"
2308
502
#include <string>
502
#include <string>
2309
503
503
2310
504
namespace zorba{
504
namespace zorba{
2311
505
/**
505
/**
2312
506
 * Converts an XQuery regular expression to the form used by the regular
506
 * Converts an XQuery regular expression to the form used by the regular
2314
507
 * expression library Zorba is using (here regex_ascii).
507
 * expression library Zorba is using (here regex_xquery).
2315
508
 *
508
 *
2316
509
 * @param xq_re The XQuery regular expression.
509
 * @param xq_re The XQuery regular expression.
2317
510
 * @param lib_re A pointer to the resuling library regular expression.
510
 * @param lib_re A pointer to the resuling library regular expression.
2318
@@ -525,7 +525,7 @@
2319
525
  /**
525
  /**
2320
526
   * Constructs a %regex.
526
   * Constructs a %regex.
2321
527
   */
527
   */
2323
528
  regex() : regex_matcher( NULL ) { }
528
  regex() : regex_matcher( nullptr ) { }
2324
529
529
2325
530
  /**
530
  /**
2326
531
   * Destroys a %regex.
531
   * Destroys a %regex.
2327
@@ -835,31 +835,21 @@
2328
835
835
2329
836
  /**
836
  /**
2330
837
   * Get the start position of the matched group.
837
   * Get the start position of the matched group.
2334
838
   * If groupId is zero, then the start position of the whole match is returned.
838
   * If groupId is zero, then the start and end position of the whole match is returned.
2335
839
   * If groupId is non-zero, then the start position of that group is returned.
839
   * If groupId is non-zero, then the start and end position of that group is returned.
2336
840
   * If that group has not been matched, -1 is returned.
840
   * If that group has not been matched, false is returned.
2337
841
   *
841
   *
2338
842
   * @param groupId the id of the group, either zero for the entire regex,
842
   * @param groupId the id of the group, either zero for the entire regex,
2339
843
   *  or [1 .. group_count] for that specific group
843
   *  or [1 .. group_count] for that specific group
2341
844
   * @return the start position, zero based, or -1 if that group didn't match
844
   * @param start to return start position in bytes
2342
845
   * @param end to return end position in bytes
2343
846
   * @return true if that group exists and has been matched
2344
845
   */
847
   */
2346
846
  int get_match_start( int groupId = 0 );
848
  bool get_match_start_end_bytes( int groupId, int *start, int *end );
2347
847
849
2348
848
  /**
2349
849
   * Get the end position of the matched group.
2350
850
   * If groupId is zero, then the end position of the whole match is returned.
2351
851
   * If groupId is non-zero, then the end position of that group is returned.
2352
852
   * If that group has not been matched, -1 is returned.
2353
853
   *
2354
854
   * @param groupId the id of the group, either zero for the entire regex,
2355
855
   *  or [1 .. group_count] for that specific group
2356
856
   * @return the end position, zero based, or -1 if that group didn't match
2357
857
   */
2358
858
  int get_match_end( int groupId = 0 );
2359
859
850
2360
860
private:
851
private:
2363
861
  regex_ascii::CRegexAscii_parser regex_parser;
852
  regex_xquery::CRegexXQuery_regex  *regex_matcher;
2362
862
  regex_ascii::CRegexAscii_regex  *regex_matcher;
2364
863
  uint32_t    parsed_flags;
853
  uint32_t    parsed_flags;
2365
864
854
2366
865
  zstring s_in_;
855
  zstring s_in_;
2367
@@ -873,15 +863,13 @@
2368
873
  regex( regex const& );
863
  regex( regex const& );
2369
874
  regex& operator=( regex const& );
864
  regex& operator=( regex const& );
2370
875
};
865
};
2371
866
2372
867
///////////////////////////////////////////////////////////////////////////////
2373
868
2374
876
} // namespace unicode
869
} // namespace unicode
2375
877
} // namespace zorba
870
} // namespace zorba
2376
878
871
2383
879
#endif /* ZORBA_NO_UNICODE */
872
#endif /* ZORBA_NO_ICU */
2378
880
2379
881
2380
882
///////////////////////////////////////////////////////////////////////////////
2381
883
2382
884
2384
885
#endif /* ZORBA_REGEX_H */
873
#endif /* ZORBA_REGEX_H */
2385
886
/*
874
/*
2386
887
 * Local variables:
875
 * Local variables:
2387
888
876
2388
=== renamed file 'src/util/regex_ascii.cpp' => 'src/util/regex_xquery.cpp'
2389
--- src/util/regex_ascii.cpp	2012-03-28 05:19:57 +0000
2390
+++ src/util/regex_xquery.cpp	2012-04-07 00:45:26 +0000
2391
@@ -1,4 +1,4 @@
2393
1
a/*
1
/*
2394
2
 * Copyright 2006-2008 The FLWOR Foundation.
2
 * Copyright 2006-2008 The FLWOR Foundation.
2395
3
 * 
3
 * 
2396
4
 * Licensed under the Apache License, Version 2.0 (the "License");
4
 * Licensed under the Apache License, Version 2.0 (the "License");
2397
@@ -18,12 +18,15 @@
2398
18
18
2399
19
#include "diagnostics/xquery_diagnostics.h"
19
#include "diagnostics/xquery_diagnostics.h"
2400
20
20
2402
21
#include "regex_ascii.h"
21
#include "regex_xquery.h"
2403
22
#include <string.h>
22
#include <string.h>
2404
23
#include "zorbatypes/chartype.h"
23
#include "zorbatypes/chartype.h"
2405
24
#include "util/unicode_categories.h"
2406
25
#include "util/ascii_util.h"
2407
26
#include "util/utf8_string.h"
2408
24
27
2409
25
namespace zorba {
28
namespace zorba {
2411
26
  namespace regex_ascii{
29
  namespace regex_xquery{
2412
27
//ascii regular expression matching
30
//ascii regular expression matching
2413
28
31
2414
29
/*http://www.w3.org/TR/xmlschema-2/#regexs
32
/*http://www.w3.org/TR/xmlschema-2/#regexs
2415
@@ -62,96 +65,138 @@
2416
62
+ http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented)
65
+ http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented)
2417
63
*/
66
*/
2418
64
67
2419
68
2420
69
static bool compare_ascii_i(const char *str1, const char *str2)
2421
70
{
2422
71
  while(*str1 && *str2)
2423
72
  {
2424
73
    if(ascii::to_lower(*str1) != ascii::to_lower(*str2))
2425
74
      return false;
2426
75
    str1++;
2427
76
    str2++;
2428
77
  }
2429
78
  if(*str1 || *str2)
2430
79
    return false;
2431
80
  return true;
2432
81
}
2433
82
2434
83
static bool compare_unicode_ni(const char *str1, const char *str2, int len)
2435
84
{
2436
85
  while(len > 0)
2437
86
  {
2438
87
    const char *temp_str1 = str1;
2439
88
    const char *temp_str2 = str2;
2440
89
    unicode::code_point cp1 = unicode::to_upper(utf8::next_char(temp_str1));
2441
90
    unicode::code_point cp2 = unicode::to_upper(utf8::next_char(temp_str2));
2442
91
    if(cp1 != cp2)
2443
92
      return false;
2444
93
    len -= temp_str1-str1;
2445
94
    str1 = temp_str1;
2446
95
    str2 = temp_str2;
2447
96
  }
2448
97
  return true;
2449
98
}
2450
99
static utf8::size_type myutf8len(const char *source)
2451
100
{
2452
101
  utf8::size_type  len = utf8::char_length(*source);
2453
102
  if(!len)
2454
103
    return 1;
2455
104
  else
2456
105
    return len;
2457
106
}
2458
65
////////////////////////////////////
107
////////////////////////////////////
2459
66
////Regular expression parsing and building of the tree
108
////Regular expression parsing and building of the tree
2460
67
////////////////////////////////////
109
////////////////////////////////////
2461
68
110
2463
69
CRegexAscii_regex* CRegexAscii_parser::parse(const char *pattern, unsigned int flags)
111
CRegexXQuery_regex* CRegexXQuery_parser::parse(const char *pattern, unsigned int flags)
2464
70
{
112
{
2465
71
  this->flags = flags;
113
  this->flags = flags;
2466
72
  bool align_begin = false;
2467
73
  
114
  
2468
74
  if(!(flags & REGEX_ASCII_LITERAL) && (pattern[0] == '^'))
2469
75
    align_begin = true;
2470
76
2471
77
  int   regex_len;
115
  int   regex_len;
2473
78
  CRegexAscii_regex*  regex = parse_regexp(pattern + (align_begin?1:0), &regex_len);
116
  CRegexXQuery_regex*  regex = parse_regexp(pattern, &regex_len);
2474
79
  
117
  
2475
80
  if(regex)
2476
81
    regex->set_align_begin(align_begin);
2477
82
2478
83
  return regex;
118
  return regex;
2479
84
}
119
}
2480
85
120
2481
86
//until '\0' or ')'
121
//until '\0' or ')'
2483
87
CRegexAscii_regex* CRegexAscii_parser::parse_regexp(const char *pattern, 
122
CRegexXQuery_regex* CRegexXQuery_parser::parse_regexp(const char *pattern, 
2484
88
                                                    int *regex_len)
123
                                                    int *regex_len)
2485
89
{
124
{
2486
90
  *regex_len = 0;
125
  *regex_len = 0;
2487
91
  int   branch_len;
126
  int   branch_len;
2488
92
  regex_depth++;
127
  regex_depth++;
2490
93
  CRegexAscii_regex *regex = new CRegexAscii_regex(current_regex);
128
  std::auto_ptr<CRegexXQuery_regex>  regex(new CRegexXQuery_regex(current_regex));
2491
94
  if(!current_regex)
129
  if(!current_regex)
2493
95
    current_regex = regex;
130
    current_regex = regex.get();
2494
96
  if(regex_depth >= 2)
131
  if(regex_depth >= 2)
2495
97
  {
132
  {
2496
98
    //mark this as group if it does not start with ?:
133
    //mark this as group if it does not start with ?:
2497
99
    if(pattern[0] != '?' || pattern[1] != ':')
134
    if(pattern[0] != '?' || pattern[1] != ':')
2499
100
      current_regex->subregex.push_back(regex);
135
      current_regex->subregex.push_back(regex.get());
2500
101
    else
136
    else
2501
102
      *regex_len = 2;
137
      *regex_len = 2;
2502
103
  }
138
  }
2504
104
  CRegexAscii_branch  *branch;
139
  CRegexXQuery_branch  *branch;
2505
140
  bool must_read_another_branch = true;
2506
105
  while(pattern[*regex_len] && (pattern[*regex_len] != ')'))
141
  while(pattern[*regex_len] && (pattern[*regex_len] != ')'))
2507
106
  {
142
  {
2508
107
    branch = parse_branch(pattern+*regex_len, &branch_len);
143
    branch = parse_branch(pattern+*regex_len, &branch_len);
2509
108
    if(!branch)
144
    if(!branch)
2510
109
    {
145
    {
2511
110
      regex_depth--;
146
      regex_depth--;
2512
111
      delete regex;
2513
112
      return NULL;
147
      return NULL;
2514
113
    }
148
    }
2515
114
    regex->add_branch(branch);
149
    regex->add_branch(branch);
2516
115
    *regex_len += branch_len;
150
    *regex_len += branch_len;
2517
151
    if(pattern[*regex_len] == '|')
2518
152
      (*regex_len)++;
2519
153
    else
2520
154
      must_read_another_branch = false;
2521
116
  }
155
  }
2523
117
  if((current_regex == regex) && (pattern[*regex_len] == ')'))
156
  if((current_regex == regex.get()) && (pattern[*regex_len] == ')'))
2524
118
  {
157
  {
2526
119
    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_MISMATCHED_PAREN)) );
158
    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISMATCHED_PAREN)) );
2527
120
  }
159
  }
2528
121
  if(pattern[*regex_len])
160
  if(pattern[*regex_len])
2529
122
    (*regex_len)++;
161
    (*regex_len)++;
2530
162
  if(must_read_another_branch)
2531
163
    regex->add_branch(new CRegexXQuery_branch(current_regex));//add empty branch
2532
123
  regex->flags = 0;//finished initialization
164
  regex->flags = 0;//finished initialization
2533
124
  regex_depth--;
165
  regex_depth--;
2535
125
  return regex;
166
  return regex.release();
2536
126
}
167
}
2537
127
168
2539
128
CRegexAscii_branch* CRegexAscii_parser::parse_branch(const char *pattern, int *branch_len)
169
CRegexXQuery_branch* CRegexXQuery_parser::parse_branch(const char *pattern, int *branch_len)
2540
129
{
170
{
2541
130
  int piece_len;
171
  int piece_len;
2542
131
172
2545
132
  CRegexAscii_branch    *branch = new CRegexAscii_branch(current_regex);
173
  std::auto_ptr<CRegexXQuery_branch>    branch(new CRegexXQuery_branch(current_regex));
2546
133
  CRegexAscii_piece     *piece;
174
  CRegexXQuery_piece     *piece;
2547
134
  *branch_len = 0;
175
  *branch_len = 0;
2548
135
  while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')'))
176
  while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')'))
2549
136
  {
177
  {
2550
137
    piece = parse_piece(pattern+*branch_len, &piece_len);
178
    piece = parse_piece(pattern+*branch_len, &piece_len);
2551
138
    if(!piece)
179
    if(!piece)
2552
139
    {
180
    {
2553
140
      delete branch;
2554
141
      return NULL;
181
      return NULL;
2555
142
    }
182
    }
2556
183
    if(branch->piece_list.size() && dynamic_cast<CRegexXQuery_pinstart*>(piece->atom))
2557
184
    {
2558
185
      //found ^ that is not at the beginning of branch
2559
186
      throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), '^') );
2560
187
    }
2561
143
    branch->add_piece(piece);
188
    branch->add_piece(piece);
2562
144
    *branch_len += piece_len;
189
    *branch_len += piece_len;
2563
145
  }
190
  }
2567
146
  if(pattern[*branch_len] == '|')
191
  //if(pattern[*branch_len] == '|')
2568
147
    (*branch_len)++;
192
  //  (*branch_len)++;
2569
148
  return branch;
193
  return branch.release();
2570
149
}
194
}
2571
150
195
2572
151
//piece = atom + quantifier
196
//piece = atom + quantifier
2574
152
CRegexAscii_piece* CRegexAscii_parser::parse_piece(const char *pattern, int *piece_len)
197
CRegexXQuery_piece* CRegexXQuery_parser::parse_piece(const char *pattern, int *piece_len)
2575
153
{
198
{
2577
154
  CRegexAscii_piece *piece = new CRegexAscii_piece;
199
  std::auto_ptr<CRegexXQuery_piece>  piece(new CRegexXQuery_piece);
2578
155
  IRegexAtom  *atom;
200
  IRegexAtom  *atom;
2579
156
  *piece_len = 0;
201
  *piece_len = 0;
2580
157
202
2581
@@ -160,19 +205,18 @@
2582
160
  atom = read_atom(pattern, &atom_len);
205
  atom = read_atom(pattern, &atom_len);
2583
161
  if(!atom)
206
  if(!atom)
2584
162
  {
207
  {
2585
163
    delete piece;
2586
164
    return NULL;
208
    return NULL;
2587
165
  }
209
  }
2588
166
  piece->set_atom(atom);
210
  piece->set_atom(atom);
2589
167
  if(!(flags & REGEX_ASCII_LITERAL))
211
  if(!(flags & REGEX_ASCII_LITERAL))
2591
168
    read_quantifier(piece, pattern+atom_len, &quantif_len);
212
    read_quantifier(piece.get(), pattern+atom_len, &quantif_len);
2592
169
213
2593
170
  *piece_len += atom_len + quantif_len;
214
  *piece_len += atom_len + quantif_len;
2594
171
215
2596
172
  return piece;
216
  return piece.release();
2597
173
}
217
}
2598
174
218
2600
175
char CRegexAscii_parser::myishex(char c)
219
char CRegexXQuery_parser::myishex(char c)
2601
176
{
220
{
2602
177
  if((c >= '0') && (c <= '9'))
221
  if((c >= '0') && (c <= '9'))
2603
178
    return c-'0'+1;
222
    return c-'0'+1;
2604
@@ -183,26 +227,125 @@
2605
183
  return 0;//not a hex
227
  return 0;//not a hex
2606
184
}
228
}
2607
185
229
2614
186
bool CRegexAscii_parser::myisdigit(char c)
230
bool CRegexXQuery_parser::myisdigit(char c)
2615
187
{
231
{
2616
188
  return (c >= '0') || (c <= '9');
232
  return (c >= '0') && (c <= '9');
2617
189
}
233
}
2618
190
234
2619
191
char CRegexAscii_parser::readChar(const char *pattern, int *char_len, bool *is_multichar)
235
bool CRegexXQuery_parser::myisletterAZ(char c)
2620
236
{
2621
237
  return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
2622
238
}
2623
239
2624
240
static const unicode::code_point specials_extcp[] = {0xFFF0, 0xFFFD, 0};
2625
241
2626
242
static CRegexXQuery_parser::block_escape_t block_escape[] = 
2627
243
{
2628
244
{{0x0000, 0x007F}, NULL, "BasicLatin"},
2629
245
{{0x0080, 0x00FF}, NULL, "Latin-1Supplement"},
2630
246
{{0x0100, 0x017F}, NULL, "LatinExtended-A"},
2631
247
{{0x0180, 0x024F}, NULL, "LatinExtended-B"},
2632
248
{{0x0250, 0x02AF}, NULL, "IPAExtensions"},
2633
249
{{0x02B0, 0x02FF}, NULL, "SpacingModifierLetters"},
2634
250
{{0x0300, 0x036F}, NULL, "CombiningDiacriticalMarks"},
2635
251
{{0x0370, 0x03FF}, NULL, "Greek"},
2636
252
{{0x0400, 0x04FF}, NULL, "Cyrillic"},
2637
253
{{0x0530, 0x058F}, NULL, "Armenian"},
2638
254
{{0x0590, 0x05FF}, NULL, "Hebrew"},
2639
255
{{0x0600, 0x06FF}, NULL, "Arabic"},
2640
256
{{0x0700, 0x074F}, NULL, "Syriac"},
2641
257
{{0x0780, 0x07BF}, NULL, "Thaana"},
2642
258
{{0x0900, 0x097F}, NULL, "Devanagari"},
2643
259
{{0x0980, 0x09FF}, NULL, "Bengali"},
2644
260
{{0x0A00, 0x0A7F}, NULL, "Gurmukhi"},
2645
261
{{0x0A80, 0x0AFF}, NULL, "Gujarati"},
2646
262
{{0x0B00, 0x0B7F}, NULL, "Oriya"},
2647
263
{{0x0B80, 0x0BFF}, NULL, "Tamil"},
2648
264
{{0x0C00, 0x0C7F}, NULL, "Telugu"},
2649
265
{{0x0C80, 0x0CFF}, NULL, "Kannada"},
2650
266
{{0x0D00, 0x0D7F}, NULL, "Malayalam"},
2651
267
{{0x0D80, 0x0DFF}, NULL, "Sinhala"},
2652
268
{{0x0E00, 0x0E7F}, NULL, "Thai"},
2653
269
{{0x0E80, 0x0EFF}, NULL, "Lao"},
2654
270
{{0x0F00, 0x0FFF}, NULL, "Tibetan"},
2655
271
{{0x1000, 0x109F}, NULL, "Myanmar"},
2656
272
{{0x10A0, 0x10FF}, NULL, "Georgian"},
2657
273
{{0x1100, 0x11FF}, NULL, "HangulJamo"},
2658
274
{{0x1200, 0x137F}, NULL, "Ethiopic"},
2659
275
{{0x13A0, 0x13FF}, NULL, "Cherokee"},
2660
276
{{0x1400, 0x167F}, NULL, "UnifiedCanadianAboriginalSyllabics"},
2661
277
{{0x1680, 0x169F}, NULL, "Ogham"},
2662
278
{{0x16A0, 0x16FF}, NULL, "Runic"},
2663
279
{{0x1780, 0x17FF}, NULL, "Khmer"},
2664
280
{{0x1800, 0x18AF}, NULL, "Mongolian"},
2665
281
{{0x1E00, 0x1EFF}, NULL, "LatinExtendedAdditional"},
2666
282
{{0x1F00, 0x1FFF}, NULL, "GreekExtended"},
2667
283
{{0x2000, 0x206F}, NULL, "GeneralPunctuation"},
2668
284
{{0x2070, 0x209F}, NULL, "SuperscriptsandSubscripts"},
2669
285
{{0x20A0, 0x20CF}, NULL, "CurrencySymbols"},
2670
286
{{0x20D0, 0x20FF}, NULL, "CombiningMarksforSymbols"},
2671
287
{{0x2100, 0x214F}, NULL, "LetterlikeSymbols"},
2672
288
{{0x2150, 0x218F}, NULL, "NumberForms"},
2673
289
{{0x2190, 0x21FF}, NULL, "Arrows"},
2674
290
{{0x2200, 0x22FF}, NULL, "MathematicalOperators"},
2675
291
{{0x2300, 0x23FF}, NULL, "MiscellaneousTechnical"},
2676
292
{{0x2400, 0x243F}, NULL, "ControlPictures"},
2677
293
{{0x2440, 0x245F}, NULL, "OpticalCharacterRecognition"},
2678
294
{{0x2460, 0x24FF}, NULL, "EnclosedAlphanumerics"},
2679
295
{{0x2500, 0x257F}, NULL, "BoxDrawing"},
2680
296
{{0x2580, 0x259F}, NULL, "BlockElements"},
2681
297
{{0x25A0, 0x25FF}, NULL, "GeometricShapes"},
2682
298
{{0x2600, 0x26FF}, NULL, "MiscellaneousSymbols"},
2683
299
{{0x2700, 0x27BF}, NULL, "Dingbats"},
2684
300
{{0x2800, 0x28FF}, NULL, "BraillePatterns"},
2685
301
{{0x2E80, 0x2EFF}, NULL, "CJKRadicalsSupplement"},
2686
302
{{0x2F00, 0x2FDF}, NULL, "KangxiRadicals"},
2687
303
{{0x2FF0, 0x2FFF}, NULL, "IdeographicDescriptionCharacters"},
2688
304
{{0x3000, 0x303F}, NULL, "CJKSymbolsandPunctuation"},
2689
305
{{0x3040, 0x309F}, NULL, "Hiragana"},
2690
306
{{0x30A0, 0x30FF}, NULL, "Katakana"},
2691
307
{{0x3100, 0x312F}, NULL, "Bopomofo"},
2692
308
{{0x3130, 0x318F}, NULL, "HangulCompatibilityJamo"},
2693
309
{{0x3190, 0x319F}, NULL, "Kanbun"},
2694
310
{{0x31A0, 0x31BF}, NULL, "BopomofoExtended"},
2695
311
{{0x3200, 0x32FF}, NULL, "EnclosedCJKLettersandMonths"},
2696
312
{{0x3300, 0x33FF}, NULL, "CJKCompatibility"},
2697
313
{{0x3400, 0x4DB5}, NULL, "CJKUnifiedIdeographsExtensionA"},
2698
314
{{0x4E00, 0x9FFF}, NULL, "CJKUnifiedIdeographs"},
2699
315
{{0xA000, 0xA48F}, NULL, "YiSyllables"},
2700
316
{{0xA490, 0xA4CF}, NULL, "YiRadicals"},
2701
317
{{0xAC00, 0xD7A3}, NULL, "HangulSyllables"},
2702
318
{{0xE000, 0xF8FF}, NULL, "PrivateUse"},
2703
319
{{0xF900, 0xFAFF}, NULL, "CJKCompatibilityIdeographs"},
2704
320
{{0xFB00, 0xFB4F}, NULL, "AlphabeticPresentationForms"},
2705
321
{{0xFB50, 0xFDFF}, NULL, "ArabicPresentationForms-A"},
2706
322
{{0xFE20, 0xFE2F}, NULL, "CombiningHalfMarks"},
2707
323
{{0xFE30, 0xFE4F}, NULL, "CJKCompatibilityForms"},
2708
324
{{0xFE50, 0xFE6F}, NULL, "SmallFormVariants"},
2709
325
{{0xFE70, 0xFEFE}, NULL, "ArabicPresentationForms-B"},
2710
326
{{0xFEFF, 0xFEFF}, specials_extcp, "Specials"},
2711
327
{{0xFF00, 0xFFEF}, NULL, "HalfwidthandFullwidthForms"}
2712
328
};
2713
329
2714
330
CRegexXQuery_charmatch* CRegexXQuery_parser::readChar(const char *pattern, 
2715
331
                                  int *char_len,
2716
332
                                  enum CHARGROUP_t *multichar_type)
2717
192
{
333
{
2718
193
  char  c = 0;
334
  char  c = 0;
2719
194
  *char_len = 0;
335
  *char_len = 0;
2721
195
  *is_multichar = false;
336
  *multichar_type = CHARGROUP_NO_MULTICHAR;
2722
196
  switch(pattern[*char_len])
337
  switch(pattern[*char_len])
2723
197
  {
338
  {
2724
198
  case '\\':
339
  case '\\':
2726
199
  {  (*char_len)++;
340
  {
2727
341
    (*char_len)++;
2728
200
    switch(pattern[*char_len])
342
    switch(pattern[*char_len])
2729
201
    {
343
    {
2733
202
    case 'n': c = '\n';break;
344
    case 'n': c = '\n';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2734
203
    case 'r': c = '\r';break;
345
    case 'r': c = '\r';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2735
204
    case 't': c = '\t';break;
346
    case 't': c = '\t';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2736
205
    case '\\':
347
    case '\\':
2737
348
    case '/'://+
2738
206
    case '|':
349
    case '|':
2739
207
    case '.':
350
    case '.':
2740
208
    case '?':
351
    case '?':
2741
@@ -216,19 +359,205 @@
2742
216
    case '['://#x5B
359
    case '['://#x5B
2743
217
    case ']'://#x5D
360
    case ']'://#x5D
2744
218
    case '^'://#x5E
361
    case '^'://#x5E
2745
362
    case '$'://+
2746
219
       c = pattern[*char_len];
363
       c = pattern[*char_len];
2748
220
       break;
364
      (*char_len)++;
2749
365
      *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII;
2750
366
      return new CRegexXQuery_char_ascii(current_regex, c);
2751
221
    case 'p'://catEsc
367
    case 'p'://catEsc
2752
222
    case 'P'://complEsc
368
    case 'P'://complEsc
2753
369
    {
2754
223
      //ignore the prop for now
370
      //ignore the prop for now
2760
224
      c = pattern[*char_len];
371
      *multichar_type = CHARGROUP_FLAGS_MULTICHAR_p;//(CHARGROUP_t)((pattern[*char_len] == 'P') ? 128 : 0);
2761
225
      *is_multichar = true;
372
      bool is_reverse = (pattern[*char_len] == 'P');
2762
226
      if(pattern[*char_len+1] == '{')
373
      c = 0;
2763
227
      {
374
      if(pattern[(*char_len)+1] != '{')
2764
228
        while(pattern[*char_len] != '}')
375
      {
2765
376
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2766
377
      }
2767
378
      (*char_len) += 2;
2768
379
      switch(pattern[*char_len])
2769
380
      {//IsCategory
2770
381
      case 'L':
2771
382
      {
2772
383
        switch(pattern[(*char_len)+1])
2773
384
        {
2774
385
        case '}':
2775
386
          c = unicode::UNICODE_Ll + 50;break;
2776
387
        case 'u':
2777
388
          c = unicode::UNICODE_Lu; (*char_len)++;break;
2778
389
        case 'l':
2779
390
          c = unicode::UNICODE_Ll; (*char_len)++;break;
2780
391
        case 't':
2781
392
          c = unicode::UNICODE_Lt; (*char_len)++;break;
2782
393
        case 'm':
2783
394
          c = unicode::UNICODE_Lm; (*char_len)++;break;
2784
395
        case 'o':
2785
396
          c = unicode::UNICODE_Lo; (*char_len)++;break;
2786
397
        default:
2787
398
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PL_CONSTRUCT)) );
2788
399
        }
2789
400
      }break;
2790
401
      case 'M':
2791
402
      {
2792
403
        switch(pattern[(*char_len)+1])
2793
404
        {
2794
405
        case '}':
2795
406
          c = unicode::UNICODE_Mc + 50;break;
2796
407
        case 'n':
2797
408
          c = unicode::UNICODE_Mn; (*char_len)++;break;
2798
409
        case 'c':
2799
410
          c = unicode::UNICODE_Mc; (*char_len)++;break;
2800
411
        case 'e':
2801
412
          c = unicode::UNICODE_Me; (*char_len)++;break;
2802
413
        default:
2803
414
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PM_CONSTRUCT)) );
2804
415
        }
2805
416
      }break;
2806
417
      case 'N':
2807
418
      {
2808
419
        switch(pattern[(*char_len)+1])
2809
420
        {
2810
421
        case '}':
2811
422
          c = unicode::UNICODE_Nd + 50;break;
2812
423
        case 'd':
2813
424
          c = unicode::UNICODE_Nd; (*char_len)++;break;
2814
425
        case 'l':
2815
426
          c = unicode::UNICODE_Nl; (*char_len)++;break;
2816
427
        case 'o':
2817
428
          c = unicode::UNICODE_No; (*char_len)++;break;
2818
429
        default:
2819
430
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PN_CONSTRUCT)) );
2820
431
        }
2821
432
      }break;
2822
433
      case 'P':
2823
434
      {
2824
435
        switch(pattern[(*char_len)+1])
2825
436
        {
2826
437
        case '}':
2827
438
          c = unicode::UNICODE_Pc + 50;break;
2828
439
        case 'c':
2829
440
          c = unicode::UNICODE_Pc; (*char_len)++;break;
2830
441
        case 'd':
2831
442
          c = unicode::UNICODE_Pd; (*char_len)++;break;
2832
443
        case 's':
2833
444
          c = unicode::UNICODE_Ps; (*char_len)++;break;
2834
445
        case 'e':
2835
446
          c = unicode::UNICODE_Pe; (*char_len)++;break;
2836
447
        case 'i':
2837
448
          c = unicode::UNICODE_Pi; (*char_len)++;break;
2838
449
        case 'f':
2839
450
          c = unicode::UNICODE_Pf; (*char_len)++;break;
2840
451
        case 'o':
2841
452
          c = unicode::UNICODE_Po; (*char_len)++;break;
2842
453
        default:
2843
454
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PP_CONSTRUCT)) );
2844
455
        }
2845
456
      }break;
2846
457
      case 'Z':
2847
458
      {
2848
459
        switch(pattern[(*char_len)+1])
2849
460
        {
2850
461
        case '}':
2851
462
          c = unicode::UNICODE_Zl + 50;break;
2852
463
        case 's':
2853
464
          c = unicode::UNICODE_Zs; (*char_len)++;break;
2854
465
        case 'l':
2855
466
          c = unicode::UNICODE_Zl; (*char_len)++;break;
2856
467
        case 'p':
2857
468
          c = unicode::UNICODE_Zp; (*char_len)++;break;
2858
469
        default:
2859
470
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PZ_CONSTRUCT)) );
2860
471
        }
2861
472
      }break;
2862
473
      case 'S':
2863
474
      {
2864
475
        switch(pattern[(*char_len)+1])
2865
476
        {
2866
477
        case '}':
2867
478
          c = unicode::UNICODE_Sc + 50;break;
2868
479
        case 'm':
2869
480
          c = unicode::UNICODE_Sm; (*char_len)++;break;
2870
481
        case 'c':
2871
482
          c = unicode::UNICODE_Sc; (*char_len)++;break;
2872
483
        case 'k':
2873
484
          c = unicode::UNICODE_Sk; (*char_len)++;break;
2874
485
        case 'o':
2875
486
          c = unicode::UNICODE_So; (*char_len)++;break;
2876
487
        default:
2877
488
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PS_CONSTRUCT)) );
2878
489
        }
2879
490
      }break;
2880
491
      case 'C':
2881
492
      {
2882
493
        switch(pattern[(*char_len)+1])
2883
494
        {
2884
495
        case '}':
2885
496
          c = unicode::UNICODE_Cc + 50;break;
2886
497
        case 'c':
2887
498
          c = unicode::UNICODE_Cc; (*char_len)++;break;
2888
499
        case 'f':
2889
500
          c = unicode::UNICODE_Cf; (*char_len)++;break;
2890
501
        case 'o':
2891
502
          c = unicode::UNICODE_Co; (*char_len)++;break;
2892
503
        case 'n':
2893
504
          c = unicode::UNICODE_Cn; (*char_len)++;break;
2894
505
        default:
2895
506
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PC_CONSTRUCT)) );
2896
507
        }
2897
508
      }break;
2898
509
      }//end switch
2899
510
      if(c)
2900
511
      {
2901
512
        if(pattern[(*char_len) + 1] != '}')
2902
513
            throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2903
514
        (*char_len)++;
2904
515
        (*char_len)++;
2905
516
        return new CRegexXQuery_multicharP(current_regex, c, is_reverse);
2906
517
      }
2907
518
      if(pattern[*char_len] == 'I')
2908
519
      {
2909
520
        if(pattern[(*char_len)+1] == 's')//IsBlock
2910
521
        {
2911
522
          *multichar_type = CHARGROUP_FLAGS_MULTICHAR_Is;
2912
523
          (*char_len) += 2;
2913
524
          zstring block_name;
2914
525
          char tempc = pattern[(*char_len)];
2915
526
          while(tempc && (tempc != '}'))
2916
527
          {
2917
528
            if(!myisletterAZ(tempc) && !myisdigit(tempc) && (tempc != '-'))
2918
529
              throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2919
530
            block_name.append(1, tempc);
2920
531
            (*char_len)++;
2921
532
            tempc = pattern[(*char_len)];
2922
533
          }
2923
534
          if(!tempc)
2924
535
            throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2925
536
          //search for the block name
2926
537
          int i;
2927
538
          int nr_blocks = sizeof(block_escape)/sizeof(CRegexXQuery_parser::block_escape_t);
2928
539
          for(i=0;i<nr_blocks;i++)
2929
540
          {
2930
541
            if(compare_ascii_i(block_name.c_str(), block_escape[i].group_name))
2931
542
            {
2932
543
              c = i;
2933
544
              break;
2934
545
            }
2935
546
          }
2936
547
          if(i==nr_blocks)
2937
548
            throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PIs_CONSTRUCT)) );
2938
229
          (*char_len)++;
549
          (*char_len)++;
2941
230
      }
550
          return new CRegexXQuery_multicharIs(current_regex, i, is_reverse);
2942
231
      break;
551
        }
2943
552
        else
2944
553
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2945
554
      }
2946
555
      else
2947
556
      {
2948
557
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2949
558
      }
2950
559
      break;//unreachable
2951
560
    }//end case 'p'
2952
232
      //multiCharEsc
561
      //multiCharEsc
2953
233
    case 's':
562
    case 's':
2954
234
    case 'S':
563
    case 'S':
2955
@@ -240,40 +569,104 @@
2956
240
    case 'D':
569
    case 'D':
2957
241
    case 'w':
570
    case 'w':
2958
242
    case 'W':
571
    case 'W':
2960
243
      *is_multichar = true;
572
       *multichar_type = CHARGROUP_FLAGS_MULTICHAR_OTHER;
2961
244
       c = pattern[*char_len];
573
       c = pattern[*char_len];
2976
245
       break;
574
       (*char_len)++;
2977
246
    }
575
       return new CRegexXQuery_multicharOther(current_regex, c);
2978
247
    break;
576
    case 'u'://unicode codepoint \uXXXX
2979
248
  }
577
    {
2980
249
  case '#':///might be #xXX
578
      unicode::code_point utf8c = 0;
2981
250
  {
579
      (*char_len)++;
2982
251
    if((pattern[*char_len+1] == 'x') &&
580
      for(int i=0;i<4;i++)
2983
252
      myishex(pattern[*char_len+2]) && myishex(pattern[*char_len+3]))
581
      {
2984
253
    {
582
        char hex = myishex(pattern[*char_len]);
2985
254
      c = (myishex(pattern[*char_len+2])-1)<<4 | (myishex(pattern[*char_len+3])-1);
583
        if(!hex)
2986
255
      *char_len += 3;
584
        {
2987
256
      break;
585
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) );
2988
257
    }
586
        }
2989
258
  }
587
        utf8c <<= 4;
2990
588
        utf8c |= (hex-1) & 0x0f;
2991
589
        (*char_len)++;
2992
590
      }
2993
591
      return create_charmatch(utf8c, NULL, 0, multichar_type);
2994
592
    }
2995
593
    case 'U'://unicode codepoint \UXXXXXXXX
2996
594
    {
2997
595
      unicode::code_point utf8c = 0;
2998
596
      (*char_len)++;
2999
597
      for(int i=0;i<8;i++)
3000
598
      {
3001
599
        char hex = myishex(pattern[*char_len]);
3002
600
        if(!hex)
3003
601
        {
3004
602
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) );
3005
603
        }
3006
604
        utf8c <<= 4;
3007
605
        utf8c |= (hex-1) & 0x0f;
3008
606
        (*char_len)++;
3009
607
      }
3010
608
      return create_charmatch(utf8c, NULL, 0, multichar_type);
3011
609
    }
3012
610
    default:
3013
611
      throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_ESC_CHAR)) );
3014
612
    }
3015
613
    assert(false);
3016
614
    break;//unreachable
3017
615
  }//end case '\'
3018
259
  default:
616
  default:
3030
260
     c = pattern[*char_len];
617
  {
3031
261
     break;
618
    const char *temp_pattern = pattern;
3032
262
  }
619
    unicode::code_point utf8c = utf8::next_char(temp_pattern);
3033
263
620
    (*char_len) = temp_pattern - pattern;
3034
264
  (*char_len)++;
621
    return create_charmatch(utf8c, pattern, *char_len, multichar_type);
3035
265
  return c;
622
  }
3036
266
}
623
  }
3037
267
624
  return NULL;
3038
268
625
}
3039
269
626
3040
270
IRegexAtom* CRegexAscii_parser::read_atom(const char *pattern, int *atom_len)
627
CRegexXQuery_charmatch *CRegexXQuery_parser::create_charmatch(unicode::code_point utf8c,
3041
628
                                                              const char *pattern, int utf8len,
3042
629
                                                              enum CHARGROUP_t *multichar_type)
3043
630
{
3044
631
  if(utf8c <= 0x7F)
3045
632
  {
3046
633
    *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII;
3047
634
    if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3048
635
      return new CRegexXQuery_char_ascii_i(current_regex, (char)utf8c);
3049
636
    else
3050
637
      return new CRegexXQuery_char_ascii(current_regex, (char)utf8c);
3051
638
  }
3052
639
  else
3053
640
  {
3054
641
    *multichar_type = CHARGROUP_FLAGS_ONECHAR_UNICODE;
3055
642
    if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3056
643
      return new CRegexXQuery_char_unicode_i(current_regex, utf8c);
3057
644
    else
3058
645
    {
3059
646
      if(pattern)
3060
647
        return new CRegexXQuery_char_unicode(current_regex, pattern, utf8len);
3061
648
      else
3062
649
        return new CRegexXQuery_char_unicode_cp(current_regex, utf8c);
3063
650
    }
3064
651
  }
3065
652
}
3066
653
3067
654
IRegexAtom* CRegexXQuery_parser::read_atom(const char *pattern, int *atom_len)
3068
271
{
655
{
3069
272
  *atom_len = 0;
656
  *atom_len = 0;
3074
273
  char  c;
657
  if(flags & REGEX_ASCII_LITERAL)
3075
274
  bool is_end_line = false;
658
  {
3076
275
  c = pattern[*atom_len];
659
    unicode::code_point  utf8c;
3077
276
  if((!(flags & REGEX_ASCII_LITERAL)) && (c == '\\'))
660
    //bool is_end_line = false;
3078
661
    const char *temp_pattern = pattern;
3079
662
    utf8c = utf8::next_char(temp_pattern);
3080
663
    *atom_len = temp_pattern - pattern;
3081
664
    enum CHARGROUP_t multichar_type;
3082
665
    return create_charmatch(utf8c, pattern, *atom_len, &multichar_type);
3083
666
  }
3084
667
3085
668
  char c = *pattern;
3086
669
  if(c == '\\')
3087
277
  {
670
  {
3088
278
    //check for back reference
671
    //check for back reference
3089
279
    if(myisdigit(pattern[(*atom_len)+1]))
672
    if(myisdigit(pattern[(*atom_len)+1]))
3090
@@ -281,13 +674,13 @@
3091
281
      (*atom_len)++;
674
      (*atom_len)++;
3092
282
      if(pattern[*atom_len] == '0')
675
      if(pattern[*atom_len] == '0')
3093
283
      {
676
      {
3095
284
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
677
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), 0, current_regex->subregex.size()) );
3096
285
      }
678
      }
3097
286
      unsigned int backref = pattern[*atom_len] - '0';
679
      unsigned int backref = pattern[*atom_len] - '0';
3098
287
      if((backref > current_regex->subregex.size()) ||
680
      if((backref > current_regex->subregex.size()) ||
3099
288
        (current_regex->subregex.at(backref-1)->flags != 0))
681
        (current_regex->subregex.at(backref-1)->flags != 0))
3100
289
      {
682
      {
3102
290
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
683
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), backref, current_regex->subregex.size()) );
3103
291
      }
684
      }
3104
292
      while(current_regex->subregex.size() >= backref*10)
685
      while(current_regex->subregex.size() >= backref*10)
3105
293
      {
686
      {
3106
@@ -303,70 +696,86 @@
3107
303
            break;
696
            break;
3108
304
        }
697
        }
3109
305
      }
698
      }
3111
306
      return new CRegexAscii_backref(current_regex, backref);
699
      (*atom_len)++;
3112
700
      return new CRegexXQuery_backref(current_regex, backref);
3113
307
    }
701
    }
3114
308
  }
702
  }
3115
703
  if(c == '^')
3116
704
  {
3117
705
    (*atom_len)++;
3118
706
    return new CRegexXQuery_pinstart(current_regex);
3119
707
  }
3120
708
  if((c == '}') || (c == '{') || (c == '?') || (c == '*') || (c == '+') || (c == '|'))
3121
709
  {
3122
710
    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), c) );
3123
711
  }
3124
309
  switch(c)
712
  switch(c)
3125
310
  {
713
  {
3126
311
  case '[':
714
  case '[':
3127
312
  {
715
  {
3137
313
    if(!(flags & REGEX_ASCII_LITERAL))
716
    (*atom_len)++;
3138
314
    {
717
    CRegexXQuery_chargroup *chargroup = NULL;
3139
315
      (*atom_len)++;
718
    int chargroup_len;
3140
316
      CRegexAscii_chargroup *chargroup = NULL;
719
    chargroup = readchargroup(pattern+*atom_len, &chargroup_len);
3141
317
      int chargroup_len;
720
    *atom_len += chargroup_len;
3142
318
      chargroup = readchargroup(pattern+*atom_len, &chargroup_len);
721
    return chargroup;
3134
319
      *atom_len += chargroup_len;
3135
320
      return chargroup;
3136
321
    }
3143
322
  }
722
  }
3144
323
  case '.'://WildCharEsc
723
  case '.'://WildCharEsc
3145
324
  {
724
  {
3152
325
    if(!(flags & REGEX_ASCII_LITERAL))
725
    (*atom_len)++;
3153
326
    {
726
    return new CRegexXQuery_wildchar(current_regex);
3148
327
      CRegexAscii_wildchar  *wildchar = new CRegexAscii_wildchar(current_regex);
3149
328
      (*atom_len)++;
3150
329
      return wildchar;
3151
330
    }
3154
331
  }
727
  }
3155
332
  case '('://begin an embedded reg exp
728
  case '('://begin an embedded reg exp
3156
333
  {  
729
  {  
3166
334
    if(!(flags & REGEX_ASCII_LITERAL))
730
    (*atom_len)++;
3167
335
    {
731
    CRegexXQuery_regex *emb_regex = NULL;
3168
336
      (*atom_len)++;
732
    int   regex_len;
3169
337
      CRegexAscii_regex *emb_regex = NULL;
733
    emb_regex = parse_regexp(pattern + *atom_len, &regex_len);
3170
338
      int   regex_len;
734
    *atom_len += regex_len;
3171
339
      emb_regex = parse_regexp(pattern + *atom_len, &regex_len);
735
    return emb_regex;
3163
340
      *atom_len += regex_len;
3164
341
      return emb_regex;
3165
342
    }
3172
343
  }
736
  }
3173
344
  case '$'://end line
737
  case '$'://end line
3178
345
    if(!(flags & REGEX_ASCII_LITERAL))
738
    //is_end_line = true;
3179
346
    {
739
    (*atom_len)++;
3180
347
      is_end_line = true;
740
    return new CRegexXQuery_endline(current_regex);
3177
348
    }
3181
349
  default:
741
  default:
3182
350
  {  
742
  {  
3184
351
    char  c;
743
    //char  c;
3185
744
    CRegexXQuery_charmatch *charmatch = NULL;
3186
352
    int   c_len;
745
    int   c_len;
3191
353
    bool  is_multichar = false;
746
    CHARGROUP_t   multichar_type = CHARGROUP_NO_MULTICHAR;
3192
354
    if(!(flags & REGEX_ASCII_LITERAL))
747
    *atom_len = 0;
3193
355
      c = readChar(pattern+*atom_len, &c_len, &is_multichar);
748
    while(pattern[*atom_len])
3190
356
    else
3194
357
    {
749
    {
3197
358
      c = pattern[*atom_len];
750
      charmatch = readChar(pattern+*atom_len, &c_len, &multichar_type);
3198
359
      c_len = 1;
751
      *atom_len += c_len;
3199
752
      if((flags & REGEX_ASCII_NO_WHITESPACE) && (multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII))
3200
753
      {
3201
754
        char c = (char)charmatch->get_c();
3202
755
        if((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n'))
3203
756
        {
3204
757
          //ignore this whitespace
3205
758
          delete charmatch;
3206
759
          continue;
3207
760
        }
3208
761
        else
3209
762
          break;
3210
763
      }
3211
764
      else
3212
765
        break;
3213
360
    }
766
    }
3217
361
    CRegexAscii_chargroup *chargroup = new CRegexAscii_chargroup(current_regex);
767
    /*
3218
362
    if(is_multichar)
768
    std::auto_ptr<CRegexXQuery_chargroup> chargroup(new CRegexXQuery_chargroup(current_regex));
3219
363
      chargroup->addMultiChar(c);
769
    if(multichar_type)
3220
770
      chargroup->addMultiChar(c, multichar_type);
3221
364
    else if(is_end_line)
771
    else if(is_end_line)
3222
365
      chargroup->addEndLine();
772
      chargroup->addEndLine();
3223
366
    else
773
    else
3225
367
      chargroup->addCharRange(c, c);
774
      chargroup->addOneChar(c);
3226
368
    *atom_len += c_len;
775
    *atom_len += c_len;
3228
369
    return chargroup;
776
    return chargroup.release();
3229
777
    */
3230
778
    return charmatch;
3231
370
  }
779
  }
3232
371
  }
780
  }
3233
372
}
781
}
3234
@@ -374,81 +783,119 @@
3235
374
//read until ']'
783
//read until ']'
3236
375
//posCharGroup  ::=   ( charRange | charClassEsc )+  
784
//posCharGroup  ::=   ( charRange | charClassEsc )+  
3237
376
//charRange     ::=    seRange | XmlCharIncDash
785
//charRange     ::=    seRange | XmlCharIncDash
3239
377
CRegexAscii_chargroup* CRegexAscii_parser::readchargroup(const char *pattern, int *chargroup_len)
786
CRegexXQuery_chargroup* CRegexXQuery_parser::readchargroup(const char *pattern, int *chargroup_len)
3240
378
{
787
{
3242
379
  CRegexAscii_chargroup *chargroup = NULL;
788
  std::auto_ptr<CRegexXQuery_chargroup> chargroup;
3243
380
  *chargroup_len = 0;
789
  *chargroup_len = 0;
3244
381
  if(pattern[*chargroup_len] == '^')//negative group
790
  if(pattern[*chargroup_len] == '^')//negative group
3245
382
  {
791
  {
3246
383
    (*chargroup_len)++;
792
    (*chargroup_len)++;
3248
384
    chargroup = new CRegexAscii_negchargroup(current_regex);
793
    chargroup.reset(new CRegexXQuery_negchargroup(current_regex));
3249
385
  }
794
  }
3250
386
  else
795
  else
3252
387
    chargroup = new CRegexAscii_chargroup(current_regex);
796
    chargroup.reset(new CRegexXQuery_chargroup(current_regex));
3253
388
  while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']'))
797
  while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']'))
3254
389
  {
798
  {
3257
390
    char  c1, c2;
799
    //char  c1, c2;
3258
391
    bool  is_multichar;
800
    CHARGROUP_t  multichar_type = CHARGROUP_NO_MULTICHAR;
3259
392
    int   c1_len;
801
    int   c1_len;
3263
393
    c1 = pattern[*chargroup_len];
802
    if((pattern[*chargroup_len] == '-') && (pattern[(*chargroup_len)+1] == '['))//charClassSub
3261
394
    c2 = pattern[*chargroup_len+1];
3262
395
    if((c1 == '-') && (c2 == '['))//charClassSub
3264
396
    {
803
    {
3265
397
      int classsub_len;
804
      int classsub_len;
3267
398
      CRegexAscii_chargroup *classsub = readchargroup(pattern + *chargroup_len+1 + 1, &classsub_len);
805
      CRegexXQuery_chargroup *classsub = readchargroup(pattern + (*chargroup_len)+1 + 1, &classsub_len);
3268
399
      if(!classsub)
806
      if(!classsub)
3269
400
      {
807
      {
3272
401
        delete chargroup;
808
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_SUBCLASS)) );
3271
402
        return NULL;
3273
403
      }
809
      }
3274
404
      chargroup->addClassSub(classsub);
810
      chargroup->addClassSub(classsub);
3275
405
      *chargroup_len += 2 + classsub_len + 1;
811
      *chargroup_len += 2 + classsub_len + 1;
3276
406
      if(pattern[*chargroup_len-1] != ']')
812
      if(pattern[*chargroup_len-1] != ']')
3277
407
      {
813
      {
3280
408
        delete chargroup;
814
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_USE_OF_SUBCLASS)) );
3279
409
        return NULL;
3281
410
      }
815
      }
3283
411
      return chargroup;
816
      return chargroup.release();
3284
412
    }
817
    }
3285
413
818
3288
414
    c1 = readChar(pattern+*chargroup_len, &c1_len, &is_multichar);
819
    std::unique_ptr<CRegexXQuery_charmatch> charmatch(readChar(pattern+*chargroup_len, &c1_len, &multichar_type));
3289
415
    if(is_multichar)//first char is multichar
820
    if((multichar_type == CHARGROUP_FLAGS_MULTICHAR_p) ||
3290
821
      (multichar_type == CHARGROUP_FLAGS_MULTICHAR_Is) ||
3291
822
      (multichar_type == CHARGROUP_FLAGS_MULTICHAR_OTHER))//first char is multichar
3292
416
    {
823
    {
3294
417
      chargroup->addMultiChar(c1);
824
      if((pattern[*chargroup_len+c1_len] == '-') &&///should not be a range
3295
825
        (pattern[*chargroup_len+c1_len+1] != ']'))
3296
826
      {
3297
827
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
3298
828
      }
3299
829
      //chargroup->addMultiChar(c1, multichar_type);
3300
830
      chargroup->addCharMatch(charmatch.release());
3301
418
      *chargroup_len += c1_len;
831
      *chargroup_len += c1_len;
3302
419
      continue;
832
      continue;
3303
420
    }
833
    }
3305
421
    if(pattern[*chargroup_len+c1_len] == '-')///might be a range
834
    (*chargroup_len) += c1_len;
3306
835
    if(pattern[*chargroup_len] == '-')///might be a range
3307
422
    {
836
    {
3309
423
      if(pattern[*chargroup_len+c1_len+1] == ']')//no range, just the last char is '-'
837
      if(pattern[(*chargroup_len)+1] == ']')//no range, just the last char is '-'
3310
424
      {
838
      {
3314
425
        chargroup->addCharRange(c1, c1);
839
        //chargroup->addOneChar(c1);
3315
426
        chargroup->addCharRange('-', '-');
840
        //chargroup->addOneChar('-');
3316
427
        *chargroup_len += c1_len + 1;
841
        chargroup->addCharMatch(charmatch.release());
3317
842
        chargroup->addCharMatch(new CRegexXQuery_char_ascii(current_regex, '-'));
3318
843
        (*chargroup_len)++;
3319
428
        continue;
844
        continue;
3320
429
      }
845
      }
3322
430
      else
846
      else if(pattern[(*chargroup_len)+1] != '[')
3323
431
      {
847
      {
3324
432
        //it is a range
848
        //it is a range
3332
433
        char c3;
849
        (*chargroup_len)++;
3333
434
        int  c3_len;
850
        std::unique_ptr<CRegexXQuery_charmatch>  charmatch2;
3334
435
        c3 = readChar(pattern+*chargroup_len+c1_len+1, &c3_len, &is_multichar);
851
        CHARGROUP_t  multichar_type2 = CHARGROUP_NO_MULTICHAR;
3335
436
        if(is_multichar)
852
        int  c2_len;
3336
437
          return NULL;//error
853
        charmatch2.reset(readChar(pattern+(*chargroup_len), &c2_len, &multichar_type2));
3337
438
        chargroup->addCharRange(c1, c3);
854
        if((multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII) &&
3338
439
        *chargroup_len += c1_len + 1 + c3_len;
855
          (multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII))//second char in range is multichar
3339
856
        {
3340
857
          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
3341
858
        }
3342
859
        //chargroup->addCharRange(c1, c3);
3343
860
        if((multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII) && (multichar_type2 == CHARGROUP_FLAGS_ONECHAR_ASCII))
3344
861
        {
3345
862
          if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3346
863
            chargroup->addCharMatch(new CRegexXQuery_char_range_ascii_i(current_regex, 
3347
864
                                                                    (char)charmatch->get_c(),
3348
865
                                                                    (char)charmatch2->get_c()));
3349
866
          else
3350
867
            chargroup->addCharMatch(new CRegexXQuery_char_range_ascii(current_regex, 
3351
868
                                                                    (char)charmatch->get_c(),
3352
869
                                                                    (char)charmatch2->get_c()));
3353
870
        }
3354
871
        else
3355
872
        {
3356
873
          if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3357
874
            chargroup->addCharMatch(new CRegexXQuery_char_range_unicode_i(current_regex, 
3358
875
                                                                    charmatch->get_c(),
3359
876
                                                                    charmatch2->get_c()));
3360
877
          else
3361
878
            chargroup->addCharMatch(new CRegexXQuery_char_range_unicode(current_regex, 
3362
879
                                                                    charmatch->get_c(),
3363
880
                                                                    charmatch2->get_c()));
3364
881
        }
3365
882
        *chargroup_len += c2_len;
3366
440
        continue;
883
        continue;
3367
441
      }
884
      }
3368
442
    }
885
    }
3371
443
    chargroup->addCharRange(c1, c1);
886
    //chargroup->addOneChar(c1);
3372
444
    *chargroup_len += c1_len;
887
    chargroup->addCharMatch(charmatch.release());
3373
445
  }
888
  }
3374
446
  if(pattern[*chargroup_len])
889
  if(pattern[*chargroup_len])
3375
447
    (*chargroup_len)++;
890
    (*chargroup_len)++;
3377
448
  return chargroup;
891
  else
3378
892
  {
3379
893
    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISSING_CLOSE_BRACKET)) );
3380
894
  }
3381
895
  return chargroup.release();
3382
449
}
896
}
3383
450
897
3385
451
void CRegexAscii_parser::read_quantifier(CRegexAscii_piece *piece,
898
void CRegexXQuery_parser::read_quantifier(CRegexXQuery_piece *piece,
3386
452
                                         const char *pattern, int *quantif_len)
899
                                         const char *pattern, int *quantif_len)
3387
453
{
900
{
3388
454
  *quantif_len = 0;
901
  *quantif_len = 0;
3389
@@ -496,6 +943,10 @@
3390
496
        max = max*10 + pattern[*quantif_len] - '0';
943
        max = max*10 + pattern[*quantif_len] - '0';
3391
497
        (*quantif_len)++;
944
        (*quantif_len)++;
3392
498
      }
945
      }
3393
946
      if(max < min)
3394
947
      {
3395
948
        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MAX_LT_MIN)) );
3396
949
      }
3397
499
      piece->set_quantifier_min_max(min, max, true);
950
      piece->set_quantifier_min_max(min, max, true);
3398
500
    }
951
    }
3399
501
    while(pattern[*quantif_len] && (pattern[*quantif_len] != '}'))
952
    while(pattern[*quantif_len] && (pattern[*quantif_len] != '}'))
3400
@@ -524,23 +975,25 @@
3401
524
///Constructors and destructors and internal functions
975
///Constructors and destructors and internal functions
3402
525
////////////////////////////
976
////////////////////////////
3403
526
977
3405
527
CRegexAscii_regex::CRegexAscii_regex(CRegexAscii_regex *topregex) : IRegexAtom(topregex?topregex:this)
978
CRegexXQuery_regex::CRegexXQuery_regex(CRegexXQuery_regex *topregex) : IRegexAtom(topregex?topregex:this)
3406
528
{
979
{
3407
529
  matched_source = NULL;
980
  matched_source = NULL;
3408
530
  matched_len = 0;
981
  matched_len = 0;
3409
982
//  backup_matched_source = NULL;
3410
983
//  backup_matched_len = 0;
3411
531
  flags = 128;//set to 0 after initialization
984
  flags = 128;//set to 0 after initialization
3412
532
}
985
}
3413
533
986
3415
534
CRegexAscii_regex::~CRegexAscii_regex()
987
CRegexXQuery_regex::~CRegexXQuery_regex()
3416
535
{
988
{
3418
536
  std::list<CRegexAscii_branch*>::iterator  branch_it;
989
  std::list<CRegexXQuery_branch*>::iterator  branch_it;
3419
537
990
3420
538
  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
991
  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3421
539
  {
992
  {
3422
540
    delete (*branch_it);
993
    delete (*branch_it);
3423
541
  }
994
  }
3424
542
/*
995
/*
3426
543
  std::vector<CRegexAscii_regex*>::iterator   subregex_it;
996
  std::vector<CRegexXQuery_regex*>::iterator   subregex_it;
3427
544
  for(subregex_it = subregex.begin(); subregex_it != subregex.end(); subregex_it++)
997
  for(subregex_it = subregex.begin(); subregex_it != subregex.end(); subregex_it++)
3428
545
  {
998
  {
3429
546
    delete (*subregex_it);
999
    delete (*subregex_it);
3430
@@ -548,25 +1001,18 @@
3431
548
*/
1001
*/
3432
549
}
1002
}
3433
550
1003
3442
551
bool CRegexAscii_regex::set_align_begin(bool align_begin)
1004
void CRegexXQuery_regex::add_branch(CRegexXQuery_branch *branch)
3435
552
{
3436
553
  bool prev_align = this->align_begin;
3437
554
  this->align_begin = align_begin;
3438
555
  return prev_align;
3439
556
}
3440
557
3441
558
void CRegexAscii_regex::add_branch(CRegexAscii_branch *branch)
3443
559
{
1005
{
3444
560
  branch_list.push_back(branch);
1006
  branch_list.push_back(branch);
3445
561
}
1007
}
3446
562
1008
3448
563
bool  CRegexAscii_regex::get_indexed_match(int index, 
1009
bool  CRegexXQuery_regex::get_indexed_match(int index, 
3449
564
                                           const char **matched_source, 
1010
                                           const char **matched_source, 
3450
565
                                           int *matched_len)
1011
                                           int *matched_len)
3451
566
{
1012
{
3452
567
  if(!index || index > (int)subregex.size())
1013
  if(!index || index > (int)subregex.size())
3453
568
    return false;
1014
    return false;
3455
569
  CRegexAscii_regex *subr = subregex[index-1];
1015
  CRegexXQuery_regex *subr = subregex[index-1];
3456
570
  *matched_source = subr->matched_source;
1016
  *matched_source = subr->matched_source;
3457
571
  if(!*matched_source)
1017
  if(!*matched_source)
3458
572
    return false;
1018
    return false;
3459
@@ -574,145 +1020,209 @@
3460
574
  return true;
1020
  return true;
3461
575
}
1021
}
3462
576
1022
3464
577
unsigned int CRegexAscii_regex::get_indexed_regex_count()
1023
unsigned int CRegexXQuery_regex::get_indexed_regex_count()
3465
578
{
1024
{
3466
579
  return subregex.size();
1025
  return subregex.size();
3467
580
}
1026
}
3468
581
1027
3471
582
CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) :
1028
CRegexXQuery_branch::CRegexXQuery_branch(CRegexXQuery_regex* regex) 
3472
583
      IRegexMatcher(regex)
1029
      //:
3473
1030
      //IRegexMatcher(regex)
3474
584
{
1031
{
3475
585
}
1032
}
3476
586
1033
3478
587
CRegexAscii_branch::~CRegexAscii_branch()
1034
CRegexXQuery_branch::~CRegexXQuery_branch()
3479
588
{
1035
{
3481
589
  std::list<CRegexAscii_piece*>::iterator  piece_it;
1036
  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
3482
590
1037
3483
591
  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
1038
  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
3484
592
  {
1039
  {
3486
593
    delete (*piece_it);
1040
    delete (*piece_it).piece;
3487
594
  }
1041
  }
3488
595
}
1042
}
3489
596
1043
3491
597
void CRegexAscii_branch::add_piece(CRegexAscii_piece *piece)
1044
void CRegexXQuery_branch::add_piece(CRegexXQuery_piece *piece)
3492
598
{
1045
{
3493
599
  piece_list.push_back(piece);
1046
  piece_list.push_back(piece);
3494
600
}
1047
}
3495
601
1048
3497
602
CRegexAscii_piece::CRegexAscii_piece()
1049
CRegexXQuery_piece::CRegexXQuery_piece()
3498
603
{
1050
{
3499
1051
  atom = NULL;
3500
1052
  regex_atom = NULL;
3501
604
}
1053
}
3502
605
1054
3504
606
CRegexAscii_piece::~CRegexAscii_piece()
1055
CRegexXQuery_piece::~CRegexXQuery_piece()
3505
607
{
1056
{
3506
608
  delete atom;
1057
  delete atom;
3507
609
}
1058
}
3508
610
1059
3510
611
void CRegexAscii_piece::set_atom(IRegexAtom *atom)
1060
void CRegexXQuery_piece::set_atom(IRegexAtom *atom)
3511
612
{
1061
{
3512
613
  this->atom = atom;
1062
  this->atom = atom;
3513
1063
  this->regex_atom = dynamic_cast<CRegexXQuery_regex*>(atom);
3514
614
}
1064
}
3515
615
1065
3517
616
void CRegexAscii_piece::set_quantifier_min_max(int min, int max, bool strict_max)
1066
void CRegexXQuery_piece::set_quantifier_min_max(int min, int max, bool strict_max)
3518
617
{
1067
{
3519
618
  this->min = min;
1068
  this->min = min;
3520
619
  this->max = max;
1069
  this->max = max;
3521
620
  this->strict_max = strict_max;
1070
  this->strict_max = strict_max;
3522
621
}
1071
}
3524
622
void CRegexAscii_piece::set_is_reluctant(bool is_reluctant)
1072
void CRegexXQuery_piece::set_is_reluctant(bool is_reluctant)
3525
623
{
1073
{
3526
624
  this->is_reluctant = is_reluctant;
1074
  this->is_reluctant = is_reluctant;
3527
625
}
1075
}
3529
626
void CRegexAscii_piece::get_quantifier(int *min, int *max, bool *strict_max)
1076
void CRegexXQuery_piece::get_quantifier(int *min, int *max, bool *strict_max)
3530
627
{
1077
{
3531
628
  *min = this->min;
1078
  *min = this->min;
3532
629
  *max = this->max;
1079
  *max = this->max;
3533
630
  *strict_max = this->strict_max;
1080
  *strict_max = this->strict_max;
3534
631
}
1081
}
3536
632
bool CRegexAscii_piece::get_is_reluctant()
1082
bool CRegexXQuery_piece::get_is_reluctant()
3537
633
{
1083
{
3538
1084
  if(atom->regex_intern->flags & REGEX_ASCII_MINIMAL_MATCH)
3539
1085
    return true;
3540
634
  return is_reluctant;
1086
  return is_reluctant;
3541
635
}
1087
}
3542
636
1088
3543
637
1089
3545
638
CRegexAscii_chargroup::CRegexAscii_chargroup(CRegexAscii_regex* regex) :
1090
CRegexXQuery_charmatch::CRegexXQuery_charmatch(CRegexXQuery_regex* regex) :
3546
1091
    IRegexAtom(regex)
3547
1092
{
3548
1093
}
3549
1094
CRegexXQuery_multicharP::CRegexXQuery_multicharP(CRegexXQuery_regex* regex, char type, bool is_reverse) :
3550
1095
    CRegexXQuery_charmatch(regex)
3551
1096
{
3552
1097
  this->multichar_type = type; this->is_reverse = is_reverse;
3553
1098
}
3554
1099
CRegexXQuery_multicharIs::CRegexXQuery_multicharIs(CRegexXQuery_regex* regex, int block_index, bool is_reverse) :
3555
1100
    CRegexXQuery_charmatch(regex)
3556
1101
{
3557
1102
  this->block_index = block_index; this->is_reverse = is_reverse;
3558
1103
}
3559
1104
CRegexXQuery_multicharOther::CRegexXQuery_multicharOther(CRegexXQuery_regex* regex, char type) :
3560
1105
    CRegexXQuery_charmatch(regex)
3561
1106
{
3562
1107
  this->multichar_type = type;
3563
1108
}
3564
1109
CRegexXQuery_char_ascii::CRegexXQuery_char_ascii(CRegexXQuery_regex* regex, char c) :
3565
1110
    CRegexXQuery_charmatch(regex)
3566
1111
{
3567
1112
  this->c = c;
3568
1113
}
3569
1114
CRegexXQuery_char_ascii_i::CRegexXQuery_char_ascii_i(CRegexXQuery_regex* regex, char c) :
3570
1115
    CRegexXQuery_char_ascii(regex, toupper(c))
3571
1116
{
3572
1117
}
3573
1118
CRegexXQuery_char_range_ascii::CRegexXQuery_char_range_ascii(CRegexXQuery_regex* regex, char c1, char c2) :
3574
1119
    CRegexXQuery_charmatch(regex)
3575
1120
{
3576
1121
  this->c1 = c1; this->c2 = c2;
3577
1122
}
3578
1123
CRegexXQuery_char_range_ascii_i::CRegexXQuery_char_range_ascii_i(CRegexXQuery_regex* regex, char c1, char c2) :
3579
1124
    CRegexXQuery_char_range_ascii(regex, toupper(c1), toupper(c2))
3580
1125
{
3581
1126
}
3582
1127
CRegexXQuery_char_unicode::CRegexXQuery_char_unicode(CRegexXQuery_regex* regex, const char *source, int len) :
3583
1128
    CRegexXQuery_charmatch(regex)
3584
1129
{
3585
1130
  this->len = len;
3586
1131
  memcpy(c, source, len);
3587
1132
}
3588
1133
CRegexXQuery_char_unicode_cp::CRegexXQuery_char_unicode_cp(CRegexXQuery_regex* regex, unicode::code_point c) :
3589
1134
    CRegexXQuery_charmatch(regex)
3590
1135
{
3591
1136
  this->c = c;
3592
1137
}
3593
1138
CRegexXQuery_char_unicode_i::CRegexXQuery_char_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c) :
3594
1139
    CRegexXQuery_char_unicode_cp(regex, unicode::to_upper(c))
3595
1140
{
3596
1141
}
3597
1142
CRegexXQuery_char_range_unicode::CRegexXQuery_char_range_unicode(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) :
3598
1143
    CRegexXQuery_charmatch(regex)
3599
1144
{
3600
1145
  this->c1 = c1; this->c2 = c2;
3601
1146
}
3602
1147
CRegexXQuery_char_range_unicode_i::CRegexXQuery_char_range_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) :
3603
1148
    CRegexXQuery_char_range_unicode(regex, unicode::to_upper(c1), unicode::to_upper(c2))
3604
1149
{
3605
1150
}
3606
1151
CRegexXQuery_endline::CRegexXQuery_endline(CRegexXQuery_regex* regex) :
3607
1152
    CRegexXQuery_charmatch(regex)
3608
1153
{
3609
1154
}
3610
1155
3611
1156
unicode::code_point CRegexXQuery_char_unicode::get_c()
3612
1157
{
3613
1158
  const char *temp_c = (const char*)c;
3614
1159
  return utf8::next_char(temp_c);
3615
1160
}
3616
1161
3617
1162
3618
1163
CRegexXQuery_chargroup::CRegexXQuery_chargroup(CRegexXQuery_regex* regex) :
3619
639
    IRegexAtom(regex)
1164
    IRegexAtom(regex)
3620
640
{
1165
{
3621
641
  classsub = NULL;
1166
  classsub = NULL;
3622
642
}
1167
}
3623
643
1168
3625
644
CRegexAscii_chargroup::~CRegexAscii_chargroup()
1169
CRegexXQuery_chargroup::~CRegexXQuery_chargroup()
3626
645
{
1170
{
3627
646
  delete classsub;
1171
  delete classsub;
3658
647
}
1172
  std::list<CRegexXQuery_charmatch* >::iterator charmatch_it;
3659
648
1173
  for(charmatch_it=chargroup_list.begin(); charmatch_it != chargroup_list.end(); charmatch_it++)
3660
649
void CRegexAscii_chargroup::addMultiChar(char c)
1174
    delete (*charmatch_it);
3661
650
{
1175
}
3662
651
  chargroup_t cgt;
1176
3663
652
  cgt.flags = CHARGROUP_FLAGS_MULTICHAR;
1177
void CRegexXQuery_chargroup::addCharMatch(CRegexXQuery_charmatch *charmatch)
3664
653
  cgt.c1 = c;
1178
{
3665
654
  cgt.c2 = 0;
1179
  chargroup_list.push_back(charmatch);
3666
655
  chargroup_list.push_back(cgt);
1180
}
3667
656
}
1181
void CRegexXQuery_chargroup::addClassSub(CRegexXQuery_chargroup* classsub)
3638
657
3639
658
void CRegexAscii_chargroup::addEndLine()
3640
659
{
3641
660
  chargroup_t cgt;
3642
661
  cgt.flags = CHARGROUP_FLAGS_ENDLINE;
3643
662
  cgt.c1 = '$';
3644
663
  cgt.c2 = 0;
3645
664
  chargroup_list.push_back(cgt);
3646
665
}
3647
666
3648
667
void CRegexAscii_chargroup::addCharRange(char c1, char c2)
3649
668
{
3650
669
  chargroup_t cgt;
3651
670
  cgt.flags = 0;
3652
671
  cgt.c1 = c1;
3653
672
  cgt.c2 = c2;
3654
673
  chargroup_list.push_back(cgt);
3655
674
}
3656
675
3657
676
void CRegexAscii_chargroup::addClassSub(CRegexAscii_chargroup* classsub)
3668
677
{
1182
{
3669
678
  this->classsub = classsub;
1183
  this->classsub = classsub;
3670
679
}
1184
}
3671
680
1185
3682
681
CRegexAscii_negchargroup::CRegexAscii_negchargroup(CRegexAscii_regex* regex) :
1186
CRegexXQuery_negchargroup::CRegexXQuery_negchargroup(CRegexXQuery_regex* regex) :
3683
682
  CRegexAscii_chargroup(regex)
1187
  CRegexXQuery_chargroup(regex)
3684
683
{
1188
{
3685
684
}
1189
}
3686
685
1190
3687
686
CRegexAscii_negchargroup::~CRegexAscii_negchargroup()
1191
CRegexXQuery_negchargroup::~CRegexXQuery_negchargroup()
3688
687
{
1192
{
3689
688
}
1193
}
3690
689
1194
3691
690
CRegexAscii_wildchar::CRegexAscii_wildchar(CRegexAscii_regex* regex) :
1195
CRegexXQuery_wildchar::CRegexXQuery_wildchar(CRegexXQuery_regex* regex) :
3692
691
      IRegexAtom(regex)
1196
      IRegexAtom(regex)
3693
692
{
1197
{
3694
693
}
1198
}
3695
694
1199
3697
695
CRegexAscii_wildchar::~CRegexAscii_wildchar()
1200
CRegexXQuery_wildchar::~CRegexXQuery_wildchar()
3698
696
{
1201
{
3699
697
}
1202
}
3700
698
1203
3702
699
CRegexAscii_backref::CRegexAscii_backref(CRegexAscii_regex* regex, unsigned int backref_) :
1204
CRegexXQuery_backref::CRegexXQuery_backref(CRegexXQuery_regex* regex, unsigned int backref_) :
3703
700
      IRegexAtom(regex),
1205
      IRegexAtom(regex),
3704
701
      backref(backref_)
1206
      backref(backref_)
3705
702
{
1207
{
3706
703
}
1208
}
3707
704
1209
3713
705
CRegexAscii_backref::~CRegexAscii_backref()
1210
CRegexXQuery_backref::~CRegexXQuery_backref()
3714
706
{
1211
{
3715
707
}
1212
}
3716
708
1213
3717
709
CRegexAscii_parser::CRegexAscii_parser()
1214
CRegexXQuery_pinstart::CRegexXQuery_pinstart(CRegexXQuery_regex* regex):
3718
1215
      IRegexAtom(regex)
3719
1216
{
3720
1217
}
3721
1218
3722
1219
CRegexXQuery_parser::CRegexXQuery_parser()
3723
710
{
1220
{
3724
711
  current_regex = NULL;
1221
  current_regex = NULL;
3725
712
  regex_depth = 0;
1222
  regex_depth = 0;
3726
713
}
1223
}
3727
714
1224
3729
715
CRegexAscii_parser::~CRegexAscii_parser()
1225
CRegexXQuery_parser::~CRegexXQuery_parser()
3730
716
{
1226
{
3731
717
}
1227
}
3732
718
1228
3733
@@ -720,9 +1230,68 @@
3734
720
//////////////////////////////////////////
1230
//////////////////////////////////////////
3735
721
////Matching the pattern on a string
1231
////Matching the pattern on a string
3736
722
/////////////////////////////////////////
1232
/////////////////////////////////////////
3737
1233
static std::list<RegexAscii_pieceinfo> empty_pieces;//empty list of pieces
3738
1234
/*
3739
1235
std::list<RegexAscii_pieceinfo>::iterator  
3740
1236
IRegexAtom::choose_next_piece(const char *source, int *matched_len, 
3741
1237
                              std::list<RegexAscii_pieceinfo>::iterator this_piece,
3742
1238
                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
3743
1239
{
3744
1240
  //if this_piece is repetition, repeat until max, then go to next piece
3745
1241
  int min, max;
3746
1242
  bool strict_max;
3747
1243
  while(this_piece != end_piece)
3748
1244
  {
3749
1245
    (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
3750
1246
    if(max <= ((*this_piece).nr_matches))//finished this piece
3751
1247
    {
3752
1248
      this_piece++;
3753
1249
    }
3754
1250
    else
3755
1251
      break;
3756
1252
  }
3757
1253
  return this_piece;
3758
1254
}
3759
1255
*/
3760
1256
3761
1257
bool IRegexAtom::match(const char *source, int *start_from_branch, int *matched_len,
3762
1258
                  std::list<RegexAscii_pieceinfo>::iterator this_piece,
3763
1259
                  std::list<RegexAscii_pieceinfo>::iterator end_piece)
3764
1260
{
3765
1261
  *start_from_branch = 0;
3766
1262
  bool retmatch;
3767
1263
  retmatch = match_internal(source, start_from_branch, matched_len);
3768
1264
  if(!retmatch)
3769
1265
    return false;
3770
1266
3771
1267
  if(this_piece == end_piece)
3772
1268
    return true;
3773
1269
3774
1270
  (*this_piece).nr_matches++;
3775
1271
  int min,max;
3776
1272
  bool strict_max;
3777
1273
  (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
3778
1274
  std::list<RegexAscii_pieceinfo>::iterator init_piece = this_piece;
3779
1275
  if(((min == 1) && (max == 1)) || //the simple common case
3780
1276
    ((*matched_len == 0) && ((*this_piece).nr_matches>=min)))//to avoid infinite loop
3781
1277
  {
3782
1278
    this_piece++;
3783
1279
    if(this_piece == end_piece)
3784
1280
      return true;
3785
1281
  }
3786
1282
  int matched_len2;
3787
1283
  retmatch = (*this_piece).piece->match_piece(this_piece, end_piece, source + *matched_len, &matched_len2);
3788
1284
  if(!retmatch)
3789
1285
  {
3790
1286
    (*init_piece).nr_matches--;
3791
1287
    return false;
3792
1288
  }
3793
1289
  *matched_len += matched_len2;
3794
1290
  return true;
3795
1291
}
3796
723
1292
3797
724
//try every position in source to match the pattern
1293
//try every position in source to match the pattern
3799
725
bool CRegexAscii_regex::match_anywhere(const char *source, unsigned int flags,
1294
bool CRegexXQuery_regex::match_anywhere(const char *source, unsigned int flags,
3800
726
                                       int *match_pos, int *matched_len)
1295
                                       int *match_pos, int *matched_len)
3801
727
{
1296
{
3802
728
  *match_pos = 0;
1297
  *match_pos = 0;
3803
@@ -730,43 +1299,66 @@
3804
730
  return match_from(source, flags, match_pos, matched_len);
1299
  return match_from(source, flags, match_pos, matched_len);
3805
731
}
1300
}
3806
732
1301
3808
733
bool CRegexAscii_regex::match_from(const char *source, unsigned int flags,
1302
bool CRegexXQuery_regex::match_from(const char *source, unsigned int flags,
3809
734
                                       int *match_pos, int *matched_len)
1303
                                       int *match_pos, int *matched_len)
3810
735
{
1304
{
3811
736
  this->flags = flags;
1305
  this->flags = flags;
3812
1306
  this->source_start = source;
3813
737
  reachedEnd = false;
1307
  reachedEnd = false;
3814
738
1308
3816
739
  std::vector<CRegexAscii_regex*>::iterator regex_it;
1309
  std::vector<CRegexXQuery_regex*>::iterator regex_it;
3817
740
  for(regex_it = subregex.begin(); regex_it != subregex.end(); regex_it++)
1310
  for(regex_it = subregex.begin(); regex_it != subregex.end(); regex_it++)
3818
741
  {
1311
  {
3819
742
    (*regex_it)->matched_source = NULL;
1312
    (*regex_it)->matched_source = NULL;
3820
743
  }
1313
  }
3832
744
//  if(!source[0])
1314
3833
745
//  {
1315
  std::vector<std::pair<const char*, int> >  saved_subregex;
3834
746
//    if(branch_list.empty())
1316
3835
747
//      return true;
1317
  if(*match_pos && (flags & REGEX_ASCII_WHOLE_MATCH))
3836
748
//    else
1318
    return false;
3837
749
//      return false;
1319
3827
750
//  }
3828
751
3829
752
  bool  skip_first_match = false;
3830
753
  if(*match_pos && align_begin)
3831
754
    skip_first_match = true;
3838
755
  do
1320
  do
3839
756
  {
1321
  {
3847
757
    if(!skip_first_match)
1322
    int   start_from_branch = 0;
3848
758
    {
1323
    int   longest_match = -1;
3849
759
      if(match(source + *match_pos, matched_len))
1324
    while(1)
3850
760
        return true;
1325
    {
3851
761
    }
1326
      if(!match(source + *match_pos, &start_from_branch, matched_len, empty_pieces.begin(), empty_pieces.end()))
3852
762
    skip_first_match = false;
1327
        break;
3853
763
    if(align_begin)
1328
      if(longest_match < *matched_len)
3854
1329
      {
3855
1330
        longest_match = *matched_len;
3856
1331
        if(start_from_branch && (flags & REGEX_ASCII_GET_LONGEST_BRANCH))
3857
1332
          save_subregex_list(saved_subregex);
3858
1333
      }
3859
1334
      if(!start_from_branch || !(flags & REGEX_ASCII_GET_LONGEST_BRANCH))
3860
1335
        break;
3861
1336
      //else try the other branches to see which is longer
3862
1337
    }
3863
1338
    if(longest_match != -1)
3864
1339
    {
3865
1340
      *matched_len = longest_match;
3866
1341
      if(saved_subregex.size())
3867
1342
        load_subregex_list(saved_subregex);
3868
1343
      if(flags & REGEX_ASCII_WHOLE_MATCH)
3869
1344
      {
3870
1345
        if(!source[*match_pos+*matched_len])
3871
1346
          return true;
3872
1347
        if((flags & REGEX_ASCII_MULTILINE) && 
3873
1348
          ((source[*match_pos+*matched_len] == '\n') || (source[*match_pos+*matched_len] == '\r')))
3874
1349
          return true;
3875
1350
        return false;
3876
1351
      }
3877
1352
      return true;
3878
1353
    }
3879
1354
3880
1355
    if(flags & REGEX_ASCII_WHOLE_MATCH)
3881
764
    {
1356
    {
3882
765
      if(flags & REGEX_ASCII_MULTILINE)
1357
      if(flags & REGEX_ASCII_MULTILINE)
3883
766
      {
1358
      {
3885
767
        //goto the next line
1359
        //go to next line
3886
768
        while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r'))
1360
        while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r'))
3888
769
          (*match_pos)++;
1361
          (*match_pos) += myutf8len(source);
3889
770
        if(source[*match_pos] == '\n')
1362
        if(source[*match_pos] == '\n')
3890
771
        {
1363
        {
3891
772
          (*match_pos)++;
1364
          (*match_pos)++;
3892
@@ -780,190 +1372,1039 @@
3893
780
            (*match_pos)++;
1372
            (*match_pos)++;
3894
781
        }
1373
        }
3895
782
        if(!source[*match_pos])
1374
        if(!source[*match_pos])
3897
783
          return false;
1375
          break;
3898
784
        continue;
1376
        continue;
3899
785
      }
1377
      }
3901
786
      return false;
1378
      break;
3902
787
    }
1379
    }
3903
788
    if(!source[*match_pos])
1380
    if(!source[*match_pos])
3904
789
      break;
1381
      break;
3906
790
    (*match_pos)++;
1382
    (*match_pos) += myutf8len(source);
3907
791
  }
1383
  }
3908
792
  while(source[*match_pos]);
1384
  while(source[*match_pos]);
3909
1385
//  if(!source[*match_pos])
3910
1386
//  {
3911
1387
//    reachedEnd = true;
3912
1388
//  }
3913
793
  return false;
1389
  return false;
3914
794
}
1390
}
3915
795
1391
3916
1392
void CRegexXQuery_regex::reset_match()
3917
1393
{
3918
1394
//  this->backup_matched_source = this->matched_source;
3919
1395
//  this->backup_matched_len = this->matched_len;
3920
1396
  this->matched_source = NULL;
3921
1397
  this->matched_len = 0;
3922
1398
  std::list<CRegexXQuery_branch*>::iterator  branch_it;
3923
1399
  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3924
1400
  {
3925
1401
    (*branch_it)->reset();
3926
1402
  }
3927
1403
}
3928
1404
/*
3929
1405
void CRegexXQuery_regex::restore_match()
3930
1406
{
3931
1407
  this->matched_source = this->backup_matched_source;
3932
1408
  this->matched_len = this->backup_matched_len;
3933
1409
  std::list<CRegexXQuery_branch*>::iterator  branch_it;
3934
1410
  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3935
1411
  {
3936
1412
    (*branch_it)->restore();
3937
1413
  }
3938
1414
}
3939
1415
*/
3940
796
//match any of the branches
1416
//match any of the branches
3942
797
bool CRegexAscii_regex::match(const char *source, int *matched_len)
1417
bool CRegexXQuery_regex::match(const char *source, int *start_from_branch, int *matched_len,
3943
1418
                              std::list<RegexAscii_pieceinfo>::iterator next_piece,
3944
1419
                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
3945
798
{
1420
{
3946
799
  reachedEnd = false;
1421
  reachedEnd = false;
3955
800
  std::list<CRegexAscii_branch*>::iterator  branch_it;
1422
  if(!(flags & REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE) || 
3956
801
1423
    (this->matched_source == NULL) || ((this->matched_source + this->matched_len) != source))
3957
802
  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
1424
    this->matched_source = source;
3958
803
  {
1425
  *matched_len = 0;
3959
804
    if((*branch_it)->match(source, matched_len))
1426
  std::list<CRegexXQuery_branch*>::iterator  branch_it;
3960
805
    {
1427
3961
806
      matched_source = source;
1428
  if(*start_from_branch == 0)
3962
807
      this->matched_len = *matched_len;
1429
  {
3963
1430
    for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3964
1431
    {
3965
1432
      (*branch_it)->reset();
3966
1433
    }
3967
1434
  }
3968
1435
3969
1436
  branch_it = branch_list.begin();
3970
1437
  if(*start_from_branch)
3971
1438
  {
3972
1439
    for(int i=0;i<*start_from_branch;i++)
3973
1440
      branch_it++;
3974
1441
  }
3975
1442
  (*start_from_branch)++;
3976
1443
  for(; branch_it != branch_list.end(); branch_it++,(*start_from_branch)++)
3977
1444
  {
3978
1445
    if((*branch_it)->match(source, matched_len, this, next_piece, end_piece))
3979
1446
    {
3980
1447
      //matched_source = source;
3981
1448
      //this->matched_len = *matched_len;
3982
808
      return true;
1449
      return true;
3983
809
    }
1450
    }
3984
810
  }
1451
  }
3987
811
  matched_source = NULL;
1452
  *start_from_branch = 0;
3988
812
  matched_len = 0;
1453
  if(this->matched_source == source)
3989
1454
    this->matched_source = NULL;
3990
1455
  *matched_len = 0;
3991
813
  return false;
1456
  return false;
3992
814
}
1457
}
3993
815
1458
3994
1459
void CRegexXQuery_regex::save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
3995
1460
{
3996
1461
  saved_subregex.resize(0);
3997
1462
  saved_subregex.reserve(subregex.size());
3998
1463
  std::vector<CRegexXQuery_regex*>::iterator   it;
3999
1464
  for(it=subregex.begin(); it != subregex.end(); it++)
4000
1465
  {
4001
1466
    saved_subregex.push_back(std::pair<const char*, int>((*it)->matched_source, (*it)->matched_len));
4002
1467
  }
4003
1468
}
4004
1469
4005
1470
void CRegexXQuery_regex::load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
4006
1471
{
4007
1472
  std::vector<std::pair<const char*, int> >::iterator   it;
4008
1473
  std::vector<CRegexXQuery_regex*>::iterator            subit;
4009
1474
  for(it=saved_subregex.begin(), subit = subregex.begin(); it != saved_subregex.end(); it++, subit++)
4010
1475
  {
4011
1476
    (*subit)->matched_source = (*it).first;
4012
1477
    (*subit)->matched_len = (*it).second;
4013
1478
  }
4014
1479
}
4015
1480
4016
1481
void CRegexXQuery_branch::reset()
4017
1482
{
4018
1483
  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
4019
1484
  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
4020
1485
  {
4021
1486
    (*piece_it).piece->atom->reset_match();
4022
1487
  }
4023
1488
}
4024
1489
/*
4025
1490
void CRegexXQuery_branch::restore()
4026
1491
{
4027
1492
  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
4028
1493
  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
4029
1494
  {
4030
1495
    (*piece_it).piece->atom->restore_match();
4031
1496
  }
4032
1497
}
4033
1498
*/
4034
816
//match all the pieces
1499
//match all the pieces
4036
817
bool CRegexAscii_branch::match(const char *source, int *matched_len)
1500
bool CRegexXQuery_branch::match(const char *source, int *matched_len,
4037
1501
                              CRegexXQuery_regex* group_regex,
4038
1502
                              std::list<RegexAscii_pieceinfo>::iterator next_piece,
4039
1503
                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
4040
818
{
1504
{
4042
819
  std::list<CRegexAscii_piece*>::iterator  piece_it;
1505
  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
4043
820
1506
4044
821
  piece_it = piece_list.begin(); 
1507
  piece_it = piece_list.begin(); 
4045
1508
  //if(piece_it == piece_list.end())
4046
1509
    //if(!source[0])
4047
1510
  //    return true;
4048
1511
    //else
4049
1512
    //  return false;
4050
822
  if(piece_it == piece_list.end())
1513
  if(piece_it == piece_list.end())
4053
823
    if(source[0])
1514
  {
4054
824
      return false;
1515
    piece_it = next_piece;
4055
1516
    if(next_piece == end_piece)
4056
1517
    {
4057
1518
      group_regex->matched_len = 0;
4058
1519
      return true;
4059
1520
    }
4060
1521
  }
4061
1522
4062
1523
  std::list<RegexAscii_pieceinfo>   temp_pieces(piece_list);
4063
1524
  temp_pieces.push_back(group_regex);//this will be used to store the group match
4064
1525
  temp_pieces.insert(temp_pieces.end(), next_piece, end_piece);
4065
1526
4066
1527
  return (*piece_it).piece->match_piece(temp_pieces.begin(), temp_pieces.end(), source, matched_len);
4067
1528
}
4068
1529
4069
1530
bool CRegexXQuery_piece::match_piece(std::list<RegexAscii_pieceinfo>::iterator piece_it,
4070
1531
                                    std::list<RegexAscii_pieceinfo>::iterator end_it,
4071
1532
                                    const char *source, int *matched_len)
4072
1533
{
4073
1534
  if((*piece_it).nr_matches < 0)
4074
1535
  {
4075
1536
    //special case, store the group match
4076
1537
    (*piece_it).group_regex->matched_len = source - (*piece_it).group_regex->matched_source;
4077
1538
    piece_it++;
4078
1539
    if(piece_it == end_it)
4079
1540
      return true;
4080
825
    else
1541
    else
4084
826
      return true;
1542
      return (*piece_it).piece->match_piece(piece_it, end_it, source, matched_len);
4085
827
  if(!(*piece_it)->get_is_reluctant())
1543
  }
4086
828
    return match_piece_iter_normal(piece_it, source, matched_len);
1544
4087
1545
  if(!get_is_reluctant())
4088
1546
    return match_piece_iter_normal(piece_it, end_it, source, matched_len);
4089
829
  else
1547
  else
4096
830
    return match_piece_iter_reluctant(piece_it, source, matched_len);
1548
    return match_piece_iter_reluctant(piece_it, end_it, source, matched_len);
4097
831
}
1549
}
4098
832
1550
4099
833
//match as less as possible
1551
int CRegexXQuery_piece::choose_another_branch(std::vector<std::pair<int,int> > &match_lens)
4100
834
bool CRegexAscii_branch::match_piece_iter_reluctant(
1552
{
4101
835
                                        std::list<CRegexAscii_piece*>::iterator piece_it,
1553
  int i = match_lens.size()-1;
4102
1554
  i--;
4103
1555
  while((i >= 0) && (match_lens.at(i).second == 0))
4104
1556
    i--;
4105
1557
  if(i < 0)
4106
1558
    return -1;//no more branches
4107
1559
  match_lens.resize(i+1);
4108
1560
  i++;
4109
1561
  return i;
4110
1562
}
4111
1563
4112
1564
bool CRegexXQuery_piece::is_regex_atom()
4113
1565
{
4114
1566
  return regex_atom != NULL;
4115
1567
}
4116
1568
4117
1569
//match as less as possible (shortest string)
4118
1570
bool CRegexXQuery_piece::match_piece_iter_reluctant(
4119
1571
                                        std::list<RegexAscii_pieceinfo>::iterator piece_it,
4120
1572
                                        std::list<RegexAscii_pieceinfo>::iterator end_it,
4121
836
                                        const char *source, int *matched_len)
1573
                                        const char *source, int *matched_len)
4122
837
{
1574
{
4123
838
  *matched_len = 0;
1575
  *matched_len = 0;
4125
839
  if(piece_it == piece_list.end())
1576
  if(piece_it == end_it)
4126
840
    return true;
1577
    return true;
4127
841
1578
4128
842
  int min, max;
1579
  int min, max;
4129
843
  bool  strict_max;
1580
  bool  strict_max;
4130
844
  //std::vector<int>    match_lens;
1581
  //std::vector<int>    match_lens;
4133
845
  (*piece_it)->get_quantifier(&min, &max, &strict_max);
1582
  (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
4134
846
  if(strict_max && (max >= 0))
1583
4135
1584
  std::vector<std::pair<const char*, int> >  saved_subregex;
4136
1585
4137
1586
  if(is_regex_atom())
4138
847
  {
1587
  {
4143
848
    int   timeslen;
1588
    //recursive
4144
849
    //check if the piece doesn't exceed the max match
1589
    bool retmatch;
4145
850
    if((*piece_it)->match_piece_times(source, &timeslen, max+1, NULL))
1590
    atom->regex_intern->save_subregex_list(saved_subregex);
4146
851
      return false;///too many matches
1591
    if((*piece_it).nr_matches >= min)
4147
1592
    {
4148
1593
      //go to next piece
4149
1594
      std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4150
1595
      next_it++;
4151
1596
      if(next_it == end_it)
4152
1597
        return true;
4153
1598
      retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
4154
1599
      if(retmatch)
4155
1600
        return true;
4156
1601
    }
4157
1602
    if(((max == -1) || ((*piece_it).nr_matches < max)) &&//try further with this piece
4158
1603
      (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
4159
1604
    {
4160
1605
      int start_from_branch = 0;
4161
1606
      int shortest_len = -1;
4162
1607
      bool branch_saved = false;
4163
1608
      //try all branches to get the shortest len
4164
1609
      (*piece_it).nr_matches++;
4165
1610
      while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
4166
1611
      {
4167
1612
        if((shortest_len == -1) || (shortest_len > *matched_len))
4168
1613
        {
4169
1614
          shortest_len = *matched_len;
4170
1615
          if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4171
1616
          {
4172
1617
            atom->regex_intern->save_subregex_list(saved_subregex);
4173
1618
            branch_saved = true;
4174
1619
          }
4175
1620
        }
4176
1621
        if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4177
1622
          break;
4178
1623
      }
4179
1624
      if(shortest_len != -1)
4180
1625
      {
4181
1626
        *matched_len = shortest_len;
4182
1627
        if(branch_saved)
4183
1628
          atom->regex_intern->load_subregex_list(saved_subregex);
4184
1629
        return true;
4185
1630
      }
4186
1631
      else
4187
1632
      {
4188
1633
        (*piece_it).nr_matches--;
4189
1634
        atom->regex_intern->load_subregex_list(saved_subregex);
4190
1635
        return false;
4191
1636
      }
4192
1637
    }
4193
1638
    else
4194
1639
    {
4195
1640
      atom->regex_intern->load_subregex_list(saved_subregex);
4196
1641
      return false;
4197
1642
    }
4198
852
  }
1643
  }
4199
853
1644
4202
854
  int i=min;
1645
  int i=0;
4203
855
  std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
1646
  int shortest_len = -1;
4204
1647
  int otherpieces_shortest = -1;
4205
1648
  int i_shortest = -1;
4206
1649
  std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4207
1650
  std::vector<std::pair<int,int> >    match_lens;
4208
856
  next_it++;
1651
  next_it++;
4209
857
  int pieceslen = 0;
1652
  int pieceslen = 0;
4210
858
  while(1)
1653
  while(1)
4211
859
  {
1654
  {
4218
860
    if((max > 0) && (i>max))
1655
    int piecelen = 0;
4219
861
      break;
1656
    bool retmatch;
4220
862
   int piecelen = 0;
1657
    retmatch = match_piece_times(source, &piecelen, i < min ? min : i, &match_lens);
4221
863
   if((*piece_it)->match_piece_times(source+pieceslen, &piecelen, !pieceslen ? i : 1, NULL))
1658
    i = match_lens.size()-1;//number of matches
4222
864
   {
1659
    if(i<0)
4223
865
      pieceslen += piecelen;
1660
      i = 0;
4224
1661
    if((i>=min))
4225
1662
    {
4226
1663
      pieceslen = piecelen;
4227
1664
      if((shortest_len >= 0) && (shortest_len <= pieceslen))//this branch is longer
4228
1665
      {//try another branch
4229
1666
        i = choose_another_branch(match_lens);
4230
1667
        if(i >= 0)
4231
1668
          continue;//try another branch
4232
1669
        else
4233
1670
          break;
4234
1671
      }
4235
866
      int   otherpieces = 0;
1672
      int   otherpieces = 0;
4243
867
      if((next_it == piece_list.end()) ||
1673
      if((next_it == end_it) ||
4244
868
        ((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+pieceslen, &otherpieces)) ||
1674
        (*next_it).piece->match_piece(next_it, end_it, source+pieceslen, &otherpieces)
4245
869
        (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+pieceslen, &otherpieces)))
1675
        )
4246
870
      {
1676
      {
4247
871
        *matched_len = pieceslen + otherpieces;
1677
        if((i == pieceslen) || (match_lens.at(0).second == 0) ||//minimum achieved already, cannot go lower than that
4248
872
        return true;
1678
            !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4249
873
      }
1679
        {
4250
1680
          *matched_len = pieceslen + otherpieces;
4251
1681
          return true;
4252
1682
        }
4253
1683
        if((shortest_len < 0) || (shortest_len > pieceslen))
4254
1684
        {
4255
1685
          shortest_len = pieceslen;
4256
1686
          otherpieces_shortest = otherpieces;
4257
1687
          i_shortest = i;
4258
1688
          if(match_lens.at(0).second != 0)
4259
1689
            atom->regex_intern->save_subregex_list(saved_subregex);
4260
1690
        }
4261
1691
        i = choose_another_branch(match_lens);
4262
1692
        if(i >= 0)
4263
1693
          continue;//try another branch
4264
1694
        else
4265
1695
          break;
4266
1696
      }
4267
1697
      else
4268
1698
      {
4269
1699
        //try further
4270
1700
        if(retmatch)
4271
1701
        {
4272
1702
          i++;
4273
1703
          if((max < 0) || (i<=max))
4274
1704
            continue;
4275
1705
          i--;
4276
1706
        }
4277
1707
      }
4278
1708
    }
4279
1709
    
4280
1710
    if(i==0)
4281
1711
    {
4282
1712
      break;
4283
874
    }
1713
    }
4284
875
    else
1714
    else
4287
876
      break;
1715
    {
4288
877
    i++;
1716
      i = choose_another_branch(match_lens);
4289
1717
      if(i >= 0)
4290
1718
        continue;//try another branch
4291
1719
      else
4292
1720
        break;
4293
1721
    }
4294
878
  }
1722
  }
4295
879
1723
4296
1724
  if(shortest_len >= 0)
4297
1725
  {
4298
1726
    if(strict_max && (max>=0) && (i_shortest > max))
4299
1727
      return false;
4300
1728
    *matched_len = shortest_len + otherpieces_shortest;
4301
1729
    if(saved_subregex.size())
4302
1730
      atom->regex_intern->load_subregex_list(saved_subregex);
4303
1731
    return true;
4304
1732
  }
4305
880
  return false;
1733
  return false;
4306
881
}
1734
}
4307
882
1735
4308
883
//match as much as possible
1736
//match as much as possible
4311
884
bool CRegexAscii_branch::match_piece_iter_normal(
1737
bool CRegexXQuery_piece::match_piece_iter_normal(
4312
885
                                        std::list<CRegexAscii_piece*>::iterator piece_it,
1738
                                        std::list<RegexAscii_pieceinfo>::iterator piece_it,
4313
1739
                                        std::list<RegexAscii_pieceinfo>::iterator end_it,
4314
886
                                        const char *source, int *matched_len)
1740
                                        const char *source, int *matched_len)
4315
887
{
1741
{
4316
888
  *matched_len = 0;
1742
  *matched_len = 0;
4317
889
1743
4318
890
  int min, max;
1744
  int min, max;
4319
891
  bool  strict_max;
1745
  bool  strict_max;
4324
892
  std::vector<int>    match_lens;
1746
  std::vector<std::pair<int,int> >    match_lens;
4325
893
  (*piece_it)->get_quantifier(&min, &max, &strict_max);
1747
  (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
4326
894
  int   timeslen;
1748
  int   timeslen = 0;
4327
895
  if(strict_max && (max >= 0))
1749
  std::vector<std::pair<const char*, int> >  saved_subregex;
4328
1750
4329
1751
  if(is_regex_atom())
4330
896
  {
1752
  {
4335
897
    //check if the piece doesn't exceed the max match
1753
    //recursive
4336
898
    //if((*piece_it)->match_piece_times(source, &timeslen, max+1, &match_lens))
1754
    bool retmatch;
4337
899
    //  return false;///too many matches
1755
    atom->regex_intern->save_subregex_list(saved_subregex);
4338
900
    (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
1756
    if(((max == -1) || ((*piece_it).nr_matches < max)) && //try further with this piece
4339
1757
      (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
4340
1758
    {
4341
1759
      int start_from_branch = 0;
4342
1760
      int longest_len = -1;
4343
1761
      bool branch_saved = false;
4344
1762
      //try all branches to get the longest len
4345
1763
      (*piece_it).nr_matches++;
4346
1764
      while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
4347
1765
      {
4348
1766
        if((longest_len < *matched_len))
4349
1767
        {
4350
1768
          longest_len = *matched_len;
4351
1769
          if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4352
1770
          {
4353
1771
            atom->regex_intern->save_subregex_list(saved_subregex);
4354
1772
            branch_saved = true;
4355
1773
          }
4356
1774
        }
4357
1775
        if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4358
1776
          break;
4359
1777
      }
4360
1778
      if(longest_len != -1)
4361
1779
      {
4362
1780
        *matched_len = longest_len;
4363
1781
        if(branch_saved)
4364
1782
          atom->regex_intern->load_subregex_list(saved_subregex);
4365
1783
        return true;
4366
1784
      }
4367
1785
      else
4368
1786
      {
4369
1787
        atom->regex_intern->load_subregex_list(saved_subregex);
4370
1788
        (*piece_it).nr_matches--;
4371
1789
      }
4372
1790
    }
4373
1791
    if((*piece_it).nr_matches >= min)
4374
1792
    {
4375
1793
      //go to next piece
4376
1794
      std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4377
1795
      next_it++;
4378
1796
      if(next_it == end_it)
4379
1797
        return true;
4380
1798
      retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
4381
1799
      if(!retmatch)
4382
1800
        atom->regex_intern->load_subregex_list(saved_subregex);
4383
1801
      return retmatch;
4384
1802
    }
4385
1803
    else
4386
1804
    {
4387
1805
    //  regex_atom->restore_match();
4388
1806
      atom->regex_intern->load_subregex_list(saved_subregex);
4389
1807
      return false;
4390
1808
    }
4391
901
  }
1809
  }
4392
902
  else if(!strict_max && (max >= 0))
4393
903
    (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
4394
904
  else
4395
905
    (*piece_it)->match_piece_times(source, &timeslen, -1, &match_lens);
4396
906
1810
4399
907
  int i;
1811
  int longest_len = -1;
4400
908
  std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
1812
  int otherpieces_longest = -1;
4401
1813
  int i_longest = -1;
4402
1814
  int i = max;
4403
1815
  std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4404
909
  next_it++;
1816
  next_it++;
4406
910
  if(next_it == piece_list.end())
1817
4407
1818
  bool retmatch;
4408
1819
  while(1)
4409
911
  {
1820
  {
4414
912
    if((int)match_lens.size() > min)
1821
    retmatch = match_piece_times(source, &timeslen, i, &match_lens);
4415
913
    {
1822
    i=match_lens.size()-1;//number of matches
4416
914
      *matched_len = timeslen;
1823
    if((i>=min))
4417
915
      return true;
1824
    {
4418
1825
      if(timeslen < longest_len)
4419
1826
      {//this branch is no use
4420
1827
        i = choose_another_branch(match_lens);
4421
1828
        if(i >= 0)
4422
1829
        {
4423
1830
          i = max;
4424
1831
          continue;//try another branch
4425
1832
        }
4426
1833
        else
4427
1834
          break;
4428
1835
      }
4429
1836
      //int piecelen = 0;
4430
1837
      int   otherpieces = 0;
4431
1838
      if((next_it == end_it) ||
4432
1839
        (*next_it).piece->match_piece(next_it, end_it, source+timeslen, &otherpieces)
4433
1840
        )
4434
1841
      {
4435
1842
        if(timeslen > longest_len)
4436
1843
        {
4437
1844
          longest_len = timeslen;
4438
1845
          otherpieces_longest = otherpieces;
4439
1846
          i_longest = i;
4440
1847
          if(!(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4441
1848
          {
4442
1849
            *matched_len = longest_len + otherpieces_longest;
4443
1850
            return true;
4444
1851
          }
4445
1852
          else
4446
1853
          {
4447
1854
            if(match_lens.at(0).second)
4448
1855
              atom->regex_intern->save_subregex_list(saved_subregex);
4449
1856
          }
4450
1857
        }
4451
1858
      }
4452
1859
      else
4453
1860
      {
4454
1861
        if(!match_lens.at(0).second)
4455
1862
        {
4456
1863
          match_lens.resize(match_lens.size()-1);
4457
1864
          i--;
4458
1865
          if(i >= 0)
4459
1866
            continue;//try smaller 
4460
1867
          else
4461
1868
            break;
4462
1869
        }
4463
1870
        else
4464
1871
        {
4465
1872
          i = choose_another_branch(match_lens);
4466
1873
          if(i >= 0)
4467
1874
            continue;//try another branch
4468
1875
          else
4469
1876
            break;
4470
1877
        }
4471
1878
      }
4472
1879
    }
4473
1880
    //now try another branch
4474
1881
    i = choose_another_branch(match_lens);
4475
1882
    if(i >= 0)
4476
1883
    {
4477
1884
      i = max;
4478
1885
      continue;//try another branch
4479
916
    }
1886
    }
4480
917
    else
1887
    else
4484
918
      return false;
1888
      break;
4485
919
  }
1889
  }//end while
4486
920
  for(i=match_lens.size()-1; i>=min; i--)
1890
4487
1891
  if(longest_len >= 0)
4488
921
  {
1892
  {
4497
922
    int piecelen = 0;
1893
    *matched_len = longest_len + otherpieces_longest;
4498
923
    int   otherpieces = 0;
1894
    if(saved_subregex.size())
4499
924
    if(((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+match_lens[i]+piecelen, &otherpieces)) ||
1895
      atom->regex_intern->load_subregex_list(saved_subregex);
4500
925
      (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+match_lens[i]+piecelen, &otherpieces)))
1896
    return true;
4493
926
    {
4494
927
      *matched_len = match_lens[i] + piecelen + otherpieces;
4495
928
      return true;
4496
929
    }
4501
930
  }
1897
  }
4502
931
1898
4503
932
  return false;
1899
  return false;
4504
933
}
1900
}
4505
934
1901
4507
935
bool CRegexAscii_piece::match_piece_times(const char *source, 
1902
bool CRegexXQuery_piece::match_piece_times(const char *source, 
4508
936
                                          int *piecelen, 
1903
                                          int *piecelen, 
4509
937
                                          int times,
1904
                                          int times,
4511
938
                                          std::vector<int>    *match_lens)
1905
                                          std::vector<std::pair<int,int> >    *match_lens)
4512
939
{
1906
{
4516
940
  *piecelen = 0;
1907
  int i=0;
4517
941
  for(int i=0;(times < 0) || (i<times);i++)
1908
  if(match_lens && match_lens->size())
4518
942
  {
1909
  {
4519
1910
    i = match_lens->size()-1;
4520
1911
  }
4521
1912
  if(match_lens && match_lens->size())
4522
1913
    *piecelen = match_lens->at(match_lens->size()-1).first;
4523
1914
  else
4524
1915
    *piecelen = 0;
4525
1916
  if((times >= 0) && (i>=times))
4526
1917
    return true;
4527
1918
  for(;(times < 0) || (i<times);i++)
4528
1919
  {
4529
1920
    int   atomlen;
4530
1921
    int   start_from_branch = 0;
4531
1922
    if(match_lens && (i<(int)match_lens->size()))
4532
1923
      start_from_branch = match_lens->at(i).second;
4533
1924
    bool first_branch = (start_from_branch == 0);
4534
1925
    if(!atom->match(source+*piecelen, &start_from_branch, &atomlen, empty_pieces.begin(), empty_pieces.end()))
4535
1926
    {
4536
1927
      if(match_lens)
4537
1928
      {
4538
1929
        if(i >= (int)match_lens->size())
4539
1930
          match_lens->push_back(std::pair<int,int>(*piecelen, 0));
4540
1931
        else
4541
1932
          (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
4542
1933
      }
4543
1934
      return false;
4544
1935
    }
4545
943
    if(match_lens)
1936
    if(match_lens)
4550
944
      match_lens->push_back(*piecelen);
1937
    {
4551
945
    int   atomlen;
1938
      if(i >= (int)match_lens->size())
4552
946
    if(!atom->match(source+*piecelen, &atomlen))
1939
        match_lens->push_back(std::pair<int,int>(*piecelen, start_from_branch));
4553
947
      return false;
1940
      else
4554
1941
        (*match_lens)[i] = std::pair<int,int>(*piecelen, start_from_branch);
4555
1942
    }
4556
948
    *piecelen += atomlen;
1943
    *piecelen += atomlen;
4557
949
    if(!atomlen && !source[*piecelen])
1944
    if(!atomlen && !source[*piecelen])
4558
950
    {
1945
    {
4560
951
      atom->regex_intern->reachedEnd = true;
1946
    //  atom->regex_intern->set_reachedEnd(source);
4561
1947
      break;
4562
1948
    }
4563
1949
    if(first_branch && (atomlen == 0))//avoid infinite loop
4564
1950
    {
4565
952
      break;
1951
      break;
4566
953
    }
1952
    }
4567
954
  }
1953
  }
4568
955
  if(match_lens)
1954
  if(match_lens)
4570
956
    match_lens->push_back(*piecelen);
1955
  {
4571
1956
  //  if(i >= match_lens->size())
4572
1957
      match_lens->push_back(std::pair<int,int>(*piecelen, 0));
4573
1958
  //  else
4574
1959
  //    (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
4575
1960
  }
4576
957
1961
4577
958
  return true;
1962
  return true;
4578
959
}
1963
}
4579
960
1964
4580
1965
bool CRegexXQuery_multicharP::match_internal(const char *source, int *start_from_branch, int *matched_len)
4581
1966
{
4582
1967
  if(!source[0])
4583
1968
  {
4584
1969
    regex_intern->set_reachedEnd(source);
4585
1970
    return false;
4586
1971
  }
4587
1972
  bool found = false;
4588
1973
  const char *temp_source = source;
4589
1974
  unicode::code_point utf8c = utf8::next_char(temp_source);
4590
1975
  switch(multichar_type)
4591
1976
  {
4592
1977
  case unicode::UNICODE_Ll + 50:
4593
1978
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ll) ||
4594
1979
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lm) ||
4595
1980
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lo) ||
4596
1981
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lt) ||
4597
1982
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lu))
4598
1983
    {
4599
1984
      if(!is_reverse)
4600
1985
        found = true;
4601
1986
    }
4602
1987
    else
4603
1988
    {
4604
1989
      if(is_reverse)
4605
1990
        found = true;
4606
1991
    }
4607
1992
    break;
4608
1993
  case unicode::UNICODE_Mc + 50:
4609
1994
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mn) ||
4610
1995
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mc) ||
4611
1996
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Me))
4612
1997
    {
4613
1998
      if(!is_reverse)
4614
1999
        found = true;
4615
2000
    }
4616
2001
    else
4617
2002
    {
4618
2003
      if(is_reverse)
4619
2004
        found = true;
4620
2005
    }
4621
2006
    break;
4622
2007
  case unicode::UNICODE_Nd + 50:
4623
2008
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd) ||
4624
2009
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nl) ||
4625
2010
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_No))
4626
2011
    {
4627
2012
      if(!is_reverse)
4628
2013
        found = true;
4629
2014
    }
4630
2015
    else
4631
2016
    {
4632
2017
      if(is_reverse)
4633
2018
        found = true;
4634
2019
    }
4635
2020
    break;
4636
2021
  case unicode::UNICODE_Pc + 50:
4637
2022
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) ||
4638
2023
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) ||
4639
2024
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) ||
4640
2025
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) ||
4641
2026
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) ||
4642
2027
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) ||
4643
2028
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po))
4644
2029
    {
4645
2030
      if(!is_reverse)
4646
2031
        found = true;
4647
2032
    }
4648
2033
    else
4649
2034
    {
4650
2035
      if(is_reverse)
4651
2036
        found = true;
4652
2037
    }
4653
2038
    break;
4654
2039
  case unicode::UNICODE_Zl + 50:
4655
2040
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) ||
4656
2041
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) ||
4657
2042
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp))
4658
2043
    {
4659
2044
      if(!is_reverse)
4660
2045
        found = true;
4661
2046
    }
4662
2047
    else
4663
2048
    {
4664
2049
      if(is_reverse)
4665
2050
        found = true;
4666
2051
    }
4667
2052
    break;
4668
2053
  case unicode::UNICODE_Sc + 50:
4669
2054
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sm) ||
4670
2055
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sc) ||
4671
2056
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sk) ||
4672
2057
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_So))
4673
2058
    {
4674
2059
      if(!is_reverse)
4675
2060
        found = true;
4676
2061
    }
4677
2062
    else
4678
2063
    {
4679
2064
      if(is_reverse)
4680
2065
        found = true;
4681
2066
    }
4682
2067
    break;
4683
2068
  case unicode::UNICODE_Cc + 50:
4684
2069
    if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) ||
4685
2070
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) ||
4686
2071
        unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co))//ignore unicode::UNICODE_Cn
4687
2072
    {
4688
2073
      if(!is_reverse)
4689
2074
        found = true;
4690
2075
    }
4691
2076
    else
4692
2077
    {
4693
2078
      if(is_reverse)
4694
2079
        found = true;
4695
2080
    }
4696
2081
    break;
4697
2082
  default:
4698
2083
    if(unicode::check_codepoint_category(utf8c, (unicode::category)multichar_type))
4699
2084
    {
4700
2085
      if(!is_reverse)
4701
2086
        found = true;
4702
2087
    }
4703
2088
    else
4704
2089
    {
4705
2090
      if(is_reverse)
4706
2091
        found = true;
4707
2092
    }
4708
2093
    break;
4709
2094
  }
4710
2095
4711
2096
  if(found)
4712
2097
  {
4713
2098
    *matched_len = temp_source - source;
4714
2099
  }
4715
2100
  return found;
4716
2101
}
4717
2102
4718
2103
bool CRegexXQuery_multicharIs::match_internal(const char *source, int *start_from_branch, int *matched_len)
4719
2104
{
4720
2105
  if(!source[0])
4721
2106
  {
4722
2107
    regex_intern->set_reachedEnd(source);
4723
2108
    return false;
4724
2109
  }
4725
2110
  bool found = false;
4726
2111
  const char *temp_source = source;
4727
2112
  unicode::code_point utf8c = utf8::next_char(temp_source);
4728
2113
  const unicode::code_point *cp = block_escape[block_index].cp;
4729
2114
  if((utf8c >= cp[0]) && (utf8c <= cp[1]))
4730
2115
  {
4731
2116
    if(!is_reverse)
4732
2117
      found = true;
4733
2118
  }
4734
2119
  else if(block_escape[block_index].ext_cp)
4735
2120
  {
4736
2121
    cp = block_escape[block_index].ext_cp;
4737
2122
    while(*cp)
4738
2123
    {
4739
2124
      if((utf8c >= cp[0]) && (utf8c <= cp[1]))
4740
2125
        break;
4741
2126
      cp += 2;
4742
2127
    }
4743
2128
    if(*cp)
4744
2129
    {
4745
2130
      if(!is_reverse)
4746
2131
        found = true;
4747
2132
    }
4748
2133
    else
4749
2134
    {
4750
2135
      if(is_reverse)
4751
2136
        found = true;
4752
2137
    }
4753
2138
  }
4754
2139
  else
4755
2140
  {
4756
2141
    if(is_reverse)
4757
2142
      found = true;
4758
2143
  }
4759
2144
  if(found)
4760
2145
  {
4761
2146
    *matched_len = temp_source - source;
4762
2147
  }
4763
2148
  return found;
4764
2149
}
4765
2150
4766
2151
bool CRegexXQuery_multicharOther::match_internal(const char *source, int *start_from_branch, int *matched_len)
4767
2152
{
4768
2153
  if(!source[0])
4769
2154
  {
4770
2155
    regex_intern->set_reachedEnd(source);
4771
2156
    return false;
4772
2157
  }
4773
2158
  bool found = false;
4774
2159
  bool value_true = true;
4775
2160
  const char *temp_source = source;
4776
2161
  unicode::code_point utf8c = utf8::next_char(temp_source);
4777
2162
  switch(multichar_type)
4778
2163
  {
4779
2164
    case 'S':value_true = false;//[^\s]
4780
2165
    case 's'://[#x20\t\n\r]
4781
2166
      switch(utf8c)
4782
2167
      {
4783
2168
      case '\t':
4784
2169
      case '\r':
4785
2170
      case '\n':
4786
2171
      case ' ':
4787
2172
        found = true;
4788
2173
      default:
4789
2174
        break;
4790
2175
      }
4791
2176
      break;
4792
2177
    case 'I':value_true = false;//[^\i]
4793
2178
    case 'i'://the set of initial name characters, those matched by Letter | '_' | ':'
4794
2179
      if((utf8c == '_') ||
4795
2180
        (utf8c == ':') ||
4796
2181
        XQCharType::isLetter(utf8c))
4797
2182
      {
4798
2183
        found = true;
4799
2184
      }
4800
2185
      break;
4801
2186
    case 'C':value_true = false;//[^\c]
4802
2187
    case 'c'://the set of name characters, those matched by NameChar
4803
2188
      if(XQCharType::isNameChar(utf8c))
4804
2189
      {
4805
2190
        found = true;
4806
2191
      }
4807
2192
      break;
4808
2193
    case 'D':value_true = false;//[^\d]
4809
2194
    case 'd':
4810
2195
      if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd))
4811
2196
        found = true;
4812
2197
      break;
4813
2198
    case 'W':value_true = false;//[^\w]
4814
2199
    case 'w':
4815
2200
      found = !(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) ||
4816
2201
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) ||
4817
2202
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) ||
4818
2203
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) ||
4819
2204
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) ||
4820
2205
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) ||
4821
2206
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po) ||
4822
2207
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) ||
4823
2208
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) ||
4824
2209
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp) ||
4825
2210
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) ||
4826
2211
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) ||
4827
2212
                unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co));//ignore unicode::UNICODE_Cn
4828
2213
      break;
4829
2214
    default:
4830
2215
      throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(source, ZED(REGEX_UNIMPLEMENTED)) );
4831
2216
  }
4832
2217
  if((found && value_true) || (!found && !value_true))
4833
2218
  {
4834
2219
    *matched_len = temp_source - source;
4835
2220
    return true;
4836
2221
  }
4837
2222
  else
4838
2223
  {
4839
2224
    return false;
4840
2225
  }
4841
2226
}
4842
2227
4843
2228
bool CRegexXQuery_char_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len)
4844
2229
{
4845
2230
  if(!source[0])
4846
2231
  {
4847
2232
    regex_intern->set_reachedEnd(source);
4848
2233
    return false;
4849
2234
  }
4850
2235
  if(source[0] == c)
4851
2236
  {
4852
2237
    *matched_len = 1;
4853
2238
    return true;
4854
2239
  }
4855
2240
  else
4856
2241
    return false;
4857
2242
}
4858
2243
4859
2244
bool CRegexXQuery_char_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4860
2245
{
4861
2246
  if(!source[0])
4862
2247
  {
4863
2248
    regex_intern->set_reachedEnd(source);
4864
2249
    return false;
4865
2250
  }
4866
2251
  char  sup = toupper(source[0]);
4867
2252
  if(sup == c)
4868
2253
  {
4869
2254
    *matched_len = 1;
4870
2255
    return true;
4871
2256
  }
4872
2257
  else
4873
2258
    return false;
4874
2259
}
4875
2260
4876
2261
bool CRegexXQuery_char_range_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len)
4877
2262
{
4878
2263
  if(!source[0])
4879
2264
  {
4880
2265
    regex_intern->set_reachedEnd(source);
4881
2266
    return false;
4882
2267
  }
4883
2268
  if((source[0] >= c1) && (source[0] <= c2))
4884
2269
  {
4885
2270
    *matched_len = 1;
4886
2271
    return true;
4887
2272
  }
4888
2273
  else
4889
2274
    return false;
4890
2275
}
4891
2276
4892
2277
bool CRegexXQuery_char_range_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4893
2278
{
4894
2279
  if(!source[0])
4895
2280
  {
4896
2281
    regex_intern->set_reachedEnd(source);
4897
2282
    return false;
4898
2283
  }
4899
2284
  char  sup = toupper(source[0]);
4900
2285
  if((sup >= c1) && (sup <= c2))
4901
2286
  {
4902
2287
    *matched_len = 1;
4903
2288
    return true;
4904
2289
  }
4905
2290
  else
4906
2291
    return false;
4907
2292
}
4908
2293
4909
2294
bool CRegexXQuery_char_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len)
4910
2295
{
4911
2296
  if(!source[0])
4912
2297
  {
4913
2298
    regex_intern->set_reachedEnd(source);
4914
2299
    return false;
4915
2300
  }
4916
2301
  if(!memcmp(source, c, len))
4917
2302
  {
4918
2303
    *matched_len = len;
4919
2304
    return true;
4920
2305
  }
4921
2306
  else
4922
2307
    return false;
4923
2308
}
4924
2309
4925
2310
bool CRegexXQuery_char_unicode_cp::match_internal(const char *source, int *start_from_branch, int *matched_len)
4926
2311
{
4927
2312
  if(!source[0])
4928
2313
  {
4929
2314
    regex_intern->set_reachedEnd(source);
4930
2315
    return false;
4931
2316
  }
4932
2317
  const char *temp_source = source;
4933
2318
  unicode::code_point  utf8c = utf8::next_char(temp_source);
4934
2319
  if(utf8c == c)
4935
2320
  {
4936
2321
    *matched_len = temp_source - source;
4937
2322
    return true;
4938
2323
  }
4939
2324
  else
4940
2325
    return false;
4941
2326
}
4942
2327
4943
2328
bool CRegexXQuery_char_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4944
2329
{
4945
2330
  if(!source[0])
4946
2331
  {
4947
2332
    regex_intern->set_reachedEnd(source);
4948
2333
    return false;
4949
2334
  }
4950
2335
  const char *temp_source = source;
4951
2336
  unicode::code_point  sup = unicode::to_upper(utf8::next_char(temp_source));
4952
2337
  if(sup == c)
4953
2338
  {
4954
2339
    *matched_len = temp_source - source;
4955
2340
    return true;
4956
2341
  }
4957
2342
  else
4958
2343
    return false;
4959
2344
}
4960
2345
4961
2346
bool CRegexXQuery_char_range_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len)
4962
2347
{
4963
2348
  if(!source[0])
4964
2349
  {
4965
2350
    regex_intern->set_reachedEnd(source);
4966
2351
    return false;
4967
2352
  }
4968
2353
  const char *temp_source = source;
4969
2354
  unicode::code_point  utf8c = utf8::next_char(temp_source);
4970
2355
  if((utf8c >= c1) && (utf8c <= c2))
4971
2356
  {
4972
2357
    *matched_len = temp_source - source;
4973
2358
    return true;
4974
2359
  }
4975
2360
  else
4976
2361
    return false;
4977
2362
}
4978
2363
4979
2364
bool CRegexXQuery_char_range_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4980
2365
{
4981
2366
  if(!source[0])
4982
2367
  {
4983
2368
    regex_intern->set_reachedEnd(source);
4984
2369
    return false;
4985
2370
  }
4986
2371
  const char *temp_source = source;
4987
2372
  unicode::code_point  sup = unicode::to_upper(utf8::next_char(temp_source));
4988
2373
  if((sup >= c1) && (sup <= c2))
4989
2374
  {
4990
2375
    *matched_len = temp_source - source;
4991
2376
    return true;
4992
2377
  }
4993
2378
  else
4994
2379
    return false;
4995
2380
}
4996
2381
4997
2382
bool CRegexXQuery_endline::match_internal(const char *source, int *start_from_branch, int *matched_len)
4998
2383
{
4999
2384
  *matched_len = 0;
5000
2385
  if(!source[0])
Status:	Superseded
Proposed branch:	lp:~zorba-coders/zorba/no_unicode
Merge into:	lp:zorba
Diff against target:	9029 lines (+3908/-1422) 270 files modified CMakeConfiguration.txt (+5/-5) CMakeLists.txt (+6/-2) ChangeLog (+7/-0) KNOWN_ISSUES.txt (+1/-1) doc/cxx/examples/context.cpp (+4/-0) include/zorba/config.h.cmake (+3/-1) include/zorba/static_context.h (+4/-0) include/zorba/util/time.h (+1/-1) src/CMakeLists.txt (+4/-0) src/api/serialization/serializer.cpp (+36/-33) src/api/serialization/serializer.h (+2/-4) src/diagnostics/diagnostic_en.xml (+116/-27) src/diagnostics/pregenerated/dict_en.cpp (+98/-20) src/precompiled/stdafx.h (+74/-356) src/runtime/full_text/CMakeLists.txt (+3/-3) src/runtime/full_text/default_tokenizer.cpp (+4/-4) src/runtime/full_text/latin_tokenizer.cpp (+3/-2) src/runtime/full_text/latin_tokenizer.h (+9/-8) src/runtime/numerics/format_integer_impl.cpp (+1/-1) src/runtime/numerics/numerics_impl.cpp (+1/-1) src/runtime/strings/strings_impl.cpp (+58/-20) src/store/api/store.h (+1/-1) src/store/naive/simple_store.h (+7/-3) src/store/naive/store.cpp (+1/-1) src/store/naive/store.h (+12/-11) src/system/globalenv.cpp (+7/-7) src/unit_tests/CMakeLists.txt (+2/-2) src/unit_tests/string.cpp (+8/-0) src/unit_tests/unit_test_list.h (+2/-2) src/unit_tests/unit_tests.cpp (+2/-2) src/util/CMakeLists.txt (+4/-4) src/util/icu_streambuf.h (+1/-0) src/util/passthru_streambuf.cpp (+2/-2) src/util/passthru_streambuf.h (+10/-2) src/util/regex.cpp (+96/-82) src/util/regex.h (+22/-34) src/util/regex_xquery.cpp (+1860/-489) src/util/regex_xquery.h (+359/-123) src/util/transcode_streambuf.h (+5/-5) src/util/unicode_categories.cpp (+3/-3) src/util/unicode_categories.h (+44/-37) src/util/unicode_util.cpp (+20/-2) src/util/unicode_util.h (+47/-15) src/util/utf8_util.cpp (+6/-6) src/util/utf8_util.h (+29/-13) src/util/utf8_util.tcc (+10/-2) src/zorbatypes/collation_manager.cpp (+17/-17) src/zorbatypes/collation_manager.h (+3/-3) src/zorbatypes/libicu.h (+0/-32) src/zorbatypes/transcoder.cpp (+8/-4) src/zorbatypes/transcoder.h (+9/-9) 
src/zorbautils/hashmap_itemh.h (+4/-0) src/zorbautils/string_util.cpp (+19/-18) src/zorbautils/string_util.h (+15/-1) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a10.xml.res (+242/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a11.xml.res (+6/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r12.xml.res (+5/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res (+1/-0) test/rbkt/Queries/CMakeLists.txt (+16/-1) test/rbkt/Queries/zorba/string/Regex/regex_a1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a10.xq (+11/-0) test/rbkt/Queries/zorba/string/Regex/regex_a11.xq (+9/-0) test/rbkt/Queries/zorba/string/Regex/regex_a2.xq (+1/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_a3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a6.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a7.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a8.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err1.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err10.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err11.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err12.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err12.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err13.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err13.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err14.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err14.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err15.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err15.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err16.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err16.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err17.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err17.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err18.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err18.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err19.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err19.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err2.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err20.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err20.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err21.spec (+1/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_err21.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err22.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err22.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err23.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err23.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err24.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err24.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err25.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err25.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err3.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err4.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err4.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err5.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err7.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err7.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err8.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err8.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err9.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m12.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m13.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m14.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m15.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m16.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m17.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m18.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_m19.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m2.xq (+1/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_m20.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m21.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m22.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m23.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m24.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m25.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m26.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m27.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m28.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m29.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m30.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m31.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m32.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m33.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m34.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m35.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m36.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m37.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m38.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m39.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m4.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m40.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m41.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m42.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m43.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m44.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m45.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m46.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m47.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m48.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m49.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m5.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m50.xq (+2/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_m51.xq (+2/-0) test/rbkt/Queries/zorba/string/Regex/regex_m52.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m53.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m6.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m7.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m8.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_m9.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq (+17/-0) test/rbkt/Queries/zorba/string/Regex/regex_r1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r12.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_r2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r4.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r6.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t4.xq (+2/-0) test/rbkt/Queries/zorba/string/Regex/regex_t5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/zorba.html (+242/-0) test/rbkt/Queries/zorba/string/Regex/zorba2.html (+5/-0) test/rbkt/Queries/zorba/testdriver/bom_bug.xq (+1/-0) test/unit/static_context.cpp (+2/-0) test/update/CMakeLists.txt (+9/-0)
To merge this branch:	bzr merge lp:~zorba-coders/zorba/no_unicode
Related bugs:	Link a bug report
Reviewer	Date Requested	Status
Matthias Brantner	2012-04-06	Pending
Markos Zaharioudakis	2012-04-06	Pending
Review via email: mp+101052@code.launchpad.net