Merge lp:~zorba-coders/zorba/no_unicode into lp:zorba

Proposed by Paul J. Lucas
Status: Superseded
Proposed branch: lp:~zorba-coders/zorba/no_unicode
Merge into: lp:zorba
Diff against target: 9029 lines (+3908/-1422)
270 files modified
CMakeConfiguration.txt (+5/-5)
CMakeLists.txt (+6/-2)
ChangeLog (+7/-0)
KNOWN_ISSUES.txt (+1/-1)
doc/cxx/examples/context.cpp (+4/-0)
include/zorba/config.h.cmake (+3/-1)
include/zorba/static_context.h (+4/-0)
include/zorba/util/time.h (+1/-1)
src/CMakeLists.txt (+4/-0)
src/api/serialization/serializer.cpp (+36/-33)
src/api/serialization/serializer.h (+2/-4)
src/diagnostics/diagnostic_en.xml (+116/-27)
src/diagnostics/pregenerated/dict_en.cpp (+98/-20)
src/precompiled/stdafx.h (+74/-356)
src/runtime/full_text/CMakeLists.txt (+3/-3)
src/runtime/full_text/default_tokenizer.cpp (+4/-4)
src/runtime/full_text/latin_tokenizer.cpp (+3/-2)
src/runtime/full_text/latin_tokenizer.h (+9/-8)
src/runtime/numerics/format_integer_impl.cpp (+1/-1)
src/runtime/numerics/numerics_impl.cpp (+1/-1)
src/runtime/strings/strings_impl.cpp (+58/-20)
src/store/api/store.h (+1/-1)
src/store/naive/simple_store.h (+7/-3)
src/store/naive/store.cpp (+1/-1)
src/store/naive/store.h (+12/-11)
src/system/globalenv.cpp (+7/-7)
src/unit_tests/CMakeLists.txt (+2/-2)
src/unit_tests/string.cpp (+8/-0)
src/unit_tests/unit_test_list.h (+2/-2)
src/unit_tests/unit_tests.cpp (+2/-2)
src/util/CMakeLists.txt (+4/-4)
src/util/icu_streambuf.h (+1/-0)
src/util/passthru_streambuf.cpp (+2/-2)
src/util/passthru_streambuf.h (+10/-2)
src/util/regex.cpp (+96/-82)
src/util/regex.h (+22/-34)
src/util/regex_xquery.cpp (+1860/-489)
src/util/regex_xquery.h (+359/-123)
src/util/transcode_streambuf.h (+5/-5)
src/util/unicode_categories.cpp (+3/-3)
src/util/unicode_categories.h (+44/-37)
src/util/unicode_util.cpp (+20/-2)
src/util/unicode_util.h (+47/-15)
src/util/utf8_util.cpp (+6/-6)
src/util/utf8_util.h (+29/-13)
src/util/utf8_util.tcc (+10/-2)
src/zorbatypes/collation_manager.cpp (+17/-17)
src/zorbatypes/collation_manager.h (+3/-3)
src/zorbatypes/libicu.h (+0/-32)
src/zorbatypes/transcoder.cpp (+8/-4)
src/zorbatypes/transcoder.h (+9/-9)
src/zorbautils/hashmap_itemh.h (+4/-0)
src/zorbautils/string_util.cpp (+19/-18)
src/zorbautils/string_util.h (+15/-1)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a10.xml.res (+242/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a11.xml.res (+6/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r12.xml.res (+5/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res (+1/-0)
test/rbkt/Queries/CMakeLists.txt (+16/-1)
test/rbkt/Queries/zorba/string/Regex/regex_a1.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a10.xq (+11/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a11.xq (+9/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a2.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a3.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a5.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a6.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a7.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a8.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_a9.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err1.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err1.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err10.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err10.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err11.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err11.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err12.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err12.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err13.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err13.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err14.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err14.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err15.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err15.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err16.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err16.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err17.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err17.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err18.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err18.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err19.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err19.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err2.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err2.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err20.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err20.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err21.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err21.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err22.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err22.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err23.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err23.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err24.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err24.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err25.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err25.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err3.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err3.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err4.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err4.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err5.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err5.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err7.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err7.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err8.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err8.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err9.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_err9.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m1.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m10.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m11.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m12.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m13.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m14.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m15.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m16.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m17.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m18.xq (+3/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m19.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m2.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m20.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m21.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m22.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m23.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m24.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m25.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m26.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m27.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m28.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m29.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m3.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m30.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m31.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m32.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m33.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m34.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m35.xq (+4/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m36.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m37.xq (+4/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m38.xq (+4/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m39.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m4.xq (+6/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m40.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m41.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m42.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m43.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m44.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m45.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m46.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m47.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m48.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m49.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m5.xq (+6/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m50.xq (+2/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m51.xq (+2/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m52.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m53.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m6.xq (+6/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m7.xq (+6/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m8.xq (+7/-0)
test/rbkt/Queries/zorba/string/Regex/regex_m9.xq (+7/-0)
test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq (+17/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r1.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r10.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r11.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r12.xq (+7/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r2.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r3.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r4.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r5.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r6.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_r9.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t1.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t2.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t4.xq (+2/-0)
test/rbkt/Queries/zorba/string/Regex/regex_t5.xq (+1/-0)
test/rbkt/Queries/zorba/string/Regex/zorba.html (+242/-0)
test/rbkt/Queries/zorba/string/Regex/zorba2.html (+5/-0)
test/rbkt/Queries/zorba/testdriver/bom_bug.xq (+1/-0)
test/unit/static_context.cpp (+2/-0)
test/update/CMakeLists.txt (+9/-0)
To merge this branch: bzr merge lp:~zorba-coders/zorba/no_unicode
Reviewer Review Type Date Requested Status
Matthias Brantner Pending
Markos Zaharioudakis Pending
Review via email: mp+101052@code.launchpad.net

This proposal supersedes a proposal from 2012-01-18.

This proposal has been superseded by a proposal from 2012-04-07.

Commit message

"No Unicode" is now "No ICU."

Description of the change

"No Unicode" is now "No ICU."

To post a comment you must log in.
Revision history for this message
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal

Compiling with ZORBA_NO_ICU=ON fails on Linux:

[ 1%] Building CXX object src/CMakeFiles/zorba_simplestore.dir/api/zorba_string.cpp.o
In file included from /home/mbrantner/zorba/sandbox/src/util/regex.h:501:0,
                 from /home/mbrantner/zorba/sandbox/src/api/zorba_string.cpp:23:
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:209:3: error: a class-key must be used when declaring a friend
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:209:3: error: friend declaration does not name a class or function
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:253:3: error: a class-key must be used when declaring a friend
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:253:3: error: friend declaration does not name a class or function
make[2]: *** [src/CMakeFiles/zorba_simplestore.dir/api/zorba_string.cpp.o] Erro

Revision history for this message
Matthias Brantner (matthias-brantner) : Posted in a previous version of this proposal
review: Needs Fixing
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

Revision history for this message
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal

The test suite doesn't run clean on my system (Linux) without ICU. This prevents us from adding the build to the remote queue. For example, the following tests fail without ICU (some of them also seem to fail with ICU):

 1294 - test/rbkt/zorba/string/Regex/regex_a10 (Failed)
 1548 - test/rbkt/zorba/fulltext/ft-wildcard-true-2 (Failed)
 1560 - test/rbkt/zorba/fulltext/ft-wildcard-true-4 (Failed)
 1574 - test/rbkt/zorba/fulltext/ft-same-sentence-true-4 (Failed)
 1581 - test/rbkt/zorba/fulltext/ft-wildcard-true-3 (Failed)
 1587 - test/rbkt/zorba/fulltext/ft-wildcard-true-9 (Failed)
 1600 - test/rbkt/zorba/fulltext/ft-diacritics-insensitive-true-1 (Failed)
 1605 - test/rbkt/zorba/fulltext/ft-wildcard-true-8 (Failed)
 1612 - test/rbkt/zorba/fulltext/ft-wildcard-true-10 (Failed)
 1635 - test/rbkt/zorba/fulltext/ft-wildcard-true-7 (Failed)
 1637 - test/rbkt/zorba/fulltext/ft-wildcard-true-11 (Failed)
 1643 - test/rbkt/zorba/fulltext/ft-wildcard-FTDY0020-3 (Failed)
 1789 - test/rbkt/zorba/index/numbers (Failed)
 2345 - test/unit/string_test (Failed)
 2534 - test/update/zorba/store/sc3 (Failed)
 2544 - doc/cxx/examples/context.cpp (Failed)

Please make sure the test suite runs clean.

review: Needs Fixing
Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

Try it now.

Revision history for this message
Daniel Turcanu (danielturcanu) wrote : Posted in a previous version of this proposal

Before committing this branch, the branch lp:~danielturcanu/zorba/my_conv_module should be merged.

Revision history for this message
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal

FWIW, I've skimmed the change for CMake-related changes, and they all look fine (mostly quite trivial).

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

Attempt to merge into lp:zorba failed due to conflicts:

text conflict in ChangeLog

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job no_unicode-2012-03-30T19-15-23.23Z is finished. The
  final status was:

  6 tests did not succeed - changes not commited.

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job no_unicode-2012-04-03T15-17-37.639Z is finished. The
  final status was:

  6 tests did not succeed - changes not commited.

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job no_unicode-2012-04-06T00-21-13.829Z is finished. The
  final status was:

  6 tests did not succeed - changes not commited.

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

lp:~zorba-coders/zorba/no_unicode updated
10534. By Paul J. Lucas

No longer doing some stuff when q_flag is set.

10535. By Paul J. Lucas

Tweaked one error message.

10536. By Paul J. Lucas

Merge from trunk.

10537. By Rodolfo Ochoa

Merge from trunk

10538. By Rodolfo Ochoa

Strange error on include guards

10539. By Rodolfo Ochoa

merge from trunk

10540. By Rodolfo Ochoa

fix for regex errors in RQ

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'CMakeConfiguration.txt'
2--- CMakeConfiguration.txt 2012-03-28 05:19:57 +0000
3+++ CMakeConfiguration.txt 2012-04-07 00:45:26 +0000
4@@ -135,14 +135,14 @@
5 SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings")
6 MESSAGE (STATUS "ZORBA_DEBUG_STRING: " ${ZORBA_DEBUG_STRING})
7
8-SET(ZORBA_NO_UNICODE OFF CACHE BOOL "disable ICU")
9-MESSAGE(STATUS "ZORBA_NO_UNICODE: " ${ZORBA_NO_UNICODE})
10+SET(ZORBA_NO_ICU OFF CACHE BOOL "disable ICU")
11+MESSAGE(STATUS "ZORBA_NO_ICU: " ${ZORBA_NO_ICU})
12
13-IF (ZORBA_NO_UNICODE)
14+IF (ZORBA_NO_ICU)
15 SET (no_full_text ON)
16-ELSE (ZORBA_NO_UNICODE)
17+ELSE (ZORBA_NO_ICU)
18 SET (no_full_text OFF)
19-ENDIF (ZORBA_NO_UNICODE)
20+ENDIF (ZORBA_NO_ICU)
21 SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support")
22 MESSAGE(STATUS "ZORBA_NO_FULL_TEXT: " ${ZORBA_NO_FULL_TEXT})
23
24
25=== modified file 'CMakeLists.txt'
26--- CMakeLists.txt 2012-03-28 05:19:57 +0000
27+++ CMakeLists.txt 2012-04-07 00:45:26 +0000
28@@ -123,10 +123,14 @@
29 CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T)
30
31 CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF)
32-CHECK_CXX_SOURCE_COMPILES ("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
33-CHECK_CXX_SOURCE_COMPILES ("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
34+SET(CMAKE_EXTRA_INCLUDE_FILES wchar.h)
35+CHECK_TYPE_SIZE("wchar_t" ZORBA_SIZEOF_WCHAR_T)
36+SET(CMAKE_EXTRA_INCLUDE_FILES)
37 CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR)
38
39+CHECK_CXX_SOURCE_COMPILES("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
40+CHECK_CXX_SOURCE_COMPILES("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
41+
42 ################################################################################
43 # Various cmake macros
44
45
46=== modified file 'ChangeLog'
47--- ChangeLog 2012-04-04 15:59:01 +0000
48+++ ChangeLog 2012-04-07 00:45:26 +0000
49@@ -4,6 +4,7 @@
50
51 New Features:
52 * Extended API for Python, Java, PHP and Ruby.
53+ * Added support for NO_ICU (to not use ICU for unicode processing)
54
55 Bug Fixes/Other Changes:
56 * Fixed bug #967864 (var substitution did not update theFreeVars property)
57@@ -148,7 +149,9 @@
58 * Fixed bug when parsing a document with a base-uri attribute.
59 * Fixed bug #863320 (Sentence is incorrectly incremented when token characters end without sentence terminator)
60 * Fixed bug #863730 (static delete-node* functions don't raise ZDDY0012)
61+ * Implemented the probe-index-range-value for general indexes
62 * Removed ZSTR0005 and ZSTR0006 error codes
63+ * Fixed bug #867662 ("nullptr" warning)
64 * Fixed bug #868258 (Assertion failure with two delete collection)
65 * Fixed bug #871623 and #871629 (assertion failures with insertions in dynamic collections)
66 * Fixed bug #867262 (allow reuse of iterator over ExtFuncArgItemSequence)
67@@ -157,6 +160,8 @@
68 * New node-reference module. References can be obtained for any node, and
69 different nodes cannot have the same identifier.
70 * Fixed bug #872697 (segmentation fault with validation of NMTOKENS)
71+ * General index cannot be declared as unique if the type of its key is
72+ xs:anyAtomicType or xs:untypedAtomic.
73 * Added undo for node revalidation
74 * Optimization for count(collection()) expressions
75 * Fixed bug #872796 (validate-in-place can interfere with other update primitives)
76@@ -175,6 +180,8 @@
77 * Fixed bug #855715 (Invalid escaped characters in regex not caught)
78 * Fixed bug #862089 (Split binary/xq install directories for modules) by
79 splitting "module path" into separate URI and Library paths
80+ * New node-position module. This module allows to obtain a representation of a node position, which
81+ can be used to assess structural relationships with other nodes.
82 * Fixed bug #872502 (validation of the JSON module xqdoc fails)
83 * Fixed bug #897619 (testdriver_mt can not run the XQueryX tests)
84 * Fixed bug #867107 (xqdoc dependency to zorba is wrong)
85
86=== modified file 'KNOWN_ISSUES.txt'
87--- KNOWN_ISSUES.txt 2012-03-28 05:19:57 +0000
88+++ KNOWN_ISSUES.txt 2012-04-07 00:45:26 +0000
89@@ -37,7 +37,7 @@
90 * The serializer currently doesn't implement character maps as specified
91 (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps)
92
93-* In the 2.0 release, setting the CMake variables ZORBA_NO_UNICODE to
94+* In the 2.0 release, setting the CMake variables ZORBA_NO_ICU to
95 ON is not supported.
96
97 * The PHP language binding is not supported on Mac OS X. For details,
98
99=== modified file 'doc/cxx/examples/context.cpp'
100--- doc/cxx/examples/context.cpp 2012-03-28 05:19:57 +0000
101+++ doc/cxx/examples/context.cpp 2012-04-07 00:45:26 +0000
102@@ -149,7 +149,11 @@
103 outStream2 << lQuery << std::endl;
104 std::cout << outStream2.str() << std::endl;
105
106+#ifndef ZORBA_NO_ICU
107 if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n")
108+#else
109+ if (outStream2.str() != "<?xml version=\"1.0\"?>\nBook 1.1\n")
110+#endif /* ZORBA_NO_ICU */
111 {
112 std::cerr << "Test 4 failed with a wrong result : " << std::endl
113 << outStream2.str() << std::endl;
114
115=== modified file 'include/zorba/config.h.cmake'
116--- include/zorba/config.h.cmake 2012-03-28 05:19:57 +0000
117+++ include/zorba/config.h.cmake 2012-04-07 00:45:26 +0000
118@@ -96,6 +96,8 @@
119 typedef __int64 int64_t;
120 #endif /* ZORBA_HAVE_INT64_T */
121
122+#cmakedefine ZORBA_SIZEOF_WCHAR_T @ZORBA_SIZEOF_WCHAR_T@
123+
124 // Compiler
125 #cmakedefine CLANG
126 #cmakedefine MSVC
127@@ -148,7 +150,7 @@
128
129 // Zorba features
130 #cmakedefine ZORBA_NO_FULL_TEXT
131-#cmakedefine ZORBA_NO_UNICODE
132+#cmakedefine ZORBA_NO_ICU
133 #cmakedefine ZORBA_NO_XMLSCHEMA
134 #cmakedefine ZORBA_NUMERIC_OPTIMIZATION
135 #cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE
136
137=== modified file 'include/zorba/static_context.h'
138--- include/zorba/static_context.h 2012-03-28 05:19:57 +0000
139+++ include/zorba/static_context.h 2012-04-07 00:45:26 +0000
140@@ -26,9 +26,13 @@
141 #include <zorba/function.h>
142 #include <zorba/annotation.h>
143 #include <zorba/smart_ptr.h>
144+#include <zorba/smart_ptr.h>
145 #ifndef ZORBA_NO_FULL_TEXT
146 #include <zorba/thesaurus.h>
147 #endif /* ZORBA_NO_FULL_TEXT */
148+#include <zorba/zorba.h>
149+#include <zorba/store_manager.h>
150+#include <zorba/zorba_exception.h>
151
152 namespace zorba {
153
154
155=== modified file 'include/zorba/util/time.h'
156--- include/zorba/util/time.h 2012-03-28 05:19:57 +0000
157+++ include/zorba/util/time.h 2012-04-07 00:45:26 +0000
158@@ -178,7 +178,7 @@
159
160 inline long get_walltime_in_millis(const walltime& t)
161 {
162- return t.time * 1000 + t.millitm;
163+ return (long)(t.time * 1000 + t.millitm);
164 }
165
166 #else /* not Windows, and no clock_gettime() */
167
168=== modified file 'src/CMakeLists.txt'
169--- src/CMakeLists.txt 2012-03-28 05:19:57 +0000
170+++ src/CMakeLists.txt 2012-04-07 00:45:26 +0000
171@@ -59,7 +59,10 @@
172 #
173 # Next, add the files to be compiled into the library
174 #
175+
176+MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
177 SET(ZORBA_PRECOMPILED_HEADERS OFF CACHE BOOL "Activate Zorba precompiled headers.")
178+MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
179
180 SET(ZORBA_SRCS)
181 ADD_SRC_SUBFOLDER(ZORBA_SRCS api API_SRCS)
182@@ -97,6 +100,7 @@
183 ENDIF(ZORBA_WITH_DEBUGGER)
184 ADD_SRC_SUBFOLDER(ZORBA_SRCS unit_tests UNIT_TEST_SRCS)
185
186+MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS})
187 IF(ZORBA_PRECOMPILED_HEADERS)
188 ADD_SRC_SUBFOLDER(ZORBA_SRCS precompiled ZORBAMISC_SRCS)
189 INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src/precompiled")
190
191=== modified file 'src/api/serialization/serializer.cpp'
192--- src/api/serialization/serializer.cpp 2012-03-28 05:19:57 +0000
193+++ src/api/serialization/serializer.cpp 2012-04-07 00:45:26 +0000
194@@ -180,7 +180,6 @@
195 for (; chars < chars_end; chars++ )
196 {
197
198-#ifndef ZORBA_NO_UNICODE
199 // the input string is UTF-8
200 int char_length = utf8::char_length(*chars);
201 if (char_length == 0)
202@@ -217,7 +216,6 @@
203
204 continue;
205 }
206-#endif//ZORBA_NO_UNICODE
207
208 // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character
209 if (ser && ser->method == PARAMETER_VALUE_XML &&
210@@ -332,14 +330,12 @@
211 {
212 tr << (char)0xEF << (char)0xBB << (char)0xBF;
213 }
214-#ifndef ZORBA_NO_UNICODE
215 else if (ser->encoding == PARAMETER_VALUE_UTF_16)
216 {
217 // Little-endian
218 tr.verbatim((char)0xFF);
219 tr.verbatim((char)0xFE);
220 }
221-#endif
222 }
223 }
224
225@@ -862,13 +858,17 @@
226 emitter::emit_declaration();
227
228 if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) {
229- tr << "<?xml version=\"" << ser->version << "\" encoding=\"";
230- if (ser->encoding == PARAMETER_VALUE_UTF_8) {
231- tr << "UTF-8";
232-#ifndef ZORBA_NO_UNICODE
233- } else if (ser->encoding == PARAMETER_VALUE_UTF_16) {
234- tr << "UTF-16";
235-#endif
236+ tr << "<?xml version=\"" << ser->version;
237+ switch (ser->encoding) {
238+ case PARAMETER_VALUE_UTF_8:
239+ case PARAMETER_VALUE_UTF_16:
240+ tr << "\" encoding=\"";
241+ switch (ser->encoding) {
242+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
243+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
244+ default : ZORBA_ASSERT(false);
245+ }
246+ break;
247 }
248 tr << "\"";
249
250@@ -1174,14 +1174,18 @@
251 }
252
253 tr << "<meta http-equiv=\"content-type\" content=\""
254- << ser->media_type << "; charset=";
255-
256- if (ser->encoding == PARAMETER_VALUE_UTF_8)
257- tr << "UTF-8";
258-#ifndef ZORBA_NO_UNICODE
259- else if (ser->encoding == PARAMETER_VALUE_UTF_16)
260- tr << "UTF-16";
261-#endif
262+ << ser->media_type;
263+ switch (ser->encoding) {
264+ case PARAMETER_VALUE_UTF_8:
265+ case PARAMETER_VALUE_UTF_16:
266+ tr << "\" charset=\"";
267+ switch (ser->encoding) {
268+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
269+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
270+ default : ZORBA_ASSERT(false);
271+ }
272+ break;
273+ }
274 tr << "\"";
275 // closed_parent_tag = 1;
276 }
277@@ -1371,14 +1375,18 @@
278 }
279
280 tr << "<meta http-equiv=\"content-type\" content=\""
281- << ser->media_type << "; charset=";
282-
283- if (ser->encoding == PARAMETER_VALUE_UTF_8)
284- tr << "UTF-8";
285-#ifndef ZORBA_NO_UNICODE
286- else if (ser->encoding == PARAMETER_VALUE_UTF_16)
287- tr << "UTF-16";
288-#endif
289+ << ser->media_type;
290+ switch (ser->encoding) {
291+ case PARAMETER_VALUE_UTF_8:
292+ case PARAMETER_VALUE_UTF_16:
293+ tr << "\" charset=\"";
294+ switch (ser->encoding) {
295+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
296+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
297+ default : ZORBA_ASSERT(false);
298+ }
299+ break;
300+ }
301 tr << "\"/";
302 //closed_parent_tag = 1;
303 }
304@@ -2098,10 +2106,8 @@
305 {
306 if (!strcmp(aValue, "UTF-8"))
307 encoding = PARAMETER_VALUE_UTF_8;
308-#ifndef ZORBA_NO_UNICODE
309 else if (!strcmp(aValue, "UTF-16"))
310 encoding = PARAMETER_VALUE_UTF_16;
311-#endif
312 else
313 throw XQUERY_EXCEPTION(
314 err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) )
315@@ -2210,16 +2216,13 @@
316 {
317 tr = new transcoder(os, false);
318 }
319-#ifndef ZORBA_NO_UNICODE
320 else if (encoding == PARAMETER_VALUE_UTF_16)
321 {
322 tr = new transcoder(os, true);
323 }
324-#endif
325 else
326 {
327- ZORBA_ASSERT(0);
328- return false;
329+ ZORBA_ASSERT(false);
330 }
331
332 if (method == PARAMETER_VALUE_XML)
333
334=== modified file 'src/api/serialization/serializer.h'
335--- src/api/serialization/serializer.h 2012-03-28 05:19:57 +0000
336+++ src/api/serialization/serializer.h 2012-04-07 00:45:26 +0000
337@@ -70,10 +70,8 @@
338 PARAMETER_VALUE_TEXT,
339 PARAMETER_VALUE_BINARY,
340
341- PARAMETER_VALUE_UTF_8
342-#ifndef ZORBA_NO_UNICODE
343- ,PARAMETER_VALUE_UTF_16
344-#endif
345+ PARAMETER_VALUE_UTF_8,
346+ PARAMETER_VALUE_UTF_16
347 } PARAMETER_VALUE_TYPE;
348
349 protected:
350
351=== modified file 'src/diagnostics/diagnostic_en.xml'
352--- src/diagnostics/diagnostic_en.xml 2012-03-28 05:19:57 +0000
353+++ src/diagnostics/diagnostic_en.xml 2012-04-07 00:45:26 +0000
354@@ -2517,11 +2517,11 @@
355 <value>attribute node</value>
356 </entry>
357
358- <entry key="BackRef0Illegal">
359+ <entry key="BackRef0Illegal" if="!defined(ZORBA_NO_ICU)">
360 <value>"0": illegal backreference</value>
361 </entry>
362
363- <entry key="BackRefIllegalInCharClass">
364+ <entry key="BackRefIllegalInCharClass" if="!defined(ZORBA_NO_ICU)">
365 <value>backreference illegal in character class</value>
366 </entry>
367
368@@ -2569,7 +2569,7 @@
369 <value>invalid library module</value>
370 </entry>
371
372- <entry key="BadRegexEscape_3">
373+ <entry key="BadRegexEscape_3" if="!defined(ZORBA_NO_ICU)">
374 <value>"$3": illegal escape character</value>
375 </entry>
376
377@@ -3029,7 +3029,7 @@
378 <value>nodeid component too big for encoding</value>
379 </entry>
380
381- <entry key="NonClosedBackRef_3">
382+ <entry key="NonClosedBackRef_3" if="!defined(ZORBA_NO_ICU)">
383 <value>'$$3': non-closed backreference</value>
384 </entry>
385
386@@ -3041,7 +3041,7 @@
387 <value>non-localhost authority</value>
388 </entry>
389
390- <entry key="NonexistentBackRef_3">
391+ <entry key="NonexistentBackRef_3" if="!defined(ZORBA_NO_ICU)">
392 <value>'$$3': non-existent backreference</value>
393 </entry>
394
395@@ -3193,94 +3193,183 @@
396 <value>item type is not a subtype of "$3"</value>
397 </entry>
398
399- <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_UNICODE)">
400+ <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_ICU)">
401 <value>unrecognized backslash escape sequence</value>
402 </entry>
403
404- <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_UNICODE)">
405+ <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_ICU)">
406 <value>error in {min,max} interval</value>
407 </entry>
408
409- <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_UNICODE)">
410+ <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_ICU)">
411 <value>an internal ICU error (bug) was detected</value>
412 </entry>
413
414- <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_UNICODE)">
415+ <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_ICU)">
416 <value>backreference to a non-existent capture group</value>
417 </entry>
418
419- <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_UNICODE)">
420+ <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_ICU)">
421 <value>invalid value for match mode flags</value>
422 </entry>
423
424- <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_UNICODE)">
425+ <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_ICU)">
426 <value>in character range [x-y], x is greater than y</value>
427 </entry>
428
429- <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_UNICODE)">
430+ <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_ICU)">
431 <value>RegexMatcher in invalid state for requested operation</value>
432 </entry>
433
434- <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_UNICODE)">
435+ <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_ICU)">
436 <value>look-behind pattern matches must have a bounded maximum length</value>
437 </entry>
438
439- <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_UNICODE)">
440+ <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_ICU)">
441 <value>in {min,max}, max is less than min</value>
442 </entry>
443
444- <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_UNICODE)">
445+ <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_ICU)">
446 <value>incorrectly nested parentheses</value>
447 </entry>
448
449- <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_UNICODE)">
450+ <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_ICU)">
451 <value>missing ']'</value>
452 </entry>
453
454- <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
455+ <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
456 <value>decimal number is too large</value>
457 </entry>
458
459- <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
460+ <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
461 <value>octal character constants must be &lt;= 0377</value>
462 </entry>
463
464- <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
465+ <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_ICU)">
466 <value>incorrect Unicode property</value>
467 </entry>
468
469- <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
470+ <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_ICU)">
471 <value>syntax error</value>
472 </entry>
473
474- <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_UNICODE)">
475+ <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_ICU)">
476 <value>can not have UnicodeSets containing strings</value>
477 </entry>
478
479- <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_UNICODE)">
480+ <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_ICU)">
481 <value>backtrack stack overflow</value>
482 </entry>
483
484- <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_UNICODE)">
485+ <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_ICU)">
486 <value>matching operation aborted by user callback fn</value>
487 </entry>
488
489- <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_UNICODE)">
490+ <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_ICU)">
491 <value>maximum allowed match time exceeded</value>
492 </entry>
493
494- <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_UNICODE)">
495- <value>use of regular expression feature that is not yet implemented</value>
496+ <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_ICU)">
497+ <value>use of regular expression feature that is not yet implemented</value>
498+ </entry>
499+
500+ <!-- Regex ASCII error messages -->
501+ <entry key="REGEX_UNIMPLEMENTED" if="defined(ZORBA_NO_ICU)">
502+ <value>use of regular expression feature that is not yet implemented</value>
503+ </entry>
504+
505+ <entry key="REGEX_MISMATCHED_PAREN" if="defined(ZORBA_NO_ICU)">
506+ <value>incorrectly nested parentheses</value>
507+ </entry>
508+
509+ <entry key="REGEX_BROKEN_P_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
510+ <value>broken \\p construct</value>
511+ </entry>
512+
513+ <entry key="REGEX_UNKNOWN_PL_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
514+ <value>unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo</value>
515+ </entry>
516+
517+ <entry key="REGEX_UNKNOWN_PM_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
518+ <value>unknown \\p{M?} category; supported categories: M, Mn, Mc, Me</value>
519+ </entry>
520+
521+ <entry key="REGEX_UNKNOWN_PN_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
522+ <value>unknown \\p{N?} category; supported categories: N, Nd, Nl, No</value>
523+ </entry>
524+
525+ <entry key="REGEX_UNKNOWN_PP_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
526+ <value>unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po</value>
527+ </entry>
528+
529+ <entry key="REGEX_UNKNOWN_PZ_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
530+ <value>unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp</value>
531+ </entry>
532+
533+ <entry key="REGEX_UNKNOWN_PS_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
534+ <value>unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So</value>
535+ </entry>
536+
537+ <entry key="REGEX_UNKNOWN_PC_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
538+ <value>unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)</value>
539+ </entry>
540+
541+ <entry key="REGEX_BROKEN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
542+ <value>broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]</value>
543+ </entry>
544+
545+ <entry key="REGEX_UNKNOWN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
546+ <value>unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes</value>
547+ </entry>
548+
549+ <entry key="REGEX_INVALID_UNICODE_CODEPOINT_u" if="defined(ZORBA_NO_ICU)">
550+ <value>invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX</value>
551+ </entry>
552+
553+ <entry key="REGEX_UNKNOWN_ESC_CHAR" if="defined(ZORBA_NO_ICU)">
554+ <value>unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups</value>
555+ </entry>
556+
557+ <entry key="REGEX_INVALID_BACK_REF" if="defined(ZORBA_NO_ICU)">
558+ <value>\\$3 backreference to a non-existent capture group ($4 groups so far)</value>
559+ </entry>
560+
561+ <entry key="REGEX_INVALID_ATOM_CHAR" if="defined(ZORBA_NO_ICU)">
562+ <value>'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]</value>
563+ </entry>
564+
565+ <entry key="REGEX_INVALID_SUBCLASS" if="defined(ZORBA_NO_ICU)">
566+ <value>malformed class subtraction</value>
567+ </entry>
568+
569+ <entry key="REGEX_INVALID_USE_OF_SUBCLASS" if="defined(ZORBA_NO_ICU)">
570+ <value>improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]</value>
571+ </entry>
572+
573+ <entry key="REGEX_MULTICHAR_IN_CHAR_RANGE" if="defined(ZORBA_NO_ICU)">
574+ <value>multichars or char categories cannot be part of a char range</value>
575+ </entry>
576+
577+ <entry key="REGEX_MISSING_CLOSE_BRACKET" if="defined(ZORBA_NO_ICU)">
578+ <value>missing ']' in character group</value>
579+ </entry>
580+
581+ <entry key="REGEX_MAX_LT_MIN" if="defined(ZORBA_NO_ICU)">
582+ <value>in {min,max}, max is less than min</value>
583 </entry>
584
585 <entry key="UnaryArithOp">
586 <value>unary arithmetic operator</value>
587 </entry>
588
589- <entry key="UnbalancedChar_3">
590+ <entry key="UnbalancedChar_3" if="!defined(ZORBA_NO_ICU)">
591 <value>missing '$3'</value>
592 </entry>
593
594+ <entry key="UnescapedChar_3" if="!defined(ZORBA_NO_ICU)">
595+ <value>character '$3' must be escaped here</value>
596+ </entry>
597+
598 <entry key="UnexpectedElement">
599 <value>unexpected element</value>
600 </entry>
601
602=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
603--- src/diagnostics/pregenerated/dict_en.cpp 2012-03-28 05:19:57 +0000
604+++ src/diagnostics/pregenerated/dict_en.cpp 2012-04-07 00:45:26 +0000
605@@ -437,8 +437,12 @@
606 { "~AtomizationOfGroupByMakesMoreThanOneItem", "atomization of groupby variable produces more than one item" },
607 { "~AttributeName", "attribute name" },
608 { "~AttributeNode", "attribute node" },
609+#if !defined(ZORBA_NO_ICU)
610 { "~BackRef0Illegal", "\"0\": illegal backreference" },
611+#endif
612+#if !defined(ZORBA_NO_ICU)
613 { "~BackRefIllegalInCharClass", "backreference illegal in character class" },
614+#endif
615 { "~BadAnyURI", "invalid xs:anyURI" },
616 { "~BadArgTypeForFn_2o34o", "${\"2\": }invalid argument type for function $3()${: 4}" },
617 { "~BadCharAfter_34", "'$3': illegal character after '$4'" },
618@@ -451,7 +455,9 @@
619 { "~BadIterator", "invalid iterator" },
620 { "~BadLibraryModule", "invalid library module" },
621 { "~BadPath", "invalid path" },
622+#if !defined(ZORBA_NO_ICU)
623 { "~BadRegexEscape_3", "\"$3\": illegal escape character" },
624+#endif
625 { "~BadStreamState", "bad I/O stream state" },
626 { "~BadTokenInBraces_3", "\"$3\": illegal token within { }" },
627 { "~BadTraceStream", "trace stream not retrievable using SerializationCallback" },
628@@ -567,10 +573,14 @@
629 { "~NoUntypedKeyNodeValue_2", "node with untyped key value found during probe on index \"$2\"" },
630 { "~NodeIDNeedsBytes_2", "nodeid requires more than $2 bytes" },
631 { "~NodeIDTooBig", "nodeid component too big for encoding" },
632+#if !defined(ZORBA_NO_ICU)
633 { "~NonClosedBackRef_3", "'$$3': non-closed backreference" },
634+#endif
635 { "~NonFileThesaurusURI", "non-file thesaurus URI" },
636 { "~NonLocalhostAuthority", "non-localhost authority" },
637+#if !defined(ZORBA_NO_ICU)
638 { "~NonexistentBackRef_3", "'$$3': non-existent backreference" },
639+#endif
640 { "~NotAllowedForTypeName", "not allowed for typeName (use xsd:untyped instead)" },
641 { "~NotAmongInScopeSchemaTypes", "not among in-scope schema types" },
642 { "~NotDefInDynamicCtx", "not defined in dynamic context" },
643@@ -589,6 +599,69 @@
644 { "~ParserNoCreateTree", "XML tree creation failed" },
645 { "~PromotionImpossible", "promotion not possible" },
646 { "~QuotedColon_23", "\"$2\": $3" },
647+#if defined(ZORBA_NO_ICU)
648+ { "~REGEX_BROKEN_PIs_CONSTRUCT", "broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]" },
649+#endif
650+#if defined(ZORBA_NO_ICU)
651+ { "~REGEX_BROKEN_P_CONSTRUCT", "broken \\p construct" },
652+#endif
653+#if defined(ZORBA_NO_ICU)
654+ { "~REGEX_INVALID_ATOM_CHAR", "'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]" },
655+#endif
656+#if defined(ZORBA_NO_ICU)
657+ { "~REGEX_INVALID_BACK_REF", "\\$3 backreference to a non-existent capture group ($4 groups so far)" },
658+#endif
659+#if defined(ZORBA_NO_ICU)
660+ { "~REGEX_INVALID_SUBCLASS", "malformed class subtraction" },
661+#endif
662+#if defined(ZORBA_NO_ICU)
663+ { "~REGEX_INVALID_UNICODE_CODEPOINT_u", "invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX" },
664+#endif
665+#if defined(ZORBA_NO_ICU)
666+ { "~REGEX_INVALID_USE_OF_SUBCLASS", "improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]" },
667+#endif
668+#if defined(ZORBA_NO_ICU)
669+ { "~REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
670+#endif
671+#if defined(ZORBA_NO_ICU)
672+ { "~REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
673+#endif
674+#if defined(ZORBA_NO_ICU)
675+ { "~REGEX_MISSING_CLOSE_BRACKET", "missing ']' in character group" },
676+#endif
677+#if defined(ZORBA_NO_ICU)
678+ { "~REGEX_MULTICHAR_IN_CHAR_RANGE", "multichars or char categories cannot be part of a char range" },
679+#endif
680+#if defined(ZORBA_NO_ICU)
681+ { "~REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
682+#endif
683+#if defined(ZORBA_NO_ICU)
684+ { "~REGEX_UNKNOWN_ESC_CHAR", "unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups" },
685+#endif
686+#if defined(ZORBA_NO_ICU)
687+ { "~REGEX_UNKNOWN_PC_CONSTRUCT", "unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)" },
688+#endif
689+#if defined(ZORBA_NO_ICU)
690+ { "~REGEX_UNKNOWN_PIs_CONSTRUCT", "unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes" },
691+#endif
692+#if defined(ZORBA_NO_ICU)
693+ { "~REGEX_UNKNOWN_PL_CONSTRUCT", "unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo" },
694+#endif
695+#if defined(ZORBA_NO_ICU)
696+ { "~REGEX_UNKNOWN_PM_CONSTRUCT", "unknown \\p{M?} category; supported categories: M, Mn, Mc, Me" },
697+#endif
698+#if defined(ZORBA_NO_ICU)
699+ { "~REGEX_UNKNOWN_PN_CONSTRUCT", "unknown \\p{N?} category; supported categories: N, Nd, Nl, No" },
700+#endif
701+#if defined(ZORBA_NO_ICU)
702+ { "~REGEX_UNKNOWN_PP_CONSTRUCT", "unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po" },
703+#endif
704+#if defined(ZORBA_NO_ICU)
705+ { "~REGEX_UNKNOWN_PS_CONSTRUCT", "unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So" },
706+#endif
707+#if defined(ZORBA_NO_ICU)
708+ { "~REGEX_UNKNOWN_PZ_CONSTRUCT", "unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp" },
709+#endif
710 { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" },
711 { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" },
712 { "~SchemaAttributeName", "schema-attribute name" },
713@@ -610,68 +683,73 @@
714 { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" },
715 { "~TwoDefaultDecimalFormats", "two default decimal formats" },
716 { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" },
717-#if !defined(ZORBA_NO_UNICODE)
718+#if !defined(ZORBA_NO_ICU)
719 { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" },
720 #endif
721-#if !defined(ZORBA_NO_UNICODE)
722+#if !defined(ZORBA_NO_ICU)
723 { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" },
724 #endif
725-#if !defined(ZORBA_NO_UNICODE)
726+#if !defined(ZORBA_NO_ICU)
727 { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" },
728 #endif
729-#if !defined(ZORBA_NO_UNICODE)
730+#if !defined(ZORBA_NO_ICU)
731 { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" },
732 #endif
733-#if !defined(ZORBA_NO_UNICODE)
734+#if !defined(ZORBA_NO_ICU)
735 { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" },
736 #endif
737-#if !defined(ZORBA_NO_UNICODE)
738+#if !defined(ZORBA_NO_ICU)
739 { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" },
740 #endif
741-#if !defined(ZORBA_NO_UNICODE)
742+#if !defined(ZORBA_NO_ICU)
743 { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" },
744 #endif
745-#if !defined(ZORBA_NO_UNICODE)
746+#if !defined(ZORBA_NO_ICU)
747 { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" },
748 #endif
749-#if !defined(ZORBA_NO_UNICODE)
750+#if !defined(ZORBA_NO_ICU)
751 { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
752 #endif
753-#if !defined(ZORBA_NO_UNICODE)
754+#if !defined(ZORBA_NO_ICU)
755 { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
756 #endif
757-#if !defined(ZORBA_NO_UNICODE)
758+#if !defined(ZORBA_NO_ICU)
759 { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" },
760 #endif
761-#if !defined(ZORBA_NO_UNICODE)
762+#if !defined(ZORBA_NO_ICU)
763 { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" },
764 #endif
765-#if !defined(ZORBA_NO_UNICODE)
766+#if !defined(ZORBA_NO_ICU)
767 { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" },
768 #endif
769-#if !defined(ZORBA_NO_UNICODE)
770+#if !defined(ZORBA_NO_ICU)
771 { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" },
772 #endif
773-#if !defined(ZORBA_NO_UNICODE)
774+#if !defined(ZORBA_NO_ICU)
775 { "~U_REGEX_RULE_SYNTAX", "syntax error" },
776 #endif
777-#if !defined(ZORBA_NO_UNICODE)
778+#if !defined(ZORBA_NO_ICU)
779 { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" },
780 #endif
781-#if !defined(ZORBA_NO_UNICODE)
782+#if !defined(ZORBA_NO_ICU)
783 { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" },
784 #endif
785-#if !defined(ZORBA_NO_UNICODE)
786+#if !defined(ZORBA_NO_ICU)
787 { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" },
788 #endif
789-#if !defined(ZORBA_NO_UNICODE)
790+#if !defined(ZORBA_NO_ICU)
791 { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" },
792 #endif
793-#if !defined(ZORBA_NO_UNICODE)
794+#if !defined(ZORBA_NO_ICU)
795 { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
796 #endif
797 { "~UnaryArithOp", "unary arithmetic operator" },
798+#if !defined(ZORBA_NO_ICU)
799 { "~UnbalancedChar_3", "missing '$3'" },
800+#endif
801+#if !defined(ZORBA_NO_ICU)
802+ { "~UnescapedChar_3", "character '$3' must be escaped here" },
803+#endif
804 { "~UnexpectedElement", "unexpected element" },
805 { "~VarValMustBeSingleItem_2", "\"$2\": variable value must be single item" },
806 { "~Variable", "variable" },
807
808=== modified file 'src/precompiled/stdafx.h'
809--- src/precompiled/stdafx.h 2012-03-28 05:19:57 +0000
810+++ src/precompiled/stdafx.h 2012-04-07 00:45:26 +0000
811@@ -15,363 +15,81 @@
812
813 */
814
815-#if defined STDAFX
816-#include <iostream>
817-#include <stdexcept>
818-#include <cassert>
819-#include <cstring>
820-#include <memory>
821-
822-#include <sstream>
823-#include <xfwrap>
824-#include <xfwrap1>
825-#include <istream>
826-#include <cstdio>
827-#include <xxshared>
828-#include <crtdefs.h>
829-#include <map>
830-#include <set>
831-//#include <poppack.h>
832-//#include <xxtype_traits>
833-//#include <xxcallwrap>
834-
835-// #include <xxcallpmf>
836-// //#include <xxbind0>
837-// //#include <xxbind1>
838-// //#include <xxresult>
839-// #include <zorba/audit.h>
840-// #include "api/auditimpl.h"
841-// #include <zorba/audit.h>
842-
843- //#include "unicode/unistr.h"
844- #include "runtime/sequences/sequences.h"
845- #include "diagnostics/xquery_diagnostics.h"
846- #include "xercesc/util/xercesdefs.hpp"
847- #include "runtime/collections/collections.h"
848- #include "unicode/utypes.h"
849- #include "zorba/config.h"
850- #include "store/api/store.h"
851- #include "zorba/zorba.h"
852- #include "zorba/api_shared_types.h"
853- #include "compiler/parsetree/parsenodes.h"
854- #include "compiler/parser/parse_constants.h"
855- //#include "compiler/api/compilercb.h"
856- #include "zorbautils/checked_vector.h"
857- #include "compiler/parser/xquery_driver.h"
858- #include "util/sorter.h"
859- #include "compiler/xqueryx/xqueryx_to_xquery.h"
860-// #include "compiler/xqueryx/xqueryx_xslt.h"
861-//#include "compiler/parser/xquery_scanner.h"
862-//#include "compiler/parsetree/parsenode_base.h"
863-//#include "compiler/parsetree/parsenode_visitor.h"
864-// #include "runtime/core/flwor_iterator.h"
865-// #include "context/static_context.h"
866-// #include "zorbautils/fatal.h"
867-// #include "runtime/base/unarybase.h"
868-// #include "compiler/expression/expr_consts.h"
869-// #include "api/iterator_singleton.h"
870-// #include "runtime/visitors/printer_visitor_api.h"
871-// //#include "compiler/parsetree/parsenode_print_dot_visitor.h"
872-// //#include "compiler/parsetree/parsenode_print_dot_visitor.h"
873-// //#include "runtime/visitors/planiter_visitor_impl_code.h"
874-// //#include "runtime/visitors/planiter_visitor_impl_include.h"
875-// //#include "runtime/visitors/printer_visitor_impl.h"
876-// //#include "runtime/core/path.h"
877-// #include "compiler/expression/ft_expr.h"
878-// #include "compiler/expression/ftnode.h"
879-// #include "compiler/parser/query_loc.h"
880+#ifdef STDAFX
881+
882+ #include <fstream>
883+ #include <iostream>
884+ #include <stdexcept>
885+ #include <cassert>
886+ #include <cstring>
887+ #include <memory>
888+
889+ #include <sstream>
890+ #include <xfwrap>
891+ #include <xfwrap1>
892+ #include <istream>
893+ #include <cstdio>
894+ #include <xxshared>
895+ #include <crtdefs.h>
896+ #include <map>
897+ #include <set>
898+
899+ #include "runtime/sequences/sequences.h"
900+ #include "diagnostics/xquery_diagnostics.h"
901+ #include "xercesc/util/xercesdefs.hpp"
902+ #include "runtime/collections/collections.h"
903+ #include "unicode/utypes.h"
904+ #include "zorba/config.h"
905+ #include "store/api/store.h"
906+ #include "zorba/zorba.h"
907+ #include "zorba/api_shared_types.h"
908+ #include "compiler/parsetree/parsenodes.h"
909+ #include "compiler/parser/parse_constants.h"
910+ #include "zorbautils/checked_vector.h"
911+ #include "compiler/parser/xquery_driver.h"
912+ #include "util/sorter.h"
913+ #include "compiler/xqueryx/xqueryx_to_xquery.h"
914+ #include <zorba/store_manager.h>
915+ #include <zorba/xquery.h>
916+ #include <zorba/xquery_exception.h>
917 #include "util/cxx_util.h"
918-// #include "util/indent.h"
919-// #include "util/stl_util.h"
920-// #include "diagnostics/xquery_diagnostics.h"
921-// #include "zorbatypes/numconversions.h"
922+ #include "diagnostics/assert.h"
923+ #include "zorbatypes/mapm/m_apm_lc.h"
924+ #include "zorbatypes/datetime/parse.h"
925+ #include "zorbatypes/chartype.h"
926+ #include "zorbatypes/collation_manager.h"
927+ #include "zorbatypes/ft_token.h"
928+ #include "zorbatypes/m_apm.h"
929+ #include "zorbatypes/rclock.h"
930+ #include "zorbatypes/schema_types.h"
931+ #include "zorbatypes/timezone.h"
932+ #include "zorbatypes/transcoder.h"
933+ #include "zorbatypes/URI.h"
934+ #include "zorbatypes/xerces_xmlcharray.h"
935+ #include "zorbatypes/zorbatypes_decl.h"
936+ #include "zorbatypes/zstring.h"
937+ #include "zorbautils/condition.h"
938+ #include "zorbautils/hashfun.h"
939+ #include "zorbautils/hashmap.h"
940+ #include "zorbautils/hashmap_itemp.h"
941+ #include "zorbautils/hashmap_str_obj.h"
942+ #include "zorbautils/hashmap_zstring.h"
943+ #include "zorbautils/hashset.h"
944+ #include "zorbautils/hashset_itemh.h"
945+ #include "zorbautils/latch.h"
946+ #include "zorbautils/locale.h"
947+ #include "zorbautils/lock.h"
948+ #include "zorbautils/mutex.h"
949+ #include "zorbautils/runnable.h"
950+ #include "zorbautils/SAXParser.h"
951+ #include "zorbautils/stack.h"
952+ #include "zorbautils/string_util.h"
953+ #include "unit_tests/unit_test_list.h"
954+ #include "zorba/diagnostic_handler.h"
955+ #include "zorba/xquery_warning.h"
956+ #include "runtime/full_text/ftcontains_visitor.h"
957+ #include "store/api/ft_token_iterator.h"
958+ #include "store/naive/ft_token_store.h"
959
960-// #include "api/serialization/serializable.h"
961-// #include "api/serialization/serializer.h"
962-// #include "api/collectionimpl.h"
963-// #include "api/dynamiccontextimpl.h"
964-// #include "api/fileimpl.h"
965-// #include "api/functionimpl.h"
966-// #include "api/invoke_item_sequence.h"
967-// #include "api/itemfactoryimpl.h"
968-// #include "api/resultiteratorchainer.h"
969-// #include "api/resultiteratorimpl.h"
970-// #include "api/sax2impl.h"
971-// #include "api/serializerimpl.h"
972-// #include "api/staticcontextimpl.h"
973-// #include "api/storeiteratorimpl.h"
974-// #include "api/unmarshaller.h"
975-// #include "api/uri_resolver_wrappers.h"
976-// #include "api/vectoriterator.h"
977-// #include "api/xmldatamanagerimpl.h"
978-// //#include "api/xqueryimpl.h"
979-// #include "api/zorbaimpl.h"
980-// #include "capi/cdynamic_context.h"
981-// #include "capi/cexpression.h"
982-// #include "capi/cexternal_function.h"
983-// #include "capi/cimplementation.h"
984-// #include "capi/csequence.h"
985-// #include "capi/cstatic_context.h"
986-// #include "capi/error.h"
987-// #include "capi/external_module.h"
988-// #include "capi/single_item_sequence.h"
989-// #include "capi/user_item_sequence.h"
990-// #include "compiler/parser/flexlexer.h"
991-// #include "compiler/parser/ft_types.h"
992-// #include "compiler/parser/symbol_table.h"
993-// #include "compiler/parser/xqdoc_comment.h"
994-// #include "compiler/parsetree/parsenode_print_xml_visitor.h"
995-// #include "compiler/parsetree/parsenode_print_xqdoc_visitor.h"
996-// #include "compiler/parsetree/parsenode_print_xquery_visitor.h"
997-// #include "compiler/parsetree/parsenode_xqdoc_visitor.h"
998-// #include "compiler/translator/prolog_graph.h"
999-// #include "compiler/translator/translator.h"
1000-// #include "compiler/codegen/plan_visitor.h"
1001-// #include "compiler/expression/abstract_expr_visitor.h"
1002-// #include "compiler/expression/expr.h"
1003-// #include "compiler/expression/expr_annotations.h"
1004-// #include "compiler/expression/expr_base.h"
1005-// #include "compiler/expression/expr_classes.h"
1006-// #include "compiler/expression/expr_iter.h"
1007-// #include "compiler/expression/expr_utils.h"
1008-// #include "compiler/expression/expr_visitor.h"
1009-// #include "compiler/expression/flwor_expr.h"
1010-// //#include "compiler/expression/fo_expr.h"
1011-// #include "compiler/expression/ftnode_classes.h"
1012-// #include "compiler/expression/ftnode_visitor.h"
1013-// #include "compiler/expression/function_item_expr.h"
1014-// #include "compiler/expression/path_expr.h"
1015-// #include "compiler/expression/script_exprs.h"
1016-// #include "compiler/expression/update_exprs.h"
1017-// #include "compiler/expression/var_expr.h"
1018-// #include "compiler/rewriter/framework/rewriter.h"
1019-// #include "compiler/rewriter/framework/rewriter_context.h"
1020-// #include "compiler/rewriter/framework/rule_driver.h"
1021-// #include "compiler/rewriter/framework/sequential_rewriter.h"
1022-// #include "compiler/rewriter/rewriters/common_rewriter.h"
1023-// #include "compiler/rewriter/rewriters/default_optimizer.h"
1024-// #include "compiler/rewriter/rewriters/phase1_rewriter.h"
1025-// #include "compiler/rewriter/rules/ruleset.h"
1026-// #include "compiler/rewriter/rules/rule_base.h"
1027-// #include "compiler/rewriter/rules/type_rules.h"
1028-// #include "compiler/rewriter/tools/dataflow_annotations.h"
1029-// #include "compiler/rewriter/tools/expr_tools.h"
1030-// #include "compiler/rewriter/tools/udf_graph.h"
1031-// #include "compiler/xqddf/collection_decl.h"
1032-// #include "compiler/xqddf/value_ic.h"
1033-// #include "compiler/xqddf/value_index.h"
1034-// #include "compiler/semantic_annotations/annotations.h"
1035-// #include "compiler/semantic_annotations/annotation_holder.h"
1036-// #include "compiler/semantic_annotations/annotation_keys.h"
1037-// #include "compiler/api/compiler_api.h"
1038-// #include "compiler/api/compiler_api_impl.h"
1039-// #include "system/globalenv.h"
1040-// #include "system/properties.h"
1041-// #include "system/zorba_properties.h"
1042-// #include "context/decimal_format.h"
1043-// #include "context/default_uri_mappers.h"
1044-// #include "context/default_url_resolvers.h"
1045-// #include "context/dynamic_context.h"
1046-// #include "context/dynamic_loader.h"
1047-// #include "context/internal_uri_resolvers.h"
1048-// //#include "context/namespace_context.h"
1049-// #include "context/root_static_context.h"
1050-// #include "context/sctx_map_iterator.h"
1051-// #include "context/standard_uri_resolvers.h"
1052-// #include "context/static_context_consts.h"
1053-// #include "context/stemmer_wrappers.h"
1054-// #include "context/uri_resolver.h"
1055-// #include "context/uri_resolver_wrapper.h"
1056-#include "diagnostics/assert.h"
1057-// #include "diagnostics/diagnostic.h"
1058-// #include "diagnostics/dict.h"
1059-// #include "diagnostics/dict_impl.h"
1060-// #include "diagnostics/StackWalker.h"
1061-// #include "diagnostics/user_error.h"
1062-// #include "diagnostics/user_exception.h"
1063-// #include "diagnostics/xquery_exception.h"
1064-// #include "diagnostics/xquery_stack_trace.h"
1065-// #include "diagnostics/xquery_warning.h"
1066-// #include "diagnostics/zorba_exception.h"
1067-// //#include "functions/annotation.h"
1068-// #include "functions/external_function.h"
1069-// #include "functions/function.h"
1070-// #include "functions/function_consts.h"
1071-// #include "functions/function_impl.h"
1072-// #include "functions/func_accessors_impl.h"
1073-// #include "functions/func_apply.h"
1074-// #include "functions/func_arithmetic.h"
1075-// #include "functions/func_booleans_impl.h"
1076-// #include "functions/func_durations_dates_times_impl.h"
1077-// #include "functions/func_enclosed.h"
1078-// #include "functions/func_eval.h"
1079-// #include "functions/func_hoist.h"
1080-// #include "functions/func_index_ddl.h"
1081-// #include "functions/func_node_sort_distinct.h"
1082-// #include "functions/func_numerics_impl.h"
1083-// #include "functions/func_reflection.h"
1084-// #include "functions/func_sequences_impl.h"
1085-// #include "functions/func_var_decl.h"
1086-// #include "functions/library.h"
1087-// #include "functions/signature.h"
1088-// #include "functions/udf.h"
1089-// #include "runtime/full_text/thesauri/decode_base128.h"
1090-// #include "runtime/full_text/thesauri/encoded_list.h"
1091-// #include "runtime/full_text/thesauri/iso2788.h"
1092-// #include "runtime/full_text/thesauri/wn_db_segment.h"
1093-// #include "runtime/full_text/thesauri/wn_synset.h"
1094-// #include "runtime/full_text/thesauri/wn_thesaurus.h"
1095-// #include "runtime/full_text/thesauri/wn_types.h"
1096-// #include "runtime/full_text/thesauri/xqftts_relationship.h"
1097-// #include "runtime/full_text/thesauri/xqftts_thesaurus.h"
1098-// #include "runtime/full_text/ft_match.h"
1099-// #include "runtime/full_text/ft_query_item.h"
1100-// #include "runtime/full_text/ft_single_token_iterator.h"
1101-// #include "runtime/full_text/ft_stop_words_set.h"
1102-// #include "runtime/full_text/ft_thesaurus.h"
1103-// #include "runtime/full_text/ft_token_matcher.h"
1104-// #include "runtime/full_text/ft_token_seq_iterator.h"
1105-// #include "runtime/full_text/ft_token_span.h"
1106-// #include "runtime/full_text/ft_wildcard.h"
1107-// #include "runtime/full_text/full_text.h"
1108-// #include "runtime/full_text/apply.h"
1109-// #include "runtime/full_text/ft_util.h"
1110-// #include "runtime/collections/collections_base.h"
1111-// #include "runtime/core/apply_updates.h"
1112-// #include "runtime/core/arithmetic_impl.h"
1113-// #include "runtime/core/constructors.h"
1114-// #include "runtime/core/fncall_iterator.h"
1115-// #include "runtime/core/internal_operators.h"
1116-// #include "runtime/core/item_iterator.h"
1117-// #include "runtime/core/nodeid_iterators.h"
1118-// #include "runtime/core/path_iterators.h"
1119-// #include "runtime/core/sequencetypes.h"
1120-// #include "runtime/core/trycatch.h"
1121-// #include "runtime/core/var_iterators.h"
1122-// #include "runtime/numerics/NumericsImpl.h"
1123-// #include "runtime/booleans/BooleanImpl.h"
1124-// #include "runtime/base/binarybase.h"
1125-// #include "runtime/base/narybase.h"
1126-// #include "runtime/base/noarybase.h"
1127-// #include "runtime/base/plan_iterator.h"
1128-// #include "runtime/sequences/SequencesImpl.h"
1129-// #include "runtime/visitors/iterprinter.h"
1130-// #include "runtime/misc/materialize.h"
1131-// #include "runtime/scripting/scripting.h"
1132-// #include "types/schema/EventSchemaValidator.h"
1133-// #include "types/schema/LoadSchemaErrorHandler.h"
1134-// #include "types/schema/PrintSchema.h"
1135-// #include "types/schema/revalidateUtils.h"
1136-// #include "types/schema/schema.h"
1137-// #include "types/schema/SchemaValidatorFilter.h"
1138-// #include "types/schema/StrX.h"
1139-// #include "types/schema/validate.h"
1140-// #include "types/schema/ValidationEventHandler.h"
1141-// #include "types/schema/xercesIncludes.h"
1142-// #include "types/schema/XercesParseUtils.h"
1143-// #include "types/schema/XercSchemaValidator.h"
1144-// #include "types/casting.h"
1145-// #include "types/collation.h"
1146-// #include "types/node_test.h"
1147-// #include "types/root_typemanager.h"
1148-// #include "types/typeconstants.h"
1149-// #include "types/typeimpl.h"
1150-// #include "types/typemanager.h"
1151-// #include "types/typemanagerimpl.h"
1152-// #include "types/typeops.h"
1153-// #include "util/fx/fxarray.h"
1154-// #include "util/fx/fxcharheap.h"
1155-// #include "util/ascii_util.h"
1156-// #include "util/atomic_int.h"
1157-// #include "util/auto_vector.h"
1158-// #include "util/curl_util.h"
1159-// #include "util/dir.h"
1160-// #include "util/dynamic_bitset.h"
1161-// #include "util/empty.h"
1162-// #include "util/error_util.h"
1163-// #include "util/fs_util.h"
1164-// #include "util/hashmap.h"
1165-// //#include "util/hashmap32.h"
1166-// #include "util/less.h"
1167-// #include "util/mmap_file.h"
1168-// #include "util/nonatomic_int.h"
1169-// #include "util/omanip.h"
1170-// #include "util/oseparator.h"
1171-// #include "util/regex.h"
1172-// #include "util/singleton.h"
1173-// #include "util/string_util.h"
1174-// #include "util/threads.h"
1175-// #include "util/tokenbuf.h"
1176-// #include "util/tracer.h"
1177-// #include "util/triple.h"
1178-// #include "util/unicode_categories.h"
1179-// #include "util/unicode_util.h"
1180-// #include "util/uri_util.h"
1181-// #include "util/utf8_string.h"
1182-// #include "util/utf8_util.h"
1183-// #include "util/utf8_util_base.h"
1184-// #include "util/void_int.h"
1185-// #include "util/xml_util.h"
1186-// #include "zorbamisc/config/platform.h"
1187-// //#include "zorbaserialization/archiver.h"
1188-// #include "zorbaserialization/base64impl.h"
1189-// #include "zorbaserialization/bin_archiver.h"
1190-// //#include "zorbaserialization/class_serializer.h"
1191-// #include "zorbaserialization/mem_archiver.h"
1192-// #include "zorbaserialization/serialization_engine.h"
1193-// #include "zorbaserialization/template_serializer.h"
1194-// #include "zorbaserialization/xml_archiver.h"
1195-// #include "zorbaserialization/zorba_class_serializer.h"
1196- #include "zorbatypes/mapm/m_apm_lc.h"
1197- #include "zorbatypes/datetime/parse.h"
1198- //#include "zorbatypes/binary.h"
1199- #include "zorbatypes/chartype.h"
1200- #include "zorbatypes/collation_manager.h"
1201- //#include "zorbatypes/datetime.h"
1202- //#include "zorbatypes/decimal.h"
1203- //#include "zorbatypes/duration.h"
1204- //#include "zorbatypes/floatimpl.h"
1205- #include "zorbatypes/ft_token.h"
1206- //#include "zorbatypes/integer.h"
1207- #include "zorbatypes/libicu.h"
1208- #include "zorbatypes/m_apm.h"
1209- //#include "zorbatypes/rchandle.h"
1210- #include "zorbatypes/rclock.h"
1211- //#include "zorbatypes/regex_ascii.h"
1212- #include "zorbatypes/schema_types.h"
1213- #include "zorbatypes/timezone.h"
1214- #include "zorbatypes/transcoder.h"
1215- #include "zorbatypes/URI.h"
1216- #include "zorbatypes/xerces_xmlcharray.h"
1217- #include "zorbatypes/zorbatypes_decl.h"
1218- #include "zorbatypes/zstring.h"
1219- //#include "zorbautils/stemmer/sb_stemmer.h"
1220- #include "zorbautils/condition.h"
1221- #include "zorbautils/hashfun.h"
1222- #include "zorbautils/hashmap.h"
1223- #include "zorbautils/hashmap_itemp.h"
1224- #include "zorbautils/hashmap_str_obj.h"
1225- #include "zorbautils/hashmap_zstring.h"
1226- #include "zorbautils/hashset.h"
1227- #include "zorbautils/hashset_itemh.h"
1228- //#include "zorbautils/icu_tokenizer.h"
1229- #include "zorbautils/latch.h"
1230- #include "zorbautils/locale.h"
1231- #include "zorbautils/lock.h"
1232- #include "zorbautils/mutex.h"
1233- #include "zorbautils/runnable.h"
1234- #include "zorbautils/SAXParser.h"
1235- #include "zorbautils/stack.h"
1236-// #include "zorbautils/stemmer.h"
1237- #include "zorbautils/string_util.h"
1238- //#include "zorbautils/synchronous_logger.h"
1239- //#include "zorbautils/tokenizer.h"
1240- #include "unit_tests/unit_test_list.h"
1241- #include "zorba/diagnostic_handler.h"
1242- #include "zorba/xquery_warning.h"
1243- #include "runtime/full_text/ftcontains_visitor.h"
1244- #include "store/naive/naive_ft_token_iterator.h"
1245- #include "store/api/ft_token_iterator.h"
1246- #include "store/naive/ft_token_store.h"
1247 #endif
1248 /* vim:set et sw=2 ts=2: */
1249
1250=== modified file 'src/runtime/full_text/CMakeLists.txt'
1251--- src/runtime/full_text/CMakeLists.txt 2012-03-28 05:19:57 +0000
1252+++ src/runtime/full_text/CMakeLists.txt 2012-04-07 00:45:26 +0000
1253@@ -42,11 +42,11 @@
1254 default_tokenizer.cpp
1255 )
1256
1257-IF (ZORBA_NO_UNICODE)
1258+IF (ZORBA_NO_ICU)
1259 LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp)
1260-ELSE (ZORBA_NO_UNICODE)
1261+ELSE (ZORBA_NO_ICU)
1262 LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp)
1263-ENDIF (ZORBA_NO_UNICODE)
1264+ENDIF (ZORBA_NO_ICU)
1265
1266 ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
1267
1268
1269=== modified file 'src/runtime/full_text/default_tokenizer.cpp'
1270--- src/runtime/full_text/default_tokenizer.cpp 2012-03-28 05:19:57 +0000
1271+++ src/runtime/full_text/default_tokenizer.cpp 2012-04-07 00:45:26 +0000
1272@@ -19,22 +19,22 @@
1273 #include <zorba/config.h>
1274
1275 #include "default_tokenizer.h"
1276-#ifdef ZORBA_NO_UNICODE
1277+#ifdef ZORBA_NO_ICU
1278 # include "latin_tokenizer.h"
1279 #else
1280 # include "icu_tokenizer.h"
1281-#endif /* ZORBA_NO_UNICODE */
1282+#endif /* ZORBA_NO_ICU */
1283
1284 namespace zorba {
1285
1286 ///////////////////////////////////////////////////////////////////////////////
1287
1288 TokenizerProvider const& default_tokenizer_provider() {
1289-#ifdef ZORBA_NO_UNICODE
1290+#ifdef ZORBA_NO_ICU
1291 static LatinTokenizerProvider const instance;
1292 #else
1293 static ICU_TokenizerProvider const instance;
1294-#endif /* ZORBA_NO_UNICODE */
1295+#endif /* ZORBA_NO_ICU */
1296 return instance;
1297 };
1298
1299
1300=== modified file 'src/runtime/full_text/latin_tokenizer.cpp'
1301--- src/runtime/full_text/latin_tokenizer.cpp 2012-03-28 05:19:57 +0000
1302+++ src/runtime/full_text/latin_tokenizer.cpp 2012-04-07 00:45:26 +0000
1303@@ -18,8 +18,9 @@
1304 #include <functional>
1305
1306 #include <zorba/diagnostic_list.h>
1307-#include <zorba/xquery_exception.h>
1308-#include <zorba/zorba.h>
1309+
1310+#include "diagnostics/dict.h"
1311+#include "diagnostics/xquery_exception.h"
1312
1313 #include "latin_tokenizer.h"
1314
1315
1316=== modified file 'src/runtime/full_text/latin_tokenizer.h'
1317--- src/runtime/full_text/latin_tokenizer.h 2012-03-28 05:19:57 +0000
1318+++ src/runtime/full_text/latin_tokenizer.h 2012-04-07 00:45:26 +0000
1319@@ -14,12 +14,12 @@
1320 * limitations under the License.
1321 */
1322
1323-#ifndef ZORBA_WESTERN_TOKENIZER_H
1324-#define ZORBA_WESTERN_TOKENIZER_H
1325+#ifndef ZORBA_LATIN_TOKENIZER_H
1326+#define ZORBA_LATIN_TOKENIZER_H
1327
1328 #include <zorba/config.h>
1329
1330-#ifdef ZORBA_NO_FULL_TEXT
1331+#ifdef ZORBA_NO_ICU
1332
1333 #include <zorba/tokenizer.h>
1334 #include "zorbatypes/zstring.h"
1335@@ -38,8 +38,8 @@
1336
1337 // inherited
1338 void destroy() const;
1339- void tokenize( char const*, size_type, iso639_1::type, bool, Callback&,
1340- void* );
1341+ void tokenize( char const*, size_type, locale::iso639_1::type, bool,
1342+ Callback&, void* );
1343
1344 private:
1345 typedef zstring string_type;
1346@@ -64,13 +64,14 @@
1347 class LatinTokenizerProvider : public TokenizerProvider {
1348 public:
1349 // inherited
1350- Tokenizer::ptr getTokenizer( iso639_1::type, Tokenizer::Numbers& ) const;
1351+ Tokenizer::ptr getTokenizer( locale::iso639_1::type,
1352+ Tokenizer::Numbers& ) const;
1353 };
1354
1355 ///////////////////////////////////////////////////////////////////////////////
1356
1357 } // namespace zorba
1358
1359-#endif /* ZORBA_NO_FULL_TEXT */
1360-#endif /* ZORBA_WESTERN_TOKENIZER_H */
1361+#endif /* ZORBA_NO_ICU */
1362+#endif /* ZORBA_LATIN_TOKENIZER_H */
1363 /* vim:set et sw=2 ts=2: */
1364
1365=== modified file 'src/runtime/numerics/format_integer_impl.cpp'
1366--- src/runtime/numerics/format_integer_impl.cpp 2012-03-28 05:19:57 +0000
1367+++ src/runtime/numerics/format_integer_impl.cpp 2012-04-07 00:45:26 +0000
1368@@ -881,7 +881,7 @@
1369 utf8_result += (*valueit);
1370 }
1371 else
1372- utf8_result += (0x2080 + *valueit - '0');
1373+ utf8_result += (unicode::code_point)(0x2080 + *valueit - '0');
1374 }
1375 }
1376 else if((c0 == 0x2460) || //CIRCLED DIGIT ONE (1-20)
1377
1378=== modified file 'src/runtime/numerics/numerics_impl.cpp'
1379--- src/runtime/numerics/numerics_impl.cpp 2012-03-28 05:19:57 +0000
1380+++ src/runtime/numerics/numerics_impl.cpp 2012-04-07 00:45:26 +0000
1381@@ -462,7 +462,7 @@
1382 minus( "-" )
1383 {
1384 utf8_string<zstring> u_per_mille( per_mille );
1385- u_per_mille = 0x2030;
1386+ u_per_mille = (unicode::code_point)0x2030;
1387 }
1388
1389 void readFormat(const DecimalFormat_t& df_t)
1390
1391=== modified file 'src/runtime/strings/strings_impl.cpp'
1392--- src/runtime/strings/strings_impl.cpp 2012-03-28 05:19:57 +0000
1393+++ src/runtime/strings/strings_impl.cpp 2012-04-07 00:45:26 +0000
1394@@ -810,7 +810,9 @@
1395 zstring normForm;
1396 zstring resStr;
1397 unicode::normalization::type normType;
1398+#ifndef ZORBA_NO_ICU
1399 bool success;
1400+#endif /* ZORBA_NO_ICU */
1401
1402 PlanIteratorState* state;
1403 DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
1404@@ -860,10 +862,10 @@
1405 }
1406
1407 item0->getStringValue2(resStr);
1408-#ifndef ZORBA_NO_UNICODE
1409+#ifndef ZORBA_NO_ICU
1410 success = utf8::normalize(resStr, normType, &resStr);
1411 ZORBA_ASSERT(success);
1412-#endif//#ifndef ZORBA_NO_UNICODE
1413+#endif//#ifndef ZORBA_NO_ICU
1414 STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
1415 }
1416 else
1417@@ -992,7 +994,7 @@
1418 trans_map[ *map_i ] = *trans_i;
1419
1420 for ( ; map_i != map_end; ++map_i )
1421- trans_map[ *map_i ] = ~0;
1422+ trans_map[ *map_i ] = static_cast<unicode::code_point>( ~0 );
1423 }
1424
1425 utf8_string<zstring> u_result_string( result_string );
1426@@ -1007,7 +1009,7 @@
1427 cp_map_type::const_iterator const found_i = trans_map.find( cp );
1428 if ( found_i != trans_map.end() ) {
1429 cp = found_i->second;
1430- if ( cp == ~0 )
1431+ if ( cp == static_cast<unicode::code_point>( ~0 ) )
1432 continue;
1433 }
1434 u_result_string += cp;
1435@@ -1795,16 +1797,33 @@
1436 int &utf8start,
1437 unsigned int &bytestart,
1438 int utf8end,
1439+ unsigned int byteend,
1440 zstring &out)
1441 {
1442+#ifndef ZORBA_NO_ICU
1443 utf8::size_type clen;
1444- while(utf8start < utf8end)
1445- {
1446- clen = utf8::char_length(*sin);
1447- out.append(sin, clen);
1448- utf8start++;
1449- bytestart += clen;
1450- sin += clen;
1451+ if(utf8end)
1452+ {
1453+ while(utf8start < utf8end)
1454+ {
1455+ clen = utf8::char_length(*sin);
1456+ if(clen == 0)
1457+ clen = 1;
1458+ out.append(sin, clen);
1459+ utf8start++;
1460+ bytestart += clen;
1461+ sin += clen;
1462+ }
1463+ }
1464+ else
1465+#endif
1466+ {
1467+ if(!utf8end)
1468+ utf8end = byteend;
1469+ out.append(sin, utf8end-bytestart);
1470+ sin += utf8end-bytestart;
1471+ utf8start = utf8end;
1472+ bytestart = utf8end;
1473 }
1474 }
1475
1476@@ -1812,6 +1831,7 @@
1477 int &match_end1,
1478 unsigned int &match_end1_bytes,
1479 int match_start2,
1480+ unsigned int match_start2_bytes,
1481 const char *&strin)
1482 {
1483 store::Item_t non_match_elem;
1484@@ -1833,7 +1853,7 @@
1485 // utf8_it++;
1486 // match_end1++;
1487 //}
1488- copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, non_match_str);
1489+ copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, match_start2_bytes, non_match_str);
1490 store::Item_t non_match_text_item;
1491 GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
1492 }
1493@@ -1864,19 +1884,31 @@
1494 i--;
1495 break;
1496 }
1497+#ifndef ZORBA_NO_ICU
1498 match_startg = rx.get_match_start(i+1);
1499 if((match_startg < 0) && (gparent < 0))
1500 continue;
1501+#else
1502+ int temp_endg;
1503+ match_startg = -1;
1504+ temp_endg = -1;
1505+ if(!rx.get_match_start_end_bytes(i+1, &match_startg, &temp_endg) && (gparent < 0))
1506+ continue;
1507+#endif
1508 if(match_endgood < match_startg)
1509 {
1510 //add non-group match text
1511 zstring non_group_str;
1512
1513- copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, non_group_str);
1514+ copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, 0, non_group_str);
1515 store::Item_t non_group_text_item;
1516 GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str);
1517 }
1518+#ifndef ZORBA_NO_ICU
1519 match_endg = rx.get_match_end(i+1);
1520+#else
1521+ match_endg = temp_endg;
1522+#endif
1523 //add group match text
1524 GENV_ITEMFACTORY->createQName(group_element_name,
1525 static_context::W3C_FN_NS, "fn", "group");
1526@@ -1907,7 +1939,7 @@
1527 }
1528 zstring group_str;
1529
1530- copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, group_str);
1531+ copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, 0, group_str);
1532 store::Item_t group_text_item;
1533 GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str);
1534 }
1535@@ -1916,7 +1948,7 @@
1536 {
1537 zstring non_group_str;
1538
1539- copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, non_group_str);
1540+ copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, 0, non_group_str);
1541 store::Item_t non_group_text_item;
1542 GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str);
1543 }
1544@@ -2144,8 +2176,14 @@
1545 reachedEnd = false;
1546 while(rx.find_next_match(&reachedEnd))
1547 {
1548- int match_start2 = rx.get_match_start();
1549- int match_end2 = rx.get_match_end();
1550+ int match_start2;
1551+ int match_end2;
1552+#ifndef ZORBA_NO_ICU
1553+ match_start2 = rx.get_match_start();
1554+ match_end2 = rx.get_match_end();
1555+#else
1556+ rx.get_match_start_end_bytes(0, &match_start2, &match_end2);
1557+#endif
1558 ZORBA_ASSERT(match_start2 >= 0);
1559
1560 if(is_input_stream && reachedEnd && !instream->eof())
1561@@ -2157,7 +2195,7 @@
1562 //construct the fn:non-match
1563 if(match_start2 > match_end1)
1564 {
1565- addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, instr);
1566+ addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, 0, instr);
1567 }
1568
1569 //construct the fn:match
1570@@ -2165,7 +2203,7 @@
1571 match_end1 = match_end2;
1572 }
1573
1574- if(is_input_stream && reachedEnd && !instream->eof())
1575+ if(is_input_stream && !instream->eof())
1576 {
1577 //load some more data, maybe the match will be different
1578 if(match_end1_bytes)
1579@@ -2213,7 +2251,7 @@
1580 else
1581 {
1582 if(match_end1_bytes < streambuf_read)
1583- addNonMatchElement(result, match_end1, match_end1_bytes, streambuf_read, instr);
1584+ addNonMatchElement(result, match_end1, match_end1_bytes, 0, streambuf_read, instr);
1585 if(is_input_stream && instream->eof())
1586 reachedEnd = true;
1587 }
1588
1589=== modified file 'src/store/api/store.h'
1590--- src/store/api/store.h 2012-03-28 05:19:57 +0000
1591+++ src/store/api/store.h 2012-04-07 00:45:26 +0000
1592@@ -16,7 +16,7 @@
1593 #ifndef ZORBA_STORE_STORE_H
1594 #define ZORBA_STORE_STORE_H
1595
1596-#include <zorba/config.h>
1597+#include "zorba/config.h"
1598 #include "zorbatypes/schema_types.h"
1599
1600 #include "store/api/shared_types.h"
1601
1602=== modified file 'src/store/naive/simple_store.h'
1603--- src/store/naive/simple_store.h 2012-03-28 23:58:23 +0000
1604+++ src/store/naive/simple_store.h 2012-04-07 00:45:26 +0000
1605@@ -16,7 +16,11 @@
1606 #ifndef ZORBA_SIMPLE_STORE
1607 #define ZORBA_SIMPLE_STORE
1608
1609-#include "store.h"
1610+#include "store/naive/store.h"
1611+
1612+#include "store/naive/node_factory.h"
1613+#include "store/naive/pul_primitive_factory.h"
1614+#include "store/naive/tree_id_generator.h"
1615
1616 namespace zorba {
1617 namespace simplestore {
1618@@ -72,7 +76,7 @@
1619
1620 NodeFactory* createNodeFactory() const;
1621
1622- void destroyNodeFactory(NodeFactory*) const;
1623+ void destroyNodeFactory(zorba::simplestore::NodeFactory*) const;
1624
1625 store::ItemFactory* createItemFactory() const;
1626
1627@@ -84,7 +88,7 @@
1628
1629 PULPrimitiveFactory* createPULFactory() const;
1630
1631- void destroyPULFactory(PULPrimitiveFactory*) const;
1632+ void destroyPULFactory(zorba::simplestore::PULPrimitiveFactory*) const;
1633
1634 CollectionSet* createCollectionSet() const;
1635
1636
1637=== modified file 'src/store/naive/store.cpp'
1638--- src/store/naive/store.cpp 2012-03-28 22:09:36 +0000
1639+++ src/store/naive/store.cpp 2012-04-07 00:45:26 +0000
1640@@ -33,7 +33,7 @@
1641
1642 #include "properties.h"
1643 #include "string_pool.h"
1644-#include "store.h"
1645+#include "simple_store.h"
1646 #include "simple_temp_seq.h"
1647 #include "simple_lazy_temp_seq.h"
1648 #include "collection.h"
1649
1650=== modified file 'src/store/naive/store.h'
1651--- src/store/naive/store.h 2012-03-28 22:09:36 +0000
1652+++ src/store/naive/store.h 2012-04-07 00:45:26 +0000
1653@@ -16,10 +16,18 @@
1654 #ifndef ZORBA_SIMPLESTORE_STORE_H
1655 #define ZORBA_SIMPLESTORE_STORE_H
1656
1657+#include "store/api/store.h"
1658+
1659 #include "shared_types.h"
1660 #include "store_defs.h"
1661 #include "hashmap_nodep.h"
1662 #include "tree_id.h"
1663+#include "store/util/hashmap_stringbuf.h"
1664+#include "zorbautils/mutex.h"
1665+#include "zorbautils/lock.h"
1666+#include "zorbautils/hashmap.h"
1667+#include "zorbautils/hashmap_itemp.h"
1668+#include "zorbautils/hashmap_zstring_nonserializable.h"
1669
1670 #if (defined (WIN32) || defined (WINCE))
1671 #include "node_items.h"
1672@@ -28,14 +36,7 @@
1673 #include "store/api/ic.h"
1674 #endif
1675
1676-#include "store/api/store.h"
1677-
1678-#include "store/util/hashmap_stringbuf.h"
1679-
1680-#include "zorbautils/mutex.h"
1681-#include "zorbautils/lock.h"
1682-#include "zorbautils/hashmap_itemp.h"
1683-#include "zorbautils/hashmap_zstring_nonserializable.h"
1684+using namespace zorba;
1685
1686 namespace zorba
1687 {
1688@@ -63,9 +64,9 @@
1689 class TreeIdGeneratorFactory;
1690 class TreeIdGenerator;
1691
1692-typedef zorba::HashMapZString<XmlNode_t> DocumentSet;
1693-typedef ItemPointerHashMap<store::Index_t> IndexSet;
1694-typedef ItemPointerHashMap<store::IC_t> ICSet;
1695+typedef HashMapZString<XmlNode_t> DocumentSet;
1696+typedef zorba::ItemPointerHashMap<store::Index_t> IndexSet;
1697+typedef zorba::ItemPointerHashMap<store::IC_t> ICSet;
1698
1699
1700
1701
1702=== modified file 'src/system/globalenv.cpp'
1703--- src/system/globalenv.cpp 2012-03-28 05:19:57 +0000
1704+++ src/system/globalenv.cpp 2012-04-07 00:45:26 +0000
1705@@ -17,11 +17,11 @@
1706
1707 #include "common/common.h"
1708
1709-#ifndef ZORBA_NO_UNICODE
1710+#ifndef ZORBA_NO_ICU
1711 # include <unicode/uclean.h>
1712 # include <unicode/utypes.h>
1713 # include <unicode/udata.h>
1714-#endif /* ZORBA_NO_UNICODE */
1715+#endif /* ZORBA_NO_ICU */
1716
1717 #ifdef ZORBA_WITH_BIG_INTEGER
1718 # include "zorbatypes/m_apm.h"
1719@@ -208,7 +208,7 @@
1720 // from one thread only
1721 // see http://www.icu-project.org/userguide/design.html#Init_and_Termination
1722 // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html
1723-#ifndef ZORBA_NO_UNICODE
1724+#ifndef ZORBA_NO_ICU
1725 # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
1726 {
1727 TCHAR self_path[1024];
1728@@ -238,13 +238,13 @@
1729 udata_setCommonData(icu_appdata, &data_err);
1730 ZORBA_ASSERT(data_err == U_ZERO_ERROR);
1731
1732- // u_setDataDirectory(self_path);
1733+ // u_setDataDirectory(self_path);
1734 }
1735 # endif
1736 UErrorCode lICUInitStatus = U_ZERO_ERROR;
1737 u_init(&lICUInitStatus);
1738 ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR);
1739-#endif//ifndef ZORBA_NO_UNICODE
1740+#endif /* ZORBA_NO_ICU */
1741 }
1742
1743
1744@@ -256,12 +256,12 @@
1745 // releases statically initialized memory and prevents
1746 // valgrind from reporting those problems at the end
1747 // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920
1748-#ifndef ZORBA_NO_UNICODE
1749+#ifndef ZORBA_NO_ICU
1750 u_cleanup();
1751 # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
1752 delete[] icu_appdata;
1753 # endif
1754-#endif//ifndef ZORBA_NO_UNICODE
1755+#endif /* ZORBA_NO_ICU */
1756 }
1757
1758
1759
1760=== modified file 'src/unit_tests/CMakeLists.txt'
1761--- src/unit_tests/CMakeLists.txt 2012-03-28 05:19:57 +0000
1762+++ src/unit_tests/CMakeLists.txt 2012-04-07 00:45:26 +0000
1763@@ -29,9 +29,9 @@
1764 tokenizer.cpp)
1765 ENDIF (NOT ZORBA_NO_FULL_TEXT)
1766
1767-IF (NOT ZORBA_NO_UNICODE)
1768+IF (NOT ZORBA_NO_ICU)
1769 LIST (APPEND UNIT_TEST_SRCS
1770 test_icu_streambuf.cpp)
1771-ENDIF (NOT ZORBA_NO_UNICODE)
1772+ENDIF (NOT ZORBA_NO_ICU)
1773
1774 # vim:set et sw=2 tw=2:
1775
1776=== modified file 'src/unit_tests/string.cpp'
1777--- src/unit_tests/string.cpp 2012-03-28 05:19:57 +0000
1778+++ src/unit_tests/string.cpp 2012-04-07 00:45:26 +0000
1779@@ -569,6 +569,7 @@
1780 ASSERT_TRUE( t == s );
1781 }
1782
1783+#ifndef ZORBA_NO_ICU
1784 template<class StringType>
1785 static void test_to_string_from_wchar_t() {
1786 wchar_t const w[] = L"hello";
1787@@ -578,6 +579,7 @@
1788 for ( string::size_type i = 0; i < s.length(); ++i )
1789 ASSERT_TRUE( s[i] == w[i] );
1790 }
1791+#endif /* ZORBA_NO_ICU */
1792
1793 template<class StringType>
1794 static void test_to_upper() {
1795@@ -605,6 +607,7 @@
1796 }
1797 }
1798
1799+#ifndef ZORBA_NO_ICU
1800 static void test_to_wchar_t() {
1801 string const s = "hello";
1802 wchar_t *w;
1803@@ -616,6 +619,7 @@
1804 ASSERT_TRUE( w[i] == s[i] );
1805 delete[] w;
1806 }
1807+#endif /* ZORBA_NO_ICU */
1808
1809 static void test_trim_start() {
1810 char const *s;
1811@@ -873,16 +877,20 @@
1812 test_to_string_from_utf8<zstring>();
1813 test_to_string_from_utf8<zstring_p>();
1814
1815+#ifndef ZORBA_NO_ICU
1816 test_to_string_from_wchar_t<string>();
1817 test_to_string_from_wchar_t<zstring>();
1818 test_to_string_from_wchar_t<zstring_p>();
1819+#endif /* ZORBA_NO_ICU */
1820
1821 test_to_upper<string>();
1822 test_to_upper<zstring>();
1823 test_to_upper<zstring_p>();
1824 test_to_upper<String>();
1825
1826+#ifndef ZORBA_NO_ICU
1827 test_to_wchar_t();
1828+#endif /* ZORBA_NO_ICU */
1829
1830 test_trim_start();
1831 test_trim_end();
1832
1833=== modified file 'src/unit_tests/unit_test_list.h'
1834--- src/unit_tests/unit_test_list.h 2012-03-28 05:19:57 +0000
1835+++ src/unit_tests/unit_test_list.h 2012-04-07 00:45:26 +0000
1836@@ -36,9 +36,9 @@
1837 /**
1838 * ADD NEW UNIT TESTS HERE
1839 */
1840-#ifndef ZORBA_NO_UNICODE
1841+#ifndef ZORBA_NO_ICU
1842 int test_icu_streambuf( int, char*[] );
1843-#endif /* ZORBA_NO_UNICODE */
1844+#endif /* ZORBA_NO_ICU */
1845 int json_parser( int, char*[] );
1846
1847 void initializeTestList();
1848
1849=== modified file 'src/unit_tests/unit_tests.cpp'
1850--- src/unit_tests/unit_tests.cpp 2012-03-28 05:19:57 +0000
1851+++ src/unit_tests/unit_tests.cpp 2012-04-07 00:45:26 +0000
1852@@ -39,9 +39,9 @@
1853 void initializeTestList() {
1854 libunittests["string"] = test_string;
1855 libunittests["uri"] = runUriTest;
1856-#ifndef ZORBA_NO_UNICODE
1857+#ifndef ZORBA_NO_ICU
1858 libunittests["icu_streambuf"] = test_icu_streambuf;
1859-#endif /* ZORBA_NO_UNICODE */
1860+#endif /* ZORBA_NO_ICU */
1861 libunittests["json_parser"] = json_parser;
1862 libunittests["unique_ptr"] = test_unique_ptr;
1863 #ifndef ZORBA_NO_FULL_TEXT
1864
1865=== modified file 'src/util/CMakeLists.txt'
1866--- src/util/CMakeLists.txt 2012-03-28 05:19:57 +0000
1867+++ src/util/CMakeLists.txt 2012-04-07 00:45:26 +0000
1868@@ -40,14 +40,14 @@
1869 LIST(APPEND UTIL_SRCS mmap_file.cpp)
1870 ENDIF(ZORBA_WITH_FILE_ACCESS)
1871
1872-IF(ZORBA_NO_UNICODE)
1873+IF(ZORBA_NO_ICU)
1874 LIST(APPEND UTIL_SRCS
1875- regex_ascii.cpp
1876+ regex_xquery.cpp
1877 passthru_streambuf.cpp)
1878-ELSE(ZORBA_NO_UNICODE)
1879+ELSE(ZORBA_NO_ICU)
1880 LIST(APPEND UTIL_SRCS
1881 icu_streambuf.cpp)
1882-ENDIF(ZORBA_NO_UNICODE)
1883+ENDIF(ZORBA_NO_ICU)
1884
1885 HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)
1886 HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32)
1887
1888=== modified file 'src/util/icu_streambuf.h'
1889--- src/util/icu_streambuf.h 2012-02-04 01:26:18 +0000
1890+++ src/util/icu_streambuf.h 2012-04-07 00:45:26 +0000
1891@@ -17,6 +17,7 @@
1892 #ifndef ZORBA_ICU_STREAMBUF_H
1893 #define ZORBA_ICU_STREAMBUF_H
1894
1895+#include <unicode/ucnv.h>
1896 #include <zorba/transcode_stream.h>
1897
1898 #include "util/utf8_util.h"
1899
1900=== modified file 'src/util/passthru_streambuf.cpp'
1901--- src/util/passthru_streambuf.cpp 2012-02-04 01:26:18 +0000
1902+++ src/util/passthru_streambuf.cpp 2012-04-07 00:45:26 +0000
1903@@ -14,8 +14,8 @@
1904 * limitations under the License.
1905 */
1906
1907+#include "stdafx.h"
1908 #include "passthru_streambuf.h"
1909-
1910 using namespace std;
1911
1912 namespace zorba {
1913@@ -47,7 +47,7 @@
1914 }
1915
1916 bool passthru_streambuf::is_supported( char const *cc_charset ) {
1917- return !is_necessary( charset );
1918+ return !is_necessary( cc_charset );
1919 }
1920
1921 passthru_streambuf::pos_type
1922
1923=== modified file 'src/util/passthru_streambuf.h'
1924--- src/util/passthru_streambuf.h 2012-02-02 18:37:24 +0000
1925+++ src/util/passthru_streambuf.h 2012-04-07 00:45:26 +0000
1926@@ -17,8 +17,9 @@
1927 #ifndef ZORBA_PASSTHRU_STREAMBUF_H
1928 #define ZORBA_PASSTHRU_STREAMBUF_H
1929
1930-#include <zorba/transcode_streambuf.h>
1931-
1932+#include <zorba/transcode_stream.h>
1933+#include "zorbatypes/zstring.h"
1934+#include "util/ascii_util.h"
1935 namespace zorba {
1936
1937 ///////////////////////////////////////////////////////////////////////////////
1938@@ -48,6 +49,13 @@
1939 * @return \c true only if the character encoding is supported.
1940 */
1941 static bool is_supported( char const *charset );
1942+ static bool is_necessary( char const *cc_charset );
1943+
1944+ typedef std::streambuf::char_type char_type;
1945+ typedef std::streambuf::int_type int_type;
1946+ typedef std::streambuf::off_type off_type;
1947+ typedef std::streambuf::pos_type pos_type;
1948+ typedef std::streambuf::traits_type traits_type;
1949
1950 protected:
1951 void imbue( std::locale const& );
1952
1953=== modified file 'src/util/regex.cpp'
1954--- src/util/regex.cpp 2012-03-28 05:19:57 +0000
1955+++ src/util/regex.cpp 2012-04-07 00:45:26 +0000
1956@@ -15,8 +15,6 @@
1957 */
1958 #include "stdafx.h"
1959
1960-#include "regex.h"
1961-
1962 #include <cstring>
1963 #include <vector>
1964
1965@@ -28,13 +26,13 @@
1966
1967 #include "ascii_util.h"
1968 #include "cxx_util.h"
1969+#include "regex.h"
1970 #include "stl_util.h"
1971
1972 #define INVALID_RE_EXCEPTION(...) \
1973 XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) )
1974
1975-
1976-#ifndef ZORBA_NO_UNICODE
1977+#ifndef ZORBA_NO_ICU
1978 # include <unicode/uversion.h>
1979 U_NAMESPACE_USE
1980
1981@@ -103,6 +101,7 @@
1982
1983 bool got_backslash = false;
1984 bool in_char_class = false; // within [...]
1985+ bool is_first_char = true; // to check ^ placement
1986
1987 bool in_backref = false; // '\'[1-9][0-9]*
1988 unsigned backref_no = 0; // 1-based
1989@@ -231,6 +230,8 @@
1990 ++open_cap_subs;
1991 cap_sub.push_back( true );
1992 cur_cap_sub = cap_sub.size();
1993+ is_first_char = true;
1994+ goto append;
1995 }
1996 break;
1997 case ')':
1998@@ -245,8 +246,10 @@
1999 case '[':
2000 if ( q_flag )
2001 *icu_re += '\\';
2002- else
2003+ else {
2004 in_char_class = true;
2005+ goto append;
2006+ }
2007 break;
2008 case ']':
2009 if ( q_flag )
2010@@ -254,6 +257,19 @@
2011 else
2012 in_char_class = false;
2013 break;
2014+ case '^':
2015+ if ( q_flag )
2016+ *icu_re += '\\';
2017+ else if ( !is_first_char && !in_char_class )
2018+ throw INVALID_RE_EXCEPTION( xq_re, ZED( UnescapedChar_3 ), *xq_c );
2019+ break;
2020+ case '|':
2021+ if ( q_flag )
2022+ *icu_re += '\\';
2023+ else {
2024+ is_first_char = true;
2025+ goto append;
2026+ }
2027 default:
2028 if ( x_flag && ascii::is_space( *xq_c ) ) {
2029 if ( !in_char_class )
2030@@ -265,37 +281,42 @@
2031 //
2032 *icu_re += '\\';
2033 }
2034- }
2035- }
2036+ } // switch
2037+ } // else
2038+ is_first_char = false;
2039+append:
2040 *icu_re += *xq_c;
2041 } // FOR_EACH
2042
2043- if ( i_flag ) {
2044- //
2045- // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i"
2046- // flag. For example, "\p{Lu}" continues to match upper-case letters only.
2047- //
2048- // However, ICU lower-cases everything for the 'i' flag; hence we have to
2049- // turn off the 'i' flag for just the \p{Lu}.
2050- //
2051- // Note that the "6" and "12" below are correct since "\\" represents a
2052- // single '\'.
2053- //
2054- ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 );
2055- }
2056+ if ( !q_flag ) {
2057+ if ( i_flag ) {
2058+ //
2059+ // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i"
2060+ // flag. For example, "\p{Lu}" continues to match upper-case letters
2061+ // only.
2062+ //
2063+ // However, ICU lower-cases everything for the 'i' flag; hence we have to
2064+ // turn off the 'i' flag for just the \p{Lu}.
2065+ //
2066+ // Note that the "6" and "12" below are correct since "\\" represents a
2067+ // single '\'.
2068+ //
2069+ ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 );
2070+ }
2071
2072- //
2073- // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a
2074- // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement,
2075- // Hangul Jamo, CJK Compatibility, etc. The set containing all characters
2076- // that have block name X (with all white space stripped out), can be
2077- // identified with a block escape \p{IsX}.
2078- //
2079- // However, ICU uses \p{InX} rather than \p{IsX}.
2080- //
2081- // Note that the "5" below is correct since "\\" represents a single '\'.
2082- //
2083- ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 );
2084+ //
2085+ // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a
2086+ // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement,
2087+ // Hangul Jamo, CJK Compatibility, etc. The set containing all characters
2088+ // that have block name X (with all white space stripped out), can be
2089+ // identified with a block escape \p{IsX}.
2090+ //
2091+ // However, ICU uses \p{InX} rather than \p{IsX}.
2092+ //
2093+ // Note that the "5" below is correct since "\\" represents a single '\'.
2094+ //
2095+ ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 );
2096+ } // !q_flag
2097 }
2098
2099 ///////////////////////////////////////////////////////////////////////////////
2100@@ -442,11 +463,11 @@
2101 }
2102
2103 } // namespace unicode
2104-
2105-}//namespace zorba
2106-
2107-
2108-#else /* ZORBA_NO_UNICODE */
2109+} // namespace zorba
2110+
2111+///////////////////////////////////////////////////////////////////////////////
2112+
2113+#else /* ZORBA_NO_ICU */
2114
2115 #include "zorbatypes/zstring.h"
2116
2117@@ -470,7 +491,7 @@
2118 case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break;
2119 case 's': flags |= REGEX_ASCII_DOTALL; break;
2120 case 'm': flags |= REGEX_ASCII_MULTILINE; break;
2121- case 'x': flags |= REGEX_ASCII_COMMENTS; break;
2122+ case 'x': flags |= REGEX_ASCII_NO_WHITESPACE; break;
2123 case 'q': flags |= REGEX_ASCII_LITERAL; break;
2124 default:
2125 throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) );
2126@@ -483,6 +504,7 @@
2127 void regex::compile( char const *pattern, char const *flags)
2128 {
2129 parsed_flags = parse_regex_flags(flags);
2130+ regex_xquery::CRegexXQuery_parser regex_parser;
2131 regex_matcher = regex_parser.parse(pattern, parsed_flags);
2132 if(!regex_matcher)
2133 throw INVALID_RE_EXCEPTION(pattern);
2134@@ -517,6 +539,8 @@
2135 bool regex::next_token( char const *s, size_type *pos, zstring *token,
2136 bool *matched)
2137 {
2138+ if(!s[*pos])
2139+ return false;
2140 bool retval;
2141 int match_pos;
2142 int matched_len;
2143@@ -528,14 +552,8 @@
2144 token->assign(s+*pos, match_pos);
2145 *pos += match_pos + matched_len;
2146 if(matched)
2147- if(match_pos)
2148- *matched = true;
2149- else
2150- *matched = false;
2151- if(match_pos)
2152- return true;
2153- else
2154- return false;
2155+ *matched = true;
2156+ return true;
2157 }
2158 else
2159 {
2160@@ -544,7 +562,7 @@
2161 *pos += strlen(s+*pos);
2162 if(matched)
2163 *matched = false;
2164- return s[*pos] != 0;
2165+ return true;
2166 }
2167 }
2168
2169@@ -554,13 +572,9 @@
2170 int matched_pos;
2171 int matched_len;
2172
2173- bool prev_align = regex_matcher->set_align_begin(true);
2174- retval = regex_matcher->match_from(s, parsed_flags, &matched_pos, &matched_len);
2175- regex_matcher->set_align_begin(prev_align);
2176+ retval = regex_matcher->match_anywhere(s, parsed_flags|REGEX_ASCII_WHOLE_MATCH, &matched_pos, &matched_len);
2177 if(!retval)
2178 return false;
2179- if(matched_len != strlen(s))
2180- return false;
2181 return true;
2182 }
2183
2184@@ -587,14 +601,19 @@
2185 //look for dollars
2186 if(*temprepl == '\\')
2187 {
2188- temprepl++;
2189- if(!*temprepl || (*temprepl != '\\') || (*temprepl != '$'))//Invalid replacement string.
2190- throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
2191+ if(!(parsed_flags & REGEX_ASCII_LITERAL))
2192+ {
2193+ temprepl++;
2194+ if(!*temprepl)
2195+ temprepl--;
2196+ else if((*temprepl != '\\') && (*temprepl != '$'))//Invalid replacement string.
2197+ throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
2198+ }
2199 result->append(1, *temprepl);
2200 temprepl++;
2201 continue;
2202 }
2203- if(*temprepl == '$')
2204+ if((*temprepl == '$') && !(parsed_flags & REGEX_ASCII_LITERAL))
2205 {
2206 temprepl++;
2207 index = 0;
2208@@ -648,7 +667,7 @@
2209 if(retval)
2210 {
2211 m_match_pos += m_pos;
2212- m_pos = m_match_pos = m_matched_len;
2213+ m_pos = m_match_pos + m_matched_len;
2214 }
2215 else
2216 {
2217@@ -666,35 +685,30 @@
2218 return (int)regex_matcher->get_indexed_regex_count();
2219 }
2220
2221-int regex::get_match_start( int groupId )
2222-{
2223- if(groupId == 0)
2224- return m_match_pos;
2225- if(groupId > (int)regex_matcher->get_indexed_regex_count())
2226- return -1;
2227- const char *submatched_source;
2228- int submatched_len;
2229- if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
2230- return -1;
2231- return submatched_source - s_in_.c_str();
2232-}
2233-
2234-int regex::get_match_end( int groupId )
2235-{
2236- if(groupId == 0)
2237- return m_match_pos + m_matched_len;
2238- if(groupId > (int)regex_matcher->get_indexed_regex_count())
2239- return -1;
2240- const char *submatched_source;
2241- int submatched_len;
2242- if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
2243- return -1;
2244- return submatched_source - s_in_.c_str() + submatched_len;
2245+bool regex::get_match_start_end_bytes( int groupId, int *start, int *end )
2246+{
2247+ *start = -1;
2248+ *end = -1;
2249+ if(groupId == 0)
2250+ {
2251+ *start = m_match_pos;
2252+ *end = m_match_pos + m_matched_len;
2253+ return true;
2254+ }
2255+ if(groupId > (int)regex_matcher->get_indexed_regex_count())
2256+ return false;
2257+ const char *submatched_source;
2258+ int submatched_len;
2259+ if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len))
2260+ return false;
2261+ *start = submatched_source - s_in_.c_str();
2262+ *end = *start + submatched_len;
2263+ return true;
2264 }
2265
2266 } // namespace unicode
2267 } // namespace zorba
2268-#endif /* ZORBA_NO_UNICODE */
2269+#endif /* ZORBA_NO_ICU */
2270
2271 ///////////////////////////////////////////////////////////////////////////////
2272
2273
2274=== modified file 'src/util/regex.h'
2275--- src/util/regex.h 2012-03-28 05:19:57 +0000
2276+++ src/util/regex.h 2012-04-07 00:45:26 +0000
2277@@ -17,15 +17,13 @@
2278 #ifndef ZORBA_REGEX_H
2279 #define ZORBA_REGEX_H
2280
2281-#ifndef ZORBA_NO_UNICODE
2282-#include <unicode/regex.h>
2283-#endif
2284-
2285 #include "cxx_util.h"
2286 #include "unicode_util.h"
2287 #include "zorbatypes/zstring.h"
2288
2289-#ifndef ZORBA_NO_UNICODE
2290+#ifndef ZORBA_NO_ICU
2291+
2292+#include <unicode/regex.h>
2293
2294 namespace zorba {
2295
2296@@ -496,15 +494,17 @@
2297 } // namespace unicode
2298 } // namespace zorba
2299
2300-#else ///ZORBA_NO_UNICODE (ascii part:)
2301-
2302-#include "util/regex_ascii.h"
2303+///////////////////////////////////////////////////////////////////////////////
2304+
2305+#else /* ZORBA_NO_ICU */
2306+
2307+#include "util/regex_xquery.h"
2308 #include <string>
2309
2310 namespace zorba{
2311 /**
2312 * Converts an XQuery regular expression to the form used by the regular
2313- * expression library Zorba is using (here regex_ascii).
2314+ * expression library Zorba is using (here regex_xquery).
2315 *
2316 * @param xq_re The XQuery regular expression.
2317 * @param lib_re A pointer to the resuling library regular expression.
2318@@ -525,7 +525,7 @@
2319 /**
2320 * Constructs a %regex.
2321 */
2322- regex() : regex_matcher( NULL ) { }
2323+ regex() : regex_matcher( nullptr ) { }
2324
2325 /**
2326 * Destroys a %regex.
2327@@ -835,31 +835,21 @@
2328
2329 /**
2330 * Get the start position of the matched group.
2331- * If groupId is zero, then the start position of the whole match is returned.
2332- * If groupId is non-zero, then the start position of that group is returned.
2333- * If that group has not been matched, -1 is returned.
2334+ * If groupId is zero, then the start and end positions of the whole match are returned.
2335+ * If groupId is non-zero, then the start and end positions of that group are returned.
2336+ * If that group has not been matched, false is returned.
2337 *
2338 * @param groupId the id of the group, either zero for the entire regex,
2339 * or [1 .. group_count] for that specific group
2340- * @return the start position, zero based, or -1 if that group didn't match
2341+ * @param start to return start position in bytes
2342+ * @param end to return end position in bytes
2343+ * @return true if that group exists and has been matched
2344 */
2345- int get_match_start( int groupId = 0 );
2346+ bool get_match_start_end_bytes( int groupId, int *start, int *end );
2347
2348- /**
2349- * Get the end position of the matched group.
2350- * If groupId is zero, then the end position of the whole match is returned.
2351- * If groupId is non-zero, then the end position of that group is returned.
2352- * If that group has not been matched, -1 is returned.
2353- *
2354- * @param groupId the id of the group, either zero for the entire regex,
2355- * or [1 .. group_count] for that specific group
2356- * @return the end position, zero based, or -1 if that group didn't match
2357- */
2358- int get_match_end( int groupId = 0 );
2359
2360 private:
2361- regex_ascii::CRegexAscii_parser regex_parser;
2362- regex_ascii::CRegexAscii_regex *regex_matcher;
2363+ regex_xquery::CRegexXQuery_regex *regex_matcher;
2364 uint32_t parsed_flags;
2365
2366 zstring s_in_;
2367@@ -873,15 +863,13 @@
2368 regex( regex const& );
2369 regex& operator=( regex const& );
2370 };
2371+
2372+///////////////////////////////////////////////////////////////////////////////
2373+
2374 } // namespace unicode
2375 } // namespace zorba
2376
2377-#endif /* ZORBA_NO_UNICODE */
2378-
2379-
2380-///////////////////////////////////////////////////////////////////////////////
2381-
2382-
2383+#endif /* ZORBA_NO_ICU */
2384 #endif /* ZORBA_REGEX_H */
2385 /*
2386 * Local variables:
2387
2388=== renamed file 'src/util/regex_ascii.cpp' => 'src/util/regex_xquery.cpp'
2389--- src/util/regex_ascii.cpp 2012-03-28 05:19:57 +0000
2390+++ src/util/regex_xquery.cpp 2012-04-07 00:45:26 +0000
2391@@ -1,4 +1,4 @@
2392-a/*
2393+/*
2394 * Copyright 2006-2008 The FLWOR Foundation.
2395 *
2396 * Licensed under the Apache License, Version 2.0 (the "License");
2397@@ -18,12 +18,15 @@
2398
2399 #include "diagnostics/xquery_diagnostics.h"
2400
2401-#include "regex_ascii.h"
2402+#include "regex_xquery.h"
2403 #include <string.h>
2404 #include "zorbatypes/chartype.h"
2405+#include "util/unicode_categories.h"
2406+#include "util/ascii_util.h"
2407+#include "util/utf8_string.h"
2408
2409 namespace zorba {
2410- namespace regex_ascii{
2411+ namespace regex_xquery{
2412 //ascii regular expression matching
2413
2414 /*http://www.w3.org/TR/xmlschema-2/#regexs
2415@@ -62,96 +65,138 @@
2416 + http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented)
2417 */
2418
2419+
2420+static bool compare_ascii_i(const char *str1, const char *str2)
2421+{
2422+ while(*str1 && *str2)
2423+ {
2424+ if(ascii::to_lower(*str1) != ascii::to_lower(*str2))
2425+ return false;
2426+ str1++;
2427+ str2++;
2428+ }
2429+ if(*str1 || *str2)
2430+ return false;
2431+ return true;
2432+}
2433+
2434+static bool compare_unicode_ni(const char *str1, const char *str2, int len)
2435+{
2436+ while(len > 0)
2437+ {
2438+ const char *temp_str1 = str1;
2439+ const char *temp_str2 = str2;
2440+ unicode::code_point cp1 = unicode::to_upper(utf8::next_char(temp_str1));
2441+ unicode::code_point cp2 = unicode::to_upper(utf8::next_char(temp_str2));
2442+ if(cp1 != cp2)
2443+ return false;
2444+ len -= temp_str1-str1;
2445+ str1 = temp_str1;
2446+ str2 = temp_str2;
2447+ }
2448+ return true;
2449+}
2450+static utf8::size_type myutf8len(const char *source)
2451+{
2452+ utf8::size_type len = utf8::char_length(*source);
2453+ if(!len)
2454+ return 1;
2455+ else
2456+ return len;
2457+}
2458 ////////////////////////////////////
2459 ////Regular expression parsing and building of the tree
2460 ////////////////////////////////////
2461
2462-CRegexAscii_regex* CRegexAscii_parser::parse(const char *pattern, unsigned int flags)
2463+CRegexXQuery_regex* CRegexXQuery_parser::parse(const char *pattern, unsigned int flags)
2464 {
2465 this->flags = flags;
2466- bool align_begin = false;
2467
2468- if(!(flags & REGEX_ASCII_LITERAL) && (pattern[0] == '^'))
2469- align_begin = true;
2470-
2471 int regex_len;
2472- CRegexAscii_regex* regex = parse_regexp(pattern + (align_begin?1:0), &regex_len);
2473+ CRegexXQuery_regex* regex = parse_regexp(pattern, &regex_len);
2474
2475- if(regex)
2476- regex->set_align_begin(align_begin);
2477-
2478 return regex;
2479 }
2480
2481 //until '\0' or ')'
2482-CRegexAscii_regex* CRegexAscii_parser::parse_regexp(const char *pattern,
2483+CRegexXQuery_regex* CRegexXQuery_parser::parse_regexp(const char *pattern,
2484 int *regex_len)
2485 {
2486 *regex_len = 0;
2487 int branch_len;
2488 regex_depth++;
2489- CRegexAscii_regex *regex = new CRegexAscii_regex(current_regex);
2490+ std::auto_ptr<CRegexXQuery_regex> regex(new CRegexXQuery_regex(current_regex));
2491 if(!current_regex)
2492- current_regex = regex;
2493+ current_regex = regex.get();
2494 if(regex_depth >= 2)
2495 {
2496 //mark this as group if it does not start with ?:
2497 if(pattern[0] != '?' || pattern[1] != ':')
2498- current_regex->subregex.push_back(regex);
2499+ current_regex->subregex.push_back(regex.get());
2500 else
2501 *regex_len = 2;
2502 }
2503- CRegexAscii_branch *branch;
2504+ CRegexXQuery_branch *branch;
2505+ bool must_read_another_branch = true;
2506 while(pattern[*regex_len] && (pattern[*regex_len] != ')'))
2507 {
2508 branch = parse_branch(pattern+*regex_len, &branch_len);
2509 if(!branch)
2510 {
2511 regex_depth--;
2512- delete regex;
2513 return NULL;
2514 }
2515 regex->add_branch(branch);
2516 *regex_len += branch_len;
2517+ if(pattern[*regex_len] == '|')
2518+ (*regex_len)++;
2519+ else
2520+ must_read_another_branch = false;
2521 }
2522- if((current_regex == regex) && (pattern[*regex_len] == ')'))
2523+ if((current_regex == regex.get()) && (pattern[*regex_len] == ')'))
2524 {
2525- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_MISMATCHED_PAREN)) );
2526+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISMATCHED_PAREN)) );
2527 }
2528 if(pattern[*regex_len])
2529 (*regex_len)++;
2530+ if(must_read_another_branch)
2531+ regex->add_branch(new CRegexXQuery_branch(current_regex));//add empty branch
2532 regex->flags = 0;//finished initialization
2533 regex_depth--;
2534- return regex;
2535+ return regex.release();
2536 }
2537
2538-CRegexAscii_branch* CRegexAscii_parser::parse_branch(const char *pattern, int *branch_len)
2539+CRegexXQuery_branch* CRegexXQuery_parser::parse_branch(const char *pattern, int *branch_len)
2540 {
2541 int piece_len;
2542
2543- CRegexAscii_branch *branch = new CRegexAscii_branch(current_regex);
2544- CRegexAscii_piece *piece;
2545+ std::auto_ptr<CRegexXQuery_branch> branch(new CRegexXQuery_branch(current_regex));
2546+ CRegexXQuery_piece *piece;
2547 *branch_len = 0;
2548 while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')'))
2549 {
2550 piece = parse_piece(pattern+*branch_len, &piece_len);
2551 if(!piece)
2552 {
2553- delete branch;
2554 return NULL;
2555 }
2556+ if(branch->piece_list.size() && dynamic_cast<CRegexXQuery_pinstart*>(piece->atom))
2557+ {
2558+ //found ^ that is not at the beginning of branch
2559+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), '^') );
2560+ }
2561 branch->add_piece(piece);
2562 *branch_len += piece_len;
2563 }
2564- if(pattern[*branch_len] == '|')
2565- (*branch_len)++;
2566- return branch;
2567+ //if(pattern[*branch_len] == '|')
2568+ // (*branch_len)++;
2569+ return branch.release();
2570 }
2571
2572 //piece = atom + quantifier
2573-CRegexAscii_piece* CRegexAscii_parser::parse_piece(const char *pattern, int *piece_len)
2574+CRegexXQuery_piece* CRegexXQuery_parser::parse_piece(const char *pattern, int *piece_len)
2575 {
2576- CRegexAscii_piece *piece = new CRegexAscii_piece;
2577+ std::auto_ptr<CRegexXQuery_piece> piece(new CRegexXQuery_piece);
2578 IRegexAtom *atom;
2579 *piece_len = 0;
2580
2581@@ -160,19 +205,18 @@
2582 atom = read_atom(pattern, &atom_len);
2583 if(!atom)
2584 {
2585- delete piece;
2586 return NULL;
2587 }
2588 piece->set_atom(atom);
2589 if(!(flags & REGEX_ASCII_LITERAL))
2590- read_quantifier(piece, pattern+atom_len, &quantif_len);
2591+ read_quantifier(piece.get(), pattern+atom_len, &quantif_len);
2592
2593 *piece_len += atom_len + quantif_len;
2594
2595- return piece;
2596+ return piece.release();
2597 }
2598
2599-char CRegexAscii_parser::myishex(char c)
2600+char CRegexXQuery_parser::myishex(char c)
2601 {
2602 if((c >= '0') && (c <= '9'))
2603 return c-'0'+1;
2604@@ -183,26 +227,125 @@
2605 return 0;//not a hex
2606 }
2607
2608-bool CRegexAscii_parser::myisdigit(char c)
2609-{
2610- return (c >= '0') || (c <= '9');
2611-}
2612-
2613-char CRegexAscii_parser::readChar(const char *pattern, int *char_len, bool *is_multichar)
2614+bool CRegexXQuery_parser::myisdigit(char c)
2615+{
2616+ return (c >= '0') && (c <= '9');
2617+}
2618+
2619+bool CRegexXQuery_parser::myisletterAZ(char c)
2620+{
2621+ return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
2622+}
2623+
2624+static const unicode::code_point specials_extcp[] = {0xFFF0, 0xFFFD, 0};
2625+
2626+static CRegexXQuery_parser::block_escape_t block_escape[] =
2627+{
2628+{{0x0000, 0x007F}, NULL, "BasicLatin"},
2629+{{0x0080, 0x00FF}, NULL, "Latin-1Supplement"},
2630+{{0x0100, 0x017F}, NULL, "LatinExtended-A"},
2631+{{0x0180, 0x024F}, NULL, "LatinExtended-B"},
2632+{{0x0250, 0x02AF}, NULL, "IPAExtensions"},
2633+{{0x02B0, 0x02FF}, NULL, "SpacingModifierLetters"},
2634+{{0x0300, 0x036F}, NULL, "CombiningDiacriticalMarks"},
2635+{{0x0370, 0x03FF}, NULL, "Greek"},
2636+{{0x0400, 0x04FF}, NULL, "Cyrillic"},
2637+{{0x0530, 0x058F}, NULL, "Armenian"},
2638+{{0x0590, 0x05FF}, NULL, "Hebrew"},
2639+{{0x0600, 0x06FF}, NULL, "Arabic"},
2640+{{0x0700, 0x074F}, NULL, "Syriac"},
2641+{{0x0780, 0x07BF}, NULL, "Thaana"},
2642+{{0x0900, 0x097F}, NULL, "Devanagari"},
2643+{{0x0980, 0x09FF}, NULL, "Bengali"},
2644+{{0x0A00, 0x0A7F}, NULL, "Gurmukhi"},
2645+{{0x0A80, 0x0AFF}, NULL, "Gujarati"},
2646+{{0x0B00, 0x0B7F}, NULL, "Oriya"},
2647+{{0x0B80, 0x0BFF}, NULL, "Tamil"},
2648+{{0x0C00, 0x0C7F}, NULL, "Telugu"},
2649+{{0x0C80, 0x0CFF}, NULL, "Kannada"},
2650+{{0x0D00, 0x0D7F}, NULL, "Malayalam"},
2651+{{0x0D80, 0x0DFF}, NULL, "Sinhala"},
2652+{{0x0E00, 0x0E7F}, NULL, "Thai"},
2653+{{0x0E80, 0x0EFF}, NULL, "Lao"},
2654+{{0x0F00, 0x0FFF}, NULL, "Tibetan"},
2655+{{0x1000, 0x109F}, NULL, "Myanmar"},
2656+{{0x10A0, 0x10FF}, NULL, "Georgian"},
2657+{{0x1100, 0x11FF}, NULL, "HangulJamo"},
2658+{{0x1200, 0x137F}, NULL, "Ethiopic"},
2659+{{0x13A0, 0x13FF}, NULL, "Cherokee"},
2660+{{0x1400, 0x167F}, NULL, "UnifiedCanadianAboriginalSyllabics"},
2661+{{0x1680, 0x169F}, NULL, "Ogham"},
2662+{{0x16A0, 0x16FF}, NULL, "Runic"},
2663+{{0x1780, 0x17FF}, NULL, "Khmer"},
2664+{{0x1800, 0x18AF}, NULL, "Mongolian"},
2665+{{0x1E00, 0x1EFF}, NULL, "LatinExtendedAdditional"},
2666+{{0x1F00, 0x1FFF}, NULL, "GreekExtended"},
2667+{{0x2000, 0x206F}, NULL, "GeneralPunctuation"},
2668+{{0x2070, 0x209F}, NULL, "SuperscriptsandSubscripts"},
2669+{{0x20A0, 0x20CF}, NULL, "CurrencySymbols"},
2670+{{0x20D0, 0x20FF}, NULL, "CombiningMarksforSymbols"},
2671+{{0x2100, 0x214F}, NULL, "LetterlikeSymbols"},
2672+{{0x2150, 0x218F}, NULL, "NumberForms"},
2673+{{0x2190, 0x21FF}, NULL, "Arrows"},
2674+{{0x2200, 0x22FF}, NULL, "MathematicalOperators"},
2675+{{0x2300, 0x23FF}, NULL, "MiscellaneousTechnical"},
2676+{{0x2400, 0x243F}, NULL, "ControlPictures"},
2677+{{0x2440, 0x245F}, NULL, "OpticalCharacterRecognition"},
2678+{{0x2460, 0x24FF}, NULL, "EnclosedAlphanumerics"},
2679+{{0x2500, 0x257F}, NULL, "BoxDrawing"},
2680+{{0x2580, 0x259F}, NULL, "BlockElements"},
2681+{{0x25A0, 0x25FF}, NULL, "GeometricShapes"},
2682+{{0x2600, 0x26FF}, NULL, "MiscellaneousSymbols"},
2683+{{0x2700, 0x27BF}, NULL, "Dingbats"},
2684+{{0x2800, 0x28FF}, NULL, "BraillePatterns"},
2685+{{0x2E80, 0x2EFF}, NULL, "CJKRadicalsSupplement"},
2686+{{0x2F00, 0x2FDF}, NULL, "KangxiRadicals"},
2687+{{0x2FF0, 0x2FFF}, NULL, "IdeographicDescriptionCharacters"},
2688+{{0x3000, 0x303F}, NULL, "CJKSymbolsandPunctuation"},
2689+{{0x3040, 0x309F}, NULL, "Hiragana"},
2690+{{0x30A0, 0x30FF}, NULL, "Katakana"},
2691+{{0x3100, 0x312F}, NULL, "Bopomofo"},
2692+{{0x3130, 0x318F}, NULL, "HangulCompatibilityJamo"},
2693+{{0x3190, 0x319F}, NULL, "Kanbun"},
2694+{{0x31A0, 0x31BF}, NULL, "BopomofoExtended"},
2695+{{0x3200, 0x32FF}, NULL, "EnclosedCJKLettersandMonths"},
2696+{{0x3300, 0x33FF}, NULL, "CJKCompatibility"},
2697+{{0x3400, 0x4DB5}, NULL, "CJKUnifiedIdeographsExtensionA"},
2698+{{0x4E00, 0x9FFF}, NULL, "CJKUnifiedIdeographs"},
2699+{{0xA000, 0xA48F}, NULL, "YiSyllables"},
2700+{{0xA490, 0xA4CF}, NULL, "YiRadicals"},
2701+{{0xAC00, 0xD7A3}, NULL, "HangulSyllables"},
2702+{{0xE000, 0xF8FF}, NULL, "PrivateUse"},
2703+{{0xF900, 0xFAFF}, NULL, "CJKCompatibilityIdeographs"},
2704+{{0xFB00, 0xFB4F}, NULL, "AlphabeticPresentationForms"},
2705+{{0xFB50, 0xFDFF}, NULL, "ArabicPresentationForms-A"},
2706+{{0xFE20, 0xFE2F}, NULL, "CombiningHalfMarks"},
2707+{{0xFE30, 0xFE4F}, NULL, "CJKCompatibilityForms"},
2708+{{0xFE50, 0xFE6F}, NULL, "SmallFormVariants"},
2709+{{0xFE70, 0xFEFE}, NULL, "ArabicPresentationForms-B"},
2710+{{0xFEFF, 0xFEFF}, specials_extcp, "Specials"},
2711+{{0xFF00, 0xFFEF}, NULL, "HalfwidthandFullwidthForms"}
2712+};
2713+
2714+CRegexXQuery_charmatch* CRegexXQuery_parser::readChar(const char *pattern,
2715+ int *char_len,
2716+ enum CHARGROUP_t *multichar_type)
2717 {
2718 char c = 0;
2719 *char_len = 0;
2720- *is_multichar = false;
2721+ *multichar_type = CHARGROUP_NO_MULTICHAR;
2722 switch(pattern[*char_len])
2723 {
2724 case '\\':
2725- { (*char_len)++;
2726+ {
2727+ (*char_len)++;
2728 switch(pattern[*char_len])
2729 {
2730- case 'n': c = '\n';break;
2731- case 'r': c = '\r';break;
2732- case 't': c = '\t';break;
2733+ case 'n': c = '\n';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2734+ case 'r': c = '\r';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2735+ case 't': c = '\t';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c);
2736 case '\\':
2737+ case '/'://+
2738 case '|':
2739 case '.':
2740 case '?':
2741@@ -216,19 +359,205 @@
2742 case '['://#x5B
2743 case ']'://#x5D
2744 case '^'://#x5E
2745+ case '$'://+
2746 c = pattern[*char_len];
2747- break;
2748+ (*char_len)++;
2749+ *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII;
2750+ return new CRegexXQuery_char_ascii(current_regex, c);
2751 case 'p'://catEsc
2752 case 'P'://complEsc
2753+ {
2754+ //parse the category (\p{L}, \p{Lu}, ...) or block (\p{IsX}) property name
2755- c = pattern[*char_len];
2756- *is_multichar = true;
2757- if(pattern[*char_len+1] == '{')
2758- {
2759- while(pattern[*char_len] != '}')
2760+ *multichar_type = CHARGROUP_FLAGS_MULTICHAR_p;//(CHARGROUP_t)((pattern[*char_len] == 'P') ? 128 : 0);
2761+ bool is_reverse = (pattern[*char_len] == 'P');
2762+ c = 0;
2763+ if(pattern[(*char_len)+1] != '{')
2764+ {
2765+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2766+ }
2767+ (*char_len) += 2;
2768+ switch(pattern[*char_len])
2769+ {//IsCategory
2770+ case 'L':
2771+ {
2772+ switch(pattern[(*char_len)+1])
2773+ {
2774+ case '}':
2775+ c = unicode::UNICODE_Ll + 50;break;
2776+ case 'u':
2777+ c = unicode::UNICODE_Lu; (*char_len)++;break;
2778+ case 'l':
2779+ c = unicode::UNICODE_Ll; (*char_len)++;break;
2780+ case 't':
2781+ c = unicode::UNICODE_Lt; (*char_len)++;break;
2782+ case 'm':
2783+ c = unicode::UNICODE_Lm; (*char_len)++;break;
2784+ case 'o':
2785+ c = unicode::UNICODE_Lo; (*char_len)++;break;
2786+ default:
2787+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PL_CONSTRUCT)) );
2788+ }
2789+ }break;
2790+ case 'M':
2791+ {
2792+ switch(pattern[(*char_len)+1])
2793+ {
2794+ case '}':
2795+ c = unicode::UNICODE_Mc + 50;break;
2796+ case 'n':
2797+ c = unicode::UNICODE_Mn; (*char_len)++;break;
2798+ case 'c':
2799+ c = unicode::UNICODE_Mc; (*char_len)++;break;
2800+ case 'e':
2801+ c = unicode::UNICODE_Me; (*char_len)++;break;
2802+ default:
2803+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PM_CONSTRUCT)) );
2804+ }
2805+ }break;
2806+ case 'N':
2807+ {
2808+ switch(pattern[(*char_len)+1])
2809+ {
2810+ case '}':
2811+ c = unicode::UNICODE_Nd + 50;break;
2812+ case 'd':
2813+ c = unicode::UNICODE_Nd; (*char_len)++;break;
2814+ case 'l':
2815+ c = unicode::UNICODE_Nl; (*char_len)++;break;
2816+ case 'o':
2817+ c = unicode::UNICODE_No; (*char_len)++;break;
2818+ default:
2819+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PN_CONSTRUCT)) );
2820+ }
2821+ }break;
2822+ case 'P':
2823+ {
2824+ switch(pattern[(*char_len)+1])
2825+ {
2826+ case '}':
2827+ c = unicode::UNICODE_Pc + 50;break;
2828+ case 'c':
2829+ c = unicode::UNICODE_Pc; (*char_len)++;break;
2830+ case 'd':
2831+ c = unicode::UNICODE_Pd; (*char_len)++;break;
2832+ case 's':
2833+ c = unicode::UNICODE_Ps; (*char_len)++;break;
2834+ case 'e':
2835+ c = unicode::UNICODE_Pe; (*char_len)++;break;
2836+ case 'i':
2837+ c = unicode::UNICODE_Pi; (*char_len)++;break;
2838+ case 'f':
2839+ c = unicode::UNICODE_Pf; (*char_len)++;break;
2840+ case 'o':
2841+ c = unicode::UNICODE_Po; (*char_len)++;break;
2842+ default:
2843+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PP_CONSTRUCT)) );
2844+ }
2845+ }break;
2846+ case 'Z':
2847+ {
2848+ switch(pattern[(*char_len)+1])
2849+ {
2850+ case '}':
2851+ c = unicode::UNICODE_Zl + 50;break;
2852+ case 's':
2853+ c = unicode::UNICODE_Zs; (*char_len)++;break;
2854+ case 'l':
2855+ c = unicode::UNICODE_Zl; (*char_len)++;break;
2856+ case 'p':
2857+ c = unicode::UNICODE_Zp; (*char_len)++;break;
2858+ default:
2859+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PZ_CONSTRUCT)) );
2860+ }
2861+ }break;
2862+ case 'S':
2863+ {
2864+ switch(pattern[(*char_len)+1])
2865+ {
2866+ case '}':
2867+ c = unicode::UNICODE_Sc + 50;break;
2868+ case 'm':
2869+ c = unicode::UNICODE_Sm; (*char_len)++;break;
2870+ case 'c':
2871+ c = unicode::UNICODE_Sc; (*char_len)++;break;
2872+ case 'k':
2873+ c = unicode::UNICODE_Sk; (*char_len)++;break;
2874+ case 'o':
2875+ c = unicode::UNICODE_So; (*char_len)++;break;
2876+ default:
2877+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PS_CONSTRUCT)) );
2878+ }
2879+ }break;
2880+ case 'C':
2881+ {
2882+ switch(pattern[(*char_len)+1])
2883+ {
2884+ case '}':
2885+ c = unicode::UNICODE_Cc + 50;break;
2886+ case 'c':
2887+ c = unicode::UNICODE_Cc; (*char_len)++;break;
2888+ case 'f':
2889+ c = unicode::UNICODE_Cf; (*char_len)++;break;
2890+ case 'o':
2891+ c = unicode::UNICODE_Co; (*char_len)++;break;
2892+ case 'n':
2893+ c = unicode::UNICODE_Cn; (*char_len)++;break;
2894+ default:
2895+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PC_CONSTRUCT)) );
2896+ }
2897+ }break;
2898+ }//end switch
2899+ if(c)
2900+ {
2901+ if(pattern[(*char_len) + 1] != '}')
2902+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2903+ (*char_len)++;
2904+ (*char_len)++;
2905+ return new CRegexXQuery_multicharP(current_regex, c, is_reverse);
2906+ }
2907+ if(pattern[*char_len] == 'I')
2908+ {
2909+ if(pattern[(*char_len)+1] == 's')//IsBlock
2910+ {
2911+ *multichar_type = CHARGROUP_FLAGS_MULTICHAR_Is;
2912+ (*char_len) += 2;
2913+ zstring block_name;
2914+ char tempc = pattern[(*char_len)];
2915+ while(tempc && (tempc != '}'))
2916+ {
2917+ if(!myisletterAZ(tempc) && !myisdigit(tempc) && (tempc != '-'))
2918+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2919+ block_name.append(1, tempc);
2920+ (*char_len)++;
2921+ tempc = pattern[(*char_len)];
2922+ }
2923+ if(!tempc)
2924+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2925+ //search for the block name
2926+ int i;
2927+ int nr_blocks = sizeof(block_escape)/sizeof(CRegexXQuery_parser::block_escape_t);
2928+ for(i=0;i<nr_blocks;i++)
2929+ {
2930+ if(compare_ascii_i(block_name.c_str(), block_escape[i].group_name))
2931+ {
2932+ c = i;
2933+ break;
2934+ }
2935+ }
2936+ if(i==nr_blocks)
2937+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PIs_CONSTRUCT)) );
2938 (*char_len)++;
2939- }
2940- break;
2941+ return new CRegexXQuery_multicharIs(current_regex, i, is_reverse);
2942+ }
2943+ else
2944+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
2945+ }
2946+ else
2947+ {
2948+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
2949+ }
2950+ break;//unreachable
2951+ }//end case 'p'
2952 //multiCharEsc
2953 case 's':
2954 case 'S':
2955@@ -240,40 +569,104 @@
2956 case 'D':
2957 case 'w':
2958 case 'W':
2959- *is_multichar = true;
2960+ *multichar_type = CHARGROUP_FLAGS_MULTICHAR_OTHER;
2961 c = pattern[*char_len];
2962- break;
2963- }
2964- break;
2965- }
2966- case '#':///might be #xXX
2967- {
2968- if((pattern[*char_len+1] == 'x') &&
2969- myishex(pattern[*char_len+2]) && myishex(pattern[*char_len+3]))
2970- {
2971- c = (myishex(pattern[*char_len+2])-1)<<4 | (myishex(pattern[*char_len+3])-1);
2972- *char_len += 3;
2973- break;
2974- }
2975- }
2976+ (*char_len)++;
2977+ return new CRegexXQuery_multicharOther(current_regex, c);
2978+ case 'u'://unicode codepoint \uXXXX
2979+ {
2980+ unicode::code_point utf8c = 0;
2981+ (*char_len)++;
2982+ for(int i=0;i<4;i++)
2983+ {
2984+ char hex = myishex(pattern[*char_len]);
2985+ if(!hex)
2986+ {
2987+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) );
2988+ }
2989+ utf8c <<= 4;
2990+ utf8c |= (hex-1) & 0x0f;
2991+ (*char_len)++;
2992+ }
2993+ return create_charmatch(utf8c, NULL, 0, multichar_type);
2994+ }
2995+ case 'U'://unicode codepoint \UXXXXXXXX
2996+ {
2997+ unicode::code_point utf8c = 0;
2998+ (*char_len)++;
2999+ for(int i=0;i<8;i++)
3000+ {
3001+ char hex = myishex(pattern[*char_len]);
3002+ if(!hex)
3003+ {
3004+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) );
3005+ }
3006+ utf8c <<= 4;
3007+ utf8c |= (hex-1) & 0x0f;
3008+ (*char_len)++;
3009+ }
3010+ return create_charmatch(utf8c, NULL, 0, multichar_type);
3011+ }
3012+ default:
3013+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_ESC_CHAR)) );
3014+ }
3015+ assert(false);
3016+ break;//unreachable
3017+ }//end case '\'
3018 default:
3019- c = pattern[*char_len];
3020- break;
3021- }
3022-
3023- (*char_len)++;
3024- return c;
3025-}
3026-
3027-
3028-
3029-IRegexAtom* CRegexAscii_parser::read_atom(const char *pattern, int *atom_len)
3030+ {
3031+ const char *temp_pattern = pattern;
3032+ unicode::code_point utf8c = utf8::next_char(temp_pattern);
3033+ (*char_len) = temp_pattern - pattern;
3034+ return create_charmatch(utf8c, pattern, *char_len, multichar_type);
3035+ }
3036+ }
3037+ return NULL;
3038+}
3039+
3040+CRegexXQuery_charmatch *CRegexXQuery_parser::create_charmatch(unicode::code_point utf8c,
3041+ const char *pattern, int utf8len,
3042+ enum CHARGROUP_t *multichar_type)
3043+{
3044+ if(utf8c <= 0x7F)
3045+ {
3046+ *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII;
3047+ if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3048+ return new CRegexXQuery_char_ascii_i(current_regex, (char)utf8c);
3049+ else
3050+ return new CRegexXQuery_char_ascii(current_regex, (char)utf8c);
3051+ }
3052+ else
3053+ {
3054+ *multichar_type = CHARGROUP_FLAGS_ONECHAR_UNICODE;
3055+ if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3056+ return new CRegexXQuery_char_unicode_i(current_regex, utf8c);
3057+ else
3058+ {
3059+ if(pattern)
3060+ return new CRegexXQuery_char_unicode(current_regex, pattern, utf8len);
3061+ else
3062+ return new CRegexXQuery_char_unicode_cp(current_regex, utf8c);
3063+ }
3064+ }
3065+}
3066+
3067+IRegexAtom* CRegexXQuery_parser::read_atom(const char *pattern, int *atom_len)
3068 {
3069 *atom_len = 0;
3070- char c;
3071- bool is_end_line = false;
3072- c = pattern[*atom_len];
3073- if((!(flags & REGEX_ASCII_LITERAL)) && (c == '\\'))
3074+ if(flags & REGEX_ASCII_LITERAL)
3075+ {
3076+ unicode::code_point utf8c;
3077+ //bool is_end_line = false;
3078+ const char *temp_pattern = pattern;
3079+ utf8c = utf8::next_char(temp_pattern);
3080+ *atom_len = temp_pattern - pattern;
3081+ enum CHARGROUP_t multichar_type;
3082+ return create_charmatch(utf8c, pattern, *atom_len, &multichar_type);
3083+ }
3084+
3085+ char c = *pattern;
3086+ if(c == '\\')
3087 {
3088 //check for back reference
3089 if(myisdigit(pattern[(*atom_len)+1]))
3090@@ -281,13 +674,13 @@
3091 (*atom_len)++;
3092 if(pattern[*atom_len] == '0')
3093 {
3094- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
3095+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), 0, current_regex->subregex.size()) );
3096 }
3097 unsigned int backref = pattern[*atom_len] - '0';
3098 if((backref > current_regex->subregex.size()) ||
3099 (current_regex->subregex.at(backref-1)->flags != 0))
3100 {
3101- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
3102+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), backref, current_regex->subregex.size()) );
3103 }
3104 while(current_regex->subregex.size() >= backref*10)
3105 {
3106@@ -303,70 +696,86 @@
3107 break;
3108 }
3109 }
3110- return new CRegexAscii_backref(current_regex, backref);
3111+ (*atom_len)++;
3112+ return new CRegexXQuery_backref(current_regex, backref);
3113 }
3114 }
3115+ if(c == '^')
3116+ {
3117+ (*atom_len)++;
3118+ return new CRegexXQuery_pinstart(current_regex);
3119+ }
3120+ if((c == '}') || (c == '{') || (c == '?') || (c == '*') || (c == '+') || (c == '|'))
3121+ {
3122+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), c) );
3123+ }
3124 switch(c)
3125 {
3126 case '[':
3127 {
3128- if(!(flags & REGEX_ASCII_LITERAL))
3129- {
3130- (*atom_len)++;
3131- CRegexAscii_chargroup *chargroup = NULL;
3132- int chargroup_len;
3133- chargroup = readchargroup(pattern+*atom_len, &chargroup_len);
3134- *atom_len += chargroup_len;
3135- return chargroup;
3136- }
3137+ (*atom_len)++;
3138+ CRegexXQuery_chargroup *chargroup = NULL;
3139+ int chargroup_len;
3140+ chargroup = readchargroup(pattern+*atom_len, &chargroup_len);
3141+ *atom_len += chargroup_len;
3142+ return chargroup;
3143 }
3144 case '.'://WildCharEsc
3145 {
3146- if(!(flags & REGEX_ASCII_LITERAL))
3147- {
3148- CRegexAscii_wildchar *wildchar = new CRegexAscii_wildchar(current_regex);
3149- (*atom_len)++;
3150- return wildchar;
3151- }
3152+ (*atom_len)++;
3153+ return new CRegexXQuery_wildchar(current_regex);
3154 }
3155 case '('://begin an embedded reg exp
3156 {
3157- if(!(flags & REGEX_ASCII_LITERAL))
3158- {
3159- (*atom_len)++;
3160- CRegexAscii_regex *emb_regex = NULL;
3161- int regex_len;
3162- emb_regex = parse_regexp(pattern + *atom_len, &regex_len);
3163- *atom_len += regex_len;
3164- return emb_regex;
3165- }
3166+ (*atom_len)++;
3167+ CRegexXQuery_regex *emb_regex = NULL;
3168+ int regex_len;
3169+ emb_regex = parse_regexp(pattern + *atom_len, &regex_len);
3170+ *atom_len += regex_len;
3171+ return emb_regex;
3172 }
3173 case '$'://end line
3174- if(!(flags & REGEX_ASCII_LITERAL))
3175- {
3176- is_end_line = true;
3177- }
3178+ //is_end_line = true;
3179+ (*atom_len)++;
3180+ return new CRegexXQuery_endline(current_regex);
3181 default:
3182 {
3183- char c;
3184+ //char c;
3185+ CRegexXQuery_charmatch *charmatch = NULL;
3186 int c_len;
3187- bool is_multichar = false;
3188- if(!(flags & REGEX_ASCII_LITERAL))
3189- c = readChar(pattern+*atom_len, &c_len, &is_multichar);
3190- else
3191+ CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR;
3192+ *atom_len = 0;
3193+ while(pattern[*atom_len])
3194 {
3195- c = pattern[*atom_len];
3196- c_len = 1;
3197+ charmatch = readChar(pattern+*atom_len, &c_len, &multichar_type);
3198+ *atom_len += c_len;
3199+ if((flags & REGEX_ASCII_NO_WHITESPACE) && (multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII))
3200+ {
3201+ char c = (char)charmatch->get_c();
3202+ if((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n'))
3203+ {
3204+ //ignore this whitespace
3205+ delete charmatch;
3206+ continue;
3207+ }
3208+ else
3209+ break;
3210+ }
3211+ else
3212+ break;
3213 }
3214- CRegexAscii_chargroup *chargroup = new CRegexAscii_chargroup(current_regex);
3215- if(is_multichar)
3216- chargroup->addMultiChar(c);
3217+ /*
3218+ std::auto_ptr<CRegexXQuery_chargroup> chargroup(new CRegexXQuery_chargroup(current_regex));
3219+ if(multichar_type)
3220+ chargroup->addMultiChar(c, multichar_type);
3221 else if(is_end_line)
3222 chargroup->addEndLine();
3223 else
3224- chargroup->addCharRange(c, c);
3225+ chargroup->addOneChar(c);
3226 *atom_len += c_len;
3227- return chargroup;
3228+ return chargroup.release();
3229+ */
3230+ return charmatch;
3231 }
3232 }
3233 }
3234@@ -374,81 +783,119 @@
3235 //read until ']'
3236 //posCharGroup ::= ( charRange | charClassEsc )+
3237 //charRange ::= seRange | XmlCharIncDash
3238-CRegexAscii_chargroup* CRegexAscii_parser::readchargroup(const char *pattern, int *chargroup_len)
3239+CRegexXQuery_chargroup* CRegexXQuery_parser::readchargroup(const char *pattern, int *chargroup_len)
3240 {
3241- CRegexAscii_chargroup *chargroup = NULL;
3242+ std::auto_ptr<CRegexXQuery_chargroup> chargroup;
3243 *chargroup_len = 0;
3244 if(pattern[*chargroup_len] == '^')//negative group
3245 {
3246 (*chargroup_len)++;
3247- chargroup = new CRegexAscii_negchargroup(current_regex);
3248+ chargroup.reset(new CRegexXQuery_negchargroup(current_regex));
3249 }
3250 else
3251- chargroup = new CRegexAscii_chargroup(current_regex);
3252+ chargroup.reset(new CRegexXQuery_chargroup(current_regex));
3253 while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']'))
3254 {
3255- char c1, c2;
3256- bool is_multichar;
3257+ //char c1, c2;
3258+ CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR;
3259 int c1_len;
3260- c1 = pattern[*chargroup_len];
3261- c2 = pattern[*chargroup_len+1];
3262- if((c1 == '-') && (c2 == '['))//charClassSub
3263+ if((pattern[*chargroup_len] == '-') && (pattern[(*chargroup_len)+1] == '['))//charClassSub
3264 {
3265 int classsub_len;
3266- CRegexAscii_chargroup *classsub = readchargroup(pattern + *chargroup_len+1 + 1, &classsub_len);
3267+ CRegexXQuery_chargroup *classsub = readchargroup(pattern + (*chargroup_len)+1 + 1, &classsub_len);
3268 if(!classsub)
3269 {
3270- delete chargroup;
3271- return NULL;
3272+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_SUBCLASS)) );
3273 }
3274 chargroup->addClassSub(classsub);
3275 *chargroup_len += 2 + classsub_len + 1;
3276 if(pattern[*chargroup_len-1] != ']')
3277 {
3278- delete chargroup;
3279- return NULL;
3280+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_USE_OF_SUBCLASS)) );
3281 }
3282- return chargroup;
3283+ return chargroup.release();
3284 }
3285
3286- c1 = readChar(pattern+*chargroup_len, &c1_len, &is_multichar);
3287- if(is_multichar)//first char is multichar
3288+ std::unique_ptr<CRegexXQuery_charmatch> charmatch(readChar(pattern+*chargroup_len, &c1_len, &multichar_type));
3289+ if((multichar_type == CHARGROUP_FLAGS_MULTICHAR_p) ||
3290+ (multichar_type == CHARGROUP_FLAGS_MULTICHAR_Is) ||
3291+ (multichar_type == CHARGROUP_FLAGS_MULTICHAR_OTHER))//first char is multichar
3292 {
3293- chargroup->addMultiChar(c1);
3294+ if((pattern[*chargroup_len+c1_len] == '-') &&///should not be a range
3295+ (pattern[*chargroup_len+c1_len+1] != ']'))
3296+ {
3297+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
3298+ }
3299+ //chargroup->addMultiChar(c1, multichar_type);
3300+ chargroup->addCharMatch(charmatch.release());
3301 *chargroup_len += c1_len;
3302 continue;
3303 }
3304- if(pattern[*chargroup_len+c1_len] == '-')///might be a range
3305+ (*chargroup_len) += c1_len;
3306+ if(pattern[*chargroup_len] == '-')///might be a range
3307 {
3308- if(pattern[*chargroup_len+c1_len+1] == ']')//no range, just the last char is '-'
3309+ if(pattern[(*chargroup_len)+1] == ']')//no range, just the last char is '-'
3310 {
3311- chargroup->addCharRange(c1, c1);
3312- chargroup->addCharRange('-', '-');
3313- *chargroup_len += c1_len + 1;
3314+ //chargroup->addOneChar(c1);
3315+ //chargroup->addOneChar('-');
3316+ chargroup->addCharMatch(charmatch.release());
3317+ chargroup->addCharMatch(new CRegexXQuery_char_ascii(current_regex, '-'));
3318+ (*chargroup_len)++;
3319 continue;
3320 }
3321- else
3322+ else if(pattern[(*chargroup_len)+1] != '[')
3323 {
3324 //it is a range
3325- char c3;
3326- int c3_len;
3327- c3 = readChar(pattern+*chargroup_len+c1_len+1, &c3_len, &is_multichar);
3328- if(is_multichar)
3329- return NULL;//error
3330- chargroup->addCharRange(c1, c3);
3331- *chargroup_len += c1_len + 1 + c3_len;
3332+ (*chargroup_len)++;
3333+ std::unique_ptr<CRegexXQuery_charmatch> charmatch2;
3334+ CHARGROUP_t multichar_type2 = CHARGROUP_NO_MULTICHAR;
3335+ int c2_len;
3336+ charmatch2.reset(readChar(pattern+(*chargroup_len), &c2_len, &multichar_type2));
3337+ if((multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII) &&
3338+ (multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII))//second char in range is multichar
3339+ {
3340+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
3341+ }
3342+ //chargroup->addCharRange(c1, c3);
3343+ if((multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII) && (multichar_type2 == CHARGROUP_FLAGS_ONECHAR_ASCII))
3344+ {
3345+ if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3346+ chargroup->addCharMatch(new CRegexXQuery_char_range_ascii_i(current_regex,
3347+ (char)charmatch->get_c(),
3348+ (char)charmatch2->get_c()));
3349+ else
3350+ chargroup->addCharMatch(new CRegexXQuery_char_range_ascii(current_regex,
3351+ (char)charmatch->get_c(),
3352+ (char)charmatch2->get_c()));
3353+ }
3354+ else
3355+ {
3356+ if(flags & REGEX_ASCII_CASE_INSENSITIVE)
3357+ chargroup->addCharMatch(new CRegexXQuery_char_range_unicode_i(current_regex,
3358+ charmatch->get_c(),
3359+ charmatch2->get_c()));
3360+ else
3361+ chargroup->addCharMatch(new CRegexXQuery_char_range_unicode(current_regex,
3362+ charmatch->get_c(),
3363+ charmatch2->get_c()));
3364+ }
3365+ *chargroup_len += c2_len;
3366 continue;
3367 }
3368 }
3369- chargroup->addCharRange(c1, c1);
3370- *chargroup_len += c1_len;
3371+ //chargroup->addOneChar(c1);
3372+ chargroup->addCharMatch(charmatch.release());
3373 }
3374 if(pattern[*chargroup_len])
3375 (*chargroup_len)++;
3376- return chargroup;
3377+ else
3378+ {
3379+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISSING_CLOSE_BRACKET)) );
3380+ }
3381+ return chargroup.release();
3382 }
3383
3384-void CRegexAscii_parser::read_quantifier(CRegexAscii_piece *piece,
3385+void CRegexXQuery_parser::read_quantifier(CRegexXQuery_piece *piece,
3386 const char *pattern, int *quantif_len)
3387 {
3388 *quantif_len = 0;
3389@@ -496,6 +943,10 @@
3390 max = max*10 + pattern[*quantif_len] - '0';
3391 (*quantif_len)++;
3392 }
3393+ if(max < min)
3394+ {
3395+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MAX_LT_MIN)) );
3396+ }
3397 piece->set_quantifier_min_max(min, max, true);
3398 }
3399 while(pattern[*quantif_len] && (pattern[*quantif_len] != '}'))
3400@@ -524,23 +975,25 @@
3401 ///Constructors and destructors and internal functions
3402 ////////////////////////////
3403
3404-CRegexAscii_regex::CRegexAscii_regex(CRegexAscii_regex *topregex) : IRegexAtom(topregex?topregex:this)
3405+CRegexXQuery_regex::CRegexXQuery_regex(CRegexXQuery_regex *topregex) : IRegexAtom(topregex?topregex:this)
3406 {
3407 matched_source = NULL;
3408 matched_len = 0;
3409+// backup_matched_source = NULL;
3410+// backup_matched_len = 0;
3411 flags = 128;//set to 0 after initialization
3412 }
3413
3414-CRegexAscii_regex::~CRegexAscii_regex()
3415+CRegexXQuery_regex::~CRegexXQuery_regex()
3416 {
3417- std::list<CRegexAscii_branch*>::iterator branch_it;
3418+ std::list<CRegexXQuery_branch*>::iterator branch_it;
3419
3420 for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3421 {
3422 delete (*branch_it);
3423 }
3424 /*
3425- std::vector<CRegexAscii_regex*>::iterator subregex_it;
3426+ std::vector<CRegexXQuery_regex*>::iterator subregex_it;
3427 for(subregex_it = subregex.begin(); subregex_it != subregex.end(); subregex_it++)
3428 {
3429 delete (*subregex_it);
3430@@ -548,25 +1001,18 @@
3431 */
3432 }
3433
3434-bool CRegexAscii_regex::set_align_begin(bool align_begin)
3435-{
3436- bool prev_align = this->align_begin;
3437- this->align_begin = align_begin;
3438- return prev_align;
3439-}
3440-
3441-void CRegexAscii_regex::add_branch(CRegexAscii_branch *branch)
3442+void CRegexXQuery_regex::add_branch(CRegexXQuery_branch *branch)
3443 {
3444 branch_list.push_back(branch);
3445 }
3446
3447-bool CRegexAscii_regex::get_indexed_match(int index,
3448+bool CRegexXQuery_regex::get_indexed_match(int index,
3449 const char **matched_source,
3450 int *matched_len)
3451 {
3452 if(!index || index > (int)subregex.size())
3453 return false;
3454- CRegexAscii_regex *subr = subregex[index-1];
3455+ CRegexXQuery_regex *subr = subregex[index-1];
3456 *matched_source = subr->matched_source;
3457 if(!*matched_source)
3458 return false;
3459@@ -574,145 +1020,209 @@
3460 return true;
3461 }
3462
3463-unsigned int CRegexAscii_regex::get_indexed_regex_count()
3464+unsigned int CRegexXQuery_regex::get_indexed_regex_count()
3465 {
3466 return subregex.size();
3467 }
3468
3469-CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) :
3470- IRegexMatcher(regex)
3471+CRegexXQuery_branch::CRegexXQuery_branch(CRegexXQuery_regex* regex)
3472+ //:
3473+ //IRegexMatcher(regex)
3474 {
3475 }
3476
3477-CRegexAscii_branch::~CRegexAscii_branch()
3478+CRegexXQuery_branch::~CRegexXQuery_branch()
3479 {
3480- std::list<CRegexAscii_piece*>::iterator piece_it;
3481+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
3482
3483 for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
3484 {
3485- delete (*piece_it);
3486+ delete (*piece_it).piece;
3487 }
3488 }
3489
3490-void CRegexAscii_branch::add_piece(CRegexAscii_piece *piece)
3491+void CRegexXQuery_branch::add_piece(CRegexXQuery_piece *piece)
3492 {
3493 piece_list.push_back(piece);
3494 }
3495
3496-CRegexAscii_piece::CRegexAscii_piece()
3497+CRegexXQuery_piece::CRegexXQuery_piece()
3498 {
3499+ atom = NULL;
3500+ regex_atom = NULL;
3501 }
3502
3503-CRegexAscii_piece::~CRegexAscii_piece()
3504+CRegexXQuery_piece::~CRegexXQuery_piece()
3505 {
3506 delete atom;
3507 }
3508
3509-void CRegexAscii_piece::set_atom(IRegexAtom *atom)
3510+void CRegexXQuery_piece::set_atom(IRegexAtom *atom)
3511 {
3512 this->atom = atom;
3513+ this->regex_atom = dynamic_cast<CRegexXQuery_regex*>(atom);
3514 }
3515
3516-void CRegexAscii_piece::set_quantifier_min_max(int min, int max, bool strict_max)
3517+void CRegexXQuery_piece::set_quantifier_min_max(int min, int max, bool strict_max)
3518 {
3519 this->min = min;
3520 this->max = max;
3521 this->strict_max = strict_max;
3522 }
3523-void CRegexAscii_piece::set_is_reluctant(bool is_reluctant)
3524+void CRegexXQuery_piece::set_is_reluctant(bool is_reluctant)
3525 {
3526 this->is_reluctant = is_reluctant;
3527 }
3528-void CRegexAscii_piece::get_quantifier(int *min, int *max, bool *strict_max)
3529+void CRegexXQuery_piece::get_quantifier(int *min, int *max, bool *strict_max)
3530 {
3531 *min = this->min;
3532 *max = this->max;
3533 *strict_max = this->strict_max;
3534 }
3535-bool CRegexAscii_piece::get_is_reluctant()
3536+bool CRegexXQuery_piece::get_is_reluctant()
3537 {
3538+ if(atom->regex_intern->flags & REGEX_ASCII_MINIMAL_MATCH)
3539+ return true;
3540 return is_reluctant;
3541 }
3542
3543
3544-CRegexAscii_chargroup::CRegexAscii_chargroup(CRegexAscii_regex* regex) :
3545+CRegexXQuery_charmatch::CRegexXQuery_charmatch(CRegexXQuery_regex* regex) :
3546+ IRegexAtom(regex)
3547+{
3548+}
3549+CRegexXQuery_multicharP::CRegexXQuery_multicharP(CRegexXQuery_regex* regex, char type, bool is_reverse) :
3550+ CRegexXQuery_charmatch(regex)
3551+{
3552+ this->multichar_type = type; this->is_reverse = is_reverse;
3553+}
3554+CRegexXQuery_multicharIs::CRegexXQuery_multicharIs(CRegexXQuery_regex* regex, int block_index, bool is_reverse) :
3555+ CRegexXQuery_charmatch(regex)
3556+{
3557+ this->block_index = block_index; this->is_reverse = is_reverse;
3558+}
3559+CRegexXQuery_multicharOther::CRegexXQuery_multicharOther(CRegexXQuery_regex* regex, char type) :
3560+ CRegexXQuery_charmatch(regex)
3561+{
3562+ this->multichar_type = type;
3563+}
3564+CRegexXQuery_char_ascii::CRegexXQuery_char_ascii(CRegexXQuery_regex* regex, char c) :
3565+ CRegexXQuery_charmatch(regex)
3566+{
3567+ this->c = c;
3568+}
3569+CRegexXQuery_char_ascii_i::CRegexXQuery_char_ascii_i(CRegexXQuery_regex* regex, char c) :
3570+ CRegexXQuery_char_ascii(regex, toupper(c))
3571+{
3572+}
3573+CRegexXQuery_char_range_ascii::CRegexXQuery_char_range_ascii(CRegexXQuery_regex* regex, char c1, char c2) :
3574+ CRegexXQuery_charmatch(regex)
3575+{
3576+ this->c1 = c1; this->c2 = c2;
3577+}
3578+CRegexXQuery_char_range_ascii_i::CRegexXQuery_char_range_ascii_i(CRegexXQuery_regex* regex, char c1, char c2) :
3579+ CRegexXQuery_char_range_ascii(regex, toupper(c1), toupper(c2))
3580+{
3581+}
3582+CRegexXQuery_char_unicode::CRegexXQuery_char_unicode(CRegexXQuery_regex* regex, const char *source, int len) :
3583+ CRegexXQuery_charmatch(regex)
3584+{
3585+ this->len = len;
3586+ memcpy(c, source, len);
3587+}
3588+CRegexXQuery_char_unicode_cp::CRegexXQuery_char_unicode_cp(CRegexXQuery_regex* regex, unicode::code_point c) :
3589+ CRegexXQuery_charmatch(regex)
3590+{
3591+ this->c = c;
3592+}
3593+CRegexXQuery_char_unicode_i::CRegexXQuery_char_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c) :
3594+ CRegexXQuery_char_unicode_cp(regex, unicode::to_upper(c))
3595+{
3596+}
3597+CRegexXQuery_char_range_unicode::CRegexXQuery_char_range_unicode(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) :
3598+ CRegexXQuery_charmatch(regex)
3599+{
3600+ this->c1 = c1; this->c2 = c2;
3601+}
3602+CRegexXQuery_char_range_unicode_i::CRegexXQuery_char_range_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) :
3603+ CRegexXQuery_char_range_unicode(regex, unicode::to_upper(c1), unicode::to_upper(c2))
3604+{
3605+}
3606+CRegexXQuery_endline::CRegexXQuery_endline(CRegexXQuery_regex* regex) :
3607+ CRegexXQuery_charmatch(regex)
3608+{
3609+}
3610+
3611+unicode::code_point CRegexXQuery_char_unicode::get_c()
3612+{
3613+ const char *temp_c = (const char*)c;
3614+ return utf8::next_char(temp_c);
3615+}
3616+
3617+
3618+CRegexXQuery_chargroup::CRegexXQuery_chargroup(CRegexXQuery_regex* regex) :
3619 IRegexAtom(regex)
3620 {
3621 classsub = NULL;
3622 }
3623
3624-CRegexAscii_chargroup::~CRegexAscii_chargroup()
3625+CRegexXQuery_chargroup::~CRegexXQuery_chargroup()
3626 {
3627 delete classsub;
3628-}
3629-
3630-void CRegexAscii_chargroup::addMultiChar(char c)
3631-{
3632- chargroup_t cgt;
3633- cgt.flags = CHARGROUP_FLAGS_MULTICHAR;
3634- cgt.c1 = c;
3635- cgt.c2 = 0;
3636- chargroup_list.push_back(cgt);
3637-}
3638-
3639-void CRegexAscii_chargroup::addEndLine()
3640-{
3641- chargroup_t cgt;
3642- cgt.flags = CHARGROUP_FLAGS_ENDLINE;
3643- cgt.c1 = '$';
3644- cgt.c2 = 0;
3645- chargroup_list.push_back(cgt);
3646-}
3647-
3648-void CRegexAscii_chargroup::addCharRange(char c1, char c2)
3649-{
3650- chargroup_t cgt;
3651- cgt.flags = 0;
3652- cgt.c1 = c1;
3653- cgt.c2 = c2;
3654- chargroup_list.push_back(cgt);
3655-}
3656-
3657-void CRegexAscii_chargroup::addClassSub(CRegexAscii_chargroup* classsub)
3658+ std::list<CRegexXQuery_charmatch* >::iterator charmatch_it;
3659+ for(charmatch_it=chargroup_list.begin(); charmatch_it != chargroup_list.end(); charmatch_it++)
3660+ delete (*charmatch_it);
3661+}
3662+
3663+void CRegexXQuery_chargroup::addCharMatch(CRegexXQuery_charmatch *charmatch)
3664+{
3665+ chargroup_list.push_back(charmatch);
3666+}
3667+void CRegexXQuery_chargroup::addClassSub(CRegexXQuery_chargroup* classsub)
3668 {
3669 this->classsub = classsub;
3670 }
3671
3672-CRegexAscii_negchargroup::CRegexAscii_negchargroup(CRegexAscii_regex* regex) :
3673- CRegexAscii_chargroup(regex)
3674-{
3675-}
3676-
3677-CRegexAscii_negchargroup::~CRegexAscii_negchargroup()
3678-{
3679-}
3680-
3681-CRegexAscii_wildchar::CRegexAscii_wildchar(CRegexAscii_regex* regex) :
3682+CRegexXQuery_negchargroup::CRegexXQuery_negchargroup(CRegexXQuery_regex* regex) :
3683+ CRegexXQuery_chargroup(regex)
3684+{
3685+}
3686+
3687+CRegexXQuery_negchargroup::~CRegexXQuery_negchargroup()
3688+{
3689+}
3690+
3691+CRegexXQuery_wildchar::CRegexXQuery_wildchar(CRegexXQuery_regex* regex) :
3692 IRegexAtom(regex)
3693 {
3694 }
3695
3696-CRegexAscii_wildchar::~CRegexAscii_wildchar()
3697+CRegexXQuery_wildchar::~CRegexXQuery_wildchar()
3698 {
3699 }
3700
3701-CRegexAscii_backref::CRegexAscii_backref(CRegexAscii_regex* regex, unsigned int backref_) :
3702+CRegexXQuery_backref::CRegexXQuery_backref(CRegexXQuery_regex* regex, unsigned int backref_) :
3703 IRegexAtom(regex),
3704 backref(backref_)
3705 {
3706 }
3707
3708-CRegexAscii_backref::~CRegexAscii_backref()
3709-{
3710-}
3711-
3712-CRegexAscii_parser::CRegexAscii_parser()
3713+CRegexXQuery_backref::~CRegexXQuery_backref()
3714+{
3715+}
3716+
3717+CRegexXQuery_pinstart::CRegexXQuery_pinstart(CRegexXQuery_regex* regex):
3718+ IRegexAtom(regex)
3719+{
3720+}
3721+
3722+CRegexXQuery_parser::CRegexXQuery_parser()
3723 {
3724 current_regex = NULL;
3725 regex_depth = 0;
3726 }
3727
3728-CRegexAscii_parser::~CRegexAscii_parser()
3729+CRegexXQuery_parser::~CRegexXQuery_parser()
3730 {
3731 }
3732
3733@@ -720,9 +1230,68 @@
3734 //////////////////////////////////////////
3735 ////Matching the pattern on a string
3736 /////////////////////////////////////////
3737+static std::list<RegexAscii_pieceinfo> empty_pieces;//empty list of pieces
3738+/*
3739+std::list<RegexAscii_pieceinfo>::iterator
3740+IRegexAtom::choose_next_piece(const char *source, int *matched_len,
3741+ std::list<RegexAscii_pieceinfo>::iterator this_piece,
3742+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
3743+{
3744+ //if this_piece is repetition, repeat until max, then go to next piece
3745+ int min, max;
3746+ bool strict_max;
3747+ while(this_piece != end_piece)
3748+ {
3749+ (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
3750+ if(max <= ((*this_piece).nr_matches))//finished this piece
3751+ {
3752+ this_piece++;
3753+ }
3754+ else
3755+ break;
3756+ }
3757+ return this_piece;
3758+}
3759+*/
3760+
3761+bool IRegexAtom::match(const char *source, int *start_from_branch, int *matched_len,
3762+ std::list<RegexAscii_pieceinfo>::iterator this_piece,
3763+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
3764+{
3765+ *start_from_branch = 0;
3766+ bool retmatch;
3767+ retmatch = match_internal(source, start_from_branch, matched_len);
3768+ if(!retmatch)
3769+ return false;
3770+
3771+ if(this_piece == end_piece)
3772+ return true;
3773+
3774+ (*this_piece).nr_matches++;
3775+ int min,max;
3776+ bool strict_max;
3777+ (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
3778+ std::list<RegexAscii_pieceinfo>::iterator init_piece = this_piece;
3779+ if(((min == 1) && (max == 1)) || //the simple common case
3780+ ((*matched_len == 0) && ((*this_piece).nr_matches>=min)))//to avoid infinite loop
3781+ {
3782+ this_piece++;
3783+ if(this_piece == end_piece)
3784+ return true;
3785+ }
3786+ int matched_len2;
3787+ retmatch = (*this_piece).piece->match_piece(this_piece, end_piece, source + *matched_len, &matched_len2);
3788+ if(!retmatch)
3789+ {
3790+ (*init_piece).nr_matches--;
3791+ return false;
3792+ }
3793+ *matched_len += matched_len2;
3794+ return true;
3795+}
3796
3797 //try every position in source to match the pattern
3798-bool CRegexAscii_regex::match_anywhere(const char *source, unsigned int flags,
3799+bool CRegexXQuery_regex::match_anywhere(const char *source, unsigned int flags,
3800 int *match_pos, int *matched_len)
3801 {
3802 *match_pos = 0;
3803@@ -730,43 +1299,66 @@
3804 return match_from(source, flags, match_pos, matched_len);
3805 }
3806
3807-bool CRegexAscii_regex::match_from(const char *source, unsigned int flags,
3808+bool CRegexXQuery_regex::match_from(const char *source, unsigned int flags,
3809 int *match_pos, int *matched_len)
3810 {
3811 this->flags = flags;
3812+ this->source_start = source;
3813 reachedEnd = false;
3814
3815- std::vector<CRegexAscii_regex*>::iterator regex_it;
3816+ std::vector<CRegexXQuery_regex*>::iterator regex_it;
3817 for(regex_it = subregex.begin(); regex_it != subregex.end(); regex_it++)
3818 {
3819 (*regex_it)->matched_source = NULL;
3820 }
3821-// if(!source[0])
3822-// {
3823-// if(branch_list.empty())
3824-// return true;
3825-// else
3826-// return false;
3827-// }
3828-
3829- bool skip_first_match = false;
3830- if(*match_pos && align_begin)
3831- skip_first_match = true;
3832+
3833+ std::vector<std::pair<const char*, int> > saved_subregex;
3834+
3835+ if(*match_pos && (flags & REGEX_ASCII_WHOLE_MATCH))
3836+ return false;
3837+
3838 do
3839 {
3840- if(!skip_first_match)
3841- {
3842- if(match(source + *match_pos, matched_len))
3843- return true;
3844- }
3845- skip_first_match = false;
3846- if(align_begin)
3847+ int start_from_branch = 0;
3848+ int longest_match = -1;
3849+ while(1)
3850+ {
3851+ if(!match(source + *match_pos, &start_from_branch, matched_len, empty_pieces.begin(), empty_pieces.end()))
3852+ break;
3853+ if(longest_match < *matched_len)
3854+ {
3855+ longest_match = *matched_len;
3856+ if(start_from_branch && (flags & REGEX_ASCII_GET_LONGEST_BRANCH))
3857+ save_subregex_list(saved_subregex);
3858+ }
3859+ if(!start_from_branch || !(flags & REGEX_ASCII_GET_LONGEST_BRANCH))
3860+ break;
3861+ //else try the other branches to see which is longer
3862+ }
3863+ if(longest_match != -1)
3864+ {
3865+ *matched_len = longest_match;
3866+ if(saved_subregex.size())
3867+ load_subregex_list(saved_subregex);
3868+ if(flags & REGEX_ASCII_WHOLE_MATCH)
3869+ {
3870+ if(!source[*match_pos+*matched_len])
3871+ return true;
3872+ if((flags & REGEX_ASCII_MULTILINE) &&
3873+ ((source[*match_pos+*matched_len] == '\n') || (source[*match_pos+*matched_len] == '\r')))
3874+ return true;
3875+ return false;
3876+ }
3877+ return true;
3878+ }
3879+
3880+ if(flags & REGEX_ASCII_WHOLE_MATCH)
3881 {
3882 if(flags & REGEX_ASCII_MULTILINE)
3883 {
3884- //goto the next line
3885+ //go to next line
3886 while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r'))
3887- (*match_pos)++;
3888+ (*match_pos) += myutf8len(source);
3889 if(source[*match_pos] == '\n')
3890 {
3891 (*match_pos)++;
3892@@ -780,190 +1372,1039 @@
3893 (*match_pos)++;
3894 }
3895 if(!source[*match_pos])
3896- return false;
3897+ break;
3898 continue;
3899 }
3900- return false;
3901+ break;
3902 }
3903 if(!source[*match_pos])
3904 break;
3905- (*match_pos)++;
3906+ (*match_pos) += myutf8len(source);
3907 }
3908 while(source[*match_pos]);
3909+// if(!source[*match_pos])
3910+// {
3911+// reachedEnd = true;
3912+// }
3913 return false;
3914 }
3915
3916+void CRegexXQuery_regex::reset_match()
3917+{
3918+// this->backup_matched_source = this->matched_source;
3919+// this->backup_matched_len = this->matched_len;
3920+ this->matched_source = NULL;
3921+ this->matched_len = 0;
3922+ std::list<CRegexXQuery_branch*>::iterator branch_it;
3923+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3924+ {
3925+ (*branch_it)->reset();
3926+ }
3927+}
3928+/*
3929+void CRegexXQuery_regex::restore_match()
3930+{
3931+ this->matched_source = this->backup_matched_source;
3932+ this->matched_len = this->backup_matched_len;
3933+ std::list<CRegexXQuery_branch*>::iterator branch_it;
3934+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3935+ {
3936+ (*branch_it)->restore();
3937+ }
3938+}
3939+*/
3940 //match any of the branches
3941-bool CRegexAscii_regex::match(const char *source, int *matched_len)
3942+bool CRegexXQuery_regex::match(const char *source, int *start_from_branch, int *matched_len,
3943+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
3944+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
3945 {
3946 reachedEnd = false;
3947- std::list<CRegexAscii_branch*>::iterator branch_it;
3948-
3949- for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3950- {
3951- if((*branch_it)->match(source, matched_len))
3952- {
3953- matched_source = source;
3954- this->matched_len = *matched_len;
3955+ if(!(flags & REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE) ||
3956+ (this->matched_source == NULL) || ((this->matched_source + this->matched_len) != source))
3957+ this->matched_source = source;
3958+ *matched_len = 0;
3959+ std::list<CRegexXQuery_branch*>::iterator branch_it;
3960+
3961+ if(*start_from_branch == 0)
3962+ {
3963+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
3964+ {
3965+ (*branch_it)->reset();
3966+ }
3967+ }
3968+
3969+ branch_it = branch_list.begin();
3970+ if(*start_from_branch)
3971+ {
3972+ for(int i=0;i<*start_from_branch;i++)
3973+ branch_it++;
3974+ }
3975+ (*start_from_branch)++;
3976+ for(; branch_it != branch_list.end(); branch_it++,(*start_from_branch)++)
3977+ {
3978+ if((*branch_it)->match(source, matched_len, this, next_piece, end_piece))
3979+ {
3980+ //matched_source = source;
3981+ //this->matched_len = *matched_len;
3982 return true;
3983 }
3984 }
3985- matched_source = NULL;
3986- matched_len = 0;
3987+ *start_from_branch = 0;
3988+ if(this->matched_source == source)
3989+ this->matched_source = NULL;
3990+ *matched_len = 0;
3991 return false;
3992 }
3993
3994+void CRegexXQuery_regex::save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
3995+{
3996+ saved_subregex.resize(0);
3997+ saved_subregex.reserve(subregex.size());
3998+ std::vector<CRegexXQuery_regex*>::iterator it;
3999+ for(it=subregex.begin(); it != subregex.end(); it++)
4000+ {
4001+ saved_subregex.push_back(std::pair<const char*, int>((*it)->matched_source, (*it)->matched_len));
4002+ }
4003+}
4004+
4005+void CRegexXQuery_regex::load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
4006+{
4007+ std::vector<std::pair<const char*, int> >::iterator it;
4008+ std::vector<CRegexXQuery_regex*>::iterator subit;
4009+ for(it=saved_subregex.begin(), subit = subregex.begin(); it != saved_subregex.end(); it++, subit++)
4010+ {
4011+ (*subit)->matched_source = (*it).first;
4012+ (*subit)->matched_len = (*it).second;
4013+ }
4014+}
4015+
4016+void CRegexXQuery_branch::reset()
4017+{
4018+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
4019+ for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
4020+ {
4021+ (*piece_it).piece->atom->reset_match();
4022+ }
4023+}
4024+/*
4025+void CRegexXQuery_branch::restore()
4026+{
4027+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
4028+ for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
4029+ {
4030+ (*piece_it).piece->atom->restore_match();
4031+ }
4032+}
4033+*/
4034 //match all the pieces
4035-bool CRegexAscii_branch::match(const char *source, int *matched_len)
4036+bool CRegexXQuery_branch::match(const char *source, int *matched_len,
4037+ CRegexXQuery_regex* group_regex,
4038+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
4039+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
4040 {
4041- std::list<CRegexAscii_piece*>::iterator piece_it;
4042+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
4043
4044 piece_it = piece_list.begin();
4045+ //if(piece_it == piece_list.end())
4046+ //if(!source[0])
4047+ // return true;
4048+ //else
4049+ // return false;
4050 if(piece_it == piece_list.end())
4051- if(source[0])
4052- return false;
4053+ {
4054+ piece_it = next_piece;
4055+ if(next_piece == end_piece)
4056+ {
4057+ group_regex->matched_len = 0;
4058+ return true;
4059+ }
4060+ }
4061+
4062+ std::list<RegexAscii_pieceinfo> temp_pieces(piece_list);
4063+ temp_pieces.push_back(group_regex);//this will be used to store the group match
4064+ temp_pieces.insert(temp_pieces.end(), next_piece, end_piece);
4065+
4066+ return (*piece_it).piece->match_piece(temp_pieces.begin(), temp_pieces.end(), source, matched_len);
4067+}
4068+
4069+bool CRegexXQuery_piece::match_piece(std::list<RegexAscii_pieceinfo>::iterator piece_it,
4070+ std::list<RegexAscii_pieceinfo>::iterator end_it,
4071+ const char *source, int *matched_len)
4072+{
4073+ if((*piece_it).nr_matches < 0)
4074+ {
4075+ //special case, store the group match
4076+ (*piece_it).group_regex->matched_len = source - (*piece_it).group_regex->matched_source;
4077+ piece_it++;
4078+ if(piece_it == end_it)
4079+ return true;
4080 else
4081- return true;
4082- if(!(*piece_it)->get_is_reluctant())
4083- return match_piece_iter_normal(piece_it, source, matched_len);
4084+ return (*piece_it).piece->match_piece(piece_it, end_it, source, matched_len);
4085+ }
4086+
4087+ if(!get_is_reluctant())
4088+ return match_piece_iter_normal(piece_it, end_it, source, matched_len);
4089 else
4090- return match_piece_iter_reluctant(piece_it, source, matched_len);
4091-}
4092-
4093-//match as less as possible
4094-bool CRegexAscii_branch::match_piece_iter_reluctant(
4095- std::list<CRegexAscii_piece*>::iterator piece_it,
4096+ return match_piece_iter_reluctant(piece_it, end_it, source, matched_len);
4097+}
4098+
4099+int CRegexXQuery_piece::choose_another_branch(std::vector<std::pair<int,int> > &match_lens)
4100+{
4101+ int i = match_lens.size()-1;
4102+ i--;
4103+ while((i >= 0) && (match_lens.at(i).second == 0))
4104+ i--;
4105+ if(i < 0)
4106+ return -1;//no more branches
4107+ match_lens.resize(i+1);
4108+ i++;
4109+ return i;
4110+}
4111+
4112+bool CRegexXQuery_piece::is_regex_atom()
4113+{
4114+ return regex_atom != NULL;
4115+}
4116+
4117+//match as less as possible (shortest string)
4118+bool CRegexXQuery_piece::match_piece_iter_reluctant(
4119+ std::list<RegexAscii_pieceinfo>::iterator piece_it,
4120+ std::list<RegexAscii_pieceinfo>::iterator end_it,
4121 const char *source, int *matched_len)
4122 {
4123 *matched_len = 0;
4124- if(piece_it == piece_list.end())
4125+ if(piece_it == end_it)
4126 return true;
4127
4128 int min, max;
4129 bool strict_max;
4130 //std::vector<int> match_lens;
4131- (*piece_it)->get_quantifier(&min, &max, &strict_max);
4132- if(strict_max && (max >= 0))
4133+ (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
4134+
4135+ std::vector<std::pair<const char*, int> > saved_subregex;
4136+
4137+ if(is_regex_atom())
4138 {
4139- int timeslen;
4140- //check if the piece doesn't exceed the max match
4141- if((*piece_it)->match_piece_times(source, &timeslen, max+1, NULL))
4142- return false;///too many matches
4143+ //recursive
4144+ bool retmatch;
4145+ atom->regex_intern->save_subregex_list(saved_subregex);
4146+ if((*piece_it).nr_matches >= min)
4147+ {
4148+ //go to next piece
4149+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4150+ next_it++;
4151+ if(next_it == end_it)
4152+ return true;
4153+ retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
4154+ if(retmatch)
4155+ return true;
4156+ }
4157+ if(((max == -1) || ((*piece_it).nr_matches < max)) &&//try further with this piece
4158+ (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
4159+ {
4160+ int start_from_branch = 0;
4161+ int shortest_len = -1;
4162+ bool branch_saved = false;
4163+ //try all branches to get the shortest len
4164+ (*piece_it).nr_matches++;
4165+ while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
4166+ {
4167+ if((shortest_len == -1) || (shortest_len > *matched_len))
4168+ {
4169+ shortest_len = *matched_len;
4170+ if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4171+ {
4172+ atom->regex_intern->save_subregex_list(saved_subregex);
4173+ branch_saved = true;
4174+ }
4175+ }
4176+ if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4177+ break;
4178+ }
4179+ if(shortest_len != -1)
4180+ {
4181+ *matched_len = shortest_len;
4182+ if(branch_saved)
4183+ atom->regex_intern->load_subregex_list(saved_subregex);
4184+ return true;
4185+ }
4186+ else
4187+ {
4188+ (*piece_it).nr_matches--;
4189+ atom->regex_intern->load_subregex_list(saved_subregex);
4190+ return false;
4191+ }
4192+ }
4193+ else
4194+ {
4195+ atom->regex_intern->load_subregex_list(saved_subregex);
4196+ return false;
4197+ }
4198 }
4199
4200- int i=min;
4201- std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
4202+ int i=0;
4203+ int shortest_len = -1;
4204+ int otherpieces_shortest = -1;
4205+ int i_shortest = -1;
4206+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4207+ std::vector<std::pair<int,int> > match_lens;
4208 next_it++;
4209 int pieceslen = 0;
4210 while(1)
4211 {
4212- if((max > 0) && (i>max))
4213- break;
4214- int piecelen = 0;
4215- if((*piece_it)->match_piece_times(source+pieceslen, &piecelen, !pieceslen ? i : 1, NULL))
4216- {
4217- pieceslen += piecelen;
4218+ int piecelen = 0;
4219+ bool retmatch;
4220+ retmatch = match_piece_times(source, &piecelen, i < min ? min : i, &match_lens);
4221+ i = match_lens.size()-1;//number of matches
4222+ if(i<0)
4223+ i = 0;
4224+ if((i>=min))
4225+ {
4226+ pieceslen = piecelen;
4227+ if((shortest_len >= 0) && (shortest_len <= pieceslen))//this branch is longer
4228+ {//try another branch
4229+ i = choose_another_branch(match_lens);
4230+ if(i >= 0)
4231+ continue;//try another branch
4232+ else
4233+ break;
4234+ }
4235 int otherpieces = 0;
4236- if((next_it == piece_list.end()) ||
4237- ((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+pieceslen, &otherpieces)) ||
4238- (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+pieceslen, &otherpieces)))
4239- {
4240- *matched_len = pieceslen + otherpieces;
4241- return true;
4242- }
4243+ if((next_it == end_it) ||
4244+ (*next_it).piece->match_piece(next_it, end_it, source+pieceslen, &otherpieces)
4245+ )
4246+ {
4247+ if((i == pieceslen) || (match_lens.at(0).second == 0) ||//minimum achieved already, cannot go lower than that
4248+ !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4249+ {
4250+ *matched_len = pieceslen + otherpieces;
4251+ return true;
4252+ }
4253+ if((shortest_len < 0) || (shortest_len > pieceslen))
4254+ {
4255+ shortest_len = pieceslen;
4256+ otherpieces_shortest = otherpieces;
4257+ i_shortest = i;
4258+ if(match_lens.at(0).second != 0)
4259+ atom->regex_intern->save_subregex_list(saved_subregex);
4260+ }
4261+ i = choose_another_branch(match_lens);
4262+ if(i >= 0)
4263+ continue;//try another branch
4264+ else
4265+ break;
4266+ }
4267+ else
4268+ {
4269+ //try further
4270+ if(retmatch)
4271+ {
4272+ i++;
4273+ if((max < 0) || (i<=max))
4274+ continue;
4275+ i--;
4276+ }
4277+ }
4278+ }
4279+
4280+ if(i==0)
4281+ {
4282+ break;
4283 }
4284 else
4285- break;
4286- i++;
4287+ {
4288+ i = choose_another_branch(match_lens);
4289+ if(i >= 0)
4290+ continue;//try another branch
4291+ else
4292+ break;
4293+ }
4294 }
4295
4296+ if(shortest_len >= 0)
4297+ {
4298+ if(strict_max && (max>=0) && (i_shortest > max))
4299+ return false;
4300+ *matched_len = shortest_len + otherpieces_shortest;
4301+ if(saved_subregex.size())
4302+ atom->regex_intern->load_subregex_list(saved_subregex);
4303+ return true;
4304+ }
4305 return false;
4306 }
4307
4308 //match as much as possible
4309-bool CRegexAscii_branch::match_piece_iter_normal(
4310- std::list<CRegexAscii_piece*>::iterator piece_it,
4311+bool CRegexXQuery_piece::match_piece_iter_normal(
4312+ std::list<RegexAscii_pieceinfo>::iterator piece_it,
4313+ std::list<RegexAscii_pieceinfo>::iterator end_it,
4314 const char *source, int *matched_len)
4315 {
4316 *matched_len = 0;
4317
4318 int min, max;
4319 bool strict_max;
4320- std::vector<int> match_lens;
4321- (*piece_it)->get_quantifier(&min, &max, &strict_max);
4322- int timeslen;
4323- if(strict_max && (max >= 0))
4324+ std::vector<std::pair<int,int> > match_lens;
4325+ (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
4326+ int timeslen = 0;
4327+ std::vector<std::pair<const char*, int> > saved_subregex;
4328+
4329+ if(is_regex_atom())
4330 {
4331- //check if the piece doesn't exceed the max match
4332- //if((*piece_it)->match_piece_times(source, &timeslen, max+1, &match_lens))
4333- // return false;///too many matches
4334- (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
4335+ //recursive
4336+ bool retmatch;
4337+ atom->regex_intern->save_subregex_list(saved_subregex);
4338+ if(((max == -1) || ((*piece_it).nr_matches < max)) && //try further with this piece
4339+ (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
4340+ {
4341+ int start_from_branch = 0;
4342+ int longest_len = -1;
4343+ bool branch_saved = false;
4344+ //try all branches to get the longest len
4345+ (*piece_it).nr_matches++;
4346+ while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
4347+ {
4348+ if((longest_len < *matched_len))
4349+ {
4350+ longest_len = *matched_len;
4351+ if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4352+ {
4353+ atom->regex_intern->save_subregex_list(saved_subregex);
4354+ branch_saved = true;
4355+ }
4356+ }
4357+ if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4358+ break;
4359+ }
4360+ if(longest_len != -1)
4361+ {
4362+ *matched_len = longest_len;
4363+ if(branch_saved)
4364+ atom->regex_intern->load_subregex_list(saved_subregex);
4365+ return true;
4366+ }
4367+ else
4368+ {
4369+ atom->regex_intern->load_subregex_list(saved_subregex);
4370+ (*piece_it).nr_matches--;
4371+ }
4372+ }
4373+ if((*piece_it).nr_matches >= min)
4374+ {
4375+ //go to next piece
4376+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4377+ next_it++;
4378+ if(next_it == end_it)
4379+ return true;
4380+ retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
4381+ if(!retmatch)
4382+ atom->regex_intern->load_subregex_list(saved_subregex);
4383+ return retmatch;
4384+ }
4385+ else
4386+ {
4387+ // regex_atom->restore_match();
4388+ atom->regex_intern->load_subregex_list(saved_subregex);
4389+ return false;
4390+ }
4391 }
4392- else if(!strict_max && (max >= 0))
4393- (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
4394- else
4395- (*piece_it)->match_piece_times(source, &timeslen, -1, &match_lens);
4396
4397- int i;
4398- std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
4399+ int longest_len = -1;
4400+ int otherpieces_longest = -1;
4401+ int i_longest = -1;
4402+ int i = max;
4403+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
4404 next_it++;
4405- if(next_it == piece_list.end())
4406+
4407+ bool retmatch;
4408+ while(1)
4409 {
4410- if((int)match_lens.size() > min)
4411- {
4412- *matched_len = timeslen;
4413- return true;
4414+ retmatch = match_piece_times(source, &timeslen, i, &match_lens);
4415+ i=match_lens.size()-1;//number of matches
4416+ if((i>=min))
4417+ {
4418+ if(timeslen < longest_len)
4419+ {//this branch is no use
4420+ i = choose_another_branch(match_lens);
4421+ if(i >= 0)
4422+ {
4423+ i = max;
4424+ continue;//try another branch
4425+ }
4426+ else
4427+ break;
4428+ }
4429+ //int piecelen = 0;
4430+ int otherpieces = 0;
4431+ if((next_it == end_it) ||
4432+ (*next_it).piece->match_piece(next_it, end_it, source+timeslen, &otherpieces)
4433+ )
4434+ {
4435+ if(timeslen > longest_len)
4436+ {
4437+ longest_len = timeslen;
4438+ otherpieces_longest = otherpieces;
4439+ i_longest = i;
4440+ if(!(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
4441+ {
4442+ *matched_len = longest_len + otherpieces_longest;
4443+ return true;
4444+ }
4445+ else
4446+ {
4447+ if(match_lens.at(0).second)
4448+ atom->regex_intern->save_subregex_list(saved_subregex);
4449+ }
4450+ }
4451+ }
4452+ else
4453+ {
4454+ if(!match_lens.at(0).second)
4455+ {
4456+ match_lens.resize(match_lens.size()-1);
4457+ i--;
4458+ if(i >= 0)
4459+ continue;//try smaller
4460+ else
4461+ break;
4462+ }
4463+ else
4464+ {
4465+ i = choose_another_branch(match_lens);
4466+ if(i >= 0)
4467+ continue;//try another branch
4468+ else
4469+ break;
4470+ }
4471+ }
4472+ }
4473+ //now try another branch
4474+ i = choose_another_branch(match_lens);
4475+ if(i >= 0)
4476+ {
4477+ i = max;
4478+ continue;//try another branch
4479 }
4480 else
4481- return false;
4482- }
4483- for(i=match_lens.size()-1; i>=min; i--)
4484+ break;
4485+ }//end while
4486+
4487+ if(longest_len >= 0)
4488 {
4489- int piecelen = 0;
4490- int otherpieces = 0;
4491- if(((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+match_lens[i]+piecelen, &otherpieces)) ||
4492- (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+match_lens[i]+piecelen, &otherpieces)))
4493- {
4494- *matched_len = match_lens[i] + piecelen + otherpieces;
4495- return true;
4496- }
4497+ *matched_len = longest_len + otherpieces_longest;
4498+ if(saved_subregex.size())
4499+ atom->regex_intern->load_subregex_list(saved_subregex);
4500+ return true;
4501 }
4502
4503 return false;
4504 }
4505
4506-bool CRegexAscii_piece::match_piece_times(const char *source,
4507+bool CRegexXQuery_piece::match_piece_times(const char *source,
4508 int *piecelen,
4509 int times,
4510- std::vector<int> *match_lens)
4511+ std::vector<std::pair<int,int> > *match_lens)
4512 {
4513- *piecelen = 0;
4514- for(int i=0;(times < 0) || (i<times);i++)
4515- {
4516+ int i=0;
4517+ if(match_lens && match_lens->size())
4518+ {
4519+ i = match_lens->size()-1;
4520+ }
4521+ if(match_lens && match_lens->size())
4522+ *piecelen = match_lens->at(match_lens->size()-1).first;
4523+ else
4524+ *piecelen = 0;
4525+ if((times >= 0) && (i>=times))
4526+ return true;
4527+ for(;(times < 0) || (i<times);i++)
4528+ {
4529+ int atomlen;
4530+ int start_from_branch = 0;
4531+ if(match_lens && (i<(int)match_lens->size()))
4532+ start_from_branch = match_lens->at(i).second;
4533+ bool first_branch = (start_from_branch == 0);
4534+ if(!atom->match(source+*piecelen, &start_from_branch, &atomlen, empty_pieces.begin(), empty_pieces.end()))
4535+ {
4536+ if(match_lens)
4537+ {
4538+ if(i >= (int)match_lens->size())
4539+ match_lens->push_back(std::pair<int,int>(*piecelen, 0));
4540+ else
4541+ (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
4542+ }
4543+ return false;
4544+ }
4545 if(match_lens)
4546- match_lens->push_back(*piecelen);
4547- int atomlen;
4548- if(!atom->match(source+*piecelen, &atomlen))
4549- return false;
4550+ {
4551+ if(i >= (int)match_lens->size())
4552+ match_lens->push_back(std::pair<int,int>(*piecelen, start_from_branch));
4553+ else
4554+ (*match_lens)[i] = std::pair<int,int>(*piecelen, start_from_branch);
4555+ }
4556 *piecelen += atomlen;
4557 if(!atomlen && !source[*piecelen])
4558 {
4559- atom->regex_intern->reachedEnd = true;
4560+ // atom->regex_intern->set_reachedEnd(source);
4561+ break;
4562+ }
4563+ if(first_branch && (atomlen == 0))//avoid infinite loop
4564+ {
4565 break;
4566 }
4567 }
4568 if(match_lens)
4569- match_lens->push_back(*piecelen);
4570+ {
4571+ // if(i >= match_lens->size())
4572+ match_lens->push_back(std::pair<int,int>(*piecelen, 0));
4573+ // else
4574+ // (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
4575+ }
4576
4577 return true;
4578 }
4579
4580+bool CRegexXQuery_multicharP::match_internal(const char *source, int *start_from_branch, int *matched_len)
4581+{
4582+ if(!source[0])
4583+ {
4584+ regex_intern->set_reachedEnd(source);
4585+ return false;
4586+ }
4587+ bool found = false;
4588+ const char *temp_source = source;
4589+ unicode::code_point utf8c = utf8::next_char(temp_source);
4590+ switch(multichar_type)
4591+ {
4592+ case unicode::UNICODE_Ll + 50:
4593+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ll) ||
4594+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lm) ||
4595+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lo) ||
4596+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lt) ||
4597+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lu))
4598+ {
4599+ if(!is_reverse)
4600+ found = true;
4601+ }
4602+ else
4603+ {
4604+ if(is_reverse)
4605+ found = true;
4606+ }
4607+ break;
4608+ case unicode::UNICODE_Mc + 50:
4609+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mn) ||
4610+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mc) ||
4611+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Me))
4612+ {
4613+ if(!is_reverse)
4614+ found = true;
4615+ }
4616+ else
4617+ {
4618+ if(is_reverse)
4619+ found = true;
4620+ }
4621+ break;
4622+ case unicode::UNICODE_Nd + 50:
4623+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd) ||
4624+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nl) ||
4625+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_No))
4626+ {
4627+ if(!is_reverse)
4628+ found = true;
4629+ }
4630+ else
4631+ {
4632+ if(is_reverse)
4633+ found = true;
4634+ }
4635+ break;
4636+ case unicode::UNICODE_Pc + 50:
4637+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) ||
4638+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) ||
4639+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) ||
4640+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) ||
4641+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) ||
4642+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) ||
4643+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po))
4644+ {
4645+ if(!is_reverse)
4646+ found = true;
4647+ }
4648+ else
4649+ {
4650+ if(is_reverse)
4651+ found = true;
4652+ }
4653+ break;
4654+ case unicode::UNICODE_Zl + 50:
4655+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) ||
4656+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) ||
4657+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp))
4658+ {
4659+ if(!is_reverse)
4660+ found = true;
4661+ }
4662+ else
4663+ {
4664+ if(is_reverse)
4665+ found = true;
4666+ }
4667+ break;
4668+ case unicode::UNICODE_Sc + 50:
4669+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sm) ||
4670+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sc) ||
4671+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sk) ||
4672+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_So))
4673+ {
4674+ if(!is_reverse)
4675+ found = true;
4676+ }
4677+ else
4678+ {
4679+ if(is_reverse)
4680+ found = true;
4681+ }
4682+ break;
4683+ case unicode::UNICODE_Cc + 50:
4684+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) ||
4685+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) ||
4686+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co))//ignore unicode::UNICODE_Cn
4687+ {
4688+ if(!is_reverse)
4689+ found = true;
4690+ }
4691+ else
4692+ {
4693+ if(is_reverse)
4694+ found = true;
4695+ }
4696+ break;
4697+ default:
4698+ if(unicode::check_codepoint_category(utf8c, (unicode::category)multichar_type))
4699+ {
4700+ if(!is_reverse)
4701+ found = true;
4702+ }
4703+ else
4704+ {
4705+ if(is_reverse)
4706+ found = true;
4707+ }
4708+ break;
4709+ }
4710+
4711+ if(found)
4712+ {
4713+ *matched_len = temp_source - source;
4714+ }
4715+ return found;
4716+}
4717+
4718+bool CRegexXQuery_multicharIs::match_internal(const char *source, int *start_from_branch, int *matched_len)
4719+{
4720+ if(!source[0])
4721+ {
4722+ regex_intern->set_reachedEnd(source);
4723+ return false;
4724+ }
4725+ bool found = false;
4726+ const char *temp_source = source;
4727+ unicode::code_point utf8c = utf8::next_char(temp_source);
4728+ const unicode::code_point *cp = block_escape[block_index].cp;
4729+ if((utf8c >= cp[0]) && (utf8c <= cp[1]))
4730+ {
4731+ if(!is_reverse)
4732+ found = true;
4733+ }
4734+ else if(block_escape[block_index].ext_cp)
4735+ {
4736+ cp = block_escape[block_index].ext_cp;
4737+ while(*cp)
4738+ {
4739+ if((utf8c >= cp[0]) && (utf8c <= cp[1]))
4740+ break;
4741+ cp += 2;
4742+ }
4743+ if(*cp)
4744+ {
4745+ if(!is_reverse)
4746+ found = true;
4747+ }
4748+ else
4749+ {
4750+ if(is_reverse)
4751+ found = true;
4752+ }
4753+ }
4754+ else
4755+ {
4756+ if(is_reverse)
4757+ found = true;
4758+ }
4759+ if(found)
4760+ {
4761+ *matched_len = temp_source - source;
4762+ }
4763+ return found;
4764+}
4765+
4766+bool CRegexXQuery_multicharOther::match_internal(const char *source, int *start_from_branch, int *matched_len)
4767+{
4768+ if(!source[0])
4769+ {
4770+ regex_intern->set_reachedEnd(source);
4771+ return false;
4772+ }
4773+ bool found = false;
4774+ bool value_true = true;
4775+ const char *temp_source = source;
4776+ unicode::code_point utf8c = utf8::next_char(temp_source);
4777+ switch(multichar_type)
4778+ {
4779+ case 'S':value_true = false;//[^\s]
4780+ case 's'://[#x20\t\n\r]
4781+ switch(utf8c)
4782+ {
4783+ case '\t':
4784+ case '\r':
4785+ case '\n':
4786+ case ' ':
4787+ found = true;
4788+ default:
4789+ break;
4790+ }
4791+ break;
4792+ case 'I':value_true = false;//[^\i]
4793+ case 'i'://the set of initial name characters, those matched by Letter | '_' | ':'
4794+ if((utf8c == '_') ||
4795+ (utf8c == ':') ||
4796+ XQCharType::isLetter(utf8c))
4797+ {
4798+ found = true;
4799+ }
4800+ break;
4801+ case 'C':value_true = false;//[^\c]
4802+ case 'c'://the set of name characters, those matched by NameChar
4803+ if(XQCharType::isNameChar(utf8c))
4804+ {
4805+ found = true;
4806+ }
4807+ break;
4808+ case 'D':value_true = false;//[^\d]
4809+ case 'd':
4810+ if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd))
4811+ found = true;
4812+ break;
4813+ case 'W':value_true = false;//[^\w]
4814+ case 'w':
4815+ found = !(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) ||
4816+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) ||
4817+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) ||
4818+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) ||
4819+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) ||
4820+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) ||
4821+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po) ||
4822+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) ||
4823+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) ||
4824+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp) ||
4825+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) ||
4826+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) ||
4827+ unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co));//ignore unicode::UNICODE_Cn
4828+ break;
4829+ default:
4830+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(source, ZED(REGEX_UNIMPLEMENTED)) );
4831+ }
4832+ if((found && value_true) || (!found && !value_true))
4833+ {
4834+ *matched_len = temp_source - source;
4835+ return true;
4836+ }
4837+ else
4838+ {
4839+ return false;
4840+ }
4841+}
4842+
4843+bool CRegexXQuery_char_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len)
4844+{
4845+ if(!source[0])
4846+ {
4847+ regex_intern->set_reachedEnd(source);
4848+ return false;
4849+ }
4850+ if(source[0] == c)
4851+ {
4852+ *matched_len = 1;
4853+ return true;
4854+ }
4855+ else
4856+ return false;
4857+}
4858+
4859+bool CRegexXQuery_char_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4860+{
4861+ if(!source[0])
4862+ {
4863+ regex_intern->set_reachedEnd(source);
4864+ return false;
4865+ }
4866+ char sup = toupper(source[0]);
4867+ if(sup == c)
4868+ {
4869+ *matched_len = 1;
4870+ return true;
4871+ }
4872+ else
4873+ return false;
4874+}
4875+
4876+bool CRegexXQuery_char_range_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len)
4877+{
4878+ if(!source[0])
4879+ {
4880+ regex_intern->set_reachedEnd(source);
4881+ return false;
4882+ }
4883+ if((source[0] >= c1) && (source[0] <= c2))
4884+ {
4885+ *matched_len = 1;
4886+ return true;
4887+ }
4888+ else
4889+ return false;
4890+}
4891+
4892+bool CRegexXQuery_char_range_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4893+{
4894+ if(!source[0])
4895+ {
4896+ regex_intern->set_reachedEnd(source);
4897+ return false;
4898+ }
4899+ char sup = toupper(source[0]);
4900+ if((sup >= c1) && (sup <= c2))
4901+ {
4902+ *matched_len = 1;
4903+ return true;
4904+ }
4905+ else
4906+ return false;
4907+}
4908+
4909+bool CRegexXQuery_char_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len)
4910+{
4911+ if(!source[0])
4912+ {
4913+ regex_intern->set_reachedEnd(source);
4914+ return false;
4915+ }
4916+ if(!memcmp(source, c, len))
4917+ {
4918+ *matched_len = len;
4919+ return true;
4920+ }
4921+ else
4922+ return false;
4923+}
4924+
4925+bool CRegexXQuery_char_unicode_cp::match_internal(const char *source, int *start_from_branch, int *matched_len)
4926+{
4927+ if(!source[0])
4928+ {
4929+ regex_intern->set_reachedEnd(source);
4930+ return false;
4931+ }
4932+ const char *temp_source = source;
4933+ unicode::code_point utf8c = utf8::next_char(temp_source);
4934+ if(utf8c == c)
4935+ {
4936+ *matched_len = temp_source - source;
4937+ return true;
4938+ }
4939+ else
4940+ return false;
4941+}
4942+
4943+bool CRegexXQuery_char_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4944+{
4945+ if(!source[0])
4946+ {
4947+ regex_intern->set_reachedEnd(source);
4948+ return false;
4949+ }
4950+ const char *temp_source = source;
4951+ unicode::code_point sup = unicode::to_upper(utf8::next_char(temp_source));
4952+ if(sup == c)
4953+ {
4954+ *matched_len = temp_source - source;
4955+ return true;
4956+ }
4957+ else
4958+ return false;
4959+}
4960+
4961+bool CRegexXQuery_char_range_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len)
4962+{
4963+ if(!source[0])
4964+ {
4965+ regex_intern->set_reachedEnd(source);
4966+ return false;
4967+ }
4968+ const char *temp_source = source;
4969+ unicode::code_point utf8c = utf8::next_char(temp_source);
4970+ if((utf8c >= c1) && (utf8c <= c2))
4971+ {
4972+ *matched_len = temp_source - source;
4973+ return true;
4974+ }
4975+ else
4976+ return false;
4977+}
4978+
4979+bool CRegexXQuery_char_range_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len)
4980+{
4981+ if(!source[0])
4982+ {
4983+ regex_intern->set_reachedEnd(source);
4984+ return false;
4985+ }
4986+ const char *temp_source = source;
4987+ unicode::code_point sup = unicode::to_upper(utf8::next_char(temp_source));
4988+ if((sup >= c1) && (sup <= c2))
4989+ {
4990+ *matched_len = temp_source - source;
4991+ return true;
4992+ }
4993+ else
4994+ return false;
4995+}
4996+
4997+bool CRegexXQuery_endline::match_internal(const char *source, int *start_from_branch, int *matched_len)
4998+{
4999+ *matched_len = 0;
5000+ if(!source[0])
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches