Merge lp:~zorba-coders/zorba/no_unicode into lp:zorba
- no_unicode
- Merge into trunk
Status: | Superseded |
---|---|
Proposed branch: | lp:~zorba-coders/zorba/no_unicode |
Merge into: | lp:zorba |
Diff against target: |
9029 lines (+3908/-1422) 270 files modified
CMakeConfiguration.txt (+5/-5) CMakeLists.txt (+6/-2) ChangeLog (+7/-0) KNOWN_ISSUES.txt (+1/-1) doc/cxx/examples/context.cpp (+4/-0) include/zorba/config.h.cmake (+3/-1) include/zorba/static_context.h (+4/-0) include/zorba/util/time.h (+1/-1) src/CMakeLists.txt (+4/-0) src/api/serialization/serializer.cpp (+36/-33) src/api/serialization/serializer.h (+2/-4) src/diagnostics/diagnostic_en.xml (+116/-27) src/diagnostics/pregenerated/dict_en.cpp (+98/-20) src/precompiled/stdafx.h (+74/-356) src/runtime/full_text/CMakeLists.txt (+3/-3) src/runtime/full_text/default_tokenizer.cpp (+4/-4) src/runtime/full_text/latin_tokenizer.cpp (+3/-2) src/runtime/full_text/latin_tokenizer.h (+9/-8) src/runtime/numerics/format_integer_impl.cpp (+1/-1) src/runtime/numerics/numerics_impl.cpp (+1/-1) src/runtime/strings/strings_impl.cpp (+58/-20) src/store/api/store.h (+1/-1) src/store/naive/simple_store.h (+7/-3) src/store/naive/store.cpp (+1/-1) src/store/naive/store.h (+12/-11) src/system/globalenv.cpp (+7/-7) src/unit_tests/CMakeLists.txt (+2/-2) src/unit_tests/string.cpp (+8/-0) src/unit_tests/unit_test_list.h (+2/-2) src/unit_tests/unit_tests.cpp (+2/-2) src/util/CMakeLists.txt (+4/-4) src/util/icu_streambuf.h (+1/-0) src/util/passthru_streambuf.cpp (+2/-2) src/util/passthru_streambuf.h (+10/-2) src/util/regex.cpp (+96/-82) src/util/regex.h (+22/-34) src/util/regex_xquery.cpp (+1860/-489) src/util/regex_xquery.h (+359/-123) src/util/transcode_streambuf.h (+5/-5) src/util/unicode_categories.cpp (+3/-3) src/util/unicode_categories.h (+44/-37) src/util/unicode_util.cpp (+20/-2) src/util/unicode_util.h (+47/-15) src/util/utf8_util.cpp (+6/-6) src/util/utf8_util.h (+29/-13) src/util/utf8_util.tcc (+10/-2) src/zorbatypes/collation_manager.cpp (+17/-17) src/zorbatypes/collation_manager.h (+3/-3) src/zorbatypes/libicu.h (+0/-32) src/zorbatypes/transcoder.cpp (+8/-4) src/zorbatypes/transcoder.h (+9/-9) src/zorbautils/hashmap_itemh.h (+4/-0) src/zorbautils/string_util.cpp (+19/-18) 
src/zorbautils/string_util.h (+15/-1) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a10.xml.res (+242/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a11.xml.res (+6/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r12.xml.res (+5/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res (+1/-0) test/rbkt/Queries/CMakeLists.txt (+16/-1) test/rbkt/Queries/zorba/string/Regex/regex_a1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a10.xq (+11/-0) test/rbkt/Queries/zorba/string/Regex/regex_a11.xq (+9/-0) test/rbkt/Queries/zorba/string/Regex/regex_a2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a5.xq 
(+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a6.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a7.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a8.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_a9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err1.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err10.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err11.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err12.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err12.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err13.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err13.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err14.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err14.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err15.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err15.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err16.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err16.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err17.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err17.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err18.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err18.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err19.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err19.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err2.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err20.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err20.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err21.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err21.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err22.spec 
(+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err22.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err23.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err23.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err24.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err24.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_err25.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err25.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err3.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err4.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err4.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err5.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err7.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err7.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err8.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err8.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err9.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_err9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m12.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m13.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m14.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m15.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m16.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m17.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m18.xq (+3/-0) test/rbkt/Queries/zorba/string/Regex/regex_m19.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m20.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m21.xq (+1/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_m22.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m23.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m24.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m25.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m26.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m27.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m28.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m29.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m30.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m31.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m32.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m33.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m34.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m35.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m36.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m37.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m38.xq (+4/-0) test/rbkt/Queries/zorba/string/Regex/regex_m39.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m4.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m40.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m41.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m42.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m43.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m44.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m45.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m46.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m47.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m48.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m49.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m5.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m50.xq (+2/-0) test/rbkt/Queries/zorba/string/Regex/regex_m51.xq (+2/-0) test/rbkt/Queries/zorba/string/Regex/regex_m52.xq (+1/-0) 
test/rbkt/Queries/zorba/string/Regex/regex_m53.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_m6.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m7.xq (+6/-0) test/rbkt/Queries/zorba/string/Regex/regex_m8.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_m9.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq (+17/-0) test/rbkt/Queries/zorba/string/Regex/regex_r1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r10.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r11.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r12.xq (+7/-0) test/rbkt/Queries/zorba/string/Regex/regex_r2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r3.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r4.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r6.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_r9.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t1.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t2.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/regex_t4.xq (+2/-0) test/rbkt/Queries/zorba/string/Regex/regex_t5.xq (+1/-0) test/rbkt/Queries/zorba/string/Regex/zorba.html (+242/-0) test/rbkt/Queries/zorba/string/Regex/zorba2.html (+5/-0) test/rbkt/Queries/zorba/testdriver/bom_bug.xq (+1/-0) test/unit/static_context.cpp (+2/-0) test/update/CMakeLists.txt (+9/-0) |
To merge this branch: | bzr merge lp:~zorba-coders/zorba/no_unicode |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Matthias Brantner | Pending | ||
Markos Zaharioudakis | Pending | ||
Review via email: mp+101052@code.launchpad.net |
This proposal supersedes a proposal from 2012-01-18.
This proposal has been superseded by a proposal from 2012-04-07.
Commit message
"No Unicode" is now "No ICU."
Description of the change
"No Unicode" is now "No ICU."
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal | # |
Matthias Brantner (matthias-brantner) : Posted in a previous version of this proposal | # |
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal | # |
The test suite doesn't run clean on my system (Linux) without ICU. This prevents us from adding the build to the remote queue. For example, the following tests fail without ICU (some of them also seem to fail with ICU):
1294 - test/rbkt/
1548 - test/rbkt/
1560 - test/rbkt/
1574 - test/rbkt/
1581 - test/rbkt/
1587 - test/rbkt/
1600 - test/rbkt/
1605 - test/rbkt/
1612 - test/rbkt/
1635 - test/rbkt/
1637 - test/rbkt/
1643 - test/rbkt/
1789 - test/rbkt/
2345 - test/unit/
2534 - test/update/
2544 - doc/cxx/
Please make sure the test suite runs clean.
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal | # |
Try it now.
Daniel Turcanu (danielturcanu) wrote : Posted in a previous version of this proposal | # |
Before committing this branch, the branch lp:~danielturcanu/zorba/my_conv_module should be merged.
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal | # |
FWIW, I've skimmed the change for CMake-related changes, and they all look fine (mostly quite trivial).
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Attempt to merge into lp:zorba failed due to conflicts:
text conflict in ChangeLog
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job no_unicode-
final status was:
6 tests did not succeed - changes not commited.
Error in read script: /home/ceej/
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job no_unicode-
final status was:
6 tests did not succeed - changes not commited.
Error in read script: /home/ceej/
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
The attempt to merge lp:~zorba-coders/zorba/no_unicode into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job no_unicode-
final status was:
6 tests did not succeed - changes not commited.
Error in read script: /home/ceej/
- 10534. By Paul J. Lucas
-
No longer doing some stuff when q_flag is set.
- 10535. By Paul J. Lucas
-
Tweaked one error message.
- 10536. By Paul J. Lucas
-
Merge from trunk.
- 10537. By Rodolfo Ochoa
-
Merge from trunk
- 10538. By Rodolfo Ochoa
-
Strange error on include guards
- 10539. By Rodolfo Ochoa
-
merge from trunk
- 10540. By Rodolfo Ochoa
-
fix for regex errors in RQ
Unmerged revisions
Preview Diff
1 | === modified file 'CMakeConfiguration.txt' | |||
2 | --- CMakeConfiguration.txt 2012-03-28 05:19:57 +0000 | |||
3 | +++ CMakeConfiguration.txt 2012-04-07 00:45:26 +0000 | |||
4 | @@ -135,14 +135,14 @@ | |||
5 | 135 | SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings") | 135 | SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings") |
6 | 136 | MESSAGE (STATUS "ZORBA_DEBUG_STRING: " ${ZORBA_DEBUG_STRING}) | 136 | MESSAGE (STATUS "ZORBA_DEBUG_STRING: " ${ZORBA_DEBUG_STRING}) |
7 | 137 | 137 | ||
10 | 138 | SET(ZORBA_NO_UNICODE OFF CACHE BOOL "disable ICU") | 138 | SET(ZORBA_NO_ICU OFF CACHE BOOL "disable ICU") |
11 | 139 | MESSAGE(STATUS "ZORBA_NO_UNICODE: " ${ZORBA_NO_UNICODE}) | 139 | MESSAGE(STATUS "ZORBA_NO_ICU: " ${ZORBA_NO_ICU}) |
12 | 140 | 140 | ||
14 | 141 | IF (ZORBA_NO_UNICODE) | 141 | IF (ZORBA_NO_ICU) |
15 | 142 | SET (no_full_text ON) | 142 | SET (no_full_text ON) |
17 | 143 | ELSE (ZORBA_NO_UNICODE) | 143 | ELSE (ZORBA_NO_ICU) |
18 | 144 | SET (no_full_text OFF) | 144 | SET (no_full_text OFF) |
20 | 145 | ENDIF (ZORBA_NO_UNICODE) | 145 | ENDIF (ZORBA_NO_ICU) |
21 | 146 | SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support") | 146 | SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support") |
22 | 147 | MESSAGE(STATUS "ZORBA_NO_FULL_TEXT: " ${ZORBA_NO_FULL_TEXT}) | 147 | MESSAGE(STATUS "ZORBA_NO_FULL_TEXT: " ${ZORBA_NO_FULL_TEXT}) |
23 | 148 | 148 | ||
24 | 149 | 149 | ||
25 | === modified file 'CMakeLists.txt' | |||
26 | --- CMakeLists.txt 2012-03-28 05:19:57 +0000 | |||
27 | +++ CMakeLists.txt 2012-04-07 00:45:26 +0000 | |||
28 | @@ -123,10 +123,14 @@ | |||
29 | 123 | CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T) | 123 | CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T) |
30 | 124 | 124 | ||
31 | 125 | CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF) | 125 | CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF) |
34 | 126 | CHECK_CXX_SOURCE_COMPILES ("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR) | 126 | SET(CMAKE_EXTRA_INCLUDE_FILES wchar.h) |
35 | 127 | CHECK_CXX_SOURCE_COMPILES ("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT) | 127 | CHECK_TYPE_SIZE("wchar_t" ZORBA_SIZEOF_WCHAR_T) |
36 | 128 | SET(CMAKE_EXTRA_INCLUDE_FILES) | ||
37 | 128 | CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR) | 129 | CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR) |
38 | 129 | 130 | ||
39 | 131 | CHECK_CXX_SOURCE_COMPILES("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR) | ||
40 | 132 | CHECK_CXX_SOURCE_COMPILES("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT) | ||
41 | 133 | |||
42 | 130 | ################################################################################ | 134 | ################################################################################ |
43 | 131 | # Various cmake macros | 135 | # Various cmake macros |
44 | 132 | 136 | ||
45 | 133 | 137 | ||
46 | === modified file 'ChangeLog' | |||
47 | --- ChangeLog 2012-04-04 15:59:01 +0000 | |||
48 | +++ ChangeLog 2012-04-07 00:45:26 +0000 | |||
49 | @@ -4,6 +4,7 @@ | |||
50 | 4 | 4 | ||
51 | 5 | New Features: | 5 | New Features: |
52 | 6 | * Extended API for Python, Java, PHP and Ruby. | 6 | * Extended API for Python, Java, PHP and Ruby. |
53 | 7 | * Added support for NO_ICU (to not use ICU for unicode processing) | ||
54 | 7 | 8 | ||
55 | 8 | Bug Fixes/Other Changes: | 9 | Bug Fixes/Other Changes: |
56 | 9 | * Fixed bug #967864 (var substitution did not update theFreeVars property) | 10 | * Fixed bug #967864 (var substitution did not update theFreeVars property) |
57 | @@ -148,7 +149,9 @@ | |||
58 | 148 | * Fixed bug when parsing a document with a base-uri attribute. | 149 | * Fixed bug when parsing a document with a base-uri attribute. |
59 | 149 | * Fixed bug #863320 (Sentence is incorrectly incremented when token characters end without sentence terminator) | 150 | * Fixed bug #863320 (Sentence is incorrectly incremented when token characters end without sentence terminator) |
60 | 150 | * Fixed bug #863730 (static delete-node* functions don't raise ZDDY0012) | 151 | * Fixed bug #863730 (static delete-node* functions don't raise ZDDY0012) |
61 | 152 | * Implemented the probe-index-range-value for general indexes | ||
62 | 151 | * Removed ZSTR0005 and ZSTR0006 error codes | 153 | * Removed ZSTR0005 and ZSTR0006 error codes |
63 | 154 | * Fixed bug #867662 ("nullptr" warning) | ||
64 | 152 | * Fixed bug #868258 (Assertion failure with two delete collection) | 155 | * Fixed bug #868258 (Assertion failure with two delete collection) |
65 | 153 | * Fixed bug #871623 and #871629 (assertion failures with insertions in dynamic collections) | 156 | * Fixed bug #871623 and #871629 (assertion failures with insertions in dynamic collections) |
66 | 154 | * Fixed bug #867262 (allow reuse of iterator over ExtFuncArgItemSequence) | 157 | * Fixed bug #867262 (allow reuse of iterator over ExtFuncArgItemSequence) |
67 | @@ -157,6 +160,8 @@ | |||
68 | 157 | * New node-reference module. References can be obtained for any node, and | 160 | * New node-reference module. References can be obtained for any node, and |
69 | 158 | different nodes cannot have the same identifier. | 161 | different nodes cannot have the same identifier. |
70 | 159 | * Fixed bug #872697 (segmentation fault with validation of NMTOKENS) | 162 | * Fixed bug #872697 (segmentation fault with validation of NMTOKENS) |
71 | 163 | * General index cannot be declared as unique if the type of its key is | ||
72 | 164 | xs:anyAtomicType or xs:untypedAtomic. | ||
73 | 160 | * Added undo for node revalidation | 165 | * Added undo for node revalidation |
74 | 161 | * Optimization for count(collection()) expressions | 166 | * Optimization for count(collection()) expressions |
75 | 162 | * Fixed bug #872796 (validate-in-place can interfere with other update primitives) | 167 | * Fixed bug #872796 (validate-in-place can interfere with other update primitives) |
76 | @@ -175,6 +180,8 @@ | |||
77 | 175 | * Fixed bug #855715 (Invalid escaped characters in regex not caught) | 180 | * Fixed bug #855715 (Invalid escaped characters in regex not caught) |
78 | 176 | * Fixed bug #862089 (Split binary/xq install directories for modules) by | 181 | * Fixed bug #862089 (Split binary/xq install directories for modules) by |
79 | 177 | splitting "module path" into separate URI and Library paths | 182 | splitting "module path" into separate URI and Library paths |
80 | 183 | * New node-position module. This module allows to obtain a representation of a node position, which | ||
81 | 184 | can be used to assess structural relationships with other nodes. | ||
82 | 178 | * Fixed bug #872502 (validation of the JSON module xqdoc fails) | 185 | * Fixed bug #872502 (validation of the JSON module xqdoc fails) |
83 | 179 | * Fixed bug #897619 (testdriver_mt can not run the XQueryX tests) | 186 | * Fixed bug #897619 (testdriver_mt can not run the XQueryX tests) |
84 | 180 | * Fixed bug #867107 (xqdoc dependency to zorba is wrong) | 187 | * Fixed bug #867107 (xqdoc dependency to zorba is wrong) |
85 | 181 | 188 | ||
86 | === modified file 'KNOWN_ISSUES.txt' | |||
87 | --- KNOWN_ISSUES.txt 2012-03-28 05:19:57 +0000 | |||
88 | +++ KNOWN_ISSUES.txt 2012-04-07 00:45:26 +0000 | |||
89 | @@ -37,7 +37,7 @@ | |||
90 | 37 | * The serializer currently doesn't implement character maps as specified | 37 | * The serializer currently doesn't implement character maps as specified |
91 | 38 | (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps) | 38 | (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps) |
92 | 39 | 39 | ||
94 | 40 | * In the 2.0 release, setting the CMake variables ZORBA_NO_UNICODE to | 40 | * In the 2.0 release, setting the CMake variables ZORBA_NO_ICU to |
95 | 41 | ON is not supported. | 41 | ON is not supported. |
96 | 42 | 42 | ||
97 | 43 | * The PHP language binding is not supported on Mac OS X. For details, | 43 | * The PHP language binding is not supported on Mac OS X. For details, |
98 | 44 | 44 | ||
99 | === modified file 'doc/cxx/examples/context.cpp' | |||
100 | --- doc/cxx/examples/context.cpp 2012-03-28 05:19:57 +0000 | |||
101 | +++ doc/cxx/examples/context.cpp 2012-04-07 00:45:26 +0000 | |||
102 | @@ -149,7 +149,11 @@ | |||
103 | 149 | outStream2 << lQuery << std::endl; | 149 | outStream2 << lQuery << std::endl; |
104 | 150 | std::cout << outStream2.str() << std::endl; | 150 | std::cout << outStream2.str() << std::endl; |
105 | 151 | 151 | ||
106 | 152 | #ifndef ZORBA_NO_ICU | ||
107 | 152 | if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n") | 153 | if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n") |
108 | 154 | #else | ||
109 | 155 | if (outStream2.str() != "<?xml version=\"1.0\"?>\nBook 1.1\n") | ||
110 | 156 | #endif /* ZORBA_NO_ICU */ | ||
111 | 153 | { | 157 | { |
112 | 154 | std::cerr << "Test 4 failed with a wrong result : " << std::endl | 158 | std::cerr << "Test 4 failed with a wrong result : " << std::endl |
113 | 155 | << outStream2.str() << std::endl; | 159 | << outStream2.str() << std::endl; |
114 | 156 | 160 | ||
115 | === modified file 'include/zorba/config.h.cmake' | |||
116 | --- include/zorba/config.h.cmake 2012-03-28 05:19:57 +0000 | |||
117 | +++ include/zorba/config.h.cmake 2012-04-07 00:45:26 +0000 | |||
118 | @@ -96,6 +96,8 @@ | |||
119 | 96 | typedef __int64 int64_t; | 96 | typedef __int64 int64_t; |
120 | 97 | #endif /* ZORBA_HAVE_INT64_T */ | 97 | #endif /* ZORBA_HAVE_INT64_T */ |
121 | 98 | 98 | ||
122 | 99 | #cmakedefine ZORBA_SIZEOF_WCHAR_T @ZORBA_SIZEOF_WCHAR_T@ | ||
123 | 100 | |||
124 | 99 | // Compiler | 101 | // Compiler |
125 | 100 | #cmakedefine CLANG | 102 | #cmakedefine CLANG |
126 | 101 | #cmakedefine MSVC | 103 | #cmakedefine MSVC |
127 | @@ -148,7 +150,7 @@ | |||
128 | 148 | 150 | ||
129 | 149 | // Zorba features | 151 | // Zorba features |
130 | 150 | #cmakedefine ZORBA_NO_FULL_TEXT | 152 | #cmakedefine ZORBA_NO_FULL_TEXT |
132 | 151 | #cmakedefine ZORBA_NO_UNICODE | 153 | #cmakedefine ZORBA_NO_ICU |
133 | 152 | #cmakedefine ZORBA_NO_XMLSCHEMA | 154 | #cmakedefine ZORBA_NO_XMLSCHEMA |
134 | 153 | #cmakedefine ZORBA_NUMERIC_OPTIMIZATION | 155 | #cmakedefine ZORBA_NUMERIC_OPTIMIZATION |
135 | 154 | #cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE | 156 | #cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE |
136 | 155 | 157 | ||
137 | === modified file 'include/zorba/static_context.h' | |||
138 | --- include/zorba/static_context.h 2012-03-28 05:19:57 +0000 | |||
139 | +++ include/zorba/static_context.h 2012-04-07 00:45:26 +0000 | |||
140 | @@ -26,9 +26,13 @@ | |||
141 | 26 | #include <zorba/function.h> | 26 | #include <zorba/function.h> |
142 | 27 | #include <zorba/annotation.h> | 27 | #include <zorba/annotation.h> |
143 | 28 | #include <zorba/smart_ptr.h> | 28 | #include <zorba/smart_ptr.h> |
144 | 29 | #include <zorba/smart_ptr.h> | ||
145 | 29 | #ifndef ZORBA_NO_FULL_TEXT | 30 | #ifndef ZORBA_NO_FULL_TEXT |
146 | 30 | #include <zorba/thesaurus.h> | 31 | #include <zorba/thesaurus.h> |
147 | 31 | #endif /* ZORBA_NO_FULL_TEXT */ | 32 | #endif /* ZORBA_NO_FULL_TEXT */ |
148 | 33 | #include <zorba/zorba.h> | ||
149 | 34 | #include <zorba/store_manager.h> | ||
150 | 35 | #include <zorba/zorba_exception.h> | ||
151 | 32 | 36 | ||
152 | 33 | namespace zorba { | 37 | namespace zorba { |
153 | 34 | 38 | ||
154 | 35 | 39 | ||
155 | === modified file 'include/zorba/util/time.h' | |||
156 | --- include/zorba/util/time.h 2012-03-28 05:19:57 +0000 | |||
157 | +++ include/zorba/util/time.h 2012-04-07 00:45:26 +0000 | |||
158 | @@ -178,7 +178,7 @@ | |||
159 | 178 | 178 | ||
160 | 179 | inline long get_walltime_in_millis(const walltime& t) | 179 | inline long get_walltime_in_millis(const walltime& t) |
161 | 180 | { | 180 | { |
163 | 181 | return t.time * 1000 + t.millitm; | 181 | return (long)(t.time * 1000 + t.millitm); |
164 | 182 | } | 182 | } |
165 | 183 | 183 | ||
166 | 184 | #else /* not Windows, and no clock_gettime() */ | 184 | #else /* not Windows, and no clock_gettime() */ |
167 | 185 | 185 | ||
168 | === modified file 'src/CMakeLists.txt' | |||
169 | --- src/CMakeLists.txt 2012-03-28 05:19:57 +0000 | |||
170 | +++ src/CMakeLists.txt 2012-04-07 00:45:26 +0000 | |||
171 | @@ -59,7 +59,10 @@ | |||
172 | 59 | # | 59 | # |
173 | 60 | # Next, add the files to be compiled into the library | 60 | # Next, add the files to be compiled into the library |
174 | 61 | # | 61 | # |
175 | 62 | |||
176 | 63 | MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS}) | ||
177 | 62 | SET(ZORBA_PRECOMPILED_HEADERS OFF CACHE BOOL "Activate Zorba precompiled headers.") | 64 | SET(ZORBA_PRECOMPILED_HEADERS OFF CACHE BOOL "Activate Zorba precompiled headers.") |
178 | 65 | MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS}) | ||
179 | 63 | 66 | ||
180 | 64 | SET(ZORBA_SRCS) | 67 | SET(ZORBA_SRCS) |
181 | 65 | ADD_SRC_SUBFOLDER(ZORBA_SRCS api API_SRCS) | 68 | ADD_SRC_SUBFOLDER(ZORBA_SRCS api API_SRCS) |
182 | @@ -97,6 +100,7 @@ | |||
183 | 97 | ENDIF(ZORBA_WITH_DEBUGGER) | 100 | ENDIF(ZORBA_WITH_DEBUGGER) |
184 | 98 | ADD_SRC_SUBFOLDER(ZORBA_SRCS unit_tests UNIT_TEST_SRCS) | 101 | ADD_SRC_SUBFOLDER(ZORBA_SRCS unit_tests UNIT_TEST_SRCS) |
185 | 99 | 102 | ||
186 | 103 | MESSAGE(STATUS "PRECOMPILED HEADERS: " ${ZORBA_PRECOMPILED_HEADERS}) | ||
187 | 100 | IF(ZORBA_PRECOMPILED_HEADERS) | 104 | IF(ZORBA_PRECOMPILED_HEADERS) |
188 | 101 | ADD_SRC_SUBFOLDER(ZORBA_SRCS precompiled ZORBAMISC_SRCS) | 105 | ADD_SRC_SUBFOLDER(ZORBA_SRCS precompiled ZORBAMISC_SRCS) |
189 | 102 | INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src/precompiled") | 106 | INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src/precompiled") |
190 | 103 | 107 | ||
191 | === modified file 'src/api/serialization/serializer.cpp' | |||
192 | --- src/api/serialization/serializer.cpp 2012-03-28 05:19:57 +0000 | |||
193 | +++ src/api/serialization/serializer.cpp 2012-04-07 00:45:26 +0000 | |||
194 | @@ -180,7 +180,6 @@ | |||
195 | 180 | for (; chars < chars_end; chars++ ) | 180 | for (; chars < chars_end; chars++ ) |
196 | 181 | { | 181 | { |
197 | 182 | 182 | ||
198 | 183 | #ifndef ZORBA_NO_UNICODE | ||
199 | 184 | // the input string is UTF-8 | 183 | // the input string is UTF-8 |
200 | 185 | int char_length = utf8::char_length(*chars); | 184 | int char_length = utf8::char_length(*chars); |
201 | 186 | if (char_length == 0) | 185 | if (char_length == 0) |
202 | @@ -217,7 +216,6 @@ | |||
203 | 217 | 216 | ||
204 | 218 | continue; | 217 | continue; |
205 | 219 | } | 218 | } |
206 | 220 | #endif//ZORBA_NO_UNICODE | ||
207 | 221 | 219 | ||
208 | 222 | // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character | 220 | // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character |
209 | 223 | if (ser && ser->method == PARAMETER_VALUE_XML && | 221 | if (ser && ser->method == PARAMETER_VALUE_XML && |
210 | @@ -332,14 +330,12 @@ | |||
211 | 332 | { | 330 | { |
212 | 333 | tr << (char)0xEF << (char)0xBB << (char)0xBF; | 331 | tr << (char)0xEF << (char)0xBB << (char)0xBF; |
213 | 334 | } | 332 | } |
214 | 335 | #ifndef ZORBA_NO_UNICODE | ||
215 | 336 | else if (ser->encoding == PARAMETER_VALUE_UTF_16) | 333 | else if (ser->encoding == PARAMETER_VALUE_UTF_16) |
216 | 337 | { | 334 | { |
217 | 338 | // Little-endian | 335 | // Little-endian |
218 | 339 | tr.verbatim((char)0xFF); | 336 | tr.verbatim((char)0xFF); |
219 | 340 | tr.verbatim((char)0xFE); | 337 | tr.verbatim((char)0xFE); |
220 | 341 | } | 338 | } |
221 | 342 | #endif | ||
222 | 343 | } | 339 | } |
223 | 344 | } | 340 | } |
224 | 345 | 341 | ||
225 | @@ -862,13 +858,17 @@ | |||
226 | 862 | emitter::emit_declaration(); | 858 | emitter::emit_declaration(); |
227 | 863 | 859 | ||
228 | 864 | if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) { | 860 | if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) { |
236 | 865 | tr << "<?xml version=\"" << ser->version << "\" encoding=\""; | 861 | tr << "<?xml version=\"" << ser->version; |
237 | 866 | if (ser->encoding == PARAMETER_VALUE_UTF_8) { | 862 | switch (ser->encoding) { |
238 | 867 | tr << "UTF-8"; | 863 | case PARAMETER_VALUE_UTF_8: |
239 | 868 | #ifndef ZORBA_NO_UNICODE | 864 | case PARAMETER_VALUE_UTF_16: |
240 | 869 | } else if (ser->encoding == PARAMETER_VALUE_UTF_16) { | 865 | tr << "\" encoding=\""; |
241 | 870 | tr << "UTF-16"; | 866 | switch (ser->encoding) { |
242 | 871 | #endif | 867 | case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break; |
243 | 868 | case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break; | ||
244 | 869 | default : ZORBA_ASSERT(false); | ||
245 | 870 | } | ||
246 | 871 | break; | ||
247 | 872 | } | 872 | } |
248 | 873 | tr << "\""; | 873 | tr << "\""; |
249 | 874 | 874 | ||
250 | @@ -1174,14 +1174,18 @@ | |||
251 | 1174 | } | 1174 | } |
252 | 1175 | 1175 | ||
253 | 1176 | tr << "<meta http-equiv=\"content-type\" content=\"" | 1176 | tr << "<meta http-equiv=\"content-type\" content=\"" |
262 | 1177 | << ser->media_type << "; charset="; | 1177 | << ser->media_type; |
263 | 1178 | 1178 | switch (ser->encoding) { | |
264 | 1179 | if (ser->encoding == PARAMETER_VALUE_UTF_8) | 1179 | case PARAMETER_VALUE_UTF_8: |
265 | 1180 | tr << "UTF-8"; | 1180 | case PARAMETER_VALUE_UTF_16: |
266 | 1181 | #ifndef ZORBA_NO_UNICODE | 1181 | tr << "\" charset=\""; |
267 | 1182 | else if (ser->encoding == PARAMETER_VALUE_UTF_16) | 1182 | switch (ser->encoding) { |
268 | 1183 | tr << "UTF-16"; | 1183 | case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break; |
269 | 1184 | #endif | 1184 | case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break; |
270 | 1185 | default : ZORBA_ASSERT(false); | ||
271 | 1186 | } | ||
272 | 1187 | break; | ||
273 | 1188 | } | ||
274 | 1185 | tr << "\""; | 1189 | tr << "\""; |
275 | 1186 | // closed_parent_tag = 1; | 1190 | // closed_parent_tag = 1; |
276 | 1187 | } | 1191 | } |
277 | @@ -1371,14 +1375,18 @@ | |||
278 | 1371 | } | 1375 | } |
279 | 1372 | 1376 | ||
280 | 1373 | tr << "<meta http-equiv=\"content-type\" content=\"" | 1377 | tr << "<meta http-equiv=\"content-type\" content=\"" |
289 | 1374 | << ser->media_type << "; charset="; | 1378 | << ser->media_type; |
290 | 1375 | 1379 | switch (ser->encoding) { | |
291 | 1376 | if (ser->encoding == PARAMETER_VALUE_UTF_8) | 1380 | case PARAMETER_VALUE_UTF_8: |
292 | 1377 | tr << "UTF-8"; | 1381 | case PARAMETER_VALUE_UTF_16: |
293 | 1378 | #ifndef ZORBA_NO_UNICODE | 1382 | tr << "\" charset=\""; |
294 | 1379 | else if (ser->encoding == PARAMETER_VALUE_UTF_16) | 1383 | switch (ser->encoding) { |
295 | 1380 | tr << "UTF-16"; | 1384 | case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break; |
296 | 1381 | #endif | 1385 | case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break; |
297 | 1386 | default : ZORBA_ASSERT(false); | ||
298 | 1387 | } | ||
299 | 1388 | break; | ||
300 | 1389 | } | ||
301 | 1382 | tr << "\"/"; | 1390 | tr << "\"/"; |
302 | 1383 | //closed_parent_tag = 1; | 1391 | //closed_parent_tag = 1; |
303 | 1384 | } | 1392 | } |
304 | @@ -2098,10 +2106,8 @@ | |||
305 | 2098 | { | 2106 | { |
306 | 2099 | if (!strcmp(aValue, "UTF-8")) | 2107 | if (!strcmp(aValue, "UTF-8")) |
307 | 2100 | encoding = PARAMETER_VALUE_UTF_8; | 2108 | encoding = PARAMETER_VALUE_UTF_8; |
308 | 2101 | #ifndef ZORBA_NO_UNICODE | ||
309 | 2102 | else if (!strcmp(aValue, "UTF-16")) | 2109 | else if (!strcmp(aValue, "UTF-16")) |
310 | 2103 | encoding = PARAMETER_VALUE_UTF_16; | 2110 | encoding = PARAMETER_VALUE_UTF_16; |
311 | 2104 | #endif | ||
312 | 2105 | else | 2111 | else |
313 | 2106 | throw XQUERY_EXCEPTION( | 2112 | throw XQUERY_EXCEPTION( |
314 | 2107 | err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) ) | 2113 | err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) ) |
315 | @@ -2210,16 +2216,13 @@ | |||
316 | 2210 | { | 2216 | { |
317 | 2211 | tr = new transcoder(os, false); | 2217 | tr = new transcoder(os, false); |
318 | 2212 | } | 2218 | } |
319 | 2213 | #ifndef ZORBA_NO_UNICODE | ||
320 | 2214 | else if (encoding == PARAMETER_VALUE_UTF_16) | 2219 | else if (encoding == PARAMETER_VALUE_UTF_16) |
321 | 2215 | { | 2220 | { |
322 | 2216 | tr = new transcoder(os, true); | 2221 | tr = new transcoder(os, true); |
323 | 2217 | } | 2222 | } |
324 | 2218 | #endif | ||
325 | 2219 | else | 2223 | else |
326 | 2220 | { | 2224 | { |
329 | 2221 | ZORBA_ASSERT(0); | 2225 | ZORBA_ASSERT(false); |
328 | 2222 | return false; | ||
330 | 2223 | } | 2226 | } |
331 | 2224 | 2227 | ||
332 | 2225 | if (method == PARAMETER_VALUE_XML) | 2228 | if (method == PARAMETER_VALUE_XML) |
333 | 2226 | 2229 | ||
334 | === modified file 'src/api/serialization/serializer.h' | |||
335 | --- src/api/serialization/serializer.h 2012-03-28 05:19:57 +0000 | |||
336 | +++ src/api/serialization/serializer.h 2012-04-07 00:45:26 +0000 | |||
337 | @@ -70,10 +70,8 @@ | |||
338 | 70 | PARAMETER_VALUE_TEXT, | 70 | PARAMETER_VALUE_TEXT, |
339 | 71 | PARAMETER_VALUE_BINARY, | 71 | PARAMETER_VALUE_BINARY, |
340 | 72 | 72 | ||
345 | 73 | PARAMETER_VALUE_UTF_8 | 73 | PARAMETER_VALUE_UTF_8, |
346 | 74 | #ifndef ZORBA_NO_UNICODE | 74 | PARAMETER_VALUE_UTF_16 |
343 | 75 | ,PARAMETER_VALUE_UTF_16 | ||
344 | 76 | #endif | ||
347 | 77 | } PARAMETER_VALUE_TYPE; | 75 | } PARAMETER_VALUE_TYPE; |
348 | 78 | 76 | ||
349 | 79 | protected: | 77 | protected: |
350 | 80 | 78 | ||
351 | === modified file 'src/diagnostics/diagnostic_en.xml' | |||
352 | --- src/diagnostics/diagnostic_en.xml 2012-03-28 05:19:57 +0000 | |||
353 | +++ src/diagnostics/diagnostic_en.xml 2012-04-07 00:45:26 +0000 | |||
354 | @@ -2517,11 +2517,11 @@ | |||
355 | 2517 | <value>attribute node</value> | 2517 | <value>attribute node</value> |
356 | 2518 | </entry> | 2518 | </entry> |
357 | 2519 | 2519 | ||
359 | 2520 | <entry key="BackRef0Illegal"> | 2520 | <entry key="BackRef0Illegal" if="!defined(ZORBA_NO_ICU)"> |
360 | 2521 | <value>"0": illegal backreference</value> | 2521 | <value>"0": illegal backreference</value> |
361 | 2522 | </entry> | 2522 | </entry> |
362 | 2523 | 2523 | ||
364 | 2524 | <entry key="BackRefIllegalInCharClass"> | 2524 | <entry key="BackRefIllegalInCharClass" if="!defined(ZORBA_NO_ICU)"> |
365 | 2525 | <value>backreference illegal in character class</value> | 2525 | <value>backreference illegal in character class</value> |
366 | 2526 | </entry> | 2526 | </entry> |
367 | 2527 | 2527 | ||
368 | @@ -2569,7 +2569,7 @@ | |||
369 | 2569 | <value>invalid library module</value> | 2569 | <value>invalid library module</value> |
370 | 2570 | </entry> | 2570 | </entry> |
371 | 2571 | 2571 | ||
373 | 2572 | <entry key="BadRegexEscape_3"> | 2572 | <entry key="BadRegexEscape_3" if="!defined(ZORBA_NO_ICU)"> |
374 | 2573 | <value>"$3": illegal escape character</value> | 2573 | <value>"$3": illegal escape character</value> |
375 | 2574 | </entry> | 2574 | </entry> |
376 | 2575 | 2575 | ||
377 | @@ -3029,7 +3029,7 @@ | |||
378 | 3029 | <value>nodeid component too big for encoding</value> | 3029 | <value>nodeid component too big for encoding</value> |
379 | 3030 | </entry> | 3030 | </entry> |
380 | 3031 | 3031 | ||
382 | 3032 | <entry key="NonClosedBackRef_3"> | 3032 | <entry key="NonClosedBackRef_3" if="!defined(ZORBA_NO_ICU)"> |
383 | 3033 | <value>'$$3': non-closed backreference</value> | 3033 | <value>'$$3': non-closed backreference</value> |
384 | 3034 | </entry> | 3034 | </entry> |
385 | 3035 | 3035 | ||
386 | @@ -3041,7 +3041,7 @@ | |||
387 | 3041 | <value>non-localhost authority</value> | 3041 | <value>non-localhost authority</value> |
388 | 3042 | </entry> | 3042 | </entry> |
389 | 3043 | 3043 | ||
391 | 3044 | <entry key="NonexistentBackRef_3"> | 3044 | <entry key="NonexistentBackRef_3" if="!defined(ZORBA_NO_ICU)"> |
392 | 3045 | <value>'$$3': non-existent backreference</value> | 3045 | <value>'$$3': non-existent backreference</value> |
393 | 3046 | </entry> | 3046 | </entry> |
394 | 3047 | 3047 | ||
395 | @@ -3193,94 +3193,183 @@ | |||
396 | 3193 | <value>item type is not a subtype of "$3"</value> | 3193 | <value>item type is not a subtype of "$3"</value> |
397 | 3194 | </entry> | 3194 | </entry> |
398 | 3195 | 3195 | ||
400 | 3196 | <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_UNICODE)"> | 3196 | <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_ICU)"> |
401 | 3197 | <value>unrecognized backslash escape sequence</value> | 3197 | <value>unrecognized backslash escape sequence</value> |
402 | 3198 | </entry> | 3198 | </entry> |
403 | 3199 | 3199 | ||
405 | 3200 | <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_UNICODE)"> | 3200 | <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_ICU)"> |
406 | 3201 | <value>error in {min,max} interval</value> | 3201 | <value>error in {min,max} interval</value> |
407 | 3202 | </entry> | 3202 | </entry> |
408 | 3203 | 3203 | ||
410 | 3204 | <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_UNICODE)"> | 3204 | <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_ICU)"> |
411 | 3205 | <value>an internal ICU error (bug) was detected</value> | 3205 | <value>an internal ICU error (bug) was detected</value> |
412 | 3206 | </entry> | 3206 | </entry> |
413 | 3207 | 3207 | ||
415 | 3208 | <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_UNICODE)"> | 3208 | <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_ICU)"> |
416 | 3209 | <value>backreference to a non-existent capture group</value> | 3209 | <value>backreference to a non-existent capture group</value> |
417 | 3210 | </entry> | 3210 | </entry> |
418 | 3211 | 3211 | ||
420 | 3212 | <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_UNICODE)"> | 3212 | <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_ICU)"> |
421 | 3213 | <value>invalid value for match mode flags</value> | 3213 | <value>invalid value for match mode flags</value> |
422 | 3214 | </entry> | 3214 | </entry> |
423 | 3215 | 3215 | ||
425 | 3216 | <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_UNICODE)"> | 3216 | <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_ICU)"> |
426 | 3217 | <value>in character range [x-y], x is greater than y</value> | 3217 | <value>in character range [x-y], x is greater than y</value> |
427 | 3218 | </entry> | 3218 | </entry> |
428 | 3219 | 3219 | ||
430 | 3220 | <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_UNICODE)"> | 3220 | <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_ICU)"> |
431 | 3221 | <value>RegexMatcher in invalid state for requested operation</value> | 3221 | <value>RegexMatcher in invalid state for requested operation</value> |
432 | 3222 | </entry> | 3222 | </entry> |
433 | 3223 | 3223 | ||
435 | 3224 | <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_UNICODE)"> | 3224 | <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_ICU)"> |
436 | 3225 | <value>look-behind pattern matches must have a bounded maximum length</value> | 3225 | <value>look-behind pattern matches must have a bounded maximum length</value> |
437 | 3226 | </entry> | 3226 | </entry> |
438 | 3227 | 3227 | ||
440 | 3228 | <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_UNICODE)"> | 3228 | <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_ICU)"> |
441 | 3229 | <value>in {min,max}, max is less than min</value> | 3229 | <value>in {min,max}, max is less than min</value> |
442 | 3230 | </entry> | 3230 | </entry> |
443 | 3231 | 3231 | ||
445 | 3232 | <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_UNICODE)"> | 3232 | <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_ICU)"> |
446 | 3233 | <value>incorrectly nested parentheses</value> | 3233 | <value>incorrectly nested parentheses</value> |
447 | 3234 | </entry> | 3234 | </entry> |
448 | 3235 | 3235 | ||
450 | 3236 | <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_UNICODE)"> | 3236 | <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_ICU)"> |
451 | 3237 | <value>missing ']'</value> | 3237 | <value>missing ']'</value> |
452 | 3238 | </entry> | 3238 | </entry> |
453 | 3239 | 3239 | ||
455 | 3240 | <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)"> | 3240 | <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_ICU)"> |
456 | 3241 | <value>decimal number is too large</value> | 3241 | <value>decimal number is too large</value> |
457 | 3242 | </entry> | 3242 | </entry> |
458 | 3243 | 3243 | ||
460 | 3244 | <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)"> | 3244 | <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_ICU)"> |
461 | 3245 | <value>octal character constants must be <= 0377</value> | 3245 | <value>octal character constants must be <= 0377</value> |
462 | 3246 | </entry> | 3246 | </entry> |
463 | 3247 | 3247 | ||
465 | 3248 | <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_UNICODE)"> | 3248 | <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_ICU)"> |
466 | 3249 | <value>incorrect Unicode property</value> | 3249 | <value>incorrect Unicode property</value> |
467 | 3250 | </entry> | 3250 | </entry> |
468 | 3251 | 3251 | ||
470 | 3252 | <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_UNICODE)"> | 3252 | <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_ICU)"> |
471 | 3253 | <value>syntax error</value> | 3253 | <value>syntax error</value> |
472 | 3254 | </entry> | 3254 | </entry> |
473 | 3255 | 3255 | ||
475 | 3256 | <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_UNICODE)"> | 3256 | <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_ICU)"> |
476 | 3257 | <value>can not have UnicodeSets containing strings</value> | 3257 | <value>can not have UnicodeSets containing strings</value> |
477 | 3258 | </entry> | 3258 | </entry> |
478 | 3259 | 3259 | ||
480 | 3260 | <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_UNICODE)"> | 3260 | <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_ICU)"> |
481 | 3261 | <value>backtrack stack overflow</value> | 3261 | <value>backtrack stack overflow</value> |
482 | 3262 | </entry> | 3262 | </entry> |
483 | 3263 | 3263 | ||
485 | 3264 | <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_UNICODE)"> | 3264 | <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_ICU)"> |
486 | 3265 | <value>matching operation aborted by user callback fn</value> | 3265 | <value>matching operation aborted by user callback fn</value> |
487 | 3266 | </entry> | 3266 | </entry> |
488 | 3267 | 3267 | ||
490 | 3268 | <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_UNICODE)"> | 3268 | <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_ICU)"> |
491 | 3269 | <value>maximum allowed match time exceeded</value> | 3269 | <value>maximum allowed match time exceeded</value> |
492 | 3270 | </entry> | 3270 | </entry> |
493 | 3271 | 3271 | ||
496 | 3272 | <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_UNICODE)"> | 3272 | <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_ICU)"> |
497 | 3273 | <value>use of regular expression feature that is not yet implemented</value> | 3273 | <value>use of regular expression feature that is not yet implemented</value> |
498 | 3274 | </entry> | ||
499 | 3275 | |||
500 | 3276 | <!-- Regex Ascii error messages--> | ||
501 | 3277 | <entry key="REGEX_UNIMPLEMENTED" if="defined(ZORBA_NO_ICU)"> | ||
502 | 3278 | <value>use of regular expression feature that is not yet implemented</value> | ||
503 | 3279 | </entry> | ||
504 | 3280 | |||
505 | 3281 | <entry key="REGEX_MISMATCHED_PAREN" if="defined(ZORBA_NO_ICU)"> | ||
506 | 3282 | <value>incorrectly nested parentheses</value> | ||
507 | 3283 | </entry> | ||
508 | 3284 | |||
509 | 3285 | <entry key="REGEX_BROKEN_P_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
510 | 3286 | <value>broken \\p construct</value> | ||
511 | 3287 | </entry> | ||
512 | 3288 | |||
513 | 3289 | <entry key="REGEX_UNKNOWN_PL_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
514 | 3290 | <value>unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo</value> | ||
515 | 3291 | </entry> | ||
516 | 3292 | |||
517 | 3293 | <entry key="REGEX_UNKNOWN_PM_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
518 | 3294 | <value>unknown \\p{M?} category; supported categories: M, Mn, Mc, Me</value> | ||
519 | 3295 | </entry> | ||
520 | 3296 | |||
521 | 3297 | <entry key="REGEX_UNKNOWN_PN_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
522 | 3298 | <value>unknown \\p{N?} category; supported categories: N, Nd, Nl, No</value> | ||
523 | 3299 | </entry> | ||
524 | 3300 | |||
525 | 3301 | <entry key="REGEX_UNKNOWN_PP_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
526 | 3302 | <value>unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po</value> | ||
527 | 3303 | </entry> | ||
528 | 3304 | |||
529 | 3305 | <entry key="REGEX_UNKNOWN_PZ_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
530 | 3306 | <value>unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp</value> | ||
531 | 3307 | </entry> | ||
532 | 3308 | |||
533 | 3309 | <entry key="REGEX_UNKNOWN_PS_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
534 | 3310 | <value>unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So</value> | ||
535 | 3311 | </entry> | ||
536 | 3312 | |||
537 | 3313 | <entry key="REGEX_UNKNOWN_PC_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
538 | 3314 | <value>unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)</value> | ||
539 | 3315 | </entry> | ||
540 | 3316 | |||
541 | 3317 | <entry key="REGEX_BROKEN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
542 | 3318 | <value>broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]</value> | ||
543 | 3319 | </entry> | ||
544 | 3320 | |||
545 | 3321 | <entry key="REGEX_UNKNOWN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)"> | ||
546 | 3322 | <value>unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes</value> | ||
547 | 3323 | </entry> | ||
548 | 3324 | |||
549 | 3325 | <entry key="REGEX_INVALID_UNICODE_CODEPOINT_u" if="defined(ZORBA_NO_ICU)"> | ||
550 | 3326 | <value>invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX</value> | ||
551 | 3327 | </entry> | ||
552 | 3328 | |||
553 | 3329 | <entry key="REGEX_UNKNOWN_ESC_CHAR" if="defined(ZORBA_NO_ICU)"> | ||
554 | 3330 | <value>unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups</value> | ||
555 | 3331 | </entry> | ||
556 | 3332 | |||
557 | 3333 | <entry key="REGEX_INVALID_BACK_REF" if="defined(ZORBA_NO_ICU)"> | ||
558 | 3334 | <value>\\$3 backreference to a non-existent capture group ($4 groups so far)</value> | ||
559 | 3335 | </entry> | ||
560 | 3336 | |||
561 | 3337 | <entry key="REGEX_INVALID_ATOM_CHAR" if="defined(ZORBA_NO_ICU)"> | ||
562 | 3338 | <value>'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]</value> | ||
563 | 3339 | </entry> | ||
564 | 3340 | |||
565 | 3341 | <entry key="REGEX_INVALID_SUBCLASS" if="defined(ZORBA_NO_ICU)"> | ||
566 | 3342 | <value>malformed class subtraction</value> | ||
567 | 3343 | </entry> | ||
568 | 3344 | |||
569 | 3345 | <entry key="REGEX_INVALID_USE_OF_SUBCLASS" if="defined(ZORBA_NO_ICU)"> | ||
570 | 3346 | <value>improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]</value> | ||
571 | 3347 | </entry> | ||
572 | 3348 | |||
573 | 3349 | <entry key="REGEX_MULTICHAR_IN_CHAR_RANGE" if="defined(ZORBA_NO_ICU)"> | ||
574 | 3350 | <value>multichars or char categories cannot be part of a char range</value> | ||
575 | 3351 | </entry> | ||
576 | 3352 | |||
577 | 3353 | <entry key="REGEX_MISSING_CLOSE_BRACKET" if="defined(ZORBA_NO_ICU)"> | ||
578 | 3354 | <value>missing ']' in character group</value> | ||
579 | 3355 | </entry> | ||
580 | 3356 | |||
581 | 3357 | <entry key="REGEX_MAX_LT_MIN" if="defined(ZORBA_NO_ICU)"> | ||
582 | 3358 | <value>in {min,max}, max is less than min</value> | ||
583 | 3274 | </entry> | 3359 | </entry> |
584 | 3275 | 3360 | ||
585 | 3276 | <entry key="UnaryArithOp"> | 3361 | <entry key="UnaryArithOp"> |
586 | 3277 | <value>unary arithmetic operator</value> | 3362 | <value>unary arithmetic operator</value> |
587 | 3278 | </entry> | 3363 | </entry> |
588 | 3279 | 3364 | ||
590 | 3280 | <entry key="UnbalancedChar_3"> | 3365 | <entry key="UnbalancedChar_3" if="!defined(ZORBA_NO_ICU)"> |
591 | 3281 | <value>missing '$3'</value> | 3366 | <value>missing '$3'</value> |
592 | 3282 | </entry> | 3367 | </entry> |
593 | 3283 | 3368 | ||
594 | 3369 | <entry key="UnescapedChar_3" if="!defined(ZORBA_NO_ICU)"> | ||
595 | 3370 | <value>character '$3' must be escaped here</value> | ||
596 | 3371 | </entry> | ||
597 | 3372 | |||
598 | 3284 | <entry key="UnexpectedElement"> | 3373 | <entry key="UnexpectedElement"> |
599 | 3285 | <value>unexpected element</value> | 3374 | <value>unexpected element</value> |
600 | 3286 | </entry> | 3375 | </entry> |
601 | 3287 | 3376 | ||
602 | === modified file 'src/diagnostics/pregenerated/dict_en.cpp' | |||
603 | --- src/diagnostics/pregenerated/dict_en.cpp 2012-03-28 05:19:57 +0000 | |||
604 | +++ src/diagnostics/pregenerated/dict_en.cpp 2012-04-07 00:45:26 +0000 | |||
605 | @@ -437,8 +437,12 @@ | |||
606 | 437 | { "~AtomizationOfGroupByMakesMoreThanOneItem", "atomization of groupby variable produces more than one item" }, | 437 | { "~AtomizationOfGroupByMakesMoreThanOneItem", "atomization of groupby variable produces more than one item" }, |
607 | 438 | { "~AttributeName", "attribute name" }, | 438 | { "~AttributeName", "attribute name" }, |
608 | 439 | { "~AttributeNode", "attribute node" }, | 439 | { "~AttributeNode", "attribute node" }, |
609 | 440 | #if !defined(ZORBA_NO_ICU) | ||
610 | 440 | { "~BackRef0Illegal", "\"0\": illegal backreference" }, | 441 | { "~BackRef0Illegal", "\"0\": illegal backreference" }, |
611 | 442 | #endif | ||
612 | 443 | #if !defined(ZORBA_NO_ICU) | ||
613 | 441 | { "~BackRefIllegalInCharClass", "backreference illegal in character class" }, | 444 | { "~BackRefIllegalInCharClass", "backreference illegal in character class" }, |
614 | 445 | #endif | ||
615 | 442 | { "~BadAnyURI", "invalid xs:anyURI" }, | 446 | { "~BadAnyURI", "invalid xs:anyURI" }, |
616 | 443 | { "~BadArgTypeForFn_2o34o", "${\"2\": }invalid argument type for function $3()${: 4}" }, | 447 | { "~BadArgTypeForFn_2o34o", "${\"2\": }invalid argument type for function $3()${: 4}" }, |
617 | 444 | { "~BadCharAfter_34", "'$3': illegal character after '$4'" }, | 448 | { "~BadCharAfter_34", "'$3': illegal character after '$4'" }, |
618 | @@ -451,7 +455,9 @@ | |||
619 | 451 | { "~BadIterator", "invalid iterator" }, | 455 | { "~BadIterator", "invalid iterator" }, |
620 | 452 | { "~BadLibraryModule", "invalid library module" }, | 456 | { "~BadLibraryModule", "invalid library module" }, |
621 | 453 | { "~BadPath", "invalid path" }, | 457 | { "~BadPath", "invalid path" }, |
622 | 458 | #if !defined(ZORBA_NO_ICU) | ||
623 | 454 | { "~BadRegexEscape_3", "\"$3\": illegal escape character" }, | 459 | { "~BadRegexEscape_3", "\"$3\": illegal escape character" }, |
624 | 460 | #endif | ||
625 | 455 | { "~BadStreamState", "bad I/O stream state" }, | 461 | { "~BadStreamState", "bad I/O stream state" }, |
626 | 456 | { "~BadTokenInBraces_3", "\"$3\": illegal token within { }" }, | 462 | { "~BadTokenInBraces_3", "\"$3\": illegal token within { }" }, |
627 | 457 | { "~BadTraceStream", "trace stream not retrievable using SerializationCallback" }, | 463 | { "~BadTraceStream", "trace stream not retrievable using SerializationCallback" }, |
628 | @@ -567,10 +573,14 @@ | |||
629 | 567 | { "~NoUntypedKeyNodeValue_2", "node with untyped key value found during probe on index \"$2\"" }, | 573 | { "~NoUntypedKeyNodeValue_2", "node with untyped key value found during probe on index \"$2\"" }, |
630 | 568 | { "~NodeIDNeedsBytes_2", "nodeid requires more than $2 bytes" }, | 574 | { "~NodeIDNeedsBytes_2", "nodeid requires more than $2 bytes" }, |
631 | 569 | { "~NodeIDTooBig", "nodeid component too big for encoding" }, | 575 | { "~NodeIDTooBig", "nodeid component too big for encoding" }, |
632 | 576 | #if !defined(ZORBA_NO_ICU) | ||
633 | 570 | { "~NonClosedBackRef_3", "'$$3': non-closed backreference" }, | 577 | { "~NonClosedBackRef_3", "'$$3': non-closed backreference" }, |
634 | 578 | #endif | ||
635 | 571 | { "~NonFileThesaurusURI", "non-file thesaurus URI" }, | 579 | { "~NonFileThesaurusURI", "non-file thesaurus URI" }, |
636 | 572 | { "~NonLocalhostAuthority", "non-localhost authority" }, | 580 | { "~NonLocalhostAuthority", "non-localhost authority" }, |
637 | 581 | #if !defined(ZORBA_NO_ICU) | ||
638 | 573 | { "~NonexistentBackRef_3", "'$$3': non-existent backreference" }, | 582 | { "~NonexistentBackRef_3", "'$$3': non-existent backreference" }, |
639 | 583 | #endif | ||
640 | 574 | { "~NotAllowedForTypeName", "not allowed for typeName (use xsd:untyped instead)" }, | 584 | { "~NotAllowedForTypeName", "not allowed for typeName (use xsd:untyped instead)" }, |
641 | 575 | { "~NotAmongInScopeSchemaTypes", "not among in-scope schema types" }, | 585 | { "~NotAmongInScopeSchemaTypes", "not among in-scope schema types" }, |
642 | 576 | { "~NotDefInDynamicCtx", "not defined in dynamic context" }, | 586 | { "~NotDefInDynamicCtx", "not defined in dynamic context" }, |
643 | @@ -589,6 +599,69 @@ | |||
644 | 589 | { "~ParserNoCreateTree", "XML tree creation failed" }, | 599 | { "~ParserNoCreateTree", "XML tree creation failed" }, |
645 | 590 | { "~PromotionImpossible", "promotion not possible" }, | 600 | { "~PromotionImpossible", "promotion not possible" }, |
646 | 591 | { "~QuotedColon_23", "\"$2\": $3" }, | 601 | { "~QuotedColon_23", "\"$2\": $3" }, |
647 | 602 | #if defined(ZORBA_NO_ICU) | ||
648 | 603 | { "~REGEX_BROKEN_PIs_CONSTRUCT", "broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]" }, | ||
649 | 604 | #endif | ||
650 | 605 | #if defined(ZORBA_NO_ICU) | ||
651 | 606 | { "~REGEX_BROKEN_P_CONSTRUCT", "broken \\p construct" }, | ||
652 | 607 | #endif | ||
653 | 608 | #if defined(ZORBA_NO_ICU) | ||
654 | 609 | { "~REGEX_INVALID_ATOM_CHAR", "'$3': invalid character for an atom; forbidden characters are: [{}?*+|^]" }, | ||
655 | 610 | #endif | ||
656 | 611 | #if defined(ZORBA_NO_ICU) | ||
657 | 612 | { "~REGEX_INVALID_BACK_REF", "\\$3 backreference to a non-existent capture group ($4 groups so far)" }, | ||
658 | 613 | #endif | ||
659 | 614 | #if defined(ZORBA_NO_ICU) | ||
660 | 615 | { "~REGEX_INVALID_SUBCLASS", "malformed class subtraction" }, | ||
661 | 616 | #endif | ||
662 | 617 | #if defined(ZORBA_NO_ICU) | ||
663 | 618 | { "~REGEX_INVALID_UNICODE_CODEPOINT_u", "invalid unicode hex, should be in form \\uXXXX or \\UXXXXXXXX" }, | ||
664 | 619 | #endif | ||
665 | 620 | #if defined(ZORBA_NO_ICU) | ||
666 | 621 | { "~REGEX_INVALID_USE_OF_SUBCLASS", "improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]" }, | ||
667 | 622 | #endif | ||
668 | 623 | #if defined(ZORBA_NO_ICU) | ||
669 | 624 | { "~REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" }, | ||
670 | 625 | #endif | ||
671 | 626 | #if defined(ZORBA_NO_ICU) | ||
672 | 627 | { "~REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" }, | ||
673 | 628 | #endif | ||
674 | 629 | #if defined(ZORBA_NO_ICU) | ||
675 | 630 | { "~REGEX_MISSING_CLOSE_BRACKET", "missing ']' in character group" }, | ||
676 | 631 | #endif | ||
677 | 632 | #if defined(ZORBA_NO_ICU) | ||
678 | 633 | { "~REGEX_MULTICHAR_IN_CHAR_RANGE", "multichars or char categories cannot be part of a char range" }, | ||
679 | 634 | #endif | ||
680 | 635 | #if defined(ZORBA_NO_ICU) | ||
681 | 636 | { "~REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" }, | ||
682 | 637 | #endif | ||
683 | 638 | #if defined(ZORBA_NO_ICU) | ||
684 | 639 | { "~REGEX_UNKNOWN_ESC_CHAR", "unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups" }, | ||
685 | 640 | #endif | ||
686 | 641 | #if defined(ZORBA_NO_ICU) | ||
687 | 642 | { "~REGEX_UNKNOWN_PC_CONSTRUCT", "unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)" }, | ||
688 | 643 | #endif | ||
689 | 644 | #if defined(ZORBA_NO_ICU) | ||
690 | 645 | { "~REGEX_UNKNOWN_PIs_CONSTRUCT", "unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes" }, | ||
691 | 646 | #endif | ||
692 | 647 | #if defined(ZORBA_NO_ICU) | ||
693 | 648 | { "~REGEX_UNKNOWN_PL_CONSTRUCT", "unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo" }, | ||
694 | 649 | #endif | ||
695 | 650 | #if defined(ZORBA_NO_ICU) | ||
696 | 651 | { "~REGEX_UNKNOWN_PM_CONSTRUCT", "unknown \\p{M?} category; supported categories: M, Mn, Mc, Me" }, | ||
697 | 652 | #endif | ||
698 | 653 | #if defined(ZORBA_NO_ICU) | ||
699 | 654 | { "~REGEX_UNKNOWN_PN_CONSTRUCT", "unknown \\p{N?} category; supported categories: N, Nd, Nl, No" }, | ||
700 | 655 | #endif | ||
701 | 656 | #if defined(ZORBA_NO_ICU) | ||
702 | 657 | { "~REGEX_UNKNOWN_PP_CONSTRUCT", "unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po" }, | ||
703 | 658 | #endif | ||
704 | 659 | #if defined(ZORBA_NO_ICU) | ||
705 | 660 | { "~REGEX_UNKNOWN_PS_CONSTRUCT", "unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So" }, | ||
706 | 661 | #endif | ||
707 | 662 | #if defined(ZORBA_NO_ICU) | ||
708 | 663 | { "~REGEX_UNKNOWN_PZ_CONSTRUCT", "unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp" }, | ||
709 | 664 | #endif | ||
710 | 592 | { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" }, | 665 | { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" }, |
711 | 593 | { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" }, | 666 | { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" }, |
712 | 594 | { "~SchemaAttributeName", "schema-attribute name" }, | 667 | { "~SchemaAttributeName", "schema-attribute name" }, |
713 | @@ -610,68 +683,73 @@ | |||
714 | 610 | { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" }, | 683 | { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" }, |
715 | 611 | { "~TwoDefaultDecimalFormats", "two default decimal formats" }, | 684 | { "~TwoDefaultDecimalFormats", "two default decimal formats" }, |
716 | 612 | { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" }, | 685 | { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" }, |
718 | 613 | #if !defined(ZORBA_NO_UNICODE) | 686 | #if !defined(ZORBA_NO_ICU) |
719 | 614 | { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" }, | 687 | { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" }, |
720 | 615 | #endif | 688 | #endif |
722 | 616 | #if !defined(ZORBA_NO_UNICODE) | 689 | #if !defined(ZORBA_NO_ICU) |
723 | 617 | { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" }, | 690 | { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" }, |
724 | 618 | #endif | 691 | #endif |
726 | 619 | #if !defined(ZORBA_NO_UNICODE) | 692 | #if !defined(ZORBA_NO_ICU) |
727 | 620 | { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" }, | 693 | { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" }, |
728 | 621 | #endif | 694 | #endif |
730 | 622 | #if !defined(ZORBA_NO_UNICODE) | 695 | #if !defined(ZORBA_NO_ICU) |
731 | 623 | { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" }, | 696 | { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" }, |
732 | 624 | #endif | 697 | #endif |
734 | 625 | #if !defined(ZORBA_NO_UNICODE) | 698 | #if !defined(ZORBA_NO_ICU) |
735 | 626 | { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" }, | 699 | { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" }, |
736 | 627 | #endif | 700 | #endif |
738 | 628 | #if !defined(ZORBA_NO_UNICODE) | 701 | #if !defined(ZORBA_NO_ICU) |
739 | 629 | { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" }, | 702 | { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" }, |
740 | 630 | #endif | 703 | #endif |
742 | 631 | #if !defined(ZORBA_NO_UNICODE) | 704 | #if !defined(ZORBA_NO_ICU) |
743 | 632 | { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" }, | 705 | { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" }, |
744 | 633 | #endif | 706 | #endif |
746 | 634 | #if !defined(ZORBA_NO_UNICODE) | 707 | #if !defined(ZORBA_NO_ICU) |
747 | 635 | { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" }, | 708 | { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" }, |
748 | 636 | #endif | 709 | #endif |
750 | 637 | #if !defined(ZORBA_NO_UNICODE) | 710 | #if !defined(ZORBA_NO_ICU) |
751 | 638 | { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" }, | 711 | { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" }, |
752 | 639 | #endif | 712 | #endif |
754 | 640 | #if !defined(ZORBA_NO_UNICODE) | 713 | #if !defined(ZORBA_NO_ICU) |
755 | 641 | { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" }, | 714 | { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" }, |
756 | 642 | #endif | 715 | #endif |
758 | 643 | #if !defined(ZORBA_NO_UNICODE) | 716 | #if !defined(ZORBA_NO_ICU) |
759 | 644 | { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" }, | 717 | { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" }, |
760 | 645 | #endif | 718 | #endif |
762 | 646 | #if !defined(ZORBA_NO_UNICODE) | 719 | #if !defined(ZORBA_NO_ICU) |
763 | 647 | { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" }, | 720 | { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" }, |
764 | 648 | #endif | 721 | #endif |
766 | 649 | #if !defined(ZORBA_NO_UNICODE) | 722 | #if !defined(ZORBA_NO_ICU) |
767 | 650 | { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" }, | 723 | { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" }, |
768 | 651 | #endif | 724 | #endif |
770 | 652 | #if !defined(ZORBA_NO_UNICODE) | 725 | #if !defined(ZORBA_NO_ICU) |
771 | 653 | { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" }, | 726 | { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" }, |
772 | 654 | #endif | 727 | #endif |
774 | 655 | #if !defined(ZORBA_NO_UNICODE) | 728 | #if !defined(ZORBA_NO_ICU) |
775 | 656 | { "~U_REGEX_RULE_SYNTAX", "syntax error" }, | 729 | { "~U_REGEX_RULE_SYNTAX", "syntax error" }, |
776 | 657 | #endif | 730 | #endif |
778 | 658 | #if !defined(ZORBA_NO_UNICODE) | 731 | #if !defined(ZORBA_NO_ICU) |
779 | 659 | { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" }, | 732 | { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" }, |
780 | 660 | #endif | 733 | #endif |
782 | 661 | #if !defined(ZORBA_NO_UNICODE) | 734 | #if !defined(ZORBA_NO_ICU) |
783 | 662 | { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" }, | 735 | { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" }, |
784 | 663 | #endif | 736 | #endif |
786 | 664 | #if !defined(ZORBA_NO_UNICODE) | 737 | #if !defined(ZORBA_NO_ICU) |
787 | 665 | { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" }, | 738 | { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" }, |
788 | 666 | #endif | 739 | #endif |
790 | 667 | #if !defined(ZORBA_NO_UNICODE) | 740 | #if !defined(ZORBA_NO_ICU) |
791 | 668 | { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" }, | 741 | { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" }, |
792 | 669 | #endif | 742 | #endif |
794 | 670 | #if !defined(ZORBA_NO_UNICODE) | 743 | #if !defined(ZORBA_NO_ICU) |
795 | 671 | { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" }, | 744 | { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" }, |
796 | 672 | #endif | 745 | #endif |
797 | 673 | { "~UnaryArithOp", "unary arithmetic operator" }, | 746 | { "~UnaryArithOp", "unary arithmetic operator" }, |
798 | 747 | #if !defined(ZORBA_NO_ICU) | ||
799 | 674 | { "~UnbalancedChar_3", "missing '$3'" }, | 748 | { "~UnbalancedChar_3", "missing '$3'" }, |
800 | 749 | #endif | ||
801 | 750 | #if !defined(ZORBA_NO_ICU) | ||
802 | 751 | { "~UnescapedChar_3", "character '$3' must be escaped here" }, | ||
803 | 752 | #endif | ||
804 | 675 | { "~UnexpectedElement", "unexpected element" }, | 753 | { "~UnexpectedElement", "unexpected element" }, |
805 | 676 | { "~VarValMustBeSingleItem_2", "\"$2\": variable value must be single item" }, | 754 | { "~VarValMustBeSingleItem_2", "\"$2\": variable value must be single item" }, |
806 | 677 | { "~Variable", "variable" }, | 755 | { "~Variable", "variable" }, |
807 | 678 | 756 | ||
808 | === modified file 'src/precompiled/stdafx.h' | |||
809 | --- src/precompiled/stdafx.h 2012-03-28 05:19:57 +0000 | |||
810 | +++ src/precompiled/stdafx.h 2012-04-07 00:45:26 +0000 | |||
811 | @@ -15,363 +15,81 @@ | |||
812 | 15 | 15 | ||
813 | 16 | */ | 16 | */ |
814 | 17 | 17 | ||
880 | 18 | #if defined STDAFX | 18 | #ifdef STDAFX |
881 | 19 | #include <iostream> | 19 | |
882 | 20 | #include <stdexcept> | 20 | #include <fstream> |
883 | 21 | #include <cassert> | 21 | #include <iostream> |
884 | 22 | #include <cstring> | 22 | #include <stdexcept> |
885 | 23 | #include <memory> | 23 | #include <cassert> |
886 | 24 | 24 | #include <cstring> | |
887 | 25 | #include <sstream> | 25 | #include <memory> |
888 | 26 | #include <xfwrap> | 26 | |
889 | 27 | #include <xfwrap1> | 27 | #include <sstream> |
890 | 28 | #include <istream> | 28 | #include <xfwrap> |
891 | 29 | #include <cstdio> | 29 | #include <xfwrap1> |
892 | 30 | #include <xxshared> | 30 | #include <istream> |
893 | 31 | #include <crtdefs.h> | 31 | #include <cstdio> |
894 | 32 | #include <map> | 32 | #include <xxshared> |
895 | 33 | #include <set> | 33 | #include <crtdefs.h> |
896 | 34 | //#include <poppack.h> | 34 | #include <map> |
897 | 35 | //#include <xxtype_traits> | 35 | #include <set> |
898 | 36 | //#include <xxcallwrap> | 36 | |
899 | 37 | 37 | #include "runtime/sequences/sequences.h" | |
900 | 38 | // #include <xxcallpmf> | 38 | #include "diagnostics/xquery_diagnostics.h" |
901 | 39 | // //#include <xxbind0> | 39 | #include "xercesc/util/xercesdefs.hpp" |
902 | 40 | // //#include <xxbind1> | 40 | #include "runtime/collections/collections.h" |
903 | 41 | // //#include <xxresult> | 41 | #include "unicode/utypes.h" |
904 | 42 | // #include <zorba/audit.h> | 42 | #include "zorba/config.h" |
905 | 43 | // #include "api/auditimpl.h" | 43 | #include "store/api/store.h" |
906 | 44 | // #include <zorba/audit.h> | 44 | #include "zorba/zorba.h" |
907 | 45 | 45 | #include "zorba/api_shared_types.h" | |
908 | 46 | //#include "unicode/unistr.h" | 46 | #include "compiler/parsetree/parsenodes.h" |
909 | 47 | #include "runtime/sequences/sequences.h" | 47 | #include "compiler/parser/parse_constants.h" |
910 | 48 | #include "diagnostics/xquery_diagnostics.h" | 48 | #include "zorbautils/checked_vector.h" |
911 | 49 | #include "xercesc/util/xercesdefs.hpp" | 49 | #include "compiler/parser/xquery_driver.h" |
912 | 50 | #include "runtime/collections/collections.h" | 50 | #include "util/sorter.h" |
913 | 51 | #include "unicode/utypes.h" | 51 | #include "compiler/xqueryx/xqueryx_to_xquery.h" |
914 | 52 | #include "zorba/config.h" | 52 | #include <zorba/store_manager.h> |
915 | 53 | #include "store/api/store.h" | 53 | #include <zorba/xquery.h> |
916 | 54 | #include "zorba/zorba.h" | 54 | #include <zorba/xquery_exception.h> |
852 | 55 | #include "zorba/api_shared_types.h" | ||
853 | 56 | #include "compiler/parsetree/parsenodes.h" | ||
854 | 57 | #include "compiler/parser/parse_constants.h" | ||
855 | 58 | //#include "compiler/api/compilercb.h" | ||
856 | 59 | #include "zorbautils/checked_vector.h" | ||
857 | 60 | #include "compiler/parser/xquery_driver.h" | ||
858 | 61 | #include "util/sorter.h" | ||
859 | 62 | #include "compiler/xqueryx/xqueryx_to_xquery.h" | ||
860 | 63 | // #include "compiler/xqueryx/xqueryx_xslt.h" | ||
861 | 64 | //#include "compiler/parser/xquery_scanner.h" | ||
862 | 65 | //#include "compiler/parsetree/parsenode_base.h" | ||
863 | 66 | //#include "compiler/parsetree/parsenode_visitor.h" | ||
864 | 67 | // #include "runtime/core/flwor_iterator.h" | ||
865 | 68 | // #include "context/static_context.h" | ||
866 | 69 | // #include "zorbautils/fatal.h" | ||
867 | 70 | // #include "runtime/base/unarybase.h" | ||
868 | 71 | // #include "compiler/expression/expr_consts.h" | ||
869 | 72 | // #include "api/iterator_singleton.h" | ||
870 | 73 | // #include "runtime/visitors/printer_visitor_api.h" | ||
871 | 74 | // //#include "compiler/parsetree/parsenode_print_dot_visitor.h" | ||
872 | 75 | // //#include "compiler/parsetree/parsenode_print_dot_visitor.h" | ||
873 | 76 | // //#include "runtime/visitors/planiter_visitor_impl_code.h" | ||
874 | 77 | // //#include "runtime/visitors/planiter_visitor_impl_include.h" | ||
875 | 78 | // //#include "runtime/visitors/printer_visitor_impl.h" | ||
876 | 79 | // //#include "runtime/core/path.h" | ||
877 | 80 | // #include "compiler/expression/ft_expr.h" | ||
878 | 81 | // #include "compiler/expression/ftnode.h" | ||
879 | 82 | // #include "compiler/parser/query_loc.h" | ||
917 | 83 | #include "util/cxx_util.h" | 55 | #include "util/cxx_util.h" |
922 | 84 | // #include "util/indent.h" | 56 | #include "diagnostics/assert.h" |
923 | 85 | // #include "util/stl_util.h" | 57 | #include "zorbatypes/mapm/m_apm_lc.h" |
924 | 86 | // #include "diagnostics/xquery_diagnostics.h" | 58 | #include "zorbatypes/datetime/parse.h" |
925 | 87 | // #include "zorbatypes/numconversions.h" | 59 | #include "zorbatypes/chartype.h" |
926 | 60 | #include "zorbatypes/collation_manager.h" | ||
927 | 61 | #include "zorbatypes/ft_token.h" | ||
928 | 62 | #include "zorbatypes/m_apm.h" | ||
929 | 63 | #include "zorbatypes/rclock.h" | ||
930 | 64 | #include "zorbatypes/schema_types.h" | ||
931 | 65 | #include "zorbatypes/timezone.h" | ||
932 | 66 | #include "zorbatypes/transcoder.h" | ||
933 | 67 | #include "zorbatypes/URI.h" | ||
934 | 68 | #include "zorbatypes/xerces_xmlcharray.h" | ||
935 | 69 | #include "zorbatypes/zorbatypes_decl.h" | ||
936 | 70 | #include "zorbatypes/zstring.h" | ||
937 | 71 | #include "zorbautils/condition.h" | ||
938 | 72 | #include "zorbautils/hashfun.h" | ||
939 | 73 | #include "zorbautils/hashmap.h" | ||
940 | 74 | #include "zorbautils/hashmap_itemp.h" | ||
941 | 75 | #include "zorbautils/hashmap_str_obj.h" | ||
942 | 76 | #include "zorbautils/hashmap_zstring.h" | ||
943 | 77 | #include "zorbautils/hashset.h" | ||
944 | 78 | #include "zorbautils/hashset_itemh.h" | ||
945 | 79 | #include "zorbautils/latch.h" | ||
946 | 80 | #include "zorbautils/locale.h" | ||
947 | 81 | #include "zorbautils/lock.h" | ||
948 | 82 | #include "zorbautils/mutex.h" | ||
949 | 83 | #include "zorbautils/runnable.h" | ||
950 | 84 | #include "zorbautils/SAXParser.h" | ||
951 | 85 | #include "zorbautils/stack.h" | ||
952 | 86 | #include "zorbautils/string_util.h" | ||
953 | 87 | #include "unit_tests/unit_test_list.h" | ||
954 | 88 | #include "zorba/diagnostic_handler.h" | ||
955 | 89 | #include "zorba/xquery_warning.h" | ||
956 | 90 | #include "runtime/full_text/ftcontains_visitor.h" | ||
957 | 91 | #include "store/api/ft_token_iterator.h" | ||
958 | 92 | #include "store/naive/ft_token_store.h" | ||
959 | 88 | 93 | ||
960 | 89 | // #include "api/serialization/serializable.h" | ||
961 | 90 | // #include "api/serialization/serializer.h" | ||
962 | 91 | // #include "api/collectionimpl.h" | ||
963 | 92 | // #include "api/dynamiccontextimpl.h" | ||
964 | 93 | // #include "api/fileimpl.h" | ||
965 | 94 | // #include "api/functionimpl.h" | ||
966 | 95 | // #include "api/invoke_item_sequence.h" | ||
967 | 96 | // #include "api/itemfactoryimpl.h" | ||
968 | 97 | // #include "api/resultiteratorchainer.h" | ||
969 | 98 | // #include "api/resultiteratorimpl.h" | ||
970 | 99 | // #include "api/sax2impl.h" | ||
971 | 100 | // #include "api/serializerimpl.h" | ||
972 | 101 | // #include "api/staticcontextimpl.h" | ||
973 | 102 | // #include "api/storeiteratorimpl.h" | ||
974 | 103 | // #include "api/unmarshaller.h" | ||
975 | 104 | // #include "api/uri_resolver_wrappers.h" | ||
976 | 105 | // #include "api/vectoriterator.h" | ||
977 | 106 | // #include "api/xmldatamanagerimpl.h" | ||
978 | 107 | // //#include "api/xqueryimpl.h" | ||
979 | 108 | // #include "api/zorbaimpl.h" | ||
980 | 109 | // #include "capi/cdynamic_context.h" | ||
981 | 110 | // #include "capi/cexpression.h" | ||
982 | 111 | // #include "capi/cexternal_function.h" | ||
983 | 112 | // #include "capi/cimplementation.h" | ||
984 | 113 | // #include "capi/csequence.h" | ||
985 | 114 | // #include "capi/cstatic_context.h" | ||
986 | 115 | // #include "capi/error.h" | ||
987 | 116 | // #include "capi/external_module.h" | ||
988 | 117 | // #include "capi/single_item_sequence.h" | ||
989 | 118 | // #include "capi/user_item_sequence.h" | ||
990 | 119 | // #include "compiler/parser/flexlexer.h" | ||
991 | 120 | // #include "compiler/parser/ft_types.h" | ||
992 | 121 | // #include "compiler/parser/symbol_table.h" | ||
993 | 122 | // #include "compiler/parser/xqdoc_comment.h" | ||
994 | 123 | // #include "compiler/parsetree/parsenode_print_xml_visitor.h" | ||
995 | 124 | // #include "compiler/parsetree/parsenode_print_xqdoc_visitor.h" | ||
996 | 125 | // #include "compiler/parsetree/parsenode_print_xquery_visitor.h" | ||
997 | 126 | // #include "compiler/parsetree/parsenode_xqdoc_visitor.h" | ||
998 | 127 | // #include "compiler/translator/prolog_graph.h" | ||
999 | 128 | // #include "compiler/translator/translator.h" | ||
1000 | 129 | // #include "compiler/codegen/plan_visitor.h" | ||
1001 | 130 | // #include "compiler/expression/abstract_expr_visitor.h" | ||
1002 | 131 | // #include "compiler/expression/expr.h" | ||
1003 | 132 | // #include "compiler/expression/expr_annotations.h" | ||
1004 | 133 | // #include "compiler/expression/expr_base.h" | ||
1005 | 134 | // #include "compiler/expression/expr_classes.h" | ||
1006 | 135 | // #include "compiler/expression/expr_iter.h" | ||
1007 | 136 | // #include "compiler/expression/expr_utils.h" | ||
1008 | 137 | // #include "compiler/expression/expr_visitor.h" | ||
1009 | 138 | // #include "compiler/expression/flwor_expr.h" | ||
1010 | 139 | // //#include "compiler/expression/fo_expr.h" | ||
1011 | 140 | // #include "compiler/expression/ftnode_classes.h" | ||
1012 | 141 | // #include "compiler/expression/ftnode_visitor.h" | ||
1013 | 142 | // #include "compiler/expression/function_item_expr.h" | ||
1014 | 143 | // #include "compiler/expression/path_expr.h" | ||
1015 | 144 | // #include "compiler/expression/script_exprs.h" | ||
1016 | 145 | // #include "compiler/expression/update_exprs.h" | ||
1017 | 146 | // #include "compiler/expression/var_expr.h" | ||
1018 | 147 | // #include "compiler/rewriter/framework/rewriter.h" | ||
1019 | 148 | // #include "compiler/rewriter/framework/rewriter_context.h" | ||
1020 | 149 | // #include "compiler/rewriter/framework/rule_driver.h" | ||
1021 | 150 | // #include "compiler/rewriter/framework/sequential_rewriter.h" | ||
1022 | 151 | // #include "compiler/rewriter/rewriters/common_rewriter.h" | ||
1023 | 152 | // #include "compiler/rewriter/rewriters/default_optimizer.h" | ||
1024 | 153 | // #include "compiler/rewriter/rewriters/phase1_rewriter.h" | ||
1025 | 154 | // #include "compiler/rewriter/rules/ruleset.h" | ||
1026 | 155 | // #include "compiler/rewriter/rules/rule_base.h" | ||
1027 | 156 | // #include "compiler/rewriter/rules/type_rules.h" | ||
1028 | 157 | // #include "compiler/rewriter/tools/dataflow_annotations.h" | ||
1029 | 158 | // #include "compiler/rewriter/tools/expr_tools.h" | ||
1030 | 159 | // #include "compiler/rewriter/tools/udf_graph.h" | ||
1031 | 160 | // #include "compiler/xqddf/collection_decl.h" | ||
1032 | 161 | // #include "compiler/xqddf/value_ic.h" | ||
1033 | 162 | // #include "compiler/xqddf/value_index.h" | ||
1034 | 163 | // #include "compiler/semantic_annotations/annotations.h" | ||
1035 | 164 | // #include "compiler/semantic_annotations/annotation_holder.h" | ||
1036 | 165 | // #include "compiler/semantic_annotations/annotation_keys.h" | ||
1037 | 166 | // #include "compiler/api/compiler_api.h" | ||
1038 | 167 | // #include "compiler/api/compiler_api_impl.h" | ||
1039 | 168 | // #include "system/globalenv.h" | ||
1040 | 169 | // #include "system/properties.h" | ||
1041 | 170 | // #include "system/zorba_properties.h" | ||
1042 | 171 | // #include "context/decimal_format.h" | ||
1043 | 172 | // #include "context/default_uri_mappers.h" | ||
1044 | 173 | // #include "context/default_url_resolvers.h" | ||
1045 | 174 | // #include "context/dynamic_context.h" | ||
1046 | 175 | // #include "context/dynamic_loader.h" | ||
1047 | 176 | // #include "context/internal_uri_resolvers.h" | ||
1048 | 177 | // //#include "context/namespace_context.h" | ||
1049 | 178 | // #include "context/root_static_context.h" | ||
1050 | 179 | // #include "context/sctx_map_iterator.h" | ||
1051 | 180 | // #include "context/standard_uri_resolvers.h" | ||
1052 | 181 | // #include "context/static_context_consts.h" | ||
1053 | 182 | // #include "context/stemmer_wrappers.h" | ||
1054 | 183 | // #include "context/uri_resolver.h" | ||
1055 | 184 | // #include "context/uri_resolver_wrapper.h" | ||
1056 | 185 | #include "diagnostics/assert.h" | ||
1057 | 186 | // #include "diagnostics/diagnostic.h" | ||
1058 | 187 | // #include "diagnostics/dict.h" | ||
1059 | 188 | // #include "diagnostics/dict_impl.h" | ||
1060 | 189 | // #include "diagnostics/StackWalker.h" | ||
1061 | 190 | // #include "diagnostics/user_error.h" | ||
1062 | 191 | // #include "diagnostics/user_exception.h" | ||
1063 | 192 | // #include "diagnostics/xquery_exception.h" | ||
1064 | 193 | // #include "diagnostics/xquery_stack_trace.h" | ||
1065 | 194 | // #include "diagnostics/xquery_warning.h" | ||
1066 | 195 | // #include "diagnostics/zorba_exception.h" | ||
1067 | 196 | // //#include "functions/annotation.h" | ||
1068 | 197 | // #include "functions/external_function.h" | ||
1069 | 198 | // #include "functions/function.h" | ||
1070 | 199 | // #include "functions/function_consts.h" | ||
1071 | 200 | // #include "functions/function_impl.h" | ||
1072 | 201 | // #include "functions/func_accessors_impl.h" | ||
1073 | 202 | // #include "functions/func_apply.h" | ||
1074 | 203 | // #include "functions/func_arithmetic.h" | ||
1075 | 204 | // #include "functions/func_booleans_impl.h" | ||
1076 | 205 | // #include "functions/func_durations_dates_times_impl.h" | ||
1077 | 206 | // #include "functions/func_enclosed.h" | ||
1078 | 207 | // #include "functions/func_eval.h" | ||
1079 | 208 | // #include "functions/func_hoist.h" | ||
1080 | 209 | // #include "functions/func_index_ddl.h" | ||
1081 | 210 | // #include "functions/func_node_sort_distinct.h" | ||
1082 | 211 | // #include "functions/func_numerics_impl.h" | ||
1083 | 212 | // #include "functions/func_reflection.h" | ||
1084 | 213 | // #include "functions/func_sequences_impl.h" | ||
1085 | 214 | // #include "functions/func_var_decl.h" | ||
1086 | 215 | // #include "functions/library.h" | ||
1087 | 216 | // #include "functions/signature.h" | ||
1088 | 217 | // #include "functions/udf.h" | ||
1089 | 218 | // #include "runtime/full_text/thesauri/decode_base128.h" | ||
1090 | 219 | // #include "runtime/full_text/thesauri/encoded_list.h" | ||
1091 | 220 | // #include "runtime/full_text/thesauri/iso2788.h" | ||
1092 | 221 | // #include "runtime/full_text/thesauri/wn_db_segment.h" | ||
1093 | 222 | // #include "runtime/full_text/thesauri/wn_synset.h" | ||
1094 | 223 | // #include "runtime/full_text/thesauri/wn_thesaurus.h" | ||
1095 | 224 | // #include "runtime/full_text/thesauri/wn_types.h" | ||
1096 | 225 | // #include "runtime/full_text/thesauri/xqftts_relationship.h" | ||
1097 | 226 | // #include "runtime/full_text/thesauri/xqftts_thesaurus.h" | ||
1098 | 227 | // #include "runtime/full_text/ft_match.h" | ||
1099 | 228 | // #include "runtime/full_text/ft_query_item.h" | ||
1100 | 229 | // #include "runtime/full_text/ft_single_token_iterator.h" | ||
1101 | 230 | // #include "runtime/full_text/ft_stop_words_set.h" | ||
1102 | 231 | // #include "runtime/full_text/ft_thesaurus.h" | ||
1103 | 232 | // #include "runtime/full_text/ft_token_matcher.h" | ||
1104 | 233 | // #include "runtime/full_text/ft_token_seq_iterator.h" | ||
1105 | 234 | // #include "runtime/full_text/ft_token_span.h" | ||
1106 | 235 | // #include "runtime/full_text/ft_wildcard.h" | ||
1107 | 236 | // #include "runtime/full_text/full_text.h" | ||
1108 | 237 | // #include "runtime/full_text/apply.h" | ||
1109 | 238 | // #include "runtime/full_text/ft_util.h" | ||
1110 | 239 | // #include "runtime/collections/collections_base.h" | ||
1111 | 240 | // #include "runtime/core/apply_updates.h" | ||
1112 | 241 | // #include "runtime/core/arithmetic_impl.h" | ||
1113 | 242 | // #include "runtime/core/constructors.h" | ||
1114 | 243 | // #include "runtime/core/fncall_iterator.h" | ||
1115 | 244 | // #include "runtime/core/internal_operators.h" | ||
1116 | 245 | // #include "runtime/core/item_iterator.h" | ||
1117 | 246 | // #include "runtime/core/nodeid_iterators.h" | ||
1118 | 247 | // #include "runtime/core/path_iterators.h" | ||
1119 | 248 | // #include "runtime/core/sequencetypes.h" | ||
1120 | 249 | // #include "runtime/core/trycatch.h" | ||
1121 | 250 | // #include "runtime/core/var_iterators.h" | ||
1122 | 251 | // #include "runtime/numerics/NumericsImpl.h" | ||
1123 | 252 | // #include "runtime/booleans/BooleanImpl.h" | ||
1124 | 253 | // #include "runtime/base/binarybase.h" | ||
1125 | 254 | // #include "runtime/base/narybase.h" | ||
1126 | 255 | // #include "runtime/base/noarybase.h" | ||
1127 | 256 | // #include "runtime/base/plan_iterator.h" | ||
1128 | 257 | // #include "runtime/sequences/SequencesImpl.h" | ||
1129 | 258 | // #include "runtime/visitors/iterprinter.h" | ||
1130 | 259 | // #include "runtime/misc/materialize.h" | ||
1131 | 260 | // #include "runtime/scripting/scripting.h" | ||
1132 | 261 | // #include "types/schema/EventSchemaValidator.h" | ||
1133 | 262 | // #include "types/schema/LoadSchemaErrorHandler.h" | ||
1134 | 263 | // #include "types/schema/PrintSchema.h" | ||
1135 | 264 | // #include "types/schema/revalidateUtils.h" | ||
1136 | 265 | // #include "types/schema/schema.h" | ||
1137 | 266 | // #include "types/schema/SchemaValidatorFilter.h" | ||
1138 | 267 | // #include "types/schema/StrX.h" | ||
1139 | 268 | // #include "types/schema/validate.h" | ||
1140 | 269 | // #include "types/schema/ValidationEventHandler.h" | ||
1141 | 270 | // #include "types/schema/xercesIncludes.h" | ||
1142 | 271 | // #include "types/schema/XercesParseUtils.h" | ||
1143 | 272 | // #include "types/schema/XercSchemaValidator.h" | ||
1144 | 273 | // #include "types/casting.h" | ||
1145 | 274 | // #include "types/collation.h" | ||
1146 | 275 | // #include "types/node_test.h" | ||
1147 | 276 | // #include "types/root_typemanager.h" | ||
1148 | 277 | // #include "types/typeconstants.h" | ||
1149 | 278 | // #include "types/typeimpl.h" | ||
1150 | 279 | // #include "types/typemanager.h" | ||
1151 | 280 | // #include "types/typemanagerimpl.h" | ||
1152 | 281 | // #include "types/typeops.h" | ||
1153 | 282 | // #include "util/fx/fxarray.h" | ||
1154 | 283 | // #include "util/fx/fxcharheap.h" | ||
1155 | 284 | // #include "util/ascii_util.h" | ||
1156 | 285 | // #include "util/atomic_int.h" | ||
1157 | 286 | // #include "util/auto_vector.h" | ||
1158 | 287 | // #include "util/curl_util.h" | ||
1159 | 288 | // #include "util/dir.h" | ||
1160 | 289 | // #include "util/dynamic_bitset.h" | ||
1161 | 290 | // #include "util/empty.h" | ||
1162 | 291 | // #include "util/error_util.h" | ||
1163 | 292 | // #include "util/fs_util.h" | ||
1164 | 293 | // #include "util/hashmap.h" | ||
1165 | 294 | // //#include "util/hashmap32.h" | ||
1166 | 295 | // #include "util/less.h" | ||
1167 | 296 | // #include "util/mmap_file.h" | ||
1168 | 297 | // #include "util/nonatomic_int.h" | ||
1169 | 298 | // #include "util/omanip.h" | ||
1170 | 299 | // #include "util/oseparator.h" | ||
1171 | 300 | // #include "util/regex.h" | ||
1172 | 301 | // #include "util/singleton.h" | ||
1173 | 302 | // #include "util/string_util.h" | ||
1174 | 303 | // #include "util/threads.h" | ||
1175 | 304 | // #include "util/tokenbuf.h" | ||
1176 | 305 | // #include "util/tracer.h" | ||
1177 | 306 | // #include "util/triple.h" | ||
1178 | 307 | // #include "util/unicode_categories.h" | ||
1179 | 308 | // #include "util/unicode_util.h" | ||
1180 | 309 | // #include "util/uri_util.h" | ||
1181 | 310 | // #include "util/utf8_string.h" | ||
1182 | 311 | // #include "util/utf8_util.h" | ||
1183 | 312 | // #include "util/utf8_util_base.h" | ||
1184 | 313 | // #include "util/void_int.h" | ||
1185 | 314 | // #include "util/xml_util.h" | ||
1186 | 315 | // #include "zorbamisc/config/platform.h" | ||
1187 | 316 | // //#include "zorbaserialization/archiver.h" | ||
1188 | 317 | // #include "zorbaserialization/base64impl.h" | ||
1189 | 318 | // #include "zorbaserialization/bin_archiver.h" | ||
1190 | 319 | // //#include "zorbaserialization/class_serializer.h" | ||
1191 | 320 | // #include "zorbaserialization/mem_archiver.h" | ||
1192 | 321 | // #include "zorbaserialization/serialization_engine.h" | ||
1193 | 322 | // #include "zorbaserialization/template_serializer.h" | ||
1194 | 323 | // #include "zorbaserialization/xml_archiver.h" | ||
1195 | 324 | // #include "zorbaserialization/zorba_class_serializer.h" | ||
1196 | 325 | #include "zorbatypes/mapm/m_apm_lc.h" | ||
1197 | 326 | #include "zorbatypes/datetime/parse.h" | ||
1198 | 327 | //#include "zorbatypes/binary.h" | ||
1199 | 328 | #include "zorbatypes/chartype.h" | ||
1200 | 329 | #include "zorbatypes/collation_manager.h" | ||
1201 | 330 | //#include "zorbatypes/datetime.h" | ||
1202 | 331 | //#include "zorbatypes/decimal.h" | ||
1203 | 332 | //#include "zorbatypes/duration.h" | ||
1204 | 333 | //#include "zorbatypes/floatimpl.h" | ||
1205 | 334 | #include "zorbatypes/ft_token.h" | ||
1206 | 335 | //#include "zorbatypes/integer.h" | ||
1207 | 336 | #include "zorbatypes/libicu.h" | ||
1208 | 337 | #include "zorbatypes/m_apm.h" | ||
1209 | 338 | //#include "zorbatypes/rchandle.h" | ||
1210 | 339 | #include "zorbatypes/rclock.h" | ||
1211 | 340 | //#include "zorbatypes/regex_ascii.h" | ||
1212 | 341 | #include "zorbatypes/schema_types.h" | ||
1213 | 342 | #include "zorbatypes/timezone.h" | ||
1214 | 343 | #include "zorbatypes/transcoder.h" | ||
1215 | 344 | #include "zorbatypes/URI.h" | ||
1216 | 345 | #include "zorbatypes/xerces_xmlcharray.h" | ||
1217 | 346 | #include "zorbatypes/zorbatypes_decl.h" | ||
1218 | 347 | #include "zorbatypes/zstring.h" | ||
1219 | 348 | //#include "zorbautils/stemmer/sb_stemmer.h" | ||
1220 | 349 | #include "zorbautils/condition.h" | ||
1221 | 350 | #include "zorbautils/hashfun.h" | ||
1222 | 351 | #include "zorbautils/hashmap.h" | ||
1223 | 352 | #include "zorbautils/hashmap_itemp.h" | ||
1224 | 353 | #include "zorbautils/hashmap_str_obj.h" | ||
1225 | 354 | #include "zorbautils/hashmap_zstring.h" | ||
1226 | 355 | #include "zorbautils/hashset.h" | ||
1227 | 356 | #include "zorbautils/hashset_itemh.h" | ||
1228 | 357 | //#include "zorbautils/icu_tokenizer.h" | ||
1229 | 358 | #include "zorbautils/latch.h" | ||
1230 | 359 | #include "zorbautils/locale.h" | ||
1231 | 360 | #include "zorbautils/lock.h" | ||
1232 | 361 | #include "zorbautils/mutex.h" | ||
1233 | 362 | #include "zorbautils/runnable.h" | ||
1234 | 363 | #include "zorbautils/SAXParser.h" | ||
1235 | 364 | #include "zorbautils/stack.h" | ||
1236 | 365 | // #include "zorbautils/stemmer.h" | ||
1237 | 366 | #include "zorbautils/string_util.h" | ||
1238 | 367 | //#include "zorbautils/synchronous_logger.h" | ||
1239 | 368 | //#include "zorbautils/tokenizer.h" | ||
1240 | 369 | #include "unit_tests/unit_test_list.h" | ||
1241 | 370 | #include "zorba/diagnostic_handler.h" | ||
1242 | 371 | #include "zorba/xquery_warning.h" | ||
1243 | 372 | #include "runtime/full_text/ftcontains_visitor.h" | ||
1244 | 373 | #include "store/naive/naive_ft_token_iterator.h" | ||
1245 | 374 | #include "store/api/ft_token_iterator.h" | ||
1246 | 375 | #include "store/naive/ft_token_store.h" | ||
1247 | 376 | #endif | 94 | #endif |
1248 | 377 | /* vim:set et sw=2 ts=2: */ | 95 | /* vim:set et sw=2 ts=2: */ |
1249 | 378 | 96 | ||
1250 | === modified file 'src/runtime/full_text/CMakeLists.txt' | |||
1251 | --- src/runtime/full_text/CMakeLists.txt 2012-03-28 05:19:57 +0000 | |||
1252 | +++ src/runtime/full_text/CMakeLists.txt 2012-04-07 00:45:26 +0000 | |||
1253 | @@ -42,11 +42,11 @@ | |||
1254 | 42 | default_tokenizer.cpp | 42 | default_tokenizer.cpp |
1255 | 43 | ) | 43 | ) |
1256 | 44 | 44 | ||
1258 | 45 | IF (ZORBA_NO_UNICODE) | 45 | IF (ZORBA_NO_ICU) |
1259 | 46 | LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp) | 46 | LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp) |
1261 | 47 | ELSE (ZORBA_NO_UNICODE) | 47 | ELSE (ZORBA_NO_ICU) |
1262 | 48 | LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp) | 48 | LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp) |
1264 | 49 | ENDIF (ZORBA_NO_UNICODE) | 49 | ENDIF (ZORBA_NO_ICU) |
1265 | 50 | 50 | ||
1266 | 51 | ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS) | 51 | ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS) |
1267 | 52 | 52 | ||
1268 | 53 | 53 | ||
1269 | === modified file 'src/runtime/full_text/default_tokenizer.cpp' | |||
1270 | --- src/runtime/full_text/default_tokenizer.cpp 2012-03-28 05:19:57 +0000 | |||
1271 | +++ src/runtime/full_text/default_tokenizer.cpp 2012-04-07 00:45:26 +0000 | |||
1272 | @@ -19,22 +19,22 @@ | |||
1273 | 19 | #include <zorba/config.h> | 19 | #include <zorba/config.h> |
1274 | 20 | 20 | ||
1275 | 21 | #include "default_tokenizer.h" | 21 | #include "default_tokenizer.h" |
1277 | 22 | #ifdef ZORBA_NO_UNICODE | 22 | #ifdef ZORBA_NO_ICU |
1278 | 23 | # include "latin_tokenizer.h" | 23 | # include "latin_tokenizer.h" |
1279 | 24 | #else | 24 | #else |
1280 | 25 | # include "icu_tokenizer.h" | 25 | # include "icu_tokenizer.h" |
1282 | 26 | #endif /* ZORBA_NO_UNICODE */ | 26 | #endif /* ZORBA_NO_ICU */ |
1283 | 27 | 27 | ||
1284 | 28 | namespace zorba { | 28 | namespace zorba { |
1285 | 29 | 29 | ||
1286 | 30 | /////////////////////////////////////////////////////////////////////////////// | 30 | /////////////////////////////////////////////////////////////////////////////// |
1287 | 31 | 31 | ||
1288 | 32 | TokenizerProvider const& default_tokenizer_provider() { | 32 | TokenizerProvider const& default_tokenizer_provider() { |
1290 | 33 | #ifdef ZORBA_NO_UNICODE | 33 | #ifdef ZORBA_NO_ICU |
1291 | 34 | static LatinTokenizerProvider const instance; | 34 | static LatinTokenizerProvider const instance; |
1292 | 35 | #else | 35 | #else |
1293 | 36 | static ICU_TokenizerProvider const instance; | 36 | static ICU_TokenizerProvider const instance; |
1295 | 37 | #endif /* ZORBA_NO_UNICODE */ | 37 | #endif /* ZORBA_NO_ICU */ |
1296 | 38 | return instance; | 38 | return instance; |
1297 | 39 | }; | 39 | }; |
1298 | 40 | 40 | ||
1299 | 41 | 41 | ||
1300 | === modified file 'src/runtime/full_text/latin_tokenizer.cpp' | |||
1301 | --- src/runtime/full_text/latin_tokenizer.cpp 2012-03-28 05:19:57 +0000 | |||
1302 | +++ src/runtime/full_text/latin_tokenizer.cpp 2012-04-07 00:45:26 +0000 | |||
1303 | @@ -18,8 +18,9 @@ | |||
1304 | 18 | #include <functional> | 18 | #include <functional> |
1305 | 19 | 19 | ||
1306 | 20 | #include <zorba/diagnostic_list.h> | 20 | #include <zorba/diagnostic_list.h> |
1309 | 21 | #include <zorba/xquery_exception.h> | 21 | |
1310 | 22 | #include <zorba/zorba.h> | 22 | #include "diagnostics/dict.h" |
1311 | 23 | #include "diagnostics/xquery_exception.h" | ||
1312 | 23 | 24 | ||
1313 | 24 | #include "latin_tokenizer.h" | 25 | #include "latin_tokenizer.h" |
1314 | 25 | 26 | ||
1315 | 26 | 27 | ||
1316 | === modified file 'src/runtime/full_text/latin_tokenizer.h' | |||
1317 | --- src/runtime/full_text/latin_tokenizer.h 2012-03-28 05:19:57 +0000 | |||
1318 | +++ src/runtime/full_text/latin_tokenizer.h 2012-04-07 00:45:26 +0000 | |||
1319 | @@ -14,12 +14,12 @@ | |||
1320 | 14 | * limitations under the License. | 14 | * limitations under the License. |
1321 | 15 | */ | 15 | */ |
1322 | 16 | 16 | ||
1325 | 17 | #ifndef ZORBA_WESTERN_TOKENIZER_H | 17 | #ifndef ZORBA_LATIN_TOKENIZER_H |
1326 | 18 | #define ZORBA_WESTERN_TOKENIZER_H | 18 | #define ZORBA_LATIN_TOKENIZER_H |
1327 | 19 | 19 | ||
1328 | 20 | #include <zorba/config.h> | 20 | #include <zorba/config.h> |
1329 | 21 | 21 | ||
1331 | 22 | #ifdef ZORBA_NO_FULL_TEXT | 22 | #ifdef ZORBA_NO_ICU |
1332 | 23 | 23 | ||
1333 | 24 | #include <zorba/tokenizer.h> | 24 | #include <zorba/tokenizer.h> |
1334 | 25 | #include "zorbatypes/zstring.h" | 25 | #include "zorbatypes/zstring.h" |
1335 | @@ -38,8 +38,8 @@ | |||
1336 | 38 | 38 | ||
1337 | 39 | // inherited | 39 | // inherited |
1338 | 40 | void destroy() const; | 40 | void destroy() const; |
1341 | 41 | void tokenize( char const*, size_type, iso639_1::type, bool, Callback&, | 41 | void tokenize( char const*, size_type, locale::iso639_1::type, bool, |
1342 | 42 | void* ); | 42 | Callback&, void* ); |
1343 | 43 | 43 | ||
1344 | 44 | private: | 44 | private: |
1345 | 45 | typedef zstring string_type; | 45 | typedef zstring string_type; |
1346 | @@ -64,13 +64,14 @@ | |||
1347 | 64 | class LatinTokenizerProvider : public TokenizerProvider { | 64 | class LatinTokenizerProvider : public TokenizerProvider { |
1348 | 65 | public: | 65 | public: |
1349 | 66 | // inherited | 66 | // inherited |
1351 | 67 | Tokenizer::ptr getTokenizer( iso639_1::type, Tokenizer::Numbers& ) const; | 67 | Tokenizer::ptr getTokenizer( locale::iso639_1::type, |
1352 | 68 | Tokenizer::Numbers& ) const; | ||
1353 | 68 | }; | 69 | }; |
1354 | 69 | 70 | ||
1355 | 70 | /////////////////////////////////////////////////////////////////////////////// | 71 | /////////////////////////////////////////////////////////////////////////////// |
1356 | 71 | 72 | ||
1357 | 72 | } // namespace zorba | 73 | } // namespace zorba |
1358 | 73 | 74 | ||
1361 | 74 | #endif /* ZORBA_NO_FULL_TEXT */ | 75 | #endif /* ZORBA_NO_ICU */ |
1362 | 75 | #endif /* ZORBA_WESTERN_TOKENIZER_H */ | 76 | #endif /* ZORBA_LATIN_TOKENIZER_H */ |
1363 | 76 | /* vim:set et sw=2 ts=2: */ | 77 | /* vim:set et sw=2 ts=2: */ |
1364 | 77 | 78 | ||
1365 | === modified file 'src/runtime/numerics/format_integer_impl.cpp' | |||
1366 | --- src/runtime/numerics/format_integer_impl.cpp 2012-03-28 05:19:57 +0000 | |||
1367 | +++ src/runtime/numerics/format_integer_impl.cpp 2012-04-07 00:45:26 +0000 | |||
1368 | @@ -881,7 +881,7 @@ | |||
1369 | 881 | utf8_result += (*valueit); | 881 | utf8_result += (*valueit); |
1370 | 882 | } | 882 | } |
1371 | 883 | else | 883 | else |
1373 | 884 | utf8_result += (0x2080 + *valueit - '0'); | 884 | utf8_result += (unicode::code_point)(0x2080 + *valueit - '0'); |
1374 | 885 | } | 885 | } |
1375 | 886 | } | 886 | } |
1376 | 887 | else if((c0 == 0x2460) || //CIRCLED DIGIT ONE (1-20) | 887 | else if((c0 == 0x2460) || //CIRCLED DIGIT ONE (1-20) |
1377 | 888 | 888 | ||
1378 | === modified file 'src/runtime/numerics/numerics_impl.cpp' | |||
1379 | --- src/runtime/numerics/numerics_impl.cpp 2012-03-28 05:19:57 +0000 | |||
1380 | +++ src/runtime/numerics/numerics_impl.cpp 2012-04-07 00:45:26 +0000 | |||
1381 | @@ -462,7 +462,7 @@ | |||
1382 | 462 | minus( "-" ) | 462 | minus( "-" ) |
1383 | 463 | { | 463 | { |
1384 | 464 | utf8_string<zstring> u_per_mille( per_mille ); | 464 | utf8_string<zstring> u_per_mille( per_mille ); |
1386 | 465 | u_per_mille = 0x2030; | 465 | u_per_mille = (unicode::code_point)0x2030; |
1387 | 466 | } | 466 | } |
1388 | 467 | 467 | ||
1389 | 468 | void readFormat(const DecimalFormat_t& df_t) | 468 | void readFormat(const DecimalFormat_t& df_t) |
1390 | 469 | 469 | ||
1391 | === modified file 'src/runtime/strings/strings_impl.cpp' | |||
1392 | --- src/runtime/strings/strings_impl.cpp 2012-03-28 05:19:57 +0000 | |||
1393 | +++ src/runtime/strings/strings_impl.cpp 2012-04-07 00:45:26 +0000 | |||
1394 | @@ -810,7 +810,9 @@ | |||
1395 | 810 | zstring normForm; | 810 | zstring normForm; |
1396 | 811 | zstring resStr; | 811 | zstring resStr; |
1397 | 812 | unicode::normalization::type normType; | 812 | unicode::normalization::type normType; |
1398 | 813 | #ifndef ZORBA_NO_ICU | ||
1399 | 813 | bool success; | 814 | bool success; |
1400 | 815 | #endif /* ZORBA_NO_ICU */ | ||
1401 | 814 | 816 | ||
1402 | 815 | PlanIteratorState* state; | 817 | PlanIteratorState* state; |
1403 | 816 | DEFAULT_STACK_INIT(PlanIteratorState, state, planState); | 818 | DEFAULT_STACK_INIT(PlanIteratorState, state, planState); |
1404 | @@ -860,10 +862,10 @@ | |||
1405 | 860 | } | 862 | } |
1406 | 861 | 863 | ||
1407 | 862 | item0->getStringValue2(resStr); | 864 | item0->getStringValue2(resStr); |
1409 | 863 | #ifndef ZORBA_NO_UNICODE | 865 | #ifndef ZORBA_NO_ICU |
1410 | 864 | success = utf8::normalize(resStr, normType, &resStr); | 866 | success = utf8::normalize(resStr, normType, &resStr); |
1411 | 865 | ZORBA_ASSERT(success); | 867 | ZORBA_ASSERT(success); |
1413 | 866 | #endif//#ifndef ZORBA_NO_UNICODE | 868 | #endif//#ifndef ZORBA_NO_ICU |
1414 | 867 | STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state ); | 869 | STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state ); |
1415 | 868 | } | 870 | } |
1416 | 869 | else | 871 | else |
1417 | @@ -992,7 +994,7 @@ | |||
1418 | 992 | trans_map[ *map_i ] = *trans_i; | 994 | trans_map[ *map_i ] = *trans_i; |
1419 | 993 | 995 | ||
1420 | 994 | for ( ; map_i != map_end; ++map_i ) | 996 | for ( ; map_i != map_end; ++map_i ) |
1422 | 995 | trans_map[ *map_i ] = ~0; | 997 | trans_map[ *map_i ] = static_cast<unicode::code_point>( ~0 ); |
1423 | 996 | } | 998 | } |
1424 | 997 | 999 | ||
1425 | 998 | utf8_string<zstring> u_result_string( result_string ); | 1000 | utf8_string<zstring> u_result_string( result_string ); |
1426 | @@ -1007,7 +1009,7 @@ | |||
1427 | 1007 | cp_map_type::const_iterator const found_i = trans_map.find( cp ); | 1009 | cp_map_type::const_iterator const found_i = trans_map.find( cp ); |
1428 | 1008 | if ( found_i != trans_map.end() ) { | 1010 | if ( found_i != trans_map.end() ) { |
1429 | 1009 | cp = found_i->second; | 1011 | cp = found_i->second; |
1431 | 1010 | if ( cp == ~0 ) | 1012 | if ( cp == static_cast<unicode::code_point>( ~0 ) ) |
1432 | 1011 | continue; | 1013 | continue; |
1433 | 1012 | } | 1014 | } |
1434 | 1013 | u_result_string += cp; | 1015 | u_result_string += cp; |
1435 | @@ -1795,16 +1797,33 @@ | |||
1436 | 1795 | int &utf8start, | 1797 | int &utf8start, |
1437 | 1796 | unsigned int &bytestart, | 1798 | unsigned int &bytestart, |
1438 | 1797 | int utf8end, | 1799 | int utf8end, |
1439 | 1800 | unsigned int byteend, | ||
1440 | 1798 | zstring &out) | 1801 | zstring &out) |
1441 | 1799 | { | 1802 | { |
1442 | 1803 | #ifndef ZORBA_NO_ICU | ||
1443 | 1800 | utf8::size_type clen; | 1804 | utf8::size_type clen; |
1451 | 1801 | while(utf8start < utf8end) | 1805 | if(utf8end) |
1452 | 1802 | { | 1806 | { |
1453 | 1803 | clen = utf8::char_length(*sin); | 1807 | while(utf8start < utf8end) |
1454 | 1804 | out.append(sin, clen); | 1808 | { |
1455 | 1805 | utf8start++; | 1809 | clen = utf8::char_length(*sin); |
1456 | 1806 | bytestart += clen; | 1810 | if(clen == 0) |
1457 | 1807 | sin += clen; | 1811 | clen = 1; |
1458 | 1812 | out.append(sin, clen); | ||
1459 | 1813 | utf8start++; | ||
1460 | 1814 | bytestart += clen; | ||
1461 | 1815 | sin += clen; | ||
1462 | 1816 | } | ||
1463 | 1817 | } | ||
1464 | 1818 | else | ||
1465 | 1819 | #endif | ||
1466 | 1820 | { | ||
1467 | 1821 | if(!utf8end) | ||
1468 | 1822 | utf8end = byteend; | ||
1469 | 1823 | out.append(sin, utf8end-bytestart); | ||
1470 | 1824 | sin += utf8end-bytestart; | ||
1471 | 1825 | utf8start = utf8end; | ||
1472 | 1826 | bytestart = utf8end; | ||
1473 | 1808 | } | 1827 | } |
1474 | 1809 | } | 1828 | } |
1475 | 1810 | 1829 | ||
1476 | @@ -1812,6 +1831,7 @@ | |||
1477 | 1812 | int &match_end1, | 1831 | int &match_end1, |
1478 | 1813 | unsigned int &match_end1_bytes, | 1832 | unsigned int &match_end1_bytes, |
1479 | 1814 | int match_start2, | 1833 | int match_start2, |
1480 | 1834 | unsigned int match_start2_bytes, | ||
1481 | 1815 | const char *&strin) | 1835 | const char *&strin) |
1482 | 1816 | { | 1836 | { |
1483 | 1817 | store::Item_t non_match_elem; | 1837 | store::Item_t non_match_elem; |
1484 | @@ -1833,7 +1853,7 @@ | |||
1485 | 1833 | // utf8_it++; | 1853 | // utf8_it++; |
1486 | 1834 | // match_end1++; | 1854 | // match_end1++; |
1487 | 1835 | //} | 1855 | //} |
1489 | 1836 | copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, non_match_str); | 1856 | copyUtf8Chars(strin, match_end1, match_end1_bytes, match_start2, match_start2_bytes, non_match_str); |
1490 | 1837 | store::Item_t non_match_text_item; | 1857 | store::Item_t non_match_text_item; |
1491 | 1838 | GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str); | 1858 | GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str); |
1492 | 1839 | } | 1859 | } |
1493 | @@ -1864,19 +1884,31 @@ | |||
1494 | 1864 | i--; | 1884 | i--; |
1495 | 1865 | break; | 1885 | break; |
1496 | 1866 | } | 1886 | } |
1497 | 1887 | #ifndef ZORBA_NO_ICU | ||
1498 | 1867 | match_startg = rx.get_match_start(i+1); | 1888 | match_startg = rx.get_match_start(i+1); |
1499 | 1868 | if((match_startg < 0) && (gparent < 0)) | 1889 | if((match_startg < 0) && (gparent < 0)) |
1500 | 1869 | continue; | 1890 | continue; |
1501 | 1891 | #else | ||
1502 | 1892 | int temp_endg; | ||
1503 | 1893 | match_startg = -1; | ||
1504 | 1894 | temp_endg = -1; | ||
1505 | 1895 | if(!rx.get_match_start_end_bytes(i+1, &match_startg, &temp_endg) && (gparent < 0)) | ||
1506 | 1896 | continue; | ||
1507 | 1897 | #endif | ||
1508 | 1870 | if(match_endgood < match_startg) | 1898 | if(match_endgood < match_startg) |
1509 | 1871 | { | 1899 | { |
1510 | 1872 | //add non-group match text | 1900 | //add non-group match text |
1511 | 1873 | zstring non_group_str; | 1901 | zstring non_group_str; |
1512 | 1874 | 1902 | ||
1514 | 1875 | copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, non_group_str); | 1903 | copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_startg, 0, non_group_str); |
1515 | 1876 | store::Item_t non_group_text_item; | 1904 | store::Item_t non_group_text_item; |
1516 | 1877 | GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str); | 1905 | GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent.getp(), non_group_str); |
1517 | 1878 | } | 1906 | } |
1518 | 1907 | #ifndef ZORBA_NO_ICU | ||
1519 | 1879 | match_endg = rx.get_match_end(i+1); | 1908 | match_endg = rx.get_match_end(i+1); |
1520 | 1909 | #else | ||
1521 | 1910 | match_endg = temp_endg; | ||
1522 | 1911 | #endif | ||
1523 | 1880 | //add group match text | 1912 | //add group match text |
1524 | 1881 | GENV_ITEMFACTORY->createQName(group_element_name, | 1913 | GENV_ITEMFACTORY->createQName(group_element_name, |
1525 | 1882 | static_context::W3C_FN_NS, "fn", "group"); | 1914 | static_context::W3C_FN_NS, "fn", "group"); |
1526 | @@ -1907,7 +1939,7 @@ | |||
1527 | 1907 | } | 1939 | } |
1528 | 1908 | zstring group_str; | 1940 | zstring group_str; |
1529 | 1909 | 1941 | ||
1531 | 1910 | copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, group_str); | 1942 | copyUtf8Chars(sin, match_startg, match_end1_bytes, match_endg, 0, group_str); |
1532 | 1911 | store::Item_t group_text_item; | 1943 | store::Item_t group_text_item; |
1533 | 1912 | GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str); | 1944 | GENV_ITEMFACTORY->createTextNode(group_text_item, group_elem.getp(), group_str); |
1534 | 1913 | } | 1945 | } |
1535 | @@ -1916,7 +1948,7 @@ | |||
1536 | 1916 | { | 1948 | { |
1537 | 1917 | zstring non_group_str; | 1949 | zstring non_group_str; |
1538 | 1918 | 1950 | ||
1540 | 1919 | copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, non_group_str); | 1951 | copyUtf8Chars(sin, match_endgood, match_end1_bytes, match_end2, 0, non_group_str); |
1541 | 1920 | store::Item_t non_group_text_item; | 1952 | store::Item_t non_group_text_item; |
1542 | 1921 | GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str); | 1953 | GENV_ITEMFACTORY->createTextNode(non_group_text_item, parent, non_group_str); |
1543 | 1922 | } | 1954 | } |
1544 | @@ -2144,8 +2176,14 @@ | |||
1545 | 2144 | reachedEnd = false; | 2176 | reachedEnd = false; |
1546 | 2145 | while(rx.find_next_match(&reachedEnd)) | 2177 | while(rx.find_next_match(&reachedEnd)) |
1547 | 2146 | { | 2178 | { |
1550 | 2147 | int match_start2 = rx.get_match_start(); | 2179 | int match_start2; |
1551 | 2148 | int match_end2 = rx.get_match_end(); | 2180 | int match_end2; |
1552 | 2181 | #ifndef ZORBA_NO_ICU | ||
1553 | 2182 | match_start2 = rx.get_match_start(); | ||
1554 | 2183 | match_end2 = rx.get_match_end(); | ||
1555 | 2184 | #else | ||
1556 | 2185 | rx.get_match_start_end_bytes(0, &match_start2, &match_end2); | ||
1557 | 2186 | #endif | ||
1558 | 2149 | ZORBA_ASSERT(match_start2 >= 0); | 2187 | ZORBA_ASSERT(match_start2 >= 0); |
1559 | 2150 | 2188 | ||
1560 | 2151 | if(is_input_stream && reachedEnd && !instream->eof()) | 2189 | if(is_input_stream && reachedEnd && !instream->eof()) |
1561 | @@ -2157,7 +2195,7 @@ | |||
1562 | 2157 | //construct the fn:non-match | 2195 | //construct the fn:non-match |
1563 | 2158 | if(match_start2 > match_end1) | 2196 | if(match_start2 > match_end1) |
1564 | 2159 | { | 2197 | { |
1566 | 2160 | addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, instr); | 2198 | addNonMatchElement(result, match_end1, match_end1_bytes, match_start2, 0, instr); |
1567 | 2161 | } | 2199 | } |
1568 | 2162 | 2200 | ||
1569 | 2163 | //construct the fn:match | 2201 | //construct the fn:match |
1570 | @@ -2165,7 +2203,7 @@ | |||
1571 | 2165 | match_end1 = match_end2; | 2203 | match_end1 = match_end2; |
1572 | 2166 | } | 2204 | } |
1573 | 2167 | 2205 | ||
1575 | 2168 | if(is_input_stream && reachedEnd && !instream->eof()) | 2206 | if(is_input_stream && !instream->eof()) |
1576 | 2169 | { | 2207 | { |
1577 | 2170 | //load some more data, maybe the match will be different | 2208 | //load some more data, maybe the match will be different |
1578 | 2171 | if(match_end1_bytes) | 2209 | if(match_end1_bytes) |
1579 | @@ -2213,7 +2251,7 @@ | |||
1580 | 2213 | else | 2251 | else |
1581 | 2214 | { | 2252 | { |
1582 | 2215 | if(match_end1_bytes < streambuf_read) | 2253 | if(match_end1_bytes < streambuf_read) |
1584 | 2216 | addNonMatchElement(result, match_end1, match_end1_bytes, streambuf_read, instr); | 2254 | addNonMatchElement(result, match_end1, match_end1_bytes, 0, streambuf_read, instr); |
1585 | 2217 | if(is_input_stream && instream->eof()) | 2255 | if(is_input_stream && instream->eof()) |
1586 | 2218 | reachedEnd = true; | 2256 | reachedEnd = true; |
1587 | 2219 | } | 2257 | } |
1588 | 2220 | 2258 | ||
1589 | === modified file 'src/store/api/store.h' | |||
1590 | --- src/store/api/store.h 2012-03-28 05:19:57 +0000 | |||
1591 | +++ src/store/api/store.h 2012-04-07 00:45:26 +0000 | |||
1592 | @@ -16,7 +16,7 @@ | |||
1593 | 16 | #ifndef ZORBA_STORE_STORE_H | 16 | #ifndef ZORBA_STORE_STORE_H |
1594 | 17 | #define ZORBA_STORE_STORE_H | 17 | #define ZORBA_STORE_STORE_H |
1595 | 18 | 18 | ||
1597 | 19 | #include <zorba/config.h> | 19 | #include "zorba/config.h" |
1598 | 20 | #include "zorbatypes/schema_types.h" | 20 | #include "zorbatypes/schema_types.h" |
1599 | 21 | 21 | ||
1600 | 22 | #include "store/api/shared_types.h" | 22 | #include "store/api/shared_types.h" |
1601 | 23 | 23 | ||
1602 | === modified file 'src/store/naive/simple_store.h' | |||
1603 | --- src/store/naive/simple_store.h 2012-03-28 23:58:23 +0000 | |||
1604 | +++ src/store/naive/simple_store.h 2012-04-07 00:45:26 +0000 | |||
1605 | @@ -16,7 +16,11 @@ | |||
1606 | 16 | #ifndef ZORBA_SIMPLE_STORE | 16 | #ifndef ZORBA_SIMPLE_STORE |
1607 | 17 | #define ZORBA_SIMPLE_STORE | 17 | #define ZORBA_SIMPLE_STORE |
1608 | 18 | 18 | ||
1610 | 19 | #include "store.h" | 19 | #include "store/naive/store.h" |
1611 | 20 | |||
1612 | 21 | #include "store/naive/node_factory.h" | ||
1613 | 22 | #include "store/naive/pul_primitive_factory.h" | ||
1614 | 23 | #include "store/naive/tree_id_generator.h" | ||
1615 | 20 | 24 | ||
1616 | 21 | namespace zorba { | 25 | namespace zorba { |
1617 | 22 | namespace simplestore { | 26 | namespace simplestore { |
1618 | @@ -72,7 +76,7 @@ | |||
1619 | 72 | 76 | ||
1620 | 73 | NodeFactory* createNodeFactory() const; | 77 | NodeFactory* createNodeFactory() const; |
1621 | 74 | 78 | ||
1623 | 75 | void destroyNodeFactory(NodeFactory*) const; | 79 | void destroyNodeFactory(zorba::simplestore::NodeFactory*) const; |
1624 | 76 | 80 | ||
1625 | 77 | store::ItemFactory* createItemFactory() const; | 81 | store::ItemFactory* createItemFactory() const; |
1626 | 78 | 82 | ||
1627 | @@ -84,7 +88,7 @@ | |||
1628 | 84 | 88 | ||
1629 | 85 | PULPrimitiveFactory* createPULFactory() const; | 89 | PULPrimitiveFactory* createPULFactory() const; |
1630 | 86 | 90 | ||
1632 | 87 | void destroyPULFactory(PULPrimitiveFactory*) const; | 91 | void destroyPULFactory(zorba::simplestore::PULPrimitiveFactory*) const; |
1633 | 88 | 92 | ||
1634 | 89 | CollectionSet* createCollectionSet() const; | 93 | CollectionSet* createCollectionSet() const; |
1635 | 90 | 94 | ||
1636 | 91 | 95 | ||
1637 | === modified file 'src/store/naive/store.cpp' | |||
1638 | --- src/store/naive/store.cpp 2012-03-28 22:09:36 +0000 | |||
1639 | +++ src/store/naive/store.cpp 2012-04-07 00:45:26 +0000 | |||
1640 | @@ -33,7 +33,7 @@ | |||
1641 | 33 | 33 | ||
1642 | 34 | #include "properties.h" | 34 | #include "properties.h" |
1643 | 35 | #include "string_pool.h" | 35 | #include "string_pool.h" |
1645 | 36 | #include "store.h" | 36 | #include "simple_store.h" |
1646 | 37 | #include "simple_temp_seq.h" | 37 | #include "simple_temp_seq.h" |
1647 | 38 | #include "simple_lazy_temp_seq.h" | 38 | #include "simple_lazy_temp_seq.h" |
1648 | 39 | #include "collection.h" | 39 | #include "collection.h" |
1649 | 40 | 40 | ||
1650 | === modified file 'src/store/naive/store.h' | |||
1651 | --- src/store/naive/store.h 2012-03-28 22:09:36 +0000 | |||
1652 | +++ src/store/naive/store.h 2012-04-07 00:45:26 +0000 | |||
1653 | @@ -16,10 +16,18 @@ | |||
1654 | 16 | #ifndef ZORBA_SIMPLESTORE_STORE_H | 16 | #ifndef ZORBA_SIMPLESTORE_STORE_H |
1655 | 17 | #define ZORBA_SIMPLESTORE_STORE_H | 17 | #define ZORBA_SIMPLESTORE_STORE_H |
1656 | 18 | 18 | ||
1657 | 19 | #include "store/api/store.h" | ||
1658 | 20 | |||
1659 | 19 | #include "shared_types.h" | 21 | #include "shared_types.h" |
1660 | 20 | #include "store_defs.h" | 22 | #include "store_defs.h" |
1661 | 21 | #include "hashmap_nodep.h" | 23 | #include "hashmap_nodep.h" |
1662 | 22 | #include "tree_id.h" | 24 | #include "tree_id.h" |
1663 | 25 | #include "store/util/hashmap_stringbuf.h" | ||
1664 | 26 | #include "zorbautils/mutex.h" | ||
1665 | 27 | #include "zorbautils/lock.h" | ||
1666 | 28 | #include "zorbautils/hashmap.h" | ||
1667 | 29 | #include "zorbautils/hashmap_itemp.h" | ||
1668 | 30 | #include "zorbautils/hashmap_zstring_nonserializable.h" | ||
1669 | 23 | 31 | ||
1670 | 24 | #if (defined (WIN32) || defined (WINCE)) | 32 | #if (defined (WIN32) || defined (WINCE)) |
1671 | 25 | #include "node_items.h" | 33 | #include "node_items.h" |
1672 | @@ -28,14 +36,7 @@ | |||
1673 | 28 | #include "store/api/ic.h" | 36 | #include "store/api/ic.h" |
1674 | 29 | #endif | 37 | #endif |
1675 | 30 | 38 | ||
1684 | 31 | #include "store/api/store.h" | 39 | using namespace zorba; |
1677 | 32 | |||
1678 | 33 | #include "store/util/hashmap_stringbuf.h" | ||
1679 | 34 | |||
1680 | 35 | #include "zorbautils/mutex.h" | ||
1681 | 36 | #include "zorbautils/lock.h" | ||
1682 | 37 | #include "zorbautils/hashmap_itemp.h" | ||
1683 | 38 | #include "zorbautils/hashmap_zstring_nonserializable.h" | ||
1685 | 39 | 40 | ||
1686 | 40 | namespace zorba | 41 | namespace zorba |
1687 | 41 | { | 42 | { |
1688 | @@ -63,9 +64,9 @@ | |||
1689 | 63 | class TreeIdGeneratorFactory; | 64 | class TreeIdGeneratorFactory; |
1690 | 64 | class TreeIdGenerator; | 65 | class TreeIdGenerator; |
1691 | 65 | 66 | ||
1695 | 66 | typedef zorba::HashMapZString<XmlNode_t> DocumentSet; | 67 | typedef HashMapZString<XmlNode_t> DocumentSet; |
1696 | 67 | typedef ItemPointerHashMap<store::Index_t> IndexSet; | 68 | typedef zorba::ItemPointerHashMap<store::Index_t> IndexSet; |
1697 | 68 | typedef ItemPointerHashMap<store::IC_t> ICSet; | 69 | typedef zorba::ItemPointerHashMap<store::IC_t> ICSet; |
1698 | 69 | 70 | ||
1699 | 70 | 71 | ||
1700 | 71 | 72 | ||
1701 | 72 | 73 | ||
1702 | === modified file 'src/system/globalenv.cpp' | |||
1703 | --- src/system/globalenv.cpp 2012-03-28 05:19:57 +0000 | |||
1704 | +++ src/system/globalenv.cpp 2012-04-07 00:45:26 +0000 | |||
1705 | @@ -17,11 +17,11 @@ | |||
1706 | 17 | 17 | ||
1707 | 18 | #include "common/common.h" | 18 | #include "common/common.h" |
1708 | 19 | 19 | ||
1710 | 20 | #ifndef ZORBA_NO_UNICODE | 20 | #ifndef ZORBA_NO_ICU |
1711 | 21 | # include <unicode/uclean.h> | 21 | # include <unicode/uclean.h> |
1712 | 22 | # include <unicode/utypes.h> | 22 | # include <unicode/utypes.h> |
1713 | 23 | # include <unicode/udata.h> | 23 | # include <unicode/udata.h> |
1715 | 24 | #endif /* ZORBA_NO_UNICODE */ | 24 | #endif /* ZORBA_NO_ICU */ |
1716 | 25 | 25 | ||
1717 | 26 | #ifdef ZORBA_WITH_BIG_INTEGER | 26 | #ifdef ZORBA_WITH_BIG_INTEGER |
1718 | 27 | # include "zorbatypes/m_apm.h" | 27 | # include "zorbatypes/m_apm.h" |
1719 | @@ -208,7 +208,7 @@ | |||
1720 | 208 | // from one thread only | 208 | // from one thread only |
1721 | 209 | // see http://www.icu-project.org/userguide/design.html#Init_and_Termination | 209 | // see http://www.icu-project.org/userguide/design.html#Init_and_Termination |
1722 | 210 | // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html | 210 | // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html |
1724 | 211 | #ifndef ZORBA_NO_UNICODE | 211 | #ifndef ZORBA_NO_ICU |
1725 | 212 | # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE) | 212 | # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE) |
1726 | 213 | { | 213 | { |
1727 | 214 | TCHAR self_path[1024]; | 214 | TCHAR self_path[1024]; |
1728 | @@ -238,13 +238,13 @@ | |||
1729 | 238 | udata_setCommonData(icu_appdata, &data_err); | 238 | udata_setCommonData(icu_appdata, &data_err); |
1730 | 239 | ZORBA_ASSERT(data_err == U_ZERO_ERROR); | 239 | ZORBA_ASSERT(data_err == U_ZERO_ERROR); |
1731 | 240 | 240 | ||
1733 | 241 | // u_setDataDirectory(self_path); | 241 | // u_setDataDirectory(self_path); |
1734 | 242 | } | 242 | } |
1735 | 243 | # endif | 243 | # endif |
1736 | 244 | UErrorCode lICUInitStatus = U_ZERO_ERROR; | 244 | UErrorCode lICUInitStatus = U_ZERO_ERROR; |
1737 | 245 | u_init(&lICUInitStatus); | 245 | u_init(&lICUInitStatus); |
1738 | 246 | ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR); | 246 | ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR); |
1740 | 247 | #endif//ifndef ZORBA_NO_UNICODE | 247 | #endif /* ZORBA_NO_ICU */ |
1741 | 248 | } | 248 | } |
1742 | 249 | 249 | ||
1743 | 250 | 250 | ||
1744 | @@ -256,12 +256,12 @@ | |||
1745 | 256 | // releases statically initialized memory and prevents | 256 | // releases statically initialized memory and prevents |
1746 | 257 | // valgrind from reporting those problems at the end | 257 | // valgrind from reporting those problems at the end |
1747 | 258 | // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920 | 258 | // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920 |
1749 | 259 | #ifndef ZORBA_NO_UNICODE | 259 | #ifndef ZORBA_NO_ICU |
1750 | 260 | u_cleanup(); | 260 | u_cleanup(); |
1751 | 261 | # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE) | 261 | # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE) |
1752 | 262 | delete[] icu_appdata; | 262 | delete[] icu_appdata; |
1753 | 263 | # endif | 263 | # endif |
1755 | 264 | #endif//ifndef ZORBA_NO_UNICODE | 264 | #endif /* ZORBA_NO_ICU */ |
1756 | 265 | } | 265 | } |
1757 | 266 | 266 | ||
1758 | 267 | 267 | ||
1759 | 268 | 268 | ||
1760 | === modified file 'src/unit_tests/CMakeLists.txt' | |||
1761 | --- src/unit_tests/CMakeLists.txt 2012-03-28 05:19:57 +0000 | |||
1762 | +++ src/unit_tests/CMakeLists.txt 2012-04-07 00:45:26 +0000 | |||
1763 | @@ -29,9 +29,9 @@ | |||
1764 | 29 | tokenizer.cpp) | 29 | tokenizer.cpp) |
1765 | 30 | ENDIF (NOT ZORBA_NO_FULL_TEXT) | 30 | ENDIF (NOT ZORBA_NO_FULL_TEXT) |
1766 | 31 | 31 | ||
1768 | 32 | IF (NOT ZORBA_NO_UNICODE) | 32 | IF (NOT ZORBA_NO_ICU) |
1769 | 33 | LIST (APPEND UNIT_TEST_SRCS | 33 | LIST (APPEND UNIT_TEST_SRCS |
1770 | 34 | test_icu_streambuf.cpp) | 34 | test_icu_streambuf.cpp) |
1772 | 35 | ENDIF (NOT ZORBA_NO_UNICODE) | 35 | ENDIF (NOT ZORBA_NO_ICU) |
1773 | 36 | 36 | ||
1774 | 37 | # vim:set et sw=2 tw=2: | 37 | # vim:set et sw=2 tw=2: |
1775 | 38 | 38 | ||
1776 | === modified file 'src/unit_tests/string.cpp' | |||
1777 | --- src/unit_tests/string.cpp 2012-03-28 05:19:57 +0000 | |||
1778 | +++ src/unit_tests/string.cpp 2012-04-07 00:45:26 +0000 | |||
1779 | @@ -569,6 +569,7 @@ | |||
1780 | 569 | ASSERT_TRUE( t == s ); | 569 | ASSERT_TRUE( t == s ); |
1781 | 570 | } | 570 | } |
1782 | 571 | 571 | ||
1783 | 572 | #ifndef ZORBA_NO_ICU | ||
1784 | 572 | template<class StringType> | 573 | template<class StringType> |
1785 | 573 | static void test_to_string_from_wchar_t() { | 574 | static void test_to_string_from_wchar_t() { |
1786 | 574 | wchar_t const w[] = L"hello"; | 575 | wchar_t const w[] = L"hello"; |
1787 | @@ -578,6 +579,7 @@ | |||
1788 | 578 | for ( string::size_type i = 0; i < s.length(); ++i ) | 579 | for ( string::size_type i = 0; i < s.length(); ++i ) |
1789 | 579 | ASSERT_TRUE( s[i] == w[i] ); | 580 | ASSERT_TRUE( s[i] == w[i] ); |
1790 | 580 | } | 581 | } |
1791 | 582 | #endif /* ZORBA_NO_ICU */ | ||
1792 | 581 | 583 | ||
1793 | 582 | template<class StringType> | 584 | template<class StringType> |
1794 | 583 | static void test_to_upper() { | 585 | static void test_to_upper() { |
1795 | @@ -605,6 +607,7 @@ | |||
1796 | 605 | } | 607 | } |
1797 | 606 | } | 608 | } |
1798 | 607 | 609 | ||
1799 | 610 | #ifndef ZORBA_NO_ICU | ||
1800 | 608 | static void test_to_wchar_t() { | 611 | static void test_to_wchar_t() { |
1801 | 609 | string const s = "hello"; | 612 | string const s = "hello"; |
1802 | 610 | wchar_t *w; | 613 | wchar_t *w; |
1803 | @@ -616,6 +619,7 @@ | |||
1804 | 616 | ASSERT_TRUE( w[i] == s[i] ); | 619 | ASSERT_TRUE( w[i] == s[i] ); |
1805 | 617 | delete[] w; | 620 | delete[] w; |
1806 | 618 | } | 621 | } |
1807 | 622 | #endif /* ZORBA_NO_ICU */ | ||
1808 | 619 | 623 | ||
1809 | 620 | static void test_trim_start() { | 624 | static void test_trim_start() { |
1810 | 621 | char const *s; | 625 | char const *s; |
1811 | @@ -873,16 +877,20 @@ | |||
1812 | 873 | test_to_string_from_utf8<zstring>(); | 877 | test_to_string_from_utf8<zstring>(); |
1813 | 874 | test_to_string_from_utf8<zstring_p>(); | 878 | test_to_string_from_utf8<zstring_p>(); |
1814 | 875 | 879 | ||
1815 | 880 | #ifndef ZORBA_NO_ICU | ||
1816 | 876 | test_to_string_from_wchar_t<string>(); | 881 | test_to_string_from_wchar_t<string>(); |
1817 | 877 | test_to_string_from_wchar_t<zstring>(); | 882 | test_to_string_from_wchar_t<zstring>(); |
1818 | 878 | test_to_string_from_wchar_t<zstring_p>(); | 883 | test_to_string_from_wchar_t<zstring_p>(); |
1819 | 884 | #endif /* ZORBA_NO_ICU */ | ||
1820 | 879 | 885 | ||
1821 | 880 | test_to_upper<string>(); | 886 | test_to_upper<string>(); |
1822 | 881 | test_to_upper<zstring>(); | 887 | test_to_upper<zstring>(); |
1823 | 882 | test_to_upper<zstring_p>(); | 888 | test_to_upper<zstring_p>(); |
1824 | 883 | test_to_upper<String>(); | 889 | test_to_upper<String>(); |
1825 | 884 | 890 | ||
1826 | 891 | #ifndef ZORBA_NO_ICU | ||
1827 | 885 | test_to_wchar_t(); | 892 | test_to_wchar_t(); |
1828 | 893 | #endif /* ZORBA_NO_ICU */ | ||
1829 | 886 | 894 | ||
1830 | 887 | test_trim_start(); | 895 | test_trim_start(); |
1831 | 888 | test_trim_end(); | 896 | test_trim_end(); |
1832 | 889 | 897 | ||
1833 | === modified file 'src/unit_tests/unit_test_list.h' | |||
1834 | --- src/unit_tests/unit_test_list.h 2012-03-28 05:19:57 +0000 | |||
1835 | +++ src/unit_tests/unit_test_list.h 2012-04-07 00:45:26 +0000 | |||
1836 | @@ -36,9 +36,9 @@ | |||
1837 | 36 | /** | 36 | /** |
1838 | 37 | * ADD NEW UNIT TESTS HERE | 37 | * ADD NEW UNIT TESTS HERE |
1839 | 38 | */ | 38 | */ |
1841 | 39 | #ifndef ZORBA_NO_UNICODE | 39 | #ifndef ZORBA_NO_ICU |
1842 | 40 | int test_icu_streambuf( int, char*[] ); | 40 | int test_icu_streambuf( int, char*[] ); |
1844 | 41 | #endif /* ZORBA_NO_UNICODE */ | 41 | #endif /* ZORBA_NO_ICU */ |
1845 | 42 | int json_parser( int, char*[] ); | 42 | int json_parser( int, char*[] ); |
1846 | 43 | 43 | ||
1847 | 44 | void initializeTestList(); | 44 | void initializeTestList(); |
1848 | 45 | 45 | ||
1849 | === modified file 'src/unit_tests/unit_tests.cpp' | |||
1850 | --- src/unit_tests/unit_tests.cpp 2012-03-28 05:19:57 +0000 | |||
1851 | +++ src/unit_tests/unit_tests.cpp 2012-04-07 00:45:26 +0000 | |||
1852 | @@ -39,9 +39,9 @@ | |||
1853 | 39 | void initializeTestList() { | 39 | void initializeTestList() { |
1854 | 40 | libunittests["string"] = test_string; | 40 | libunittests["string"] = test_string; |
1855 | 41 | libunittests["uri"] = runUriTest; | 41 | libunittests["uri"] = runUriTest; |
1857 | 42 | #ifndef ZORBA_NO_UNICODE | 42 | #ifndef ZORBA_NO_ICU |
1858 | 43 | libunittests["icu_streambuf"] = test_icu_streambuf; | 43 | libunittests["icu_streambuf"] = test_icu_streambuf; |
1860 | 44 | #endif /* ZORBA_NO_UNICODE */ | 44 | #endif /* ZORBA_NO_ICU */ |
1861 | 45 | libunittests["json_parser"] = json_parser; | 45 | libunittests["json_parser"] = json_parser; |
1862 | 46 | libunittests["unique_ptr"] = test_unique_ptr; | 46 | libunittests["unique_ptr"] = test_unique_ptr; |
1863 | 47 | #ifndef ZORBA_NO_FULL_TEXT | 47 | #ifndef ZORBA_NO_FULL_TEXT |
1864 | 48 | 48 | ||
1865 | === modified file 'src/util/CMakeLists.txt' | |||
1866 | --- src/util/CMakeLists.txt 2012-03-28 05:19:57 +0000 | |||
1867 | +++ src/util/CMakeLists.txt 2012-04-07 00:45:26 +0000 | |||
1868 | @@ -40,14 +40,14 @@ | |||
1869 | 40 | LIST(APPEND UTIL_SRCS mmap_file.cpp) | 40 | LIST(APPEND UTIL_SRCS mmap_file.cpp) |
1870 | 41 | ENDIF(ZORBA_WITH_FILE_ACCESS) | 41 | ENDIF(ZORBA_WITH_FILE_ACCESS) |
1871 | 42 | 42 | ||
1873 | 43 | IF(ZORBA_NO_UNICODE) | 43 | IF(ZORBA_NO_ICU) |
1874 | 44 | LIST(APPEND UTIL_SRCS | 44 | LIST(APPEND UTIL_SRCS |
1876 | 45 | regex_ascii.cpp | 45 | regex_xquery.cpp |
1877 | 46 | passthru_streambuf.cpp) | 46 | passthru_streambuf.cpp) |
1879 | 47 | ELSE(ZORBA_NO_UNICODE) | 47 | ELSE(ZORBA_NO_ICU) |
1880 | 48 | LIST(APPEND UTIL_SRCS | 48 | LIST(APPEND UTIL_SRCS |
1881 | 49 | icu_streambuf.cpp) | 49 | icu_streambuf.cpp) |
1883 | 50 | ENDIF(ZORBA_NO_UNICODE) | 50 | ENDIF(ZORBA_NO_ICU) |
1884 | 51 | 51 | ||
1885 | 52 | HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx) | 52 | HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx) |
1886 | 53 | HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32) | 53 | HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32) |
1887 | 54 | 54 | ||
1888 | === modified file 'src/util/icu_streambuf.h' | |||
1889 | --- src/util/icu_streambuf.h 2012-02-04 01:26:18 +0000 | |||
1890 | +++ src/util/icu_streambuf.h 2012-04-07 00:45:26 +0000 | |||
1891 | @@ -17,6 +17,7 @@ | |||
1892 | 17 | #ifndef ZORBA_ICU_STREAMBUF_H | 17 | #ifndef ZORBA_ICU_STREAMBUF_H |
1893 | 18 | #define ZORBA_ICU_STREAMBUF_H | 18 | #define ZORBA_ICU_STREAMBUF_H |
1894 | 19 | 19 | ||
1895 | 20 | #include <unicode/ucnv.h> | ||
1896 | 20 | #include <zorba/transcode_stream.h> | 21 | #include <zorba/transcode_stream.h> |
1897 | 21 | 22 | ||
1898 | 22 | #include "util/utf8_util.h" | 23 | #include "util/utf8_util.h" |
1899 | 23 | 24 | ||
1900 | === modified file 'src/util/passthru_streambuf.cpp' | |||
1901 | --- src/util/passthru_streambuf.cpp 2012-02-04 01:26:18 +0000 | |||
1902 | +++ src/util/passthru_streambuf.cpp 2012-04-07 00:45:26 +0000 | |||
1903 | @@ -14,8 +14,8 @@ | |||
1904 | 14 | * limitations under the License. | 14 | * limitations under the License. |
1905 | 15 | */ | 15 | */ |
1906 | 16 | 16 | ||
1907 | 17 | #include "stdafx.h" | ||
1908 | 17 | #include "passthru_streambuf.h" | 18 | #include "passthru_streambuf.h" |
1909 | 18 | |||
1910 | 19 | using namespace std; | 19 | using namespace std; |
1911 | 20 | 20 | ||
1912 | 21 | namespace zorba { | 21 | namespace zorba { |
1913 | @@ -47,7 +47,7 @@ | |||
1914 | 47 | } | 47 | } |
1915 | 48 | 48 | ||
1916 | 49 | bool passthru_streambuf::is_supported( char const *cc_charset ) { | 49 | bool passthru_streambuf::is_supported( char const *cc_charset ) { |
1918 | 50 | return !is_necessary( charset ); | 50 | return !is_necessary( cc_charset ); |
1919 | 51 | } | 51 | } |
1920 | 52 | 52 | ||
1921 | 53 | passthru_streambuf::pos_type | 53 | passthru_streambuf::pos_type |
1922 | 54 | 54 | ||
1923 | === modified file 'src/util/passthru_streambuf.h' | |||
1924 | --- src/util/passthru_streambuf.h 2012-02-02 18:37:24 +0000 | |||
1925 | +++ src/util/passthru_streambuf.h 2012-04-07 00:45:26 +0000 | |||
1926 | @@ -17,8 +17,9 @@ | |||
1927 | 17 | #ifndef ZORBA_PASSTHRU_STREAMBUF_H | 17 | #ifndef ZORBA_PASSTHRU_STREAMBUF_H |
1928 | 18 | #define ZORBA_PASSTHRU_STREAMBUF_H | 18 | #define ZORBA_PASSTHRU_STREAMBUF_H |
1929 | 19 | 19 | ||
1932 | 20 | #include <zorba/transcode_streambuf.h> | 20 | #include <zorba/transcode_stream.h> |
1933 | 21 | 21 | #include "zorbatypes/zstring.h" | |
1934 | 22 | #include "util/ascii_util.h" | ||
1935 | 22 | namespace zorba { | 23 | namespace zorba { |
1936 | 23 | 24 | ||
1937 | 24 | /////////////////////////////////////////////////////////////////////////////// | 25 | /////////////////////////////////////////////////////////////////////////////// |
1938 | @@ -48,6 +49,13 @@ | |||
1939 | 48 | * @return \c true only if the character encoding is supported. | 49 | * @return \c true only if the character encoding is supported. |
1940 | 49 | */ | 50 | */ |
1941 | 50 | static bool is_supported( char const *charset ); | 51 | static bool is_supported( char const *charset ); |
1942 | 52 | static bool is_necessary( char const *cc_charset ); | ||
1943 | 53 | |||
1944 | 54 | typedef std::streambuf::char_type char_type; | ||
1945 | 55 | typedef std::streambuf::int_type int_type; | ||
1946 | 56 | typedef std::streambuf::off_type off_type; | ||
1947 | 57 | typedef std::streambuf::pos_type pos_type; | ||
1948 | 58 | typedef std::streambuf::traits_type traits_type; | ||
1949 | 51 | 59 | ||
1950 | 52 | protected: | 60 | protected: |
1951 | 53 | void imbue( std::locale const& ); | 61 | void imbue( std::locale const& ); |
1952 | 54 | 62 | ||
1953 | === modified file 'src/util/regex.cpp' | |||
1954 | --- src/util/regex.cpp 2012-03-28 05:19:57 +0000 | |||
1955 | +++ src/util/regex.cpp 2012-04-07 00:45:26 +0000 | |||
1956 | @@ -15,8 +15,6 @@ | |||
1957 | 15 | */ | 15 | */ |
1958 | 16 | #include "stdafx.h" | 16 | #include "stdafx.h" |
1959 | 17 | 17 | ||
1960 | 18 | #include "regex.h" | ||
1961 | 19 | |||
1962 | 20 | #include <cstring> | 18 | #include <cstring> |
1963 | 21 | #include <vector> | 19 | #include <vector> |
1964 | 22 | 20 | ||
1965 | @@ -28,13 +26,13 @@ | |||
1966 | 28 | 26 | ||
1967 | 29 | #include "ascii_util.h" | 27 | #include "ascii_util.h" |
1968 | 30 | #include "cxx_util.h" | 28 | #include "cxx_util.h" |
1969 | 29 | #include "regex.h" | ||
1970 | 31 | #include "stl_util.h" | 30 | #include "stl_util.h" |
1971 | 32 | 31 | ||
1972 | 33 | #define INVALID_RE_EXCEPTION(...) \ | 32 | #define INVALID_RE_EXCEPTION(...) \ |
1973 | 34 | XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) ) | 33 | XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) ) |
1974 | 35 | 34 | ||
1977 | 36 | 35 | #ifndef ZORBA_NO_ICU | |
1976 | 37 | #ifndef ZORBA_NO_UNICODE | ||
1978 | 38 | # include <unicode/uversion.h> | 36 | # include <unicode/uversion.h> |
1979 | 39 | U_NAMESPACE_USE | 37 | U_NAMESPACE_USE |
1980 | 40 | 38 | ||
1981 | @@ -103,6 +101,7 @@ | |||
1982 | 103 | 101 | ||
1983 | 104 | bool got_backslash = false; | 102 | bool got_backslash = false; |
1984 | 105 | bool in_char_class = false; // within [...] | 103 | bool in_char_class = false; // within [...] |
1985 | 104 | bool is_first_char = true; // to check ^ placement | ||
1986 | 106 | 105 | ||
1987 | 107 | bool in_backref = false; // '\'[1-9][0-9]* | 106 | bool in_backref = false; // '\'[1-9][0-9]* |
1988 | 108 | unsigned backref_no = 0; // 1-based | 107 | unsigned backref_no = 0; // 1-based |
1989 | @@ -231,6 +230,8 @@ | |||
1990 | 231 | ++open_cap_subs; | 230 | ++open_cap_subs; |
1991 | 232 | cap_sub.push_back( true ); | 231 | cap_sub.push_back( true ); |
1992 | 233 | cur_cap_sub = cap_sub.size(); | 232 | cur_cap_sub = cap_sub.size(); |
1993 | 233 | is_first_char = true; | ||
1994 | 234 | goto append; | ||
1995 | 234 | } | 235 | } |
1996 | 235 | break; | 236 | break; |
1997 | 236 | case ')': | 237 | case ')': |
1998 | @@ -245,8 +246,10 @@ | |||
1999 | 245 | case '[': | 246 | case '[': |
2000 | 246 | if ( q_flag ) | 247 | if ( q_flag ) |
2001 | 247 | *icu_re += '\\'; | 248 | *icu_re += '\\'; |
2003 | 248 | else | 249 | else { |
2004 | 249 | in_char_class = true; | 250 | in_char_class = true; |
2005 | 251 | goto append; | ||
2006 | 252 | } | ||
2007 | 250 | break; | 253 | break; |
2008 | 251 | case ']': | 254 | case ']': |
2009 | 252 | if ( q_flag ) | 255 | if ( q_flag ) |
2010 | @@ -254,6 +257,19 @@ | |||
2011 | 254 | else | 257 | else |
2012 | 255 | in_char_class = false; | 258 | in_char_class = false; |
2013 | 256 | break; | 259 | break; |
2014 | 260 | case '^': | ||
2015 | 261 | if ( q_flag ) | ||
2016 | 262 | *icu_re += '\\'; | ||
2017 | 263 | else if ( !is_first_char && !in_char_class ) | ||
2018 | 264 | throw INVALID_RE_EXCEPTION( xq_re, ZED( UnescapedChar_3 ), *xq_c ); | ||
2019 | 265 | break; | ||
2020 | 266 | case '|': | ||
2021 | 267 | if ( q_flag ) | ||
2022 | 268 | *icu_re += '\\'; | ||
2023 | 269 | else { | ||
2024 | 270 | is_first_char = true; | ||
2025 | 271 | goto append; | ||
2026 | 272 | } | ||
2027 | 257 | default: | 273 | default: |
2028 | 258 | if ( x_flag && ascii::is_space( *xq_c ) ) { | 274 | if ( x_flag && ascii::is_space( *xq_c ) ) { |
2029 | 259 | if ( !in_char_class ) | 275 | if ( !in_char_class ) |
2030 | @@ -265,37 +281,42 @@ | |||
2031 | 265 | // | 281 | // |
2032 | 266 | *icu_re += '\\'; | 282 | *icu_re += '\\'; |
2033 | 267 | } | 283 | } |
2036 | 268 | } | 284 | } // switch |
2037 | 269 | } | 285 | } // else |
2038 | 286 | is_first_char = false; | ||
2039 | 287 | append: | ||
2040 | 270 | *icu_re += *xq_c; | 288 | *icu_re += *xq_c; |
2041 | 271 | } // FOR_EACH | 289 | } // FOR_EACH |
2042 | 272 | 290 | ||
2056 | 273 | if ( i_flag ) { | 291 | if ( !q_flag ) { |
2057 | 274 | // | 292 | if ( i_flag ) { |
2058 | 275 | // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i" | 293 | // |
2059 | 276 | // flag. For example, "\p{Lu}" continues to match upper-case letters only. | 294 | // XQuery 3.0 F&O 5.6.1.1: All other constructs are unaffected by the "i" |
2060 | 277 | // | 295 | // flag. For example, "\p{Lu}" continues to match upper-case letters |
2061 | 278 | // However, ICU lower-cases everything for the 'i' flag; hence we have to | 296 | // only. |
2062 | 279 | // turn off the 'i' flag for just the \p{Lu}. | 297 | // |
2063 | 280 | // | 298 | // However, ICU lower-cases everything for the 'i' flag; hence we have to |
2064 | 281 | // Note that the "6" and "12" below are correct since "\\" represents a | 299 | // turn off the 'i' flag for just the \p{Lu}. |
2065 | 282 | // single '\'. | 300 | // |
2066 | 283 | // | 301 | // Note that the "6" and "12" below are correct since "\\" represents a |
2067 | 284 | ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 ); | 302 | // single '\'. |
2068 | 285 | } | 303 | // |
2069 | 304 | ascii::replace_all( *icu_re, "\\p{Lu}", 6, "(?-i:\\p{Lu})", 12 ); | ||
2070 | 305 | } | ||
2071 | 286 | 306 | ||
2084 | 287 | // | 307 | // |
2085 | 288 | // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a | 308 | // XML Schema Part 2 F.1.1: [Unicode Database] groups code points into a |
2086 | 289 | // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement, | 309 | // number of blocks such as Basic Latin (i.e., ASCII), Latin-1 Supplement, |
2087 | 290 | // Hangul Jamo, CJK Compatibility, etc. The set containing all characters | 310 | // Hangul Jamo, CJK Compatibility, etc. The set containing all characters |
2088 | 291 | // that have block name X (with all white space stripped out), can be | 311 | // that have block name X (with all white space stripped out), can be |
2089 | 292 | // identified with a block escape \p{IsX}. | 312 | // identified with a block escape \p{IsX}. |
2090 | 293 | // | 313 | // |
2091 | 294 | // However, ICU uses \p{InX} rather than \p{IsX}. | 314 | // However, ICU uses \p{InX} rather than \p{IsX}. |
2092 | 295 | // | 315 | // |
2093 | 296 | // Note that the "5" below is correct since "\\" represents a single '\'. | 316 | // Note that the "5" below is correct since "\\" represents a single '\'. |
2094 | 297 | // | 317 | // |
2095 | 298 | ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 ); | 318 | ascii::replace_all( *icu_re, "\\p{Is", 5, "\\p{In", 5 ); |
2096 | 319 | } // q_flag | ||
2097 | 299 | } | 320 | } |
2098 | 300 | 321 | ||
2099 | 301 | /////////////////////////////////////////////////////////////////////////////// | 322 | /////////////////////////////////////////////////////////////////////////////// |
2100 | @@ -442,11 +463,11 @@ | |||
2101 | 442 | } | 463 | } |
2102 | 443 | 464 | ||
2103 | 444 | } // namespace unicode | 465 | } // namespace unicode |
2109 | 445 | 466 | } // namespace zorba | |
2110 | 446 | }//namespace zorba | 467 | |
2111 | 447 | 468 | /////////////////////////////////////////////////////////////////////////////// | |
2112 | 448 | 469 | ||
2113 | 449 | #else /* ZORBA_NO_UNICODE */ | 470 | #else /* ZORBA_NO_ICU */ |
2114 | 450 | 471 | ||
2115 | 451 | #include "zorbatypes/zstring.h" | 472 | #include "zorbatypes/zstring.h" |
2116 | 452 | 473 | ||
2117 | @@ -470,7 +491,7 @@ | |||
2118 | 470 | case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break; | 491 | case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break; |
2119 | 471 | case 's': flags |= REGEX_ASCII_DOTALL; break; | 492 | case 's': flags |= REGEX_ASCII_DOTALL; break; |
2120 | 472 | case 'm': flags |= REGEX_ASCII_MULTILINE; break; | 493 | case 'm': flags |= REGEX_ASCII_MULTILINE; break; |
2122 | 473 | case 'x': flags |= REGEX_ASCII_COMMENTS; break; | 494 | case 'x': flags |= REGEX_ASCII_NO_WHITESPACE; break; |
2123 | 474 | case 'q': flags |= REGEX_ASCII_LITERAL; break; | 495 | case 'q': flags |= REGEX_ASCII_LITERAL; break; |
2124 | 475 | default: | 496 | default: |
2125 | 476 | throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) ); | 497 | throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) ); |
2126 | @@ -483,6 +504,7 @@ | |||
2127 | 483 | void regex::compile( char const *pattern, char const *flags) | 504 | void regex::compile( char const *pattern, char const *flags) |
2128 | 484 | { | 505 | { |
2129 | 485 | parsed_flags = parse_regex_flags(flags); | 506 | parsed_flags = parse_regex_flags(flags); |
2130 | 507 | regex_xquery::CRegexXQuery_parser regex_parser; | ||
2131 | 486 | regex_matcher = regex_parser.parse(pattern, parsed_flags); | 508 | regex_matcher = regex_parser.parse(pattern, parsed_flags); |
2132 | 487 | if(!regex_matcher) | 509 | if(!regex_matcher) |
2133 | 488 | throw INVALID_RE_EXCEPTION(pattern); | 510 | throw INVALID_RE_EXCEPTION(pattern); |
2134 | @@ -517,6 +539,8 @@ | |||
2135 | 517 | bool regex::next_token( char const *s, size_type *pos, zstring *token, | 539 | bool regex::next_token( char const *s, size_type *pos, zstring *token, |
2136 | 518 | bool *matched) | 540 | bool *matched) |
2137 | 519 | { | 541 | { |
2138 | 542 | if(!s[*pos]) | ||
2139 | 543 | return false; | ||
2140 | 520 | bool retval; | 544 | bool retval; |
2141 | 521 | int match_pos; | 545 | int match_pos; |
2142 | 522 | int matched_len; | 546 | int matched_len; |
2143 | @@ -528,14 +552,8 @@ | |||
2144 | 528 | token->assign(s+*pos, match_pos); | 552 | token->assign(s+*pos, match_pos); |
2145 | 529 | *pos += match_pos + matched_len; | 553 | *pos += match_pos + matched_len; |
2146 | 530 | if(matched) | 554 | if(matched) |
2155 | 531 | if(match_pos) | 555 | *matched = true; |
2156 | 532 | *matched = true; | 556 | return true; |
2149 | 533 | else | ||
2150 | 534 | *matched = false; | ||
2151 | 535 | if(match_pos) | ||
2152 | 536 | return true; | ||
2153 | 537 | else | ||
2154 | 538 | return false; | ||
2157 | 539 | } | 557 | } |
2158 | 540 | else | 558 | else |
2159 | 541 | { | 559 | { |
2160 | @@ -544,7 +562,7 @@ | |||
2161 | 544 | *pos += strlen(s+*pos); | 562 | *pos += strlen(s+*pos); |
2162 | 545 | if(matched) | 563 | if(matched) |
2163 | 546 | *matched = false; | 564 | *matched = false; |
2165 | 547 | return s[*pos] != 0; | 565 | return true; |
2166 | 548 | } | 566 | } |
2167 | 549 | } | 567 | } |
2168 | 550 | 568 | ||
2169 | @@ -554,13 +572,9 @@ | |||
2170 | 554 | int matched_pos; | 572 | int matched_pos; |
2171 | 555 | int matched_len; | 573 | int matched_len; |
2172 | 556 | 574 | ||
2176 | 557 | bool prev_align = regex_matcher->set_align_begin(true); | 575 | retval = regex_matcher->match_anywhere(s, parsed_flags|REGEX_ASCII_WHOLE_MATCH, &matched_pos, &matched_len); |
2174 | 558 | retval = regex_matcher->match_from(s, parsed_flags, &matched_pos, &matched_len); | ||
2175 | 559 | regex_matcher->set_align_begin(prev_align); | ||
2177 | 560 | if(!retval) | 576 | if(!retval) |
2178 | 561 | return false; | 577 | return false; |
2179 | 562 | if(matched_len != strlen(s)) | ||
2180 | 563 | return false; | ||
2181 | 564 | return true; | 578 | return true; |
2182 | 565 | } | 579 | } |
2183 | 566 | 580 | ||
2184 | @@ -587,14 +601,19 @@ | |||
2185 | 587 | //look for dollars | 601 | //look for dollars |
2186 | 588 | if(*temprepl == '\\') | 602 | if(*temprepl == '\\') |
2187 | 589 | { | 603 | { |
2191 | 590 | temprepl++; | 604 | if(!(parsed_flags & REGEX_ASCII_LITERAL)) |
2192 | 591 | if(!*temprepl || (*temprepl != '\\') || (*temprepl != '$'))//Invalid replacement string. | 605 | { |
2193 | 592 | throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) ); | 606 | temprepl++; |
2194 | 607 | if(!*temprepl) | ||
2195 | 608 | temprepl--; | ||
2196 | 609 | else if((*temprepl != '\\') && (*temprepl != '$'))//Invalid replacement string. | ||
2197 | 610 | throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) ); | ||
2198 | 611 | } | ||
2199 | 593 | result->append(1, *temprepl); | 612 | result->append(1, *temprepl); |
2200 | 594 | temprepl++; | 613 | temprepl++; |
2201 | 595 | continue; | 614 | continue; |
2202 | 596 | } | 615 | } |
2204 | 597 | if(*temprepl == '$') | 616 | if((*temprepl == '$') && !(parsed_flags & REGEX_ASCII_LITERAL)) |
2205 | 598 | { | 617 | { |
2206 | 599 | temprepl++; | 618 | temprepl++; |
2207 | 600 | index = 0; | 619 | index = 0; |
2208 | @@ -648,7 +667,7 @@ | |||
2209 | 648 | if(retval) | 667 | if(retval) |
2210 | 649 | { | 668 | { |
2211 | 650 | m_match_pos += m_pos; | 669 | m_match_pos += m_pos; |
2213 | 651 | m_pos = m_match_pos = m_matched_len; | 670 | m_pos = m_match_pos + m_matched_len; |
2214 | 652 | } | 671 | } |
2215 | 653 | else | 672 | else |
2216 | 654 | { | 673 | { |
2217 | @@ -666,35 +685,30 @@ | |||
2218 | 666 | return (int)regex_matcher->get_indexed_regex_count(); | 685 | return (int)regex_matcher->get_indexed_regex_count(); |
2219 | 667 | } | 686 | } |
2220 | 668 | 687 | ||
2245 | 669 | int regex::get_match_start( int groupId ) | 688 | bool regex::get_match_start_end_bytes( int groupId, int *start, int *end ) |
2246 | 670 | { | 689 | { |
2247 | 671 | if(groupId == 0) | 690 | *start = -1; |
2248 | 672 | return m_match_pos; | 691 | *end = -1; |
2249 | 673 | if(groupId > (int)regex_matcher->get_indexed_regex_count()) | 692 | if(groupId == 0) |
2250 | 674 | return -1; | 693 | { |
2251 | 675 | const char *submatched_source; | 694 | *start = m_match_pos; |
2252 | 676 | int submatched_len; | 695 | *end = m_match_pos + m_matched_len; |
2253 | 677 | if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len)) | 696 | return true; |
2254 | 678 | return -1; | 697 | } |
2255 | 679 | return submatched_source - s_in_.c_str(); | 698 | if(groupId > (int)regex_matcher->get_indexed_regex_count()) |
2256 | 680 | } | 699 | return false; |
2257 | 681 | 700 | const char *submatched_source; | |
2258 | 682 | int regex::get_match_end( int groupId ) | 701 | int submatched_len; |
2259 | 683 | { | 702 | if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len)) |
2260 | 684 | if(groupId == 0) | 703 | return false; |
2261 | 685 | return m_match_pos + m_matched_len; | 704 | *start = submatched_source - s_in_.c_str(); |
2262 | 686 | if(groupId > (int)regex_matcher->get_indexed_regex_count()) | 705 | *end = *start + submatched_len; |
2263 | 687 | return -1; | 706 | return true; |
2240 | 688 | const char *submatched_source; | ||
2241 | 689 | int submatched_len; | ||
2242 | 690 | if(!regex_matcher->get_indexed_match(groupId, &submatched_source, &submatched_len)) | ||
2243 | 691 | return -1; | ||
2244 | 692 | return submatched_source - s_in_.c_str() + submatched_len; | ||
2264 | 693 | } | 707 | } |
2265 | 694 | 708 | ||
2266 | 695 | } // namespace unicode | 709 | } // namespace unicode |
2267 | 696 | } // namespace zorba | 710 | } // namespace zorba |
2269 | 697 | #endif /* ZORBA_NO_UNICODE */ | 711 | #endif /* ZORBA_NO_ICU */ |
2270 | 698 | 712 | ||
2271 | 699 | /////////////////////////////////////////////////////////////////////////////// | 713 | /////////////////////////////////////////////////////////////////////////////// |
2272 | 700 | 714 | ||
2273 | 701 | 715 | ||
2274 | === modified file 'src/util/regex.h' | |||
2275 | --- src/util/regex.h 2012-03-28 05:19:57 +0000 | |||
2276 | +++ src/util/regex.h 2012-04-07 00:45:26 +0000 | |||
2277 | @@ -17,15 +17,13 @@ | |||
2278 | 17 | #ifndef ZORBA_REGEX_H | 17 | #ifndef ZORBA_REGEX_H |
2279 | 18 | #define ZORBA_REGEX_H | 18 | #define ZORBA_REGEX_H |
2280 | 19 | 19 | ||
2281 | 20 | #ifndef ZORBA_NO_UNICODE | ||
2282 | 21 | #include <unicode/regex.h> | ||
2283 | 22 | #endif | ||
2284 | 23 | |||
2285 | 24 | #include "cxx_util.h" | 20 | #include "cxx_util.h" |
2286 | 25 | #include "unicode_util.h" | 21 | #include "unicode_util.h" |
2287 | 26 | #include "zorbatypes/zstring.h" | 22 | #include "zorbatypes/zstring.h" |
2288 | 27 | 23 | ||
2290 | 28 | #ifndef ZORBA_NO_UNICODE | 24 | #ifndef ZORBA_NO_ICU |
2291 | 25 | |||
2292 | 26 | #include <unicode/regex.h> | ||
2293 | 29 | 27 | ||
2294 | 30 | namespace zorba { | 28 | namespace zorba { |
2295 | 31 | 29 | ||
2296 | @@ -496,15 +494,17 @@ | |||
2297 | 496 | } // namespace unicode | 494 | } // namespace unicode |
2298 | 497 | } // namespace zorba | 495 | } // namespace zorba |
2299 | 498 | 496 | ||
2303 | 499 | #else ///ZORBA_NO_UNICODE (ascii part:) | 497 | /////////////////////////////////////////////////////////////////////////////// |
2304 | 500 | 498 | ||
2305 | 501 | #include "util/regex_ascii.h" | 499 | #else /* ZORBA_NO_ICU */ |
2306 | 500 | |||
2307 | 501 | #include "util/regex_xquery.h" | ||
2308 | 502 | #include <string> | 502 | #include <string> |
2309 | 503 | 503 | ||
2310 | 504 | namespace zorba{ | 504 | namespace zorba{ |
2311 | 505 | /** | 505 | /** |
2312 | 506 | * Converts an XQuery regular expression to the form used by the regular | 506 | * Converts an XQuery regular expression to the form used by the regular |
2314 | 507 | * expression library Zorba is using (here regex_ascii). | 507 | * expression library Zorba is using (here regex_xquery). |
2315 | 508 | * | 508 | * |
2316 | 509 | * @param xq_re The XQuery regular expression. | 509 | * @param xq_re The XQuery regular expression. |
2317 | 510 | * @param lib_re A pointer to the resulting library regular expression. | 510 | * @param lib_re A pointer to the resulting library regular expression. |
2318 | @@ -525,7 +525,7 @@ | |||
2319 | 525 | /** | 525 | /** |
2320 | 526 | * Constructs a %regex. | 526 | * Constructs a %regex. |
2321 | 527 | */ | 527 | */ |
2323 | 528 | regex() : regex_matcher( NULL ) { } | 528 | regex() : regex_matcher( nullptr ) { } |
2324 | 529 | 529 | ||
2325 | 530 | /** | 530 | /** |
2326 | 531 | * Destroys a %regex. | 531 | * Destroys a %regex. |
2327 | @@ -835,31 +835,21 @@ | |||
2328 | 835 | 835 | ||
2329 | 836 | /** | 836 | /** |
2330 | 837 | * Get the start position of the matched group. | 837 | * Get the start position of the matched group. |
2334 | 838 | * If groupId is zero, then the start position of the whole match is returned. | 838 | * If groupId is zero, then the start and end position of the whole match is returned. |
2335 | 839 | * If groupId is non-zero, then the start position of that group is returned. | 839 | * If groupId is non-zero, then the start and end position of that group is returned. |
2336 | 840 | * If that group has not been matched, -1 is returned. | 840 | * If that group has not been matched, false is returned. |
2337 | 841 | * | 841 | * |
2338 | 842 | * @param groupId the id of the group, either zero for the entire regex, | 842 | * @param groupId the id of the group, either zero for the entire regex, |
2339 | 843 | * or [1 .. group_count] for that specific group | 843 | * or [1 .. group_count] for that specific group |
2341 | 844 | * @return the start position, zero based, or -1 if that group didn't match | 844 | * @param start to return start position in bytes |
2342 | 845 | * @param end to return end position in bytes | ||
2343 | 846 | * @return true if that group exists and has been matched | ||
2344 | 845 | */ | 847 | */ |
2346 | 846 | int get_match_start( int groupId = 0 ); | 848 | bool get_match_start_end_bytes( int groupId, int *start, int *end ); |
2347 | 847 | 849 | ||
2348 | 848 | /** | ||
2349 | 849 | * Get the end position of the matched group. | ||
2350 | 850 | * If groupId is zero, then the end position of the whole match is returned. | ||
2351 | 851 | * If groupId is non-zero, then the end position of that group is returned. | ||
2352 | 852 | * If that group has not been matched, -1 is returned. | ||
2353 | 853 | * | ||
2354 | 854 | * @param groupId the id of the group, either zero for the entire regex, | ||
2355 | 855 | * or [1 .. group_count] for that specific group | ||
2356 | 856 | * @return the end position, zero based, or -1 if that group didn't match | ||
2357 | 857 | */ | ||
2358 | 858 | int get_match_end( int groupId = 0 ); | ||
2359 | 859 | 850 | ||
2360 | 860 | private: | 851 | private: |
2363 | 861 | regex_ascii::CRegexAscii_parser regex_parser; | 852 | regex_xquery::CRegexXQuery_regex *regex_matcher; |
2362 | 862 | regex_ascii::CRegexAscii_regex *regex_matcher; | ||
2364 | 863 | uint32_t parsed_flags; | 853 | uint32_t parsed_flags; |
2365 | 864 | 854 | ||
2366 | 865 | zstring s_in_; | 855 | zstring s_in_; |
2367 | @@ -873,15 +863,13 @@ | |||
2368 | 873 | regex( regex const& ); | 863 | regex( regex const& ); |
2369 | 874 | regex& operator=( regex const& ); | 864 | regex& operator=( regex const& ); |
2370 | 875 | }; | 865 | }; |
2371 | 866 | |||
2372 | 867 | /////////////////////////////////////////////////////////////////////////////// | ||
2373 | 868 | |||
2374 | 876 | } // namespace unicode | 869 | } // namespace unicode |
2375 | 877 | } // namespace zorba | 870 | } // namespace zorba |
2376 | 878 | 871 | ||
2383 | 879 | #endif /* ZORBA_NO_UNICODE */ | 872 | #endif /* ZORBA_NO_ICU */ |
2378 | 880 | |||
2379 | 881 | |||
2380 | 882 | /////////////////////////////////////////////////////////////////////////////// | ||
2381 | 883 | |||
2382 | 884 | |||
2384 | 885 | #endif /* ZORBA_REGEX_H */ | 873 | #endif /* ZORBA_REGEX_H */ |
2385 | 886 | /* | 874 | /* |
2386 | 887 | * Local variables: | 875 | * Local variables: |
2387 | 888 | 876 | ||
2388 | === renamed file 'src/util/regex_ascii.cpp' => 'src/util/regex_xquery.cpp' | |||
2389 | --- src/util/regex_ascii.cpp 2012-03-28 05:19:57 +0000 | |||
2390 | +++ src/util/regex_xquery.cpp 2012-04-07 00:45:26 +0000 | |||
2391 | @@ -1,4 +1,4 @@ | |||
2393 | 1 | a/* | 1 | /* |
2394 | 2 | * Copyright 2006-2008 The FLWOR Foundation. | 2 | * Copyright 2006-2008 The FLWOR Foundation. |
2395 | 3 | * | 3 | * |
2396 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
2397 | @@ -18,12 +18,15 @@ | |||
2398 | 18 | 18 | ||
2399 | 19 | #include "diagnostics/xquery_diagnostics.h" | 19 | #include "diagnostics/xquery_diagnostics.h" |
2400 | 20 | 20 | ||
2402 | 21 | #include "regex_ascii.h" | 21 | #include "regex_xquery.h" |
2403 | 22 | #include <string.h> | 22 | #include <string.h> |
2404 | 23 | #include "zorbatypes/chartype.h" | 23 | #include "zorbatypes/chartype.h" |
2405 | 24 | #include "util/unicode_categories.h" | ||
2406 | 25 | #include "util/ascii_util.h" | ||
2407 | 26 | #include "util/utf8_string.h" | ||
2408 | 24 | 27 | ||
2409 | 25 | namespace zorba { | 28 | namespace zorba { |
2411 | 26 | namespace regex_ascii{ | 29 | namespace regex_xquery{ |
2412 | 27 | //ascii regular expression matching | 30 | //ascii regular expression matching |
2413 | 28 | 31 | ||
2414 | 29 | /*http://www.w3.org/TR/xmlschema-2/#regexs | 32 | /*http://www.w3.org/TR/xmlschema-2/#regexs |
2415 | @@ -62,96 +65,138 @@ | |||
2416 | 62 | + http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented) | 65 | + http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented) |
2417 | 63 | */ | 66 | */ |
2418 | 64 | 67 | ||
2419 | 68 | |||
2420 | 69 | static bool compare_ascii_i(const char *str1, const char *str2) | ||
2421 | 70 | { | ||
2422 | 71 | while(*str1 && *str2) | ||
2423 | 72 | { | ||
2424 | 73 | if(ascii::to_lower(*str1) != ascii::to_lower(*str2)) | ||
2425 | 74 | return false; | ||
2426 | 75 | str1++; | ||
2427 | 76 | str2++; | ||
2428 | 77 | } | ||
2429 | 78 | if(*str1 || *str2) | ||
2430 | 79 | return false; | ||
2431 | 80 | return true; | ||
2432 | 81 | } | ||
2433 | 82 | |||
2434 | 83 | static bool compare_unicode_ni(const char *str1, const char *str2, int len) | ||
2435 | 84 | { | ||
2436 | 85 | while(len > 0) | ||
2437 | 86 | { | ||
2438 | 87 | const char *temp_str1 = str1; | ||
2439 | 88 | const char *temp_str2 = str2; | ||
2440 | 89 | unicode::code_point cp1 = unicode::to_upper(utf8::next_char(temp_str1)); | ||
2441 | 90 | unicode::code_point cp2 = unicode::to_upper(utf8::next_char(temp_str2)); | ||
2442 | 91 | if(cp1 != cp2) | ||
2443 | 92 | return false; | ||
2444 | 93 | len -= temp_str1-str1; | ||
2445 | 94 | str1 = temp_str1; | ||
2446 | 95 | str2 = temp_str2; | ||
2447 | 96 | } | ||
2448 | 97 | return true; | ||
2449 | 98 | } | ||
2450 | 99 | static utf8::size_type myutf8len(const char *source) | ||
2451 | 100 | { | ||
2452 | 101 | utf8::size_type len = utf8::char_length(*source); | ||
2453 | 102 | if(!len) | ||
2454 | 103 | return 1; | ||
2455 | 104 | else | ||
2456 | 105 | return len; | ||
2457 | 106 | } | ||
2458 | 65 | //////////////////////////////////// | 107 | //////////////////////////////////// |
2459 | 66 | ////Regular expression parsing and building of the tree | 108 | ////Regular expression parsing and building of the tree |
2460 | 67 | //////////////////////////////////// | 109 | //////////////////////////////////// |
2461 | 68 | 110 | ||
2463 | 69 | CRegexAscii_regex* CRegexAscii_parser::parse(const char *pattern, unsigned int flags) | 111 | CRegexXQuery_regex* CRegexXQuery_parser::parse(const char *pattern, unsigned int flags) |
2464 | 70 | { | 112 | { |
2465 | 71 | this->flags = flags; | 113 | this->flags = flags; |
2466 | 72 | bool align_begin = false; | ||
2467 | 73 | 114 | ||
2468 | 74 | if(!(flags & REGEX_ASCII_LITERAL) && (pattern[0] == '^')) | ||
2469 | 75 | align_begin = true; | ||
2470 | 76 | |||
2471 | 77 | int regex_len; | 115 | int regex_len; |
2473 | 78 | CRegexAscii_regex* regex = parse_regexp(pattern + (align_begin?1:0), &regex_len); | 116 | CRegexXQuery_regex* regex = parse_regexp(pattern, &regex_len); |
2474 | 79 | 117 | ||
2475 | 80 | if(regex) | ||
2476 | 81 | regex->set_align_begin(align_begin); | ||
2477 | 82 | |||
2478 | 83 | return regex; | 118 | return regex; |
2479 | 84 | } | 119 | } |
2480 | 85 | 120 | ||
2481 | 86 | //until '\0' or ')' | 121 | //until '\0' or ')' |
2483 | 87 | CRegexAscii_regex* CRegexAscii_parser::parse_regexp(const char *pattern, | 122 | CRegexXQuery_regex* CRegexXQuery_parser::parse_regexp(const char *pattern, |
2484 | 88 | int *regex_len) | 123 | int *regex_len) |
2485 | 89 | { | 124 | { |
2486 | 90 | *regex_len = 0; | 125 | *regex_len = 0; |
2487 | 91 | int branch_len; | 126 | int branch_len; |
2488 | 92 | regex_depth++; | 127 | regex_depth++; |
2490 | 93 | CRegexAscii_regex *regex = new CRegexAscii_regex(current_regex); | 128 | std::auto_ptr<CRegexXQuery_regex> regex(new CRegexXQuery_regex(current_regex)); |
2491 | 94 | if(!current_regex) | 129 | if(!current_regex) |
2493 | 95 | current_regex = regex; | 130 | current_regex = regex.get(); |
2494 | 96 | if(regex_depth >= 2) | 131 | if(regex_depth >= 2) |
2495 | 97 | { | 132 | { |
2496 | 98 | //mark this as group if it does not start with ?: | 133 | //mark this as group if it does not start with ?: |
2497 | 99 | if(pattern[0] != '?' || pattern[1] != ':') | 134 | if(pattern[0] != '?' || pattern[1] != ':') |
2499 | 100 | current_regex->subregex.push_back(regex); | 135 | current_regex->subregex.push_back(regex.get()); |
2500 | 101 | else | 136 | else |
2501 | 102 | *regex_len = 2; | 137 | *regex_len = 2; |
2502 | 103 | } | 138 | } |
2504 | 104 | CRegexAscii_branch *branch; | 139 | CRegexXQuery_branch *branch; |
2505 | 140 | bool must_read_another_branch = true; | ||
2506 | 105 | while(pattern[*regex_len] && (pattern[*regex_len] != ')')) | 141 | while(pattern[*regex_len] && (pattern[*regex_len] != ')')) |
2507 | 106 | { | 142 | { |
2508 | 107 | branch = parse_branch(pattern+*regex_len, &branch_len); | 143 | branch = parse_branch(pattern+*regex_len, &branch_len); |
2509 | 108 | if(!branch) | 144 | if(!branch) |
2510 | 109 | { | 145 | { |
2511 | 110 | regex_depth--; | 146 | regex_depth--; |
2512 | 111 | delete regex; | ||
2513 | 112 | return NULL; | 147 | return NULL; |
2514 | 113 | } | 148 | } |
2515 | 114 | regex->add_branch(branch); | 149 | regex->add_branch(branch); |
2516 | 115 | *regex_len += branch_len; | 150 | *regex_len += branch_len; |
2517 | 151 | if(pattern[*regex_len] == '|') | ||
2518 | 152 | (*regex_len)++; | ||
2519 | 153 | else | ||
2520 | 154 | must_read_another_branch = false; | ||
2521 | 116 | } | 155 | } |
2523 | 117 | if((current_regex == regex) && (pattern[*regex_len] == ')')) | 156 | if((current_regex == regex.get()) && (pattern[*regex_len] == ')')) |
2524 | 118 | { | 157 | { |
2526 | 119 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_MISMATCHED_PAREN)) ); | 158 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISMATCHED_PAREN)) ); |
2527 | 120 | } | 159 | } |
2528 | 121 | if(pattern[*regex_len]) | 160 | if(pattern[*regex_len]) |
2529 | 122 | (*regex_len)++; | 161 | (*regex_len)++; |
2530 | 162 | if(must_read_another_branch) | ||
2531 | 163 | regex->add_branch(new CRegexXQuery_branch(current_regex));//add empty branch | ||
2532 | 123 | regex->flags = 0;//finished initialization | 164 | regex->flags = 0;//finished initialization |
2533 | 124 | regex_depth--; | 165 | regex_depth--; |
2535 | 125 | return regex; | 166 | return regex.release(); |
2536 | 126 | } | 167 | } |
2537 | 127 | 168 | ||
2539 | 128 | CRegexAscii_branch* CRegexAscii_parser::parse_branch(const char *pattern, int *branch_len) | 169 | CRegexXQuery_branch* CRegexXQuery_parser::parse_branch(const char *pattern, int *branch_len) |
2540 | 129 | { | 170 | { |
2541 | 130 | int piece_len; | 171 | int piece_len; |
2542 | 131 | 172 | ||
2545 | 132 | CRegexAscii_branch *branch = new CRegexAscii_branch(current_regex); | 173 | std::auto_ptr<CRegexXQuery_branch> branch(new CRegexXQuery_branch(current_regex)); |
2546 | 133 | CRegexAscii_piece *piece; | 174 | CRegexXQuery_piece *piece; |
2547 | 134 | *branch_len = 0; | 175 | *branch_len = 0; |
2548 | 135 | while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')')) | 176 | while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')')) |
2549 | 136 | { | 177 | { |
2550 | 137 | piece = parse_piece(pattern+*branch_len, &piece_len); | 178 | piece = parse_piece(pattern+*branch_len, &piece_len); |
2551 | 138 | if(!piece) | 179 | if(!piece) |
2552 | 139 | { | 180 | { |
2553 | 140 | delete branch; | ||
2554 | 141 | return NULL; | 181 | return NULL; |
2555 | 142 | } | 182 | } |
2556 | 183 | if(branch->piece_list.size() && dynamic_cast<CRegexXQuery_pinstart*>(piece->atom)) | ||
2557 | 184 | { | ||
2558 | 185 | //found ^ that is not at the beginning of branch | ||
2559 | 186 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), '^') ); | ||
2560 | 187 | } | ||
2561 | 143 | branch->add_piece(piece); | 188 | branch->add_piece(piece); |
2562 | 144 | *branch_len += piece_len; | 189 | *branch_len += piece_len; |
2563 | 145 | } | 190 | } |
2567 | 146 | if(pattern[*branch_len] == '|') | 191 | //if(pattern[*branch_len] == '|') |
2568 | 147 | (*branch_len)++; | 192 | // (*branch_len)++; |
2569 | 148 | return branch; | 193 | return branch.release(); |
2570 | 149 | } | 194 | } |
2571 | 150 | 195 | ||
2572 | 151 | //piece = atom + quantifier | 196 | //piece = atom + quantifier |
2574 | 152 | CRegexAscii_piece* CRegexAscii_parser::parse_piece(const char *pattern, int *piece_len) | 197 | CRegexXQuery_piece* CRegexXQuery_parser::parse_piece(const char *pattern, int *piece_len) |
2575 | 153 | { | 198 | { |
2577 | 154 | CRegexAscii_piece *piece = new CRegexAscii_piece; | 199 | std::auto_ptr<CRegexXQuery_piece> piece(new CRegexXQuery_piece); |
2578 | 155 | IRegexAtom *atom; | 200 | IRegexAtom *atom; |
2579 | 156 | *piece_len = 0; | 201 | *piece_len = 0; |
2580 | 157 | 202 | ||
2581 | @@ -160,19 +205,18 @@ | |||
2582 | 160 | atom = read_atom(pattern, &atom_len); | 205 | atom = read_atom(pattern, &atom_len); |
2583 | 161 | if(!atom) | 206 | if(!atom) |
2584 | 162 | { | 207 | { |
2585 | 163 | delete piece; | ||
2586 | 164 | return NULL; | 208 | return NULL; |
2587 | 165 | } | 209 | } |
2588 | 166 | piece->set_atom(atom); | 210 | piece->set_atom(atom); |
2589 | 167 | if(!(flags & REGEX_ASCII_LITERAL)) | 211 | if(!(flags & REGEX_ASCII_LITERAL)) |
2591 | 168 | read_quantifier(piece, pattern+atom_len, &quantif_len); | 212 | read_quantifier(piece.get(), pattern+atom_len, &quantif_len); |
2592 | 169 | 213 | ||
2593 | 170 | *piece_len += atom_len + quantif_len; | 214 | *piece_len += atom_len + quantif_len; |
2594 | 171 | 215 | ||
2596 | 172 | return piece; | 216 | return piece.release(); |
2597 | 173 | } | 217 | } |
2598 | 174 | 218 | ||
2600 | 175 | char CRegexAscii_parser::myishex(char c) | 219 | char CRegexXQuery_parser::myishex(char c) |
2601 | 176 | { | 220 | { |
2602 | 177 | if((c >= '0') && (c <= '9')) | 221 | if((c >= '0') && (c <= '9')) |
2603 | 178 | return c-'0'+1; | 222 | return c-'0'+1; |
2604 | @@ -183,26 +227,125 @@ | |||
2605 | 183 | return 0;//not a hex | 227 | return 0;//not a hex |
2606 | 184 | } | 228 | } |
2607 | 185 | 229 | ||
2614 | 186 | bool CRegexAscii_parser::myisdigit(char c) | 230 | bool CRegexXQuery_parser::myisdigit(char c) |
2615 | 187 | { | 231 | { |
2616 | 188 | return (c >= '0') || (c <= '9'); | 232 | return (c >= '0') && (c <= '9'); |
2617 | 189 | } | 233 | } |
2618 | 190 | 234 | ||
2619 | 191 | char CRegexAscii_parser::readChar(const char *pattern, int *char_len, bool *is_multichar) | 235 | bool CRegexXQuery_parser::myisletterAZ(char c) |
2620 | 236 | { | ||
2621 | 237 | return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')); | ||
2622 | 238 | } | ||
2623 | 239 | |||
2624 | 240 | static const unicode::code_point specials_extcp[] = {0xFFF0, 0xFFFD, 0}; | ||
2625 | 241 | |||
2626 | 242 | static CRegexXQuery_parser::block_escape_t block_escape[] = | ||
2627 | 243 | { | ||
2628 | 244 | {{0x0000, 0x007F}, NULL, "BasicLatin"}, | ||
2629 | 245 | {{0x0080, 0x00FF}, NULL, "Latin-1Supplement"}, | ||
2630 | 246 | {{0x0100, 0x017F}, NULL, "LatinExtended-A"}, | ||
2631 | 247 | {{0x0180, 0x024F}, NULL, "LatinExtended-B"}, | ||
2632 | 248 | {{0x0250, 0x02AF}, NULL, "IPAExtensions"}, | ||
2633 | 249 | {{0x02B0, 0x02FF}, NULL, "SpacingModifierLetters"}, | ||
2634 | 250 | {{0x0300, 0x036F}, NULL, "CombiningDiacriticalMarks"}, | ||
2635 | 251 | {{0x0370, 0x03FF}, NULL, "Greek"}, | ||
2636 | 252 | {{0x0400, 0x04FF}, NULL, "Cyrillic"}, | ||
2637 | 253 | {{0x0530, 0x058F}, NULL, "Armenian"}, | ||
2638 | 254 | {{0x0590, 0x05FF}, NULL, "Hebrew"}, | ||
2639 | 255 | {{0x0600, 0x06FF}, NULL, "Arabic"}, | ||
2640 | 256 | {{0x0700, 0x074F}, NULL, "Syriac"}, | ||
2641 | 257 | {{0x0780, 0x07BF}, NULL, "Thaana"}, | ||
2642 | 258 | {{0x0900, 0x097F}, NULL, "Devanagari"}, | ||
2643 | 259 | {{0x0980, 0x09FF}, NULL, "Bengali"}, | ||
2644 | 260 | {{0x0A00, 0x0A7F}, NULL, "Gurmukhi"}, | ||
2645 | 261 | {{0x0A80, 0x0AFF}, NULL, "Gujarati"}, | ||
2646 | 262 | {{0x0B00, 0x0B7F}, NULL, "Oriya"}, | ||
2647 | 263 | {{0x0B80, 0x0BFF}, NULL, "Tamil"}, | ||
2648 | 264 | {{0x0C00, 0x0C7F}, NULL, "Telugu"}, | ||
2649 | 265 | {{0x0C80, 0x0CFF}, NULL, "Kannada"}, | ||
2650 | 266 | {{0x0D00, 0x0D7F}, NULL, "Malayalam"}, | ||
2651 | 267 | {{0x0D80, 0x0DFF}, NULL, "Sinhala"}, | ||
2652 | 268 | {{0x0E00, 0x0E7F}, NULL, "Thai"}, | ||
2653 | 269 | {{0x0E80, 0x0EFF}, NULL, "Lao"}, | ||
2654 | 270 | {{0x0F00, 0x0FFF}, NULL, "Tibetan"}, | ||
2655 | 271 | {{0x1000, 0x109F}, NULL, "Myanmar"}, | ||
2656 | 272 | {{0x10A0, 0x10FF}, NULL, "Georgian"}, | ||
2657 | 273 | {{0x1100, 0x11FF}, NULL, "HangulJamo"}, | ||
2658 | 274 | {{0x1200, 0x137F}, NULL, "Ethiopic"}, | ||
2659 | 275 | {{0x13A0, 0x13FF}, NULL, "Cherokee"}, | ||
2660 | 276 | {{0x1400, 0x167F}, NULL, "UnifiedCanadianAboriginalSyllabics"}, | ||
2661 | 277 | {{0x1680, 0x169F}, NULL, "Ogham"}, | ||
2662 | 278 | {{0x16A0, 0x16FF}, NULL, "Runic"}, | ||
2663 | 279 | {{0x1780, 0x17FF}, NULL, "Khmer"}, | ||
2664 | 280 | {{0x1800, 0x18AF}, NULL, "Mongolian"}, | ||
2665 | 281 | {{0x1E00, 0x1EFF}, NULL, "LatinExtendedAdditional"}, | ||
2666 | 282 | {{0x1F00, 0x1FFF}, NULL, "GreekExtended"}, | ||
2667 | 283 | {{0x2000, 0x206F}, NULL, "GeneralPunctuation"}, | ||
2668 | 284 | {{0x2070, 0x209F}, NULL, "SuperscriptsandSubscripts"}, | ||
2669 | 285 | {{0x20A0, 0x20CF}, NULL, "CurrencySymbols"}, | ||
2670 | 286 | {{0x20D0, 0x20FF}, NULL, "CombiningMarksforSymbols"}, | ||
2671 | 287 | {{0x2100, 0x214F}, NULL, "LetterlikeSymbols"}, | ||
2672 | 288 | {{0x2150, 0x218F}, NULL, "NumberForms"}, | ||
2673 | 289 | {{0x2190, 0x21FF}, NULL, "Arrows"}, | ||
2674 | 290 | {{0x2200, 0x22FF}, NULL, "MathematicalOperators"}, | ||
2675 | 291 | {{0x2300, 0x23FF}, NULL, "MiscellaneousTechnical"}, | ||
2676 | 292 | {{0x2400, 0x243F}, NULL, "ControlPictures"}, | ||
2677 | 293 | {{0x2440, 0x245F}, NULL, "OpticalCharacterRecognition"}, | ||
2678 | 294 | {{0x2460, 0x24FF}, NULL, "EnclosedAlphanumerics"}, | ||
2679 | 295 | {{0x2500, 0x257F}, NULL, "BoxDrawing"}, | ||
2680 | 296 | {{0x2580, 0x259F}, NULL, "BlockElements"}, | ||
2681 | 297 | {{0x25A0, 0x25FF}, NULL, "GeometricShapes"}, | ||
2682 | 298 | {{0x2600, 0x26FF}, NULL, "MiscellaneousSymbols"}, | ||
2683 | 299 | {{0x2700, 0x27BF}, NULL, "Dingbats"}, | ||
2684 | 300 | {{0x2800, 0x28FF}, NULL, "BraillePatterns"}, | ||
2685 | 301 | {{0x2E80, 0x2EFF}, NULL, "CJKRadicalsSupplement"}, | ||
2686 | 302 | {{0x2F00, 0x2FDF}, NULL, "KangxiRadicals"}, | ||
2687 | 303 | {{0x2FF0, 0x2FFF}, NULL, "IdeographicDescriptionCharacters"}, | ||
2688 | 304 | {{0x3000, 0x303F}, NULL, "CJKSymbolsandPunctuation"}, | ||
2689 | 305 | {{0x3040, 0x309F}, NULL, "Hiragana"}, | ||
2690 | 306 | {{0x30A0, 0x30FF}, NULL, "Katakana"}, | ||
2691 | 307 | {{0x3100, 0x312F}, NULL, "Bopomofo"}, | ||
2692 | 308 | {{0x3130, 0x318F}, NULL, "HangulCompatibilityJamo"}, | ||
2693 | 309 | {{0x3190, 0x319F}, NULL, "Kanbun"}, | ||
2694 | 310 | {{0x31A0, 0x31BF}, NULL, "BopomofoExtended"}, | ||
2695 | 311 | {{0x3200, 0x32FF}, NULL, "EnclosedCJKLettersandMonths"}, | ||
2696 | 312 | {{0x3300, 0x33FF}, NULL, "CJKCompatibility"}, | ||
2697 | 313 | {{0x3400, 0x4DB5}, NULL, "CJKUnifiedIdeographsExtensionA"}, | ||
2698 | 314 | {{0x4E00, 0x9FFF}, NULL, "CJKUnifiedIdeographs"}, | ||
2699 | 315 | {{0xA000, 0xA48F}, NULL, "YiSyllables"}, | ||
2700 | 316 | {{0xA490, 0xA4CF}, NULL, "YiRadicals"}, | ||
2701 | 317 | {{0xAC00, 0xD7A3}, NULL, "HangulSyllables"}, | ||
2702 | 318 | {{0xE000, 0xF8FF}, NULL, "PrivateUse"}, | ||
2703 | 319 | {{0xF900, 0xFAFF}, NULL, "CJKCompatibilityIdeographs"}, | ||
2704 | 320 | {{0xFB00, 0xFB4F}, NULL, "AlphabeticPresentationForms"}, | ||
2705 | 321 | {{0xFB50, 0xFDFF}, NULL, "ArabicPresentationForms-A"}, | ||
2706 | 322 | {{0xFE20, 0xFE2F}, NULL, "CombiningHalfMarks"}, | ||
2707 | 323 | {{0xFE30, 0xFE4F}, NULL, "CJKCompatibilityForms"}, | ||
2708 | 324 | {{0xFE50, 0xFE6F}, NULL, "SmallFormVariants"}, | ||
2709 | 325 | {{0xFE70, 0xFEFE}, NULL, "ArabicPresentationForms-B"}, | ||
2710 | 326 | {{0xFEFF, 0xFEFF}, specials_extcp, "Specials"}, | ||
2711 | 327 | {{0xFF00, 0xFFEF}, NULL, "HalfwidthandFullwidthForms"} | ||
2712 | 328 | }; | ||
2713 | 329 | |||
2714 | 330 | CRegexXQuery_charmatch* CRegexXQuery_parser::readChar(const char *pattern, | ||
2715 | 331 | int *char_len, | ||
2716 | 332 | enum CHARGROUP_t *multichar_type) | ||
2717 | 192 | { | 333 | { |
2718 | 193 | char c = 0; | 334 | char c = 0; |
2719 | 194 | *char_len = 0; | 335 | *char_len = 0; |
2721 | 195 | *is_multichar = false; | 336 | *multichar_type = CHARGROUP_NO_MULTICHAR; |
2722 | 196 | switch(pattern[*char_len]) | 337 | switch(pattern[*char_len]) |
2723 | 197 | { | 338 | { |
2724 | 198 | case '\\': | 339 | case '\\': |
2726 | 199 | { (*char_len)++; | 340 | { |
2727 | 341 | (*char_len)++; | ||
2728 | 200 | switch(pattern[*char_len]) | 342 | switch(pattern[*char_len]) |
2729 | 201 | { | 343 | { |
2733 | 202 | case 'n': c = '\n';break; | 344 | case 'n': c = '\n';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c); |
2734 | 203 | case 'r': c = '\r';break; | 345 | case 'r': c = '\r';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c); |
2735 | 204 | case 't': c = '\t';break; | 346 | case 't': c = '\t';(*char_len)++;return new CRegexXQuery_char_ascii(current_regex, c); |
2736 | 205 | case '\\': | 347 | case '\\': |
2737 | 348 | case '/'://+ | ||
2738 | 206 | case '|': | 349 | case '|': |
2739 | 207 | case '.': | 350 | case '.': |
2740 | 208 | case '?': | 351 | case '?': |
2741 | @@ -216,19 +359,205 @@ | |||
2742 | 216 | case '['://#x5B | 359 | case '['://#x5B |
2743 | 217 | case ']'://#x5D | 360 | case ']'://#x5D |
2744 | 218 | case '^'://#x5E | 361 | case '^'://#x5E |
2745 | 362 | case '$'://+ | ||
2746 | 219 | c = pattern[*char_len]; | 363 | c = pattern[*char_len]; |
2748 | 220 | break; | 364 | (*char_len)++; |
2749 | 365 | *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII; | ||
2750 | 366 | return new CRegexXQuery_char_ascii(current_regex, c); | ||
2751 | 221 | case 'p'://catEsc | 367 | case 'p'://catEsc |
2752 | 222 | case 'P'://complEsc | 368 | case 'P'://complEsc |
2753 | 369 | { | ||
2754 | 223 | //ignore the prop for now | 370 | //ignore the prop for now |
2760 | 224 | c = pattern[*char_len]; | 371 | *multichar_type = CHARGROUP_FLAGS_MULTICHAR_p;//(CHARGROUP_t)((pattern[*char_len] == 'P') ? 128 : 0); |
2761 | 225 | *is_multichar = true; | 372 | bool is_reverse = (pattern[*char_len] == 'P'); |
2762 | 226 | if(pattern[*char_len+1] == '{') | 373 | c = 0; |
2763 | 227 | { | 374 | if(pattern[(*char_len)+1] != '{') |
2764 | 228 | while(pattern[*char_len] != '}') | 375 | { |
2765 | 376 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) ); | ||
2766 | 377 | } | ||
2767 | 378 | (*char_len) += 2; | ||
2768 | 379 | switch(pattern[*char_len]) | ||
2769 | 380 | {//IsCategory | ||
2770 | 381 | case 'L': | ||
2771 | 382 | { | ||
2772 | 383 | switch(pattern[(*char_len)+1]) | ||
2773 | 384 | { | ||
2774 | 385 | case '}': | ||
2775 | 386 | c = unicode::UNICODE_Ll + 50;break; | ||
2776 | 387 | case 'u': | ||
2777 | 388 | c = unicode::UNICODE_Lu; (*char_len)++;break; | ||
2778 | 389 | case 'l': | ||
2779 | 390 | c = unicode::UNICODE_Ll; (*char_len)++;break; | ||
2780 | 391 | case 't': | ||
2781 | 392 | c = unicode::UNICODE_Lt; (*char_len)++;break; | ||
2782 | 393 | case 'm': | ||
2783 | 394 | c = unicode::UNICODE_Lm; (*char_len)++;break; | ||
2784 | 395 | case 'o': | ||
2785 | 396 | c = unicode::UNICODE_Lo; (*char_len)++;break; | ||
2786 | 397 | default: | ||
2787 | 398 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PL_CONSTRUCT)) ); | ||
2788 | 399 | } | ||
2789 | 400 | }break; | ||
2790 | 401 | case 'M': | ||
2791 | 402 | { | ||
2792 | 403 | switch(pattern[(*char_len)+1]) | ||
2793 | 404 | { | ||
2794 | 405 | case '}': | ||
2795 | 406 | c = unicode::UNICODE_Mc + 50;break; | ||
2796 | 407 | case 'n': | ||
2797 | 408 | c = unicode::UNICODE_Mn; (*char_len)++;break; | ||
2798 | 409 | case 'c': | ||
2799 | 410 | c = unicode::UNICODE_Mc; (*char_len)++;break; | ||
2800 | 411 | case 'e': | ||
2801 | 412 | c = unicode::UNICODE_Me; (*char_len)++;break; | ||
2802 | 413 | default: | ||
2803 | 414 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PM_CONSTRUCT)) ); | ||
2804 | 415 | } | ||
2805 | 416 | }break; | ||
2806 | 417 | case 'N': | ||
2807 | 418 | { | ||
2808 | 419 | switch(pattern[(*char_len)+1]) | ||
2809 | 420 | { | ||
2810 | 421 | case '}': | ||
2811 | 422 | c = unicode::UNICODE_Nd + 50;break; | ||
2812 | 423 | case 'd': | ||
2813 | 424 | c = unicode::UNICODE_Nd; (*char_len)++;break; | ||
2814 | 425 | case 'l': | ||
2815 | 426 | c = unicode::UNICODE_Nl; (*char_len)++;break; | ||
2816 | 427 | case 'o': | ||
2817 | 428 | c = unicode::UNICODE_No; (*char_len)++;break; | ||
2818 | 429 | default: | ||
2819 | 430 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PN_CONSTRUCT)) ); | ||
2820 | 431 | } | ||
2821 | 432 | }break; | ||
2822 | 433 | case 'P': | ||
2823 | 434 | { | ||
2824 | 435 | switch(pattern[(*char_len)+1]) | ||
2825 | 436 | { | ||
2826 | 437 | case '}': | ||
2827 | 438 | c = unicode::UNICODE_Pc + 50;break; | ||
2828 | 439 | case 'c': | ||
2829 | 440 | c = unicode::UNICODE_Pc; (*char_len)++;break; | ||
2830 | 441 | case 'd': | ||
2831 | 442 | c = unicode::UNICODE_Pd; (*char_len)++;break; | ||
2832 | 443 | case 's': | ||
2833 | 444 | c = unicode::UNICODE_Ps; (*char_len)++;break; | ||
2834 | 445 | case 'e': | ||
2835 | 446 | c = unicode::UNICODE_Pe; (*char_len)++;break; | ||
2836 | 447 | case 'i': | ||
2837 | 448 | c = unicode::UNICODE_Pi; (*char_len)++;break; | ||
2838 | 449 | case 'f': | ||
2839 | 450 | c = unicode::UNICODE_Pf; (*char_len)++;break; | ||
2840 | 451 | case 'o': | ||
2841 | 452 | c = unicode::UNICODE_Po; (*char_len)++;break; | ||
2842 | 453 | default: | ||
2843 | 454 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PP_CONSTRUCT)) ); | ||
2844 | 455 | } | ||
2845 | 456 | }break; | ||
2846 | 457 | case 'Z': | ||
2847 | 458 | { | ||
2848 | 459 | switch(pattern[(*char_len)+1]) | ||
2849 | 460 | { | ||
2850 | 461 | case '}': | ||
2851 | 462 | c = unicode::UNICODE_Zl + 50;break; | ||
2852 | 463 | case 's': | ||
2853 | 464 | c = unicode::UNICODE_Zs; (*char_len)++;break; | ||
2854 | 465 | case 'l': | ||
2855 | 466 | c = unicode::UNICODE_Zl; (*char_len)++;break; | ||
2856 | 467 | case 'p': | ||
2857 | 468 | c = unicode::UNICODE_Zp; (*char_len)++;break; | ||
2858 | 469 | default: | ||
2859 | 470 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PZ_CONSTRUCT)) ); | ||
2860 | 471 | } | ||
2861 | 472 | }break; | ||
2862 | 473 | case 'S': | ||
2863 | 474 | { | ||
2864 | 475 | switch(pattern[(*char_len)+1]) | ||
2865 | 476 | { | ||
2866 | 477 | case '}': | ||
2867 | 478 | c = unicode::UNICODE_Sc + 50;break; | ||
2868 | 479 | case 'm': | ||
2869 | 480 | c = unicode::UNICODE_Sm; (*char_len)++;break; | ||
2870 | 481 | case 'c': | ||
2871 | 482 | c = unicode::UNICODE_Sc; (*char_len)++;break; | ||
2872 | 483 | case 'k': | ||
2873 | 484 | c = unicode::UNICODE_Sk; (*char_len)++;break; | ||
2874 | 485 | case 'o': | ||
2875 | 486 | c = unicode::UNICODE_So; (*char_len)++;break; | ||
2876 | 487 | default: | ||
2877 | 488 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PS_CONSTRUCT)) ); | ||
2878 | 489 | } | ||
2879 | 490 | }break; | ||
2880 | 491 | case 'C': | ||
2881 | 492 | { | ||
2882 | 493 | switch(pattern[(*char_len)+1]) | ||
2883 | 494 | { | ||
2884 | 495 | case '}': | ||
2885 | 496 | c = unicode::UNICODE_Cc + 50;break; | ||
2886 | 497 | case 'c': | ||
2887 | 498 | c = unicode::UNICODE_Cc; (*char_len)++;break; | ||
2888 | 499 | case 'f': | ||
2889 | 500 | c = unicode::UNICODE_Cf; (*char_len)++;break; | ||
2890 | 501 | case 'o': | ||
2891 | 502 | c = unicode::UNICODE_Co; (*char_len)++;break; | ||
2892 | 503 | case 'n': | ||
2893 | 504 | c = unicode::UNICODE_Cn; (*char_len)++;break; | ||
2894 | 505 | default: | ||
2895 | 506 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PC_CONSTRUCT)) ); | ||
2896 | 507 | } | ||
2897 | 508 | }break; | ||
2898 | 509 | }//end switch | ||
2899 | 510 | if(c) | ||
2900 | 511 | { | ||
2901 | 512 | if(pattern[(*char_len) + 1] != '}') | ||
2902 | 513 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) ); | ||
2903 | 514 | (*char_len)++; | ||
2904 | 515 | (*char_len)++; | ||
2905 | 516 | return new CRegexXQuery_multicharP(current_regex, c, is_reverse); | ||
2906 | 517 | } | ||
2907 | 518 | if(pattern[*char_len] == 'I') | ||
2908 | 519 | { | ||
2909 | 520 | if(pattern[(*char_len)+1] == 's')//IsBlock | ||
2910 | 521 | { | ||
2911 | 522 | *multichar_type = CHARGROUP_FLAGS_MULTICHAR_Is; | ||
2912 | 523 | (*char_len) += 2; | ||
2913 | 524 | zstring block_name; | ||
2914 | 525 | char tempc = pattern[(*char_len)]; | ||
2915 | 526 | while(tempc && (tempc != '}')) | ||
2916 | 527 | { | ||
2917 | 528 | if(!myisletterAZ(tempc) && !myisdigit(tempc) && (tempc != '-')) | ||
2918 | 529 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) ); | ||
2919 | 530 | block_name.append(1, tempc); | ||
2920 | 531 | (*char_len)++; | ||
2921 | 532 | tempc = pattern[(*char_len)]; | ||
2922 | 533 | } | ||
2923 | 534 | if(!tempc) | ||
2924 | 535 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) ); | ||
2925 | 536 | //search for the block name | ||
2926 | 537 | int i; | ||
2927 | 538 | int nr_blocks = sizeof(block_escape)/sizeof(CRegexXQuery_parser::block_escape_t); | ||
2928 | 539 | for(i=0;i<nr_blocks;i++) | ||
2929 | 540 | { | ||
2930 | 541 | if(compare_ascii_i(block_name.c_str(), block_escape[i].group_name)) | ||
2931 | 542 | { | ||
2932 | 543 | c = i; | ||
2933 | 544 | break; | ||
2934 | 545 | } | ||
2935 | 546 | } | ||
2936 | 547 | if(i==nr_blocks) | ||
2937 | 548 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PIs_CONSTRUCT)) ); | ||
2938 | 229 | (*char_len)++; | 549 | (*char_len)++; |
2941 | 230 | } | 550 | return new CRegexXQuery_multicharIs(current_regex, i, is_reverse); |
2942 | 231 | break; | 551 | } |
2943 | 552 | else | ||
2944 | 553 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) ); | ||
2945 | 554 | } | ||
2946 | 555 | else | ||
2947 | 556 | { | ||
2948 | 557 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) ); | ||
2949 | 558 | } | ||
2950 | 559 | break;//unreachable | ||
2951 | 560 | }//end case 'p' | ||
2952 | 232 | //multiCharEsc | 561 | //multiCharEsc |
2953 | 233 | case 's': | 562 | case 's': |
2954 | 234 | case 'S': | 563 | case 'S': |
2955 | @@ -240,40 +569,104 @@ | |||
2956 | 240 | case 'D': | 569 | case 'D': |
2957 | 241 | case 'w': | 570 | case 'w': |
2958 | 242 | case 'W': | 571 | case 'W': |
2960 | 243 | *is_multichar = true; | 572 | *multichar_type = CHARGROUP_FLAGS_MULTICHAR_OTHER; |
2961 | 244 | c = pattern[*char_len]; | 573 | c = pattern[*char_len]; |
2976 | 245 | break; | 574 | (*char_len)++; |
2977 | 246 | } | 575 | return new CRegexXQuery_multicharOther(current_regex, c); |
2978 | 247 | break; | 576 | case 'u'://unicode codepoint \uXXXX |
2979 | 248 | } | 577 | { |
2980 | 249 | case '#':///might be #xXX | 578 | unicode::code_point utf8c = 0; |
2981 | 250 | { | 579 | (*char_len)++; |
2982 | 251 | if((pattern[*char_len+1] == 'x') && | 580 | for(int i=0;i<4;i++) |
2983 | 252 | myishex(pattern[*char_len+2]) && myishex(pattern[*char_len+3])) | 581 | { |
2984 | 253 | { | 582 | char hex = myishex(pattern[*char_len]); |
2985 | 254 | c = (myishex(pattern[*char_len+2])-1)<<4 | (myishex(pattern[*char_len+3])-1); | 583 | if(!hex) |
2986 | 255 | *char_len += 3; | 584 | { |
2987 | 256 | break; | 585 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) ); |
2988 | 257 | } | 586 | } |
2989 | 258 | } | 587 | utf8c <<= 4; |
2990 | 588 | utf8c |= (hex-1) & 0x0f; | ||
2991 | 589 | (*char_len)++; | ||
2992 | 590 | } | ||
2993 | 591 | return create_charmatch(utf8c, NULL, 0, multichar_type); | ||
2994 | 592 | } | ||
2995 | 593 | case 'U'://unicode codepoint \UXXXXXXXX | ||
2996 | 594 | { | ||
2997 | 595 | unicode::code_point utf8c = 0; | ||
2998 | 596 | (*char_len)++; | ||
2999 | 597 | for(int i=0;i<8;i++) | ||
3000 | 598 | { | ||
3001 | 599 | char hex = myishex(pattern[*char_len]); | ||
3002 | 600 | if(!hex) | ||
3003 | 601 | { | ||
3004 | 602 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_UNICODE_CODEPOINT_u)) ); | ||
3005 | 603 | } | ||
3006 | 604 | utf8c <<= 4; | ||
3007 | 605 | utf8c |= (hex-1) & 0x0f; | ||
3008 | 606 | (*char_len)++; | ||
3009 | 607 | } | ||
3010 | 608 | return create_charmatch(utf8c, NULL, 0, multichar_type); | ||
3011 | 609 | } | ||
3012 | 610 | default: | ||
3013 | 611 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_ESC_CHAR)) ); | ||
3014 | 612 | } | ||
3015 | 613 | assert(false); | ||
3016 | 614 | break;//unreachable | ||
3017 | 615 | }//end case '\' | ||
3018 | 259 | default: | 616 | default: |
3030 | 260 | c = pattern[*char_len]; | 617 | { |
3031 | 261 | break; | 618 | const char *temp_pattern = pattern; |
3032 | 262 | } | 619 | unicode::code_point utf8c = utf8::next_char(temp_pattern); |
3033 | 263 | 620 | (*char_len) = temp_pattern - pattern; | |
3034 | 264 | (*char_len)++; | 621 | return create_charmatch(utf8c, pattern, *char_len, multichar_type); |
3035 | 265 | return c; | 622 | } |
3036 | 266 | } | 623 | } |
3037 | 267 | 624 | return NULL; | |
3038 | 268 | 625 | } | |
3039 | 269 | 626 | ||
3040 | 270 | IRegexAtom* CRegexAscii_parser::read_atom(const char *pattern, int *atom_len) | 627 | CRegexXQuery_charmatch *CRegexXQuery_parser::create_charmatch(unicode::code_point utf8c, |
3041 | 628 | const char *pattern, int utf8len, | ||
3042 | 629 | enum CHARGROUP_t *multichar_type) | ||
3043 | 630 | { | ||
3044 | 631 | if(utf8c <= 0x7F) | ||
3045 | 632 | { | ||
3046 | 633 | *multichar_type = CHARGROUP_FLAGS_ONECHAR_ASCII; | ||
3047 | 634 | if(flags & REGEX_ASCII_CASE_INSENSITIVE) | ||
3048 | 635 | return new CRegexXQuery_char_ascii_i(current_regex, (char)utf8c); | ||
3049 | 636 | else | ||
3050 | 637 | return new CRegexXQuery_char_ascii(current_regex, (char)utf8c); | ||
3051 | 638 | } | ||
3052 | 639 | else | ||
3053 | 640 | { | ||
3054 | 641 | *multichar_type = CHARGROUP_FLAGS_ONECHAR_UNICODE; | ||
3055 | 642 | if(flags & REGEX_ASCII_CASE_INSENSITIVE) | ||
3056 | 643 | return new CRegexXQuery_char_unicode_i(current_regex, utf8c); | ||
3057 | 644 | else | ||
3058 | 645 | { | ||
3059 | 646 | if(pattern) | ||
3060 | 647 | return new CRegexXQuery_char_unicode(current_regex, pattern, utf8len); | ||
3061 | 648 | else | ||
3062 | 649 | return new CRegexXQuery_char_unicode_cp(current_regex, utf8c); | ||
3063 | 650 | } | ||
3064 | 651 | } | ||
3065 | 652 | } | ||
3066 | 653 | |||
3067 | 654 | IRegexAtom* CRegexXQuery_parser::read_atom(const char *pattern, int *atom_len) | ||
3068 | 271 | { | 655 | { |
3069 | 272 | *atom_len = 0; | 656 | *atom_len = 0; |
3074 | 273 | char c; | 657 | if(flags & REGEX_ASCII_LITERAL) |
3075 | 274 | bool is_end_line = false; | 658 | { |
3076 | 275 | c = pattern[*atom_len]; | 659 | unicode::code_point utf8c; |
3077 | 276 | if((!(flags & REGEX_ASCII_LITERAL)) && (c == '\\')) | 660 | //bool is_end_line = false; |
3078 | 661 | const char *temp_pattern = pattern; | ||
3079 | 662 | utf8c = utf8::next_char(temp_pattern); | ||
3080 | 663 | *atom_len = temp_pattern - pattern; | ||
3081 | 664 | enum CHARGROUP_t multichar_type; | ||
3082 | 665 | return create_charmatch(utf8c, pattern, *atom_len, &multichar_type); | ||
3083 | 666 | } | ||
3084 | 667 | |||
3085 | 668 | char c = *pattern; | ||
3086 | 669 | if(c == '\\') | ||
3087 | 277 | { | 670 | { |
3088 | 278 | //check for back reference | 671 | //check for back reference |
3089 | 279 | if(myisdigit(pattern[(*atom_len)+1])) | 672 | if(myisdigit(pattern[(*atom_len)+1])) |
3090 | @@ -281,13 +674,13 @@ | |||
3091 | 281 | (*atom_len)++; | 674 | (*atom_len)++; |
3092 | 282 | if(pattern[*atom_len] == '0') | 675 | if(pattern[*atom_len] == '0') |
3093 | 283 | { | 676 | { |
3095 | 284 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) ); | 677 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), 0, current_regex->subregex.size()) ); |
3096 | 285 | } | 678 | } |
3097 | 286 | unsigned int backref = pattern[*atom_len] - '0'; | 679 | unsigned int backref = pattern[*atom_len] - '0'; |
3098 | 287 | if((backref > current_regex->subregex.size()) || | 680 | if((backref > current_regex->subregex.size()) || |
3099 | 288 | (current_regex->subregex.at(backref-1)->flags != 0)) | 681 | (current_regex->subregex.at(backref-1)->flags != 0)) |
3100 | 289 | { | 682 | { |
3102 | 290 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) ); | 683 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), backref, current_regex->subregex.size()) ); |
3103 | 291 | } | 684 | } |
3104 | 292 | while(current_regex->subregex.size() >= backref*10) | 685 | while(current_regex->subregex.size() >= backref*10) |
3105 | 293 | { | 686 | { |
3106 | @@ -303,70 +696,86 @@ | |||
3107 | 303 | break; | 696 | break; |
3108 | 304 | } | 697 | } |
3109 | 305 | } | 698 | } |
3111 | 306 | return new CRegexAscii_backref(current_regex, backref); | 699 | (*atom_len)++; |
3112 | 700 | return new CRegexXQuery_backref(current_regex, backref); | ||
3113 | 307 | } | 701 | } |
3114 | 308 | } | 702 | } |
3115 | 703 | if(c == '^') | ||
3116 | 704 | { | ||
3117 | 705 | (*atom_len)++; | ||
3118 | 706 | return new CRegexXQuery_pinstart(current_regex); | ||
3119 | 707 | } | ||
3120 | 708 | if((c == '}') || (c == '{') || (c == '?') || (c == '*') || (c == '+') || (c == '|')) | ||
3121 | 709 | { | ||
3122 | 710 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), c) ); | ||
3123 | 711 | } | ||
3124 | 309 | switch(c) | 712 | switch(c) |
3125 | 310 | { | 713 | { |
3126 | 311 | case '[': | 714 | case '[': |
3127 | 312 | { | 715 | { |
3137 | 313 | if(!(flags & REGEX_ASCII_LITERAL)) | 716 | (*atom_len)++; |
3138 | 314 | { | 717 | CRegexXQuery_chargroup *chargroup = NULL; |
3139 | 315 | (*atom_len)++; | 718 | int chargroup_len; |
3140 | 316 | CRegexAscii_chargroup *chargroup = NULL; | 719 | chargroup = readchargroup(pattern+*atom_len, &chargroup_len); |
3141 | 317 | int chargroup_len; | 720 | *atom_len += chargroup_len; |
3142 | 318 | chargroup = readchargroup(pattern+*atom_len, &chargroup_len); | 721 | return chargroup; |
3134 | 319 | *atom_len += chargroup_len; | ||
3135 | 320 | return chargroup; | ||
3136 | 321 | } | ||
3143 | 322 | } | 722 | } |
3144 | 323 | case '.'://WildCharEsc | 723 | case '.'://WildCharEsc |
3145 | 324 | { | 724 | { |
3152 | 325 | if(!(flags & REGEX_ASCII_LITERAL)) | 725 | (*atom_len)++; |
3153 | 326 | { | 726 | return new CRegexXQuery_wildchar(current_regex); |
3148 | 327 | CRegexAscii_wildchar *wildchar = new CRegexAscii_wildchar(current_regex); | ||
3149 | 328 | (*atom_len)++; | ||
3150 | 329 | return wildchar; | ||
3151 | 330 | } | ||
3154 | 331 | } | 727 | } |
3155 | 332 | case '('://begin an embedded reg exp | 728 | case '('://begin an embedded reg exp |
3156 | 333 | { | 729 | { |
3166 | 334 | if(!(flags & REGEX_ASCII_LITERAL)) | 730 | (*atom_len)++; |
3167 | 335 | { | 731 | CRegexXQuery_regex *emb_regex = NULL; |
3168 | 336 | (*atom_len)++; | 732 | int regex_len; |
3169 | 337 | CRegexAscii_regex *emb_regex = NULL; | 733 | emb_regex = parse_regexp(pattern + *atom_len, &regex_len); |
3170 | 338 | int regex_len; | 734 | *atom_len += regex_len; |
3171 | 339 | emb_regex = parse_regexp(pattern + *atom_len, &regex_len); | 735 | return emb_regex; |
3163 | 340 | *atom_len += regex_len; | ||
3164 | 341 | return emb_regex; | ||
3165 | 342 | } | ||
3172 | 343 | } | 736 | } |
3173 | 344 | case '$'://end line | 737 | case '$'://end line |
3178 | 345 | if(!(flags & REGEX_ASCII_LITERAL)) | 738 | //is_end_line = true; |
3179 | 346 | { | 739 | (*atom_len)++; |
3180 | 347 | is_end_line = true; | 740 | return new CRegexXQuery_endline(current_regex); |
3177 | 348 | } | ||
3181 | 349 | default: | 741 | default: |
3182 | 350 | { | 742 | { |
3184 | 351 | char c; | 743 | //char c; |
3185 | 744 | CRegexXQuery_charmatch *charmatch = NULL; | ||
3186 | 352 | int c_len; | 745 | int c_len; |
3191 | 353 | bool is_multichar = false; | 746 | CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR; |
3192 | 354 | if(!(flags & REGEX_ASCII_LITERAL)) | 747 | *atom_len = 0; |
3193 | 355 | c = readChar(pattern+*atom_len, &c_len, &is_multichar); | 748 | while(pattern[*atom_len]) |
3190 | 356 | else | ||
3194 | 357 | { | 749 | { |
3197 | 358 | c = pattern[*atom_len]; | 750 | charmatch = readChar(pattern+*atom_len, &c_len, &multichar_type); |
3198 | 359 | c_len = 1; | 751 | *atom_len += c_len; |
3199 | 752 | if((flags & REGEX_ASCII_NO_WHITESPACE) && (multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII)) | ||
3200 | 753 | { | ||
3201 | 754 | char c = (char)charmatch->get_c(); | ||
3202 | 755 | if((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n')) | ||
3203 | 756 | { | ||
3204 | 757 | //ignore this whitespace | ||
3205 | 758 | delete charmatch; | ||
3206 | 759 | continue; | ||
3207 | 760 | } | ||
3208 | 761 | else | ||
3209 | 762 | break; | ||
3210 | 763 | } | ||
3211 | 764 | else | ||
3212 | 765 | break; | ||
3213 | 360 | } | 766 | } |
3217 | 361 | CRegexAscii_chargroup *chargroup = new CRegexAscii_chargroup(current_regex); | 767 | /* |
3218 | 362 | if(is_multichar) | 768 | std::auto_ptr<CRegexXQuery_chargroup> chargroup(new CRegexXQuery_chargroup(current_regex)); |
3219 | 363 | chargroup->addMultiChar(c); | 769 | if(multichar_type) |
3220 | 770 | chargroup->addMultiChar(c, multichar_type); | ||
3221 | 364 | else if(is_end_line) | 771 | else if(is_end_line) |
3222 | 365 | chargroup->addEndLine(); | 772 | chargroup->addEndLine(); |
3223 | 366 | else | 773 | else |
3225 | 367 | chargroup->addCharRange(c, c); | 774 | chargroup->addOneChar(c); |
3226 | 368 | *atom_len += c_len; | 775 | *atom_len += c_len; |
3228 | 369 | return chargroup; | 776 | return chargroup.release(); |
3229 | 777 | */ | ||
3230 | 778 | return charmatch; | ||
3231 | 370 | } | 779 | } |
3232 | 371 | } | 780 | } |
3233 | 372 | } | 781 | } |
3234 | @@ -374,81 +783,119 @@ | |||
3235 | 374 | //read until ']' | 783 | //read until ']' |
3236 | 375 | //posCharGroup ::= ( charRange | charClassEsc )+ | 784 | //posCharGroup ::= ( charRange | charClassEsc )+ |
3237 | 376 | //charRange ::= seRange | XmlCharIncDash | 785 | //charRange ::= seRange | XmlCharIncDash |
3239 | 377 | CRegexAscii_chargroup* CRegexAscii_parser::readchargroup(const char *pattern, int *chargroup_len) | 786 | CRegexXQuery_chargroup* CRegexXQuery_parser::readchargroup(const char *pattern, int *chargroup_len) |
3240 | 378 | { | 787 | { |
3242 | 379 | CRegexAscii_chargroup *chargroup = NULL; | 788 | std::auto_ptr<CRegexXQuery_chargroup> chargroup; |
3243 | 380 | *chargroup_len = 0; | 789 | *chargroup_len = 0; |
3244 | 381 | if(pattern[*chargroup_len] == '^')//negative group | 790 | if(pattern[*chargroup_len] == '^')//negative group |
3245 | 382 | { | 791 | { |
3246 | 383 | (*chargroup_len)++; | 792 | (*chargroup_len)++; |
3248 | 384 | chargroup = new CRegexAscii_negchargroup(current_regex); | 793 | chargroup.reset(new CRegexXQuery_negchargroup(current_regex)); |
3249 | 385 | } | 794 | } |
3250 | 386 | else | 795 | else |
3252 | 387 | chargroup = new CRegexAscii_chargroup(current_regex); | 796 | chargroup.reset(new CRegexXQuery_chargroup(current_regex)); |
3253 | 388 | while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']')) | 797 | while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']')) |
3254 | 389 | { | 798 | { |
3257 | 390 | char c1, c2; | 799 | //char c1, c2; |
3258 | 391 | bool is_multichar; | 800 | CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR; |
3259 | 392 | int c1_len; | 801 | int c1_len; |
3263 | 393 | c1 = pattern[*chargroup_len]; | 802 | if((pattern[*chargroup_len] == '-') && (pattern[(*chargroup_len)+1] == '['))//charClassSub |
3261 | 394 | c2 = pattern[*chargroup_len+1]; | ||
3262 | 395 | if((c1 == '-') && (c2 == '['))//charClassSub | ||
3264 | 396 | { | 803 | { |
3265 | 397 | int classsub_len; | 804 | int classsub_len; |
3267 | 398 | CRegexAscii_chargroup *classsub = readchargroup(pattern + *chargroup_len+1 + 1, &classsub_len); | 805 | CRegexXQuery_chargroup *classsub = readchargroup(pattern + (*chargroup_len)+1 + 1, &classsub_len); |
3268 | 399 | if(!classsub) | 806 | if(!classsub) |
3269 | 400 | { | 807 | { |
3272 | 401 | delete chargroup; | 808 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_SUBCLASS)) ); |
3271 | 402 | return NULL; | ||
3273 | 403 | } | 809 | } |
3274 | 404 | chargroup->addClassSub(classsub); | 810 | chargroup->addClassSub(classsub); |
3275 | 405 | *chargroup_len += 2 + classsub_len + 1; | 811 | *chargroup_len += 2 + classsub_len + 1; |
3276 | 406 | if(pattern[*chargroup_len-1] != ']') | 812 | if(pattern[*chargroup_len-1] != ']') |
3277 | 407 | { | 813 | { |
3280 | 408 | delete chargroup; | 814 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_USE_OF_SUBCLASS)) ); |
3279 | 409 | return NULL; | ||
3281 | 410 | } | 815 | } |
3283 | 411 | return chargroup; | 816 | return chargroup.release(); |
3284 | 412 | } | 817 | } |
3285 | 413 | 818 | ||
3288 | 414 | c1 = readChar(pattern+*chargroup_len, &c1_len, &is_multichar); | 819 | std::unique_ptr<CRegexXQuery_charmatch> charmatch(readChar(pattern+*chargroup_len, &c1_len, &multichar_type)); |
3289 | 415 | if(is_multichar)//first char is multichar | 820 | if((multichar_type == CHARGROUP_FLAGS_MULTICHAR_p) || |
3290 | 821 | (multichar_type == CHARGROUP_FLAGS_MULTICHAR_Is) || | ||
3291 | 822 | (multichar_type == CHARGROUP_FLAGS_MULTICHAR_OTHER))//first char is multichar | ||
3292 | 416 | { | 823 | { |
3294 | 417 | chargroup->addMultiChar(c1); | 824 | if((pattern[*chargroup_len+c1_len] == '-') &&///should not be a range |
3295 | 825 | (pattern[*chargroup_len+c1_len+1] != ']')) | ||
3296 | 826 | { | ||
3297 | 827 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) ); | ||
3298 | 828 | } | ||
3299 | 829 | //chargroup->addMultiChar(c1, multichar_type); | ||
3300 | 830 | chargroup->addCharMatch(charmatch.release()); | ||
3301 | 418 | *chargroup_len += c1_len; | 831 | *chargroup_len += c1_len; |
3302 | 419 | continue; | 832 | continue; |
3303 | 420 | } | 833 | } |
3305 | 421 | if(pattern[*chargroup_len+c1_len] == '-')///might be a range | 834 | (*chargroup_len) += c1_len; |
3306 | 835 | if(pattern[*chargroup_len] == '-')///might be a range | ||
3307 | 422 | { | 836 | { |
3309 | 423 | if(pattern[*chargroup_len+c1_len+1] == ']')//no range, just the last char is '-' | 837 | if(pattern[(*chargroup_len)+1] == ']')//no range, just the last char is '-' |
3310 | 424 | { | 838 | { |
3314 | 425 | chargroup->addCharRange(c1, c1); | 839 | //chargroup->addOneChar(c1); |
3315 | 426 | chargroup->addCharRange('-', '-'); | 840 | //chargroup->addOneChar('-'); |
3316 | 427 | *chargroup_len += c1_len + 1; | 841 | chargroup->addCharMatch(charmatch.release()); |
3317 | 842 | chargroup->addCharMatch(new CRegexXQuery_char_ascii(current_regex, '-')); | ||
3318 | 843 | (*chargroup_len)++; | ||
3319 | 428 | continue; | 844 | continue; |
3320 | 429 | } | 845 | } |
3322 | 430 | else | 846 | else if(pattern[(*chargroup_len)+1] != '[') |
3323 | 431 | { | 847 | { |
3324 | 432 | //it is a range | 848 | //it is a range |
3332 | 433 | char c3; | 849 | (*chargroup_len)++; |
3333 | 434 | int c3_len; | 850 | std::unique_ptr<CRegexXQuery_charmatch> charmatch2; |
3334 | 435 | c3 = readChar(pattern+*chargroup_len+c1_len+1, &c3_len, &is_multichar); | 851 | CHARGROUP_t multichar_type2 = CHARGROUP_NO_MULTICHAR; |
3335 | 436 | if(is_multichar) | 852 | int c2_len; |
3336 | 437 | return NULL;//error | 853 | charmatch2.reset(readChar(pattern+(*chargroup_len), &c2_len, &multichar_type2)); |
3337 | 438 | chargroup->addCharRange(c1, c3); | 854 | if((multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII) && |
3338 | 439 | *chargroup_len += c1_len + 1 + c3_len; | 855 | (multichar_type2 != CHARGROUP_FLAGS_ONECHAR_ASCII))//second char in range is multichar |
3339 | 856 | { | ||
3340 | 857 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) ); | ||
3341 | 858 | } | ||
3342 | 859 | //chargroup->addCharRange(c1, c3); | ||
3343 | 860 | if((multichar_type == CHARGROUP_FLAGS_ONECHAR_ASCII) && (multichar_type2 == CHARGROUP_FLAGS_ONECHAR_ASCII)) | ||
3344 | 861 | { | ||
3345 | 862 | if(flags & REGEX_ASCII_CASE_INSENSITIVE) | ||
3346 | 863 | chargroup->addCharMatch(new CRegexXQuery_char_range_ascii_i(current_regex, | ||
3347 | 864 | (char)charmatch->get_c(), | ||
3348 | 865 | (char)charmatch2->get_c())); | ||
3349 | 866 | else | ||
3350 | 867 | chargroup->addCharMatch(new CRegexXQuery_char_range_ascii(current_regex, | ||
3351 | 868 | (char)charmatch->get_c(), | ||
3352 | 869 | (char)charmatch2->get_c())); | ||
3353 | 870 | } | ||
3354 | 871 | else | ||
3355 | 872 | { | ||
3356 | 873 | if(flags & REGEX_ASCII_CASE_INSENSITIVE) | ||
3357 | 874 | chargroup->addCharMatch(new CRegexXQuery_char_range_unicode_i(current_regex, | ||
3358 | 875 | charmatch->get_c(), | ||
3359 | 876 | charmatch2->get_c())); | ||
3360 | 877 | else | ||
3361 | 878 | chargroup->addCharMatch(new CRegexXQuery_char_range_unicode(current_regex, | ||
3362 | 879 | charmatch->get_c(), | ||
3363 | 880 | charmatch2->get_c())); | ||
3364 | 881 | } | ||
3365 | 882 | *chargroup_len += c2_len; | ||
3366 | 440 | continue; | 883 | continue; |
3367 | 441 | } | 884 | } |
3368 | 442 | } | 885 | } |
3371 | 443 | chargroup->addCharRange(c1, c1); | 886 | //chargroup->addOneChar(c1); |
3372 | 444 | *chargroup_len += c1_len; | 887 | chargroup->addCharMatch(charmatch.release()); |
3373 | 445 | } | 888 | } |
3374 | 446 | if(pattern[*chargroup_len]) | 889 | if(pattern[*chargroup_len]) |
3375 | 447 | (*chargroup_len)++; | 890 | (*chargroup_len)++; |
3377 | 448 | return chargroup; | 891 | else |
3378 | 892 | { | ||
3379 | 893 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISSING_CLOSE_BRACKET)) ); | ||
3380 | 894 | } | ||
3381 | 895 | return chargroup.release(); | ||
3382 | 449 | } | 896 | } |
3383 | 450 | 897 | ||
3385 | 451 | void CRegexAscii_parser::read_quantifier(CRegexAscii_piece *piece, | 898 | void CRegexXQuery_parser::read_quantifier(CRegexXQuery_piece *piece, |
3386 | 452 | const char *pattern, int *quantif_len) | 899 | const char *pattern, int *quantif_len) |
3387 | 453 | { | 900 | { |
3388 | 454 | *quantif_len = 0; | 901 | *quantif_len = 0; |
3389 | @@ -496,6 +943,10 @@ | |||
3390 | 496 | max = max*10 + pattern[*quantif_len] - '0'; | 943 | max = max*10 + pattern[*quantif_len] - '0'; |
3391 | 497 | (*quantif_len)++; | 944 | (*quantif_len)++; |
3392 | 498 | } | 945 | } |
3393 | 946 | if(max < min) | ||
3394 | 947 | { | ||
3395 | 948 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MAX_LT_MIN)) ); | ||
3396 | 949 | } | ||
3397 | 499 | piece->set_quantifier_min_max(min, max, true); | 950 | piece->set_quantifier_min_max(min, max, true); |
3398 | 500 | } | 951 | } |
3399 | 501 | while(pattern[*quantif_len] && (pattern[*quantif_len] != '}')) | 952 | while(pattern[*quantif_len] && (pattern[*quantif_len] != '}')) |
3400 | @@ -524,23 +975,25 @@ | |||
3401 | 524 | ///Constructors and destructors and internal functions | 975 | ///Constructors and destructors and internal functions |
3402 | 525 | //////////////////////////// | 976 | //////////////////////////// |
3403 | 526 | 977 | ||
3405 | 527 | CRegexAscii_regex::CRegexAscii_regex(CRegexAscii_regex *topregex) : IRegexAtom(topregex?topregex:this) | 978 | CRegexXQuery_regex::CRegexXQuery_regex(CRegexXQuery_regex *topregex) : IRegexAtom(topregex?topregex:this) |
3406 | 528 | { | 979 | { |
3407 | 529 | matched_source = NULL; | 980 | matched_source = NULL; |
3408 | 530 | matched_len = 0; | 981 | matched_len = 0; |
3409 | 982 | // backup_matched_source = NULL; | ||
3410 | 983 | // backup_matched_len = 0; | ||
3411 | 531 | flags = 128;//set to 0 after initialization | 984 | flags = 128;//set to 0 after initialization |
3412 | 532 | } | 985 | } |
3413 | 533 | 986 | ||
3415 | 534 | CRegexAscii_regex::~CRegexAscii_regex() | 987 | CRegexXQuery_regex::~CRegexXQuery_regex() |
3416 | 535 | { | 988 | { |
3418 | 536 | std::list<CRegexAscii_branch*>::iterator branch_it; | 989 | std::list<CRegexXQuery_branch*>::iterator branch_it; |
3419 | 537 | 990 | ||
3420 | 538 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) | 991 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) |
3421 | 539 | { | 992 | { |
3422 | 540 | delete (*branch_it); | 993 | delete (*branch_it); |
3423 | 541 | } | 994 | } |
3424 | 542 | /* | 995 | /* |
3426 | 543 | std::vector<CRegexAscii_regex*>::iterator subregex_it; | 996 | std::vector<CRegexXQuery_regex*>::iterator subregex_it; |
3427 | 544 | for(subregex_it = subregex.begin(); subregex_it != subregex.end(); subregex_it++) | 997 | for(subregex_it = subregex.begin(); subregex_it != subregex.end(); subregex_it++) |
3428 | 545 | { | 998 | { |
3429 | 546 | delete (*subregex_it); | 999 | delete (*subregex_it); |
3430 | @@ -548,25 +1001,18 @@ | |||
3431 | 548 | */ | 1001 | */ |
3432 | 549 | } | 1002 | } |
3433 | 550 | 1003 | ||
3442 | 551 | bool CRegexAscii_regex::set_align_begin(bool align_begin) | 1004 | void CRegexXQuery_regex::add_branch(CRegexXQuery_branch *branch) |
3435 | 552 | { | ||
3436 | 553 | bool prev_align = this->align_begin; | ||
3437 | 554 | this->align_begin = align_begin; | ||
3438 | 555 | return prev_align; | ||
3439 | 556 | } | ||
3440 | 557 | |||
3441 | 558 | void CRegexAscii_regex::add_branch(CRegexAscii_branch *branch) | ||
3443 | 559 | { | 1005 | { |
3444 | 560 | branch_list.push_back(branch); | 1006 | branch_list.push_back(branch); |
3445 | 561 | } | 1007 | } |
3446 | 562 | 1008 | ||
3448 | 563 | bool CRegexAscii_regex::get_indexed_match(int index, | 1009 | bool CRegexXQuery_regex::get_indexed_match(int index, |
3449 | 564 | const char **matched_source, | 1010 | const char **matched_source, |
3450 | 565 | int *matched_len) | 1011 | int *matched_len) |
3451 | 566 | { | 1012 | { |
3452 | 567 | if(!index || index > (int)subregex.size()) | 1013 | if(!index || index > (int)subregex.size()) |
3453 | 568 | return false; | 1014 | return false; |
3455 | 569 | CRegexAscii_regex *subr = subregex[index-1]; | 1015 | CRegexXQuery_regex *subr = subregex[index-1]; |
3456 | 570 | *matched_source = subr->matched_source; | 1016 | *matched_source = subr->matched_source; |
3457 | 571 | if(!*matched_source) | 1017 | if(!*matched_source) |
3458 | 572 | return false; | 1018 | return false; |
3459 | @@ -574,145 +1020,209 @@ | |||
3460 | 574 | return true; | 1020 | return true; |
3461 | 575 | } | 1021 | } |
3462 | 576 | 1022 | ||
3464 | 577 | unsigned int CRegexAscii_regex::get_indexed_regex_count() | 1023 | unsigned int CRegexXQuery_regex::get_indexed_regex_count() |
3465 | 578 | { | 1024 | { |
3466 | 579 | return subregex.size(); | 1025 | return subregex.size(); |
3467 | 580 | } | 1026 | } |
3468 | 581 | 1027 | ||
3471 | 582 | CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) : | 1028 | CRegexXQuery_branch::CRegexXQuery_branch(CRegexXQuery_regex* regex) |
3472 | 583 | IRegexMatcher(regex) | 1029 | //: |
3473 | 1030 | //IRegexMatcher(regex) | ||
3474 | 584 | { | 1031 | { |
3475 | 585 | } | 1032 | } |
3476 | 586 | 1033 | ||
3478 | 587 | CRegexAscii_branch::~CRegexAscii_branch() | 1034 | CRegexXQuery_branch::~CRegexXQuery_branch() |
3479 | 588 | { | 1035 | { |
3481 | 589 | std::list<CRegexAscii_piece*>::iterator piece_it; | 1036 | std::list<RegexAscii_pieceinfo>::iterator piece_it; |
3482 | 590 | 1037 | ||
3483 | 591 | for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++) | 1038 | for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++) |
3484 | 592 | { | 1039 | { |
3486 | 593 | delete (*piece_it); | 1040 | delete (*piece_it).piece; |
3487 | 594 | } | 1041 | } |
3488 | 595 | } | 1042 | } |
3489 | 596 | 1043 | ||
3491 | 597 | void CRegexAscii_branch::add_piece(CRegexAscii_piece *piece) | 1044 | void CRegexXQuery_branch::add_piece(CRegexXQuery_piece *piece) |
3492 | 598 | { | 1045 | { |
3493 | 599 | piece_list.push_back(piece); | 1046 | piece_list.push_back(piece); |
3494 | 600 | } | 1047 | } |
3495 | 601 | 1048 | ||
3497 | 602 | CRegexAscii_piece::CRegexAscii_piece() | 1049 | CRegexXQuery_piece::CRegexXQuery_piece() |
3498 | 603 | { | 1050 | { |
3499 | 1051 | atom = NULL; | ||
3500 | 1052 | regex_atom = NULL; | ||
3501 | 604 | } | 1053 | } |
3502 | 605 | 1054 | ||
3504 | 606 | CRegexAscii_piece::~CRegexAscii_piece() | 1055 | CRegexXQuery_piece::~CRegexXQuery_piece() |
3505 | 607 | { | 1056 | { |
3506 | 608 | delete atom; | 1057 | delete atom; |
3507 | 609 | } | 1058 | } |
3508 | 610 | 1059 | ||
3510 | 611 | void CRegexAscii_piece::set_atom(IRegexAtom *atom) | 1060 | void CRegexXQuery_piece::set_atom(IRegexAtom *atom) |
3511 | 612 | { | 1061 | { |
3512 | 613 | this->atom = atom; | 1062 | this->atom = atom; |
3513 | 1063 | this->regex_atom = dynamic_cast<CRegexXQuery_regex*>(atom); | ||
3514 | 614 | } | 1064 | } |
3515 | 615 | 1065 | ||
3517 | 616 | void CRegexAscii_piece::set_quantifier_min_max(int min, int max, bool strict_max) | 1066 | void CRegexXQuery_piece::set_quantifier_min_max(int min, int max, bool strict_max) |
3518 | 617 | { | 1067 | { |
3519 | 618 | this->min = min; | 1068 | this->min = min; |
3520 | 619 | this->max = max; | 1069 | this->max = max; |
3521 | 620 | this->strict_max = strict_max; | 1070 | this->strict_max = strict_max; |
3522 | 621 | } | 1071 | } |
3524 | 622 | void CRegexAscii_piece::set_is_reluctant(bool is_reluctant) | 1072 | void CRegexXQuery_piece::set_is_reluctant(bool is_reluctant) |
3525 | 623 | { | 1073 | { |
3526 | 624 | this->is_reluctant = is_reluctant; | 1074 | this->is_reluctant = is_reluctant; |
3527 | 625 | } | 1075 | } |
3529 | 626 | void CRegexAscii_piece::get_quantifier(int *min, int *max, bool *strict_max) | 1076 | void CRegexXQuery_piece::get_quantifier(int *min, int *max, bool *strict_max) |
3530 | 627 | { | 1077 | { |
3531 | 628 | *min = this->min; | 1078 | *min = this->min; |
3532 | 629 | *max = this->max; | 1079 | *max = this->max; |
3533 | 630 | *strict_max = this->strict_max; | 1080 | *strict_max = this->strict_max; |
3534 | 631 | } | 1081 | } |
3536 | 632 | bool CRegexAscii_piece::get_is_reluctant() | 1082 | bool CRegexXQuery_piece::get_is_reluctant() |
3537 | 633 | { | 1083 | { |
3538 | 1084 | if(atom->regex_intern->flags & REGEX_ASCII_MINIMAL_MATCH) | ||
3539 | 1085 | return true; | ||
3540 | 634 | return is_reluctant; | 1086 | return is_reluctant; |
3541 | 635 | } | 1087 | } |
3542 | 636 | 1088 | ||
3543 | 637 | 1089 | ||
3545 | 638 | CRegexAscii_chargroup::CRegexAscii_chargroup(CRegexAscii_regex* regex) : | 1090 | CRegexXQuery_charmatch::CRegexXQuery_charmatch(CRegexXQuery_regex* regex) : |
3546 | 1091 | IRegexAtom(regex) | ||
3547 | 1092 | { | ||
3548 | 1093 | } | ||
3549 | 1094 | CRegexXQuery_multicharP::CRegexXQuery_multicharP(CRegexXQuery_regex* regex, char type, bool is_reverse) : | ||
3550 | 1095 | CRegexXQuery_charmatch(regex) | ||
3551 | 1096 | { | ||
3552 | 1097 | this->multichar_type = type; this->is_reverse = is_reverse; | ||
3553 | 1098 | } | ||
3554 | 1099 | CRegexXQuery_multicharIs::CRegexXQuery_multicharIs(CRegexXQuery_regex* regex, int block_index, bool is_reverse) : | ||
3555 | 1100 | CRegexXQuery_charmatch(regex) | ||
3556 | 1101 | { | ||
3557 | 1102 | this->block_index = block_index; this->is_reverse = is_reverse; | ||
3558 | 1103 | } | ||
3559 | 1104 | CRegexXQuery_multicharOther::CRegexXQuery_multicharOther(CRegexXQuery_regex* regex, char type) : | ||
3560 | 1105 | CRegexXQuery_charmatch(regex) | ||
3561 | 1106 | { | ||
3562 | 1107 | this->multichar_type = type; | ||
3563 | 1108 | } | ||
3564 | 1109 | CRegexXQuery_char_ascii::CRegexXQuery_char_ascii(CRegexXQuery_regex* regex, char c) : | ||
3565 | 1110 | CRegexXQuery_charmatch(regex) | ||
3566 | 1111 | { | ||
3567 | 1112 | this->c = c; | ||
3568 | 1113 | } | ||
3569 | 1114 | CRegexXQuery_char_ascii_i::CRegexXQuery_char_ascii_i(CRegexXQuery_regex* regex, char c) : | ||
3570 | 1115 | CRegexXQuery_char_ascii(regex, toupper(c)) | ||
3571 | 1116 | { | ||
3572 | 1117 | } | ||
3573 | 1118 | CRegexXQuery_char_range_ascii::CRegexXQuery_char_range_ascii(CRegexXQuery_regex* regex, char c1, char c2) : | ||
3574 | 1119 | CRegexXQuery_charmatch(regex) | ||
3575 | 1120 | { | ||
3576 | 1121 | this->c1 = c1; this->c2 = c2; | ||
3577 | 1122 | } | ||
3578 | 1123 | CRegexXQuery_char_range_ascii_i::CRegexXQuery_char_range_ascii_i(CRegexXQuery_regex* regex, char c1, char c2) : | ||
3579 | 1124 | CRegexXQuery_char_range_ascii(regex, toupper(c1), toupper(c2)) | ||
3580 | 1125 | { | ||
3581 | 1126 | } | ||
3582 | 1127 | CRegexXQuery_char_unicode::CRegexXQuery_char_unicode(CRegexXQuery_regex* regex, const char *source, int len) : | ||
3583 | 1128 | CRegexXQuery_charmatch(regex) | ||
3584 | 1129 | { | ||
3585 | 1130 | this->len = len; | ||
3586 | 1131 | memcpy(c, source, len); | ||
3587 | 1132 | } | ||
3588 | 1133 | CRegexXQuery_char_unicode_cp::CRegexXQuery_char_unicode_cp(CRegexXQuery_regex* regex, unicode::code_point c) : | ||
3589 | 1134 | CRegexXQuery_charmatch(regex) | ||
3590 | 1135 | { | ||
3591 | 1136 | this->c = c; | ||
3592 | 1137 | } | ||
3593 | 1138 | CRegexXQuery_char_unicode_i::CRegexXQuery_char_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c) : | ||
3594 | 1139 | CRegexXQuery_char_unicode_cp(regex, unicode::to_upper(c)) | ||
3595 | 1140 | { | ||
3596 | 1141 | } | ||
3597 | 1142 | CRegexXQuery_char_range_unicode::CRegexXQuery_char_range_unicode(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) : | ||
3598 | 1143 | CRegexXQuery_charmatch(regex) | ||
3599 | 1144 | { | ||
3600 | 1145 | this->c1 = c1; this->c2 = c2; | ||
3601 | 1146 | } | ||
3602 | 1147 | CRegexXQuery_char_range_unicode_i::CRegexXQuery_char_range_unicode_i(CRegexXQuery_regex* regex, unicode::code_point c1, unicode::code_point c2) : | ||
3603 | 1148 | CRegexXQuery_char_range_unicode(regex, unicode::to_upper(c1), unicode::to_upper(c2)) | ||
3604 | 1149 | { | ||
3605 | 1150 | } | ||
3606 | 1151 | CRegexXQuery_endline::CRegexXQuery_endline(CRegexXQuery_regex* regex) : | ||
3607 | 1152 | CRegexXQuery_charmatch(regex) | ||
3608 | 1153 | { | ||
3609 | 1154 | } | ||
3610 | 1155 | |||
3611 | 1156 | unicode::code_point CRegexXQuery_char_unicode::get_c() | ||
3612 | 1157 | { | ||
3613 | 1158 | const char *temp_c = (const char*)c; | ||
3614 | 1159 | return utf8::next_char(temp_c); | ||
3615 | 1160 | } | ||
3616 | 1161 | |||
3617 | 1162 | |||
3618 | 1163 | CRegexXQuery_chargroup::CRegexXQuery_chargroup(CRegexXQuery_regex* regex) : | ||
3619 | 639 | IRegexAtom(regex) | 1164 | IRegexAtom(regex) |
3620 | 640 | { | 1165 | { |
3621 | 641 | classsub = NULL; | 1166 | classsub = NULL; |
3622 | 642 | } | 1167 | } |
3623 | 643 | 1168 | ||
3625 | 644 | CRegexAscii_chargroup::~CRegexAscii_chargroup() | 1169 | CRegexXQuery_chargroup::~CRegexXQuery_chargroup() |
3626 | 645 | { | 1170 | { |
3627 | 646 | delete classsub; | 1171 | delete classsub; |
3658 | 647 | } | 1172 | std::list<CRegexXQuery_charmatch* >::iterator charmatch_it; |
3659 | 648 | 1173 | for(charmatch_it=chargroup_list.begin(); charmatch_it != chargroup_list.end(); charmatch_it++) | |
3660 | 649 | void CRegexAscii_chargroup::addMultiChar(char c) | 1174 | delete (*charmatch_it); |
3661 | 650 | { | 1175 | } |
3662 | 651 | chargroup_t cgt; | 1176 | |
3663 | 652 | cgt.flags = CHARGROUP_FLAGS_MULTICHAR; | 1177 | void CRegexXQuery_chargroup::addCharMatch(CRegexXQuery_charmatch *charmatch) |
3664 | 653 | cgt.c1 = c; | 1178 | { |
3665 | 654 | cgt.c2 = 0; | 1179 | chargroup_list.push_back(charmatch); |
3666 | 655 | chargroup_list.push_back(cgt); | 1180 | } |
3667 | 656 | } | 1181 | void CRegexXQuery_chargroup::addClassSub(CRegexXQuery_chargroup* classsub) |
3638 | 657 | |||
3639 | 658 | void CRegexAscii_chargroup::addEndLine() | ||
3640 | 659 | { | ||
3641 | 660 | chargroup_t cgt; | ||
3642 | 661 | cgt.flags = CHARGROUP_FLAGS_ENDLINE; | ||
3643 | 662 | cgt.c1 = '$'; | ||
3644 | 663 | cgt.c2 = 0; | ||
3645 | 664 | chargroup_list.push_back(cgt); | ||
3646 | 665 | } | ||
3647 | 666 | |||
3648 | 667 | void CRegexAscii_chargroup::addCharRange(char c1, char c2) | ||
3649 | 668 | { | ||
3650 | 669 | chargroup_t cgt; | ||
3651 | 670 | cgt.flags = 0; | ||
3652 | 671 | cgt.c1 = c1; | ||
3653 | 672 | cgt.c2 = c2; | ||
3654 | 673 | chargroup_list.push_back(cgt); | ||
3655 | 674 | } | ||
3656 | 675 | |||
3657 | 676 | void CRegexAscii_chargroup::addClassSub(CRegexAscii_chargroup* classsub) | ||
3668 | 677 | { | 1182 | { |
3669 | 678 | this->classsub = classsub; | 1183 | this->classsub = classsub; |
3670 | 679 | } | 1184 | } |
3671 | 680 | 1185 | ||
3682 | 681 | CRegexAscii_negchargroup::CRegexAscii_negchargroup(CRegexAscii_regex* regex) : | 1186 | CRegexXQuery_negchargroup::CRegexXQuery_negchargroup(CRegexXQuery_regex* regex) : |
3683 | 682 | CRegexAscii_chargroup(regex) | 1187 | CRegexXQuery_chargroup(regex) |
3684 | 683 | { | 1188 | { |
3685 | 684 | } | 1189 | } |
3686 | 685 | 1190 | ||
3687 | 686 | CRegexAscii_negchargroup::~CRegexAscii_negchargroup() | 1191 | CRegexXQuery_negchargroup::~CRegexXQuery_negchargroup() |
3688 | 687 | { | 1192 | { |
3689 | 688 | } | 1193 | } |
3690 | 689 | 1194 | ||
3691 | 690 | CRegexAscii_wildchar::CRegexAscii_wildchar(CRegexAscii_regex* regex) : | 1195 | CRegexXQuery_wildchar::CRegexXQuery_wildchar(CRegexXQuery_regex* regex) : |
3692 | 691 | IRegexAtom(regex) | 1196 | IRegexAtom(regex) |
3693 | 692 | { | 1197 | { |
3694 | 693 | } | 1198 | } |
3695 | 694 | 1199 | ||
3697 | 695 | CRegexAscii_wildchar::~CRegexAscii_wildchar() | 1200 | CRegexXQuery_wildchar::~CRegexXQuery_wildchar() |
3698 | 696 | { | 1201 | { |
3699 | 697 | } | 1202 | } |
3700 | 698 | 1203 | ||
3702 | 699 | CRegexAscii_backref::CRegexAscii_backref(CRegexAscii_regex* regex, unsigned int backref_) : | 1204 | CRegexXQuery_backref::CRegexXQuery_backref(CRegexXQuery_regex* regex, unsigned int backref_) : |
3703 | 700 | IRegexAtom(regex), | 1205 | IRegexAtom(regex), |
3704 | 701 | backref(backref_) | 1206 | backref(backref_) |
3705 | 702 | { | 1207 | { |
3706 | 703 | } | 1208 | } |
3707 | 704 | 1209 | ||
3713 | 705 | CRegexAscii_backref::~CRegexAscii_backref() | 1210 | CRegexXQuery_backref::~CRegexXQuery_backref() |
3714 | 706 | { | 1211 | { |
3715 | 707 | } | 1212 | } |
3716 | 708 | 1213 | ||
3717 | 709 | CRegexAscii_parser::CRegexAscii_parser() | 1214 | CRegexXQuery_pinstart::CRegexXQuery_pinstart(CRegexXQuery_regex* regex): |
3718 | 1215 | IRegexAtom(regex) | ||
3719 | 1216 | { | ||
3720 | 1217 | } | ||
3721 | 1218 | |||
3722 | 1219 | CRegexXQuery_parser::CRegexXQuery_parser() | ||
3723 | 710 | { | 1220 | { |
3724 | 711 | current_regex = NULL; | 1221 | current_regex = NULL; |
3725 | 712 | regex_depth = 0; | 1222 | regex_depth = 0; |
3726 | 713 | } | 1223 | } |
3727 | 714 | 1224 | ||
3729 | 715 | CRegexAscii_parser::~CRegexAscii_parser() | 1225 | CRegexXQuery_parser::~CRegexXQuery_parser() |
3730 | 716 | { | 1226 | { |
3731 | 717 | } | 1227 | } |
3732 | 718 | 1228 | ||
3733 | @@ -720,9 +1230,68 @@ | |||
3734 | 720 | ////////////////////////////////////////// | 1230 | ////////////////////////////////////////// |
3735 | 721 | ////Matching the pattern on a string | 1231 | ////Matching the pattern on a string |
3736 | 722 | ///////////////////////////////////////// | 1232 | ///////////////////////////////////////// |
3737 | 1233 | static std::list<RegexAscii_pieceinfo> empty_pieces;//empty list of pieces | ||
3738 | 1234 | /* | ||
3739 | 1235 | std::list<RegexAscii_pieceinfo>::iterator | ||
3740 | 1236 | IRegexAtom::choose_next_piece(const char *source, int *matched_len, | ||
3741 | 1237 | std::list<RegexAscii_pieceinfo>::iterator this_piece, | ||
3742 | 1238 | std::list<RegexAscii_pieceinfo>::iterator end_piece) | ||
3743 | 1239 | { | ||
3744 | 1240 | //if this_piece is repetition, repeat until max, then go to next piece | ||
3745 | 1241 | int min, max; | ||
3746 | 1242 | bool strict_max; | ||
3747 | 1243 | while(this_piece != end_piece) | ||
3748 | 1244 | { | ||
3749 | 1245 | (*this_piece).piece->get_quantifier(&min, &max, &strict_max); | ||
3750 | 1246 | if(max <= ((*this_piece).nr_matches))//finished this piece | ||
3751 | 1247 | { | ||
3752 | 1248 | this_piece++; | ||
3753 | 1249 | } | ||
3754 | 1250 | else | ||
3755 | 1251 | break; | ||
3756 | 1252 | } | ||
3757 | 1253 | return this_piece; | ||
3758 | 1254 | } | ||
3759 | 1255 | */ | ||
3760 | 1256 | |||
3761 | 1257 | bool IRegexAtom::match(const char *source, int *start_from_branch, int *matched_len, | ||
3762 | 1258 | std::list<RegexAscii_pieceinfo>::iterator this_piece, | ||
3763 | 1259 | std::list<RegexAscii_pieceinfo>::iterator end_piece) | ||
3764 | 1260 | { | ||
3765 | 1261 | *start_from_branch = 0; | ||
3766 | 1262 | bool retmatch; | ||
3767 | 1263 | retmatch = match_internal(source, start_from_branch, matched_len); | ||
3768 | 1264 | if(!retmatch) | ||
3769 | 1265 | return false; | ||
3770 | 1266 | |||
3771 | 1267 | if(this_piece == end_piece) | ||
3772 | 1268 | return true; | ||
3773 | 1269 | |||
3774 | 1270 | (*this_piece).nr_matches++; | ||
3775 | 1271 | int min,max; | ||
3776 | 1272 | bool strict_max; | ||
3777 | 1273 | (*this_piece).piece->get_quantifier(&min, &max, &strict_max); | ||
3778 | 1274 | std::list<RegexAscii_pieceinfo>::iterator init_piece = this_piece; | ||
3779 | 1275 | if(((min == 1) && (max == 1)) || //the simple common case | ||
3780 | 1276 | ((*matched_len == 0) && ((*this_piece).nr_matches>=min)))//to avoid infinite loop | ||
3781 | 1277 | { | ||
3782 | 1278 | this_piece++; | ||
3783 | 1279 | if(this_piece == end_piece) | ||
3784 | 1280 | return true; | ||
3785 | 1281 | } | ||
3786 | 1282 | int matched_len2; | ||
3787 | 1283 | retmatch = (*this_piece).piece->match_piece(this_piece, end_piece, source + *matched_len, &matched_len2); | ||
3788 | 1284 | if(!retmatch) | ||
3789 | 1285 | { | ||
3790 | 1286 | (*init_piece).nr_matches--; | ||
3791 | 1287 | return false; | ||
3792 | 1288 | } | ||
3793 | 1289 | *matched_len += matched_len2; | ||
3794 | 1290 | return true; | ||
3795 | 1291 | } | ||
3796 | 723 | 1292 | ||
3797 | 724 | //try every position in source to match the pattern | 1293 | //try every position in source to match the pattern |
3799 | 725 | bool CRegexAscii_regex::match_anywhere(const char *source, unsigned int flags, | 1294 | bool CRegexXQuery_regex::match_anywhere(const char *source, unsigned int flags, |
3800 | 726 | int *match_pos, int *matched_len) | 1295 | int *match_pos, int *matched_len) |
3801 | 727 | { | 1296 | { |
3802 | 728 | *match_pos = 0; | 1297 | *match_pos = 0; |
3803 | @@ -730,43 +1299,66 @@ | |||
3804 | 730 | return match_from(source, flags, match_pos, matched_len); | 1299 | return match_from(source, flags, match_pos, matched_len); |
3805 | 731 | } | 1300 | } |
3806 | 732 | 1301 | ||
3808 | 733 | bool CRegexAscii_regex::match_from(const char *source, unsigned int flags, | 1302 | bool CRegexXQuery_regex::match_from(const char *source, unsigned int flags, |
3809 | 734 | int *match_pos, int *matched_len) | 1303 | int *match_pos, int *matched_len) |
3810 | 735 | { | 1304 | { |
3811 | 736 | this->flags = flags; | 1305 | this->flags = flags; |
3812 | 1306 | this->source_start = source; | ||
3813 | 737 | reachedEnd = false; | 1307 | reachedEnd = false; |
3814 | 738 | 1308 | ||
3816 | 739 | std::vector<CRegexAscii_regex*>::iterator regex_it; | 1309 | std::vector<CRegexXQuery_regex*>::iterator regex_it; |
3817 | 740 | for(regex_it = subregex.begin(); regex_it != subregex.end(); regex_it++) | 1310 | for(regex_it = subregex.begin(); regex_it != subregex.end(); regex_it++) |
3818 | 741 | { | 1311 | { |
3819 | 742 | (*regex_it)->matched_source = NULL; | 1312 | (*regex_it)->matched_source = NULL; |
3820 | 743 | } | 1313 | } |
3832 | 744 | // if(!source[0]) | 1314 | |
3833 | 745 | // { | 1315 | std::vector<std::pair<const char*, int> > saved_subregex; |
3834 | 746 | // if(branch_list.empty()) | 1316 | |
3835 | 747 | // return true; | 1317 | if(*match_pos && (flags & REGEX_ASCII_WHOLE_MATCH)) |
3836 | 748 | // else | 1318 | return false; |
3837 | 749 | // return false; | 1319 | |
3827 | 750 | // } | ||
3828 | 751 | |||
3829 | 752 | bool skip_first_match = false; | ||
3830 | 753 | if(*match_pos && align_begin) | ||
3831 | 754 | skip_first_match = true; | ||
3838 | 755 | do | 1320 | do |
3839 | 756 | { | 1321 | { |
3847 | 757 | if(!skip_first_match) | 1322 | int start_from_branch = 0; |
3848 | 758 | { | 1323 | int longest_match = -1; |
3849 | 759 | if(match(source + *match_pos, matched_len)) | 1324 | while(1) |
3850 | 760 | return true; | 1325 | { |
3851 | 761 | } | 1326 | if(!match(source + *match_pos, &start_from_branch, matched_len, empty_pieces.begin(), empty_pieces.end())) |
3852 | 762 | skip_first_match = false; | 1327 | break; |
3853 | 763 | if(align_begin) | 1328 | if(longest_match < *matched_len) |
3854 | 1329 | { | ||
3855 | 1330 | longest_match = *matched_len; | ||
3856 | 1331 | if(start_from_branch && (flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
3857 | 1332 | save_subregex_list(saved_subregex); | ||
3858 | 1333 | } | ||
3859 | 1334 | if(!start_from_branch || !(flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
3860 | 1335 | break; | ||
3861 | 1336 | //else try the other branches to see which is longer | ||
3862 | 1337 | } | ||
3863 | 1338 | if(longest_match != -1) | ||
3864 | 1339 | { | ||
3865 | 1340 | *matched_len = longest_match; | ||
3866 | 1341 | if(saved_subregex.size()) | ||
3867 | 1342 | load_subregex_list(saved_subregex); | ||
3868 | 1343 | if(flags & REGEX_ASCII_WHOLE_MATCH) | ||
3869 | 1344 | { | ||
3870 | 1345 | if(!source[*match_pos+*matched_len]) | ||
3871 | 1346 | return true; | ||
3872 | 1347 | if((flags & REGEX_ASCII_MULTILINE) && | ||
3873 | 1348 | ((source[*match_pos+*matched_len] == '\n') || (source[*match_pos+*matched_len] == '\r'))) | ||
3874 | 1349 | return true; | ||
3875 | 1350 | return false; | ||
3876 | 1351 | } | ||
3877 | 1352 | return true; | ||
3878 | 1353 | } | ||
3879 | 1354 | |||
3880 | 1355 | if(flags & REGEX_ASCII_WHOLE_MATCH) | ||
3881 | 764 | { | 1356 | { |
3882 | 765 | if(flags & REGEX_ASCII_MULTILINE) | 1357 | if(flags & REGEX_ASCII_MULTILINE) |
3883 | 766 | { | 1358 | { |
3885 | 767 | //goto the next line | 1359 | //go to next line |
3886 | 768 | while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r')) | 1360 | while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r')) |
3888 | 769 | (*match_pos)++; | 1361 | (*match_pos) += myutf8len(source); |
3889 | 770 | if(source[*match_pos] == '\n') | 1362 | if(source[*match_pos] == '\n') |
3890 | 771 | { | 1363 | { |
3891 | 772 | (*match_pos)++; | 1364 | (*match_pos)++; |
3892 | @@ -780,190 +1372,1039 @@ | |||
3893 | 780 | (*match_pos)++; | 1372 | (*match_pos)++; |
3894 | 781 | } | 1373 | } |
3895 | 782 | if(!source[*match_pos]) | 1374 | if(!source[*match_pos]) |
3897 | 783 | return false; | 1375 | break; |
3898 | 784 | continue; | 1376 | continue; |
3899 | 785 | } | 1377 | } |
3901 | 786 | return false; | 1378 | break; |
3902 | 787 | } | 1379 | } |
3903 | 788 | if(!source[*match_pos]) | 1380 | if(!source[*match_pos]) |
3904 | 789 | break; | 1381 | break; |
3906 | 790 | (*match_pos)++; | 1382 | (*match_pos) += myutf8len(source); |
3907 | 791 | } | 1383 | } |
3908 | 792 | while(source[*match_pos]); | 1384 | while(source[*match_pos]); |
3909 | 1385 | // if(!source[*match_pos]) | ||
3910 | 1386 | // { | ||
3911 | 1387 | // reachedEnd = true; | ||
3912 | 1388 | // } | ||
3913 | 793 | return false; | 1389 | return false; |
3914 | 794 | } | 1390 | } |
3915 | 795 | 1391 | ||
3916 | 1392 | void CRegexXQuery_regex::reset_match() | ||
3917 | 1393 | { | ||
3918 | 1394 | // this->backup_matched_source = this->matched_source; | ||
3919 | 1395 | // this->backup_matched_len = this->matched_len; | ||
3920 | 1396 | this->matched_source = NULL; | ||
3921 | 1397 | this->matched_len = 0; | ||
3922 | 1398 | std::list<CRegexXQuery_branch*>::iterator branch_it; | ||
3923 | 1399 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) | ||
3924 | 1400 | { | ||
3925 | 1401 | (*branch_it)->reset(); | ||
3926 | 1402 | } | ||
3927 | 1403 | } | ||
3928 | 1404 | /* | ||
3929 | 1405 | void CRegexXQuery_regex::restore_match() | ||
3930 | 1406 | { | ||
3931 | 1407 | this->matched_source = this->backup_matched_source; | ||
3932 | 1408 | this->matched_len = this->backup_matched_len; | ||
3933 | 1409 | std::list<CRegexXQuery_branch*>::iterator branch_it; | ||
3934 | 1410 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) | ||
3935 | 1411 | { | ||
3936 | 1412 | (*branch_it)->restore(); | ||
3937 | 1413 | } | ||
3938 | 1414 | } | ||
3939 | 1415 | */ | ||
3940 | 796 | //match any of the branches | 1416 | //match any of the branches |
3942 | 797 | bool CRegexAscii_regex::match(const char *source, int *matched_len) | 1417 | bool CRegexXQuery_regex::match(const char *source, int *start_from_branch, int *matched_len, |
3943 | 1418 | std::list<RegexAscii_pieceinfo>::iterator next_piece, | ||
3944 | 1419 | std::list<RegexAscii_pieceinfo>::iterator end_piece) | ||
3945 | 798 | { | 1420 | { |
3946 | 799 | reachedEnd = false; | 1421 | reachedEnd = false; |
3955 | 800 | std::list<CRegexAscii_branch*>::iterator branch_it; | 1422 | if(!(flags & REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE) || |
3956 | 801 | 1423 | (this->matched_source == NULL) || ((this->matched_source + this->matched_len) != source)) | |
3957 | 802 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) | 1424 | this->matched_source = source; |
3958 | 803 | { | 1425 | *matched_len = 0; |
3959 | 804 | if((*branch_it)->match(source, matched_len)) | 1426 | std::list<CRegexXQuery_branch*>::iterator branch_it; |
3960 | 805 | { | 1427 | |
3961 | 806 | matched_source = source; | 1428 | if(*start_from_branch == 0) |
3962 | 807 | this->matched_len = *matched_len; | 1429 | { |
3963 | 1430 | for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++) | ||
3964 | 1431 | { | ||
3965 | 1432 | (*branch_it)->reset(); | ||
3966 | 1433 | } | ||
3967 | 1434 | } | ||
3968 | 1435 | |||
3969 | 1436 | branch_it = branch_list.begin(); | ||
3970 | 1437 | if(*start_from_branch) | ||
3971 | 1438 | { | ||
3972 | 1439 | for(int i=0;i<*start_from_branch;i++) | ||
3973 | 1440 | branch_it++; | ||
3974 | 1441 | } | ||
3975 | 1442 | (*start_from_branch)++; | ||
3976 | 1443 | for(; branch_it != branch_list.end(); branch_it++,(*start_from_branch)++) | ||
3977 | 1444 | { | ||
3978 | 1445 | if((*branch_it)->match(source, matched_len, this, next_piece, end_piece)) | ||
3979 | 1446 | { | ||
3980 | 1447 | //matched_source = source; | ||
3981 | 1448 | //this->matched_len = *matched_len; | ||
3982 | 808 | return true; | 1449 | return true; |
3983 | 809 | } | 1450 | } |
3984 | 810 | } | 1451 | } |
3987 | 811 | matched_source = NULL; | 1452 | *start_from_branch = 0; |
3988 | 812 | matched_len = 0; | 1453 | if(this->matched_source == source) |
3989 | 1454 | this->matched_source = NULL; | ||
3990 | 1455 | *matched_len = 0; | ||
3991 | 813 | return false; | 1456 | return false; |
3992 | 814 | } | 1457 | } |
3993 | 815 | 1458 | ||
3994 | 1459 | void CRegexXQuery_regex::save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex) | ||
3995 | 1460 | { | ||
3996 | 1461 | saved_subregex.resize(0); | ||
3997 | 1462 | saved_subregex.reserve(subregex.size()); | ||
3998 | 1463 | std::vector<CRegexXQuery_regex*>::iterator it; | ||
3999 | 1464 | for(it=subregex.begin(); it != subregex.end(); it++) | ||
4000 | 1465 | { | ||
4001 | 1466 | saved_subregex.push_back(std::pair<const char*, int>((*it)->matched_source, (*it)->matched_len)); | ||
4002 | 1467 | } | ||
4003 | 1468 | } | ||
4004 | 1469 | |||
4005 | 1470 | void CRegexXQuery_regex::load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex) | ||
4006 | 1471 | { | ||
4007 | 1472 | std::vector<std::pair<const char*, int> >::iterator it; | ||
4008 | 1473 | std::vector<CRegexXQuery_regex*>::iterator subit; | ||
4009 | 1474 | for(it=saved_subregex.begin(), subit = subregex.begin(); it != saved_subregex.end(); it++, subit++) | ||
4010 | 1475 | { | ||
4011 | 1476 | (*subit)->matched_source = (*it).first; | ||
4012 | 1477 | (*subit)->matched_len = (*it).second; | ||
4013 | 1478 | } | ||
4014 | 1479 | } | ||
4015 | 1480 | |||
4016 | 1481 | void CRegexXQuery_branch::reset() | ||
4017 | 1482 | { | ||
4018 | 1483 | std::list<RegexAscii_pieceinfo>::iterator piece_it; | ||
4019 | 1484 | for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++) | ||
4020 | 1485 | { | ||
4021 | 1486 | (*piece_it).piece->atom->reset_match(); | ||
4022 | 1487 | } | ||
4023 | 1488 | } | ||
4024 | 1489 | /* | ||
4025 | 1490 | void CRegexXQuery_branch::restore() | ||
4026 | 1491 | { | ||
4027 | 1492 | std::list<RegexAscii_pieceinfo>::iterator piece_it; | ||
4028 | 1493 | for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++) | ||
4029 | 1494 | { | ||
4030 | 1495 | (*piece_it).piece->atom->restore_match(); | ||
4031 | 1496 | } | ||
4032 | 1497 | } | ||
4033 | 1498 | */ | ||
4034 | 816 | //match all the pieces | 1499 | //match all the pieces |
4036 | 817 | bool CRegexAscii_branch::match(const char *source, int *matched_len) | 1500 | bool CRegexXQuery_branch::match(const char *source, int *matched_len, |
4037 | 1501 | CRegexXQuery_regex* group_regex, | ||
4038 | 1502 | std::list<RegexAscii_pieceinfo>::iterator next_piece, | ||
4039 | 1503 | std::list<RegexAscii_pieceinfo>::iterator end_piece) | ||
4040 | 818 | { | 1504 | { |
4042 | 819 | std::list<CRegexAscii_piece*>::iterator piece_it; | 1505 | std::list<RegexAscii_pieceinfo>::iterator piece_it; |
4043 | 820 | 1506 | ||
4044 | 821 | piece_it = piece_list.begin(); | 1507 | piece_it = piece_list.begin(); |
4045 | 1508 | //if(piece_it == piece_list.end()) | ||
4046 | 1509 | //if(!source[0]) | ||
4047 | 1510 | // return true; | ||
4048 | 1511 | //else | ||
4049 | 1512 | // return false; | ||
4050 | 822 | if(piece_it == piece_list.end()) | 1513 | if(piece_it == piece_list.end()) |
4053 | 823 | if(source[0]) | 1514 | { |
4054 | 824 | return false; | 1515 | piece_it = next_piece; |
4055 | 1516 | if(next_piece == end_piece) | ||
4056 | 1517 | { | ||
4057 | 1518 | group_regex->matched_len = 0; | ||
4058 | 1519 | return true; | ||
4059 | 1520 | } | ||
4060 | 1521 | } | ||
4061 | 1522 | |||
4062 | 1523 | std::list<RegexAscii_pieceinfo> temp_pieces(piece_list); | ||
4063 | 1524 | temp_pieces.push_back(group_regex);//this will be used to store the group match | ||
4064 | 1525 | temp_pieces.insert(temp_pieces.end(), next_piece, end_piece); | ||
4065 | 1526 | |||
4066 | 1527 | return (*piece_it).piece->match_piece(temp_pieces.begin(), temp_pieces.end(), source, matched_len); | ||
4067 | 1528 | } | ||
4068 | 1529 | |||
4069 | 1530 | bool CRegexXQuery_piece::match_piece(std::list<RegexAscii_pieceinfo>::iterator piece_it, | ||
4070 | 1531 | std::list<RegexAscii_pieceinfo>::iterator end_it, | ||
4071 | 1532 | const char *source, int *matched_len) | ||
4072 | 1533 | { | ||
4073 | 1534 | if((*piece_it).nr_matches < 0) | ||
4074 | 1535 | { | ||
4075 | 1536 | //special case, store the group match | ||
4076 | 1537 | (*piece_it).group_regex->matched_len = source - (*piece_it).group_regex->matched_source; | ||
4077 | 1538 | piece_it++; | ||
4078 | 1539 | if(piece_it == end_it) | ||
4079 | 1540 | return true; | ||
4080 | 825 | else | 1541 | else |
4084 | 826 | return true; | 1542 | return (*piece_it).piece->match_piece(piece_it, end_it, source, matched_len); |
4085 | 827 | if(!(*piece_it)->get_is_reluctant()) | 1543 | } |
4086 | 828 | return match_piece_iter_normal(piece_it, source, matched_len); | 1544 | |
4087 | 1545 | if(!get_is_reluctant()) | ||
4088 | 1546 | return match_piece_iter_normal(piece_it, end_it, source, matched_len); | ||
4089 | 829 | else | 1547 | else |
4096 | 830 | return match_piece_iter_reluctant(piece_it, source, matched_len); | 1548 | return match_piece_iter_reluctant(piece_it, end_it, source, matched_len); |
4097 | 831 | } | 1549 | } |
4098 | 832 | 1550 | ||
4099 | 833 | //match as less as possible | 1551 | int CRegexXQuery_piece::choose_another_branch(std::vector<std::pair<int,int> > &match_lens) |
4100 | 834 | bool CRegexAscii_branch::match_piece_iter_reluctant( | 1552 | { |
4101 | 835 | std::list<CRegexAscii_piece*>::iterator piece_it, | 1553 | int i = match_lens.size()-1; |
4102 | 1554 | i--; | ||
4103 | 1555 | while((i >= 0) && (match_lens.at(i).second == 0)) | ||
4104 | 1556 | i--; | ||
4105 | 1557 | if(i < 0) | ||
4106 | 1558 | return -1;//no more branches | ||
4107 | 1559 | match_lens.resize(i+1); | ||
4108 | 1560 | i++; | ||
4109 | 1561 | return i; | ||
4110 | 1562 | } | ||
4111 | 1563 | |||
4112 | 1564 | bool CRegexXQuery_piece::is_regex_atom() | ||
4113 | 1565 | { | ||
4114 | 1566 | return regex_atom != NULL; | ||
4115 | 1567 | } | ||
4116 | 1568 | |||
4117 | 1569 | //match as less as possible (shortest string) | ||
4118 | 1570 | bool CRegexXQuery_piece::match_piece_iter_reluctant( | ||
4119 | 1571 | std::list<RegexAscii_pieceinfo>::iterator piece_it, | ||
4120 | 1572 | std::list<RegexAscii_pieceinfo>::iterator end_it, | ||
4121 | 836 | const char *source, int *matched_len) | 1573 | const char *source, int *matched_len) |
4122 | 837 | { | 1574 | { |
4123 | 838 | *matched_len = 0; | 1575 | *matched_len = 0; |
4125 | 839 | if(piece_it == piece_list.end()) | 1576 | if(piece_it == end_it) |
4126 | 840 | return true; | 1577 | return true; |
4127 | 841 | 1578 | ||
4128 | 842 | int min, max; | 1579 | int min, max; |
4129 | 843 | bool strict_max; | 1580 | bool strict_max; |
4130 | 844 | //std::vector<int> match_lens; | 1581 | //std::vector<int> match_lens; |
4133 | 845 | (*piece_it)->get_quantifier(&min, &max, &strict_max); | 1582 | (*piece_it).piece->get_quantifier(&min, &max, &strict_max); |
4134 | 846 | if(strict_max && (max >= 0)) | 1583 | |
4135 | 1584 | std::vector<std::pair<const char*, int> > saved_subregex; | ||
4136 | 1585 | |||
4137 | 1586 | if(is_regex_atom()) | ||
4138 | 847 | { | 1587 | { |
4143 | 848 | int timeslen; | 1588 | //recursive |
4144 | 849 | //check if the piece doesn't exceed the max match | 1589 | bool retmatch; |
4145 | 850 | if((*piece_it)->match_piece_times(source, ×len, max+1, NULL)) | 1590 | atom->regex_intern->save_subregex_list(saved_subregex); |
4146 | 851 | return false;///too many matches | 1591 | if((*piece_it).nr_matches >= min) |
4147 | 1592 | { | ||
4148 | 1593 | //go to next piece | ||
4149 | 1594 | std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it; | ||
4150 | 1595 | next_it++; | ||
4151 | 1596 | if(next_it == end_it) | ||
4152 | 1597 | return true; | ||
4153 | 1598 | retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len); | ||
4154 | 1599 | if(retmatch) | ||
4155 | 1600 | return true; | ||
4156 | 1601 | } | ||
4157 | 1602 | if(((max == -1) || ((*piece_it).nr_matches < max)) &&//try further with this piece | ||
4158 | 1603 | (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop | ||
4159 | 1604 | { | ||
4160 | 1605 | int start_from_branch = 0; | ||
4161 | 1606 | int shortest_len = -1; | ||
4162 | 1607 | bool branch_saved = false; | ||
4163 | 1608 | //try all branches to get the shortest len | ||
4164 | 1609 | (*piece_it).nr_matches++; | ||
4165 | 1610 | while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it)) | ||
4166 | 1611 | { | ||
4167 | 1612 | if((shortest_len == -1) || (shortest_len > *matched_len)) | ||
4168 | 1613 | { | ||
4169 | 1614 | shortest_len = *matched_len; | ||
4170 | 1615 | if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
4171 | 1616 | { | ||
4172 | 1617 | atom->regex_intern->save_subregex_list(saved_subregex); | ||
4173 | 1618 | branch_saved = true; | ||
4174 | 1619 | } | ||
4175 | 1620 | } | ||
4176 | 1621 | if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
4177 | 1622 | break; | ||
4178 | 1623 | } | ||
4179 | 1624 | if(shortest_len != -1) | ||
4180 | 1625 | { | ||
4181 | 1626 | *matched_len = shortest_len; | ||
4182 | 1627 | if(branch_saved) | ||
4183 | 1628 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4184 | 1629 | return true; | ||
4185 | 1630 | } | ||
4186 | 1631 | else | ||
4187 | 1632 | { | ||
4188 | 1633 | (*piece_it).nr_matches--; | ||
4189 | 1634 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4190 | 1635 | return false; | ||
4191 | 1636 | } | ||
4192 | 1637 | } | ||
4193 | 1638 | else | ||
4194 | 1639 | { | ||
4195 | 1640 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4196 | 1641 | return false; | ||
4197 | 1642 | } | ||
4198 | 852 | } | 1643 | } |
4199 | 853 | 1644 | ||
4202 | 854 | int i=min; | 1645 | int i=0; |
4203 | 855 | std::list<CRegexAscii_piece*>::iterator next_it = piece_it; | 1646 | int shortest_len = -1; |
4204 | 1647 | int otherpieces_shortest = -1; | ||
4205 | 1648 | int i_shortest = -1; | ||
4206 | 1649 | std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it; | ||
4207 | 1650 | std::vector<std::pair<int,int> > match_lens; | ||
4208 | 856 | next_it++; | 1651 | next_it++; |
4209 | 857 | int pieceslen = 0; | 1652 | int pieceslen = 0; |
4210 | 858 | while(1) | 1653 | while(1) |
4211 | 859 | { | 1654 | { |
4218 | 860 | if((max > 0) && (i>max)) | 1655 | int piecelen = 0; |
4219 | 861 | break; | 1656 | bool retmatch; |
4220 | 862 | int piecelen = 0; | 1657 | retmatch = match_piece_times(source, &piecelen, i < min ? min : i, &match_lens); |
4221 | 863 | if((*piece_it)->match_piece_times(source+pieceslen, &piecelen, !pieceslen ? i : 1, NULL)) | 1658 | i = match_lens.size()-1;//number of matches |
4222 | 864 | { | 1659 | if(i<0) |
4223 | 865 | pieceslen += piecelen; | 1660 | i = 0; |
4224 | 1661 | if((i>=min)) | ||
4225 | 1662 | { | ||
4226 | 1663 | pieceslen = piecelen; | ||
4227 | 1664 | if((shortest_len >= 0) && (shortest_len <= pieceslen))//this branch is longer | ||
4228 | 1665 | {//try another branch | ||
4229 | 1666 | i = choose_another_branch(match_lens); | ||
4230 | 1667 | if(i >= 0) | ||
4231 | 1668 | continue;//try another branch | ||
4232 | 1669 | else | ||
4233 | 1670 | break; | ||
4234 | 1671 | } | ||
4235 | 866 | int otherpieces = 0; | 1672 | int otherpieces = 0; |
4243 | 867 | if((next_it == piece_list.end()) || | 1673 | if((next_it == end_it) || |
4244 | 868 | ((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+pieceslen, &otherpieces)) || | 1674 | (*next_it).piece->match_piece(next_it, end_it, source+pieceslen, &otherpieces) |
4245 | 869 | (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+pieceslen, &otherpieces))) | 1675 | ) |
4246 | 870 | { | 1676 | { |
4247 | 871 | *matched_len = pieceslen + otherpieces; | 1677 | if((i == pieceslen) || (match_lens.at(0).second == 0) ||//minimum achieved already, cannot go lower than that |
4248 | 872 | return true; | 1678 | !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) |
4249 | 873 | } | 1679 | { |
4250 | 1680 | *matched_len = pieceslen + otherpieces; | ||
4251 | 1681 | return true; | ||
4252 | 1682 | } | ||
4253 | 1683 | if((shortest_len < 0) || (shortest_len > pieceslen)) | ||
4254 | 1684 | { | ||
4255 | 1685 | shortest_len = pieceslen; | ||
4256 | 1686 | otherpieces_shortest = otherpieces; | ||
4257 | 1687 | i_shortest = i; | ||
4258 | 1688 | if(match_lens.at(0).second != 0) | ||
4259 | 1689 | atom->regex_intern->save_subregex_list(saved_subregex); | ||
4260 | 1690 | } | ||
4261 | 1691 | i = choose_another_branch(match_lens); | ||
4262 | 1692 | if(i >= 0) | ||
4263 | 1693 | continue;//try another branch | ||
4264 | 1694 | else | ||
4265 | 1695 | break; | ||
4266 | 1696 | } | ||
4267 | 1697 | else | ||
4268 | 1698 | { | ||
4269 | 1699 | //try further | ||
4270 | 1700 | if(retmatch) | ||
4271 | 1701 | { | ||
4272 | 1702 | i++; | ||
4273 | 1703 | if((max < 0) || (i<=max)) | ||
4274 | 1704 | continue; | ||
4275 | 1705 | i--; | ||
4276 | 1706 | } | ||
4277 | 1707 | } | ||
4278 | 1708 | } | ||
4279 | 1709 | |||
4280 | 1710 | if(i==0) | ||
4281 | 1711 | { | ||
4282 | 1712 | break; | ||
4283 | 874 | } | 1713 | } |
4284 | 875 | else | 1714 | else |
4287 | 876 | break; | 1715 | { |
4288 | 877 | i++; | 1716 | i = choose_another_branch(match_lens); |
4289 | 1717 | if(i >= 0) | ||
4290 | 1718 | continue;//try another branch | ||
4291 | 1719 | else | ||
4292 | 1720 | break; | ||
4293 | 1721 | } | ||
4294 | 878 | } | 1722 | } |
4295 | 879 | 1723 | ||
4296 | 1724 | if(shortest_len >= 0) | ||
4297 | 1725 | { | ||
4298 | 1726 | if(strict_max && (max>=0) && (i_shortest > max)) | ||
4299 | 1727 | return false; | ||
4300 | 1728 | *matched_len = shortest_len + otherpieces_shortest; | ||
4301 | 1729 | if(saved_subregex.size()) | ||
4302 | 1730 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4303 | 1731 | return true; | ||
4304 | 1732 | } | ||
4305 | 880 | return false; | 1733 | return false; |
4306 | 881 | } | 1734 | } |
4307 | 882 | 1735 | ||
4308 | 883 | //match as much as possible | 1736 | //match as much as possible |
4311 | 884 | bool CRegexAscii_branch::match_piece_iter_normal( | 1737 | bool CRegexXQuery_piece::match_piece_iter_normal( |
4312 | 885 | std::list<CRegexAscii_piece*>::iterator piece_it, | 1738 | std::list<RegexAscii_pieceinfo>::iterator piece_it, |
4313 | 1739 | std::list<RegexAscii_pieceinfo>::iterator end_it, | ||
4314 | 886 | const char *source, int *matched_len) | 1740 | const char *source, int *matched_len) |
4315 | 887 | { | 1741 | { |
4316 | 888 | *matched_len = 0; | 1742 | *matched_len = 0; |
4317 | 889 | 1743 | ||
4318 | 890 | int min, max; | 1744 | int min, max; |
4319 | 891 | bool strict_max; | 1745 | bool strict_max; |
4324 | 892 | std::vector<int> match_lens; | 1746 | std::vector<std::pair<int,int> > match_lens; |
4325 | 893 | (*piece_it)->get_quantifier(&min, &max, &strict_max); | 1747 | (*piece_it).piece->get_quantifier(&min, &max, &strict_max); |
4326 | 894 | int timeslen; | 1748 | int timeslen = 0; |
4327 | 895 | if(strict_max && (max >= 0)) | 1749 | std::vector<std::pair<const char*, int> > saved_subregex; |
4328 | 1750 | |||
4329 | 1751 | if(is_regex_atom()) | ||
4330 | 896 | { | 1752 | { |
4335 | 897 | //check if the piece doesn't exceed the max match | 1753 | //recursive |
4336 | 898 | //if((*piece_it)->match_piece_times(source, ×len, max+1, &match_lens)) | 1754 | bool retmatch; |
4337 | 899 | // return false;///too many matches | 1755 | atom->regex_intern->save_subregex_list(saved_subregex); |
4338 | 900 | (*piece_it)->match_piece_times(source, ×len, max, &match_lens); | 1756 | if(((max == -1) || ((*piece_it).nr_matches < max)) && //try further with this piece |
4339 | 1757 | (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop | ||
4340 | 1758 | { | ||
4341 | 1759 | int start_from_branch = 0; | ||
4342 | 1760 | int longest_len = -1; | ||
4343 | 1761 | bool branch_saved = false; | ||
4344 | 1762 | //try all branches to get the longest len | ||
4345 | 1763 | (*piece_it).nr_matches++; | ||
4346 | 1764 | while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it)) | ||
4347 | 1765 | { | ||
4348 | 1766 | if((longest_len < *matched_len)) | ||
4349 | 1767 | { | ||
4350 | 1768 | longest_len = *matched_len; | ||
4351 | 1769 | if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
4352 | 1770 | { | ||
4353 | 1771 | atom->regex_intern->save_subregex_list(saved_subregex); | ||
4354 | 1772 | branch_saved = true; | ||
4355 | 1773 | } | ||
4356 | 1774 | } | ||
4357 | 1775 | if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
4358 | 1776 | break; | ||
4359 | 1777 | } | ||
4360 | 1778 | if(longest_len != -1) | ||
4361 | 1779 | { | ||
4362 | 1780 | *matched_len = longest_len; | ||
4363 | 1781 | if(branch_saved) | ||
4364 | 1782 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4365 | 1783 | return true; | ||
4366 | 1784 | } | ||
4367 | 1785 | else | ||
4368 | 1786 | { | ||
4369 | 1787 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4370 | 1788 | (*piece_it).nr_matches--; | ||
4371 | 1789 | } | ||
4372 | 1790 | } | ||
4373 | 1791 | if((*piece_it).nr_matches >= min) | ||
4374 | 1792 | { | ||
4375 | 1793 | //go to next piece | ||
4376 | 1794 | std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it; | ||
4377 | 1795 | next_it++; | ||
4378 | 1796 | if(next_it == end_it) | ||
4379 | 1797 | return true; | ||
4380 | 1798 | retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len); | ||
4381 | 1799 | if(!retmatch) | ||
4382 | 1800 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4383 | 1801 | return retmatch; | ||
4384 | 1802 | } | ||
4385 | 1803 | else | ||
4386 | 1804 | { | ||
4387 | 1805 | // regex_atom->restore_match(); | ||
4388 | 1806 | atom->regex_intern->load_subregex_list(saved_subregex); | ||
4389 | 1807 | return false; | ||
4390 | 1808 | } | ||
4391 | 901 | } | 1809 | } |
4392 | 902 | else if(!strict_max && (max >= 0)) | ||
4393 | 903 | (*piece_it)->match_piece_times(source, ×len, max, &match_lens); | ||
4394 | 904 | else | ||
4395 | 905 | (*piece_it)->match_piece_times(source, ×len, -1, &match_lens); | ||
4396 | 906 | 1810 | ||
4399 | 907 | int i; | 1811 | int longest_len = -1; |
4400 | 908 | std::list<CRegexAscii_piece*>::iterator next_it = piece_it; | 1812 | int otherpieces_longest = -1; |
4401 | 1813 | int i_longest = -1; | ||
4402 | 1814 | int i = max; | ||
4403 | 1815 | std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it; | ||
4404 | 909 | next_it++; | 1816 | next_it++; |
4406 | 910 | if(next_it == piece_list.end()) | 1817 | |
4407 | 1818 | bool retmatch; | ||
4408 | 1819 | while(1) | ||
4409 | 911 | { | 1820 | { |
4414 | 912 | if((int)match_lens.size() > min) | 1821 | retmatch = match_piece_times(source, ×len, i, &match_lens); |
4415 | 913 | { | 1822 | i=match_lens.size()-1;//number of matches |
4416 | 914 | *matched_len = timeslen; | 1823 | if((i>=min)) |
4417 | 915 | return true; | 1824 | { |
4418 | 1825 | if(timeslen < longest_len) | ||
4419 | 1826 | {//this branch is no use | ||
4420 | 1827 | i = choose_another_branch(match_lens); | ||
4421 | 1828 | if(i >= 0) | ||
4422 | 1829 | { | ||
4423 | 1830 | i = max; | ||
4424 | 1831 | continue;//try another branch | ||
4425 | 1832 | } | ||
4426 | 1833 | else | ||
4427 | 1834 | break; | ||
4428 | 1835 | } | ||
4429 | 1836 | //int piecelen = 0; | ||
4430 | 1837 | int otherpieces = 0; | ||
4431 | 1838 | if((next_it == end_it) || | ||
4432 | 1839 | (*next_it).piece->match_piece(next_it, end_it, source+timeslen, &otherpieces) | ||
4433 | 1840 | ) | ||
4434 | 1841 | { | ||
4435 | 1842 | if(timeslen > longest_len) | ||
4436 | 1843 | { | ||
4437 | 1844 | longest_len = timeslen; | ||
4438 | 1845 | otherpieces_longest = otherpieces; | ||
4439 | 1846 | i_longest = i; | ||
4440 | 1847 | if(!(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH)) | ||
4441 | 1848 | { | ||
4442 | 1849 | *matched_len = longest_len + otherpieces_longest; | ||
4443 | 1850 | return true; | ||
4444 | 1851 | } | ||
4445 | 1852 | else | ||
4446 | 1853 | { | ||
4447 | 1854 | if(match_lens.at(0).second) | ||
4448 | 1855 | atom->regex_intern->save_subregex_list(saved_subregex); | ||
4449 | 1856 | } | ||
4450 | 1857 | } | ||
4451 | 1858 | } | ||
4452 | 1859 | else | ||
4453 | 1860 | { | ||
4454 | 1861 | if(!match_lens.at(0).second) | ||
4455 | 1862 | { | ||
4456 | 1863 | match_lens.resize(match_lens.size()-1); | ||
4457 | 1864 | i--; | ||
4458 | 1865 | if(i >= 0) | ||
4459 | 1866 | continue;//try smaller | ||
4460 | 1867 | else | ||
4461 | 1868 | break; | ||
4462 | 1869 | } | ||
4463 | 1870 | else | ||
4464 | 1871 | { | ||
4465 | 1872 | i = choose_another_branch(match_lens); | ||
4466 | 1873 | if(i >= 0) | ||
4467 | 1874 | continue;//try another branch | ||
4468 | 1875 | else | ||
4469 | 1876 | break; | ||
4470 | 1877 | } | ||
4471 | 1878 | } | ||
4472 | 1879 | } | ||
4473 | 1880 | //now try another branch | ||
4474 | 1881 | i = choose_another_branch(match_lens); | ||
4475 | 1882 | if(i >= 0) | ||
4476 | 1883 | { | ||
4477 | 1884 | i = max; | ||
4478 | 1885 | continue;//try another branch | ||
4479 | 916 | } | 1886 | } |
4480 | 917 | else | 1887 | else |
4484 | 918 | return false; | 1888 | break; |
4485 | 919 | } | 1889 | }//end while |
4486 | 920 | for(i=match_lens.size()-1; i>=min; i--) | 1890 | |
4487 | 1891 | if(longest_len >= 0) | ||
4488 | 921 | { | 1892 | { |
4497 | 922 | int piecelen = 0; | 1893 | *matched_len = longest_len + otherpieces_longest; |
4498 | 923 | int otherpieces = 0; | 1894 | if(saved_subregex.size()) |
4499 | 924 | if(((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+match_lens[i]+piecelen, &otherpieces)) || | 1895 | atom->regex_intern->load_subregex_list(saved_subregex); |
4500 | 925 | (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+match_lens[i]+piecelen, &otherpieces))) | 1896 | return true; |
4493 | 926 | { | ||
4494 | 927 | *matched_len = match_lens[i] + piecelen + otherpieces; | ||
4495 | 928 | return true; | ||
4496 | 929 | } | ||
4501 | 930 | } | 1897 | } |
4502 | 931 | 1898 | ||
4503 | 932 | return false; | 1899 | return false; |
4504 | 933 | } | 1900 | } |
4505 | 934 | 1901 | ||
// CRegexXQuery_piece::match_piece_times (notes refer to the new-side text of each row)
//
// Applies this piece's atom repeatedly to `source`.
//   source     - start of the text this piece is matched against
//   piecelen   - out: number of bytes consumed by all repetitions so far
//   times      - upper bound on the repetition index; a negative value means
//                "no bound" (the loop then ends only on a failed or
//                zero-length atom match)
//   match_lens - optional log with one (offset, branch) pair per repetition:
//                offset is *piecelen before that repetition, branch is the
//                value left in start_from_branch by atom->match().
//                When it is non-empty on entry, matching resumes at its last
//                entry instead of starting from scratch.
// Returns false as soon as atom->match() fails (after logging (offset, 0)
// for that repetition); returns true otherwise.
4507 | 935 | bool CRegexAscii_piece::match_piece_times(const char *source, | 1902 | bool CRegexXQuery_piece::match_piece_times(const char *source, |
4508 | 936 | int *piecelen, | 1903 | int *piecelen, |
4509 | 937 | int times, | 1904 | int times, |
4511 | 938 | std::vector<int> *match_lens) | 1905 | std::vector<std::pair<int,int> > *match_lens) |
4512 | 939 | { | 1906 | { |
4516 | 940 | *piecelen = 0; | 1907 | int i=0; |
// Resume point: index and offset of the last logged repetition, if any.
4517 | 941 | for(int i=0;(times < 0) || (i<times);i++) | 1908 | if(match_lens && match_lens->size()) |
4518 | 942 | { | 1909 | { |
4519 | 1910 | i = match_lens->size()-1; | ||
4520 | 1911 | } | ||
4521 | 1912 | if(match_lens && match_lens->size()) | ||
4522 | 1913 | *piecelen = match_lens->at(match_lens->size()-1).first; | ||
4523 | 1914 | else | ||
4524 | 1915 | *piecelen = 0; | ||
// Nothing left to do when the resume index already reached the bound.
4525 | 1916 | if((times >= 0) && (i>=times)) | ||
4526 | 1917 | return true; | ||
4527 | 1918 | for(;(times < 0) || (i<times);i++) | ||
4528 | 1919 | { | ||
4529 | 1920 | int atomlen; | ||
4530 | 1921 | int start_from_branch = 0; | ||
// A previously logged repetition is retried from the branch stored for it.
4531 | 1922 | if(match_lens && (i<(int)match_lens->size())) | ||
4532 | 1923 | start_from_branch = match_lens->at(i).second; | ||
4533 | 1924 | bool first_branch = (start_from_branch == 0); | ||
// The atom is matched on its own here: it is handed an empty piece range.
4534 | 1925 | if(!atom->match(source+*piecelen, &start_from_branch, &atomlen, empty_pieces.begin(), empty_pieces.end())) | ||
4535 | 1926 | { | ||
// Failure: overwrite or append the log entry for repetition i with branch 0.
4536 | 1927 | if(match_lens) | ||
4537 | 1928 | { | ||
4538 | 1929 | if(i >= (int)match_lens->size()) | ||
4539 | 1930 | match_lens->push_back(std::pair<int,int>(*piecelen, 0)); | ||
4540 | 1931 | else | ||
4541 | 1932 | (*match_lens)[i] = std::pair<int,int>(*piecelen, 0); | ||
4542 | 1933 | } | ||
4543 | 1934 | return false; | ||
4544 | 1935 | } | ||
// Success: log the offset before this repetition and the branch value
// left behind by atom->match().
4545 | 943 | if(match_lens) | 1936 | if(match_lens) |
4550 | 944 | match_lens->push_back(*piecelen); | 1937 | { |
4551 | 945 | int atomlen; | 1938 | if(i >= (int)match_lens->size()) |
4552 | 946 | if(!atom->match(source+*piecelen, &atomlen)) | 1939 | match_lens->push_back(std::pair<int,int>(*piecelen, start_from_branch)); |
4553 | 947 | return false; | 1940 | else |
4554 | 1941 | (*match_lens)[i] = std::pair<int,int>(*piecelen, start_from_branch); | ||
4555 | 1942 | } | ||
4556 | 948 | *piecelen += atomlen; | 1943 | *piecelen += atomlen; |
// A zero-length match at the end of the input cannot make further progress.
4557 | 949 | if(!atomlen && !source[*piecelen]) | 1944 | if(!atomlen && !source[*piecelen]) |
4558 | 950 | { | 1945 | { |
4560 | 951 | atom->regex_intern->reachedEnd = true; | 1946 | // atom->regex_intern->set_reachedEnd(source); |
4561 | 1947 | break; | ||
4562 | 1948 | } | ||
4563 | 1949 | if(first_branch && (atomlen == 0))//avoid infinite loop | ||
4564 | 1950 | { | ||
4565 | 952 | break; | 1951 | break; |
4566 | 953 | } | 1952 | } |
4567 | 954 | } | 1953 | } |
// Always append a closing entry holding the total consumed length.
4568 | 955 | if(match_lens) | 1954 | if(match_lens) |
4570 | 956 | match_lens->push_back(*piecelen); | 1955 | { |
4571 | 1956 | // if(i >= match_lens->size()) | ||
4572 | 1957 | match_lens->push_back(std::pair<int,int>(*piecelen, 0)); | ||
4573 | 1958 | // else | ||
4574 | 1959 | // (*match_lens)[i] = std::pair<int,int>(*piecelen, 0); | ||
4575 | 1960 | } | ||
4576 | 957 | 1961 |
4577 | 958 | return true; | 1962 | return true; |
4578 | 959 | } | 1963 | } |
4579 | 960 | 1964 | ||
4580 | 1965 | bool CRegexXQuery_multicharP::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4581 | 1966 | { | ||
4582 | 1967 | if(!source[0]) | ||
4583 | 1968 | { | ||
4584 | 1969 | regex_intern->set_reachedEnd(source); | ||
4585 | 1970 | return false; | ||
4586 | 1971 | } | ||
4587 | 1972 | bool found = false; | ||
4588 | 1973 | const char *temp_source = source; | ||
4589 | 1974 | unicode::code_point utf8c = utf8::next_char(temp_source); | ||
4590 | 1975 | switch(multichar_type) | ||
4591 | 1976 | { | ||
4592 | 1977 | case unicode::UNICODE_Ll + 50: | ||
4593 | 1978 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ll) || | ||
4594 | 1979 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lm) || | ||
4595 | 1980 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lo) || | ||
4596 | 1981 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lt) || | ||
4597 | 1982 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Lu)) | ||
4598 | 1983 | { | ||
4599 | 1984 | if(!is_reverse) | ||
4600 | 1985 | found = true; | ||
4601 | 1986 | } | ||
4602 | 1987 | else | ||
4603 | 1988 | { | ||
4604 | 1989 | if(is_reverse) | ||
4605 | 1990 | found = true; | ||
4606 | 1991 | } | ||
4607 | 1992 | break; | ||
4608 | 1993 | case unicode::UNICODE_Mc + 50: | ||
4609 | 1994 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mn) || | ||
4610 | 1995 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Mc) || | ||
4611 | 1996 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Me)) | ||
4612 | 1997 | { | ||
4613 | 1998 | if(!is_reverse) | ||
4614 | 1999 | found = true; | ||
4615 | 2000 | } | ||
4616 | 2001 | else | ||
4617 | 2002 | { | ||
4618 | 2003 | if(is_reverse) | ||
4619 | 2004 | found = true; | ||
4620 | 2005 | } | ||
4621 | 2006 | break; | ||
4622 | 2007 | case unicode::UNICODE_Nd + 50: | ||
4623 | 2008 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd) || | ||
4624 | 2009 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nl) || | ||
4625 | 2010 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_No)) | ||
4626 | 2011 | { | ||
4627 | 2012 | if(!is_reverse) | ||
4628 | 2013 | found = true; | ||
4629 | 2014 | } | ||
4630 | 2015 | else | ||
4631 | 2016 | { | ||
4632 | 2017 | if(is_reverse) | ||
4633 | 2018 | found = true; | ||
4634 | 2019 | } | ||
4635 | 2020 | break; | ||
4636 | 2021 | case unicode::UNICODE_Pc + 50: | ||
4637 | 2022 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) || | ||
4638 | 2023 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) || | ||
4639 | 2024 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) || | ||
4640 | 2025 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) || | ||
4641 | 2026 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) || | ||
4642 | 2027 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) || | ||
4643 | 2028 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po)) | ||
4644 | 2029 | { | ||
4645 | 2030 | if(!is_reverse) | ||
4646 | 2031 | found = true; | ||
4647 | 2032 | } | ||
4648 | 2033 | else | ||
4649 | 2034 | { | ||
4650 | 2035 | if(is_reverse) | ||
4651 | 2036 | found = true; | ||
4652 | 2037 | } | ||
4653 | 2038 | break; | ||
4654 | 2039 | case unicode::UNICODE_Zl + 50: | ||
4655 | 2040 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) || | ||
4656 | 2041 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) || | ||
4657 | 2042 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp)) | ||
4658 | 2043 | { | ||
4659 | 2044 | if(!is_reverse) | ||
4660 | 2045 | found = true; | ||
4661 | 2046 | } | ||
4662 | 2047 | else | ||
4663 | 2048 | { | ||
4664 | 2049 | if(is_reverse) | ||
4665 | 2050 | found = true; | ||
4666 | 2051 | } | ||
4667 | 2052 | break; | ||
4668 | 2053 | case unicode::UNICODE_Sc + 50: | ||
4669 | 2054 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sm) || | ||
4670 | 2055 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sc) || | ||
4671 | 2056 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Sk) || | ||
4672 | 2057 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_So)) | ||
4673 | 2058 | { | ||
4674 | 2059 | if(!is_reverse) | ||
4675 | 2060 | found = true; | ||
4676 | 2061 | } | ||
4677 | 2062 | else | ||
4678 | 2063 | { | ||
4679 | 2064 | if(is_reverse) | ||
4680 | 2065 | found = true; | ||
4681 | 2066 | } | ||
4682 | 2067 | break; | ||
4683 | 2068 | case unicode::UNICODE_Cc + 50: | ||
4684 | 2069 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) || | ||
4685 | 2070 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) || | ||
4686 | 2071 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co))//ignore unicode::UNICODE_Cn | ||
4687 | 2072 | { | ||
4688 | 2073 | if(!is_reverse) | ||
4689 | 2074 | found = true; | ||
4690 | 2075 | } | ||
4691 | 2076 | else | ||
4692 | 2077 | { | ||
4693 | 2078 | if(is_reverse) | ||
4694 | 2079 | found = true; | ||
4695 | 2080 | } | ||
4696 | 2081 | break; | ||
4697 | 2082 | default: | ||
4698 | 2083 | if(unicode::check_codepoint_category(utf8c, (unicode::category)multichar_type)) | ||
4699 | 2084 | { | ||
4700 | 2085 | if(!is_reverse) | ||
4701 | 2086 | found = true; | ||
4702 | 2087 | } | ||
4703 | 2088 | else | ||
4704 | 2089 | { | ||
4705 | 2090 | if(is_reverse) | ||
4706 | 2091 | found = true; | ||
4707 | 2092 | } | ||
4708 | 2093 | break; | ||
4709 | 2094 | } | ||
4710 | 2095 | |||
4711 | 2096 | if(found) | ||
4712 | 2097 | { | ||
4713 | 2098 | *matched_len = temp_source - source; | ||
4714 | 2099 | } | ||
4715 | 2100 | return found; | ||
4716 | 2101 | } | ||
4717 | 2102 | |||
4718 | 2103 | bool CRegexXQuery_multicharIs::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4719 | 2104 | { | ||
4720 | 2105 | if(!source[0]) | ||
4721 | 2106 | { | ||
4722 | 2107 | regex_intern->set_reachedEnd(source); | ||
4723 | 2108 | return false; | ||
4724 | 2109 | } | ||
4725 | 2110 | bool found = false; | ||
4726 | 2111 | const char *temp_source = source; | ||
4727 | 2112 | unicode::code_point utf8c = utf8::next_char(temp_source); | ||
4728 | 2113 | const unicode::code_point *cp = block_escape[block_index].cp; | ||
4729 | 2114 | if((utf8c >= cp[0]) && (utf8c <= cp[1])) | ||
4730 | 2115 | { | ||
4731 | 2116 | if(!is_reverse) | ||
4732 | 2117 | found = true; | ||
4733 | 2118 | } | ||
4734 | 2119 | else if(block_escape[block_index].ext_cp) | ||
4735 | 2120 | { | ||
4736 | 2121 | cp = block_escape[block_index].ext_cp; | ||
4737 | 2122 | while(*cp) | ||
4738 | 2123 | { | ||
4739 | 2124 | if((utf8c >= cp[0]) && (utf8c <= cp[1])) | ||
4740 | 2125 | break; | ||
4741 | 2126 | cp += 2; | ||
4742 | 2127 | } | ||
4743 | 2128 | if(*cp) | ||
4744 | 2129 | { | ||
4745 | 2130 | if(!is_reverse) | ||
4746 | 2131 | found = true; | ||
4747 | 2132 | } | ||
4748 | 2133 | else | ||
4749 | 2134 | { | ||
4750 | 2135 | if(is_reverse) | ||
4751 | 2136 | found = true; | ||
4752 | 2137 | } | ||
4753 | 2138 | } | ||
4754 | 2139 | else | ||
4755 | 2140 | { | ||
4756 | 2141 | if(is_reverse) | ||
4757 | 2142 | found = true; | ||
4758 | 2143 | } | ||
4759 | 2144 | if(found) | ||
4760 | 2145 | { | ||
4761 | 2146 | *matched_len = temp_source - source; | ||
4762 | 2147 | } | ||
4763 | 2148 | return found; | ||
4764 | 2149 | } | ||
4765 | 2150 | |||
4766 | 2151 | bool CRegexXQuery_multicharOther::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4767 | 2152 | { | ||
4768 | 2153 | if(!source[0]) | ||
4769 | 2154 | { | ||
4770 | 2155 | regex_intern->set_reachedEnd(source); | ||
4771 | 2156 | return false; | ||
4772 | 2157 | } | ||
4773 | 2158 | bool found = false; | ||
4774 | 2159 | bool value_true = true; | ||
4775 | 2160 | const char *temp_source = source; | ||
4776 | 2161 | unicode::code_point utf8c = utf8::next_char(temp_source); | ||
4777 | 2162 | switch(multichar_type) | ||
4778 | 2163 | { | ||
4779 | 2164 | case 'S':value_true = false;//[^\s] | ||
4780 | 2165 | case 's'://[#x20\t\n\r] | ||
4781 | 2166 | switch(utf8c) | ||
4782 | 2167 | { | ||
4783 | 2168 | case '\t': | ||
4784 | 2169 | case '\r': | ||
4785 | 2170 | case '\n': | ||
4786 | 2171 | case ' ': | ||
4787 | 2172 | found = true; | ||
4788 | 2173 | default: | ||
4789 | 2174 | break; | ||
4790 | 2175 | } | ||
4791 | 2176 | break; | ||
4792 | 2177 | case 'I':value_true = false;//[^\i] | ||
4793 | 2178 | case 'i'://the set of initial name characters, those matched by Letter | '_' | ':' | ||
4794 | 2179 | if((utf8c == '_') || | ||
4795 | 2180 | (utf8c == ':') || | ||
4796 | 2181 | XQCharType::isLetter(utf8c)) | ||
4797 | 2182 | { | ||
4798 | 2183 | found = true; | ||
4799 | 2184 | } | ||
4800 | 2185 | break; | ||
4801 | 2186 | case 'C':value_true = false;//[^\c] | ||
4802 | 2187 | case 'c'://the set of name characters, those matched by NameChar | ||
4803 | 2188 | if(XQCharType::isNameChar(utf8c)) | ||
4804 | 2189 | { | ||
4805 | 2190 | found = true; | ||
4806 | 2191 | } | ||
4807 | 2192 | break; | ||
4808 | 2193 | case 'D':value_true = false;//[^\d] | ||
4809 | 2194 | case 'd': | ||
4810 | 2195 | if(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Nd)) | ||
4811 | 2196 | found = true; | ||
4812 | 2197 | break; | ||
4813 | 2198 | case 'W':value_true = false;//[^\w] | ||
4814 | 2199 | case 'w': | ||
4815 | 2200 | found = !(unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pc) || | ||
4816 | 2201 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pd) || | ||
4817 | 2202 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Ps) || | ||
4818 | 2203 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pe) || | ||
4819 | 2204 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pi) || | ||
4820 | 2205 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Pf) || | ||
4821 | 2206 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Po) || | ||
4822 | 2207 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zs) || | ||
4823 | 2208 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zl) || | ||
4824 | 2209 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Zp) || | ||
4825 | 2210 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cc) || | ||
4826 | 2211 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Cf) || | ||
4827 | 2212 | unicode::check_codepoint_category(utf8c, unicode::UNICODE_Co));//ignore unicode::UNICODE_Cn | ||
4828 | 2213 | break; | ||
4829 | 2214 | default: | ||
4830 | 2215 | throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(source, ZED(REGEX_UNIMPLEMENTED)) ); | ||
4831 | 2216 | } | ||
4832 | 2217 | if((found && value_true) || (!found && !value_true)) | ||
4833 | 2218 | { | ||
4834 | 2219 | *matched_len = temp_source - source; | ||
4835 | 2220 | return true; | ||
4836 | 2221 | } | ||
4837 | 2222 | else | ||
4838 | 2223 | { | ||
4839 | 2224 | return false; | ||
4840 | 2225 | } | ||
4841 | 2226 | } | ||
4842 | 2227 | |||
4843 | 2228 | bool CRegexXQuery_char_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4844 | 2229 | { | ||
4845 | 2230 | if(!source[0]) | ||
4846 | 2231 | { | ||
4847 | 2232 | regex_intern->set_reachedEnd(source); | ||
4848 | 2233 | return false; | ||
4849 | 2234 | } | ||
4850 | 2235 | if(source[0] == c) | ||
4851 | 2236 | { | ||
4852 | 2237 | *matched_len = 1; | ||
4853 | 2238 | return true; | ||
4854 | 2239 | } | ||
4855 | 2240 | else | ||
4856 | 2241 | return false; | ||
4857 | 2242 | } | ||
4858 | 2243 | |||
4859 | 2244 | bool CRegexXQuery_char_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4860 | 2245 | { | ||
4861 | 2246 | if(!source[0]) | ||
4862 | 2247 | { | ||
4863 | 2248 | regex_intern->set_reachedEnd(source); | ||
4864 | 2249 | return false; | ||
4865 | 2250 | } | ||
4866 | 2251 | char sup = toupper(source[0]); | ||
4867 | 2252 | if(sup == c) | ||
4868 | 2253 | { | ||
4869 | 2254 | *matched_len = 1; | ||
4870 | 2255 | return true; | ||
4871 | 2256 | } | ||
4872 | 2257 | else | ||
4873 | 2258 | return false; | ||
4874 | 2259 | } | ||
4875 | 2260 | |||
4876 | 2261 | bool CRegexXQuery_char_range_ascii::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4877 | 2262 | { | ||
4878 | 2263 | if(!source[0]) | ||
4879 | 2264 | { | ||
4880 | 2265 | regex_intern->set_reachedEnd(source); | ||
4881 | 2266 | return false; | ||
4882 | 2267 | } | ||
4883 | 2268 | if((source[0] >= c1) && (source[0] <= c2)) | ||
4884 | 2269 | { | ||
4885 | 2270 | *matched_len = 1; | ||
4886 | 2271 | return true; | ||
4887 | 2272 | } | ||
4888 | 2273 | else | ||
4889 | 2274 | return false; | ||
4890 | 2275 | } | ||
4891 | 2276 | |||
4892 | 2277 | bool CRegexXQuery_char_range_ascii_i::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4893 | 2278 | { | ||
4894 | 2279 | if(!source[0]) | ||
4895 | 2280 | { | ||
4896 | 2281 | regex_intern->set_reachedEnd(source); | ||
4897 | 2282 | return false; | ||
4898 | 2283 | } | ||
4899 | 2284 | char sup = toupper(source[0]); | ||
4900 | 2285 | if((sup >= c1) && (sup <= c2)) | ||
4901 | 2286 | { | ||
4902 | 2287 | *matched_len = 1; | ||
4903 | 2288 | return true; | ||
4904 | 2289 | } | ||
4905 | 2290 | else | ||
4906 | 2291 | return false; | ||
4907 | 2292 | } | ||
4908 | 2293 | |||
4909 | 2294 | bool CRegexXQuery_char_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4910 | 2295 | { | ||
4911 | 2296 | if(!source[0]) | ||
4912 | 2297 | { | ||
4913 | 2298 | regex_intern->set_reachedEnd(source); | ||
4914 | 2299 | return false; | ||
4915 | 2300 | } | ||
4916 | 2301 | if(!memcmp(source, c, len)) | ||
4917 | 2302 | { | ||
4918 | 2303 | *matched_len = len; | ||
4919 | 2304 | return true; | ||
4920 | 2305 | } | ||
4921 | 2306 | else | ||
4922 | 2307 | return false; | ||
4923 | 2308 | } | ||
4924 | 2309 | |||
4925 | 2310 | bool CRegexXQuery_char_unicode_cp::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4926 | 2311 | { | ||
4927 | 2312 | if(!source[0]) | ||
4928 | 2313 | { | ||
4929 | 2314 | regex_intern->set_reachedEnd(source); | ||
4930 | 2315 | return false; | ||
4931 | 2316 | } | ||
4932 | 2317 | const char *temp_source = source; | ||
4933 | 2318 | unicode::code_point utf8c = utf8::next_char(temp_source); | ||
4934 | 2319 | if(utf8c == c) | ||
4935 | 2320 | { | ||
4936 | 2321 | *matched_len = temp_source - source; | ||
4937 | 2322 | return true; | ||
4938 | 2323 | } | ||
4939 | 2324 | else | ||
4940 | 2325 | return false; | ||
4941 | 2326 | } | ||
4942 | 2327 | |||
4943 | 2328 | bool CRegexXQuery_char_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4944 | 2329 | { | ||
4945 | 2330 | if(!source[0]) | ||
4946 | 2331 | { | ||
4947 | 2332 | regex_intern->set_reachedEnd(source); | ||
4948 | 2333 | return false; | ||
4949 | 2334 | } | ||
4950 | 2335 | const char *temp_source = source; | ||
4951 | 2336 | unicode::code_point sup = unicode::to_upper(utf8::next_char(temp_source)); | ||
4952 | 2337 | if(sup == c) | ||
4953 | 2338 | { | ||
4954 | 2339 | *matched_len = temp_source - source; | ||
4955 | 2340 | return true; | ||
4956 | 2341 | } | ||
4957 | 2342 | else | ||
4958 | 2343 | return false; | ||
4959 | 2344 | } | ||
4960 | 2345 | |||
4961 | 2346 | bool CRegexXQuery_char_range_unicode::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4962 | 2347 | { | ||
4963 | 2348 | if(!source[0]) | ||
4964 | 2349 | { | ||
4965 | 2350 | regex_intern->set_reachedEnd(source); | ||
4966 | 2351 | return false; | ||
4967 | 2352 | } | ||
4968 | 2353 | const char *temp_source = source; | ||
4969 | 2354 | unicode::code_point utf8c = utf8::next_char(temp_source); | ||
4970 | 2355 | if((utf8c >= c1) && (utf8c <= c2)) | ||
4971 | 2356 | { | ||
4972 | 2357 | *matched_len = temp_source - source; | ||
4973 | 2358 | return true; | ||
4974 | 2359 | } | ||
4975 | 2360 | else | ||
4976 | 2361 | return false; | ||
4977 | 2362 | } | ||
4978 | 2363 | |||
4979 | 2364 | bool CRegexXQuery_char_range_unicode_i::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4980 | 2365 | { | ||
4981 | 2366 | if(!source[0]) | ||
4982 | 2367 | { | ||
4983 | 2368 | regex_intern->set_reachedEnd(source); | ||
4984 | 2369 | return false; | ||
4985 | 2370 | } | ||
4986 | 2371 | const char *temp_source = source; | ||
4987 | 2372 | unicode::code_point sup = unicode::to_upper(utf8::next_char(temp_source)); | ||
4988 | 2373 | if((sup >= c1) && (sup <= c2)) | ||
4989 | 2374 | { | ||
4990 | 2375 | *matched_len = temp_source - source; | ||
4991 | 2376 | return true; | ||
4992 | 2377 | } | ||
4993 | 2378 | else | ||
4994 | 2379 | return false; | ||
4995 | 2380 | } | ||
4996 | 2381 | |||
4997 | 2382 | bool CRegexXQuery_endline::match_internal(const char *source, int *start_from_branch, int *matched_len) | ||
4998 | 2383 | { | ||
4999 | 2384 | *matched_len = 0; | ||
5000 | 2385 | if(!source[0]) |
Compiling with ZORBA_NO_ICU=ON fails on Linux:
[ 1%] Building CXX object src/CMakeFiles/zorba_simplestore.dir/api/zorba_string.cpp.o
In file included from /home/mbrantner/zorba/sandbox/src/util/regex.h:501:0,
from /home/mbrantner/zorba/sandbox/src/api/zorba_string.cpp:23:
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:209:3: error: a class-key must be used when declaring a friend
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:209:3: error: friend declaration does not name a class or function
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:253:3: error: a class-key must be used when declaring a friend
/home/mbrantner/zorba/sandbox/src/util/regex_xquery.h:253:3: error: friend declaration does not name a class or function
make[2]: *** [src/CMakeFiles/zorba_simplestore.dir/api/zorba_string.cpp.o] Erro