Merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
- feature-ft_module
- Merge into trunk
Status: | Superseded | ||||
---|---|---|---|---|---|
Proposed branch: | lp:~zorba-coders/zorba/feature-ft_module | ||||
Merge into: | lp:zorba | ||||
Diff against target: |
12463 lines (+7382/-1377) 241 files modified
ChangeLog (+3/-0) cmake_modules/FindICU.cmake (+2/-0) doc/zorba/ft_intro.dox (+8/-8) doc/zorba/ft_stemmer.dox (+25/-7) doc/zorba/ft_thesaurus.dox (+134/-88) doc/zorba/ft_tokenizer.dox (+160/-61) include/zorba/locale.h (+189/-15) include/zorba/pregenerated/diagnostic_list.h (+8/-0) include/zorba/stemmer.h (+23/-4) include/zorba/thesaurus.h (+36/-22) include/zorba/tokenizer.h (+136/-56) include/zorba/uri_resolvers.h (+4/-3) modules/com/zorba-xquery/www/modules/CMakeLists.txt (+7/-0) modules/com/zorba-xquery/www/modules/full-text.xq (+872/-0) modules/com/zorba-xquery/www/modules/full-text.xsd (+134/-0) modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp (+1/-1) modules/com/zorba-xquery/www/modules/pregenerated/errors.xq (+17/-0) modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq (+1/-3) scripts/zt-wn-get (+3/-3) src/api/CMakeLists.txt (+3/-2) src/api/staticcontextimpl.cpp (+2/-3) src/api/stemmer_wrappers.cpp (+21/-12) src/api/stemmer_wrappers.h (+2/-1) src/api/thesaurus.cpp (+5/-3) src/api/thesaurus_wrappers.cpp (+21/-0) src/api/thesaurus_wrappers.h (+12/-0) src/api/uri_resolver_wrappers.cpp (+15/-33) src/api/xmldatamanagerimpl.cpp (+1/-1) src/api/xmldatamanagerimpl.h (+1/-1) src/compiler/codegen/plan_visitor.cpp (+1/-1) src/compiler/expression/expr_put.cpp (+1/-0) src/compiler/translator/translator.cpp (+45/-10) src/context/CMakeLists.txt (+0/-5) src/context/default_url_resolvers.cpp (+19/-5) src/context/static_context.cpp (+12/-4) src/context/static_context.h (+3/-0) src/context/stemmer_wrappers.cpp (+0/-74) src/context/stemmer_wrappers.h (+0/-63) src/context/uri_resolver.cpp (+0/-13) src/context/uri_resolver.h (+4/-23) src/diagnostics/assert.cpp (+1/-1) src/diagnostics/assert.h (+5/-1) src/diagnostics/diagnostic_en.xml (+23/-3) src/diagnostics/pregenerated/diagnostic_list.cpp (+12/-0) src/diagnostics/pregenerated/dict_en.cpp (+12/-0) src/functions/CMakeLists.txt (+4/-0) src/functions/external_function.cpp (+1/-2) 
src/functions/external_function.h (+0/-2) src/functions/func_ft_module_impl.cpp (+128/-0) src/functions/func_ft_module_impl.h (+81/-0) src/functions/function.cpp (+3/-0) src/functions/function.h (+9/-1) src/functions/function_consts.h (+7/-0) src/functions/library.cpp (+8/-0) src/functions/pregenerated/func_ft_module.cpp (+490/-0) src/functions/pregenerated/func_ft_module.h (+225/-0) src/functions/pregenerated/function_enum.h (+19/-0) src/runtime/full_text/CMakeLists.txt (+3/-1) src/runtime/full_text/apply.cpp (+46/-25) src/runtime/full_text/ft_module_impl.cpp (+843/-0) src/runtime/full_text/ft_module_impl.h (+32/-0) src/runtime/full_text/ft_query_item.h (+2/-1) src/runtime/full_text/ft_single_token_iterator.h (+0/-2) src/runtime/full_text/ft_stop_words_set.cpp (+32/-27) src/runtime/full_text/ft_stop_words_set.h (+29/-14) src/runtime/full_text/ft_token_matcher.cpp (+7/-7) src/runtime/full_text/ft_token_matcher.h (+1/-1) src/runtime/full_text/ft_token_seq_iterator.cpp (+7/-3) src/runtime/full_text/ft_token_seq_iterator.h (+3/-0) src/runtime/full_text/ft_token_span.h (+2/-2) src/runtime/full_text/ft_util.cpp (+42/-0) src/runtime/full_text/ft_util.h (+12/-1) src/runtime/full_text/ftcontains_visitor.cpp (+0/-10) src/runtime/full_text/full_text.h (+1/-1) src/runtime/full_text/icu_tokenizer.cpp (+43/-14) src/runtime/full_text/icu_tokenizer.h (+7/-5) src/runtime/full_text/latin_tokenizer.cpp (+31/-13) src/runtime/full_text/latin_tokenizer.h (+7/-5) src/runtime/full_text/pregenerated/ft_module.cpp (+362/-0) src/runtime/full_text/pregenerated/ft_module.h (+561/-0) src/runtime/full_text/stemmer.cpp (+8/-2) src/runtime/full_text/stemmer.h (+24/-5) src/runtime/full_text/stemmer/sb_stemmer.cpp (+20/-13) src/runtime/full_text/stemmer/sb_stemmer.h (+1/-0) src/runtime/full_text/thesauri/wn_thesaurus.cpp (+79/-9) src/runtime/full_text/thesauri/wn_thesaurus.h (+29/-5) src/runtime/full_text/thesauri/xqftts_thesaurus.cpp (+28/-3) src/runtime/full_text/thesauri/xqftts_thesaurus.h 
(+28/-4) src/runtime/full_text/thesaurus.cpp (+34/-50) src/runtime/full_text/thesaurus.h (+30/-4) src/runtime/full_text/tokenizer.cpp (+86/-8) src/runtime/spec/codegen-cpp.xq (+13/-5) src/runtime/spec/codegen-h.xq (+1/-1) src/runtime/spec/full_text/ft_module.xml (+208/-0) src/runtime/spec/mappings.xml (+14/-2) src/runtime/visitors/pregenerated/planiter_visitor.h (+91/-0) src/runtime/visitors/pregenerated/printer_visitor.cpp (+196/-0) src/runtime/visitors/pregenerated/printer_visitor.h (+65/-0) src/store/naive/atomic_items.cpp (+10/-10) src/store/naive/atomic_items.h (+8/-21) src/store/naive/node_items.cpp (+39/-126) src/store/naive/node_items.h (+17/-65) src/unit_tests/stemmer.cpp (+11/-7) src/unit_tests/string.cpp (+16/-0) src/unit_tests/thesaurus.cpp (+52/-25) src/unit_tests/tokenizer.cpp (+47/-28) src/util/fs_util.h (+3/-0) src/util/unicode_util.cpp (+14/-0) src/util/unicode_util.h (+12/-0) src/util/uri_util.h (+6/-6) src/util/utf8_util.h (+2/-1) src/util/utf8_util.tcc (+17/-10) src/zorbatypes/ft_token.cpp (+1/-1) src/zorbatypes/ft_token.h (+1/-1) src/zorbatypes/numconversions.cpp (+18/-7) src/zorbautils/locale.cpp (+385/-6) src/zorbautils/locale.h (+375/-271) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-current-lang-true-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-false-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-strip-diacritics-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-5.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-1.xml.res (+1/-0) 
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-2.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-3.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-4.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-1.xml.res (+1/-0) test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-2.xml.res (+1/-0) test/rbkt/Queries/CMakeLists.txt (+13/-4) test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq (+5/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-false-1.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xq (+3/-0) 
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq (+5/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq (+5/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.spec (+1/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.xq (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.spec (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xq 
(+6/-0) test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq (+5/-0) test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq (+5/-0) test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq (+6/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.spec (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq (+6/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.spec (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq (+7/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.spec (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq (+7/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.spec (+3/-0) test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq (+8/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq (+18/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq (+18/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq (+10/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq (+10/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq (+8/-0) test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq (+10/-0) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.spec (+4/-0) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.xq (+10/-0) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-1.spec (+0/-3) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-2.spec (+0/-3) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-3.spec (+1/-1) test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-4.spec (+1/-1) test/rbkt/Scripts/w3c/import_w3c_full_text_testsuite.sh (+1/-1) test/rbkt/testdriver.cpp (+1/-1) |
||||
To merge this branch: | bzr merge lp:~zorba-coders/zorba/feature-ft_module | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Markos Zaharioudakis | Pending | ||
Matthias Brantner | Pending | ||
Review via email: mp+103404@code.launchpad.net |
This proposal supersedes a proposal from 2012-04-25.
Commit message
1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.
Description of the change
1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal | # |
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal | # |
The Remote Queue seems to have gotten itself into an infinite loop. I've notified Chris. PLEASE DO NOT SET THE PROPOSAL TO "APPROVED" TO START THE REMOTE QUEUE UNTIL THIS IS FIXED.
@Matthias: The "theSerStream" change was to fix a crash in the module. No, I don't remember why it's fixed in this branch. I think the change is OK as it is because theSerStream is deleted via cleanUpBody() that's called from endBody(). However, I changed the code to set theSerStream to null in cleanUpBody() and put the delete back in the destructor. (Deleting a null pointer is guaranteed to be harmless in C++.)
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal | # |
What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal | # |
On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:
> What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.
FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.
- Paul
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal | # |
Other branches have been ok on the RQ machine with a 40-minute timeout. Why
does this branch take so much longer? If it added a great many slow tests
then that's probably ok, but if something is causing everything to go
slower, then that's a problem. Maybe ask on zorba-dev if it would be
possible to run the pdash tests on this branch?
On Apr 22, 2012 3:31 PM, "Paul J. Lucas" <email address hidden> wrote:
> On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:
>
> > What seems to be happening with the queue is that the build/test is
> timing out after 40 minutes, which unfortunately doesn't log appropriately.
> I have increased the timeout to 60 minutes and we'll see if it runs. But
> before this gets merged, we should look and see if we can figure out why
> it's taking longer than normal to run with this build.
>
> FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.
>
> - Paul
>
>
> --
>
> https:/
> Your team Zorba Coders is subscribed to branch lp:zorba.
>
> --
> Mailing list: https:/
> Post to : <email address hidden>
> Unsubscribe : https:/
> More help : https:/
>
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal | # |
I diff'd the output times between the trunk and this branch and nothing jumps out as being significantly longer. (Some tests actually run in less time.) My latest running of the test suite on my machine took 69 minutes.
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job feature-
finished. The final status was:
No tests were run - build or configure step must have failed.
Not commiting changes.
Error in read script: /home/ceej/
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal | # |
There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job feature-
finished. The final status was:
630 tests did not succeed - changes not commited.
Error in read script: /home/ceej/
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job feature-
finished. The final status was:
Undetermined, probably an error - please email <email address hidden> with the
number of this job!
Error in read script: /home/ceej/
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for merge proposal.
Log at: http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.
CMake Error at /home/ceej/
Validation queue job feature-
finished. The final status was:
Undetermined, probably an error - please email <email address hidden> with the
number of this job!
Error in read script: /home/ceej/
- 10802. By Chris Hillery
-
Add zorba::Item::getTypeCode(), and make corresponding SchemaTypeCode enum part of the public API. Add zorba::ItemFactory::assignElementTypedValue() to allow construction of elements with typed values in the public API. As discussed, this API is not the prettiest, but it is the least intrusive while being the easiest for end-users to make use of. Approved: Markos Zaharioudakis, Chris Hillery, Matthias Brantner - 10803. By Ghislain Fourny
-
Added an instruction to sort chained tests for deterministic order. Approved: Matthias Brantner, Markos Zaharioudakis
- 10804. By Paul J. Lucas
-
1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements. Approved: Chris Hillery, Paul J. Lucas - 10805. By Nicolae Brinza
-
Fixes for bugs #931501 and #866987 -- improved error messages for fn:format-number() Approved: Nicolae Brinza, Matthias Brantner
- 10806. By Rodolfo Ochoa
-
Collection Manager and Document Manager are ready in the XQJ API. Approved: Cezar Andrei, Matthias Brantner
- 10807. By Chris Hillery
-
Fix remote queue failures due to change in 28msec.com. Approved: Till Westmann, Chris Hillery
- 10808. By Cezar Andrei <email address hidden>
-
Fix for Bug #857842 Assertion failed with simple content element with comments
Make use of XML_SCHEMA_NS definition from ns_consts.h. Approved: Matthias Brantner, Cezar Andrei - 10809. By Sorin Marian Nasoi <email address hidden>
-
fix for lp:987830. Approved: Sorin Marian Nasoi, Markos Zaharioudakis
- 10810. By Sorin Marian Nasoi <email address hidden>
-
Added the STACK and QUEUE modules. Approved: Matthias Brantner, Sorin Marian Nasoi
- 10811. By Chris Hillery
-
Added note about re-running CMake if you add or remove .cpp files in your module source directory.
Approved: Juan Zacarias, Chris Hillery - 10812. By Nicolae Brinza
-
Added XQuery 3.0 Functions
path, has-children#0, nilled#0 Approved: Juan Zacarias, Matthias Brantner - 10813. By Chris Hillery
-
Massively refactor, reformat, correct, and clean up Zorba's Build doc. Approved: William Candillon, Chris Hillery
- 10814. By Markos Zaharioudakis
-
1. Fixed memory leak in case of index truncation
2. Fixed bug in mergeUpdates() method
Approved: Markos Zaharioudakis - 10815. By Nicolae Brinza
-
Small optimization in the serializer to avoid a repeated string comparison Approved: Nicolae Brinza, David Graf
- 10816. By Markos Zaharioudakis
-
Fixed bug in MarkNodeCopyProps rule (static cast to replace_expr without changing the expr kind first) Approved: Markos Zaharioudakis
- 10817. By Markos Zaharioudakis
-
fixed bug #966706 (key uniqueness of value equality index not enforced) Approved: Markos Zaharioudakis, Till Westmann, Matthias Brantner
- 10818. By Markos Zaharioudakis
-
Fixed bug #862971 (no error upon duplicate function declarations) Approved: Markos Zaharioudakis
- 10819. By Paul J. Lucas
-
Fixed warnings and build error. Approved: David Graf, Paul J. Lucas
- 10820. By David Graf
-
fixing windows build Approved: Ghislain Fourny, David Graf
- 10821. By Till Westmann
-
enable blocking of internal modules by running through URI mapping (but not through URL resolution) during translation Approved: Matthias Brantner, Markos Zaharioudakis, Chris Hillery
- 10822. By Markos Zaharioudakis
-
fixed memory leak in population of value index Approved: Markos Zaharioudakis
- 10823. By Paul J. Lucas
-
Fixed typos. Approved: Matthias Brantner, Paul J. Lucas
- 10824. By Matthias Brantner
-
optimized ft:tokenize (no validation of tokens + factorized creation of qnames) Approved: Paul J. Lucas, Matthias Brantner
- 10825. By Chris Hillery
-
Removed note about JSONiq (not in 2.5 after all). Approved: Matthias Brantner, Chris Hillery
- 10826. By Markos Zaharioudakis
-
fixed bug 867170$ Approved: Markos Zaharioudakis
- 10827. By Ghislain Fourny
-
Fixed bug 978254 (QName comparison in item sequence chainer ignored namespaces and took prefixes into account). Approved: Markos Zaharioudakis, Matthias Brantner
- 10828. By Paul J. Lucas
-
Fixes the stop-words core dump on 64-bit Linux. Approved: Matthias Brantner, Paul J. Lucas
- 10829. By Paul J. Lucas
-
1. Added fix for [\s] -- should now always throw FORX0002.
2. I think all regex tests "pass" using both pre- and post-4.0 of ICU. Approved: Ghislain Fourny, Paul J. Lucas - 10830. By Matthias Brantner
-
no node copying during insertion into collection if the nodes are freshly constructed nodes Approved: Markos Zaharioudakis, Matthias Brantner
- 10831. By Matthias Brantner
-
- Add the ability to create a StreamableStringItem that re-uses the stream from another Streamable*Item (in a memory-ownership-safe way). Fixed bug #996084 (crash in Streamable*Item with file module) - Add a base64:decode#2 function that also does transcoding to utf-8 Approved: Chris Hillery, Dennis Knochenwefel, William Candillon, Matthias Brantner
- 10832. By Paul J. Lucas
-
Added current-compare-options() function. Approved: Matthias Brantner, Paul J. Lucas - 10833. By Matthias Brantner
-
note in the java documentation that it might be deprecated in favor of xqj Approved: Matthias Brantner, Rodolfo Ochoa
- 10834. By Paul J. Lucas
-
1. In transcoding streambufs, throwing std::invalid_argument for empty charsets.
2. In the HTTP code, now setting the charset to ISO-8859-1 in the constructor so it's set even when there's no Content-Type header. Approved: Dennis Knochenwefel, Paul J. Lucas - 10835. By Paul J. Lucas
-
Documentation tweaks. Approved: Matthias Brantner, Paul J. Lucas
- 10836. By Nicolae Brinza
-
Documentation fixes. Approved: Matthias Brantner, Nicolae Brinza
- 10837. By Paul J. Lucas
-
Added link to full-text module.
Note that I have no way to test it. Approved: Matthias Brantner, Paul J. Lucas - 10838. By Paul J. Lucas
-
Added check to see that JsonML (array form) really starts with '['. Approved: William Candillon, Paul J. Lucas
- 10839. By Sorin Marian Nasoi <email address hidden>
-
Updated the import scripts to use the W3C testsuites from 04-May-2012. Approved: Chris Hillery, Sorin Marian Nasoi
- 10840. By Rodolfo Ochoa
-
Complete XQJ Documentation Approved: Matthias Brantner, Cezar Andrei
- 10841. By Rodolfo Ochoa
-
BaseURI can now be cleared through a method.
When BaseUri is undefined it returns an empty string instead of asserting.
Fixed some compilation warnings for a cleaner compile.
Added #define stdafx.h to some files to fix the precompiled headers on Windows. Approved: Matthias Brantner, Chris Hillery - 10842. By Matthias Brantner
-
use an instead of ann prefix in the documentation Approved: Matthias Brantner, Chris Hillery
- 10843. By Matthias Brantner
-
- no undo is done for collection truncate
- fix for bug #986377 "do not apply any updates on collection if it is to be truncated" Approved: Markos Zaharioudakis, Matthias Brantner - 10844. By Nicolae Brinza
-
Documentation improvements. Fixed the type of the options parameter to the parse-fragment() function. Approved: Nicolae Brinza, Matthias Brantner
- 10845. By Paul J. Lucas
-
Renamed Tokenizer::Numbers to Tokenizer::State now (just prior to the 2.5 release) to give it a better name for the forthcoming addition of the ability to tokenize using include/exclude Item lists. At that time, State will most likely be expanded to include additional state information beyond just numbers, hence the name change.
(In the previous proposal, I had forgotten to update the documentation -- now done.) Approved: Matthias Brantner, Paul J. Lucas
- 10846. By Ghislain Fourny
-
Allowing general tree IDs (containing hexadecimal digits and dash) in structural URIs (test). Approved: Markos Zaharioudakis, Matthias Brantner
- 10847. By Matthias Brantner
-
replaced occurrences of XQuery version 1.1 with 3.0 Approved: Chris Hillery, Matthias Brantner
- 10848. By Chris Hillery
-
Fix (from Matthias) to ensure StaticContext used for invoke() lives as long as the ItemSequence returned from said invoke().
Approved: Matthias Brantner, Chris Hillery - 10849. By Matthias Brantner
-
- fixed itemfactory unit test on Windows
- disabled string unit test on Windows (because of bug #867271) Approved: Chris Hillery, Matthias Brantner - 10850. By William Candillon
-
Remove dead links in the documentation. Approved: William Candillon, Matthias Brantner
- 10851. By Paul J. Lucas
-
Fixed the build error and also fixed several warnings that are new with Xcode 4.3.2. Approved: Matthias Brantner, Paul J. Lucas
- 10852. By Paul J. Lucas
-
Getting in another public API change for 2.5 for the full-text module since now's the time to do it. Renamed tokenize() to tokenize-node() for 2 reasons:
1. There already exists tokenize-string() and therefore tokenize-node() is a better name than just plain tokenize().
2. The forthcoming addition of the black & white tokenization function will most likely be called tokenize-nodes() -- plural. Approved: Matthias Brantner, Paul J. Lucas
- 10853. By William Candillon
-
Fix PHP build Approved: Paul J. Lucas, William Candillon
- 10854. By Sorin Marian Nasoi <email address hidden>
-
Update modules_svg generation target. Add mechanism for core build to detect whether a given non-core module exists (by URI). Use that method to check for graphviz before generating modules_svg. Approved: Sorin Marian Nasoi, Chris Hillery
- 10855. By Chris Hillery
-
Allow modules to specify their own libs for their Config file, in addition to the automatically-added libs from DECLARE_ZORBA_MODULE().
Approved: Matthias Brantner, Chris Hillery - 10856. By Matthias Brantner
-
- fetch:content-binary
- fetch:content#3 (with encoding parameter)
- StreamResource::isStreamSeekable to make sure the streamable strings returned by fetch are seekable.
- fixed two warnings in nodes_impl.cpp
- extended the C++ api to be symmetric to the fetch module Approved: Till Westmann, Chris Hillery, Matthias Brantner - 10857. By Paul J. Lucas
-
Fixed a couple of warnings. Approved: Matthias Brantner, Paul J. Lucas
- 10858. By Matthias Brantner
-
fix for bug #1001463 (type not available during computation of function caching). Approved: Markos Zaharioudakis, Matthias Brantner
- 10859. By Matthias Brantner
-
Add support for function items test in the print xquery visitor. Approved: Matthias Brantner, William Candillon
- 10860. By Matthias Brantner
-
added stacks and queues to the data lifecycle documentation Approved: William Candillon, Matthias Brantner
- 10861. By Matthias Brantner
-
push-down of count(probe-index()) into the store Approved: Till Westmann, Matthias Brantner - 10862. By Chris Hillery
-
Set CMAKE_INSTALL_RPATH_USE_LINK_PATH to TRUE to have correct RPATHs in installed binaries. Add comment about INSTALL_NAME_DIR. Approved: Matthias Brantner, Chris Hillery - 10863. By Matthias Brantner
-
Update changelog, update module tags for Zorba 2.5. Approved: Matthias Brantner, Chris Hillery
- 10864. By Sorin Marian Nasoi <email address hidden>
-
Fixed bug lp:1001477. Approved: Rodolfo Ochoa, Sorin Marian Nasoi
- 10865. By Paul J. Lucas
-
No longer setting the charset of an overridden media type to a default value. Approved: David Graf, Matthias Brantner
- 10866. By Matthias Brantner
-
updated change log to reflect bug #1002867 Approved: Paul J. Lucas, Matthias Brantner
- 10867. By Matthias Brantner
-
added a cmake variable that allows to configure whether external jars are packaged or not (ZORBA_PACKAGE_EXTERNAL_JARS)
Approved: Matthias Brantner, Chris Hillery - 10868. By Paul J. Lucas
-
s/ZORBA_ASSERT/if/ Approved: Matthias Brantner, Paul J. Lucas
- 10869. By Paul J. Lucas
-
1. Fixed return type of ft:thesaurus-lookup().
2. Added a test to ensure that a look-up of a non-existent word works. Approved: Matthias Brantner, Paul J. Lucas - 10870. By Rodolfo Ochoa
-
- Install added for XQJ bindings and documentation
- more documentation for all SWIG generated language bindings
- other installer related cleanups Approved: Matthias Brantner, Chris Hillery, Juan Zacarias - 10871. By Markos Zaharioudakis
-
fixed bug #1006166 Approved: Markos Zaharioudakis
- 10872. By Markos Zaharioudakis
-
fixed bug 960083$ (improper error handling of NaN comparisons) + small optimization of comparison operations Approved: Markos Zaharioudakis
- 10873. By Carlos Manuel Lopez
-
Implements new group by syntax, as defined in the XQuery 3.0 Spec since September 2011 Approved: Markos Zaharioudakis
- 10874. By Markos Zaharioudakis
-
Fixed bug #1003023$ (optimizer problems due to common subexpression after var folding into if-then-else) Approved: Markos Zaharioudakis
- 10875. By Markos Zaharioudakis
-
fixed bug #854506 and partial fix for bug #867008 Approved: Markos Zaharioudakis
- 10876. By Markos Zaharioudakis
-
Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428) Approved: Markos Zaharioudakis
- 10877. By Markos Zaharioudakis
-
Fixed bug #991088$ (raise XUST0001 in trycatch with mixed updating and simple clauses)
Approved: Markos Zaharioudakis - 10878. By Till Westmann
-
add location information to ZXQP0029_URI_ACCESS_DENIED Approved: Chris Hillery, Markos Zaharioudakis - 10879. By Till Westmann
-
remove unnecessary annotations and clean-up indentation Approved: Ghislain Fourny, Till Westmann
- 10880. By Markos Zaharioudakis
-
Merged the JSONiq branch into the zorba trunk, with JSONiq deactivated. Approved: Matthias Brantner, Ghislain Fourny, Markos Zaharioudakis
- 10881. By Markos Zaharioudakis
-
renamed file test/unit/static_context.cpp to avoid conflict with src/context/static_context.cpp during debugging Approved: Markos Zaharioudakis - 10882. By Markos Zaharioudakis
-
Fixed bug #1008082 (bug in transform expr when a copy var is not used anywhere) Approved: Markos Zaharioudakis
- 10883. By Markos Zaharioudakis
-
Fixed bug #932314 (non-comparable values must be treated as distinct by fn:distinct-values) Approved: Markos Zaharioudakis
- 10884. By Ghislain Fourny
-
The builtin schema type names, in the store, are now created directly from the pool (this is to prevent a cyclic dependency if another item factory than the simple item factory needs to access the store to produce the QNames, because at this point the store has not been initialized yet). Approved: Matthias Brantner, Markos Zaharioudakis
- 10885. By Markos Zaharioudakis
-
Plan serializer does not serialize expressions anymore. Approved: Markos Zaharioudakis
- 10886. By Markos Zaharioudakis
-
added serialize_csize function Approved: Markos Zaharioudakis
- 10887. By Rodolfo Ochoa
-
Error fixed on windows Approved: Chris Hillery, Cezar Andrei
- 10888. By Till Westmann
-
Added support for transient maps in the unordered-maps module. Approved: Matthias Brantner, Till Westmann
- 10889. By Nicolae Brinza
-
Improved parser error messages (bug #867357). Approved: David Graf, Matthias Brantner
- 10890. By Ghislain Fourny
-
Fixes csize serialization. Approved: Markos Zaharioudakis, Ghislain Fourny
- 10891. By Ghislain Fourny
-
Adding asserts in OrdPath::getLocalBitLength to prevent endless loops and possibly reproduce such a potential endless loop with more information. Approved: David Graf, Markos Zaharioudakis - 10892. By Dennis Knochenwefel
-
Cleaning up attribute and element nodes' type handling and checking invariants, following an inconsistency discovered on Windows. Approved: Markos Zaharioudakis, Matthias Brantner, Dennis Knochenwefel
- 10893. By Nicolae Brinza
-
Fixed and enabled fn:parse-xml-fragment(). Approved: Juan Zacarias, Matthias Brantner - 10894. By Paul J. Lucas
-
Added base64::streambuf class and replaced horribly inefficient base64 code. Approved: Matthias Brantner, Paul J. Lucas
- 10895. By Rodolfo Ochoa
-
Adding cacert.pem for Windows Approved: Cezar Andrei, Chris Hillery
- 10896. By Markos Zaharioudakis
-
A better fix for compilation problems involving integer types. Approved: Markos Zaharioudakis
- 10897. By Paul J. Lucas
-
Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas
- 10898. By Markos Zaharioudakis
-
Optimized hash sets used by fn:distinct-values and nodes-distinct Approved: Markos Zaharioudakis
- 10899. By Till Westmann
-
modify fn:path to add support for fragments Approved: Juan Zacarias, Matthias Brantner, Till Westmann
- 10900. By Paul J. Lucas
-
Added C++98 subset of C++11's unordered_map. The configure process checks for a working C++11 unordered_map first: if found, it will be used instead. The goal is to phase out the old hash*map* classes over time. Approved: Markos Zaharioudakis, Matthias Brantner
- 10901. By Till Westmann
-
add ref:has-node-reference and ref:assign-node-reference Approved: Markos Zaharioudakis, Matthias Brantner - 10902. By Paul J. Lucas
-
Added the missing specialization for hash<unique_ptr<T,D> >. Approved: Matthias Brantner, Paul J. Lucas - 10903. By Matthias Brantner
-
- added API function Item:isSeekable
- fixed a problem in the file module where all exceptions being thrown in the body were caught and rethrown as file exception Approved: Chris Hillery, Matthias Brantner - 10904. By David Graf
-
add positional skipping to access of static and dynamic collections Approved: Matthias Brantner, Till Westmann
- 10905. By Markos Zaharioudakis
-
Fixed bug #1018673 + renamed misleading isXXXNode() methods. Approved: Markos Zaharioudakis
- 10906. By Markos Zaharioudakis
-
Optimized hash function used for nodes (fixes bug #1010051) + some hashmap/hashset cleanup Approved: Markos Zaharioudakis
- 10907. By Markos Zaharioudakis
-
Fixed bug #1016429 (scoping of copy variables in transform expr) Approved: Markos Zaharioudakis
- 10908. By Paul J. Lucas
-
Added tokenize-nodes() function. Approved: Matthias Brantner, Paul J. Lucas
- 10909. By David Graf
-
windows build fix Approved: Dennis Knochenwefel, David Graf
- 10910. By Ghislain Fourny
-
Differentiating between two reasons why references are unregistered. Approved: Markos Zaharioudakis, Matthias Brantner
- 10911. By Ghislain Fourny
-
Fixed a bug preventing from compiling Zorba single-threadedly. Approved: Markos Zaharioudakis, Ghislain Fourny
- 10912. By Dennis Knochenwefel
-
fix for bug #1020953 (access of freed object) Approved: Ghislain Fourny, Nicolae Brinza
- 10913. By Ghislain Fourny
-
Adding a NOT ZORBA_HAVE_UNIQUE_PTR guard for adding the unique_ptr test, for consistency. There are guards for including the corresponding resources, so that the test was failing by lack of them.
Approved: Markos Zaharioudakis, Till Westmann - 10914. By Dennis Knochenwefel
-
fixed memory leak reading data of zstring after destruction. Encoded illegal char in exception to not cause any problems. Approved: Till Westmann, Ghislain Fourny
- 10915. By Dennis Knochenwefel
-
fixed CHECK_CXX_SOURCE_COMPILES doesn't work with backslash-n using cmake 2.8.8 on windows. Approved: Paul J. Lucas, Dennis Knochenwefel - 10916. By Paul J. Lucas
-
Now checking data size before accessing vector[0]. Approved: Dennis Knochenwefel, Paul J. Lucas
- 10917. By David Graf
-
Unifying .bat and .vcproj generation. Bug #1013075 Approved: Rodolfo Ochoa, Dennis Knochenwefel
- 10918. By Chris Hillery
-
EXPECTED_FAILURE()s for two tests failing due to flworfound.org changes. Approved: Matthias Brantner, Chris Hillery
- 10919. By Paul J. Lucas
-
1. Added missing "lang" attribute to tokens generated from tokenize-nodes().
2. Added a test. Approved: Chris Hillery, Paul J. Lucas - 10920. By Paul J. Lucas
-
Moved URI resolution for the thesaurus into the translator. Approved: Matthias Brantner, Paul J. Lucas
- 10921. By Paul J. Lucas
-
Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas
- 10922. By Markos Zaharioudakis
-
Fixed bug #1022557 (subsequence function applied on window variable) Approved: Markos Zaharioudakis
- 10923. By Ghislain Fourny
-
Several modifications in the implementation of JSONiq to make it in sync with the new specification, in particular:
- No more pairs,
- Minor changes to the update syntax
- Synced returned error codes
- Support for collections and indices, including automatic maintenance
- Serializer extended to allow mixed JDM and XDM
- Updated C++ API Approved: Matthias Brantner, Ghislain Fourny, Chris Hillery, Markos Zaharioudakis - 10924. By Markos Zaharioudakis
-
added auditing in zorba cmd + fixed bug in auditing the parse time Approved: Markos Zaharioudakis
- 10925. By Markos Zaharioudakis
-
Fixed bug #1002993 (bug during revalidation after update due to improper condition for calling TypeOps::get_atomic_type_code() from SchemaValidatorImpl::isPossibleSimpleContentRevalImpl()) Approved: Cezar Andrei, Markos Zaharioudakis - 10926. By Ghislain Fourny
-
Adding two tests for allowing two errors upon sequence of several JSON items and an XML node. Approved: Till Westmann, Matthias Brantner
- 10927. By Markos Zaharioudakis
-
Avoid (if possible) treat expr for checking that the value of a non-external global variable conforms to the type declaration of the variable Approved: Markos Zaharioudakis
- 10928. By Nicolae Brinza
-
Implemented the new EQName syntax. Approved: Matthias Brantner, Nicolae Brinza
- 10929. By Paul J. Lucas
-
Reverted previous "fix." Approved: Matthias Brantner, Paul J. Lucas
- 10930. By Markos Zaharioudakis
-
Streaming execution for tumbling windows (also fixes bug #1010051) Approved: Markos Zaharioudakis
- 10931. By Ghislain Fourny
-
Fixed build errors occurring on Windows by exposing structured item types even if JSONiq is deactivated. Approved: Matthias Brantner, Markos Zaharioudakis
- 10932. By Markos Zaharioudakis
-
Incremental maintenance for general indexes. Approved: Markos Zaharioudakis
- 10933. By Paul J. Lucas
-
Fixed typo. Approved: Chris Hillery, Paul J. Lucas
- 10934. By Sorin Marian Nasoi <email address hidden>
-
Updated the F&O 1.1 and 3.0 documents used in the comparison of the functions and corrected the util:download-and-write-spec function. Approved: Chris Hillery, Sorin Marian Nasoi - 10935. By Chris Hillery
-
Add test case demonstrating bug 1010728. Approved: Markos Zaharioudakis, Chris Hillery
- 10936. By David Graf
-
Removed internal debug info from fn:trace out by using zorba serializer instead of the internal show function. Approved: Chris Hillery, David Graf
- 10937. By Dennis Knochenwefel
-
Added case for DT_UNKNOWN. Approved: Dennis Knochenwefel, Paul J. Lucas
- 10938. By Rodolfo Ochoa
-
Fix for precompiled headers on Windows Approved: Rodolfo Ochoa, Chris Hillery
- 10939. By Cezar Andrei <email address hidden>
-
Integrate:
Fix for bug 1023120 xs:include ignored in schemas.
Added a new test for this case. Approved: Chris Hillery, David Graf - 10940. By Till Westmann
-
fix position for elements and processing-instructions in fn:path Approved: Markos Zaharioudakis, Matthias Brantner - 10941. By Nicolae Brinza
-
Added support for the unix shebang script launcher (#!/path/interpreter) Approved: Nicolae Brinza, David Graf - 10942. By Paul J. Lucas
-
Now using enable_if for more functions to make overload resolution work better. Approved: Matthias Brantner, Paul J. Lucas
- 10943. By Rodolfo Ochoa
-
Disabling "print stack trace" for windows Approved: Chris Hillery, Rodolfo Ochoa
- 10944. By Paul J. Lucas
-
Now handling UTF-16 surrogate pairs. Approved: Dennis Knochenwefel, Paul J. Lucas
- 10945. By Nicolae Brinza
-
The parse-fragment function now allows a DOCTYPE declaration in the input. Approved: Nicolae Brinza, Matthias Brantner
- 10946. By Paul J. Lucas
-
Added functions to test for and create UTF-16 surrogate pairs.
These will probably be needed by whoever fixes bug #1025622. Approved: Dennis Knochenwefel, Paul J. Lucas - 10947. By Ghislain Fourny
-
Fix that gives precedence to an array replacement over an array deletion, and adds tests about multiple updates with same selector on arrays and on objects. Approved: Markos Zaharioudakis, Matthias Brantner
- 10948. By Paul J. Lucas
-
Now doing proper JSON serialization. Approved: Chris Hillery, Dennis Knochenwefel, Paul J. Lucas
- 10949. By Paul J. Lucas
-
No longer checking captured subgroups in replacement string when 'q' flag is given. Approved: Matthias Brantner, Paul J. Lucas
- 10950. By Matthias Brantner
-
removed a non-core module dependency from the reuse-stream test Approved: Chris Hillery, Matthias Brantner
- 10951. By Markos Zaharioudakis
-
removed some debugging code, which causes Windows compilation problems Approved: Markos Zaharioudakis
- 10952. By Paul J. Lucas
-
Removed zorbatypes/transcoder.h & .cpp. Approved: Matthias Brantner, Paul J. Lucas - 10953. By Paul J. Lucas
-
Now properly serializing JSON for JsonML. Approved: Chris Hillery, Paul J. Lucas
- 10954. By Paul J. Lucas
-
1. Tweaked equals().
2. Added "const&" to std_string function arguments. Approved: Matthias Brantner, Paul J. Lucas - 10955. By Matthias Brantner
-
fix for bug #898066 (Stringstream & fn:trace) Approved: Chris Hillery, Matthias Brantner
- 10956. By Sorin Marian Nasoi <email address hidden>
-
- added information about the Zorba and XQTS versions that were used in generating the reports
- updated the README.txt with info regarding conformance reports generation Approved: William Candillon, Sorin Marian Nasoi - 10957. By Ghislain Fourny
-
Activating ZORBA_WITH_JSON by default. Approved: Chris Hillery, Matthias Brantner
- 10958. By Rodolfo Ochoa
-
Documentation fixed for Zorba binary package with PHP. Approved: Cezar Andrei, Chris Hillery
- 10959. By Paul J. Lucas
-
s/0/npos/
This probably fixes some as-of-yet-undiscovered bug. Approved: Matthias Brantner, Paul J. Lucas - 10960. By Rodolfo Ochoa
-
C# API Binding Approved: Cezar Andrei, Chris Hillery
- 10961. By Ghislain Fourny
-
Corrects bug 1029836. Approved: Chris Hillery, Matthias Brantner
- 10962. By Ghislain Fourny
-
Fixing Windows build with JSONiq. Approved: Chris Hillery, Matthias Brantner
- 10963. By Paul J. Lucas
-
Removed JsonML-object. Approved: Ghislain Fourny, Paul J. Lucas
- 10964. By Matthias Brantner
-
new ItemFactory function that allows creating dateTime items without timezone Approved: Chris Hillery, Matthias Brantner
- 10965. By Ghislain Fourny
-
Fixes bug 1032166 (critical memory bug). Approved: Chris Hillery, Matthias Brantner
- 10966. By Paul J. Lucas
-
1. s/take/swap/
2. Removed unnecessary assignment. Approved: Matthias Brantner, Paul J. Lucas - 10967. By Juan Zacarias
-
Fix of bugs
Bug #1014979: Make thesaurus optional component on Windows
Bug #1014981: Make default thesaurus available as Ubuntu package
Also Fixed FindJNI.cmake to be found-able in the Ubuntu Installer Approved: Juan Zacarias, Chris Hillery - 10968. By David Graf
-
Make testdriver_mt work with boost version >1.49. Approved: David Graf, Chris Hillery
- 10969. By Matthias Brantner
-
Added a keys() function to the index dml module. This fixes bug #900677. Approved: Matthias Brantner, David Graf
- 10970. By Chris Hillery
-
Test temporarily disabled while http-client is being updated. Also get HEAD of OAuth module rather than zorba-2.5 tag for some reason. Approved: Juan Zacarias, Chris Hillery
- 10971. By Chris Hillery
-
Split image into image + graphviz; split data-converters into csv + html. Approved: Juan Zacarias, Chris Hillery
- 10972. By William Candillon
-
Fix make doc target when multiple version of the same module exists. Approved: Sorin Marian Nasoi, Matthias Brantner
- 10973. By Ghislain Fourny
-
Correct array insert expression syntax to include [] like append expressions. Approved: Matthias Brantner, Chris Hillery
- 10974. By Juan Zacarias
-
Fixed windows installer component options for c# Bindings Approved: Rodolfo Ochoa, Chris Hillery
- 10975. By Matthias Brantner
-
removed some hardcoded english words from error messages raised by the json parser Approved: Paul J. Lucas, Matthias Brantner
- 10976. By Chris Hillery
-
Regenerate scanner and parser with flex/bison 2.5 - apparently 2.6 creates code that Clang doesn't like. Approved: Matthias Brantner, Chris Hillery
- 10977. By Matthias Brantner
-
prepare ChangeLog for 2.6 release Approved: Chris Hillery, Matthias Brantner
- 10978. By Chris Hillery
-
Bump version number to 2.6; update tagged modules; add archive module. Approved: Cezar Andrei, Sorin Marian Nasoi, Chris Hillery
- 10979. By David Graf
-
Since OSX Mountain Lion, clang is the default c++ compiler on mac. Therefore, it is not enough to check if the c++ compiler executable is called clang. Approved: David Graf, Paul J. Lucas
- 10980. By Chris Hillery
-
Add "generation" of local copy of FlexLexer.h, to ensure it always stays in sync with the generated lexer .cpp file. Approved: Paul J. Lucas, Chris Hillery
- 10981. By Sorin Marian Nasoi <email address hidden>
-
Fixed the XQDoc documentation issue related to the formatting of the parameters for the higher order functions. Approved: William Candillon, Sorin Marian Nasoi
- 10982. By Chris Hillery
-
Try to find FlexLexer.h associated with the flex binary being used.
Approved: David Graf, Chris Hillery - 10983. By Markos Zaharioudakis
-
Fixed bug #1033407 (do not store var_expr rchandles in the static context) Approved: Markos Zaharioudakis
- 10984. By Markos Zaharioudakis
-
Fixed bugs #899364 and 899363 (throw XQST0103 in case of non-distinct window variables)
Fixed bug #899366 (enforce the type declaration of a window variable) Approved: Markos Zaharioudakis - 10985. By Markos Zaharioudakis
-
Fixed bug #1024892 (index declaration references udf declared after the index) Approved: Markos Zaharioudakis
- 10986. By David Graf
-
remove clang warnings in auditing code Approved: Matthias Brantner, Till Westmann, David Graf
- 10987. By Chris Hillery
-
added the archive module to ExternalModules.conf and the ChangeLog Approved: Luis Rodriguez Gonzalez, Juan Zacarias, Chris Hillery, Matthias Brantner - 10988. By David Graf
-
Because the jsoniq_emitter aggregates the xml_emitter, it needs to handover all the parameters. Inclusive aEmitAttributes. Approved: Till Westmann, David Graf
- 10989. By luisrod <luisrod@LUISROD-LAP>
-
- Added code for bug#1025564 "Deprecate -f argument to zorbacmd"
- 10990. By Markos Zaharioudakis
-
Allow prolog variables to be referenced before they are declared (XQuery 3.0 feature) (fixes bug #900688) Approved: Markos Zaharioudakis
- 10991. By Chris Hillery
-
Revert unintentional commit r10989. Approved: Luis Rodriguez Gonzalez, Chris Hillery
- 10992. By Ghislain Fourny
-
Updated JSONiq tutorial. Approved: William Candillon, Matthias Brantner
- 10993. By Paul J. Lucas
-
Miscellaneous changes, some a prerequisite for LLVM that should be done anyway and not have to wait for the far-in-the-future LLVM branch merge. Approved: Matthias Brantner, Paul J. Lucas
- 10994. By Ghislain Fourny
-
Specifying collection and property upon ZDST0006. Approved: Till Westmann, Matthias Brantner
- 10995. By Paul J. Lucas
-
QueryLoc clean-up:
1. Added all-argument constructor (needed for LLVM).
2. Removed pointless copy constructor since default is fine.
3. Removed pointless virtual destructor (there are no virtual functions!). Approved: Matthias Brantner, Paul J. Lucas - 10996. By Nicolae Brinza
-
Dynamically computed strings can now be cast to xs:QName. Fixes bug #898792 Approved: Nicolae Brinza, Chris Hillery
- 10997. By Till Westmann
-
move appending of ${requiredlibs-store} to requiredlibs to a place where it works Approved: Ghislain Fourny, Till Westmann - 10998. By Ghislain Fourny
-
Fixes a bug that makes Zorba crash upon inserting more than one pair. Approved: Till Westmann, Matthias Brantner
- 10999. By Ghislain Fourny
-
Adds a method isEncoded to user-typed atomic items. Approved: Till Westmann, Matthias Brantner
- 11000. By Sorin Marian Nasoi <email address hidden>
-
fix for lp:969251. Approved: Ghislain Fourny, Sorin Marian Nasoi
- 11001. By Ghislain Fourny
-
Made URI computation lazy in StructuralAnyUri. Approved: Markos Zaharioudakis, Matthias Brantner
- 11002. By Ghislain Fourny
-
Fixes a Windows compiler error (bug 1040558). Approved: Luis Rodriguez Gonzalez, Juan Zacarias
- 11003. By William Candillon
-
Enable XML output from doxygen by default. Approved: Chris Hillery, Matthias Brantner
- 11004. By Markos Zaharioudakis
-
Fixed bug #1038410 (Memory leaks in parser, trace iterator, and general index) Approved: Markos Zaharioudakis
- 11005. By Markos Zaharioudakis
-
Fixed bug #1042840 (qname pool free-list corruption) Approved: Markos Zaharioudakis
- 11006. By Chris Hillery
-
Restoring execute bit to a bunch of scripts. Approved: Juan Zacarias, Matthias Brantner, Chris Hillery
- 11007. By Carlos Manuel Lopez
-
New memory management for compiler expressions (fixes bug #1036111) Approved: Markos Zaharioudakis
- 11008. By Ghislain Fourny
-
Simplified JSON items class and fixed some Xml Node static casts to handle JSON items as well. Approved: Markos Zaharioudakis, Matthias Brantner
- 11009. By Markos Zaharioudakis
-
Fixed bug #866984 (better error message for an eval error) Approved: Markos Zaharioudakis
- 11010. By Ghislain Fourny
-
Fixing a memory leak in append update primitive. Approved: Matthias Brantner, Till Westmann
- 11011. By William Candillon
-
Remove deprecated reference to the old sourceforge mailing-list. Approved: Matthias Brantner, William Candillon
- 11012. By Nicolae Brinza
-
Fixes for bugs #1023170, #1024033, #1027270 Approved: Chris Hillery, Matthias Brantner
- 11013. By Markos Zaharioudakis
-
rchandle cleanup Approved: Markos Zaharioudakis
- 11014. By Paul J. Lucas
-
Replaced UUID with thin layer over native platform implementation. Approved: Chris Hillery, Rodolfo Ochoa, Matthias Brantner, Paul J. Lucas
- 11015. By Chris Hillery
-
Corrected HTML serialization of empty elements. Added test cases for XHTML.
Approved: Matthias Brantner, Chris Hillery - 11016. By Chris Hillery
-
Fixes debug mode crash because of missing dictionary entries. Approved: William Candillon, Chris Hillery
- 11017. By Ghislain Fourny
-
Fixed bug 1041411 (prefixed true/false/null should be interpreted as name tests). Approved: Matthias Brantner, Chris Hillery
- 11018. By Ghislain Fourny
-
Removing superfluous store/naive prefixes in store includes. Approved: Till Westmann, Matthias Brantner
- 11019. By Paul J. Lucas
-
Suppressed warnings; moved gcc diagnostic push macros to config.h. Approved: Matthias Brantner, Paul J. Lucas
- 11020. By Till Westmann
-
add dependency on libuuid
- 11021. By Till Westmann
-
ensure deterministic test results for keys of index on unordered collection by sorting
- 11022. By Till Westmann
-
Some fixes in TreeID API and some include cleanup.
- 11023. By Matthias Brantner
-
Adding missing JSONiq library functions.
- 11024. By Matthias Brantner
-
Updated JSONiq tutorial.
- 11025. By Matthias Brantner
-
implementation of parse-json#2 allowing multiple top-level items
- 11026. By Matthias Brantner
-
more tests for jn:parse-json
- 11027. By Matthias Brantner
-
adapted changelog regarding jn:parse-json
Unmerged revisions
Preview Diff
1 | === modified file 'ChangeLog' |
2 | --- ChangeLog 2012-04-25 17:16:48 +0000 |
3 | +++ ChangeLog 2012-04-26 16:53:22 +0000 |
4 | @@ -10,6 +10,7 @@ |
5 | * fn:unparsed-text-available |
6 | * Extended API for Python, Java, PHP and Ruby. |
7 | * Add jvm classpath to zorbacmd and to Zorba API. Tracked by #931816 |
8 | + * Added full-text module. |
9 | * Added support for NO_ICU (to not use ICU for unicode processing) |
10 | * Added XQJ support. |
11 | |
12 | @@ -89,6 +90,8 @@ |
13 | * Fixed bug 867509 (Can not handle largest xs:unsignedLong values) |
14 | * Fixed bug 924063 (sentence is incorrectly incremented when token characters end without sentence terminator) |
15 | * Fixed bug 909126 (bug in cloning of var_expr) |
16 | + * Fixed bug 928631 (external builtin function were not executed in the module they |
17 | + were declared) |
18 | * Fixed bug in destruction of exit_catcher_expr |
19 | * Fixed bug #867024 (error messages) |
20 | * Fixed bug #957580 (stream read failure in StringToCodepointsIteartor) |
21 | |
22 | === modified file 'cmake_modules/FindICU.cmake' |
23 | --- cmake_modules/FindICU.cmake 2012-04-24 14:35:54 +0000 |
24 | +++ cmake_modules/FindICU.cmake 2012-04-26 16:53:22 +0000 |
25 | @@ -28,6 +28,8 @@ |
26 | # (note: in addition to ICU_LIBRARIES) |
27 | # ICU_DATA_LIBRARIES - Libraries to link against for ICU data |
28 | # |
29 | +# ICU_VERSION - ICU's version number. |
30 | +# |
31 | |
32 | # Look for the header file. |
33 | find_path( |
34 | |
35 | === modified file 'doc/zorba/ft_intro.dox' |
36 | --- doc/zorba/ft_intro.dox 2012-04-24 12:39:38 +0000 |
37 | +++ doc/zorba/ft_intro.dox 2012-04-26 16:53:22 +0000 |
38 | @@ -5,9 +5,9 @@ |
39 | specification. |
40 | Additional documentation: |
41 | |
42 | - - \ref ft_stemmer |
43 | - - \ref ft_thesaurus |
44 | - - \ref ft_tokenizer |
45 | +- \ref ft_stemmer |
46 | +- \ref ft_thesaurus |
47 | +- \ref ft_tokenizer |
48 | |
49 | \section ft_unimplemented Unimplemented Features |
50 | |
51 | @@ -16,11 +16,11 @@ |
52 | implemented. |
53 | The features that are not (completely) implemented are: |
54 | |
55 | - - The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a> |
56 | - (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187470">3187470</a>). |
57 | - - <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a> |
58 | - and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a> |
59 | - (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187462">3187462</a>). |
60 | +- The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a> |
61 | + (bug <a href="https://bugs.launchpad.net/zorba/+bug/866924">866924</a>). |
62 | +- <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a> |
63 | + and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a> |
64 | + (bug <a href="https://bugs.launchpad.net/zorba/+bug/866923">866923</a>). |
65 | |
66 | */ |
67 | /* vim:set et sw=2 ts=2: */ |
68 | |
69 | === modified file 'doc/zorba/ft_stemmer.dox' |
70 | --- doc/zorba/ft_stemmer.dox 2012-04-24 12:39:38 +0000 |
71 | +++ doc/zorba/ft_stemmer.dox 2012-04-26 16:53:22 +0000 |
72 | @@ -56,7 +56,12 @@ |
73 | public: |
74 | typedef /* implementation-defined */ ptr; |
75 | |
76 | + struct Properties { |
77 | + char const *uri; |
78 | + }; |
79 | + |
80 | virtual void destroy() const = 0; |
81 | + virtual void properties( Properties *result ) const = 0; |
82 | virtual void stem( String const &word, locale::iso639_1::type lang, String *result ) const = 0; |
83 | protected: |
84 | virtual ~Stemmer(); |
85 | @@ -89,6 +94,8 @@ |
86 | Note that \c result should always be set to something. |
87 | If your stemmer doesn't know how to stem the given word, |
88 | you should set \c result to \c word. |
89 | +You also need to implement the \c properties() function |
90 | +and set the identifying URI of your stemmer. |
91 | |
92 | A very simple stemmer |
93 | that stems the word "foobar" to "foo" |
94 | @@ -98,6 +105,7 @@ |
95 | class MyStemmer : public Stemmer { |
96 | public: |
97 | void destroy() const; |
98 | + void properties( Properties *result ) const; |
99 | void stem( String const &word, locale::iso639_1::type lang, String *result ) const; |
100 | private: |
101 | MyStemmer(); |
102 | @@ -108,6 +116,10 @@ |
103 | // Do nothing since we statically allocate a singleton instance of our stemmer. |
104 | } |
105 | |
106 | +void MyStemmer::properties( Properties *props ) const { |
107 | + props->uri = "http://my.example.com/zorba/full-text/stemmer"; |
108 | +} |
109 | + |
110 | void MyStemmer::stem( String const &word, locale::iso639_1::type lang, String *result ) const { |
111 | if ( word == "foobar" ) |
112 | *result = "foo"; |
113 | @@ -120,7 +132,6 @@ |
114 | or a dictionary look-up |
115 | to stem many words, |
116 | of course. |
117 | - |
118 | Although not used in this simple example, |
119 | \c lang can be used to allow a single stemmer instance |
120 | to stem words in more than one language. |
121 | @@ -135,16 +146,24 @@ |
122 | class StemmerProvider { |
123 | public: |
124 | virtual ~StemmerProvider(); |
125 | - virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0; |
126 | + virtual bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const = 0; |
127 | }; |
128 | \endcode |
129 | |
130 | +The \c getStemmer() function should return \c true |
131 | +only if it can provide a \c Stemmer |
132 | +for the given language; \c false otherwise. |
133 | +If the \c Stemmer::ptr argument is \c null, |
134 | +the caller wants to check only whether the provider |
135 | +can provide a stemmer for the given language |
136 | +and doesn't want a \c Stemmer instance created or returned. |
137 | + |
138 | A simple \c StemmerProvider for our simple stemmer can be implemented as: |
139 | |
140 | \code |
141 | class MyStemmerProvider : public StemmerProvider { |
142 | public: |
143 | - Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const; |
144 | + bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const; |
145 | }; |
146 | |
147 | Stemmer::ptr MyStemmerProvider::getStemmer( locale::iso639_1::type lang ) const { |
148 | @@ -154,15 +173,14 @@ |
149 | case iso639_1::en: |
150 | case iso639_1::unknown: // Handle "unknown" language since, in many cases, the language is not known. |
151 | result.reset( &stemmer ); |
152 | - break; |
153 | + return true; |
154 | default: |
155 | // |
156 | - // We have no stemmer for the given language: leave the result as null to indicate this. |
157 | + // We have no stemmer for the given language: return false. |
158 | // Zorba will then use the built-in stemmer for the given language. |
159 | // |
160 | - break; |
161 | + return false; |
162 | } |
163 | - resturn std::move( result ); |
164 | } |
165 | \endcode |
166 | |
167 | |
168 | === modified file 'doc/zorba/ft_thesaurus.dox' |
169 | --- doc/zorba/ft_thesaurus.dox 2012-04-24 12:39:38 +0000 |
170 | +++ doc/zorba/ft_thesaurus.dox 2012-04-26 16:53:22 +0000 |
171 | @@ -44,16 +44,16 @@ |
172 | To download and install the WordNet database on a Unix-like system, |
173 | follow these steps: |
174 | |
175 | - -# Download the WordNet database from |
176 | - <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>. |
177 | - All you really need are just the database files |
178 | - (<code>WNdb-3.0.tar.gz</code>). |
179 | - -# Un-gzip and untar the files. |
180 | - This will result in a directory dict containing the database files. |
181 | - -# Move the dict directory somewhere of your choosing, |
182 | - e.g., <code>/usr/local/wordnet-3.0/dict</code>. |
183 | - -# Compile the \c dict directory into a Zorba-compatible binary thesaurus |
184 | - as described below. |
185 | +-# Download the WordNet database from |
186 | + <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>. |
187 | + All you really need are just the database files |
188 | + (<code>WNdb-3.0.tar.gz</code>). |
189 | +-# Un-gzip and untar the files. |
190 | + This will result in a directory dict containing the database files. |
191 | +-# Move the dict directory somewhere of your choosing, |
192 | + e.g., <code>/usr/local/wordnet-3.0/dict</code>. |
193 | +-# Compile the \c dict directory into a Zorba-compatible binary thesaurus |
194 | + as described below. |
195 | |
196 | To compile the WordNet database files, |
197 | use the \c zt-wn-compile script |
198 | @@ -65,12 +65,12 @@ |
199 | zt-wn-compile [-v] wordnet_dict_dir [thesaurus_file] |
200 | \endcode |
201 | |
202 | - - The \c -v option specifies verbose output. |
203 | - - The \e wordnet_dict_dir specifies the full path |
204 | - of the WordNet \c dict directory. |
205 | - - The \e thesaurus_file specifies the name of the resulting binary file. |
206 | - If none is given, it defaults to \c wordnet-en.zth |
207 | - ("en" for English and "zth" for "Zorba Thesaurus file"). |
208 | +- The \c -v option specifies verbose output. |
209 | +- The \e wordnet_dict_dir specifies the full path |
210 | + of the WordNet \c dict directory. |
211 | +- The \e thesaurus_file specifies the name of the resulting binary file. |
212 | + If none is given, it defaults to \c wordnet-en.zth |
213 | + ("en" for English and "zth" for "Zorba Thesaurus file"). |
214 | |
215 | For example: |
216 | |
217 | @@ -78,33 +78,39 @@ |
218 | zt-wn-compile -v /usr/local/wordnet-3.0/dict |
219 | \endcode |
220 | |
221 | -Move the \c wordnet-en.zth file to a location of your choosing. |
222 | +To install the \c wordnet-en.zth file, |
223 | +move it onto Zorba's <i>library path</i>: |
224 | + |
225 | +\code |
226 | +LIB_PATH/edu/princeton/wordnet/wordnet-en.zth |
227 | +\endcode |
228 | |
229 | \subsection ft_thesaurus_precompiled Downloading a Precompiled WordNet Database |
230 | |
231 | Alternatively, |
232 | -you can download a precompiled WordNet database from |
233 | +you can download a precompiled, little-endian (Intel) CPU WordNet database from |
234 | <a href="http://www.zorba-xquery.com/downloads/WordNet-3.0/wordnet-en.zip">here</a>. |
235 | |
236 | \section ft_thesaurus_mappings Thesauri Mappings |
237 | |
238 | In order to use thesauri, |
239 | -you need to specify where they are to the Zorba engine |
240 | -via one or more thesaurus <i>mappings</i>. |
241 | -A <i>mapping</i> maps a symbolic URI to URI for an actual thesaurus. |
242 | +you need to specify what symbolic URI(s) <i>map</i> |
243 | +to what thesauri. |
244 | A mapping is of the form: |
245 | |
246 | -<i>from_uri</i><code>:=</code><b>[</b><i>implementation</i><code>|</code><b>]</b><i>to_uri</i> |
247 | +<i>from_uri</i><code>:=</code><i>implementation-scheme</i><code>:</code><i>to_uri</i> |
248 | |
249 | For example: |
250 | |
251 | \code |
252 | -http://wordnet.princeton.edu:=wordnet|/usr/local/zorba/thesauri/wordnet-en.zth |
253 | +http://wordnet.princeton.edu:=wordnet://wordnet.princeton.edu |
254 | \endcode |
255 | |
256 | says that the symbolic URI \c http://wordnet.princeton.edu |
257 | maps to the WordNet implementation |
258 | -having a database file at the given path. |
259 | +having a database file at the given sub-path |
260 | +\c edu/princeton/wordnet |
261 | +on Zorba's library path. |
262 | Once a mapping is established for a symbolic URI, |
263 | it can be used in a query: |
264 | |
265 | @@ -114,13 +120,8 @@ |
266 | using thesaurus at "http://wordnet.princeton.edu" |
267 | \endcode |
268 | |
269 | -If the \e implementation is omitted, |
270 | -it defaults to \c wordnet. |
271 | As a special-case, |
272 | -the \e from_uri can be \c default or |
273 | -\code |
274 | -##default |
275 | -\endcode |
276 | +the \e from_uri can be \c default or \c ##default |
277 | to allow for specifying the default thesaurus |
278 | as was done for the first example on this page. |
279 | |
280 | @@ -130,7 +131,7 @@ |
281 | use one or more <code>--thesaurus</code> options: |
282 | |
283 | \code |
284 | -zorba --thesaurus default:=/usr/local/zorba/thesauri/wordnet-en.zth ... |
285 | +zorba --thesaurus default:=wordnet://wordnet.princeton.edu ... |
286 | \endcode |
287 | |
288 | \section ft_thesaurus_rels Thesaurus Relationships |
289 | @@ -423,25 +424,26 @@ |
290 | |
291 | If no levels are specified in a query, |
292 | Zorba defaults the WordNet implementation to be 2 levels. |
293 | -The rationale can be found |
294 | -<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>. |
295 | +(The rationale can be found |
296 | +<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>.) |
297 | |
298 | \section ft_thesaurus_providing Providing Your Own Thesaurus |
299 | |
300 | Using the Zorba C++ API, |
301 | you can provide your own thesaurus |
302 | -by deriving from three classes: |
303 | +by deriving from four classes: |
304 | \c Thesaurus, |
305 | \c Thesaurus::iterator, |
306 | +\c ThesaurusProvider, |
307 | and |
308 | -\c ThesaurusProvider. |
309 | +\c URLResolver. |
310 | |
311 | \subsection ft_class_thesaurus The Thesaurus Class |
312 | |
313 | The \c Thesaurus class is: |
314 | |
315 | \code |
316 | -class Thesaurus : public Resource { |
317 | +class Thesaurus { |
318 | public: |
319 | typedef /* implementation-defined */ ptr; |
320 | typedef /* implementation-defined */ range_type; |
321 | @@ -457,15 +459,15 @@ |
322 | |
323 | virtual iterator::ptr lookup( String const &phrase, String const &relationship, range_type at_least, range_type at_most ) const = 0; |
324 | |
325 | - virtual void destroy() const = 0; // interited from Resource |
326 | + virtual void destroy() const = 0; |
327 | protected: |
328 | virtual ~Thesaurus(); |
329 | }; |
330 | \endcode |
331 | |
332 | -For details about the \c ptr type, |
333 | -the \c destroy() function, |
334 | -and why the destructor is \c protected, |
335 | +For details about the \c ptr types, |
336 | +the \c destroy() functions, |
337 | +and why the destructors are \c protected, |
338 | see the \ref memory_management document. |
339 | |
340 | To implement the \c Thesaurus |
341 | @@ -482,18 +484,19 @@ |
342 | </tr> |
343 | <tr> |
344 | <td>\c at_least</td> |
345 | - <td>The The minimum number of levels within the thesaurus to be traversed.</td> |
346 | + <td>The minimum number of levels within the thesaurus to be traversed.</td> |
347 | </tr> |
348 | <tr> |
349 | <td>\c at_most</td> |
350 | - <td>The The maximum number of levels within the thesaurus to be traversed.</td> |
351 | + <td>The maximum number of levels within the thesaurus to be traversed.</td> |
352 | </tr> |
353 | </table> |
354 | |
355 | The \c lookup() function returns a pointer to an \c iterator |
356 | that is used to iterate over the phrase's synonyms. |
357 | - |
358 | -A very simple thesaurus |
359 | +You also need to implement an \c iterator. |
360 | +A very simple \c Thesaurus |
361 | +and its \c iterator |
362 | can be implemented as: |
363 | |
364 | \code |
365 | @@ -505,53 +508,49 @@ |
366 | // |
367 | // Define a simple thesaurus data structure as a map from a phrase to a list of its synonyms. |
368 | // |
369 | - typedef std::list<String> synonyms_t; |
370 | - typedef std::map<String,synonyms_t const*> thesaurus_t; |
371 | + typedef std::list<String> synonyms_type; |
372 | + typedef std::map<String,synonyms_type const*> thesaurus_data_type; |
373 | |
374 | - static thesaurus_t const& get_thesaurus(); |
375 | + static thesaurus_data_type const& get_thesaurus_data(); |
376 | |
377 | class iterator : public Thesaurus::iterator { |
378 | public: |
379 | - iterator( synonyms_t const &s ) : synonyms_( s ), i_( s.begin() ) { } |
380 | + iterator( synonyms_type const &s ) : synonyms_( s ), i_( s.begin() ) { } |
381 | void destroy(); |
382 | bool next( String *synonym ); |
383 | private: |
384 | - synonyms_t const &synonyms_; // synonyms to iterate over |
385 | - synonyms_t::const_iterator i_; // current iterator position |
386 | + synonyms_type const &synonyms_; // synonyms to iterate over |
387 | + synonyms_type::const_iterator i_; // current iterator position |
388 | }; |
389 | }; |
390 | |
391 | void MyThesaurus::destroy() const { |
392 | - // Do nothing since we statically allocate a singleton instance of our thesaurus. |
393 | + // Do nothing since we statically allocate a singleton instance of our Thesaurus. |
394 | } |
395 | |
396 | -MyThesaurus::thesaurus_t const& MyThesaurus::get_thesaurus() { |
397 | - static thesaurus_t thesaurus; |
398 | - if ( thesaurus.empty() ) { |
399 | - // |
400 | - // Construct a thesaurus "by hand" for this example. A real thesaurus would probably |
401 | - // be read from disk. |
402 | - // |
403 | +MyThesaurus::thesaurus_data_type const& MyThesaurus::get_thesaurus_data() { |
404 | + static thesaurus_data_type thesaurus_data; |
405 | + if ( thesaurus_data.empty() ) { |
406 | + // |
407 | + // Construct thesaurus data "by hand" for this example. A real thesaurus would probably be read from disk. |
408 | // Note that every list of synonyms must always include the original phrase. |
409 | // |
410 | - static synonyms_t synonyms; |
411 | + static synonyms_type synonyms; |
412 | synonyms.push_back( "foo" ); |
413 | synonyms.push_back( "foobar" ); |
414 | - thesaurus[ "foo" ] = &synonyms; |
415 | - thesaurus[ "foobar" ] = &synonyms; |
416 | + thesaurus_data[ "foo" ] = &synonyms; |
417 | + thesaurus_data[ "foobar" ] = &synonyms; |
418 | } |
419 | - return thesaurus; |
420 | + return thesaurus_data; |
421 | } |
422 | -\endcode |
423 | |
424 | -\code |
425 | MyThesaurus::iterator::ptr MyThesaurus::lookup( String const &phrase, String const &relationship, |
426 | range_type at_least, range_type at_most ) const { |
427 | - static thesaurus_t const &thesaurus = get_thesaurus(); |
428 | - thesaurus_t::const_iterator const i = thesaurus.find( phrase ); |
429 | + static thesaurus_data_type const &thesaurus_data = get_thesaurus_data(); |
430 | + thesaurus_data_type::const_iterator const entry = thesaurus_data.find( phrase ); |
431 | iterator::ptr result; |
432 | - if ( i != thesaurus.end() ) |
433 | - result.reset( new iterator( *i->second ) ); |
434 | + if ( entry != thesaurus_data.end() ) |
435 | + result.reset( new iterator( *entry->second ) ); |
436 | return std::move( result ); |
437 | } |
438 | |
439 | @@ -572,13 +571,71 @@ |
440 | A real thesaurus would load a large number of synonyms, |
441 | of course. |
442 | |
443 | +\subsection ft_class_thesaurus_provider The ThesaurusProvider Class |
444 | + |
445 | +The \c ThesaurusProvider class is: |
446 | + |
447 | +\code |
448 | +class ThesaurusProvider : public Resource { |
449 | +public: |
450 | + typedef /* implementation-defined */ ptr; |
451 | + |
452 | + virtual bool getThesaurus( locale::iso639_1::type lang, Thesaurus::ptr *thesaurus = 0 ) const = 0; |
453 | + void destroy() const; // inherited from Resource |
454 | +}; |
455 | +\endcode |
456 | + |
457 | +To implement a \c ThesaurusProvider, |
458 | +you need to implement the \c getThesaurus() function where: |
459 | + |
460 | +<table> |
461 | + <tr> |
462 | + <td>\c lang</td> |
463 | + <td>The desired language of the thesaurus.</td> |
464 | + </tr> |
465 | + <tr> |
466 | + <td>\c thesaurus</td> |
467 | + <td>If not \c null, set to point to a thesaurus for \c lang.</td> |
468 | + </tr> |
469 | +</table> |
470 | + |
471 | +The \c getThesaurus() function returns \c true |
472 | +only if it can provide a thesaurus for the given language. |
473 | +Continuing with the example, |
474 | +a very simple \c ThesaurusProvider |
475 | +can be implemented as: |
476 | + |
477 | +\code |
478 | +class MyThesaurusProvider : public ThesaurusProvider { |
479 | +public: |
480 | + void destroy() const; |
481 | + bool getThesaurus( iso639_1::type lang, Thesaurus::ptr* = 0 ) const; |
482 | +}; |
483 | + |
484 | +void MyThesaurusProvider::destroy() const { |
485 | + // Do nothing since we statically allocate a singleton instance of our ThesaurusProvider. |
486 | +} |
487 | + |
488 | +bool MyThesaurusProvider::getThesaurus( iso639_1::type lang, Thesaurus::ptr *result ) const { |
489 | + // |
490 | + // Since our tiny thesaurus contains only universally known words, we don't bother checking lang |
491 | + // and always return true. |
492 | + // |
493 | + static MyThesaurus thesaurus; |
494 | + if ( result ) |
495 | + result->reset( &thesaurus ); |
496 | + return true; |
497 | +} |
498 | +\endcode |
499 | + |
500 | \subsection ft_class_thesaurus_resolver A Thesaurus URL Resolver Class |
501 | |
502 | -In addition to a \c Thesaurus, |
503 | +In addition to a \c Thesaurus |
504 | +and \c ThesaurusProvider, |
505 | you must also implement a "thesaurus resolver" class |
506 | that, |
507 | -given a URL and a language, |
508 | -provides a \c Thesaurus for that language. |
509 | +given a URI, |
510 | +provides a \c ThesaurusProvider for that URI. |
511 | A simple \c ThesaurusURLResolver |
512 | for our simple thesaurus can be implemented as: |
513 | |
514 | @@ -591,23 +648,12 @@ |
515 | String const url_; |
516 | }; |
517 | |
518 | -Resource* |
519 | -ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const { |
520 | - ThesaurusEntityData const *const t_data = dynamic_cast<ThesaurusEntityData const*>( data ); |
521 | - assert( t_data ); |
522 | - static MyThesaurus thesaurus; |
523 | - if ( url == url_ ) |
524 | - switch ( t_data->getLanguage() ) { |
525 | - case locale::iso639_1::en: |
526 | - case locale::iso639_1::unknown: |
527 | - // |
528 | - // Here, we could test to ensure that the language of our thesaurus matches the |
529 | - // language sought, but in our case, we want our thesaurus to be used for all |
530 | - // languages since "foo" and "foobar" are universal. |
531 | - // |
532 | - default: |
533 | - return &thesaurus; |
534 | - } |
535 | +Resource* ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const { |
536 | + if ( data->getKind() == EntityData::THESAURUS ) { |
537 | + static MyThesaurusProvider provider; |
538 | + if ( url == url_ ) |
539 | + return &provider; |
540 | + } |
541 | return 0; |
542 | } |
543 | \endcode |
544 | |
545 | === modified file 'doc/zorba/ft_tokenizer.dox' |
546 | --- doc/zorba/ft_tokenizer.dox 2012-04-24 12:39:38 +0000 |
547 | +++ doc/zorba/ft_tokenizer.dox 2012-04-26 16:53:22 +0000 |
548 | @@ -5,14 +5,25 @@ |
549 | The Zorba XQuery processor implements the |
550 | <a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0</a> |
551 | specification that, among other things, |
552 | -tokenizes a string into a sequence of tokens. |
553 | -See |
554 | -<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">Tokenization</a>. |
555 | - |
556 | -The initial implementation of the toknenizer |
557 | -uses the one provided by the |
558 | -<a href="http://site.icu-project.org/">ICU library</a>. |
559 | -However, you can provide your own tokenizer instead. |
560 | +<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">tokenizes</a> |
561 | +a string into a sequence of tokens. |
562 | + |
563 | +\section ft_tokenizer_tokization Tokenization |
564 | + |
565 | +Using the |
566 | +<a href="http://site.icu-project.org/">ICU library</a>, |
567 | +Zorba's implementation of tokenization |
568 | +considers only alpha-numeric sequences of characters to be part of a token; |
569 | +whitespace and punctuation characters are not; |
570 | +instead, they separate tokens. |
571 | +However, alpha-numeric sequences matching the regular expression |
572 | +<code>[0-9][.,][0-9]</code> |
573 | +are retained as part of a token, e.g.: |
574 | +"98.6" and "1,432.58" are tokens. |
575 | + |
576 | +Alternatively, |
577 | +you can implement your own tokenizer |
578 | +by deriving from the \c Tokenizer class. |
579 | |
580 | \section ft_class_tokenizer The Tokenizer Class |
581 | |
582 | @@ -36,33 +47,43 @@ |
583 | |
584 | class Callback { |
585 | public: |
586 | - typedef Tokenizer::size_type size_type;; |
587 | + typedef Tokenizer::size_type size_type; |
588 | |
589 | virtual ~Callback(); |
590 | |
591 | - virtual void operator()( char const *utf8_s, size_type utf8_len, |
592 | - size_type token_no, size_type sent_no, size_type para_no, |
593 | - void *payload = 0 ) = 0; |
594 | - }; |
595 | - |
596 | - enum ElementTraceOptions { |
597 | - trace_none = 0x0, // Trace no elements. |
598 | - trace_begin = 0x1, // Trace the beginning of elements. |
599 | - trace_end = 0x2 // Trace the ending of elements. |
600 | - }; |
601 | + virtual void token( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang, |
602 | + size_type token_no, size_type sent_no, size_type para_no, |
603 | + Item const *item = 0 ) = 0; |
604 | + }; |
605 | + |
606 | + struct Properties { |
607 | + typedef std::vector<locale::iso639_1::type> languages_type; |
608 | + |
609 | + bool comments_separate_tokens; |
610 | + bool elements_separate_tokens; |
611 | + bool processing_instructions_separate_tokens; |
612 | + languages_type languages; |
613 | + char const *uri; |
614 | + }; |
615 | + |
616 | + virtual void properties( Properties *result ) const = 0; |
617 | |
618 | virtual void destroy() const = 0; |
619 | - virtual void element( Item const &qname, int trace_options ); |
620 | Numbers& numbers(); |
621 | Numbers const& numbers() const; |
622 | - int trace_options() const; |
623 | - |
624 | - virtual void tokenize( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang, |
625 | - bool wildcards, Callback &callback, void *payload = 0 ) = 0; |
626 | + |
627 | + void tokenize_node( Item const &node, locale::iso639_1::type lang, Callback &callback ); |
628 | + |
629 | + virtual void tokenize_string( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang, |
630 | + bool wildcards, Callback &callback, Item const *item = 0 ) = 0; |
631 | |
632 | protected: |
633 | - Tokenizer( Numbers&, int trace_options = trace_none ); |
634 | + Tokenizer( Numbers& ); |
635 | virtual ~Tokenizer(); |
636 | + |
637 | + bool find_lang_attribute( Item const&, locale::iso639_1::type *lang ); |
638 | + virtual void item( Item const&, bool entering ); |
639 | + virtual void tokenize_node_impl( Item const&, locale::iso639_1::type, Callback&, bool tokenize_acp ); |
640 | }; |
641 | \endcode |
642 | |
643 | @@ -76,8 +97,8 @@ |
644 | It simply keeps track of the current |
645 | token, sentence, and paragraph numbers. |
646 | |
647 | -To implement the \c Tokenizer, |
648 | -you need to implement the \c %tokenize() function where: |
649 | +To implement a \c Tokenizer, |
650 | +you need to implement the \c %tokenize_string() function where: |
651 | |
652 | <table> |
653 | <tr> |
654 | @@ -115,9 +136,13 @@ |
655 | </td> |
656 | </tr> |
657 | <tr> |
658 | - <td>\c payload</td> |
659 | + <td>\c item</td> |
660 | <td> |
661 | - Optional implementation-defined data. |
662 | + The \c Item whence this token came. |
663 | + If the token occurred within an element, |
664 | + the \c Item is the text node. |
665 | + If the token occurred within an attribute, |
666 | + the \c Item is the attribute node. |
667 | </td> |
668 | </tr> |
669 | </table> |
670 | @@ -127,21 +152,30 @@ |
671 | However, |
672 | the things a tokenizer should take into consideration include: |
673 | |
674 | - - Detecting sentence termination ('.', '?', and '!' characters). |
675 | - - Handling floating-point numbers with possible thousands separators |
676 | - in US and European formats, e.g. "98.7", "98,7", "10,000", etc. |
677 | - - Distinguishing '.' used as a sentence terminator |
678 | - from '.' used as a decimal point. |
679 | - - Handling apostrophies, e.g., "men's". |
680 | - - Handling acronyms, e.g., "AT&T". |
681 | - |
682 | -\subsection ft_paragraphs Paragraphs |
683 | +- Detecting sentence termination ('.', '?', and '!' characters). |
684 | +- Handling floating-point numbers with possible thousands separators |
685 | + in US and European formats, e.g. "98.7", "98,7", "10,000", etc. |
686 | +- Distinguishing '.' used as a sentence terminator |
687 | + from '.' used as a decimal point. |
688 | +- Handling apostrophes, e.g., "men's". |
689 | +- Handling acronyms, e.g., "AT&T". |
690 | + |
691 | +The task of iterating over an XML element's child nodes |
692 | +is done by \c tokenize_node_impl(). |
693 | +Its default implementation |
694 | +treats XML elements, comments, and processing instructions |
695 | +as token separators. |
696 | +(See \ref ft_tokenizer_properties.) |
697 | +If you want to change that, |
698 | +you need to override \c tokenize_node_impl(). |
699 | + |
700 | +\subsection ft_tokenizer_paragraphs Paragraphs |
701 | |
702 | By default, |
703 | Zorba increments the current paragraph number once |
704 | for each XML element encountered. |
705 | However, |
706 | -this doens't work well for mixed content. |
707 | +this doesn't work well for mixed content. |
708 | For example, in the XHTML: |
709 | \code |
710 | <p>The <em>best</em> thing ever!</p> |
711 | @@ -150,31 +184,65 @@ |
712 | but Zorba will consider that 3 paragraphs by default. |
713 | |
714 | Your tokenizer can take control over when the paragraph number is incremented |
715 | -by passing the bitwise-or |
716 | -of the \c ElementTraceOptions values |
717 | -to the constructor |
718 | -and overriding the \c element() function. |
719 | -The \c element() function is passed the QName of the current XML element |
720 | -and (depending on the initial value passed to the constructor) |
721 | -one of \c trace_begin or \c trace_end. |
722 | -Note that this function is called |
723 | -only if the trace options value |
724 | -passed to the constructor |
725 | -was non-zero. |
726 | +by overriding the \c item() function. |
727 | +The \c item() function is passed the \c Item of the current XML element |
728 | +and whether the item is being entered or exited. |
729 | |
730 | For example, |
731 | -the \c element() function for tokenizing XHTML |
732 | +the \c item() function for tokenizing XHTML |
733 | would be along the lines of: |
734 | \code |
735 | -void MyTokenizer::element( Item const &qname, int trace_options ) { |
736 | - if ( trace_options & trace_end ) |
737 | - return; |
738 | - String const name( qname.getLocalName() ); |
739 | - if ( /* qname is an XHTML block-level element */ ) |
740 | - ++numbers().para; |
741 | +void MyTokenizer::item( Item const &item, bool entering ) { |
742 | + if ( entering && item.isNode() && item.getNodeKind() == store::StoreConsts::elementNode ) { |
743 | + Item qname; |
744 | + item.getNodeName( qname ); |
745 | + if ( /* qname matches an XHTML block-level element's name */ ) |
746 | + ++numbers().para; |
747 | } |
748 | \endcode |
749 | |
750 | +\subsection ft_tokenizer_properties Properties |
751 | + |
752 | +To implement a \c Tokenizer, |
753 | +you need also to implement the \c %properties() function |
754 | +that fills in the \c Properties struct where: |
755 | + |
756 | +<table> |
757 | + <tr> |
758 | + <td>\c comments_separate_tokens</td> |
759 | + <td> |
760 | + If \c true, XML comments separate tokens. For example, |
761 | + <code>net<!-- -->work</code> would be 2 tokens instead of 1. |
762 | + </td> |
763 | + </tr> |
764 | + <tr> |
765 | + <td>\c elements_separate_tokens</td> |
766 | + <td> |
767 | + If \c true, XML elements separate tokens. For example, |
768 | + <code><b>B</b>old</code> would be 2 tokens instead of 1. |
769 | + </td> |
770 | + </tr> |
771 | + <tr> |
772 | + <td>\c processing_instructions_separate_tokens</td> |
773 | + <td> |
774 | + If \c true, XML processing instructions separate tokens. For example, |
775 | + <code>net<?PI pi?>work</code> would be 2 tokens instead of 1. |
776 | + </td> |
777 | + </tr> |
778 | + <tr> |
779 | + <td>\c languages</td> |
780 | + <td> |
781 | + The list of languages supported by the tokenizer. |
782 | + </td> |
783 | + </tr> |
784 | + <tr> |
785 | + <td>\c uri</td> |
786 | + <td> |
787 | + The URI that uniquely identifies the %Tokenizer. |
788 | + </td> |
789 | + </tr> |
790 | +</table> |
791 | + |
792 | \section ft_class_tokenizer_provider The TokenizerProvider Class |
793 | |
794 | In addition to a \c Tokenizer, |
795 | @@ -185,20 +253,51 @@ |
796 | class TokenizerProvider { |
797 | public: |
798 | virtual ~TokenizerProvider(); |
799 | - virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers &numbers ) const = 0; |
800 | + virtual bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *numbers = 0, Tokenizer::ptr* = 0 ) const = 0; |
801 | }; |
802 | \endcode |
803 | |
804 | +Specifically, you need to implement the \c getTokenizer() function where: |
805 | + |
806 | +<table> |
807 | + <tr> |
808 | + <td>\c lang</td> |
809 | + <td>The language to tokenize.</td> |
810 | + </tr> |
811 | + <tr> |
812 | + <td>\c num</td> |
813 | + <td> |
814 | + The \c Numbers to use. |
815 | + If \c null, |
816 | + \a t is not set. |
817 | + </td> |
818 | + </tr> |
819 | + <tr> |
820 | + <td>\c t</td> |
821 | + <td> |
822 | + If not \c null, |
823 | + set to point to a Tokenizer for \a lang. |
824 | + </td> |
825 | + </tr> |
826 | +</table> |
827 | + |
828 | A simple \c TokenizerProvider for our tokenizer can be implemented as: |
829 | |
830 | \code |
831 | class MyTokenizerProvider : public TokenizerProvider { |
832 | public: |
833 | - Tokenizer::ptr getTokenizer( locale::iso639_1::type lang ) const; |
834 | + bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers* = 0, Tokenizer::ptr* = 0 ) const; |
835 | }; |
836 | |
837 | -Tokenizer::ptr MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang const { |
838 | - return Tokenizer::ptr( new MyTokenizer ); |
839 | +bool MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *num, Tokenizer::ptr *t ) const { |
840 | + switch ( lang ) { |
841 | + case iso639_1::en: |
842 | + if ( num && t ) |
843 | + t->reset( new MyTokenizer ); |
844 | + return true; |
845 | + default: |
846 | + return false; |
847 | + } |
848 | } |
849 | \endcode |
850 | |
851 | |
852 | === modified file 'include/zorba/locale.h' |
853 | --- include/zorba/locale.h 2012-04-24 12:39:38 +0000 |
854 | +++ include/zorba/locale.h 2012-04-26 16:53:22 +0000 |
855 | @@ -22,24 +22,198 @@ |
856 | |
857 | /////////////////////////////////////////////////////////////////////////// |
858 | |
859 | + /** |
860 | + * Defines constants for all ISO 639-1 language codes. |
861 | + */ |
862 | namespace iso639_1 { |
863 | enum type { |
864 | unknown, |
865 | - da, // Danish |
866 | - de, // German |
867 | - en, // English |
868 | - es, // Spanish |
869 | - fi, // Finnish |
870 | - fr, // French |
871 | - hu, // Hungarian |
872 | - it, // Italian |
873 | - nl, // Dutch |
874 | - no, // Norwegian |
875 | - pt, // Portuguese |
876 | - ro, // Romanian |
877 | - ru, // Russian |
878 | - sv, // Swedish |
879 | - tr, // Turkish |
880 | + aa, ///< Afar |
881 | + ab, ///< Abkhazian |
882 | + ae, ///< Avestan |
883 | + af, ///< Afrikaans |
884 | + ak, ///< Akan |
885 | + am, ///< Amharic |
886 | + an, ///< Aragonese |
887 | + ar, ///< Arabic |
888 | + as, ///< Assamese |
889 | + av, ///< Avaric |
890 | + ay, ///< Aymara |
891 | + az, ///< Azerbaijani |
892 | + ba, ///< Bashkir |
893 | + be, ///< Byelorussian |
894 | + bg, ///< Bulgarian |
895 | + bh, ///< Bihari |
896 | + bi, ///< Bislama |
897 | + bm, ///< Bambara |
898 | + bn, ///< Bengali; Bangla |
899 | + bo, ///< Tibetan |
900 | + br, ///< Breton |
901 | + bs, ///< Bosnian |
902 | + ca, ///< Catalan |
903 | + ce, ///< Chechen |
904 | + ch, ///< Chamorro |
905 | + co, ///< Corsican |
906 | + cr, ///< Cree |
907 | + cs, ///< Czech |
908 | + cu, ///< Church Slavic; Church Slavonic |
909 | + cv, ///< Chuvash |
910 | + cy, ///< Welsh |
911 | + da, ///< Danish |
912 | + de, ///< German |
913 | + dv, ///< Divehi |
914 | + dz, ///< Bhutani |
915 | + ee, ///< Ewe |
916 | + el, ///< Greek |
917 | + en, ///< English |
918 | + eo, ///< Esperanto |
919 | + es, ///< Spanish |
920 | + et, ///< Estonian |
921 | + eu, ///< Basque |
922 | + fa, ///< Persian |
923 | + ff, ///< Fulah |
924 | + fi, ///< Finnish |
925 | + fj, ///< Fiji |
926 | + fo, ///< Faroese |
927 | + fr, ///< French |
928 | + fy, ///< Frisian |
929 | + ga, ///< Irish |
930 | + gd, ///< Scots Gaelic |
931 | + gl, ///< Galician |
932 | + gn, ///< Guarani |
933 | + gu, ///< Gujarati |
934 | + gv, ///< Manx |
935 | + ha, ///< Hausa |
936 | + he, ///< Hebrew (formerly iw) |
937 | + hi, ///< Hindi |
938 | + ho, ///< Hiri Motu |
939 | + hr, ///< Croatian |
940 | + ht, ///< Haitian Creole |
941 | + hu, ///< Hungarian |
942 | + hy, ///< Armenian |
943 | + hz, ///< Herero |
944 | + ia, ///< Interlingua |
945 | + id, ///< Indonesian (formerly in) |
946 | + ie, ///< Interlingue |
947 | + ig, ///< Igbo |
948 | + ii, ///< Nuosu |
949 | + ik, ///< Inupiak |
950 | + io, ///< Ido |
951 | + is, ///< Icelandic |
952 | + it, ///< Italian |
953 | + iu, ///< Inuktitut |
954 | + ja, ///< Japanese |
955 | + jv, ///< Javanese |
956 | + ka, ///< Georgian |
957 | + kg, ///< Kongo |
958 | + ki, ///< Gikuyu |
959 | + kj, ///< Kuanyama |
960 | + kk, ///< Kazakh |
961 | + kl, ///< Greenlandic |
962 | + km, ///< Cambodian |
963 | + kn, ///< Kannada |
964 | + ko, ///< Korean |
965 | + kr, ///< Kanuri |
966 | + ks, ///< Kashmiri |
967 | + ku, ///< Kurdish |
968 | + kv, ///< Komi |
969 | + kw, ///< Cornish |
970 | + ky, ///< Kirghiz |
971 | + la, ///< Latin |
972 | + lb, ///< Letzeburgesch |
973 | + lg, ///< Ganda |
974 | + li, ///< Limburgan; Limburger; Limburgish |
975 | + ln, ///< Lingala |
976 | + lo, ///< Laothian |
977 | + lt, ///< Lithuanian |
978 | + lu, ///< Luba-Katanga |
979 | + lv, ///< Latvian |
980 | + mg, ///< Malagasy |
981 | + mh, ///< Marshallese |
982 | + mi, ///< Maori |
983 | + mk, ///< Macedonian |
984 | + ml, ///< Malayalam |
985 | + mn, ///< Mongolian |
986 | + mo, ///< Moldavian |
987 | + mr, ///< Marathi |
988 | + ms, ///< Malay |
989 | + mt, ///< Maltese |
990 | + my, ///< Burmese |
991 | + na, ///< Nauru |
992 | + nb, ///< Norwegian Bokmal |
993 | + nd, ///< Ndebele, North |
994 | + ne, ///< Nepali |
995 | + ng, ///< Ndonga |
996 | + nl, ///< Dutch |
997 | + nn, ///< Norwegian Nynorsk |
998 | + no, ///< Norwegian |
999 | + nr, ///< Ndebele, South |
1000 | + nv, ///< Navajo; Navaho |
1001 | + ny, ///< Chichewa; Chewa; Nyanja |
1002 | + oc, ///< Occitan |
1003 | + oj, ///< Ojibwa |
1004 | + om, ///< Oromo |
1005 | + or_, ///< Oriya |
1006 | + os, ///< Ossetian; Ossetic |
1007 | + pa, ///< Panjabi; Punjabi |
1008 | + pi, ///< Pali |
1009 | + pl, ///< Polish |
1010 | + ps, ///< Pashto, Pushto |
1011 | + pt, ///< Portuguese |
1012 | + qu, ///< Quechua |
1013 | + rm, ///< Romansh |
1014 | + rn, ///< Kirundi |
1015 | + ro, ///< Romanian |
1016 | + ru, ///< Russian |
1017 | + rw, ///< Kinyarwanda |
1018 | + sa, ///< Sanskrit |
1019 | + sc, ///< Sardinian |
1020 | + sd, ///< Sindhi |
1021 | + se, ///< Northern Sami |
1022 | + sg, ///< Sangho |
1023 | + sh, ///< Serbo-Croatian |
1024 | + si, ///< Sinhalese |
1025 | + sk, ///< Slovak |
1026 | + sl, ///< Slovenian |
1027 | + sm, ///< Samoan |
1028 | + sn, ///< Shona |
1029 | + so, ///< Somali |
1030 | + sq, ///< Albanian |
1031 | + sr, ///< Serbian |
1032 | + ss, ///< Siswati |
1033 | + st, ///< Sesotho |
1034 | + su, ///< Sundanese |
1035 | + sv, ///< Swedish |
1036 | + sw, ///< Swahili |
1037 | + ta, ///< Tamil |
1038 | + te, ///< Telugu |
1039 | + tg, ///< Tajik |
1040 | + th, ///< Thai |
1041 | + ti, ///< Tigrinya |
1042 | + tk, ///< Turkmen |
1043 | + tl, ///< Tagalog |
1044 | + tn, ///< Setswana |
1045 | + to, ///< Tonga |
1046 | + tr, ///< Turkish |
1047 | + ts, ///< Tsonga |
1048 | + tt, ///< Tatar |
1049 | + tw, ///< Twi |
1050 | + ty, ///< Tahitian |
1051 | + ug, ///< Uighur |
1052 | + uk, ///< Ukrainian |
1053 | + ur, ///< Urdu |
1054 | + uz, ///< Uzbek |
1055 | + ve, ///< Venda |
1056 | + vi, ///< Vietnamese |
1057 | + vo, ///< Volapuk |
1058 | + wa, ///< Walloon |
1059 | + wo, ///< Wolof |
1060 | + xh, ///< Xhosa |
1061 | + yi, ///< Yiddish |
1062 | + yo, ///< Yoruba |
1063 | + za, ///< Zhuang |
1064 | + zh, ///< Chinese |
1065 | + zu, ///< Zulu |
1066 | NUM_ENTRIES |
1067 | }; |
1068 | } |
1069 | |
1070 | === modified file 'include/zorba/pregenerated/diagnostic_list.h' |
1071 | --- include/zorba/pregenerated/diagnostic_list.h 2012-04-24 12:39:38 +0000 |
1072 | +++ include/zorba/pregenerated/diagnostic_list.h 2012-04-26 16:53:22 +0000 |
1073 | @@ -454,6 +454,14 @@ |
1074 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8402_THESAURUS_ENDIANNESS_MISMATCH; |
1075 | |
1076 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR; |
1077 | + |
1078 | +extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED; |
1079 | + |
1080 | +extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED; |
1081 | + |
1082 | +extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED; |
1083 | + |
1084 | +extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED; |
1085 | #endif |
1086 | |
1087 | extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQD0001_PREFIX_NOT_DECLARED; |
1088 | |
1089 | === modified file 'include/zorba/stemmer.h' |
1090 | --- include/zorba/stemmer.h 2012-04-24 12:39:38 +0000 |
1091 | +++ include/zorba/stemmer.h 2012-04-26 16:53:22 +0000 |
1092 | @@ -52,6 +52,23 @@ |
1093 | virtual void destroy() const = 0; |
1094 | |
1095 | /** |
1096 | + * Various properties of this %Stemmer. |
1097 | + */ |
1098 | + struct Properties { |
1099 | + /** |
1100 | + * The URI that uniquely identifies this %Stemmer. |
1101 | + */ |
1102 | + char const *uri; |
1103 | + }; |
1104 | + |
1105 | + /** |
1106 | + * Gets the Properties of this %Stemmer. |
1107 | + * |
1108 | + * @param result The Properties to populate. |
1109 | + */ |
1110 | + virtual void properties( Properties *result ) const = 0; |
1111 | + |
1112 | + /** |
1113 | * Stems the given word. |
1114 | * |
1115 | * @param word The word to stem. |
1116 | @@ -66,7 +83,7 @@ |
1117 | }; |
1118 | |
1119 | /** |
1120 | - * A %StemmerProvider, given an language, provies a stemmer for it. |
1121 | + * A %StemmerProvider, given a language, provides a Stemmer for it. |
1122 | */ |
1123 | class ZORBA_DLL_PUBLIC StemmerProvider { |
1124 | public: |
1125 | @@ -76,10 +93,12 @@ |
1126 | * Gets a Stemmer for the given language. |
1127 | * |
1128 | * @param lang The language to get a Stemmer for. |
1129 | - * @return The relevant Stemmer or \c NULL if no stemmer for the given |
1130 | - * language is available. |
1131 | + * @param s If not \c null, set to point to a Stemmer for \a lang. |
1132 | + * @return Returns \c true only if this provider can provide a stemmer for |
1133 | + * \a lang. |
1134 | */ |
1135 | - virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0; |
1136 | + virtual bool getStemmer( locale::iso639_1::type lang, |
1137 | + Stemmer::ptr *s = 0 ) const = 0; |
1138 | }; |
1139 | |
1140 | /////////////////////////////////////////////////////////////////////////////// |
1141 | |
1142 | === modified file 'include/zorba/thesaurus.h' |
1143 | --- include/zorba/thesaurus.h 2012-04-24 12:39:38 +0000 |
1144 | +++ include/zorba/thesaurus.h 2012-04-26 16:53:22 +0000 |
1145 | @@ -32,25 +32,13 @@ |
1146 | /////////////////////////////////////////////////////////////////////////////// |
1147 | |
1148 | /** |
1149 | - * Contains additional data for URIMappers and URLResolvers |
1150 | - * when mapping/resolving a Thesaurus URI. |
1151 | - */ |
1152 | -class ZORBA_DLL_PUBLIC ThesaurusEntityData : public EntityData { |
1153 | -public: |
1154 | - /** |
1155 | - * Gets the language for which a thesaurus is being requested. |
1156 | - * |
1157 | - * @return said language. |
1158 | - */ |
1159 | - virtual locale::iso639_1::type getLanguage() const = 0; |
1160 | -}; |
1161 | - |
1162 | -/** |
1163 | - * A %Thesaurus is-a Resource for thesaurus implementations. |
1164 | - */ |
1165 | -class ZORBA_DLL_PUBLIC Thesaurus : public Resource { |
1166 | -public: |
1167 | - typedef std::unique_ptr<Thesaurus,internal::ztd::destroy_delete<Thesaurus> > |
1168 | + * A %Thesaurus provides a way to look up related phrases for a given phrase. |
1169 | + */ |
1170 | +class ZORBA_DLL_PUBLIC Thesaurus { |
1171 | +public: |
1172 | + typedef std::unique_ptr< |
1173 | + Thesaurus const,internal::ztd::destroy_delete<Thesaurus const> |
1174 | + > |
1175 | ptr; |
1176 | |
1177 | /** |
1178 | @@ -88,11 +76,11 @@ |
1179 | * Destroys this %Thesaurus. |
1180 | * This function is called by Zorba when the %Thesaurus is no longer needed. |
1181 | * |
1182 | - * If your URLResolver dynamically allocates %Thesaurus objects, then the |
1183 | + * If your implementation dynamically allocates %Thesaurus objects, then your |
1184 | * implementation can simply be (and usually is) <code>delete this</code>. |
1185 | * |
1186 | - * If your URLResolver returns a pointer to a static %Thesaurus object, then |
1187 | - * the implementation should do nothing. |
1188 | + * If your implementation returns a pointer to a static %Thesaurus object, |
1189 | + * then your implementation should do nothing. |
1190 | */ |
1191 | virtual void destroy() const = 0; |
1192 | |
1193 | @@ -119,6 +107,32 @@ |
1194 | |
1195 | /////////////////////////////////////////////////////////////////////////////// |
1196 | |
1197 | +/** |
1198 | + * A %ThesaurusProvider is-a Resource for providing thesauri for a given |
1199 | + * language. |
1200 | + */ |
1201 | +class ZORBA_DLL_PUBLIC ThesaurusProvider : public Resource { |
1202 | +public: |
1203 | + typedef std::unique_ptr< |
1204 | + ThesaurusProvider const, |
1205 | + internal::ztd::destroy_delete<ThesaurusProvider const> |
1206 | + > |
1207 | + ptr; |
1208 | + |
1209 | + /** |
1210 | + * Gets a Thesaurus for the given language. |
1211 | + * |
1212 | + * @param lang The desired language of the thesaurus. |
1213 | + * @param t If not \c null, set to point to a Thesaurus for \a lang. |
1214 | + * @return Returns \c true only if this provider can provide a thesaurus for |
1215 | + * \a lang. |
1216 | + */ |
1217 | + virtual bool getThesaurus( locale::iso639_1::type lang, |
1218 | + Thesaurus::ptr *t = 0 ) const = 0; |
1219 | +}; |
1220 | + |
1221 | +/////////////////////////////////////////////////////////////////////////////// |
1222 | + |
1223 | } // namespace zorba |
1224 | #endif /* ZORBA_NO_FULL_TEXT */ |
1225 | #endif /* ZORBA_THESAURUS_API_H */ |
1226 | |
1227 | === modified file 'include/zorba/tokenizer.h' |
1228 | --- include/zorba/tokenizer.h 2012-04-24 12:39:38 +0000 |
1229 | +++ include/zorba/tokenizer.h 2012-04-26 16:53:22 +0000 |
1230 | @@ -18,6 +18,8 @@ |
1231 | #ifndef ZORBA_TOKENIZER_API_H |
1232 | #define ZORBA_TOKENIZER_API_H |
1233 | |
1234 | +#include <vector> |
1235 | + |
1236 | #include <zorba/config.h> |
1237 | #include <zorba/locale.h> |
1238 | #include <zorba/internal/unique_ptr.h> |
1239 | @@ -67,8 +69,6 @@ |
1240 | * A %Callback is called once per token. |
1241 | * This is only internally by Zorba. |
1242 | * You do not need to derive from this class. |
1243 | - * The only thing you need to do is call the callback's \c operator() once |
1244 | - * for each token you parse in \c tokenize(). |
1245 | */ |
1246 | class Callback { |
1247 | public: |
1248 | @@ -77,19 +77,75 @@ |
1249 | virtual ~Callback(); |
1250 | |
1251 | /** |
1252 | + * This member-function is called whenever an item that is being tokenized |
1253 | + * is entered or exited. |
1254 | + * |
1255 | + * @param item The item being entered or exited. |
1256 | + * @param entering If \c true, the item is being entered; if \c false, the |
1257 | + * item is being exited. |
1258 | + */ |
1259 | + virtual void item( Item const &item, bool entering ); |
1260 | + |
1261 | + /** |
1262 | * This member-function is called once per token. |
1263 | * |
1264 | * @param utf8_s The UTF-8 token string. It is not null-terminated. |
1265 | * @param utf8_len The number of bytes in the token string. |
1266 | + * @param lang The language of the token. |
1267 | * @param token_no The token number. Token numbers start at 0. |
1268 | * @param sent_no The sentence number. Sentence numbers start at 1. |
1269 | * @param para_no The paragraph number. Paragraph numbers start at 1. |
1270 | - * @param payload Optional user-defined data. |
1271 | - */ |
1272 | - virtual void operator()( char const *utf8_s, size_type utf8_len, |
1273 | - size_type token_no, size_type sent_no, |
1274 | - size_type para_no, void *payload = 0 ) = 0; |
1275 | - }; |
1276 | + * @param item The Item this token is from, if any. |
1277 | + */ |
1278 | + virtual void token( char const *utf8_s, size_type utf8_len, |
1279 | + locale::iso639_1::type lang, |
1280 | + size_type token_no, size_type sent_no, |
1281 | + size_type para_no, Item const *item = 0 ) = 0; |
1282 | + }; |
1283 | + |
1284 | + ///////////////////////////////////////////////////////////////////////////// |
1285 | + |
1286 | + /** |
1287 | + * Various properties of this %Tokenizer. |
1288 | + */ |
1289 | + struct Properties { |
1290 | + typedef std::vector<locale::iso639_1::type> languages_type; |
1291 | + |
1292 | + /** |
1293 | + * If \c true, XML comments separate tokens. For example, |
1294 | + * \c net<!---->work would be 2 tokens instead of 1. |
1295 | + */ |
1296 | + bool comments_separate_tokens; |
1297 | + |
1298 | + /** |
1299 | + * If \c true, XML elements separate tokens. For example, |
1300 | + * \c <b>B</b>old would be 2 tokens instead of 1. |
1301 | + */ |
1302 | + bool elements_separate_tokens; |
1303 | + |
1304 | + /** |
1305 | + * If \c true, XML processing instructions separate tokens. For example, |
1306 | + * <code>net<?PI pi?>work</code> would be 2 tokens instead of 1. |
1307 | + */ |
1308 | + bool processing_instructions_separate_tokens; |
1309 | + |
1310 | + /** |
1311 | + * The set of languages supported. |
1312 | + */ |
1313 | + languages_type languages; |
1314 | + |
1315 | + /** |
1316 | + * The URI that uniquely identifies this %Tokenizer. |
1317 | + */ |
1318 | + char const* uri; |
1319 | + }; |
1320 | + |
1321 | + /** |
1322 | + * Gets the Properties of this %Tokenizer. |
1323 | + * |
1324 | + * @param result The Properties to populate. |
1325 | + */ |
1326 | + virtual void properties( Properties *result ) const = 0; |
1327 | |
1328 | ///////////////////////////////////////////////////////////////////////////// |
1329 | |
1330 | @@ -106,39 +162,6 @@ |
1331 | virtual void destroy() const = 0; |
1332 | |
1333 | /** |
1334 | - * Trace options for XML elements combined via bitwise-or. |
1335 | - */ |
1336 | - enum ElementTraceOptions { |
1337 | - trace_none = 0x0, ///< Trace no elements. |
1338 | - trace_begin = 0x1, ///< Trace the beginning of elements. |
1339 | - trace_end = 0x2 ///< Trace the ending of elements. |
1340 | - }; |
1341 | - |
1342 | - /** |
1343 | - * Gets the trace options. If the value is \c trace_none, then the paragraph |
1344 | - * number will be incremented upon entering an XML element; if the value is |
1345 | - * anything other than \c trace_none, then the tokenizer assumes |
1346 | - * responsibility for incrementing the paragraph number. |
1347 | - * |
1348 | - * @return Returns said options. |
1349 | - */ |
1350 | - int trace_options() const { |
1351 | - return trace_options_; |
1352 | - } |
1353 | - |
1354 | - /** |
1355 | - * This function is called whenever an XML element is entered during |
1356 | - * tokenization. Note that this function is called only if \c |
1357 | - * trace_options() returns non-zero. |
1358 | - * |
1359 | - * @param qname The element's QName. |
1360 | - * @param trace_options The bitwise-or of the trace option(s) in effect for a |
1361 | - * particular call. |
1362 | - * @see trace_options() |
1363 | - */ |
1364 | - virtual void element( Item const &qname, int trace_options ); |
1365 | - |
1366 | - /** |
1367 | * Gets this %Tokenizer's associated Numbers. |
1368 | * |
1369 | * @return Returns said Numbers. |
1370 | @@ -153,6 +176,16 @@ |
1371 | Numbers const& numbers() const; |
1372 | |
1373 | /** |
1374 | + * Tokenizes the given node. |
1375 | + * |
1376 | + * @param node The node to tokenize. |
1377 | + * @param lang The default language to use. |
1378 | + * @param callback The Callback to call once per token. |
1379 | + */ |
1380 | + void tokenize_node( Item const &node, locale::iso639_1::type lang, |
1381 | + Callback &callback ); |
1382 | + |
1383 | + /** |
1384 | * Tokenizes the given string. |
1385 | * |
1386 | * @param utf8_s The UTF-8 string to tokenize. It need not be |
1387 | @@ -162,11 +195,11 @@ |
1388 | * @param wildcards If \c true, allows XQuery wildcard syntax characters to |
1389 | * be part of tokens. |
1390 | * @param callback The Callback to call once per token. |
1391 | - * @param payload Optional user-defined data. |
1392 | + * @param item The Item this string is from, if any. |
1393 | */ |
1394 | - virtual void tokenize( char const *utf8_s, size_type utf8_len, |
1395 | - locale::iso639_1::type lang, bool wildcards, |
1396 | - Callback &callback, void *payload = 0 ) = 0; |
1397 | + virtual void tokenize_string( char const *utf8_s, size_type utf8_len, |
1398 | + locale::iso639_1::type lang, bool wildcards, |
1399 | + Callback &callback, Item const *item = 0 ) = 0; |
1400 | |
1401 | ///////////////////////////////////////////////////////////////////////////// |
1402 | |
1403 | @@ -175,27 +208,71 @@ |
1404 | * Constructs a %Tokenizer. |
1405 | * |
1406 | * @param numbers the Numbers to use. |
1407 | - * @param trace_options The bitwise-or of the available trace options, if |
1408 | - * any. |
1409 | */ |
1410 | - Tokenizer( Numbers &numbers, int trace_options = trace_none ); |
1411 | + Tokenizer( Numbers &numbers ); |
1412 | |
1413 | /** |
1414 | * Destroys a %Tokenizer. |
1415 | */ |
1416 | virtual ~Tokenizer() = 0; |
1417 | |
1418 | + /** |
1419 | + * Given an element, finds its \c xml:lang attribute, if any, and gets its |
1420 | + * value. |
1421 | + * |
1422 | + * @param element The element to check. |
1423 | + * @param lang A pointer to where to put the found language, if any. |
1424 | + * @return Returns \c true only if an \c xml:lang attribute is found and the |
1425 | + * value is a known language. |
1426 | + */ |
1427 | + bool find_lang_attribute( Item const &element, locale::iso639_1::type *lang ); |
1428 | + |
1429 | + /** |
1430 | + * This member-function is called whenever an item that is being tokenized is |
1431 | + * entered or exited. |
1432 | + * |
1433 | + * @param item The item being entered or exited. |
1434 | + * @param entering If \c true, the item is being entered; if \c false, the |
1435 | + * item is being exited. |
1436 | + */ |
1437 | + virtual void item( Item const &item, bool entering ); |
1438 | + |
1439 | + /** |
1440 | + * Tokenizes the given node and all of its child nodes, if any. For each |
1441 | + * node, it is required that this function call the item() member function of |
1442 | + * both this %Tokenizer and of the Callback twice, once each for entrance and |
1443 | + * exit. |
1444 | + * |
1445 | + * @param node The node to tokenize. |
1446 | + * @param lang The default language to use. |
1447 | + * @param callback The Callback to call per token. |
1448 | + * @param tokenize_acp If \c true, additionally tokenize all attribute, |
1449 | + * comment, and processing-instruction nodes encountered; |
1450 | + * if \c false, skip them. |
1451 | + */ |
1452 | + virtual void tokenize_node_impl( Item const &node, |
1453 | + locale::iso639_1::type lang, |
1454 | + Callback &callback, bool tokenize_acp ); |
1455 | + |
1456 | private: |
1457 | - int trace_options_; |
1458 | - Numbers *no_; |
1459 | + Numbers *numbers_; |
1460 | }; |
1461 | |
1462 | +inline Tokenizer::Tokenizer( Numbers &numbers ) : numbers_( &numbers ) { |
1463 | +} |
1464 | + |
1465 | inline Tokenizer::Numbers& Tokenizer::numbers() { |
1466 | - return *no_; |
1467 | + return *numbers_; |
1468 | } |
1469 | |
1470 | inline Tokenizer::Numbers const& Tokenizer::numbers() const { |
1471 | - return *no_; |
1472 | + return *numbers_; |
1473 | +} |
1474 | + |
1475 | +inline void Tokenizer::tokenize_node( Item const &item, |
1476 | + locale::iso639_1::type lang, |
1477 | + Callback &callback ) { |
1478 | + tokenize_node_impl( item, lang, callback, true ); |
1479 | } |
1480 | |
1481 | /////////////////////////////////////////////////////////////////////////////// |
1482 | @@ -211,11 +288,14 @@ |
1483 | * Creates a new %Tokenizer. |
1484 | * |
1485 | * @param lang The language of the text that the tokenizer will tokenize. |
1486 | - * @param numbers The Numbers to use. |
1487 | - * @return Returns said %Tokenizer. |
1488 | + * @param numbers The Numbers to use. If \c null, \a t is not set. |
1489 | + * @param t If not \c null, set to point to a Tokenizer for \a lang. |
1490 | + * @return Returns \c true only if this provider can provide a tokenizer for |
1491 | + * \a lang. |
1492 | */ |
1493 | - virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang, |
1494 | - Tokenizer::Numbers &numbers ) const = 0; |
1495 | + virtual bool getTokenizer( locale::iso639_1::type lang, |
1496 | + Tokenizer::Numbers *numbers = 0, |
1497 | + Tokenizer::ptr *t = 0 ) const = 0; |
1498 | }; |
1499 | |
1500 | /////////////////////////////////////////////////////////////////////////////// |
1501 | |
1502 | === modified file 'include/zorba/uri_resolvers.h' |
1503 | --- include/zorba/uri_resolvers.h 2012-04-24 12:39:38 +0000 |
1504 | +++ include/zorba/uri_resolvers.h 2012-04-26 16:53:22 +0000 |
1505 | @@ -50,7 +50,8 @@ |
1506 | class ZORBA_DLL_PUBLIC Resource |
1507 | { |
1508 | public: |
1509 | - typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> > ptr; |
1510 | + typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> > |
1511 | + ptr; |
1512 | |
1513 | virtual ~Resource() = 0; |
1514 | |
1515 | @@ -172,8 +173,8 @@ |
1516 | * object itself will be discarded. |
1517 | * |
1518 | * In any case, if they create a Resource, Zorba will take memory |
1519 | - * ownership of the Resource and delete it when it is no longer |
1520 | - * needed. |
1521 | + * ownership of the Resource and delete it (by calling destroy() on it) |
1522 | + * when it is no longer needed. |
1523 | */ |
1524 | virtual Resource* resolveURL(const zorba::String& aUrl, |
1525 | EntityData const* aEntityData) = 0; |
1526 | |
1527 | === modified file 'modules/com/zorba-xquery/www/modules/CMakeLists.txt' |
1528 | --- modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-24 12:39:38 +0000 |
1529 | +++ modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-26 16:53:22 +0000 |
1530 | @@ -72,6 +72,13 @@ |
1531 | DECLARE_ZORBA_MODULE(FILE xqdoc.xq VERSION 2.0 |
1532 | URI "http://www.zorba-xquery.com/modules/xqdoc") |
1533 | |
1534 | +IF(NOT ZORBA_NO_FULL_TEXT) |
1535 | + DECLARE_ZORBA_MODULE(FILE full-text.xq VERSION 2.0 |
1536 | + URI "http://www.zorba-xquery.com/modules/full-text") |
1537 | + DECLARE_ZORBA_SCHEMA(FILE full-text.xsd |
1538 | + URI "http://www.zorba-xquery.com/modules/full-text") |
1539 | +ENDIF(NOT ZORBA_NO_FULL_TEXT) |
1540 | + |
1541 | # Subdirectories |
1542 | DECLARE_ZORBA_MODULE(FILE converters/base64.xq VERSION 2.0 |
1543 | URI "http://www.zorba-xquery.com/modules/converters/base64") |
1544 | |
1545 | === added file 'modules/com/zorba-xquery/www/modules/full-text.xq' |
1546 | --- modules/com/zorba-xquery/www/modules/full-text.xq 1970-01-01 00:00:00 +0000 |
1547 | +++ modules/com/zorba-xquery/www/modules/full-text.xq 2012-04-26 16:53:22 +0000 |
1548 | @@ -0,0 +1,872 @@ |
1549 | +xquery version "3.0"; |
1550 | + |
1551 | +(: |
1552 | + : Copyright 2006-2011 The FLWOR Foundation. |
1553 | + : |
1554 | + : Licensed under the Apache License, Version 2.0 (the "License"); |
1555 | + : you may not use this file except in compliance with the License. |
1556 | + : You may obtain a copy of the License at |
1557 | + : |
1558 | + : http://www.apache.org/licenses/LICENSE-2.0 |
1559 | + : |
1560 | + : Unless required by applicable law or agreed to in writing, software |
1561 | + : distributed under the License is distributed on an "AS IS" BASIS, |
1562 | + : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
1563 | + : See the License for the specific language governing permissions and |
1564 | + : limitations under the License. |
1565 | + :) |
1566 | + |
1567 | +(:===========================================================================:) |
1568 | + |
1569 | +(:~ |
1570 | + : This module provides an XQuery API to full-text functions. |
1571 | + : For general information about Zorba's implementation of the |
1572 | + : <a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0 specification</a> |
1573 | + : as well as instructions for building and installing a thesaurus, |
1574 | + : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_thesaurus">Full Text Thesaurus documentation</a>. |
1575 | + : <h2>Notes on languages</h2> |
1576 | + : To refer to particular human languages, |
1577 | + : Zorba uses both the |
1578 | + : <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a> |
1579 | + : and |
1580 | + : <a href="http://en.wikipedia.org/wiki/ISO_639-2">ISO 639-2</a> |
1581 | + : language codes. |
1582 | + : Note that Zorba supports only a subset of the |
1583 | + : <a href="http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">complete list of language codes</a> |
1584 | + : and not every function supports the same subset. |
1585 | + : <p/> |
1586 | + : Most functions in this module take a language as a parameter |
1587 | + : using the |
1588 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a> |
1589 | + : XML schema data type. |
1590 | + : <h2>Notes on stemming</h2> |
1591 | + : The <code>stem()</code> functions return the |
1592 | + : <a href="http://en.wikipedia.org/wiki/Word_stem">stem</a> |
1593 | + : of a word. |
1594 | + : In Zorba, |
1595 | + : the stem of a word itself, however, is not guaranteed to be a word. |
1596 | + : It is best to consider a stem as an opaque byte sequence. |
1597 | + : All that is guaranteed about a stem is that, |
1598 | + : for a given word, |
1599 | + : the stem of that word will always be the same byte sequence. |
1600 | + : Hence, |
1601 | + : you should never compare the result of one of the <code>stem()</code> |
1602 | + : functions against a non-stemmed string, |
1603 | + : for example: |
1604 | + : <pre> |
1605 | + : if ( ft:stem( "apples" ) eq "apple" ) ** WRONG ** |
1606 | + : </pre> |
1607 | + : Instead do: |
1608 | + : <pre> |
1609 | + : if ( ft:stem( "apples" ) eq ft:stem( "apple" ) ) ** CORRECT ** |
1610 | + : </pre> |
1611 | + : <h2>Notes on the thesaurus</h2> |
1612 | + : The <code>thesaurus-lookup()</code> functions have "levels" |
1613 | + : and "relationship" parameters. |
1614 | + : The values for these are implementation-defined. |
1615 | + : Zorba's default implementation uses the |
1616 | + : <a href="http://wordnet.princeton.edu/">WordNet lexical database</a>, |
1617 | + : version 3.0. |
1618 | + : <p/> |
1619 | + : In WordNet, |
1620 | + : the number of "levels" that two phrases are apart |
1621 | + : are how many hierarchical meanings apart they are. |
1622 | + : For example, |
1623 | + : "canary" is 5 levels away from "vertebrate" |
1624 | + : (canary > finch > oscine > passerine > bird > vertebrate). |
1625 | + : <p/> |
1626 | + : When using the WordNet implementation, |
1627 | + : Zorba supports all of the relationships (and their abbreviations) |
1628 | + : specified by |
1629 | + : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a> |
1630 | + : and |
1631 | + : <a href="http://www.niso.org/kst/reports/standards?step=2&gid=&project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a> |
1632 | + : with the exceptions of "HN" (history note) |
1633 | + : and "X SN" (see scope note for). |
1634 | + : These relationships are: |
1635 | + : <table> |
1636 | + : <tr> |
1637 | + : <th>Rel.</th> |
1638 | + : <th>Meaning</th> |
1639 | + : <th>WordNet Rel.</th> |
1640 | + : </tr> |
1641 | + : <tr> |
1642 | + : <td>BT</td> |
1643 | + : <td>broader term</td> |
1644 | + : <td>hypernym</td> |
1645 | + : </tr> |
1646 | + : <tr> |
1647 | + : <td>BTG</td> |
1648 | + : <td>broader term generic</td> |
1649 | + : <td>hypernym</td> |
1650 | + : </tr> |
1651 | + : <tr> |
1652 | + : <td>BTI</td> |
1653 | + : <td>broader term instance</td> |
1654 | + : <td>instance hypernym</td> |
1655 | + : </tr> |
1656 | + : <tr> |
1657 | + : <td>BTP</td> |
1658 | + : <td>broader term partitive</td> |
1659 | + : <td>part meronym</td> |
1660 | + : </tr> |
1661 | + : <tr> |
1662 | + : <td>NT</td> |
1663 | + : <td>narrower term</td> |
1664 | + : <td>hyponym</td> |
1665 | + : </tr> |
1666 | + : <tr> |
1667 | + : <td>NTG</td> |
1668 | + : <td>narrower term generic</td> |
1669 | + : <td>hyponym</td> |
1670 | + : </tr> |
1671 | + : <tr> |
1672 | + : <td>NTI</td> |
1673 | + : <td>narrower term instance</td> |
1674 | + : <td>instance hyponym</td> |
1675 | + : </tr> |
1676 | + : <tr> |
1677 | + : <td>NTP</td> |
1678 | + : <td>narrower term partitive</td> |
1679 | + : <td>part holonym</td> |
1680 | + : </tr> |
1681 | + : <tr> |
1682 | + : <td>RT</td> |
1683 | + : <td>related term</td> |
1684 | + : <td>also see</td> |
1685 | + : </tr> |
1686 | + : <tr> |
1687 | + : <td>SN</td> |
1688 | + : <td>scope note</td> |
1689 | + : <td>n/a</td> |
1690 | + : </tr> |
1691 | + : <tr> |
1692 | + : <td>TT</td> |
1693 | + : <td>top term</td> |
1694 | + : <td>hypernym</td> |
1695 | + : </tr> |
1696 | + : <tr> |
1697 | + : <td>UF</td> |
1698 | + : <td>non-preferred term</td> |
1699 | + : <td>n/a</td> |
1700 | + : </tr> |
1701 | + : <tr> |
1702 | + : <td>USE</td> |
1703 | + : <td>preferred term</td> |
1704 | + : <td>n/a</td> |
1705 | + : </tr> |
1706 | + : </table> |
1707 | + : Note that you can specify relationships |
1708 | + : either by their abbreviation |
1709 | + : or their meaning. |
1710 | + : Relationships are case-insensitive. |
1711 | + : |
1712 | + : In addition to the |
1713 | + : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a> |
1714 | + : and |
1715 | + : <a href="http://www.niso.org/kst/reports/standards?step=2&gid=&project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a> |
1716 | + : relationships, |
1717 | + : Zorba also supports all of the relationships offered by WordNet. |
1718 | + : These relationships are: |
1719 | + : <table class="ft_rels"> |
1720 | + : <tr> |
1721 | + : <th>Relationship</th> |
1722 | + : <th>Meaning</th> |
1723 | + : </tr> |
1724 | + : <tr> |
1725 | + : <td nowrap="nowrap">also see</td> |
1726 | + : <td> |
1727 | + : A word that is related to another, |
1728 | + : e.g., for "varnished" (furniture) |
1729 | + : one should <em>also see</em> "finished." |
1730 | + : </td> |
1731 | + : </tr> |
1732 | + : <tr> |
1733 | + : <td>antonym</td> |
1734 | + : <td> |
1735 | + : A word opposite in meaning to another, |
1736 | + : e.g., "light" is an <em>antonym</em> for "heavy." |
1737 | + : </td> |
1738 | + : </tr> |
1739 | + : <tr> |
1740 | + : <td>attribute</td> |
1741 | + : <td> |
1742 | + : A noun for which adjectives express values, |
1743 | + : e.g., "weight" is an <em>attribute</em> |
1744 | + : for which the adjectives "light" and "heavy" |
1745 | + : express values. |
1746 | + : </td> |
1747 | + : </tr> |
1748 | + : <tr> |
1749 | + : <td>cause</td> |
1750 | + : <td> |
1751 | + : A verb that causes another, |
1752 | + : e.g., "show" is a <em>cause</em> of "see." |
1753 | + : </td> |
1754 | + : </tr> |
1755 | + : <tr> |
1756 | + : <td nowrap="nowrap">derivationally related form</td> |
1757 | + : <td> |
1758 | + : A word that is derived from a root word, |
1759 | + : e.g., "metric" is a <em>derivationally related form</em> of "meter." |
1760 | + : </td> |
1761 | + : </tr> |
1762 | + : <tr> |
1763 | + : <td nowrap="nowrap">derived from adjective</td> |
1764 | + : <td> |
1765 | + : An adverb that is derived from an adjective, |
1766 | + : e.g., "correctly" is <em>derived from the adjective</em> "correct." |
1767 | + : </td> |
1768 | + : </tr> |
1769 | + : <tr> |
1770 | + : <td>entailment</td> |
1771 | + : <td> |
1772 | + : A verb that presupposes another, |
1773 | + : e.g., "snoring" <em>entails</em> "sleeping." |
1774 | + : </td> |
1775 | + : </tr> |
1776 | + : <tr> |
1777 | + : <td>hypernym</td> |
1778 | + : <td> |
1779 | + : A word with a broad meaning that more specific words fall under, |
1780 | + : e.g., "meal" is a <em>hypernym</em> of "breakfast." |
1781 | + : </td> |
1782 | + : </tr> |
1783 | + : <tr> |
1784 | + : <td>hyponym</td> |
1785 | + : <td> |
1786 | + : A word of more specific meaning than a general term applicable to it, |
1787 | + : e.g., "breakfast" is a <em>hyponym</em> of "meal." |
1788 | + : </td> |
1789 | + : </tr> |
1790 | + : <tr> |
1791 | + : <td nowrap="nowrap">instance hypernym</td> |
1792 | + : <td> |
1793 | + : A word that denotes a category of some specific instance, |
1794 | + : e.g., "author" is an <em>instance hypernym</em> of "Asimov." |
1795 | + : </td> |
1796 | + : </tr> |
1797 | + : <tr> |
1798 | + : <td nowrap="nowrap">instance hyponym</td> |
1799 | + : <td> |
1800 | + : A term that denotes a specific instance of some general category, |
1801 | + : e.g., "Asimov" is an <em>instance hyponym</em> of "author." |
1802 | + : </td> |
1803 | + : </tr> |
1804 | + : <tr> |
1805 | + : <td nowrap="nowrap">member holonym</td> |
1806 | + : <td> |
1807 | + : A word that denotes a collection of individuals, |
1808 | + : e.g., "faculty" is a <em>member holonym</em> of "professor." |
1809 | + : </td> |
1810 | + : </tr> |
1811 | + : <tr> |
1812 | + : <td nowrap="nowrap">member meronym</td> |
1813 | + : <td> |
1814 | + : A word that denotes a member of a larger group, |
1815 | + : e.g., a "person" is a <em>member meronym</em> of a "crowd." |
1816 | + : </td> |
1817 | + : </tr> |
1818 | + : <tr> |
1819 | + : <td nowrap="nowrap">part holonym</td> |
1820 | + : <td> |
1821 | + : A word that denotes a larger whole comprised of some part, |
1822 | + : e.g., "car" is a <em>part holonym</em> of "engine." |
1823 | + : </td> |
1824 | + : </tr> |
1825 | + : <tr> |
1826 | + : <td nowrap="nowrap">part meronym</td> |
1827 | + : <td> |
1828 | + : A word that denotes a part of a larger whole, |
1829 | + : e.g., an "engine" is a <em>part meronym</em> of a "car." |
1830 | + : </td> |
1831 | + : </tr> |
1832 | + : <tr> |
1833 | + : <td nowrap="nowrap">participle of verb</td> |
1834 | + : <td> |
1835 | + : An adjective that is the participle of some verb, |
1836 | + : e.g., "breaking" is the <em>participle of the verb</em> "break." |
1837 | + : </td> |
1838 | + : </tr> |
1839 | + : <tr> |
1840 | + : <td>pertainym</td> |
1841 | + : <td> |
1842 | + : An adjective that classifies its noun, |
1843 | + : e.g., "musical" is a <em>pertainym</em> in "musical instrument." |
1844 | + : </td> |
1845 | + : </tr> |
1846 | + : <tr> |
1847 | + : <td nowrap="nowrap">similar to</td> |
1848 | + : <td> |
1849 | + : Similar, though not necessarily interchangeable, adjectives. |
1850 | + : For example, "shiny" is <em>similar to</em> "bright", |
1851 | + : but they have subtle differences. |
1852 | + : </td> |
1853 | + : </tr> |
1854 | + : <tr> |
1855 | + : <td nowrap="nowrap">substance holonym</td> |
1856 | + : <td> |
1857 | + : A word that denotes a larger whole containing some constituent |
1858 | + : substance, e.g., "bread" is a <em>substance holonym</em> of "flour." |
1859 | + : </td> |
1860 | + : </tr> |
1861 | + : <tr> |
1862 | + : <td nowrap="nowrap">substance meronym</td> |
1863 | + : <td> |
1864 | + : A word that denotes a constituent substance of some larger whole, |
1865 | + : e.g., "flour" is a <em>substance meronym</em> of "bread." |
1866 | + : </td> |
1867 | + : </tr> |
1868 | + : <tr> |
1869 | + : <td nowrap="nowrap">verb group</td> |
1870 | + : <td> |
1871 | + : A verb that is a member of a group of similar verbs, |
1872 | + : e.g., "live" is in the <em>verb group</em> |
1873 | + : of "dwell", "live", "inhabit", etc. |
1874 | + : </td> |
1875 | + : </tr> |
1876 | + : </table> |
1877 | + : <h2>Notes on tokenization</h2> |
1878 | + : For general information about Zorba's implementation of tokenization, |
1879 | + : including what constitutes a token, |
1880 | + : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_tokenizer">Full Text Tokenizer</a> documentation. |
1881 | + :) |
1882 | + |
1883 | +(:===========================================================================:) |
1884 | + |
1885 | +module namespace ft = "http://www.zorba-xquery.com/modules/full-text"; |
1886 | + |
1887 | +import schema namespace ft-schema = |
1888 | + "http://www.zorba-xquery.com/modules/full-text"; |
1889 | + |
1890 | +declare namespace err = "http://www.w3.org/2005/xqt-errors"; |
1891 | +declare namespace zerr = "http://www.zorba-xquery.com/errors"; |
1892 | + |
1893 | +declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
1894 | +declare option ver:module-version "2.0"; |
1895 | + |
1896 | +(:===========================================================================:) |
1897 | + |
1898 | +(:~ |
1899 | + : Predeclared constant for the Danish |
1900 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1901 | + :) |
1902 | +declare variable $ft:lang-da as xs:language := xs:language("da"); |
1903 | + |
1904 | +(:~ |
1905 | + : Predeclared constant for the German |
1906 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1907 | + :) |
1908 | +declare variable $ft:lang-de as xs:language := xs:language("de"); |
1909 | + |
1910 | +(:~ |
1911 | + : Predeclared constant for the English |
1912 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1913 | + :) |
1914 | +declare variable $ft:lang-en as xs:language := xs:language("en"); |
1915 | + |
1916 | +(:~ |
1917 | + : Predeclared constant for the Spanish |
1918 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1919 | + :) |
1920 | +declare variable $ft:lang-es as xs:language := xs:language("es"); |
1921 | + |
1922 | +(:~ |
1923 | + : Predeclared constant for the Finnish |
1924 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1925 | + :) |
1926 | +declare variable $ft:lang-fi as xs:language := xs:language("fi"); |
1927 | + |
1928 | +(:~ |
1929 | + : Predeclared constant for the French |
1930 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1931 | + :) |
1932 | +declare variable $ft:lang-fr as xs:language := xs:language("fr"); |
1933 | + |
1934 | +(:~ |
1935 | + : Predeclared constant for the Hungarian |
1936 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1937 | + :) |
1938 | +declare variable $ft:lang-hu as xs:language := xs:language("hu"); |
1939 | + |
1940 | +(:~ |
1941 | + : Predeclared constant for the Italian |
1942 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1943 | + :) |
1944 | +declare variable $ft:lang-it as xs:language := xs:language("it"); |
1945 | + |
1946 | +(:~ |
1947 | + : Predeclared constant for the Dutch |
1948 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1949 | + :) |
1950 | +declare variable $ft:lang-nl as xs:language := xs:language("nl"); |
1951 | + |
1952 | +(:~ |
1953 | + : Predeclared constant for the Norwegian |
1954 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1955 | + :) |
1956 | +declare variable $ft:lang-no as xs:language := xs:language("no"); |
1957 | + |
1958 | +(:~ |
1959 | + : Predeclared constant for the Portuguese |
1960 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1961 | + :) |
1962 | +declare variable $ft:lang-pt as xs:language := xs:language("pt"); |
1963 | + |
1964 | +(:~ |
1965 | + : Predeclared constant for the Romanian |
1966 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1967 | + :) |
1968 | +declare variable $ft:lang-ro as xs:language := xs:language("ro"); |
1969 | + |
1970 | +(:~ |
1971 | + : Predeclared constant for the Russian |
1972 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1973 | + :) |
1974 | +declare variable $ft:lang-ru as xs:language := xs:language("ru"); |
1975 | + |
1976 | +(:~ |
1977 | + : Predeclared constant for the Swedish |
1978 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1979 | + :) |
1980 | +declare variable $ft:lang-sv as xs:language := xs:language("sv"); |
1981 | + |
1982 | +(:~ |
1983 | + : Predeclared constant for the Turkish |
1984 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>. |
1985 | + :) |
1986 | +declare variable $ft:lang-tr as xs:language := xs:language("tr"); |
1987 | + |
1988 | +(:===========================================================================:) |
1989 | + |
1990 | +(:~ |
1991 | + : Gets the current |
1992 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>: |
1993 | + : either the language specified by the |
1994 | + : <code><a href="http://www.w3.org/TR/xpath-full-text-10/#doc-xquery10-FTOptionDecl">declare ft-option using</a> |
1995 | + : <a href="http://www.w3.org/TR/xpath-full-text-10/#ftlanguageoption">language</a></code> |
1996 | + : statement (if any) |
1997 | + : or the one returned by <code>ft:host-lang()</code> (if none). |
1998 | + : |
1999 | + : @return said language. |
2000 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq |
2001 | + :) |
2002 | +declare function ft:current-lang() |
2003 | + as xs:language external; |
2004 | + |
2005 | +(:~ |
2006 | + : Gets the host's current |
2007 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>. |
2008 | + : The "host" is the computer on which Zorba is running. |
2009 | + : The host's current language is obtained as follows: |
2010 | + : <ul> |
2011 | + : <li> |
2012 | + : For *nix systems: |
2013 | + : <ol> |
2014 | + : <li> |
2015 | + : If <a href="http://www.cplusplus.com/reference/clibrary/clocale/setlocale/"><code>setlocale</code>(3)</a> returns non-null, |
2016 | + : the language corresponding to that locale is used. |
2017 | + : </li> |
2018 | + : <li> |
2019 | + : Else, if the <code>LANG</code> environment variable is set, |
2020 | + : that language is used. |
2021 | + : </li> |
2022 | + : <li> |
2023 | + : Otherwise, there is no default language. |
2024 | + : </li> |
2025 | + : </ol> |
2026 | + : </li> |
2027 | + : <li> |
2028 | + : For Windows systems, |
2029 | + : the language corresponding to the locale returned by the |
2030 | + : <a href="http://msdn.microsoft.com/en-us/library/windows/desktop/dd318101(v=vs.85).aspx"><code>GetLocaleInfo()</code></a> |
2031 | + : function is used. |
2032 | + : </li> |
2033 | + : </ul> |
2034 | + : |
2035 | + : @return said language. |
2036 | + :) |
2037 | +declare function ft:host-lang() |
2038 | + as xs:language external; |
2039 | + |
2040 | +(:~ |
2041 | + : Checks whether the given |
2042 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2043 | + : is supported for stemming. |
2044 | + : |
2045 | + : @param $lang The language to check. |
2046 | + : @return <code>true</code> only if the language is supported. |
2047 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq |
2048 | + :) |
2049 | +declare function ft:is-stem-lang-supported( $lang as xs:language ) |
2050 | + as xs:boolean external; |
2051 | + |
2052 | +(:~ |
2053 | + : Checks whether the given word is a stop-word. |
2054 | + : |
2055 | + : @param $word The word to check. |
2056 | + : @param $lang The |
2057 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2058 | + : of <code>$word</code>. |
2059 | + : @return <code>true</code> only if <code>$word</code> is a stop-word. |
2060 | + : @error zerr:ZXQP8405 if <code>$lang</code> is not supported for stop-words. |
2061 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq |
2062 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq |
2063 | + :) |
2064 | +declare function ft:is-stop-word( $word as xs:string, $lang as xs:language ) |
2065 | + as xs:boolean external; |
2066 | + |
2067 | +(:~ |
2068 | + : Checks whether the given word is a stop-word. |
2069 | + : |
2070 | + : @param $word The word to check. |
2071 | + : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2072 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2073 | + : @return <code>true</code> only if <code>$word</code> is a stop-word. |
2074 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2075 | + : general. |
2076 | + : @error zerr:ZXQP8405 if <code>ft:current-lang()</code> is not supported for |
2077 | + : stop-words specifically. |
2078 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq |
2079 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq |
2080 | + :) |
2081 | +declare function ft:is-stop-word( $word as xs:string ) |
2082 | + as xs:boolean external; |
2083 | + |
2084 | +(:~ |
2085 | + : Checks whether the given |
2086 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2087 | + : is supported for stop words. |
2088 | + : |
2089 | + : @param $lang The language to check. |
2090 | + : @return <code>true</code> only if the language is supported. |
2091 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq |
2092 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq |
2093 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq |
2094 | + :) |
2095 | +declare function ft:is-stop-word-lang-supported( $lang as xs:language ) |
2096 | + as xs:boolean external; |
2097 | + |
2098 | +(:~ |
2099 | + : Checks whether the given |
2100 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2101 | + : is supported for look-up using the default thesaurus. |
2102 | + : |
2103 | + : @param $lang The language to check. |
2104 | + : @return <code>true</code> only if the language is supported. |
2105 | + :) |
2106 | +declare function ft:is-thesaurus-lang-supported( $lang as xs:language ) |
2107 | + as xs:boolean external; |
2108 | + |
2109 | +(:~ |
2110 | + : Checks whether the given |
2111 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2112 | + : is supported for look-up using the thesaurus specified by the given URI. |
2113 | + : |
2114 | + : @param $uri The URI specifying the thesaurus to use. |
2115 | + : @param $lang The language to check. |
2116 | + : @return <code>true</code> only if the language is supported. |
2117 | + : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus |
2118 | + : that is not found in the statically known thesauri. |
2119 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq |
2120 | + :) |
2121 | +declare function ft:is-thesaurus-lang-supported( $uri as xs:string, |
2122 | + $lang as xs:language ) |
2123 | + as xs:boolean external; |
2124 | + |
2125 | +(:~ |
2126 | + : Checks whether the given |
2127 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2128 | + : is supported for tokenization. |
2129 | + : |
2130 | + : @param $lang The language to check. |
2131 | + : @return <code>true</code> only if the language is supported. |
2132 | + :) |
2133 | +declare function ft:is-tokenizer-lang-supported( $lang as xs:language ) |
2134 | + as xs:boolean external; |
2135 | + |
2136 | +(:~ |
2137 | + : Stems the given word. |
2138 | + : |
2139 | + : @param $word The word to stem. |
2140 | + : @param $lang The |
2141 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2142 | + : of <code>$word</code>. |
2143 | + : @return the stem of <code>$word</code>. |
2144 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2145 | + : @error zerr:ZXQP8404 if <code>$lang</code> is not supported for stemming |
2146 | + : specifically. |
2147 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq |
2148 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq |
2149 | + :) |
2150 | +declare function ft:stem( $word as xs:string, $lang as xs:language ) |
2151 | + as xs:string external; |
2152 | + |
2153 | +(:~ |
2154 | + : Stems the given word. |
2155 | + : |
2156 | + : @param $word The word to stem. |
2157 | + : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2158 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2159 | + : @return the stem of <code>$word</code>. |
2160 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2161 | + : general. |
2162 | + : @error zerr:ZXQP8404 if <code>ft:current-lang()</code> is not supported for |
2163 | + : stemming specifically. |
2164 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq |
2165 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq |
2166 | + :) |
2167 | +declare function ft:stem( $word as xs:string ) |
2168 | + as xs:string external; |
2169 | + |
2170 | +(:~ |
2171 | + : Strips all diacritical marks from all characters. |
2172 | + : |
2173 | + : @param $string The string to strip diacritical marks from. |
2174 | + : @return <code>$string</code> with diacritical marks stripped. |
2175 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq |
2176 | + :) |
2177 | +declare function ft:strip-diacritics( $string as xs:string ) |
2178 | + as xs:string external; |
2179 | + |
2180 | +(:~ |
2181 | + : Looks up the given phrase in the default thesaurus. |
2182 | + : |
2183 | + : @param $phrase The phrase to look up. |
2184 | + : The phrase's |
2185 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2186 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2187 | + : @return the original and related phrases. |
2188 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2189 | + : general. |
2190 | + : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported |
2191 | + : by the currently running version of Zorba. |
2192 | + : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match |
2193 | + : that of the CPU on which Zorba is currently running. |
2194 | + : @error zerr:ZXQP8403 if there was an error reading the thesaurus data. |
2195 | + : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for |
2196 | + : thesaurus look-up specifically. |
2197 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq |
2198 | + :) |
2199 | +declare function ft:thesaurus-lookup( $phrase as xs:string ) |
2200 | + as xs:string+ external; |
2201 | + |
2202 | +(:~ |
2203 | + : Looks up the given phrase in the thesaurus specified by the given URI. |
2204 | + : |
2205 | + : @param $uri The URI specifying the thesaurus to use. |
2206 | + : @param $phrase The phrase to look up. |
2207 | + : @param $lang The |
2208 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2209 | + : of <code>$phrase</code>. |
2210 | + : @return the original and related phrases. |
2211 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2212 | + : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus |
2213 | + : that is not found in the statically known thesauri. |
2214 | + : @error zerr:ZOSE0001 if the thesaurus data file could not be found. |
2215 | + : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file. |
2216 | + : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported |
2217 | + : by the currently running version of Zorba. |
2218 | + : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match |
2219 | + : that of the CPU on which Zorba is currently running. |
2220 | + : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file. |
2221 | + : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus |
2222 | + : look-up specifically. |
2223 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq |
2224 | + :) |
2225 | +declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string, |
2226 | + $lang as xs:language ) |
2227 | + as xs:string+ external; |
2228 | + |
2229 | +(:~ |
2230 | + : Looks up the given phrase in a thesaurus. |
2231 | + : |
2232 | + : @param $uri The URI specifying the thesaurus to use. |
2233 | + : @param $phrase The phrase to look up. |
2234 | + : The phrase's |
2235 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2236 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2237 | + : @return the original and related phrases. |
2238 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is unsupported in |
2239 | + : general. |
2240 | + : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus |
2241 | + : that is not found in the statically known thesauri. |
2242 | + : @error zerr:ZOSE0001 if the thesaurus data file could not be found. |
2243 | + : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file. |
2244 | + : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported |
2245 | + : by the currently running version of Zorba. |
2246 | + : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match |
2247 | + : that of the CPU on which Zorba is currently running. |
2248 | + : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file. |
2249 | + : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for |
2250 | + : thesaurus look-up specifically. |
2251 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq |
2252 | + :) |
2253 | +declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string ) |
2254 | + as xs:string+ external; |
2255 | + |
2256 | +(:~ |
2257 | + : Looks up the given phrase in a thesaurus. |
2258 | + : |
2259 | + : @param $uri The URI specifying the thesaurus to use. |
2260 | + : @param $phrase The phrase to look up. |
2261 | + : @param $lang The |
2262 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2263 | + : of <code>$phrase</code>. |
2264 | + : @param $relationship The relationship the results are to have to |
2265 | + : <code>$phrase</code>. |
2266 | + : @return the original and related phrases. |
2267 | + : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus |
2268 | + : that is not found in the statically known thesauri. |
2269 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2270 | + : @error zerr:ZOSE0001 if the thesaurus data file could not be found. |
2271 | + : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file. |
2272 | + : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported |
2273 | + : by the currently running version of Zorba. |
2274 | + : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match |
2275 | + : that of the CPU on which Zorba is currently running. |
2276 | + : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file. |
2277 | + : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus |
2278 | + : look-up specifically. |
2279 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq |
2280 | + :) |
2281 | +declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string, |
2282 | + $lang as xs:language, |
2283 | + $relationship as xs:string ) |
2284 | + as xs:string+ external; |
2285 | + |
2286 | +(:~ |
2287 | + : Looks up the given phrase in a thesaurus. |
2288 | + : |
2289 | + : @param $uri The URI specifying the thesaurus to use. |
2290 | + : @param $phrase The phrase to look up. |
2291 | + : @param $lang The |
2292 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2293 | + : of <code>$phrase</code>. |
2294 | + : @param $relationship The relationship the results are to have to |
2295 | + : <code>$phrase</code>. |
2296 | + : @param $level-least The minimum number of levels within the thesaurus to be |
2297 | + : traversed. |
2298 | + : @param $level-most The maximum number of levels within the thesaurus to be |
2299 | + : traversed. |
2300 | + : @return the original and related phrases. |
2301 | + : @error err:FOCA0003 if either <code>$level-least</code> or |
2302 | + : <code>$level-most</code> is either negative or too large. |
2303 | + : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus |
2304 | + : that is not found in the statically known thesauri. |
2305 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2306 | + : @error zerr:ZOSE0001 if the thesaurus data file could not be found. |
2307 | + : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file. |
2308 | + : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported |
2309 | + : by the currently running version of Zorba. |
2310 | + : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match |
2311 | + : that of the CPU on which Zorba is currently running. |
2312 | + : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file. |
2313 | + : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus |
2314 | + : look-up specifically. |
2315 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq |
2316 | + :) |
2317 | +declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string, |
2318 | + $lang as xs:language, |
2319 | + $relationship as xs:string, |
2320 | + $level-least as xs:integer, |
2321 | + $level-most as xs:integer ) |
2322 | + as xs:string+ external; |
2323 | + |
2324 | +(:~ |
2325 | + : Tokenizes the given document. |
2326 | + : |
2327 | + : @param $node The node to tokenize. |
2328 | + : @param $lang The default |
2329 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2330 | + : of <code>$node</code>. |
2331 | + : @return a (possibly empty) sequence of tokens. |
2332 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2333 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq |
2334 | + :) |
2335 | +declare function ft:tokenize( $node as node(), $lang as xs:language ) |
2336 | + as element(ft-schema:token)* external; |
2337 | + |
2338 | +(:~ |
2339 | + : Tokenizes the given document. |
2340 | + : |
2341 | + : @param $node The node to tokenize. |
2342 | + : The document's default |
2343 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2344 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2345 | + : @return a (possibly empty) sequence of tokens. |
2346 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2347 | + : general. |
2348 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq |
2349 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq |
2350 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq |
2351 | + :) |
2352 | +declare function ft:tokenize( $node as node() ) |
2353 | + as element(ft-schema:token)* external; |
2354 | + |
2355 | +(:~ |
2356 | + : Tokenizes the given string. |
2357 | + : |
2358 | + : @param $string The string to tokenize. |
2359 | + : @param $lang The default |
2360 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2361 | + : of <code>$string</code>. |
2362 | + : @return a (possibly empty) sequence of tokens. |
2363 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2364 | + : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for |
2365 | + : tokenization specifically. |
2366 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq |
2367 | + :) |
2368 | +declare function ft:tokenize-string( $string as xs:string, |
2369 | + $lang as xs:language ) |
2370 | + as xs:string* external; |
2371 | + |
2372 | +(:~ |
2373 | + : Tokenizes the given string. |
2374 | + : |
2375 | + : @param $string The string to tokenize. |
2376 | + : The string's default |
2377 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2378 | + : is assumed to be the one returned by <code>ft:current-lang()</code>. |
2379 | + : @return a (possibly empty) sequence of tokens. |
2380 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2381 | + : general. |
2382 | + : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for |
2383 | + : tokenization specifically. |
2384 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq |
2385 | + :) |
2386 | +declare function ft:tokenize-string( $string as xs:string ) |
2387 | + as xs:string* external; |
2388 | + |
2389 | +(:~ |
2390 | + : Gets properties of the tokenizer for the given |
2391 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>. |
2392 | + : |
2393 | + : @param $lang The language of the tokenizer to get the properties of. |
2394 | + : @return said properties. |
2395 | + : @error err:FTST0009 if <code>$lang</code> is not supported in general. |
2396 | + : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for |
2397 | + : tokenization specifically. |
2398 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-1.xq |
2399 | + :) |
2400 | +declare function ft:tokenizer-properties( $lang as xs:language ) |
2401 | + as element(ft-schema:tokenizer-properties) external; |
2402 | + |
2403 | +(:~ |
2404 | + : Gets properties of the tokenizer for the |
2405 | + : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a> |
2406 | + : returned by <code>ft:current-lang()</code>. |
2407 | + : |
2408 | + : @return said properties. |
2409 | + : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in |
2410 | + : general. |
2411 | + : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for |
2412 | + : tokenization specifically. |
2413 | + : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-2.xq |
2414 | + :) |
2415 | +declare function ft:tokenizer-properties() |
2416 | + as element(ft-schema:tokenizer-properties) external; |
2417 | + |
2418 | +(:===========================================================================:) |
2419 | + |
2420 | +(: vim:set et sw=2 ts=2: :) |
2421 | |
2422 | === added file 'modules/com/zorba-xquery/www/modules/full-text.xsd' |
2423 | --- modules/com/zorba-xquery/www/modules/full-text.xsd 1970-01-01 00:00:00 +0000 |
2424 | +++ modules/com/zorba-xquery/www/modules/full-text.xsd 2012-04-26 16:53:22 +0000 |
2425 | @@ -0,0 +1,134 @@ |
2426 | +<?xml version="1.0"?> |
2427 | +<!-- |
2428 | + ! Copyright 2006-2011 The FLWOR Foundation. |
2429 | + ! |
2430 | + ! Licensed under the Apache License, Version 2.0 (the "License"); |
2431 | + ! you may not use this file except in compliance with the License. |
2432 | + ! You may obtain a copy of the License at |
2433 | + ! |
2434 | + ! http://www.apache.org/licenses/LICENSE-2.0 |
2435 | + ! |
2436 | + ! Unless required by applicable law or agreed to in writing, software |
2437 | + ! distributed under the License is distributed on an "AS IS" BASIS, |
2438 | + ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
2439 | + ! See the License for the specific language governing permissions and |
2440 | + ! limitations under the License. |
2441 | +--> |
2442 | + |
2443 | +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" |
2444 | + targetNamespace="http://www.zorba-xquery.com/modules/full-text" |
2445 | + xmlns="http://www.zorba-xquery.com/modules/full-text" |
2446 | + elementFormDefault="qualified" |
2447 | + attributeFormDefault="unqualified"> |
2448 | + |
2449 | + <!--======================================================================--> |
2450 | + |
2451 | + <xs:element name="compare-options"> |
2452 | + <xs:complexType> |
2453 | + <xs:attributeGroup ref="compare-attributes"/> |
2454 | + </xs:complexType> |
2455 | + </xs:element> |
2456 | + |
2457 | + <xs:attributeGroup name="compare-attributes"> |
2458 | + <xs:attribute name="case" type="sensitivity" default="insensitive"/> |
2459 | + <xs:attribute name="diacritics" type="sensitivity" default="insensitive"/> |
2460 | + <xs:attribute name="stem" type="yes-no-both" default="no"/> |
2461 | + </xs:attributeGroup> |
2462 | + |
2463 | + <xs:simpleType name="sensitivity"> |
2464 | + <xs:restriction base="xs:string"> |
2465 | + <xs:enumeration value="insensitive"/> |
2466 | + <xs:enumeration value="sensitive"/> |
2467 | + <xs:enumeration value="both"/> |
2468 | + </xs:restriction> |
2469 | + </xs:simpleType> |
2470 | + |
2471 | + <xs:simpleType name="yes-no-both"> |
2472 | + <xs:restriction base="xs:string"> |
2473 | + <xs:enumeration value="yes"/> |
2474 | + <xs:enumeration value="no"/> |
2475 | + <xs:enumeration value="both"/> |
2476 | + </xs:restriction> |
2477 | + </xs:simpleType> |
2478 | + |
2479 | + <xs:complexType name="boolean-value"> |
2480 | + <xs:attribute name="value" type="xs:boolean" use="required"/> |
2481 | + </xs:complexType> |
2482 | + |
2483 | + <!--======================================================================--> |
2484 | + |
2485 | + <xs:element name="token"> |
2486 | + <xs:complexType> |
2487 | + |
2488 | + <!-- The language of the token. --> |
2489 | + <xs:attribute name="lang" type="xs:language"/> |
2490 | + |
2491 | + <!-- The sentence number. --> |
2492 | + <xs:attribute name="sentence" type="xs:nonNegativeInteger" use="required"/> |
2493 | + |
2494 | + <!-- The paragraph number. --> |
2495 | + <xs:attribute name="paragraph" type="xs:nonNegativeInteger" use="required"/> |
2496 | + |
2497 | + <!-- The token string value. --> |
2498 | + <xs:attribute name="value" type="xs:string" use="required"/> |
2499 | + |
2500 | + <!-- |
2501 | + ! A reference to the originating node. If the token occurred within an |
2502 | + ! element, the reference refers to the text node. If the token occurred |
2503 | + ! within an attribute, the reference refers to the attribute node. |
2504 | + --> |
2505 | + <xs:attribute name="node-ref" type="xs:anyURI"/> |
2506 | + |
2507 | + </xs:complexType> |
2508 | + </xs:element> |
2509 | + |
2510 | + <!--======================================================================--> |
2511 | + |
2512 | + <xs:element name="tokenizer-properties"> |
2513 | + <xs:complexType> |
2514 | + <xs:all> |
2515 | + |
2516 | + <!-- |
2517 | + ! If true, XML comments separate tokens. (No example can be provided |
2518 | + ! here because it is illegal to nest an XML comment inside an XML |
2519 | + ! comment.) |
2520 | + --> |
2521 | + <xs:element name="comments-separate-tokens" type="boolean-value"/> |
2522 | + |
2523 | + <!-- |
2524 | + ! If true, XML elements separate tokens. For example, |
2525 | + ! <b>B</b>old would be 2 tokens instead of 1. |
2526 | + --> |
2527 | + <xs:element name="elements-separate-tokens" type="boolean-value"/> |
2528 | + |
2529 | + <!-- |
2530 | + ! If true, XML processing instructions separate tokens. For example, |
2531 | + ! net<?PI pi?>work would be 2 tokens instead of 1. |
2532 | + --> |
2533 | + <xs:element name="processing-instructions-separate-tokens" type="boolean-value"/> |
2534 | + |
2535 | + <!-- |
2536 | + ! The list of languages that the tokenizer can tokenize. |
2537 | + --> |
2538 | + <xs:element name="supported-languages"> |
2539 | + <xs:complexType> |
2540 | + <xs:sequence> |
2541 | + <xs:element name="lang" type="xs:language" maxOccurs="unbounded"/> |
2542 | + </xs:sequence> |
2543 | + </xs:complexType> |
2544 | + </xs:element> |
2545 | + |
2546 | + </xs:all> |
2547 | + |
2548 | + <!-- |
2549 | + ! The tokenizer's identifying URI. |
2550 | + --> |
2551 | + <xs:attribute name="uri" type="xs:anyURI"/> |
2552 | + |
2553 | + </xs:complexType> |
2554 | + </xs:element> |
2555 | + |
2556 | + <!--======================================================================--> |
2557 | + |
2558 | +</xs:schema> |
2559 | +<!-- vim:set et sw=2 ts=2: --> |
2560 | |
2561 | === modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp' |
2562 | --- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-24 12:39:38 +0000 |
2563 | +++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-26 16:53:22 +0000 |
2564 | @@ -39,7 +39,6 @@ |
2565 | theSerStream(NULL), |
2566 | thePost(NULL), |
2567 | theLast(NULL), |
2568 | - theLastSerializerOptions(NULL), |
2569 | theIsHeadRequest(false) |
2570 | { |
2571 | theHeaderLists.push_back(NULL); |
2572 | @@ -260,6 +259,7 @@ |
2573 | void HttpRequestHandler::cleanUpBody() |
2574 | { |
2575 | delete theSerStream; |
2576 | + theSerStream = 0; |
2577 | theLastBodyHadContent = false; |
2578 | } |
2579 | |
2580 | |
2581 | === modified file 'modules/com/zorba-xquery/www/modules/pregenerated/errors.xq' |
2582 | --- modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-24 12:39:38 +0000 |
2583 | +++ modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-26 16:53:22 +0000 |
2584 | @@ -188,6 +188,7 @@ |
2585 | |
2586 | (:~ |
2587 | : |
2588 | + : The thesaurus data file's endianness does not match that of the CPU. |
2589 | : |
2590 | :) |
2591 | declare variable $zerr:ZXQP8402 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8402"); |
2592 | @@ -201,6 +202,22 @@ |
2593 | |
2594 | (:~ |
2595 | :) |
2596 | +declare variable $zerr:ZXQP8404 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8404"); |
2597 | + |
2598 | +(:~ |
2599 | +:) |
2600 | +declare variable $zerr:ZXQP8405 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8405"); |
2601 | + |
2602 | +(:~ |
2603 | +:) |
2604 | +declare variable $zerr:ZXQP8406 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8406"); |
2605 | + |
2606 | +(:~ |
2607 | +:) |
2608 | +declare variable $zerr:ZXQP8407 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8407"); |
2609 | + |
2610 | +(:~ |
2611 | +:) |
2612 | declare variable $zerr:ZXQD0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQD0001"); |
2613 | |
2614 | (:~ |
2615 | |
2616 | === modified file 'modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq' |
2617 | --- modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-24 12:39:38 +0000 |
2618 | +++ modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-26 16:53:22 +0000 |
2619 | @@ -839,9 +839,7 @@ |
2620 | if(fn:matches($specLine, "Args:")) then |
2621 | let $arg_split := fn:substring-after($specLine, "-x") |
2622 | return |
2623 | - if(fn:string-length($arg_split) eq 0) then |
2624 | - fn:error($err:UE008, fn:concat("Unknown Args: in spec file for example <", $exampleSource,"> . |
2625 | - Add the example input and expected output by hand in the example, in a commentary that should also include the word 'output'.")) |
2626 | + if(fn:string-length($arg_split) eq 0) then string-join($specLines, " ") |
2627 | else |
2628 | let $var_value := fn:tokenize($arg_split, "=") |
2629 | let $var_name := fn:normalize-space(fn:replace($var_value[1], ":$", "")) |
2630 | |
2631 | === modified file 'scripts/zt-wn-get' |
2632 | --- scripts/zt-wn-get 2012-04-24 12:39:38 +0000 |
2633 | +++ scripts/zt-wn-get 2012-04-26 16:53:22 +0000 |
2634 | @@ -22,7 +22,7 @@ |
2635 | echo 'Arguments: [--workdir <workdir>] [--builddir <builddir>]' |
2636 | echo ' [--thesaurusurl <thesaurusurl>]' |
2637 | echo ' <zorba_repository>' |
2638 | - echo '<zorba_repository> is the top-level SVN working copy.' |
2639 | + echo '<zorba_repository> is the top-level BZR working copy.' |
2640 | echo '<workdir> is a temp directory to download and unzip XQTS (default: /tmp).' |
2641 | echo '<builddir> is the directory Zorba has been built in' |
2642 | echo ' (default: <zorba_repository>/build)' |
2643 | @@ -71,8 +71,8 @@ |
2644 | echo Build dir is at $BUILD |
2645 | |
2646 | # Compile thesaurus to binary format |
2647 | -mkdir -p $BUILD/test/rbkt/thesauri |
2648 | -THESAURUS_DEST="$BUILD/test/rbkt/thesauri/wordnet-en.zth" |
2649 | +mkdir -p $BUILD/LIB_PATH/edu/princeton/wordnet |
2650 | +THESAURUS_DEST="$BUILD/LIB_PATH/edu/princeton/wordnet/wordnet-en.zth" |
2651 | echo "Compiling thesaurus to $THESAURUS_DEST..." |
2652 | untar_dir=`mktemp -d "$WORK/thesaurus.XXXXXX"` |
2653 | cd "$untar_dir" |
2654 | |
2655 | === modified file 'src/api/CMakeLists.txt' |
2656 | --- src/api/CMakeLists.txt 2012-04-24 12:39:38 +0000 |
2657 | +++ src/api/CMakeLists.txt 2012-04-26 16:53:22 +0000 |
2658 | @@ -62,8 +62,9 @@ |
2659 | IF (NOT ZORBA_NO_FULL_TEXT) |
2660 | LIST(APPEND API_SRCS |
2661 | stemmer.cpp |
2662 | - stemmer_wrapper.cpp |
2663 | - thesaurus.cpp) |
2664 | + stemmer_wrappers.cpp |
2665 | + thesaurus.cpp |
2666 | + thesaurus_wrappers.cpp) |
2667 | ENDIF (NOT ZORBA_NO_FULL_TEXT) |
2668 | |
2669 | ADD_SRC_SUBFOLDER(API_SRCS serialization API_SERIALIZATION_SRCS) |
2670 | |
2671 | === modified file 'src/api/staticcontextimpl.cpp' |
2672 | --- src/api/staticcontextimpl.cpp 2012-04-24 12:39:38 +0000 |
2673 | +++ src/api/staticcontextimpl.cpp 2012-04-26 16:53:22 +0000 |
2674 | @@ -42,8 +42,8 @@ |
2675 | #include "context/static_context.h" |
2676 | #include "context/static_context_consts.h" |
2677 | #ifndef ZORBA_NO_FULL_TEXT |
2678 | -#include "context/stemmer_wrappers.h" |
2679 | -#include "context/thesaurus_wrappers.h" |
2680 | +#include "stemmer_wrappers.h" |
2681 | +#include "thesaurus_wrappers.h" |
2682 | #endif /* ZORBA_NO_FULL_TEXT */ |
2683 | #include "uri_resolver_wrappers.h" |
2684 | |
2685 | @@ -65,7 +65,6 @@ |
2686 | |
2687 | namespace zorba { |
2688 | |
2689 | - |
2690 | /******************************************************************************* |
2691 | Create a StaticContextImpl obj as well as an internal static_context obj S. |
2692 | S is created as a child of the zorba root sctx. This constructor is used |
2693 | |
2694 | === renamed file 'src/api/stemmer_wrapper.cpp' => 'src/api/stemmer_wrappers.cpp' |
2695 | --- src/api/stemmer_wrapper.cpp 2012-04-24 12:39:38 +0000 |
2696 | +++ src/api/stemmer_wrappers.cpp 2012-04-26 16:53:22 +0000 |
2697 | @@ -23,7 +23,7 @@ |
2698 | #include "diagnostics/assert.h" |
2699 | #include "util/cxx_util.h" |
2700 | |
2701 | -#include "stemmer_wrapper.h" |
2702 | +#include "stemmer_wrappers.h" |
2703 | |
2704 | using namespace zorba::locale; |
2705 | |
2706 | @@ -32,8 +32,8 @@ |
2707 | |
2708 | /////////////////////////////////////////////////////////////////////////////// |
2709 | |
2710 | -StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr p ) : |
2711 | - api_stemmer_( std::move( p ) ) |
2712 | +StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr api_stemmer ) : |
2713 | + api_stemmer_( std::move( api_stemmer ) ) |
2714 | { |
2715 | ZORBA_ASSERT( api_stemmer_.get() ); |
2716 | } |
2717 | @@ -42,6 +42,12 @@ |
2718 | api_stemmer_.release()->destroy(); |
2719 | } |
2720 | |
2721 | +void StemmerWrapper::properties( Properties *props ) const { |
2722 | + zorba::Stemmer::Properties api_props; |
2723 | + api_stemmer_->properties( &api_props ); |
2724 | + props->uri = api_props.uri; |
2725 | +} |
2726 | + |
2727 | void StemmerWrapper::stem( zstring const &word, iso639_1::type lang, |
2728 | zstring *result ) const { |
2729 | String const api_word( Unmarshaller::newString( word ) ); |
2730 | @@ -52,19 +58,22 @@ |
2731 | /////////////////////////////////////////////////////////////////////////////// |
2732 | |
2733 | StemmerProviderWrapper:: |
2734 | -StemmerProviderWrapper( zorba::StemmerProvider const *p ) : |
2735 | - api_stemmer_provider_( p ) |
2736 | +StemmerProviderWrapper( zorba::StemmerProvider const *api_stemmer_provider ) : |
2737 | + api_stemmer_provider_( api_stemmer_provider ) |
2738 | { |
2739 | ZORBA_ASSERT( api_stemmer_provider_ ); |
2740 | } |
2741 | |
2742 | -Stemmer::ptr |
2743 | -StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const { |
2744 | - zorba::Stemmer::ptr p( api_stemmer_provider_->getStemmer( lang ) ); |
2745 | - Stemmer::ptr result; |
2746 | - if ( p.get() ) |
2747 | - result.reset( new StemmerWrapper( std::move( p ) ) ); |
2748 | - return std::move( result ); |
2749 | +bool StemmerProviderWrapper::getStemmer( iso639_1::type lang, |
2750 | + Stemmer::ptr *result ) const { |
2751 | + zorba::Stemmer::ptr api_ptr; |
2752 | + zorba::Stemmer::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr; |
2753 | + if ( api_stemmer_provider_->getStemmer( lang, api_ptr_ptr ) ) { |
2754 | + if ( result ) |
2755 | + result->reset( new StemmerWrapper( std::move( api_ptr ) ) ); |
2756 | + return true; |
2757 | + } |
2758 | + return false; |
2759 | } |
2760 | |
2761 | /////////////////////////////////////////////////////////////////////////////// |
2762 | |
2763 | === renamed file 'src/api/stemmer_wrapper.h' => 'src/api/stemmer_wrappers.h' |
2764 | --- src/api/stemmer_wrapper.h 2012-04-24 12:39:38 +0000 |
2765 | +++ src/api/stemmer_wrappers.h 2012-04-26 16:53:22 +0000 |
2766 | @@ -35,6 +35,7 @@ |
2767 | |
2768 | // inherited |
2769 | void destroy() const; |
2770 | + void properties( Properties* ) const; |
2771 | void stem( zstring const &word, locale::iso639_1::type lang, |
2772 | zstring *result ) const; |
2773 | private: |
2774 | @@ -50,7 +51,7 @@ |
2775 | } |
2776 | |
2777 | // inherited |
2778 | - Stemmer::ptr get_stemmer( locale::iso639_1::type lang ) const; |
2779 | + bool getStemmer( locale::iso639_1::type, Stemmer::ptr* = 0 ) const; |
2780 | private: |
2781 | zorba::StemmerProvider const *const api_stemmer_provider_; |
2782 | }; |
2783 | |
2784 | === modified file 'src/api/thesaurus.cpp' |
2785 | --- src/api/thesaurus.cpp 2012-04-24 12:39:38 +0000 |
2786 | +++ src/api/thesaurus.cpp 2012-04-26 16:53:22 +0000 |
2787 | @@ -25,9 +25,11 @@ |
2788 | // out-of-line since it's virtual |
2789 | } |
2790 | |
2791 | -//Thesaurus::iterator::~iterator() { |
2792 | -// // out-of-line since it's virtual |
2793 | -//} |
2794 | +#if 0 |
2795 | +Thesaurus::iterator::~iterator() { |
2796 | + // out-of-line since it's virtual |
2797 | +} |
2798 | +#endif |
2799 | |
2800 | /////////////////////////////////////////////////////////////////////////////// |
2801 | |
2802 | |
2803 | === renamed file 'src/context/thesaurus_wrappers.cpp' => 'src/api/thesaurus_wrappers.cpp' |
2804 | --- src/context/thesaurus_wrappers.cpp 2012-04-24 12:39:38 +0000 |
2805 | +++ src/api/thesaurus_wrappers.cpp 2012-04-26 16:53:22 +0000 |
2806 | @@ -87,6 +87,27 @@ |
2807 | |
2808 | /////////////////////////////////////////////////////////////////////////////// |
2809 | |
2810 | +ThesaurusProviderWrapper:: |
2811 | +ThesaurusProviderWrapper( zorba::ThesaurusProvider const *p ) : |
2812 | + api_thesaurus_provider_( p ) |
2813 | +{ |
2814 | + ZORBA_ASSERT( api_thesaurus_provider_ ); |
2815 | +} |
2816 | + |
2817 | +bool ThesaurusProviderWrapper::getThesaurus( iso639_1::type lang, |
2818 | + Thesaurus::ptr *result ) const { |
2819 | + zorba::Thesaurus::ptr api_ptr; |
2820 | + zorba::Thesaurus::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr; |
2821 | + if ( api_thesaurus_provider_->getThesaurus( lang, api_ptr_ptr ) ) { |
2822 | + if ( result ) |
2823 | + result->reset( new ThesaurusWrapper( std::move( api_ptr ) ) ); |
2824 | + return true; |
2825 | + } |
2826 | + return false; |
2827 | +} |
2828 | + |
2829 | +/////////////////////////////////////////////////////////////////////////////// |
2830 | + |
2831 | } // namespace internal |
2832 | } // namespace zorba |
2833 | |
2834 | |
2835 | === renamed file 'src/context/thesaurus_wrappers.h' => 'src/api/thesaurus_wrappers.h' |
2836 | --- src/context/thesaurus_wrappers.h 2012-04-24 12:39:38 +0000 |
2837 | +++ src/api/thesaurus_wrappers.h 2012-04-26 16:53:22 +0000 |
2838 | @@ -22,6 +22,7 @@ |
2839 | #ifndef ZORBA_NO_FULL_TEXT |
2840 | |
2841 | #include <zorba/thesaurus.h> |
2842 | + |
2843 | #include "runtime/full_text/thesaurus.h" |
2844 | |
2845 | namespace zorba { |
2846 | @@ -54,6 +55,17 @@ |
2847 | zorba::Thesaurus::ptr api_thesaurus_; |
2848 | }; |
2849 | |
2850 | +class ThesaurusProviderWrapper : public ThesaurusProvider { |
2851 | +public: |
2852 | + ThesaurusProviderWrapper( zorba::ThesaurusProvider const* ); |
2853 | + |
2854 | + // inherited |
2855 | + bool getThesaurus( locale::iso639_1::type, Thesaurus::ptr* ) const; |
2856 | + |
2857 | +private: |
2858 | + zorba::ThesaurusProvider::ptr const api_thesaurus_provider_; |
2859 | +}; |
2860 | + |
2861 | /////////////////////////////////////////////////////////////////////////////// |
2862 | |
2863 | } // namespace internal |
2864 | |
2865 | === modified file 'src/api/uri_resolver_wrappers.cpp' |
2866 | --- src/api/uri_resolver_wrappers.cpp 2012-04-24 12:39:38 +0000 |
2867 | +++ src/api/uri_resolver_wrappers.cpp 2012-04-26 16:53:22 +0000 |
2868 | @@ -15,24 +15,20 @@ |
2869 | */ |
2870 | #include "stdafx.h" |
2871 | |
2872 | +#include <zorba/thesaurus.h> |
2873 | + |
2874 | +#include "runtime/full_text/thesaurus.h" |
2875 | + |
2876 | +#include "thesaurus_wrappers.h" |
2877 | +#include "unmarshaller.h" |
2878 | #include "uri_resolver_wrappers.h" |
2879 | #include "uriresolverimpl.h" |
2880 | -#include "unmarshaller.h" |
2881 | -#include <zorba/thesaurus.h> |
2882 | -#include <runtime/full_text/thesaurus.h> |
2883 | -#include <context/thesaurus_wrappers.h> |
2884 | |
2885 | namespace zorba |
2886 | { |
2887 | // "Convenience" class for passing an internal EntityData object to |
2888 | - // external mappers/resolvers. This can serve as a plain EntityData or |
2889 | - // a ThesaurusEntityData. However, when there's another EntityData subclass |
2890 | - // in future, this won't work as EntityData becomes an ambiguous base class... |
2891 | -#ifndef ZORBA_NO_FULL_TEXT |
2892 | - class EntityDataWrapper : public ThesaurusEntityData |
2893 | -#else |
2894 | + // external mappers/resolvers. |
2895 | class EntityDataWrapper : public EntityData |
2896 | -#endif /* ZORBA_NO_FULL_TEXT */ |
2897 | { |
2898 | public: |
2899 | static EntityDataWrapper const* create(internal::EntityData const* aData) { |
2900 | @@ -45,12 +41,7 @@ |
2901 | return new EntityDataWrapper(EntityData::SCHEMA); |
2902 | #ifndef ZORBA_NO_FULL_TEXT |
2903 | case internal::EntityData::THESAURUS: |
2904 | - { |
2905 | - EntityDataWrapper* retval = new EntityDataWrapper(EntityData::THESAURUS); |
2906 | - retval->theThesaurusLang = |
2907 | - dynamic_cast<const internal::ThesaurusEntityData*>(aData)->getLanguage(); |
2908 | - return retval; |
2909 | - } |
2910 | + return new EntityDataWrapper(EntityData::THESAURUS); |
2911 | case internal::EntityData::STOP_WORDS: |
2912 | return new EntityDataWrapper(EntityData::STOP_WORDS); |
2913 | #endif /* ZORBA_NO_FULL_TEXT */ |
2914 | @@ -67,21 +58,12 @@ |
2915 | return theKind; |
2916 | } |
2917 | |
2918 | -#ifndef ZORBA_NO_FULL_TEXT |
2919 | - virtual zorba::locale::iso639_1::type getLanguage() const { |
2920 | - return theThesaurusLang; |
2921 | - } |
2922 | -#endif /* ZORBA_NO_FULL_TEXT */ |
2923 | - |
2924 | private: |
2925 | EntityDataWrapper(EntityData::Kind aKind) |
2926 | : theKind(aKind) |
2927 | {} |
2928 | |
2929 | EntityData::Kind const theKind; |
2930 | -#ifndef ZORBA_NO_FULL_TEXT |
2931 | - zorba::locale::iso639_1::type theThesaurusLang; |
2932 | -#endif /* ZORBA_NO_FULL_TEXT */ |
2933 | }; |
2934 | |
2935 | URIMapperWrapper::URIMapperWrapper(zorba::URIMapper& aUserMapper) |
2936 | @@ -169,13 +151,13 @@ |
2937 | } |
2938 | #ifndef ZORBA_NO_FULL_TEXT |
2939 | else { |
2940 | - Thesaurus* lUserThesaurus = dynamic_cast<Thesaurus*>(lUserPtr.get()); |
2941 | - if (lUserThesaurus != NULL) { |
2942 | - // Here we pass memory ownership of the actual Thesaurus to the |
2943 | - // internal ThesaurusWrapper. |
2944 | - lRetval = new internal::ThesaurusWrapper |
2945 | - (Thesaurus::ptr(lUserThesaurus)); |
2946 | - lUserPtr.release(); |
2947 | + ThesaurusProvider* lUserThesaurusProvider = |
2948 | + dynamic_cast<ThesaurusProvider*>(lUserPtr.get()); |
2949 | + if (lUserThesaurusProvider) { |
2950 | + // Here we pass memory ownership of the actual ThesaurusProvider to |
2951 | + // the internal ThesaurusProviderWrapper. |
2952 | + lRetval = new internal::ThesaurusProviderWrapper |
2953 | + (lUserThesaurusProvider); |
2954 | } |
2955 | else { |
2956 | assert(false); |
2957 | |
2958 | === modified file 'src/api/xmldatamanagerimpl.cpp' |
2959 | --- src/api/xmldatamanagerimpl.cpp 2012-04-24 12:39:38 +0000 |
2960 | +++ src/api/xmldatamanagerimpl.cpp 2012-04-26 16:53:22 +0000 |
2961 | @@ -47,7 +47,7 @@ |
2962 | #include "runtime/util/flowctl_exception.h" |
2963 | |
2964 | #ifndef ZORBA_NO_FULL_TEXT |
2965 | -#include "stemmer_wrapper.h" |
2966 | +#include "stemmer_wrappers.h" |
2967 | #endif /* ZORBA_NO_FULL_TEXT */ |
2968 | |
2969 | namespace zorba { |
2970 | |
2971 | === modified file 'src/api/xmldatamanagerimpl.h' |
2972 | --- src/api/xmldatamanagerimpl.h 2012-04-24 12:39:38 +0000 |
2973 | +++ src/api/xmldatamanagerimpl.h 2012-04-26 16:53:22 +0000 |
2974 | @@ -27,7 +27,7 @@ |
2975 | #include "util/singleton.h" |
2976 | |
2977 | #ifndef ZORBA_NO_FULL_TEXT |
2978 | -#include "stemmer_wrapper.h" |
2979 | +#include "stemmer_wrappers.h" |
2980 | #endif /* ZORBA_NO_FULL_TEXT */ |
2981 | |
2982 | namespace zorba { |
2983 | |
2984 | === modified file 'src/compiler/codegen/plan_visitor.cpp' |
2985 | --- src/compiler/codegen/plan_visitor.cpp 2012-04-24 12:39:38 +0000 |
2986 | +++ src/compiler/codegen/plan_visitor.cpp 2012-04-26 16:53:22 +0000 |
2987 | @@ -250,7 +250,7 @@ |
2988 | class plan_ftnode_visitor : public ftnode_visitor |
2989 | { |
2990 | public: |
2991 | - typedef std::list<PlanIter_t> PlanIter_list_t; |
2992 | + typedef std::vector<PlanIter_t> PlanIter_list_t; |
2993 | |
2994 | plan_ftnode_visitor( plan_visitor* v ) : plan_visitor_( v ) { } |
2995 | |
2996 | |
2997 | === modified file 'src/compiler/expression/expr_put.cpp' |
2998 | --- src/compiler/expression/expr_put.cpp 2012-04-24 12:39:38 +0000 |
2999 | +++ src/compiler/expression/expr_put.cpp 2012-04-26 16:53:22 +0000 |
3000 | @@ -41,6 +41,7 @@ |
3001 | #include "compiler/expression/function_item_expr.h" |
3002 | #include "compiler/parser/parse_constants.h" |
3003 | |
3004 | +#include "diagnostics/assert.h" |
3005 | #include "functions/function.h" |
3006 | #include "functions/udf.h" |
3007 | |
3008 | |
3009 | === modified file 'src/compiler/translator/translator.cpp' |
3010 | --- src/compiler/translator/translator.cpp 2012-04-24 12:39:38 +0000 |
3011 | +++ src/compiler/translator/translator.cpp 2012-04-26 16:53:22 +0000 |
3012 | @@ -68,6 +68,8 @@ |
3013 | #include "functions/signature.h" |
3014 | #include "functions/udf.h" |
3015 | #include "functions/external_function.h" |
3016 | +#include "functions/func_ft_module.h" |
3017 | +#include "functions/func_ft_module_impl.h" |
3018 | |
3019 | #include "annotations/annotations.h" |
3020 | |
3021 | @@ -859,7 +861,7 @@ |
3022 | { |
3023 | ZORBA_ASSERT(count >= 0); |
3024 | |
3025 | - ftnode *n = NULL; |
3026 | + ftnode *n = nullptr; |
3027 | while ( count-- > 0 ) |
3028 | { |
3029 | ZORBA_FATAL( !theFTNodeStack.empty(), "" ); |
3030 | @@ -3294,6 +3296,41 @@ |
3031 | qnameItem->getLocalName()))); |
3032 | } |
3033 | |
3034 | +#ifndef ZORBA_NO_FULL_TEXT |
3035 | + if (qnameItem->getNamespace() == static_context::ZORBA_FULL_TEXT_FN_NS && |
3036 | + (qnameItem->getLocalName() == "tokenizer-properties" || |
3037 | + qnameItem->getLocalName() == "tokenize")) |
3038 | + { |
3039 | + FunctionConsts::FunctionKind kind; |
3040 | + |
3041 | + if (qnameItem->getLocalName() == "tokenizer-properties") |
3042 | + { |
3043 | + assert(numParams <= 1); |
3044 | + |
3045 | + if (numParams == 1) |
3046 | + kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1; |
3047 | + else |
3048 | + kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0; |
3049 | + |
3050 | + f = new full_text_tokenizer_properties(f->getSignature(), kind); |
3051 | + } |
3052 | + else |
3053 | + { |
3054 | + assert(numParams == 1 || numParams == 2); |
3055 | + |
3056 | + if (numParams == 2) |
3057 | + kind = FunctionConsts::FULL_TEXT_TOKENIZE_2; |
3058 | + else |
3059 | + kind = FunctionConsts::FULL_TEXT_TOKENIZE_1; |
3060 | + |
3061 | + f = new full_text_tokenize(f->getSignature(), kind); |
3062 | + } |
3063 | + |
3064 | + f->setStaticContext(theRootSctx); |
3065 | + bind_fn(f, numParams, loc); |
3066 | + } |
3067 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3068 | + |
3069 | f->setAnnotations(theAnnotations); |
3070 | theAnnotations = NULL; // important to reset |
3071 | |
3072 | @@ -12512,7 +12549,7 @@ |
3073 | { |
3074 | TRACE_VISIT (); |
3075 | #ifndef ZORBA_NO_FULL_TEXT |
3076 | - push_ftstack( NULL ); // sentinel |
3077 | + push_ftstack( nullptr ); // sentinel |
3078 | #endif /* ZORBA_NO_FULL_TEXT */ |
3079 | return no_state; |
3080 | } |
3081 | @@ -12756,7 +12793,7 @@ |
3082 | void *begin_visit (const FTMildNot& v) { |
3083 | TRACE_VISIT (); |
3084 | #ifndef ZORBA_NO_FULL_TEXT |
3085 | - push_ftstack( NULL ); // sentinel |
3086 | + push_ftstack( nullptr ); // sentinel |
3087 | #endif /* ZORBA_NO_FULL_TEXT */ |
3088 | return no_state; |
3089 | } |
3090 | @@ -12799,7 +12836,7 @@ |
3091 | void *begin_visit (const FTOr& v) { |
3092 | TRACE_VISIT (); |
3093 | #ifndef ZORBA_NO_FULL_TEXT |
3094 | - push_ftstack( NULL ); // sentinel |
3095 | + push_ftstack( nullptr ); // sentinel |
3096 | #endif /* ZORBA_NO_FULL_TEXT */ |
3097 | return no_state; |
3098 | } |
3099 | @@ -13058,7 +13095,7 @@ |
3100 | levels = dynamic_cast<ftrange*>( pop_ftstack() ); |
3101 | ZORBA_ASSERT( levels ); |
3102 | } else |
3103 | - levels = NULL; |
3104 | + levels = nullptr; |
3105 | |
3106 | ftthesaurus_id *const tid = new ftthesaurus_id( |
3107 | loc, v.get_uri(), v.get_relationship(), levels |
3108 | @@ -13070,7 +13107,7 @@ |
3109 | void *begin_visit (const FTThesaurusOption& v) { |
3110 | TRACE_VISIT (); |
3111 | #ifndef ZORBA_NO_FULL_TEXT |
3112 | - push_ftstack( NULL ); // sentinel |
3113 | + push_ftstack( nullptr ); // sentinel |
3114 | #endif /* ZORBA_NO_FULL_TEXT */ |
3115 | return no_state; |
3116 | } |
3117 | @@ -13078,10 +13115,8 @@ |
3118 | void end_visit (const FTThesaurusOption& v, void* /*visit_state*/) { |
3119 | TRACE_VISIT_OUT (); |
3120 | #ifndef ZORBA_NO_FULL_TEXT |
3121 | - ftthesaurus_id *default_tid = NULL; |
3122 | - if ( v.includes_default() ) { |
3123 | - default_tid = new ftthesaurus_id( loc, "##default" ); |
3124 | - } |
3125 | + ftthesaurus_id *const default_tid = v.includes_default() ? |
3126 | + new ftthesaurus_id( loc, "##default" ) : nullptr; |
3127 | |
3128 | ftthesaurus_option::thesaurus_id_list_t list; |
3129 | while ( true ) { |
3130 | |
3131 | === modified file 'src/context/CMakeLists.txt' |
3132 | --- src/context/CMakeLists.txt 2012-04-24 12:39:38 +0000 |
3133 | +++ src/context/CMakeLists.txt 2012-04-26 16:53:22 +0000 |
3134 | @@ -32,11 +32,6 @@ |
3135 | features.cpp |
3136 | ) |
3137 | |
3138 | -IF (NOT ZORBA_NO_FULL_TEXT) |
3139 | - LIST(APPEND CONTEXT_SRCS |
3140 | - thesaurus_wrappers.cpp) |
3141 | -ENDIF (NOT ZORBA_NO_FULL_TEXT) |
3142 | - |
3143 | SET(CONTEXT_BUILD_SRCS |
3144 | ${CMAKE_CURRENT_BINARY_DIR}/context/root_static_context_init.cpp |
3145 | ) |
3146 | |
3147 | === modified file 'src/context/default_url_resolvers.cpp' |
3148 | --- src/context/default_url_resolvers.cpp 2012-04-24 12:39:38 +0000 |
3149 | +++ src/context/default_url_resolvers.cpp 2012-04-26 16:53:22 +0000 |
3150 | @@ -17,6 +17,7 @@ |
3151 | |
3152 | |
3153 | #include "context/default_url_resolvers.h" |
3154 | +#include "util/cxx_util.h" |
3155 | #include "util/uri_util.h" |
3156 | #include "util/http_util.h" |
3157 | #include "util/fs_util.h" |
3158 | @@ -41,8 +42,15 @@ |
3159 | HTTPURLResolver::resolveURL |
3160 | (zstring const& aUrl, EntityData const* aEntityData) |
3161 | { |
3162 | - if (aEntityData->getKind() == EntityData::COLLECTION) |
3163 | - return NULL; |
3164 | + switch ( aEntityData->getKind() ) { |
3165 | + case EntityData::COLLECTION: |
3166 | +#ifndef ZORBA_NO_FULL_TEXT |
3167 | + case EntityData::THESAURUS: |
3168 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3169 | + return nullptr; |
3170 | + default: |
3171 | + break; |
3172 | + } |
3173 | |
3174 | uri::scheme lScheme = uri::get_scheme(aUrl); |
3175 | switch (lScheme) { |
3176 | @@ -82,8 +90,15 @@ |
3177 | FileURLResolver::resolveURL |
3178 | (zstring const& aUrl, EntityData const* aEntityData) |
3179 | { |
3180 | - if (aEntityData->getKind() == EntityData::COLLECTION) |
3181 | - return NULL; |
3182 | + switch ( aEntityData->getKind() ) { |
3183 | + case EntityData::COLLECTION: |
3184 | +#ifndef ZORBA_NO_FULL_TEXT |
3185 | + case EntityData::THESAURUS: |
3186 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3187 | + return nullptr; |
3188 | + default: |
3189 | + break; |
3190 | + } |
3191 | |
3192 | uri::scheme lScheme = uri::get_scheme(aUrl); |
3193 | if (lScheme != uri::file) { |
3194 | @@ -111,7 +126,6 @@ |
3195 | { |
3196 | if (aEntityData->getKind() != EntityData::COLLECTION) |
3197 | return NULL; |
3198 | - |
3199 | store::Item_t lName; |
3200 | GENV_STORE.getItemFactory()->createQName(lName, aUrl.c_str(), "", "zorba-internal-name-for-w3c-collections"); |
3201 | store::Collection_t lColl = GENV_STORE.getCollection(lName.getp(), true); |
3202 | |
3203 | === modified file 'src/context/static_context.cpp' |
3204 | --- src/context/static_context.cpp 2012-04-24 12:39:38 +0000 |
3205 | +++ src/context/static_context.cpp 2012-04-26 16:53:22 +0000 |
3206 | @@ -378,11 +378,16 @@ |
3207 | static_context::ZORBA_XML_FN_NS = |
3208 | "http://www.zorba-xquery.com/modules/xml"; |
3209 | |
3210 | +#ifndef ZORBA_NO_FULL_TEXT |
3211 | +const char* |
3212 | +static_context::ZORBA_FULL_TEXT_FN_NS = |
3213 | +"http://www.zorba-xquery.com/modules/full-text"; |
3214 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3215 | + |
3216 | const char* |
3217 | static_context::ZORBA_XML_FN_OPTIONS_NS = |
3218 | "http://www.zorba-xquery.com/modules/xml-options"; |
3219 | |
3220 | - |
3221 | /***************************************************************************//** |
3222 | Target namespaces of zorba reserved modules |
3223 | ********************************************************************************/ |
3224 | @@ -451,8 +456,11 @@ |
3225 | ns == ZORBA_JSON_FN_NS || |
3226 | ns == ZORBA_FETCH_FN_NS || |
3227 | ns == ZORBA_NODE_FN_NS || |
3228 | +#ifndef ZORBA_NO_FULL_TEXT |
3229 | + ns == ZORBA_FULL_TEXT_FN_NS || |
3230 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3231 | ns == ZORBA_XML_FN_NS); |
3232 | - } |
3233 | + } |
3234 | else if (ns == W3C_FN_NS || ns == XQUERY_MATH_FN_NS) |
3235 | { |
3236 | return true; |
3237 | @@ -1585,7 +1593,7 @@ |
3238 | std::auto_ptr<internal::Resource>& oResource, |
3239 | zstring& oErrorMessage) const |
3240 | { |
3241 | - oErrorMessage = ""; |
3242 | + oErrorMessage.clear(); |
3243 | |
3244 | // Iterate through all candidate URLs... |
3245 | for (std::vector<zstring>::iterator url = aUrls.begin(); |
3246 | @@ -1621,7 +1629,7 @@ |
3247 | } |
3248 | catch (const std::exception& e) |
3249 | { |
3250 | - if (oErrorMessage == "") |
3251 | + if (oErrorMessage.empty()) |
3252 | { |
3253 | // Really no point in saving anything more than the first message |
3254 | oErrorMessage = e.what(); |
3255 | |
3256 | === modified file 'src/context/static_context.h' |
3257 | --- src/context/static_context.h 2012-04-24 12:39:38 +0000 |
3258 | +++ src/context/static_context.h 2012-04-26 16:53:22 +0000 |
3259 | @@ -471,6 +471,9 @@ |
3260 | static const char* ZORBA_FETCH_FN_NS; |
3261 | static const char* ZORBA_NODE_FN_NS; |
3262 | static const char* ZORBA_XML_FN_NS; |
3263 | +#ifndef ZORBA_NO_FULL_TEXT |
3264 | + static const char* ZORBA_FULL_TEXT_FN_NS; |
3265 | +#endif /* ZORBA_NO_FULL_TEXT */ |
3266 | static const char* ZORBA_XML_FN_OPTIONS_NS; |
3267 | |
3268 | // Namespaces of virtual modules declaring zorba builtin functions |
3269 | |
3270 | === removed file 'src/context/stemmer_wrappers.cpp' |
3271 | --- src/context/stemmer_wrappers.cpp 2012-04-24 12:39:38 +0000 |
3272 | +++ src/context/stemmer_wrappers.cpp 1970-01-01 00:00:00 +0000 |
3273 | @@ -1,74 +0,0 @@ |
3274 | -/* |
3275 | - * Copyright 2006-2008 The FLWOR Foundation. |
3276 | - * |
3277 | - * Licensed under the Apache License, Version 2.0 (the "License"); |
3278 | - * you may not use this file except in compliance with the License. |
3279 | - * You may obtain a copy of the License at |
3280 | - * |
3281 | - * http://www.apache.org/licenses/LICENSE-2.0 |
3282 | - * |
3283 | - * Unless required by applicable law or agreed to in writing, software |
3284 | - * distributed under the License is distributed on an "AS IS" BASIS, |
3285 | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
3286 | - * See the License for the specific language governing permissions and |
3287 | - * limitations under the License. |
3288 | - */ |
3289 | -#include "stdafx.h" |
3290 | - |
3291 | -#include <zorba/config.h> |
3292 | - |
3293 | -#ifndef ZORBA_NO_FULL_TEXT |
3294 | - |
3295 | -#include "api/unmarshaller.h" |
3296 | -#include "diagnostics/assert.h" |
3297 | -#include "util/cxx_util.h" |
3298 | - |
3299 | -#include "stemmer_wrappers.h" |
3300 | - |
3301 | -using namespace zorba::locale; |
3302 | - |
3303 | -namespace zorba { |
3304 | -namespace internal { |
3305 | - |
3306 | -/////////////////////////////////////////////////////////////////////////////// |
3307 | - |
3308 | -StemmerWrapper::StemmerWrapper( zorba::Stemmer const *s ) : |
3309 | - api_stemmer_( s ) |
3310 | -{ |
3311 | - ZORBA_ASSERT( api_stemmer_ ); |
3312 | -} |
3313 | - |
3314 | -void StemmerWrapper::stem( zstring const &word, iso639_1::type lang, |
3315 | - zstring *result ) const { |
3316 | - String const api_word( Unmarshaller::newString( word ) ); |
3317 | - String api_result( Unmarshaller::newString( *result ) ); |
3318 | - api_stemmer_->stem( api_word, lang, &api_result ); |
3319 | -} |
3320 | - |
3321 | -/////////////////////////////////////////////////////////////////////////////// |
3322 | - |
3323 | -StemmerProviderWrapper:: |
3324 | -StemmerProviderWrapper( zorba::StemmerProvider const *p ) : |
3325 | - api_stemmer_provider_( p ) |
3326 | -{ |
3327 | - ZORBA_ASSERT( api_stemmer_provider_ ); |
3328 | -} |
3329 | - |
3330 | -Stemmer const* |
3331 | -StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const { |
3332 | - zorba::Stemmer const *const s = api_stemmer_provider_->getStemmer( lang ); |
3333 | - return s ? new StemmerWrapper( s ) : nullptr; |
3334 | -} |
3335 | - |
3336 | -/////////////////////////////////////////////////////////////////////////////// |
3337 | - |
3338 | -} // namespace internal |
3339 | -} // namespace zorba |
3340 | - |
3341 | -#endif /* ZORBA_NO_FULL_TEXT */ |
3342 | -/* |
3343 | - * Local variables: |
3344 | - * mode: c++ |
3345 | - * End: |
3346 | - */ |
3347 | -/* vim:set et sw=2 ts=2: */ |
3348 | |
3349 | === removed file 'src/context/stemmer_wrappers.h' |
3350 | --- src/context/stemmer_wrappers.h 2012-04-24 12:39:38 +0000 |
3351 | +++ src/context/stemmer_wrappers.h 1970-01-01 00:00:00 +0000 |
3352 | @@ -1,63 +0,0 @@ |
3353 | -/* |
3354 | - * Copyright 2006-2008 The FLWOR Foundation. |
3355 | - * |
3356 | - * Licensed under the Apache License, Version 2.0 (the "License"); |
3357 | - * you may not use this file except in compliance with the License. |
3358 | - * You may obtain a copy of the License at |
3359 | - * |
3360 | - * http://www.apache.org/licenses/LICENSE-2.0 |
3361 | - * |
3362 | - * Unless required by applicable law or agreed to in writing, software |
3363 | - * distributed under the License is distributed on an "AS IS" BASIS, |
3364 | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
3365 | - * See the License for the specific language governing permissions and |
3366 | - * limitations under the License. |
3367 | - */ |
3368 | -#pragma once |
3369 | -#ifndef ZORBA_STEMMER_WRAPPERS_H |
3370 | -#define ZORBA_STEMMER_WRAPPERS_H |
3371 | - |
3372 | -#include <zorba/config.h> |
3373 | - |
3374 | -#if 0 |
3375 | -#ifndef ZORBA_NO_FULL_TEXT |
3376 | - |
3377 | -#include <zorba/stemmer.h> |
3378 | -#include "zorbautils/stemmer.h" |
3379 | - |
3380 | -namespace zorba { |
3381 | -namespace internal { |
3382 | - |
3383 | -/////////////////////////////////////////////////////////////////////////////// |
3384 | - |
3385 | -class StemmerWrapper : public Stemmer { |
3386 | -public: |
3387 | - StemmerWrapper( zorba::Stemmer const *api_stemmer ); |
3388 | - void stem( zstring const &word, locale::iso639_1::type lang, |
3389 | - zstring *result ) const; |
3390 | -private: |
3391 | - zorba::Stemmer const *const api_stemmer_; |
3392 | -}; |
3393 | - |
3394 | -class StemmerProviderWrapper : public StemmerProvider { |
3395 | -public: |
3396 | - StemmerProviderWrapper( zorba::StemmerProvider const *p ); |
3397 | - Stemmer const* get_stemmer( locale::iso639_1::type lang ) const; |
3398 | -private: |
3399 | - zorba::StemmerProvider const *const api_stemmer_provider_; |
3400 | -}; |
3401 | - |
3402 | -/////////////////////////////////////////////////////////////////////////////// |
3403 | - |
3404 | -} // namespace internal |
3405 | -} // namespace zorba |
3406 | - |
3407 | -#endif /* ZORBA_NO_FULL_TEXT */ |
3408 | -#endif |
3409 | -#endif /* ZORBA_STEMMER_WRAPPERS_H */ |
3410 | -/* |
3411 | - * Local variables: |
3412 | - * mode: c++ |
3413 | - * End: |
3414 | - */ |
3415 | -/* vim:set et sw=2 ts=2: */ |
3416 | |
3417 | === modified file 'src/context/uri_resolver.cpp' |
3418 | --- src/context/uri_resolver.cpp 2012-04-24 12:39:38 +0000 |
3419 | +++ src/context/uri_resolver.cpp 2012-04-26 16:53:22 +0000 |
3420 | @@ -117,19 +117,6 @@ |
3421 | { |
3422 | } |
3423 | |
3424 | -#ifndef ZORBA_NO_FULL_TEXT |
3425 | - ThesaurusEntityData::ThesaurusEntityData(locale::iso639_1::type aLang) |
3426 | - : EntityData(EntityData::THESAURUS), |
3427 | - theLang(aLang) |
3428 | - { |
3429 | - } |
3430 | - |
3431 | - locale::iso639_1::type ThesaurusEntityData::getLanguage() const |
3432 | - { |
3433 | - return theLang; |
3434 | - } |
3435 | -#endif /* ZORBA_NO_FULL_TEXT */ |
3436 | - |
3437 | /************* |
3438 | * URIMapper is an abstract class, but we have to define its vtbl and |
3439 | * base destructor somewhere. |
3440 | |
3441 | === modified file 'src/context/uri_resolver.h' |
3442 | --- src/context/uri_resolver.h 2012-04-24 12:39:38 +0000 |
3443 | +++ src/context/uri_resolver.h 2012-04-26 16:53:22 +0000 |
3444 | @@ -55,21 +55,21 @@ |
3445 | /** |
3446 | * @brief Return the URL used to load this Resource. |
3447 | */ |
3448 | - zstring getUrl() { return theUrl; } |
3449 | + zstring const& getUrl() const { return theUrl; } |
3450 | |
3451 | virtual ~Resource() = 0; |
3452 | |
3453 | - protected: |
3454 | +protected: |
3455 | |
3456 | Resource(); |
3457 | |
3458 | - private: |
3459 | +private: |
3460 | |
3461 | /** |
3462 | * Used by static_context to populate the URL. |
3463 | */ |
3464 | + void setUrl(zstring const &aUrl) { theUrl = aUrl; } |
3465 | friend class zorba::static_context; |
3466 | - void setUrl(zstring aUrl) { theUrl = aUrl; } |
3467 | |
3468 | zstring theUrl; |
3469 | }; |
3470 | @@ -193,25 +193,6 @@ |
3471 | Kind const theKind; |
3472 | }; |
3473 | |
3474 | -#ifndef ZORBA_NO_FULL_TEXT |
3475 | -/** |
3476 | - * @brief The class containing additional data for URIMappers and URLResolvers |
3477 | - * when mapping/resolving a Thesaurus URI. |
3478 | - */ |
3479 | -class ThesaurusEntityData : public EntityData |
3480 | -{ |
3481 | -public: |
3482 | - ThesaurusEntityData(locale::iso639_1::type aLang); |
3483 | - /** |
3484 | - * @brief Return the language for which a thesaurus is being requested. |
3485 | - */ |
3486 | - virtual locale::iso639_1::type getLanguage() const; |
3487 | - |
3488 | -private: |
3489 | - locale::iso639_1::type const theLang; |
3490 | -}; |
3491 | -#endif /* ZORBA_NO_FULL_TEXT */ |
3492 | - |
3493 | /** |
3494 | * @brief Interface for URL resolving. |
3495 | * |
3496 | |
3497 | === modified file 'src/diagnostics/assert.cpp' |
3498 | --- src/diagnostics/assert.cpp 2012-04-24 12:39:38 +0000 |
3499 | +++ src/diagnostics/assert.cpp 2012-04-26 16:53:22 +0000 |
3500 | @@ -68,7 +68,7 @@ |
3501 | file, |
3502 | line, |
3503 | zerr::ZXQP0002_ASSERT_FAILED, |
3504 | - ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition )) |
3505 | + ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition ) ) |
3506 | ); |
3507 | } |
3508 | |
3509 | |
3510 | === modified file 'src/diagnostics/assert.h' |
3511 | --- src/diagnostics/assert.h 2012-04-24 12:39:38 +0000 |
3512 | +++ src/diagnostics/assert.h 2012-04-26 16:53:22 +0000 |
3513 | @@ -20,6 +20,10 @@ |
3514 | #ifndef ZORBA_ASSERT_H |
3515 | #define ZORBA_ASSERT_H |
3516 | |
3517 | +#include <sstream> |
3518 | + |
3519 | +#include "util/cxx_util.h" |
3520 | + |
3521 | namespace zorba { |
3522 | |
3523 | /** |
3524 | @@ -35,7 +39,7 @@ |
3525 | void assertion_failed( char const *condition, |
3526 | char const *file, |
3527 | int line, |
3528 | - char const *msg = 0); |
3529 | + char const *msg = nullptr ); |
3530 | |
3531 | /** |
3532 | * Zorba version of the standard assert(3) macro. |
3533 | |
3534 | === modified file 'src/diagnostics/diagnostic_en.xml' |
3535 | --- src/diagnostics/diagnostic_en.xml 2012-04-24 12:39:38 +0000 |
3536 | +++ src/diagnostics/diagnostic_en.xml 2012-04-26 16:53:22 +0000 |
3537 | @@ -1746,7 +1746,7 @@ |
3538 | <diagnostic code="ZXQP8401" name="THESAURUS_VERSION_MISMATCH" |
3539 | if="!defined(ZORBA_NO_FULL_TEXT)"> |
3540 | <comment> |
3541 | - The version of the thesaurus is not the expected version. |
3542 | + The version of the thesaurus is not the expected version. |
3543 | </comment> |
3544 | <value>"$1": wrong WordNet file version; should be "$2"</value> |
3545 | </diagnostic> |
3546 | @@ -1754,19 +1754,39 @@ |
3547 | <diagnostic code="ZXQP8402" name="THESAURUS_ENDIANNESS_MISMATCH" |
3548 | if="!defined(ZORBA_NO_FULL_TEXT)"> |
3549 | <comment> |
3550 | + The thesaurus data file's endianness does not match that of the CPU. |
3551 | </comment> |
3552 | <value>thesaurus data endianness does not match CPU</value> |
3553 | - The thesaurus data file's endianness does not match that of the CPU. |
3554 | </diagnostic> |
3555 | |
3556 | <diagnostic code="ZXQP8403" name="THESAURUS_DATA_ERROR" |
3557 | if="!defined(ZORBA_NO_FULL_TEXT)"> |
3558 | <comment> |
3559 | - The thesaurus data contains an unexpected value. |
3560 | + The thesaurus data contains an unexpected value. |
3561 | </comment> |
3562 | <value>thesaurus data error${: 1}</value> |
3563 | </diagnostic> |
3564 | |
3565 | + <diagnostic code="ZXQP8404" name="STEM_LANG_NOT_SUPPORTED" |
3566 | + if="!defined(ZORBA_NO_FULL_TEXT)"> |
3567 | + <value>"$1": language not supported for stemming</value> |
3568 | + </diagnostic> |
3569 | + |
3570 | + <diagnostic code="ZXQP8405" name="STOP_WORDS_LANG_NOT_SUPPORTED" |
3571 | + if="!defined(ZORBA_NO_FULL_TEXT)"> |
3572 | + <value>"$1": language not supported for stop-words</value> |
3573 | + </diagnostic> |
3574 | + |
3575 | + <diagnostic code="ZXQP8406" name="THESAURUS_LANG_NOT_SUPPORTED" |
3576 | + if="!defined(ZORBA_NO_FULL_TEXT)"> |
3577 | + <value>"$1": language not supported for thesaurus</value> |
3578 | + </diagnostic> |
3579 | + |
3580 | + <diagnostic code="ZXQP8407" name="TOKENIZER_LANG_NOT_SUPPORTED" |
3581 | + if="!defined(ZORBA_NO_FULL_TEXT)"> |
3582 | + <value>"$1": language not supported for tokenizer</value> |
3583 | + </diagnostic> |
3584 | + |
3585 | <diagnostic code="ZXQD0001" name="PREFIX_NOT_DECLARED"> |
3586 | <value>"$1": prefix not declared when calling function "$2" from $3</value> |
3587 | </diagnostic> |
3588 | |
3589 | === modified file 'src/diagnostics/pregenerated/diagnostic_list.cpp' |
3590 | --- src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-24 12:39:38 +0000 |
3591 | +++ src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-26 16:53:22 +0000 |
3592 | @@ -660,6 +660,18 @@ |
3593 | |
3594 | |
3595 | ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR( "ZXQP8403" ); |
3596 | + |
3597 | + |
3598 | +ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED( "ZXQP8404" ); |
3599 | + |
3600 | + |
3601 | +ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED( "ZXQP8405" ); |
3602 | + |
3603 | + |
3604 | +ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED( "ZXQP8406" ); |
3605 | + |
3606 | + |
3607 | +ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED( "ZXQP8407" ); |
3608 | #endif |
3609 | |
3610 | |
3611 | |
3612 | === modified file 'src/diagnostics/pregenerated/dict_en.cpp' |
3613 | --- src/diagnostics/pregenerated/dict_en.cpp 2012-04-24 12:39:38 +0000 |
3614 | +++ src/diagnostics/pregenerated/dict_en.cpp 2012-04-26 16:53:22 +0000 |
3615 | @@ -434,6 +434,18 @@ |
3616 | #if !defined(ZORBA_NO_FULL_TEXT) |
3617 | { "ZXQP8403", "thesaurus data error${: 1}" }, |
3618 | #endif |
3619 | +#if !defined(ZORBA_NO_FULL_TEXT) |
3620 | + { "ZXQP8404", "\"$1\": language not supported for stemming" }, |
3621 | +#endif |
3622 | +#if !defined(ZORBA_NO_FULL_TEXT) |
3623 | + { "ZXQP8405", "\"$1\": language not supported for stop-words" }, |
3624 | +#endif |
3625 | +#if !defined(ZORBA_NO_FULL_TEXT) |
3626 | + { "ZXQP8406", "\"$1\": language not supported for thesaurus" }, |
3627 | +#endif |
3628 | +#if !defined(ZORBA_NO_FULL_TEXT) |
3629 | + { "ZXQP8407", "\"$1\": language not supported for tokenizer" }, |
3630 | +#endif |
3631 | { "~AllMatchesHasExcludes", "AllMatches contains StringExclude" }, |
3632 | { "~AlreadySpecified", "already specified" }, |
3633 | { "~ArithOpNotDefinedBetween_23", "arithmetic operation not defined between types \"$2\" and \"$3\"" }, |
3634 | |
3635 | === modified file 'src/functions/CMakeLists.txt' |
3636 | --- src/functions/CMakeLists.txt 2012-04-24 12:39:38 +0000 |
3637 | +++ src/functions/CMakeLists.txt 2012-04-26 16:53:22 +0000 |
3638 | @@ -83,3 +83,7 @@ |
3639 | func_apply.cpp |
3640 | func_serialize_impl.cpp |
3641 | ) |
3642 | + |
3643 | +IF (NOT ZORBA_NO_FULL_TEXT) |
3644 | + LIST(APPEND FUNCTIONS_SRCS func_ft_module_impl.cpp) |
3645 | +ENDIF (NOT ZORBA_NO_FULL_TEXT) |
3646 | |
3647 | === modified file 'src/functions/external_function.cpp' |
3648 | --- src/functions/external_function.cpp 2012-04-24 12:39:38 +0000 |
3649 | +++ src/functions/external_function.cpp 2012-04-26 16:53:22 +0000 |
3650 | @@ -45,12 +45,12 @@ |
3651 | : |
3652 | function(sig, FunctionConsts::FN_UNKNOWN), |
3653 | theLoc(loc), |
3654 | - theModuleSctx(modSctx), |
3655 | theNamespace(ns), |
3656 | theScriptingKind(scriptingType), |
3657 | theImpl(impl) |
3658 | { |
3659 | resetFlag(FunctionConsts::isBuiltin); |
3660 | + theModuleSctx = modSctx; |
3661 | } |
3662 | |
3663 | |
3664 | @@ -62,7 +62,6 @@ |
3665 | zorba::serialization::serialize_baseclass(ar, (function*)this); |
3666 | |
3667 | ar & theLoc; |
3668 | - ar & theModuleSctx; |
3669 | ar & theNamespace; |
3670 | ar & theScriptingKind; |
3671 | |
3672 | |
3673 | === modified file 'src/functions/external_function.h' |
3674 | --- src/functions/external_function.h 2012-04-24 12:39:38 +0000 |
3675 | +++ src/functions/external_function.h 2012-04-26 16:53:22 +0000 |
3676 | @@ -29,7 +29,6 @@ |
3677 | |
3678 | /******************************************************************************* |
3679 | theLoc : The location of the declaration of this external function. |
3680 | - theModuleContext : The root sctx of the module containing the declaration. |
3681 | theNamespace : The namespace of the module containing the declaration. |
3682 | theScriptingKind : Whether the external function is simple, updating, or |
3683 | sequential (this property is part of the declaration). |
3684 | @@ -40,7 +39,6 @@ |
3685 | { |
3686 | protected: |
3687 | QueryLoc theLoc; |
3688 | - static_context * theModuleSctx; |
3689 | zstring theNamespace; |
3690 | short theScriptingKind; |
3691 | ExternalFunction * theImpl; |
3692 | |
3693 | === added file 'src/functions/func_ft_module_impl.cpp' |
3694 | --- src/functions/func_ft_module_impl.cpp 1970-01-01 00:00:00 +0000 |
3695 | +++ src/functions/func_ft_module_impl.cpp 2012-04-26 16:53:22 +0000 |
3696 | @@ -0,0 +1,128 @@ |
3697 | +/* |
3698 | + * Copyright 2006-2008 The FLWOR Foundation. |
3699 | + * |
3700 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
3701 | + * you may not use this file except in compliance with the License. |
3702 | + * You may obtain a copy of the License at |
3703 | + * |
3704 | + * http://www.apache.org/licenses/LICENSE-2.0 |
3705 | + * |
3706 | + * Unless required by applicable law or agreed to in writing, software |
3707 | + * distributed under the License is distributed on an "AS IS" BASIS, |
3708 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
3709 | + * See the License for the specific language governing permissions and |
3710 | + * limitations under the License. |
3711 | + */ |
3712 | +#include "functions/func_ft_module_impl.h" |
3713 | + |
3714 | +#include "runtime/full_text/ft_module.h" |
3715 | + |
3716 | +#define FT_MODULE_NS "http://www.zorba-xquery.com/modules/full-text" |
3717 | + |
3718 | +namespace zorba |
3719 | +{ |
3720 | + |
3721 | +#ifndef ZORBA_NO_FULL_TEXT |
3722 | + |
3723 | +SERIALIZABLE_CLASS_VERSIONS(full_text_tokenize) |
3724 | + |
3725 | + |
3726 | +void full_text_tokenize::serialize(::zorba::serialization::Archiver& ar) |
3727 | +{ |
3728 | + serialize_baseclass(ar, (function*)this); |
3729 | +} |
3730 | + |
3731 | + |
3732 | +PlanIter_t full_text_tokenize::codegen( |
3733 | + CompilerCB*, |
3734 | + static_context* sctx, |
3735 | + const QueryLoc& loc, |
3736 | + std::vector<PlanIter_t>& argv, |
3737 | + expr& ann) const |
3738 | +{ |
3739 | + return new TokenizeIterator(theModuleSctx, loc, argv); |
3740 | +} |
3741 | + |
3742 | + |
3743 | +SERIALIZABLE_CLASS_VERSIONS(full_text_tokenizer_properties) |
3744 | + |
3745 | + |
3746 | +void full_text_tokenizer_properties::serialize(::zorba::serialization::Archiver& ar) |
3747 | +{ |
3748 | + serialize_baseclass(ar, (function*)this); |
3749 | +} |
3750 | + |
3751 | + |
3752 | +PlanIter_t full_text_tokenizer_properties::codegen( |
3753 | + CompilerCB*, |
3754 | + static_context* sctx, |
3755 | + const QueryLoc& loc, |
3756 | + std::vector<PlanIter_t>& argv, |
3757 | + expr& ann) const |
3758 | +{ |
3759 | + return new TokenizerPropertiesIterator(theModuleSctx, loc, argv); |
3760 | +} |
3761 | + |
3762 | +#endif // ZORBA_NO_FULL_TEXT |
3763 | + |
3764 | + |
3765 | +/////////////////////////////////////////////////////////////////////////////// |
3766 | + |
3767 | +void populate_context_ft_module_impl(static_context* sctx) |
3768 | +{ |
3769 | +#ifndef ZORBA_NO_FULL_TEXT |
3770 | + |
3771 | + xqtref_t tokenize_return_type = |
3772 | + GENV_TYPESYSTEM.create_node_type(store::StoreConsts::elementNode, |
3773 | + createQName(FT_MODULE_NS, "", "token"), |
3774 | + NULL, |
3775 | + TypeConstants::QUANT_STAR, |
3776 | + false, |
3777 | + false); |
3778 | + { |
3779 | + DECL_WITH_KIND(sctx, |
3780 | + full_text_tokenize, |
3781 | + (createQName(FT_MODULE_NS, "", "tokenize"), |
3782 | + GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE, |
3783 | + tokenize_return_type), |
3784 | + FunctionConsts::FULL_TEXT_TOKENIZE_1); |
3785 | + } |
3786 | + { |
3787 | + DECL_WITH_KIND(sctx, |
3788 | + full_text_tokenize, |
3789 | + (createQName( FT_MODULE_NS, "", "tokenize"), |
3790 | + GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE, |
3791 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
3792 | + tokenize_return_type), |
3793 | + FunctionConsts::FULL_TEXT_TOKENIZE_2); |
3794 | + } |
3795 | + |
3796 | + xqtref_t tokenizer_properties_return_type = |
3797 | + GENV_TYPESYSTEM.create_node_type(store::StoreConsts::elementNode, |
3798 | + createQName(FT_MODULE_NS, "", "tokenizer-properties"), |
3799 | + NULL, |
3800 | + TypeConstants::QUANT_ONE, |
3801 | + false, |
3802 | + false); |
3803 | + { |
3804 | + DECL_WITH_KIND(sctx, |
3805 | + full_text_tokenizer_properties, |
3806 | + (createQName(FT_MODULE_NS, "", "tokenizer-properties"), |
3807 | + tokenizer_properties_return_type), |
3808 | + FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0); |
3809 | + } |
3810 | + { |
3811 | + DECL_WITH_KIND(sctx, |
3812 | + full_text_tokenizer_properties, |
3813 | + (createQName( FT_MODULE_NS, "", "tokenizer-properties"), |
3814 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
3815 | + tokenizer_properties_return_type), |
3816 | + FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1); |
3817 | + } |
3818 | +#endif // ZORBA_NO_FULL_TEXT |
3819 | +} |
3820 | + |
3821 | + |
3822 | + |
3823 | +} // namespace zorba |
3824 | +/* vim:set et sw=2 ts=2: */ |
3825 | |
3826 | === added file 'src/functions/func_ft_module_impl.h' |
3827 | --- src/functions/func_ft_module_impl.h 1970-01-01 00:00:00 +0000 |
3828 | +++ src/functions/func_ft_module_impl.h 2012-04-26 16:53:22 +0000 |
3829 | @@ -0,0 +1,81 @@ |
3830 | +/* |
3831 | + * Copyright 2006-2008 The FLWOR Foundation. |
3832 | + * |
3833 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
3834 | + * you may not use this file except in compliance with the License. |
3835 | + * You may obtain a copy of the License at |
3836 | + * |
3837 | + * http://www.apache.org/licenses/LICENSE-2.0 |
3838 | + * |
3839 | + * Unless required by applicable law or agreed to in writing, software |
3840 | + * distributed under the License is distributed on an "AS IS" BASIS, |
3841 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
3842 | + * See the License for the specific language governing permissions and |
3843 | + * limitations under the License. |
3844 | + */ |
3845 | +#ifndef ZORBA_FUNCTIONS_FT_MODULE_IMPL_H |
3846 | +#define ZORBA_FUNCTIONS_FT_MODULE_IMPL_H |
3847 | + |
3848 | +#include "stdafx.h" |
3849 | + |
3850 | +#include "functions/function.h" |
3851 | +#include "functions/function_impl.h" |
3852 | + |
3853 | + |
3854 | +namespace zorba |
3855 | +{ |
3856 | + |
3857 | +#ifndef ZORBA_NO_FULL_TEXT |
3858 | + |
3859 | +// full-text:tokenize -- function object whose codegen() creates a TokenizeIterator using the function's module static context (theModuleSctx). |
3860 | +class full_text_tokenize : public function |
3861 | +{ |
3862 | +public: |
3863 | + SERIALIZABLE_CLASS(full_text_tokenize); |
3864 | + SERIALIZABLE_CLASS_CONSTRUCTOR2(full_text_tokenize, function) |
3865 | + void serialize(::zorba::serialization::Archiver& ar); |
3866 | + |
3867 | +public: |
3868 | + full_text_tokenize(const signature& sig, FunctionConsts::FunctionKind kind) |
3869 | + : |
3870 | + function(sig, kind) |
3871 | + { |
3872 | + |
3873 | + } |
3874 | + |
3875 | + CODEGEN_DECL(); |
3876 | +}; |
3877 | + |
3878 | + |
3879 | + |
3880 | +// full-text:tokenizer-properties -- function object whose codegen() creates a TokenizerPropertiesIterator using the function's module static context (theModuleSctx). |
3881 | +class full_text_tokenizer_properties : public function |
3882 | +{ |
3883 | +public: |
3884 | + SERIALIZABLE_CLASS(full_text_tokenizer_properties); |
3885 | + SERIALIZABLE_CLASS_CONSTRUCTOR2(full_text_tokenizer_properties, function) |
3886 | + void serialize(::zorba::serialization::Archiver& ar); |
3887 | + |
3888 | +public: |
3889 | + full_text_tokenizer_properties(const signature& sig, FunctionConsts::FunctionKind kind) |
3890 | + : |
3891 | + function(sig, kind) |
3892 | + { |
3893 | + |
3894 | + } |
3895 | + |
3896 | + // Mark the function as accessing the dyn ctx so that it won't be |
3897 | + // const-folded. We must prevent const-folding because the function |
3898 | + // returns a node that is validated with a schema that may not be |
3899 | + // imported in the module where the function is invoked from. |
3900 | + bool accessesDynCtx() const { return true; } |
3901 | + |
3902 | + CODEGEN_DECL(); |
3903 | +}; |
3904 | + |
3905 | +#endif // ZORBA_NO_FULL_TEXT |
3906 | + |
3907 | +} |
3908 | + |
3909 | +#endif |
3910 | +/* vim:set et sw=2 ts=2: */ |
3911 | |
3912 | === modified file 'src/functions/function.cpp' |
3913 | --- src/functions/function.cpp 2012-04-24 12:39:38 +0000 |
3914 | +++ src/functions/function.cpp 2012-04-26 16:53:22 +0000 |
3915 | @@ -43,6 +43,7 @@ |
3916 | theSignature(sig), |
3917 | theKind(kind), |
3918 | theFlags(0), |
3919 | + theModuleSctx(NULL), |
3920 | theXQueryVersion(StaticContextConsts::xquery_version_1_0) |
3921 | { |
3922 | setFlag(FunctionConsts::isBuiltin); |
3923 | @@ -70,6 +71,7 @@ |
3924 | SERIALIZE_ENUM(FunctionConsts::FunctionKind, theKind); |
3925 | ar & theFlags; |
3926 | ar & theAnnotationList; |
3927 | + ar & theModuleSctx; |
3928 | SERIALIZE_ENUM(StaticContextConsts::xquery_version_t, theXQueryVersion); |
3929 | } |
3930 | |
3931 | @@ -92,6 +94,7 @@ |
3932 | return n == VARIADIC_SIG_SIZE || argv.size() == n; |
3933 | } |
3934 | |
3935 | + |
3936 | /******************************************************************************* |
3937 | |
3938 | ********************************************************************************/ |
3939 | |
3940 | === modified file 'src/functions/function.h' |
3941 | --- src/functions/function.h 2012-04-24 12:39:38 +0000 |
3942 | +++ src/functions/function.h 2012-04-26 16:53:22 +0000 |
3943 | @@ -42,7 +42,10 @@ |
3944 | |
3945 | |
3946 | /******************************************************************************* |
3947 | - |
3948 | + theModuleSctx: |
3949 | + -------------- |
3950 | + The root sctx of the module containing the declaration. It is NULL for |
3951 | + functions that must be executed in the static context of the caller. |
3952 | ********************************************************************************/ |
3953 | class function : public SimpleRCObject |
3954 | { |
3955 | @@ -51,6 +54,7 @@ |
3956 | FunctionConsts::FunctionKind theKind; |
3957 | uint32_t theFlags; |
3958 | AnnotationList_t theAnnotationList; |
3959 | + static_context * theModuleSctx; |
3960 | |
3961 | StaticContextConsts::xquery_version_t theXQueryVersion; |
3962 | |
3963 | @@ -89,6 +93,10 @@ |
3964 | |
3965 | bool isVariadic() const { return theSignature.isVariadic(); } |
3966 | |
3967 | + static_context* getStaticContext() const { return theModuleSctx; } |
3968 | + |
3969 | + void setStaticContext(static_context* sctx) { theModuleSctx = sctx; } |
3970 | + |
3971 | void setFlag(FunctionConsts::AnnotationFlags flag) |
3972 | { |
3973 | theFlags |= flag; |
3974 | |
3975 | === modified file 'src/functions/function_consts.h' |
3976 | --- src/functions/function_consts.h 2012-04-24 12:39:38 +0000 |
3977 | +++ src/functions/function_consts.h 2012-04-26 16:53:22 +0000 |
3978 | @@ -225,6 +225,13 @@ |
3979 | OP_HOIST_1, |
3980 | OP_UNHOIST_1, |
3981 | |
3982 | +#ifndef ZORBA_NO_FULL_TEXT |
3983 | + FULL_TEXT_TOKENIZER_PROPERTIES_1, |
3984 | + FULL_TEXT_TOKENIZER_PROPERTIES_0, |
3985 | + FULL_TEXT_TOKENIZE_2, |
3986 | + FULL_TEXT_TOKENIZE_1, |
3987 | +#endif |
3988 | + |
3989 | #include "functions/function_enum.h" |
3990 | |
3991 | FN_MAX_FUNC |
3992 | |
3993 | === modified file 'src/functions/library.cpp' |
3994 | --- src/functions/library.cpp 2012-04-24 12:39:38 +0000 |
3995 | +++ src/functions/library.cpp 2012-04-26 16:53:22 +0000 |
3996 | @@ -68,6 +68,10 @@ |
3997 | #include "functions/func_reflection.h" |
3998 | #include "functions/func_apply.h" |
3999 | #include "functions/func_fetch.h" |
4000 | +#ifndef ZORBA_NO_FULL_TEXT |
4001 | +#include "functions/func_ft_module.h" |
4002 | +#include "runtime/full_text/ft_module_impl.h" |
4003 | +#endif /* ZORBA_NO_FULL_TEXT */ |
4004 | |
4005 | #include "functions/func_function_item_iter.h" |
4006 | |
4007 | @@ -144,6 +148,10 @@ |
4008 | populate_context_apply(sctx); |
4009 | |
4010 | populate_context_fetch(sctx); |
4011 | +#ifndef ZORBA_NO_FULL_TEXT |
4012 | + populate_context_ft_module(sctx); |
4013 | + populate_context_ft_module_impl(sctx); |
4014 | +#endif /* ZORBA_NO_FULL_TEXT */ |
4015 | |
4016 | ar.set_loading_hardcoded_objects(false); |
4017 | } |
4018 | |
4019 | === added file 'src/functions/pregenerated/func_ft_module.cpp' |
4020 | --- src/functions/pregenerated/func_ft_module.cpp 1970-01-01 00:00:00 +0000 |
4021 | +++ src/functions/pregenerated/func_ft_module.cpp 2012-04-26 16:53:22 +0000 |
4022 | @@ -0,0 +1,490 @@ |
4023 | +/* |
4024 | + * Copyright 2006-2008 The FLWOR Foundation. |
4025 | + * |
4026 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
4027 | + * you may not use this file except in compliance with the License. |
4028 | + * You may obtain a copy of the License at |
4029 | + * |
4030 | + * http://www.apache.org/licenses/LICENSE-2.0 |
4031 | + * |
4032 | + * Unless required by applicable law or agreed to in writing, software |
4033 | + * distributed under the License is distributed on an "AS IS" BASIS, |
4034 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
4035 | + * See the License for the specific language governing permissions and |
4036 | + * limitations under the License. |
4037 | + */ |
4038 | + |
4039 | +// ****************************************** |
4040 | +// * * |
4041 | +// * THIS IS A GENERATED FILE. DO NOT EDIT! * |
4042 | +// * SEE .xml FILE WITH SAME NAME * |
4043 | +// * * |
4044 | +// ****************************************** |
4045 | + |
4046 | + |
4047 | +#include "stdafx.h" |
4048 | +#include "runtime/full_text/ft_module.h" |
4049 | +#include "functions/func_ft_module.h" |
4050 | + |
4051 | + |
4052 | +namespace zorba{ |
4053 | + |
4054 | + |
4055 | +#ifndef ZORBA_NO_FULL_TEXT |
4056 | +PlanIter_t full_text_current_lang::codegen( |
4057 | + CompilerCB*, |
4058 | + static_context* sctx, |
4059 | + const QueryLoc& loc, |
4060 | + std::vector<PlanIter_t>& argv, |
4061 | + expr& ann) const |
4062 | +{ |
4063 | + return new CurrentLangIterator(sctx, loc, argv); |
4064 | +} |
4065 | + |
4066 | +#endif |
4067 | +#ifndef ZORBA_NO_FULL_TEXT |
4068 | +PlanIter_t full_text_host_lang::codegen( |
4069 | + CompilerCB*, |
4070 | + static_context* sctx, |
4071 | + const QueryLoc& loc, |
4072 | + std::vector<PlanIter_t>& argv, |
4073 | + expr& ann) const |
4074 | +{ |
4075 | + return new HostLangIterator(sctx, loc, argv); |
4076 | +} |
4077 | + |
4078 | +#endif |
4079 | +#ifndef ZORBA_NO_FULL_TEXT |
4080 | +PlanIter_t full_text_is_stem_lang_supported::codegen( |
4081 | + CompilerCB*, |
4082 | + static_context* sctx, |
4083 | + const QueryLoc& loc, |
4084 | + std::vector<PlanIter_t>& argv, |
4085 | + expr& ann) const |
4086 | +{ |
4087 | + return new IsStemLangSupportedIterator(sctx, loc, argv); |
4088 | +} |
4089 | + |
4090 | +#endif |
4091 | +#ifndef ZORBA_NO_FULL_TEXT |
4092 | +PlanIter_t full_text_is_stop_word::codegen( |
4093 | + CompilerCB*, |
4094 | + static_context* sctx, |
4095 | + const QueryLoc& loc, |
4096 | + std::vector<PlanIter_t>& argv, |
4097 | + expr& ann) const |
4098 | +{ |
4099 | + return new IsStopWordIterator(sctx, loc, argv); |
4100 | +} |
4101 | + |
4102 | +#endif |
4103 | +#ifndef ZORBA_NO_FULL_TEXT |
4104 | +PlanIter_t full_text_is_stop_word_lang_supported::codegen( |
4105 | + CompilerCB*, |
4106 | + static_context* sctx, |
4107 | + const QueryLoc& loc, |
4108 | + std::vector<PlanIter_t>& argv, |
4109 | + expr& ann) const |
4110 | +{ |
4111 | + return new IsStopWordLangSupportedIterator(sctx, loc, argv); |
4112 | +} |
4113 | + |
4114 | +#endif |
4115 | +#ifndef ZORBA_NO_FULL_TEXT |
4116 | +PlanIter_t full_text_is_thesaurus_lang_supported::codegen( |
4117 | + CompilerCB*, |
4118 | + static_context* sctx, |
4119 | + const QueryLoc& loc, |
4120 | + std::vector<PlanIter_t>& argv, |
4121 | + expr& ann) const |
4122 | +{ |
4123 | + return new IsThesaurusLangSupportedIterator(sctx, loc, argv); |
4124 | +} |
4125 | + |
4126 | +#endif |
4127 | +#ifndef ZORBA_NO_FULL_TEXT |
4128 | +PlanIter_t full_text_is_tokenizer_lang_supported::codegen( |
4129 | + CompilerCB*, |
4130 | + static_context* sctx, |
4131 | + const QueryLoc& loc, |
4132 | + std::vector<PlanIter_t>& argv, |
4133 | + expr& ann) const |
4134 | +{ |
4135 | + return new IsTokenizerLangSupportedIterator(sctx, loc, argv); |
4136 | +} |
4137 | + |
4138 | +#endif |
4139 | +#ifndef ZORBA_NO_FULL_TEXT |
4140 | +PlanIter_t full_text_stem::codegen( |
4141 | + CompilerCB*, |
4142 | + static_context* sctx, |
4143 | + const QueryLoc& loc, |
4144 | + std::vector<PlanIter_t>& argv, |
4145 | + expr& ann) const |
4146 | +{ |
4147 | + return new StemIterator(sctx, loc, argv); |
4148 | +} |
4149 | + |
4150 | +#endif |
4151 | +#ifndef ZORBA_NO_FULL_TEXT |
4152 | +PlanIter_t full_text_strip_diacritics::codegen( |
4153 | + CompilerCB*, |
4154 | + static_context* sctx, |
4155 | + const QueryLoc& loc, |
4156 | + std::vector<PlanIter_t>& argv, |
4157 | + expr& ann) const |
4158 | +{ |
4159 | + return new StripDiacriticsIterator(sctx, loc, argv); |
4160 | +} |
4161 | + |
4162 | +#endif |
4163 | +#ifndef ZORBA_NO_FULL_TEXT |
4164 | +PlanIter_t full_text_thesaurus_lookup::codegen( |
4165 | + CompilerCB*, |
4166 | + static_context* sctx, |
4167 | + const QueryLoc& loc, |
4168 | + std::vector<PlanIter_t>& argv, |
4169 | + expr& ann) const |
4170 | +{ |
4171 | + return new ThesaurusLookupIterator(sctx, loc, argv); |
4172 | +} |
4173 | + |
4174 | +#endif |
4175 | +#ifndef ZORBA_NO_FULL_TEXT |
4176 | +PlanIter_t full_text_tokenize_string::codegen( |
4177 | + CompilerCB*, |
4178 | + static_context* sctx, |
4179 | + const QueryLoc& loc, |
4180 | + std::vector<PlanIter_t>& argv, |
4181 | + expr& ann) const |
4182 | +{ |
4183 | + return new TokenizeStringIterator(sctx, loc, argv); |
4184 | +} |
4185 | + |
4186 | +#endif |
4187 | + |
4188 | +void populate_context_ft_module(static_context* sctx) |
4189 | +{ |
4190 | + |
4191 | +#ifndef ZORBA_NO_FULL_TEXT |
4192 | + { |
4193 | + |
4194 | + |
4195 | + DECL_WITH_KIND(sctx, full_text_current_lang, |
4196 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","current-lang"), |
4197 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE), |
4198 | + FunctionConsts::FULL_TEXT_CURRENT_LANG_0); |
4199 | + |
4200 | + } |
4201 | + |
4202 | + |
4203 | +#endif |
4204 | + |
4205 | + |
4206 | +#ifndef ZORBA_NO_FULL_TEXT |
4207 | + { |
4208 | + |
4209 | + |
4210 | + DECL_WITH_KIND(sctx, full_text_host_lang, |
4211 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","host-lang"), |
4212 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE), |
4213 | + FunctionConsts::FULL_TEXT_HOST_LANG_0); |
4214 | + |
4215 | + } |
4216 | + |
4217 | + |
4218 | +#endif |
4219 | + |
4220 | + |
4221 | +#ifndef ZORBA_NO_FULL_TEXT |
4222 | + { |
4223 | + |
4224 | + |
4225 | + DECL_WITH_KIND(sctx, full_text_is_stem_lang_supported, |
4226 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stem-lang-supported"), |
4227 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4228 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4229 | + FunctionConsts::FULL_TEXT_IS_STEM_LANG_SUPPORTED_1); |
4230 | + |
4231 | + } |
4232 | + |
4233 | + |
4234 | +#endif |
4235 | + |
4236 | + |
4237 | +#ifndef ZORBA_NO_FULL_TEXT |
4238 | + { |
4239 | + |
4240 | + |
4241 | + DECL_WITH_KIND(sctx, full_text_is_stop_word, |
4242 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"), |
4243 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4244 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4245 | + FunctionConsts::FULL_TEXT_IS_STOP_WORD_1); |
4246 | + |
4247 | + } |
4248 | + |
4249 | + |
4250 | +#endif |
4251 | + |
4252 | + |
4253 | +#ifndef ZORBA_NO_FULL_TEXT |
4254 | + { |
4255 | + |
4256 | + |
4257 | + DECL_WITH_KIND(sctx, full_text_is_stop_word, |
4258 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"), |
4259 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4260 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4261 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4262 | + FunctionConsts::FULL_TEXT_IS_STOP_WORD_2); |
4263 | + |
4264 | + } |
4265 | + |
4266 | + |
4267 | +#endif |
4268 | + |
4269 | + |
4270 | +#ifndef ZORBA_NO_FULL_TEXT |
4271 | + { |
4272 | + |
4273 | + |
4274 | + DECL_WITH_KIND(sctx, full_text_is_stop_word_lang_supported, |
4275 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word-lang-supported"), |
4276 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4277 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4278 | + FunctionConsts::FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1); |
4279 | + |
4280 | + } |
4281 | + |
4282 | + |
4283 | +#endif |
4284 | + |
4285 | + |
4286 | +#ifndef ZORBA_NO_FULL_TEXT |
4287 | + { |
4288 | + |
4289 | + |
4290 | + DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported, |
4291 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"), |
4292 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4293 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4294 | + FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1); |
4295 | + |
4296 | + } |
4297 | + |
4298 | + |
4299 | +#endif |
4300 | + |
4301 | + |
4302 | +#ifndef ZORBA_NO_FULL_TEXT |
4303 | + { |
4304 | + |
4305 | + |
4306 | + DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported, |
4307 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"), |
4308 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4309 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4310 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4311 | + FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2); |
4312 | + |
4313 | + } |
4314 | + |
4315 | + |
4316 | +#endif |
4317 | + |
4318 | + |
4319 | +#ifndef ZORBA_NO_FULL_TEXT |
4320 | + { |
4321 | + |
4322 | + |
4323 | + DECL_WITH_KIND(sctx, full_text_is_tokenizer_lang_supported, |
4324 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","is-tokenizer-lang-supported"), |
4325 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4326 | + GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE), |
4327 | + FunctionConsts::FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1); |
4328 | + |
4329 | + } |
4330 | + |
4331 | + |
4332 | +#endif |
4333 | + |
4334 | + |
4335 | +#ifndef ZORBA_NO_FULL_TEXT |
4336 | + { |
4337 | + |
4338 | + |
4339 | + DECL_WITH_KIND(sctx, full_text_stem, |
4340 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"), |
4341 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4342 | + GENV_TYPESYSTEM.STRING_TYPE_ONE), |
4343 | + FunctionConsts::FULL_TEXT_STEM_1); |
4344 | + |
4345 | + } |
4346 | + |
4347 | + |
4348 | +#endif |
4349 | + |
4350 | + |
4351 | +#ifndef ZORBA_NO_FULL_TEXT |
4352 | + { |
4353 | + |
4354 | + |
4355 | + DECL_WITH_KIND(sctx, full_text_stem, |
4356 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"), |
4357 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4358 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4359 | + GENV_TYPESYSTEM.STRING_TYPE_ONE), |
4360 | + FunctionConsts::FULL_TEXT_STEM_2); |
4361 | + |
4362 | + } |
4363 | + |
4364 | + |
4365 | +#endif |
4366 | + |
4367 | + |
4368 | +#ifndef ZORBA_NO_FULL_TEXT |
4369 | + { |
4370 | + |
4371 | + |
4372 | + DECL_WITH_KIND(sctx, full_text_strip_diacritics, |
4373 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","strip-diacritics"), |
4374 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4375 | + GENV_TYPESYSTEM.STRING_TYPE_ONE), |
4376 | + FunctionConsts::FULL_TEXT_STRIP_DIACRITICS_1); |
4377 | + |
4378 | + } |
4379 | + |
4380 | + |
4381 | +#endif |
4382 | + |
4383 | + |
4384 | +#ifndef ZORBA_NO_FULL_TEXT |
4385 | + { |
4386 | + |
4387 | + |
4388 | + DECL_WITH_KIND(sctx, full_text_thesaurus_lookup, |
4389 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"), |
4390 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4391 | + GENV_TYPESYSTEM.STRING_TYPE_PLUS), |
4392 | + FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_1); |
4393 | + |
4394 | + } |
4395 | + |
4396 | + |
4397 | +#endif |
4398 | + |
4399 | + |
4400 | +#ifndef ZORBA_NO_FULL_TEXT |
4401 | + { |
4402 | + |
4403 | + |
4404 | + DECL_WITH_KIND(sctx, full_text_thesaurus_lookup, |
4405 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"), |
4406 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4407 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4408 | + GENV_TYPESYSTEM.STRING_TYPE_PLUS), |
4409 | + FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_2); |
4410 | + |
4411 | + } |
4412 | + |
4413 | + |
4414 | +#endif |
4415 | + |
4416 | + |
4417 | +#ifndef ZORBA_NO_FULL_TEXT |
4418 | + { |
4419 | + |
4420 | + |
4421 | + DECL_WITH_KIND(sctx, full_text_thesaurus_lookup, |
4422 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"), |
4423 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4424 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4425 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4426 | + GENV_TYPESYSTEM.STRING_TYPE_PLUS), |
4427 | + FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_3); |
4428 | + |
4429 | + } |
4430 | + |
4431 | + |
4432 | +#endif |
4433 | + |
4434 | + |
4435 | +#ifndef ZORBA_NO_FULL_TEXT |
4436 | + { |
4437 | + |
4438 | + |
4439 | + DECL_WITH_KIND(sctx, full_text_thesaurus_lookup, |
4440 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"), |
4441 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4442 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4443 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4444 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4445 | + GENV_TYPESYSTEM.STRING_TYPE_PLUS), |
4446 | + FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_4); |
4447 | + |
4448 | + } |
4449 | + |
4450 | + |
4451 | +#endif |
4452 | + |
4453 | + |
4454 | +#ifndef ZORBA_NO_FULL_TEXT |
4455 | + { |
4456 | + |
4457 | + |
4458 | + DECL_WITH_KIND(sctx, full_text_thesaurus_lookup, |
4459 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"), |
4460 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4461 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4462 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4463 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4464 | + GENV_TYPESYSTEM.INTEGER_TYPE_ONE, |
4465 | + GENV_TYPESYSTEM.INTEGER_TYPE_ONE, |
4466 | + GENV_TYPESYSTEM.STRING_TYPE_PLUS), |
4467 | + FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_6); |
4468 | + |
4469 | + } |
4470 | + |
4471 | + |
4472 | +#endif |
4473 | + |
4474 | + |
4475 | +#ifndef ZORBA_NO_FULL_TEXT |
4476 | + { |
4477 | + |
4478 | + |
4479 | + DECL_WITH_KIND(sctx, full_text_tokenize_string, |
4480 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"), |
4481 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4482 | + GENV_TYPESYSTEM.STRING_TYPE_STAR), |
4483 | + FunctionConsts::FULL_TEXT_TOKENIZE_STRING_1); |
4484 | + |
4485 | + } |
4486 | + |
4487 | + |
4488 | +#endif |
4489 | + |
4490 | + |
4491 | +#ifndef ZORBA_NO_FULL_TEXT |
4492 | + { |
4493 | + |
4494 | + |
4495 | + DECL_WITH_KIND(sctx, full_text_tokenize_string, |
4496 | + (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"), |
4497 | + GENV_TYPESYSTEM.STRING_TYPE_ONE, |
4498 | + GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE, |
4499 | + GENV_TYPESYSTEM.STRING_TYPE_STAR), |
4500 | + FunctionConsts::FULL_TEXT_TOKENIZE_STRING_2); |
4501 | + |
4502 | + } |
4503 | + |
4504 | + |
4505 | +#endif |
4506 | +} |
4507 | + |
4508 | + |
4509 | +} |
4510 | + |
4511 | + |
4512 | + |
4513 | |
4514 | === added file 'src/functions/pregenerated/func_ft_module.h' |
4515 | --- src/functions/pregenerated/func_ft_module.h 1970-01-01 00:00:00 +0000 |
4516 | +++ src/functions/pregenerated/func_ft_module.h 2012-04-26 16:53:22 +0000 |
4517 | @@ -0,0 +1,225 @@ |
4518 | +/* |
4519 | + * Copyright 2006-2008 The FLWOR Foundation. |
4520 | + * |
4521 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
4522 | + * you may not use this file except in compliance with the License. |
4523 | + * You may obtain a copy of the License at |
4524 | + * |
4525 | + * http://www.apache.org/licenses/LICENSE-2.0 |
4526 | + * |
4527 | + * Unless required by applicable law or agreed to in writing, software |
4528 | + * distributed under the License is distributed on an "AS IS" BASIS, |
4529 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
4530 | + * See the License for the specific language governing permissions and |
4531 | + * limitations under the License. |
4532 | + */ |
4533 | + |
4534 | +// ****************************************** |
4535 | +// * * |
4536 | +// * THIS IS A GENERATED FILE. DO NOT EDIT! * |
4537 | +// * SEE .xml FILE WITH SAME NAME * |
4538 | +// * * |
4539 | +// ****************************************** |
4540 | + |
4541 | + |
4542 | +#ifndef ZORBA_FUNCTIONS_FT_MODULE_H |
4543 | +#define ZORBA_FUNCTIONS_FT_MODULE_H |
4544 | + |
4545 | + |
4546 | +#include "common/shared_types.h" |
4547 | +#include "functions/function_impl.h" |
4548 | + |
4549 | + |
4550 | +namespace zorba { |
4551 | + |
4552 | + |
4553 | +void populate_context_ft_module(static_context* sctx); |
4554 | + |
4555 | + |
4556 | +#ifndef ZORBA_NO_FULL_TEXT |
4557 | + |
4558 | +//full-text:current-lang |
4559 | +class full_text_current_lang : public function |
4560 | +{ |
4561 | +public: |
4562 | + full_text_current_lang(const signature& sig, FunctionConsts::FunctionKind kind) |
4563 | + : |
4564 | + function(sig, kind) |
4565 | + { |
4566 | + |
4567 | + } |
4568 | + |
4569 | + CODEGEN_DECL(); |
4570 | +}; |
4571 | +#endif |
4572 | +#ifndef ZORBA_NO_FULL_TEXT |
4573 | + |
4574 | +//full-text:host-lang |
4575 | +class full_text_host_lang : public function |
4576 | +{ |
4577 | +public: |
4578 | + full_text_host_lang(const signature& sig, FunctionConsts::FunctionKind kind) |
4579 | + : |
4580 | + function(sig, kind) |
4581 | + { |
4582 | + |
4583 | + } |
4584 | + |
4585 | + CODEGEN_DECL(); |
4586 | +}; |
4587 | +#endif |
4588 | +#ifndef ZORBA_NO_FULL_TEXT |
4589 | + |
4590 | +//full-text:is-stem-lang-supported |
4591 | +class full_text_is_stem_lang_supported : public function |
4592 | +{ |
4593 | +public: |
4594 | + full_text_is_stem_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind) |
4595 | + : |
4596 | + function(sig, kind) |
4597 | + { |
4598 | + |
4599 | + } |
4600 | + |
4601 | + CODEGEN_DECL(); |
4602 | +}; |
4603 | +#endif |
4604 | +#ifndef ZORBA_NO_FULL_TEXT |
4605 | + |
4606 | +//full-text:is-stop-word |
4607 | +class full_text_is_stop_word : public function |
4608 | +{ |
4609 | +public: |
4610 | + full_text_is_stop_word(const signature& sig, FunctionConsts::FunctionKind kind) |
4611 | + : |
4612 | + function(sig, kind) |
4613 | + { |
4614 | + |
4615 | + } |
4616 | + |
4617 | + CODEGEN_DECL(); |
4618 | +}; |
4619 | +#endif |
4620 | +#ifndef ZORBA_NO_FULL_TEXT |
4621 | + |
4622 | +//full-text:is-stop-word-lang-supported |
4623 | +class full_text_is_stop_word_lang_supported : public function |
4624 | +{ |
4625 | +public: |
4626 | + full_text_is_stop_word_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind) |
4627 | + : |
4628 | + function(sig, kind) |
4629 | + { |
4630 | + |
4631 | + } |
4632 | + |
4633 | + CODEGEN_DECL(); |
4634 | +}; |
4635 | +#endif |
4636 | +#ifndef ZORBA_NO_FULL_TEXT |
4637 | + |
4638 | +//full-text:is-thesaurus-lang-supported |
4639 | +class full_text_is_thesaurus_lang_supported : public function |
4640 | +{ |
4641 | +public: |
4642 | + full_text_is_thesaurus_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind) |
4643 | + : |
4644 | + function(sig, kind) |
4645 | + { |
4646 | + |
4647 | + } |
4648 | + |
4649 | + CODEGEN_DECL(); |
4650 | +}; |
4651 | +#endif |
4652 | +#ifndef ZORBA_NO_FULL_TEXT |
4653 | + |
4654 | +//full-text:is-tokenizer-lang-supported |
4655 | +class full_text_is_tokenizer_lang_supported : public function |
4656 | +{ |
4657 | +public: |
4658 | + full_text_is_tokenizer_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind) |
4659 | + : |
4660 | + function(sig, kind) |
4661 | + { |
4662 | + |
4663 | + } |
4664 | + |
4665 | + CODEGEN_DECL(); |
4666 | +}; |
4667 | +#endif |
4668 | +#ifndef ZORBA_NO_FULL_TEXT |
4669 | + |
4670 | +//full-text:stem |
4671 | +class full_text_stem : public function |
4672 | +{ |
4673 | +public: |
4674 | + full_text_stem(const signature& sig, FunctionConsts::FunctionKind kind) |
4675 | + : |
4676 | + function(sig, kind) |
4677 | + { |
4678 | + |
4679 | + } |
4680 | + |
4681 | + CODEGEN_DECL(); |
4682 | +}; |
4683 | +#endif |
4684 | +#ifndef ZORBA_NO_FULL_TEXT |
4685 | + |
4686 | +//full-text:strip-diacritics |
4687 | +class full_text_strip_diacritics : public function |
4688 | +{ |
4689 | +public: |
4690 | + full_text_strip_diacritics(const signature& sig, FunctionConsts::FunctionKind kind) |
4691 | + : |
4692 | + function(sig, kind) |
4693 | + { |
4694 | + |
4695 | + } |
4696 | + |
4697 | + CODEGEN_DECL(); |
4698 | +}; |
4699 | +#endif |
4700 | +#ifndef ZORBA_NO_FULL_TEXT |
4701 | + |
4702 | +//full-text:thesaurus-lookup |
4703 | +class full_text_thesaurus_lookup : public function |
4704 | +{ |
4705 | +public: |
4706 | + full_text_thesaurus_lookup(const signature& sig, FunctionConsts::FunctionKind kind) |
4707 | + : |
4708 | + function(sig, kind) |
4709 | + { |
4710 | + |
4711 | + } |
4712 | + |
4713 | + CODEGEN_DECL(); |
4714 | +}; |
4715 | +#endif |
4716 | +#ifndef ZORBA_NO_FULL_TEXT |
4717 | + |
4718 | +//full-text:tokenize-string |
4719 | +class full_text_tokenize_string : public function |
4720 | +{ |
4721 | +public: |
4722 | + full_text_tokenize_string(const signature& sig, FunctionConsts::FunctionKind kind) |
4723 | + : |
4724 | + function(sig, kind) |
4725 | + { |
4726 | + |
4727 | + } |
4728 | + |
4729 | + CODEGEN_DECL(); |
4730 | +}; |
4731 | +#endif |
4732 | + |
4733 | + |
4734 | +} //namespace zorba |
4735 | + |
4736 | + |
4737 | +#endif |
4738 | +/* |
4739 | + * Local variables: |
4740 | + * mode: c++ |
4741 | + * End: |
4742 | + */ |
4743 | |
4744 | === modified file 'src/functions/pregenerated/function_enum.h' |
4745 | --- src/functions/pregenerated/function_enum.h 2012-04-24 12:39:38 +0000 |
4746 | +++ src/functions/pregenerated/function_enum.h 2012-04-26 16:53:22 +0000 |
4747 | @@ -138,6 +138,25 @@ |
4748 | FN_ZORBA_FETCH_CONTENT_2, |
4749 | FN_ZORBA_FETCH_CONTENT_TYPE_1, |
4750 | FN_PUT_2, |
4751 | + FULL_TEXT_CURRENT_LANG_0, |
4752 | + FULL_TEXT_HOST_LANG_0, |
4753 | + FULL_TEXT_IS_STEM_LANG_SUPPORTED_1, |
4754 | + FULL_TEXT_IS_STOP_WORD_1, |
4755 | + FULL_TEXT_IS_STOP_WORD_2, |
4756 | + FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1, |
4757 | + FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1, |
4758 | + FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2, |
4759 | + FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1, |
4760 | + FULL_TEXT_STEM_1, |
4761 | + FULL_TEXT_STEM_2, |
4762 | + FULL_TEXT_STRIP_DIACRITICS_1, |
4763 | + FULL_TEXT_THESAURUS_LOOKUP_1, |
4764 | + FULL_TEXT_THESAURUS_LOOKUP_2, |
4765 | + FULL_TEXT_THESAURUS_LOOKUP_3, |
4766 | + FULL_TEXT_THESAURUS_LOOKUP_4, |
4767 | + FULL_TEXT_THESAURUS_LOOKUP_6, |
4768 | + FULL_TEXT_TOKENIZE_STRING_1, |
4769 | + FULL_TEXT_TOKENIZE_STRING_2, |
4770 | FN_FUNCTION_NAME_1, |
4771 | FN_FUNCTION_ARITY_1, |
4772 | FN_PARTIAL_APPLY_2, |
4773 | |
4774 | === modified file 'src/runtime/full_text/CMakeLists.txt' |
4775 | --- src/runtime/full_text/CMakeLists.txt 2012-04-24 12:39:38 +0000 |
4776 | +++ src/runtime/full_text/CMakeLists.txt 2012-04-26 16:53:22 +0000 |
4777 | @@ -13,6 +13,7 @@ |
4778 | # limitations under the License. |
4779 | |
4780 | SET(FULLTEXT_SRCS |
4781 | + ft_util.cpp |
4782 | ft_match.cpp |
4783 | ft_query_item.cpp |
4784 | ft_single_token_iterator.cpp |
4785 | @@ -40,6 +41,7 @@ |
4786 | thesaurus.cpp |
4787 | tokenizer.cpp |
4788 | default_tokenizer.cpp |
4789 | + ft_module.cpp |
4790 | ) |
4791 | |
4792 | IF (ZORBA_NO_ICU) |
4793 | @@ -51,5 +53,5 @@ |
4794 | ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS) |
4795 | |
4796 | IF (ZORBA_WITH_FILE_ACCESS) |
4797 | - ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS) |
4798 | + ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS) |
4799 | ENDIF (ZORBA_WITH_FILE_ACCESS) |
4800 | |
4801 | === modified file 'src/runtime/full_text/apply.cpp' |
4802 | --- src/runtime/full_text/apply.cpp 2012-04-24 12:39:38 +0000 |
4803 | +++ src/runtime/full_text/apply.cpp 2012-04-26 16:53:22 +0000 |
4804 | @@ -26,13 +26,14 @@ |
4805 | #include "diagnostics/dict.h" |
4806 | #include "diagnostics/xquery_diagnostics.h" |
4807 | #include "store/api/item.h" |
4808 | +#include "store/api/item_factory.h" |
4809 | #include "store/api/store.h" |
4810 | -#include "store/api/item_factory.h" |
4811 | #include "system/globalenv.h" |
4812 | #include "util/cxx_util.h" |
4813 | #include "util/indent.h" |
4814 | #include "util/stl_util.h" |
4815 | #include "zorbamisc/ns_consts.h" |
4816 | +#include "zorbautils/locale.h" |
4817 | |
4818 | #ifndef NDEBUG |
4819 | # include "system/properties.h" |
4820 | @@ -1184,11 +1185,10 @@ |
4821 | { |
4822 | } |
4823 | |
4824 | - void operator()( char const *utf8_s, size_type utf8_len, size_type, |
4825 | - size_type, size_type, void* ) { |
4826 | - FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ ); |
4827 | - tokens_.push_back( t ); |
4828 | - } |
4829 | + // inherited |
4830 | + void item( Item const&, bool ); |
4831 | + void token( char const*, size_type, iso639_1::type, size_type, size_type, |
4832 | + size_type, Item const* ); |
4833 | |
4834 | private: |
4835 | FTTokenSeqIterator::FTTokens &tokens_; |
4836 | @@ -1196,51 +1196,72 @@ |
4837 | iso639_1::type const lang_; |
4838 | }; |
4839 | |
4840 | +void thesaurus_callback::item( Item const&, bool ) { |
4841 | + // out-of-line since it's virtual |
4842 | +} |
4843 | + |
4844 | +void thesaurus_callback::token( char const *utf8_s, size_type utf8_len, |
4845 | + iso639_1::type, size_type, size_type, |
4846 | + size_type, Item const* ) { |
4847 | + FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ ); |
4848 | + tokens_.push_back( t ); |
4849 | +} |
4850 | + |
4851 | } // anonymous namespace |
4852 | |
4853 | void ftcontains_visitor:: |
4854 | -lookup_thesaurus( ftthesaurus_id const &tid, zstring const &query_phrase, |
4855 | +lookup_thesaurus( ftthesaurus_id const &t_id, zstring const &query_phrase, |
4856 | FTToken const &qt0, query_item_star_t &result ) { |
4857 | ft_int at_least, at_most; |
4858 | - if ( ftrange const *const levels = tid.get_levels() ) |
4859 | + if ( ftrange const *const levels = t_id.get_levels() ) |
4860 | eval_ftrange( *levels, &at_least, &at_most ); |
4861 | else |
4862 | at_least = 0, at_most = numeric_limits<ft_int>::max(); |
4863 | |
4864 | - zstring const &uri = tid.get_uri(); |
4865 | + zstring const &uri = t_id.get_uri(); |
4866 | |
4867 | zstring error_msg; |
4868 | auto_ptr<internal::Resource> rsrc = static_ctx_.resolve_uri( |
4869 | - uri, internal::ThesaurusEntityData( qt0.lang() ), error_msg |
4870 | + uri, internal::EntityData::THESAURUS, error_msg |
4871 | ); |
4872 | if ( !rsrc.get() ) |
4873 | throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) ); |
4874 | |
4875 | - internal::Thesaurus::ptr thesaurus( |
4876 | - dynamic_cast<internal::Thesaurus*>( rsrc.release() ) |
4877 | - ); |
4878 | - if ( !thesaurus ) |
4879 | - throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) ); |
4880 | - |
4881 | - internal::Thesaurus::iterator::ptr tresult( |
4882 | + internal::ThesaurusProvider const *const t_provider = |
4883 | + dynamic_cast<internal::ThesaurusProvider const*>( rsrc.get() ); |
4884 | + ZORBA_ASSERT( t_provider ); |
4885 | + |
4886 | + internal::Thesaurus::ptr thesaurus; |
4887 | + if ( !t_provider->getThesaurus( qt0.lang(), &thesaurus ) ) |
4888 | + throw XQUERY_EXCEPTION( |
4889 | + zerr::ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED, |
4890 | + ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] ) |
4891 | + ); |
4892 | + |
4893 | + internal::Thesaurus::iterator::ptr t_synonyms( |
4894 | thesaurus->lookup( |
4895 | - query_phrase, tid.get_relationship(), at_least, at_most |
4896 | + query_phrase, t_id.get_relationship(), at_least, at_most |
4897 | ) |
4898 | ); |
4899 | - if ( !tresult ) |
4900 | + if ( !t_synonyms ) |
4901 | return; |
4902 | |
4903 | FTTokenSeqIterator::FTTokens synonyms; |
4904 | thesaurus_callback cb( qt0.pos(), qt0.lang(), synonyms ); |
4905 | |
4906 | - Tokenizer::Numbers tno; |
4907 | - Tokenizer::ptr tokenizer( |
4908 | - GENV_STORE.getTokenizerProvider()->getTokenizer( qt0.lang(), tno ) |
4909 | - ); |
4910 | + Tokenizer::Numbers t_num; |
4911 | + TokenizerProvider const *const provider = GENV_STORE.getTokenizerProvider(); |
4912 | + ZORBA_ASSERT( provider ); |
4913 | + Tokenizer::ptr tokenizer; |
4914 | + if ( !provider->getTokenizer( qt0.lang(), &t_num, &tokenizer ) ) |
4915 | + throw XQUERY_EXCEPTION( |
4916 | + zerr::ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED, |
4917 | + ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] ) |
4918 | + ); |
4919 | |
4920 | - for ( zstring synonym; tresult->next( &synonym ); ) { |
4921 | + for ( zstring synonym; t_synonyms->next( &synonym ); ) { |
4922 | synonyms.clear(); |
4923 | - tokenizer->tokenize( |
4924 | + tokenizer->tokenize_string( |
4925 | synonym.data(), synonym.size(), qt0.lang(), false, cb |
4926 | ); |
4927 | query_item_t const query_item( new FTTokenSeqIterator( synonyms ) ); |
4928 | |
4929 | === added file 'src/runtime/full_text/ft_module_impl.cpp' |
4930 | --- src/runtime/full_text/ft_module_impl.cpp 1970-01-01 00:00:00 +0000 |
4931 | +++ src/runtime/full_text/ft_module_impl.cpp 2012-04-26 16:53:22 +0000 |
4932 | @@ -0,0 +1,843 @@ |
4933 | +/* |
4934 | + * Copyright 2006-2008 The FLWOR Foundation. |
4935 | + * |
4936 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
4937 | + * you may not use this file except in compliance with the License. |
4938 | + * You may obtain a copy of the License at |
4939 | + * |
4940 | + * http://www.apache.org/licenses/LICENSE-2.0 |
4941 | + * |
4942 | + * Unless required by applicable law or agreed to in writing, software |
4943 | + * distributed under the License is distributed on an "AS IS" BASIS, |
4944 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
4945 | + * See the License for the specific language governing permissions and |
4946 | + * limitations under the License. |
4947 | + */ |
4948 | + |
4949 | +#include <zorba/config.h> |
4950 | + |
4951 | +#ifndef ZORBA_NO_FULL_TEXT |
4952 | + |
4953 | +# include <limits> |
4954 | +# include <typeinfo> |
4955 | + |
4956 | +# include <zorba/diagnostic_list.h> |
4957 | + |
4958 | +# include "api/unmarshaller.h" |
4959 | +# include "context/namespace_context.h" |
4960 | +# include "context/static_context.h" |
4961 | +# include "diagnostics/assert.h" |
4962 | +# include "diagnostics/xquery_diagnostics.h" |
4963 | +# include "store/api/index.h" |
4964 | +# include "store/api/item.h" |
4965 | +# include "store/api/item_factory.h" |
4966 | +# include "store/api/iterator.h" |
4967 | +# include "store/api/store.h" |
4968 | +# include "system/globalenv.h" |
4969 | +# include "types/casting.h" |
4970 | +# include "types/typeimpl.h" |
4971 | +# include "types/typeops.h" |
4972 | +# include "util/utf8_util.h" |
4973 | +# include "zorbatypes/URI.h" |
4974 | +# include "zorbautils/locale.h" |
4975 | + |
4976 | +# include "ft_stop_words_set.h" |
4977 | +# include "ft_token_seq_iterator.h" |
4978 | +# include "ft_util.h" |
4979 | +# include "thesaurus.h" |
4980 | + |
4981 | +#endif /* ZORBA_NO_FULL_TEXT */ |
4982 | + |
4983 | +#include "runtime/full_text/ft_module.h" |
4984 | + |
4985 | +using namespace std; |
4986 | +using namespace zorba::locale; |
4987 | + |
4988 | +namespace zorba { |
4989 | + |
4990 | +/////////////////////////////////////////////////////////////////////////////// |
4991 | + |
4992 | +#ifndef ZORBA_NO_FULL_TEXT |
4993 | +inline iso639_1::type get_lang_from( static_context const *sctx ) { |
4994 | + iso639_1::type const lang = get_lang_from( sctx->get_match_options() ); |
4995 | + return lang ? lang : get_host_lang(); |
4996 | +} |
4997 | + |
4998 | +static iso639_1::type get_lang_from( store::Item_t lang_item, |
4999 | + QueryLoc const &loc ) { |
5000 | + zstring lang_string; |
What's the change in modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp supposed to do? Isn't this introducing a memory leak?