Merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba

Proposed by Paul J. Lucas
Status: Superseded
Proposed branch: lp:~zorba-coders/zorba/feature-ft_module
Merge into: lp:zorba
Diff against target: 12463 lines (+7382/-1377)
241 files modified
ChangeLog (+3/-0)
cmake_modules/FindICU.cmake (+2/-0)
doc/zorba/ft_intro.dox (+8/-8)
doc/zorba/ft_stemmer.dox (+25/-7)
doc/zorba/ft_thesaurus.dox (+134/-88)
doc/zorba/ft_tokenizer.dox (+160/-61)
include/zorba/locale.h (+189/-15)
include/zorba/pregenerated/diagnostic_list.h (+8/-0)
include/zorba/stemmer.h (+23/-4)
include/zorba/thesaurus.h (+36/-22)
include/zorba/tokenizer.h (+136/-56)
include/zorba/uri_resolvers.h (+4/-3)
modules/com/zorba-xquery/www/modules/CMakeLists.txt (+7/-0)
modules/com/zorba-xquery/www/modules/full-text.xq (+872/-0)
modules/com/zorba-xquery/www/modules/full-text.xsd (+134/-0)
modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp (+1/-1)
modules/com/zorba-xquery/www/modules/pregenerated/errors.xq (+17/-0)
modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq (+1/-3)
scripts/zt-wn-get (+3/-3)
src/api/CMakeLists.txt (+3/-2)
src/api/staticcontextimpl.cpp (+2/-3)
src/api/stemmer_wrappers.cpp (+21/-12)
src/api/stemmer_wrappers.h (+2/-1)
src/api/thesaurus.cpp (+5/-3)
src/api/thesaurus_wrappers.cpp (+21/-0)
src/api/thesaurus_wrappers.h (+12/-0)
src/api/uri_resolver_wrappers.cpp (+15/-33)
src/api/xmldatamanagerimpl.cpp (+1/-1)
src/api/xmldatamanagerimpl.h (+1/-1)
src/compiler/codegen/plan_visitor.cpp (+1/-1)
src/compiler/expression/expr_put.cpp (+1/-0)
src/compiler/translator/translator.cpp (+45/-10)
src/context/CMakeLists.txt (+0/-5)
src/context/default_url_resolvers.cpp (+19/-5)
src/context/static_context.cpp (+12/-4)
src/context/static_context.h (+3/-0)
src/context/stemmer_wrappers.cpp (+0/-74)
src/context/stemmer_wrappers.h (+0/-63)
src/context/uri_resolver.cpp (+0/-13)
src/context/uri_resolver.h (+4/-23)
src/diagnostics/assert.cpp (+1/-1)
src/diagnostics/assert.h (+5/-1)
src/diagnostics/diagnostic_en.xml (+23/-3)
src/diagnostics/pregenerated/diagnostic_list.cpp (+12/-0)
src/diagnostics/pregenerated/dict_en.cpp (+12/-0)
src/functions/CMakeLists.txt (+4/-0)
src/functions/external_function.cpp (+1/-2)
src/functions/external_function.h (+0/-2)
src/functions/func_ft_module_impl.cpp (+128/-0)
src/functions/func_ft_module_impl.h (+81/-0)
src/functions/function.cpp (+3/-0)
src/functions/function.h (+9/-1)
src/functions/function_consts.h (+7/-0)
src/functions/library.cpp (+8/-0)
src/functions/pregenerated/func_ft_module.cpp (+490/-0)
src/functions/pregenerated/func_ft_module.h (+225/-0)
src/functions/pregenerated/function_enum.h (+19/-0)
src/runtime/full_text/CMakeLists.txt (+3/-1)
src/runtime/full_text/apply.cpp (+46/-25)
src/runtime/full_text/ft_module_impl.cpp (+843/-0)
src/runtime/full_text/ft_module_impl.h (+32/-0)
src/runtime/full_text/ft_query_item.h (+2/-1)
src/runtime/full_text/ft_single_token_iterator.h (+0/-2)
src/runtime/full_text/ft_stop_words_set.cpp (+32/-27)
src/runtime/full_text/ft_stop_words_set.h (+29/-14)
src/runtime/full_text/ft_token_matcher.cpp (+7/-7)
src/runtime/full_text/ft_token_matcher.h (+1/-1)
src/runtime/full_text/ft_token_seq_iterator.cpp (+7/-3)
src/runtime/full_text/ft_token_seq_iterator.h (+3/-0)
src/runtime/full_text/ft_token_span.h (+2/-2)
src/runtime/full_text/ft_util.cpp (+42/-0)
src/runtime/full_text/ft_util.h (+12/-1)
src/runtime/full_text/ftcontains_visitor.cpp (+0/-10)
src/runtime/full_text/full_text.h (+1/-1)
src/runtime/full_text/icu_tokenizer.cpp (+43/-14)
src/runtime/full_text/icu_tokenizer.h (+7/-5)
src/runtime/full_text/latin_tokenizer.cpp (+31/-13)
src/runtime/full_text/latin_tokenizer.h (+7/-5)
src/runtime/full_text/pregenerated/ft_module.cpp (+362/-0)
src/runtime/full_text/pregenerated/ft_module.h (+561/-0)
src/runtime/full_text/stemmer.cpp (+8/-2)
src/runtime/full_text/stemmer.h (+24/-5)
src/runtime/full_text/stemmer/sb_stemmer.cpp (+20/-13)
src/runtime/full_text/stemmer/sb_stemmer.h (+1/-0)
src/runtime/full_text/thesauri/wn_thesaurus.cpp (+79/-9)
src/runtime/full_text/thesauri/wn_thesaurus.h (+29/-5)
src/runtime/full_text/thesauri/xqftts_thesaurus.cpp (+28/-3)
src/runtime/full_text/thesauri/xqftts_thesaurus.h (+28/-4)
src/runtime/full_text/thesaurus.cpp (+34/-50)
src/runtime/full_text/thesaurus.h (+30/-4)
src/runtime/full_text/tokenizer.cpp (+86/-8)
src/runtime/spec/codegen-cpp.xq (+13/-5)
src/runtime/spec/codegen-h.xq (+1/-1)
src/runtime/spec/full_text/ft_module.xml (+208/-0)
src/runtime/spec/mappings.xml (+14/-2)
src/runtime/visitors/pregenerated/planiter_visitor.h (+91/-0)
src/runtime/visitors/pregenerated/printer_visitor.cpp (+196/-0)
src/runtime/visitors/pregenerated/printer_visitor.h (+65/-0)
src/store/naive/atomic_items.cpp (+10/-10)
src/store/naive/atomic_items.h (+8/-21)
src/store/naive/node_items.cpp (+39/-126)
src/store/naive/node_items.h (+17/-65)
src/unit_tests/stemmer.cpp (+11/-7)
src/unit_tests/string.cpp (+16/-0)
src/unit_tests/thesaurus.cpp (+52/-25)
src/unit_tests/tokenizer.cpp (+47/-28)
src/util/fs_util.h (+3/-0)
src/util/unicode_util.cpp (+14/-0)
src/util/unicode_util.h (+12/-0)
src/util/uri_util.h (+6/-6)
src/util/utf8_util.h (+2/-1)
src/util/utf8_util.tcc (+17/-10)
src/zorbatypes/ft_token.cpp (+1/-1)
src/zorbatypes/ft_token.h (+1/-1)
src/zorbatypes/numconversions.cpp (+18/-7)
src/zorbautils/locale.cpp (+385/-6)
src/zorbautils/locale.h (+375/-271)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-current-lang-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-strip-diacritics-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-2.xml.res (+1/-0)
test/rbkt/Queries/CMakeLists.txt (+13/-4)
test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-false-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.spec (+1/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq (+7/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq (+7/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq (+8/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq (+18/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq (+18/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq (+8/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.spec (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-1.spec (+0/-3)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-2.spec (+0/-3)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-3.spec (+1/-1)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-4.spec (+1/-1)
test/rbkt/Scripts/w3c/import_w3c_full_text_testsuite.sh (+1/-1)
test/rbkt/testdriver.cpp (+1/-1)
To merge this branch: bzr merge lp:~zorba-coders/zorba/feature-ft_module
Reviewer Review Type Date Requested Status
Markos Zaharioudakis Pending
Matthias Brantner Pending
Review via email: mp+103404@code.launchpad.net

This proposal supersedes a proposal from 2012-04-25.

Commit message

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.

Description of the change

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.

To post a comment you must log in.
Revision history for this message
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal

What's the change in modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp supposed to do? Isn't this introducing a memory leak?

review: Needs Information
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

The Remote Queue seems to have gotten itself into an infinite loop. I've notified Chris. PLEASE DO NOT SET THE PROPOSAL TO "APPROVED" TO START THE REMOTE QUEUE UNTIL THIS IS FIXED.

@Matthias: The "theSerStream" change was to fix a crash in the module. No, I don't remember why it's fixed in this branch. I think the change is OK as it is because theSerStream is deleted via cleanUpBody() that's called from endBody(). However, I changed the code to set theSerStream to null in cleanUpBody() and put the delete back in the destructor. (Deleting a null pointer is guaranteed to be harmless in C++.)

Revision history for this message
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal

What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:

> What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.

FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.

- Paul

Revision history for this message
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal

Other branches have been ok on the RQ machine with a 40-minute timeout. Why
does this branch take so much longer? If it added a great many slow tests
then that's probably ok, but if something is causing everything to go
slower then that's a problem. Maybe ask on zorba-dev if it would be
possible to run the pdash tests on this branch?
On Apr 22, 2012 3:31 PM, "Paul J. Lucas" <email address hidden> wrote:

> On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:
>
> > What seems to be happening with the queue is that the build/test is
> timing out after 40 minutes, which unfortunately doesn't log appropriately.
> I have increased the timeout to 60 minutes and we'll see if it runs. But
> before this gets merged, we should look and see if we can figure out why
> it's taking longer than normal to run with this build.
>
> FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.
>
> - Paul
>
>
> --
>
> https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/102972
> Your team Zorba Coders is subscribed to branch lp:zorba.
>
> --
> Mailing list: https://launchpad.net/~zorba-coders
> Post to : <email address hidden>
> Unsubscribe : https://launchpad.net/~zorba-coders
> More help : https://help.launchpad.net/ListHelp
>

Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

I diff'd the output times between the trunk and this branch and nothing jumps out as being significantly longer. (Some tests actually run in less time.) My latest running of the test suite on my machine took 69 minutes.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job feature-ft_module-2012-04-25T01-24-19.624Z is
  finished. The final status was:

  No tests were run - build or configure step must have failed.

  Not commiting changes.

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job feature-ft_module-2012-04-25T02-04-01.152Z is
  finished. The final status was:

  630 tests did not succeed - changes not commited.

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job feature-ft_module-2012-04-26T13-32-05.014Z is
  finished. The final status was:

  Undetermined, probably an error - please email <email address hidden> with the
  number of this job!

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

The attempt to merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba failed. Below is the output from the failed tests.

CMake Error at /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake:274 (message):
  Validation queue job feature-ft_module-2012-04-26T13-44-25.114Z is
  finished. The final status was:

  Undetermined, probably an error - please email <email address hidden> with the
  number of this job!

Error in read script: /home/ceej/zo/testing/zorbatest/tester/TarmacLander.cmake

10802. By Chris Hillery

Add zorba::Item::getTypeCode(), and make corresponding SchemaTypeCode enum part of the public API. Add zorba::ItemFactory::assignElementTypedValue() to allow construction of elements with typed values in the public API. As discussed, this API is not the prettiest, but it is the least intrusive while being the easiest for end-users to make use of. Approved: Markos Zaharioudakis, Chris Hillery, Matthias Brantner

10803. By Ghislain Fourny

Added an instruction to sort chained tests for deterministic order. Approved: Matthias Brantner, Markos Zaharioudakis

10804. By Paul J. Lucas

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements. Approved: Chris Hillery, Paul J. Lucas

10805. By Nicolae Brinza

Fixes for bugs #931501 and #866987 -- improved error messages for fn:format-number() Approved: Nicolae Brinza, Matthias Brantner

10806. By Rodolfo Ochoa

Collection Manager and Document Manager are ready on XQJ API. Approved: Cezar Andrei, Matthias Brantner

10807. By Chris Hillery

Fix remote queue failures due to change in 28msec.com. Approved: Till Westmann, Chris Hillery

10808. By Cezar Andrei <email address hidden>

Fix for Bug #857842 Assertion failed with simple content element with comments
Make use of XML_SCHEMA_NS definition from ns_consts.h. Approved: Matthias Brantner, Cezar Andrei

10809. By Sorin Marian Nasoi <email address hidden>

fix for lp:987830. Approved: Sorin Marian Nasoi, Markos Zaharioudakis

10810. By Sorin Marian Nasoi <email address hidden>

Added the STACK and QUEUE modules. Approved: Matthias Brantner, Sorin Marian Nasoi

10811. By Chris Hillery

Added note about re-running CMake if you add or remove .cpp files in your module source directory.
 Approved: Juan Zacarias, Chris Hillery

10812. By Nicolae Brinza

Added XQuery 3.0 Functions
path, has-children#0, nilled#0 Approved: Juan Zacarias, Matthias Brantner

10813. By Chris Hillery

Massively refactor, reformat, correct, and clean up Zorba's Build doc. Approved: William Candillon, Chris Hillery

10814. By Markos Zaharioudakis

1. Fixed memory leak in case of index truncation
2. Fixed bug in mergeUpdates() method
 Approved: Markos Zaharioudakis

10815. By Nicolae Brinza

Small optimization in the serializer to avoid a repeated string comparison Approved: Nicolae Brinza, David Graf

10816. By Markos Zaharioudakis

Fixed bug in MarkNodeCopyProps rule (static cast to replace_expr without changing the expr kind first) Approved: Markos Zaharioudakis

10817. By Markos Zaharioudakis

fixed bug #966706 (key uniqueness of value equality index not enforced) Approved: Markos Zaharioudakis, Till Westmann, Matthias Brantner

10818. By Markos Zaharioudakis

Fixed bug #862971 (no error upon duplicate function declarations) Approved: Markos Zaharioudakis

10819. By Paul J. Lucas

Fixed warnings and build error. Approved: David Graf, Paul J. Lucas

10820. By David Graf

fixing windows build Approved: Ghislain Fourny, David Graf

10821. By Till Westmann

enable blocking of internal modules by running through URI mapping (but not through URL resolution) during translation Approved: Matthias Brantner, Markos Zaharioudakis, Chris Hillery

10822. By Markos Zaharioudakis

fixed memory leak in population of value index Approved: Markos Zaharioudakis

10823. By Paul J. Lucas

Fixed typos. Approved: Matthias Brantner, Paul J. Lucas

10824. By Matthias Brantner

optimized ft:tokenize (no validation of tokens + factorized creation of qnames) Approved: Paul J. Lucas, Matthias Brantner

10825. By Chris Hillery

Removed note about JSONiq (not in 2.5 after all). Approved: Matthias Brantner, Chris Hillery

10826. By Markos Zaharioudakis

fixed bug 867170 Approved: Markos Zaharioudakis

10827. By Ghislain Fourny

Fixed bug 978254 (QName comparison in item sequence chainer ignored namespaces and took prefixes into account). Approved: Markos Zaharioudakis, Matthias Brantner

10828. By Paul J. Lucas

Fixes the stop-words core dump on 64-bit Linux. Approved: Matthias Brantner, Paul J. Lucas

10829. By Paul J. Lucas

1. Added fix for [\s] -- should now always throw FORX0002.
2. I think all regex tests "pass" using both pre- and post-4.0 of ICU. Approved: Ghislain Fourny, Paul J. Lucas

10830. By Matthias Brantner

no node copying during insertion into collection if the nodes are freshly constructed nodes Approved: Markos Zaharioudakis, Matthias Brantner

10831. By Matthias Brantner

- Add the ability to create a StreamableStringItem that re-uses the stream from another Streamable*Item (in a memory-ownership-safe way). Fixed bug #996084 (crash in Streamable*Item with file module)

- Add a base64:decode#2 function that also does transcoding to utf-8 Approved: Chris Hillery, Dennis Knochenwefel, William Candillon, Matthias Brantner

10832. By Paul J. Lucas

Added current-compare-options() function. Approved: Matthias Brantner, Paul J. Lucas

10833. By Matthias Brantner

note in the java documentation that it might be deprecated in favor of xqj Approved: Matthias Brantner, Rodolfo Ochoa

10834. By Paul J. Lucas

1. In transcoding streambufs, throwing std::invalid_argument for empty charsets.
2. In the HTTP code, now setting the charset to ISO-8859-1 in the constructor so it's set even when there's no Content-Type header. Approved: Dennis Knochenwefel, Paul J. Lucas

10835. By Paul J. Lucas

Documentation tweaks. Approved: Matthias Brantner, Paul J. Lucas

10836. By Nicolae Brinza

Documentation fixes. Approved: Matthias Brantner, Nicolae Brinza

10837. By Paul J. Lucas

Added link to full-text module.
Note that I have no way to test it. Approved: Matthias Brantner, Paul J. Lucas

10838. By Paul J. Lucas

Added check to see that JsonML (array form) really starts with '['. Approved: William Candillon, Paul J. Lucas

10839. By Sorin Marian Nasoi <email address hidden>

Updated the import scripts to use the W3C testsuites from 04-May-2012. Approved: Chris Hillery, Sorin Marian Nasoi

10840. By Rodolfo Ochoa

Complete XQJ Documentation Approved: Matthias Brantner, Cezar Andrei

10841. By Rodolfo Ochoa

BaseURI can now be cleared through a method.
When BaseUri is undefined it returns an empty string instead of asserting.
Fixed some compilation warnings to have a cleaner compiling.
Added #define stdafx.h to some files to fix the precompiled headers on Windows. Approved: Matthias Brantner, Chris Hillery

10842. By Matthias Brantner

use an instead of ann prefix in the documentation Approved: Matthias Brantner, Chris Hillery

10843. By Matthias Brantner

- no undo is done for collection truncate
- fix for bug #986377 "do not apply any updates on collection if it is to be truncated" Approved: Markos Zaharioudakis, Matthias Brantner

10844. By Nicolae Brinza

Documentation improvements. Fixed the type of the options parameter to the parse-fragment() function. Approved: Nicolae Brinza, Matthias Brantner

10845. By Paul J. Lucas

Renamed Tokenizer::Numbers to Tokenizer::State now (just prior to the 2.5 release) to give it a better name for the forthcoming addition of the ability to tokenize using include/exclude Item lists. At that time, State will most likely be expanded to include additional state information beyond just numbers, hence the name change.

(In the previous proposal, I had forgotten to update the documentation -- now done.) Approved: Matthias Brantner, Paul J. Lucas

10846. By Ghislain Fourny

Allowing general tree IDs (containing hexadecimal digits and dash) in structural URIs (test). Approved: Markos Zaharioudakis, Matthias Brantner

10847. By Matthias Brantner

replaced occurrences of XQuery version 1.1 with 3.0 Approved: Chris Hillery, Matthias Brantner

10848. By Chris Hillery

Fix (from Matthias) to ensure StaticContext used for invoke() lives as long as the ItemSequence returned from said invoke().
 Approved: Matthias Brantner, Chris Hillery

10849. By Matthias Brantner

- fixed itemfactory unit test on Windows
- disabled string unit test on Windows (because of bug #867271) Approved: Chris Hillery, Matthias Brantner

10850. By William Candillon

Remove dead links in the documentation. Approved: William Candillon, Matthias Brantner

10851. By Paul J. Lucas

Fixed the build error and also fixed several warnings that are new with Xcode 4.3.2. Approved: Matthias Brantner, Paul J. Lucas

10852. By Paul J. Lucas

Getting in another public API change for 2.5 for the full-text module since now's the time to do it. Renamed tokenize() to tokenize-node() for 2 reasons:

1. There already exists tokenize-string() and therefore tokenize-node() is a better name than just plain tokenize().

2. The forthcoming addition of the black & white tokenization function will most likely be called tokenize-nodes() -- plural. Approved: Matthias Brantner, Paul J. Lucas

10853. By William Candillon

Fix PHP build Approved: Paul J. Lucas, William Candillon

10854. By Sorin Marian Nasoi <email address hidden>

Update modules_svg generation target. Add mechanism for core build to detect whether a given non-core module exists (by URI). Use that method to check for graphviz before generating modules_svg. Approved: Sorin Marian Nasoi, Chris Hillery

10855. By Chris Hillery

Allow modules to specify their own libs for their Config file, in addition to the automatically-added libs from DECLARE_ZORBA_MODULE().
 Approved: Matthias Brantner, Chris Hillery

10856. By Matthias Brantner

- fetch:content-binary
- fetch:content#3 (with encoding parameter)
- StreamResource::isStreamSeekable to make sure the streamable strings returned by fetch are seekable.
- fixed two warnings in nodes_impl.cpp
- extended the C++ api to be symmetric to the fetch module Approved: Till Westmann, Chris Hillery, Matthias Brantner

10857. By Paul J. Lucas

Fixed a couple of warnings. Approved: Matthias Brantner, Paul J. Lucas

10858. By Matthias Brantner

fix for bug #1001463 (type not available during computation of function caching). Approved: Markos Zaharioudakis, Matthias Brantner

10859. By Matthias Brantner

Add support for function items test in the print xquery visitor. Approved: Matthias Brantner, William Candillon

10860. By Matthias Brantner

added stacks and queues to the data lifecycle documentation Approved: William Candillon, Matthias Brantner

10861. By Matthias Brantner

push-down of count(probe-index()) into the store Approved: Till Westmann, Matthias Brantner

10862. By Chris Hillery

Set CMAKE_INSTALL_RPATH_USE_LINK_PATH to TRUE to have correct RPATHs in installed binaries. Add comment about INSTALL_NAME_DIR. Approved: Matthias Brantner, Chris Hillery

10863. By Matthias Brantner

Update changelog, update module tags for Zorba 2.5. Approved: Matthias Brantner, Chris Hillery

10864. By Sorin Marian Nasoi <email address hidden>

Fixed bug lp:1001477. Approved: Rodolfo Ochoa, Sorin Marian Nasoi

10865. By Paul J. Lucas

No longer setting the charset of an overridden media type to a default value. Approved: David Graf, Matthias Brantner

10866. By Matthias Brantner

updated change log to reflect bug #1002867 Approved: Paul J. Lucas, Matthias Brantner

10867. By Matthias Brantner

added a cmake variable that allows to configure whether external jars are packaged or not (ZORBA_PACKAGE_EXTERNAL_JARS)
 Approved: Matthias Brantner, Chris Hillery

10868. By Paul J. Lucas

s/ZORBA_ASSERT/if/ Approved: Matthias Brantner, Paul J. Lucas

10869. By Paul J. Lucas

1. Fixed return type of ft:thesaurus-lookup().
2. Added a test to ensure that a look-up of a non-existent word works. Approved: Matthias Brantner, Paul J. Lucas

10870. By Rodolfo Ochoa

- Install added for XQJ bindings and documentation
- more documentation for all SWIG generated language bindings
- other installer related cleanups Approved: Matthias Brantner, Chris Hillery, Juan Zacarias

10871. By Markos Zaharioudakis

fixed bug #1006166 Approved: Markos Zaharioudakis

10872. By Markos Zaharioudakis

fixed bug 960083 (improper error handling of NaN comparisons) + small optimization of comparison operations Approved: Markos Zaharioudakis

10873. By Carlos Manuel Lopez

Implements new group by syntax, as defined in the XQuery 3.0 Spec since September 2011 Approved: Markos Zaharioudakis

10874. By Markos Zaharioudakis

Fixed bug #1003023 (optimizer problems due to common subexpression after var folding into if-then-else) Approved: Markos Zaharioudakis

10875. By Markos Zaharioudakis

fixed bug #854506 and partial fix for bug #867008 Approved: Markos Zaharioudakis

10876. By Markos Zaharioudakis

Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428) Approved: Markos Zaharioudakis

10877. By Markos Zaharioudakis

Fixed bug #991088 (raise XUST0001 in trycatch with mixed updating and simple clauses)
 Approved: Markos Zaharioudakis

10878. By Till Westmann

add location information to ZXQP0029_URI_ACCESS_DENIED Approved: Chris Hillery, Markos Zaharioudakis

10879. By Till Westmann

remove unnecessary annotations and clean-up indentation Approved: Ghislain Fourny, Till Westmann

10880. By Markos Zaharioudakis

Merged the JSONiq branch into the zorba trunk, with JSONiq deactivated. Approved: Matthias Brantner, Ghislain Fourny, Markos Zaharioudakis

10881. By Markos Zaharioudakis

renamed file test/unit/static_context.cpp to avoid conflict with src/context/static_context.cpp during debugging Approved: Markos Zaharioudakis

10882. By Markos Zaharioudakis

Fixed bug #1008082 (bug in transform expr when a copy var is not used anywhere) Approved: Markos Zaharioudakis

10883. By Markos Zaharioudakis

Fixed bug #932314 (non-comparable values must be treated as distinct by fn:distinct-values) Approved: Markos Zaharioudakis

10884. By Ghislain Fourny

The builtin schema type names, in the store, are now created directly from the pool (this is to prevent a cyclic dependency if another item factory than the simple item factory needs to access the store to produce the QNames, because at this point the store has not been initialized yet). Approved: Matthias Brantner, Markos Zaharioudakis

10885. By Markos Zaharioudakis

Plan serializer does not serialize expressions anymore. Approved: Markos Zaharioudakis

10886. By Markos Zaharioudakis

added serialize_csize function Approved: Markos Zaharioudakis

10887. By Rodolfo Ochoa

Error fixed on windows Approved: Chris Hillery, Cezar Andrei

10888. By Till Westmann

Added support for transient maps in the unordered-maps module. Approved: Matthias Brantner, Till Westmann

10889. By Nicolae Brinza

Improved parser error messages (bug #867357). Approved: David Graf, Matthias Brantner

10890. By Ghislain Fourny

Fixes csize serialization. Approved: Markos Zaharioudakis, Ghislain Fourny

10891. By Ghislain Fourny

Adding asserts in OrdPath::getLocalBitLength to prevent endless loops and possibly reproduce such a potential endless loop with more information. Approved: David Graf, Markos Zaharioudakis

10892. By Dennis Knochenwefel

Cleaning up attribute and element nodes' type handling and checking invariants, following an inconsistency discovered on Windows. Approved: Markos Zaharioudakis, Matthias Brantner, Dennis Knochenwefel

10893. By Nicolae Brinza

Fixed and enabled fn:parse-xml-fragment(). Approved: Juan Zacarias, Matthias Brantner

10894. By Paul J. Lucas

Added base64::streambuf class and replaced horribly inefficient base64 code. Approved: Matthias Brantner, Paul J. Lucas

10895. By Rodolfo Ochoa

Adding cacert.pem for Windows Approved: Cezar Andrei, Chris Hillery

10896. By Markos Zaharioudakis

A better fix for compilation problems involving integer types. Approved: Markos Zaharioudakis

10897. By Paul J. Lucas

Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas

10898. By Markos Zaharioudakis

Optimized hash sets used by fn:distinct-values and nodes-distinct Approved: Markos Zaharioudakis

10899. By Till Westmann

modify fn:path to add support for fragments Approved: Juan Zacarias, Matthias Brantner, Till Westmann

10900. By Paul J. Lucas

Added C++98 subset of C++11's unordered_map. The configure process checks for a working C++11 unordered_map first: if found, it will be used instead. The goal is to phase out the old hash*map* classes over time. Approved: Markos Zaharioudakis, Matthias Brantner

10901. By Till Westmann

add ref:has-node-reference and ref:assign-node-reference Approved: Markos Zaharioudakis, Matthias Brantner

10902. By Paul J. Lucas

Added the missing specialization for hash<unique_ptr<T,D>>. Approved: Matthias Brantner, Paul J. Lucas

10903. By Matthias Brantner

- added API function Item:isSeekable
- fixed a problem in the file module where all exceptions thrown in the body were caught and rethrown as file exceptions Approved: Chris Hillery, Matthias Brantner

10904. By David Graf

add positional skipping to access of static and dynamic collections Approved: Matthias Brantner, Till Westmann

10905. By Markos Zaharioudakis

Fixed bug #1018673 + renamed misleading isXXXNode() methods. Approved: Markos Zaharioudakis

10906. By Markos Zaharioudakis

Optimized hash function used for nodes (fixes bug #1010051) + some hashmap/hashset cleanup Approved: Markos Zaharioudakis

10907. By Markos Zaharioudakis

Fixed bug #1016429 (scoping of copy variables in transform expr) Approved: Markos Zaharioudakis

10908. By Paul J. Lucas

Added tokenize-nodes() function. Approved: Matthias Brantner, Paul J. Lucas

10909. By David Graf

windows build fix Approved: Dennis Knochenwefel, David Graf

10910. By Ghislain Fourny

Differentiating between two reasons why references are unregistered. Approved: Markos Zaharioudakis, Matthias Brantner

10911. By Ghislain Fourny

Fixed a bug that prevented Zorba from being compiled in single-threaded mode. Approved: Markos Zaharioudakis, Ghislain Fourny

10912. By Dennis Knochenwefel

fix for bug #1020953 (access of freed object) Approved: Ghislain Fourny, Nicolae Brinza

10913. By Ghislain Fourny

Adding a NOT ZORBA_HAVE_UNIQUE_PTR guard around the unique_ptr test, for consistency. There are guards for including the corresponding resources, so the test was failing for lack of them.
 Approved: Markos Zaharioudakis, Till Westmann

10914. By Dennis Knochenwefel

fixed memory leak reading data of zstring after destruction. Encoded illegal char in exception to not cause any problems. Approved: Till Westmann, Ghislain Fourny

10915. By Dennis Knochenwefel

Fixed CHECK_CXX_SOURCE_COMPILES not working with backslash-n when using cmake 2.8.8 on Windows. Approved: Paul J. Lucas, Dennis Knochenwefel

10916. By Paul J. Lucas

Now checking data size before accessing vector[0]. Approved: Dennis Knochenwefel, Paul J. Lucas

10917. By David Graf

Unifying .bat and .vcproj generation. Bug #1013075 Approved: Rodolfo Ochoa, Dennis Knochenwefel

10918. By Chris Hillery

EXPECTED_FAILURE()s for two tests failing due to flworfound.org changes. Approved: Matthias Brantner, Chris Hillery

10919. By Paul J. Lucas

1. Added missing "lang" attribute to tokens generated from tokenize-nodes().
2. Added a test. Approved: Chris Hillery, Paul J. Lucas

10920. By Paul J. Lucas

Moved URI resolution for the thesaurus into the translator. Approved: Matthias Brantner, Paul J. Lucas

10921. By Paul J. Lucas

Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas

10922. By Markos Zaharioudakis

Fixed bug #1022557 (subsequence function applied on window variable) Approved: Markos Zaharioudakis

10923. By Ghislain Fourny

Several modifications in the implementation of JSONiq to make it in sync with the new specification, in particular:
- No more pairs,
- Minor changes to the update syntax
- Synced returned error codes
- Support for collections and indices, including automatic maintenance
- Serializer extended to allow mixed JDM and XDM
- Updated C++ API Approved: Matthias Brantner, Ghislain Fourny, Chris Hillery, Markos Zaharioudakis

10924. By Markos Zaharioudakis

added auditing in zorba cmd + fixed bug in auditing the parse time Approved: Markos Zaharioudakis

10925. By Markos Zaharioudakis

Fixed bug #1002993 (bug during revalidation after update due to improper condition for calling TypeOps::get_atomic_type_code() from SchemaValidatorImpl::isPossibleSimpleContentRevalImpl()) Approved: Cezar Andrei, Markos Zaharioudakis

10926. By Ghislain Fourny

Adding two tests for allowing two errors upon sequence of several JSON items and an XML node. Approved: Till Westmann, Matthias Brantner

10927. By Markos Zaharioudakis

Avoid (if possible) treat expr for checking that the value of a non-external global variable conforms to the type declaration of the variable Approved: Markos Zaharioudakis

10928. By Nicolae Brinza

Implemented the new EQName syntax. Approved: Matthias Brantner, Nicolae Brinza

10929. By Paul J. Lucas

Reverted previous "fix." Approved: Matthias Brantner, Paul J. Lucas

10930. By Markos Zaharioudakis

Streaming execution for tumbling windows (also fixes bug #1010051) Approved: Markos Zaharioudakis

10931. By Ghislain Fourny

Fixed build errors occurring on Windows by exposing structured item types even if JSONiq is deactivated. Approved: Matthias Brantner, Markos Zaharioudakis

10932. By Markos Zaharioudakis

Incremental maintenance for general indexes. Approved: Markos Zaharioudakis

10933. By Paul J. Lucas

Fixed typo. Approved: Chris Hillery, Paul J. Lucas

10934. By Sorin Marian Nasoi <email address hidden>

Updated the F&O 1.1 and 3.0 documents used in the comparison of the functions and corrected the util:download-and-write-spec function. Approved: Chris Hillery, Sorin Marian Nasoi

10935. By Chris Hillery

Add test case demonstrating bug 1010728. Approved: Markos Zaharioudakis, Chris Hillery

10936. By David Graf

Removed internal debug info from fn:trace out by using zorba serializer instead of the internal show function. Approved: Chris Hillery, David Graf

10937. By Dennis Knochenwefel

Added case for DT_UNKNOWN. Approved: Dennis Knochenwefel, Paul J. Lucas

10938. By Rodolfo Ochoa

Fix for precompiled headers on Windows Approved: Rodolfo Ochoa, Chris Hillery

10939. By Cezar Andrei <email address hidden>

Integrate:
  Fix for bug 1023120 xs:include ignored in schemas.
  Added a new test for this case. Approved: Chris Hillery, David Graf

10940. By Till Westmann

fix position for elements and processing-instructions in fn:path Approved: Markos Zaharioudakis, Matthias Brantner

10941. By Nicolae Brinza

Added support for the unix shebang script launcher (#!/path/interpreter) Approved: Nicolae Brinza, David Graf

10942. By Paul J. Lucas

Now using enable_if for more functions to make overload resolution work better. Approved: Matthias Brantner, Paul J. Lucas

10943. By Rodolfo Ochoa

Disabling "print stack trace" for windows Approved: Chris Hillery, Rodolfo Ochoa

10944. By Paul J. Lucas

Now handling UTF-16 surrogate pairs. Approved: Dennis Knochenwefel, Paul J. Lucas

10945. By Nicolae Brinza

The parse-fragment function now allows a DOCTYPE declaration in the input. Approved: Nicolae Brinza, Matthias Brantner

10946. By Paul J. Lucas

Added functions to test for and create UTF-16 surrogate pairs.
These will probably be needed by whoever fixes bug #1025622. Approved: Dennis Knochenwefel, Paul J. Lucas

10947. By Ghislain Fourny

Fix that gives precedence to an array replacement over an array deletion, and adds tests about multiple updates with same selector on arrays and on objects. Approved: Markos Zaharioudakis, Matthias Brantner

10948. By Paul J. Lucas

Now doing proper JSON serialization. Approved: Chris Hillery, Dennis Knochenwefel, Paul J. Lucas

10949. By Paul J. Lucas

No longer checking captured subgroups in replacement string when 'q' flag is given. Approved: Matthias Brantner, Paul J. Lucas

10950. By Matthias Brantner

removed a non-core module dependency from the reuse-stream test Approved: Chris Hillery, Matthias Brantner

10951. By Markos Zaharioudakis

removed some debugging code, which causes Windows compilation problems Approved: Markos Zaharioudakis

10952. By Paul J. Lucas

Removed zorbatypes/transcoder.h & .cpp. Approved: Matthias Brantner, Paul J. Lucas

10953. By Paul J. Lucas

Now properly serializing JSON for JsonML. Approved: Chris Hillery, Paul J. Lucas

10954. By Paul J. Lucas

1. Tweaked equals().
2. Added "const&" to std_string function arguments. Approved: Matthias Brantner, Paul J. Lucas

10955. By Matthias Brantner

fix for bug #898066 (Stringstream & fn:trace) Approved: Chris Hillery, Matthias Brantner

10956. By Sorin Marian Nasoi <email address hidden>

- added information about the Zorba and XQTS versions that were used in generating the reports
- updated the README.txt with info regarding conformance reports generation Approved: William Candillon, Sorin Marian Nasoi

10957. By Ghislain Fourny

Activating ZORBA_WITH_JSON by default. Approved: Chris Hillery, Matthias Brantner

10958. By Rodolfo Ochoa

Documentation fixed for Zorba binary package with PHP. Approved: Cezar Andrei, Chris Hillery

10959. By Paul J. Lucas

s/0/npos/

This probably fixes some as-of-yet-undiscovered bug. Approved: Matthias Brantner, Paul J. Lucas

10960. By Rodolfo Ochoa

C# API Binding Approved: Cezar Andrei, Chris Hillery

10961. By Ghislain Fourny

Corrects bug 1029836. Approved: Chris Hillery, Matthias Brantner

10962. By Ghislain Fourny

Fixing Windows build with JSONiq. Approved: Chris Hillery, Matthias Brantner

10963. By Paul J. Lucas

Removed JsonML-object. Approved: Ghislain Fourny, Paul J. Lucas

10964. By Matthias Brantner

new ItemFactory function that allows creating dateTime items without timezone Approved: Chris Hillery, Matthias Brantner

10965. By Ghislain Fourny

Fixes bug 1032166 (critical memory bug). Approved: Chris Hillery, Matthias Brantner

10966. By Paul J. Lucas

1. s/take/swap/
2. Removed unnecessary assignment. Approved: Matthias Brantner, Paul J. Lucas

10967. By Juan Zacarias

Fix of bugs
Bug #1014979: Make thesaurus optional component on Windows
Bug #1014981: Make default thesaurus available as Ubuntu package
Also Fixed FindJNI.cmake to be found-able in the Ubuntu Installer Approved: Juan Zacarias, Chris Hillery

10968. By David Graf

Make testdriver_mt work with boost version >1.49. Approved: David Graf, Chris Hillery

10969. By Matthias Brantner

Added a keys() function to the index dml module. This fixes bug #900677. Approved: Matthias Brantner, David Graf

10970. By Chris Hillery

Test temporarily disabled while http-client is being updated. Also get HEAD of OAuth module rather than zorba-2.5 tag for some reason. Approved: Juan Zacarias, Chris Hillery

10971. By Chris Hillery

Split image into image + graphviz; split data-converters into csv + html. Approved: Juan Zacarias, Chris Hillery

10972. By William Candillon

Fix make doc target when multiple versions of the same module exist. Approved: Sorin Marian Nasoi, Matthias Brantner

10973. By Ghislain Fourny

Correct array insert expression syntax to include [] like append expressions. Approved: Matthias Brantner, Chris Hillery

10974. By Juan Zacarias

Fixed windows installer component options for c# Bindings Approved: Rodolfo Ochoa, Chris Hillery

10975. By Matthias Brantner

removed some hardcoded english words from error messages raised by the json parser Approved: Paul J. Lucas, Matthias Brantner

10976. By Chris Hillery

Regenerate scanner and parser with flex/bison 2.5 - apparently 2.6 creates code that Clang doesn't like. Approved: Matthias Brantner, Chris Hillery

10977. By Matthias Brantner

prepare ChangeLog for 2.6 release Approved: Chris Hillery, Matthias Brantner

10978. By Chris Hillery

Bump version number to 2.6; update tagged modules; add archive module. Approved: Cezar Andrei, Sorin Marian Nasoi, Chris Hillery

10979. By David Graf

Since OSX Mountain Lion, clang is the default c++ compiler on mac. Therefore, it is not enough to check if the c++ compiler executable is called clang. Approved: David Graf, Paul J. Lucas

10980. By Chris Hillery

Add "generation" of local copy of FlexLexer.h, to ensure it always stays in sync with the generated lexer .cpp file. Approved: Paul J. Lucas, Chris Hillery

10981. By Sorin Marian Nasoi <email address hidden>

Fixed the XQDoc documentation issue related to the formatting of the parameters for the higher order functions. Approved: William Candillon, Sorin Marian Nasoi

10982. By Chris Hillery

Try to find FlexLexer.h associated with the flex binary being used.
 Approved: David Graf, Chris Hillery

10983. By Markos Zaharioudakis

Fixed bug #1033407 (do not store var_expr rchandles in the static context) Approved: Markos Zaharioudakis

10984. By Markos Zaharioudakis

Fixed bugs #899364 and 899363 (throw XQST0103 in case of non-distinct window variables)
Fixed bug #899366 (enforce the type declaration of a window variable) Approved: Markos Zaharioudakis

10985. By Markos Zaharioudakis

Fixed bug #1024892 (index declaration references udf declared after the index) Approved: Markos Zaharioudakis

10986. By David Graf

remove clang warnings in auditing code Approved: Matthias Brantner, Till Westmann, David Graf

10987. By Chris Hillery

added the archive module to ExternalModules.conf and the ChangeLog Approved: Luis Rodriguez Gonzalez, Juan Zacarias, Chris Hillery, Matthias Brantner

10988. By David Graf

Because the jsoniq_emitter aggregates the xml_emitter, it needs to hand over all the parameters, including aEmitAttributes. Approved: Till Westmann, David Graf

10989. By luisrod <luisrod@LUISROD-LAP>

- Added code for bug#1025564 "Deprecate -f argument to zorbacmd"

10990. By Markos Zaharioudakis

Allow prolog variables to be referenced before they are declared (XQuery 3.0 feature) (fixes bug #900688) Approved: Markos Zaharioudakis

10991. By Chris Hillery

Revert unintentional commit r10989. Approved: Luis Rodriguez Gonzalez, Chris Hillery

10992. By Ghislain Fourny

Updated JSONiq tutorial. Approved: William Candillon, Matthias Brantner

10993. By Paul J. Lucas

Miscellaneous changes, some a prerequisite for LLVM that should be done anyway and not have to wait for the far-in-the-future LLVM branch merge. Approved: Matthias Brantner, Paul J. Lucas

10994. By Ghislain Fourny

Specifying collection and property upon ZDST0006. Approved: Till Westmann, Matthias Brantner

10995. By Paul J. Lucas

QueryLoc clean-up:
1. Added all-argument constructor (needed for LLVM).
2. Removed pointless copy constructor since default is fine.
3. Removed pointless virtual destructor (there are no virtual functions!). Approved: Matthias Brantner, Paul J. Lucas

10996. By Nicolae Brinza

Dynamically computed strings can now be cast to xs:QName. Fixes bug #898792 Approved: Nicolae Brinza, Chris Hillery

10997. By Till Westmann

move appending of ${requiredlibs-store} to requiredlibs to a place where it works Approved: Ghislain Fourny, Till Westmann

10998. By Ghislain Fourny

Fixes a bug that makes Zorba crash upon inserting more than one pair. Approved: Till Westmann, Matthias Brantner

10999. By Ghislain Fourny

Adds a method isEncoded to user-typed atomic items. Approved: Till Westmann, Matthias Brantner

11000. By Sorin Marian Nasoi <email address hidden>

fix for lp:969251. Approved: Ghislain Fourny, Sorin Marian Nasoi

11001. By Ghislain Fourny

Made URI computation lazy in StructuralAnyUri. Approved: Markos Zaharioudakis, Matthias Brantner

11002. By Ghislain Fourny

Fixes a Windows compiler error (bug 1040558). Approved: Luis Rodriguez Gonzalez, Juan Zacarias

11003. By William Candillon

Enable XML output from doxygen by default. Approved: Chris Hillery, Matthias Brantner

11004. By Markos Zaharioudakis

Fixed bug #1038410 (Memory leaks in parser, trace iterator, and general index) Approved: Markos Zaharioudakis

11005. By Markos Zaharioudakis

Fixed bug #1042840 (qname pool free-list corruption) Approved: Markos Zaharioudakis

11006. By Chris Hillery

Restoring execute bit to a bunch of scripts. Approved: Juan Zacarias, Matthias Brantner, Chris Hillery

11007. By Carlos Manuel Lopez

New memory management for compiler expressions (fixes bug #1036111) Approved: Markos Zaharioudakis

11008. By Ghislain Fourny

Simplified JSON items class and fixed some Xml Node static casts to handle JSON items as well. Approved: Markos Zaharioudakis, Matthias Brantner

11009. By Markos Zaharioudakis

Fixed bug #866984 (better error message for an eval error) Approved: Markos Zaharioudakis

11010. By Ghislain Fourny

Fixing a memory leak in append update primitive. Approved: Matthias Brantner, Till Westmann

11011. By William Candillon

Remove deprecated reference to the old sourceforge mailing-list. Approved: Matthias Brantner, William Candillon

11012. By Nicolae Brinza

Fixes for bugs #1023170, #1024033, #1027270 Approved: Chris Hillery, Matthias Brantner

11013. By Markos Zaharioudakis

rchandle cleanup Approved: Markos Zaharioudakis

11014. By Paul J. Lucas

Replaced UUID with thin layer over native platform implementation. Approved: Chris Hillery, Rodolfo Ochoa, Matthias Brantner, Paul J. Lucas

11015. By Chris Hillery

Corrected HTML serialization of empty elements. Added test cases for XHTML.
 Approved: Matthias Brantner, Chris Hillery

11016. By Chris Hillery

Fixes debug mode crash because of missing dictionary entries. Approved: William Candillon, Chris Hillery

11017. By Ghislain Fourny

Fixed bug 1041411 (prefixed true/false/null should be interpreted as name tests). Approved: Matthias Brantner, Chris Hillery

11018. By Ghislain Fourny

Removing superfluous store/naive prefixes in store includes. Approved: Till Westmann, Matthias Brantner

11019. By Paul J. Lucas

Suppressed warnings; moved gcc diagnostic push macros to config.h. Approved: Matthias Brantner, Paul J. Lucas

11020. By Till Westmann

add dependency on libuuid

11021. By Till Westmann

ensure deterministic test results for keys of index on unordered collection by sorting

11022. By Till Westmann

Some fixes in TreeID API and some include cleanup.

11023. By Matthias Brantner

Adding missing JSONiq library functions.

11024. By Matthias Brantner

Updated JSONiq tutorial.

11025. By Matthias Brantner

implementation of parse-json#2 allowing multiple top-level items

11026. By Matthias Brantner

more tests for jn:parse-json

11027. By Matthias Brantner

adapted changelog regarding jn:parse-json

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ChangeLog'
2--- ChangeLog 2012-04-25 17:16:48 +0000
3+++ ChangeLog 2012-04-26 16:53:22 +0000
4@@ -10,6 +10,7 @@
5 * fn:unparsed-text-available
6 * Extended API for Python, Java, PHP and Ruby.
7 * Add jvm classpath to zorbacmd and to Zorba API. Tracked by #931816
8+ * Added full-text module.
9 * Added support for NO_ICU (to not use ICU for unicode processing)
10 * Added XQJ support.
11
12@@ -89,6 +90,8 @@
13 * Fixed bug 867509 (Can not handle largest xs:unsignedLong values)
14 * Fixed bug 924063 (sentence is incorrectly incremented when token characters end without sentence terminator)
15 * Fixed bug 909126 (bug in cloning of var_expr)
16+ * Fixed bug 928631 (external builtin function were not executed in the module they
17+ were declared)
18 * Fixed bug in destruction of exit_catcher_expr
19 * Fixed bug #867024 (error messages)
20 * Fixed bug #957580 (stream read failure in StringToCodepointsIteartor)
21
22=== modified file 'cmake_modules/FindICU.cmake'
23--- cmake_modules/FindICU.cmake 2012-04-24 14:35:54 +0000
24+++ cmake_modules/FindICU.cmake 2012-04-26 16:53:22 +0000
25@@ -28,6 +28,8 @@
26 # (note: in addition to ICU_LIBRARIES)
27 # ICU_DATA_LIBRARIES - Libraries to link against for ICU data
28 #
29+# ICU_VERSION - ICU's version number.
30+#
31
32 # Look for the header file.
33 find_path(
34
35=== modified file 'doc/zorba/ft_intro.dox'
36--- doc/zorba/ft_intro.dox 2012-04-24 12:39:38 +0000
37+++ doc/zorba/ft_intro.dox 2012-04-26 16:53:22 +0000
38@@ -5,9 +5,9 @@
39 specification.
40 Additional documentation:
41
42- - \ref ft_stemmer
43- - \ref ft_thesaurus
44- - \ref ft_tokenizer
45+- \ref ft_stemmer
46+- \ref ft_thesaurus
47+- \ref ft_tokenizer
48
49 \section ft_unimplemented Unimplemented Features
50
51@@ -16,11 +16,11 @@
52 implemented.
53 The features that are not (completely) implemented are:
54
55- - The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a>
56- (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187470">3187470</a>).
57- - <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a>
58- and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a>
59- (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187462">3187462</a>).
60+- The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a>
61+ (bug <a href="https://bugs.launchpad.net/zorba/+bug/866924">866924</a>).
62+- <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a>
63+ and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a>
64+ (bug <a href="https://bugs.launchpad.net/zorba/+bug/866923">866923</a>).
65
66 */
67 /* vim:set et sw=2 ts=2: */
68
69=== modified file 'doc/zorba/ft_stemmer.dox'
70--- doc/zorba/ft_stemmer.dox 2012-04-24 12:39:38 +0000
71+++ doc/zorba/ft_stemmer.dox 2012-04-26 16:53:22 +0000
72@@ -56,7 +56,12 @@
73 public:
74 typedef /* implementation-defined */ ptr;
75
76+ struct Properties {
77+ char const *uri;
78+ };
79+
80 virtual void destroy() const = 0;
81+ virtual void properties( Properties *result ) const = 0;
82 virtual void stem( String const &word, locale::iso639_1::type lang, String *result ) const = 0;
83 protected:
84 virtual ~Stemmer();
85@@ -89,6 +94,8 @@
86 Note that \c result should always be set to something.
87 If your stemmer doesn't know how to stem the given word,
88 you should set \c result to \c word.
89+You also need to implement the \c properties() function
90+and set the identifying URI of your stemmer.
91
92 A very simple stemmer
93 that stems the word "foobar" to "foo"
94@@ -98,6 +105,7 @@
95 class MyStemmer : public Stemmer {
96 public:
97 void destroy() const;
98+ void properties( Properties *result ) const;
99 void stem( String const &word, locale::iso639_1::type lang, String *result ) const;
100 private:
101 MyStemmer();
102@@ -108,6 +116,10 @@
103 // Do nothing since we statically allocate a singleton instance of our stemmer.
104 }
105
106+void MyStemmer::properties( Properties *props ) const {
107+ props->uri = "http://my.example.com/zorba/full-text/stemmer";
108+}
109+
110 void MyStemmer::stem( String const &word, locale::iso639_1::type lang, String *result ) const {
111 if ( word == "foobar" )
112 *result = "foo";
113@@ -120,7 +132,6 @@
114 or a dictionary look-up
115 to stem many words,
116 of course.
117-
118 Although not used in this simple example,
119 \c lang can be used to allow a single stemmer instance
120 to stem words in more than one language.
121@@ -135,16 +146,24 @@
122 class StemmerProvider {
123 public:
124 virtual ~StemmerProvider();
125- virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0;
126+ virtual bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const = 0;
127 };
128 \endcode
129
130+The \c getStemmer() function should return \c true
131+only if it can provide a \c Stemmer
132+for the given language; \c false otherwise.
133+If the \c Stemmer::ptr argument is \c null,
134+the caller wants to check only whether the provider
135+can provide a stemmer for the given language
136+and doesn't want a \c Stemmer instance created or returned.
137+
138 A simple \c StemmerProvider for our simple stemmer can be implemented as:
139
140 \code
141 class MyStemmerProvider : public StemmerProvider {
142 public:
143- Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const;
143+ bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const;
145 };
146
147 Stemmer::ptr MyStemmerProvider::getStemmer( locale::iso639_1::type lang ) const {
148@@ -154,15 +173,14 @@
149 case iso639_1::en:
150 case iso639_1::unknown: // Handle "unknown" language since, in many cases, the language is not known.
151 result.reset( &stemmer );
152- break;
153+ return true;
154 default:
155 //
156- // We have no stemmer for the given language: leave the result as null to indicate this.
157+ // We have no stemmer for the given language: return false.
158 // Zorba will then use the built-in stemmer for the given language.
159 //
160- break;
161+ return false;
162 }
163- resturn std::move( result );
164 }
165 \endcode
166
167
168=== modified file 'doc/zorba/ft_thesaurus.dox'
169--- doc/zorba/ft_thesaurus.dox 2012-04-24 12:39:38 +0000
170+++ doc/zorba/ft_thesaurus.dox 2012-04-26 16:53:22 +0000
171@@ -44,16 +44,16 @@
172 To download and install the WordNet database on a Unix-like system,
173 follow these steps:
174
175- -# Download the WordNet database from
176- <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>.
177- All you really need are just the database files
178- (<code>WNdb-3.0.tar.gz</code>).
179- -# Un-gzip and untar the files.
180- This will result in a directory dict containing the database files.
181- -# Move the dict directory somewhere of your choosing,
182- e.g., <code>/usr/local/wordnet-3.0/dict</code>.
183- -# Compile the \c dict directory into a Zorba-compatible binary thesaurus
184- as described below.
185+-# Download the WordNet database from
186+ <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>.
187+ All you really need are just the database files
188+ (<code>WNdb-3.0.tar.gz</code>).
189+-# Un-gzip and untar the files.
190+ This will result in a directory dict containing the database files.
191+-# Move the dict directory somewhere of your choosing,
192+ e.g., <code>/usr/local/wordnet-3.0/dict</code>.
193+-# Compile the \c dict directory into a Zorba-compatible binary thesaurus
194+ as described below.
195
196 To compile the WordNet database files,
197 use the \c zt-wn-compile script
198@@ -65,12 +65,12 @@
199 zt-wn-compile [-v] wordnet_dict_dir [thesaurus_file]
200 \endcode
201
202- - The \c -v option specifies verbose output.
203- - The \e wordnet_dict_dir specifies the full path
204- of the WordNet \c dict directory.
205- - The \e thesaurus_file specifies the name of the resulting binary file.
206- If none is given, it defaults to \c wordnet-en.zth
207- ("en" for English and "zth" for "Zorba Thesaurus file").
208+- The \c -v option specifies verbose output.
209+- The \e wordnet_dict_dir specifies the full path
210+ of the WordNet \c dict directory.
211+- The \e thesaurus_file specifies the name of the resulting binary file.
212+ If none is given, it defaults to \c wordnet-en.zth
213+ ("en" for English and "zth" for "Zorba Thesaurus file").
214
215 For example:
216
217@@ -78,33 +78,39 @@
218 zt-wn-compile -v /usr/local/wordnet-3.0/dict
219 \endcode
220
221-Move the \c wordnet-en.zth file to a location of your choosing.
222+To install the \c wordnet-en.zth file,
223+move it onto Zorba's <i>library path</i>:
224+
225+\code
226+LIB_PATH/edu/princeton/wordnet/wordnet-en.zth
227+\endcode
228
229 \subsection ft_thesaurus_precompiled Downloading a Precompiled WordNet Database
230
231 Alternatively,
232-you can download a precompiled WordNet database from
233+you can download a precompiled WordNet database for little-endian (Intel) CPUs from
234 <a href="http://www.zorba-xquery.com/downloads/WordNet-3.0/wordnet-en.zip">here</a>.
235
236 \section ft_thesaurus_mappings Thesauri Mappings
237
238 In order to use thesauri,
239-you need to specify where they are to the Zorba engine
240-via one or more thesaurus <i>mappings</i>.
241-A <i>mapping</i> maps a symbolic URI to URI for an actual thesaurus.
242+you need to specify what symbolic URI(s) <i>map</i>
243+to what thesauri.
244 A mapping is of the form:
245
246-<i>from_uri</i><code>:=</code><b>[</b><i>implementation</i><code>|</code><b>]</b><i>to_uri</i>
247+<i>from_uri</i><code>:=</code><i>implementation-scheme</i><code>:</code><i>to_uri</i>
248
249 For example:
250
251 \code
252-http://wordnet.princeton.edu:=wordnet|/usr/local/zorba/thesauri/wordnet-en.zth
253+http://wordnet.princeton.edu:=wordnet://wordnet.princeton.edu
254 \endcode
255
256 says that the symbolic URI \c http://wordnet.princeton.edu
257 maps to the WordNet implementation
258-having a database file at the given path.
259+having a database file at the given sub-path
260+\c edu/princeton/wordnet
261+on Zorba's library path.
262 Once a mapping is established for a symbolic URI,
263 it can be used in a query:
264
265@@ -114,13 +120,8 @@
266 using thesaurus at "http://wordnet.princeton.edu"
267 \endcode
268
269-If the \e implementation is omitted,
270-it defaults to \c wordnet.
271 As a special-case,
272-the \e from_uri can be \c default or
273-\code
274-##default
275-\endcode
276+the \e from_uri can be \c default or \c ##default
277 to allow for specifying the default thesaurus
278 as was done for the first example on this page.
279
280@@ -130,7 +131,7 @@
281 use one or more –thesaurus options:
282
283 \code
284-zorba --thesaurus default:=/usr/local/zorba/thesauri/wordnet-en.zth ...
285+zorba --thesaurus default:=wordnet://wordnet.princeton.edu ...
286 \endcode
287
288 \section ft_thesaurus_rels Thesaurus Relationships
289@@ -423,25 +424,26 @@
290
291 If no levels are specified in a query,
292 Zorba defaults the WordNet implementation to be 2 levels.
293-The rationale can be found
294-<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>.
295+(The rationale can be found
296+<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>.)
297
298 \section ft_thesaurus_providing Providing Your Own Thesaurus
299
300 Using the Zorba C++ API,
301 you can provide your own thesaurus
302-by deriving from three classes:
303+by deriving from four classes:
304 \c Thesaurus,
305 \c Thesaurus::iterator,
306+\c ThesaurusProvider,
307 and
308-\c ThesaurusProvider.
309+\c URLResolver.
310
311 \subsection ft_class_thesaurus The Thesaurus Class
312
313 The \c Thesaurus class is:
314
315 \code
316-class Thesaurus : public Resource {
317+class Thesaurus {
318 public:
319 typedef /* implementation-defined */ ptr;
320 typedef /* implementation-defined */ range_type;
321@@ -457,15 +459,15 @@
322
323 virtual iterator::ptr lookup( String const &phrase, String const &relationship, range_type at_least, range_type at_most ) const = 0;
324
325- virtual void destroy() const = 0; // interited from Resource
326+ virtual void destroy() const = 0;
327 protected:
328 virtual ~Thesaurus();
329 };
330 \endcode
331
332-For details about the \c ptr type,
333-the \c destroy() function,
334-and why the destructor is \c protected,
335+For details about the \c ptr types,
336+the \c destroy() functions,
337+and why the destructors are \c protected,
338 see the \ref memory_management document.
339
340 To implement the \c Thesaurus
341@@ -482,18 +484,19 @@
342 </tr>
343 <tr>
344 <td>\c at_least</td>
345- <td>The The minimum number of levels within the thesaurus to be traversed.</td>
346+ <td>The minimum number of levels within the thesaurus to be traversed.</td>
347 </tr>
348 <tr>
349 <td>\c at_most</td>
350- <td>The The maximum number of levels within the thesaurus to be traversed.</td>
351+ <td>The maximum number of levels within the thesaurus to be traversed.</td>
352 </tr>
353 </table>
354
355 The \c lookup() function returns a pointer to an \c iterator
356 that is used to iterate over the phrase's synonyms.
357-
358-A very simple thesaurus
359+You also need to implement an \c iterator.
360+A very simple \c Thesaurus
361+and its \c iterator
362 can be implemented as:
363
364 \code
365@@ -505,53 +508,49 @@
366 //
367 // Define a simple thesaurus data structure as a map from a phrase to a list of its synonyms.
368 //
369- typedef std::list<String> synonyms_t;
370- typedef std::map<String,synonyms_t const*> thesaurus_t;
371+ typedef std::list<String> synonyms_type;
372+ typedef std::map<String,synonyms_type const*> thesaurus_data_type;
373
374- static thesaurus_t const& get_thesaurus();
375+ static thesaurus_data_type const& get_thesaurus_data();
376
377 class iterator : public Thesaurus::iterator {
378 public:
379- iterator( synonyms_t const &s ) : synonyms_( s ), i_( s.begin() ) { }
380+ iterator( synonyms_type const &s ) : synonyms_( s ), i_( s.begin() ) { }
381 void destroy();
382 bool next( String *synonym );
383 private:
384- synonyms_t const &synonyms_; // synonyms to iterate over
385- synonyms_t::const_iterator i_; // current iterator position
386+ synonyms_type const &synonyms_; // synonyms to iterate over
387+ synonyms_type::const_iterator i_; // current iterator position
388 };
389 };
390
391 void MyThesaurus::destroy() const {
392- // Do nothing since we statically allocate a singleton instance of our thesaurus.
393+ // Do nothing since we statically allocate a singleton instance of our Thesaurus.
394 }
395
396-MyThesaurus::thesaurus_t const& MyThesaurus::get_thesaurus() {
397- static thesaurus_t thesaurus;
398- if ( thesaurus.empty() ) {
399- //
400- // Construct a thesaurus "by hand" for this example. A real thesaurus would probably
401- // be read from disk.
402- //
403+MyThesaurus::thesaurus_data_type const& MyThesaurus::get_thesaurus_data() {
404+ static thesaurus_data_type thesaurus_data;
405+ if ( thesaurus_data.empty() ) {
406+ //
407+ // Construct thesaurus data "by hand" for this example. A real thesaurus would probably be read from disk.
408 // Note that every list of synonyms must always include the original phrase.
409 //
410- static synonyms_t synonyms;
411+ static synonyms_type synonyms;
412 synonyms.push_back( "foo" );
413 synonyms.push_back( "foobar" );
414- thesaurus[ "foo" ] = &synonyms;
415- thesaurus[ "foobar" ] = &synonyms;
416+ thesaurus_data[ "foo" ] = &synonyms;
417+ thesaurus_data[ "foobar" ] = &synonyms;
418 }
419- return thesaurus;
420+ return thesaurus_data;
421 }
422-\endcode
423
424-\code
425 MyThesaurus::iterator::ptr MyThesaurus::lookup( String const &phrase, String const &relationship,
426 range_type at_least, range_type at_most ) const {
427- static thesaurus_t const &thesaurus = get_thesaurus();
428- thesaurus_t::const_iterator const i = thesaurus.find( phrase );
429+ static thesaurus_data_type const &thesaurus_data = get_thesaurus_data();
430+ thesaurus_data_type::const_iterator const entry = thesaurus_data.find( phrase );
431 iterator::ptr result;
432- if ( i != thesaurus.end() )
433- result.reset( new iterator( *i->second ) );
434+ if ( entry != thesaurus_data.end() )
435+ result.reset( new iterator( *entry->second ) );
436 return std::move( result );
437 }
438
439@@ -572,13 +571,71 @@
440 A real thesaurus would load a large number of synonyms,
441 of course.
442
443+\subsection ft_class_thesaurus_provider The ThesaurusProvider Class
444+
445+The \c ThesaurusProvider class is:
446+
447+\code
448+class ThesaurusProvider : public Resource {
449+public:
450+ typedef /* implementation-defined */ ptr;
451+
452+ virtual bool getThesaurus( locale::iso639_1::type lang, Thesaurus::ptr *thesaurus = 0 ) const = 0;
453+ void destroy() const; // inherited from Resource
454+};
455+\endcode
456+
457+To implement a \c ThesaurusProvider,
458+you need to implement the \c getThesaurus() function where:
459+
460+<table>
461+ <tr>
462+ <td>\c lang</td>
463+ <td>The desired language of the thesaurus.</td>
464+ </tr>
465+ <tr>
466+ <td>\c thesaurus</td>
467+ <td>If not \c null, set to point to a thesaurus for \c lang.</td>
468+ </tr>
469+</table>
470+
471+The \c getThesaurus() function returns \c true
472+only if it can provide a thesaurus for the given language.
473+Continuing with the example,
474+a very simple \c ThesaurusProvider
475+can be implemented as:
476+
477+\code
478+class MyThesaurusProvider : public ThesaurusProvider {
479+public:
480+ void destroy() const;
481+ bool getThesaurus( iso639_1::type lang, Thesaurus::ptr* = 0 ) const;
482+};
483+
484+void MyThesaurusProvider::destroy() const {
485+ // Do nothing since we statically allocate a singleton instance of our ThesaurusProvider.
486+}
487+
488+bool MyThesaurusProvider::getThesaurus( iso639_1::type lang, Thesaurus::ptr *result ) const {
489+ //
490+ // Since our tiny thesaurus contains only universally known words, we don't bother checking lang
491+ // and always return true.
492+ //
493+ static MyThesaurus thesaurus;
494+ if ( result )
495+ result->reset( &thesaurus );
496+ return true;
497+}
498+\endcode
499+
500 \subsection ft_class_thesaurus_resolver A Thesaurus URL Resolver Class
501
502-In addition to a \c Thesaurus,
503+In addition to a \c Thesaurus
504+and \c ThesaurusProvider,
505 you must also implement a "thesaurus resolver" class
506 that,
507-given a URL and a language,
508-provides a \c Thesaurus for that language.
509+given a URI,
510+provides a \c ThesaurusProvider for that URI.
511 A simple \c ThesaurusURLResolver
512 for our simple thesaurus can be implemented as:
513
514@@ -591,23 +648,12 @@
515 String const url_;
516 };
517
518-Resource*
519-ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const {
520- ThesaurusEntityData const *const t_data = dynamic_cast<ThesaurusEntityData const*>( data );
521- assert( t_data );
522- static MyThesaurus thesaurus;
523- if ( url == url_ )
524- switch ( t_data->getLanguage() ) {
525- case locale::iso639_1::en:
526- case locale::iso639_1::unknown:
527- //
528- // Here, we could test to ensure that the language of our thesaurus matches the
529- // language sought, but in our case, we want our thesaurus to be used for all
530- // languages since "foo" and "foobar" are universal.
531- //
532- default:
533- return &thesaurus;
534- }
535+Resource* ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const {
536+ if ( data->getKind() == EntityData::THESAURUS ) {
537+ static MyThesaurusProvider provider;
538+ if ( url == url_ )
539+ return &provider;
540+ }
541 return 0;
542 }
543 \endcode
544
545=== modified file 'doc/zorba/ft_tokenizer.dox'
546--- doc/zorba/ft_tokenizer.dox 2012-04-24 12:39:38 +0000
547+++ doc/zorba/ft_tokenizer.dox 2012-04-26 16:53:22 +0000
548@@ -5,14 +5,25 @@
549 The Zorba XQuery processor implements the
550 <a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0</a>
551 specification that, among other things,
552-tokenizes a string into a sequence of tokens.
553-See
554-<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">Tokenization</a>.
555-
556-The initial implementation of the toknenizer
557-uses the one provided by the
558-<a href="http://site.icu-project.org/">ICU library</a>.
559-However, you can provide your own tokenizer instead.
560+<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">tokenizes</a>
561+a string into a sequence of tokens.
562+
563+\section ft_tokenizer_tokization Tokenization
564+
565+Using the
566+<a href="http://site.icu-project.org/">ICU library</a>,
567+Zorba's implementation of tokenization
568+considers only alpha-numeric sequences of characters to be part of a token;
569+whitespace and punctuation characters are not;
570+instead, they separate tokens.
571+However, alpha-numeric sequences matching the regular expression
572+<code>[0-9][.,][0-9]</code>
573+are retained as part of a token, e.g.:
574+"98.6" and "1,432.58" are tokens.
575+
576+Alternatively,
577+you can implement your own tokenizer
578+by deriving from the \c Tokenizer class.
579
580 \section ft_class_tokenizer The Tokenizer Class
581
582@@ -36,33 +47,43 @@
583
584 class Callback {
585 public:
586- typedef Tokenizer::size_type size_type;;
587+ typedef Tokenizer::size_type size_type;
588
589 virtual ~Callback();
590
591- virtual void operator()( char const *utf8_s, size_type utf8_len,
592- size_type token_no, size_type sent_no, size_type para_no,
593- void *payload = 0 ) = 0;
594- };
595-
596- enum ElementTraceOptions {
597- trace_none = 0x0, // Trace no elements.
598- trace_begin = 0x1, // Trace the beginning of elements.
599- trace_end = 0x2 // Trace the ending of elements.
600- };
601+ virtual void token( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,
602+ size_type token_no, size_type sent_no, size_type para_no,
603+ Item const *item = 0 ) = 0;
604+ };
605+
606+ struct Properties {
607+ typedef std::vector<locale::iso639_1::type> languages_type;
608+
609+ bool comments_separate_tokens;
610+ bool elements_separate_tokens;
611+ bool processing_instructions_separate_tokens;
612+ languages_type languages;
613+ char const *uri;
614+ };
615+
616+ virtual void properties( Properties *result ) const = 0;
617
618 virtual void destroy() const = 0;
619- virtual void element( Item const &qname, int trace_options );
620 Numbers& numbers();
621 Numbers const& numbers() const;
622- int trace_options() const;
623-
624- virtual void tokenize( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,
625- bool wildcards, Callback &callback, void *payload = 0 ) = 0;
626+
627+ void tokenize_node( Item const &node, locale::iso639_1::type lang, Callback &callback );
628+
629+ virtual void tokenize_string( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,
630+ bool wildcards, Callback &callback, Item const *item = 0 ) = 0;
631
632 protected:
633- Tokenizer( Numbers&, int trace_options = trace_none );
634+ Tokenizer( Numbers& );
635 virtual ~Tokenizer();
636+
637+ bool find_lang_attribute( Item const&, locale::iso639_1::type *lang );
638+ virtual void item( Item const&, bool entering );
639+ virtual void tokenize_node_impl( Item const&, locale::iso639_1::type, Callback&, bool tokenize_acp );
640 };
641 \endcode
642
643@@ -76,8 +97,8 @@
644 It simply keeps track of the current
645 token, sentence, and paragraph numbers.
646
647-To implement the \c Tokenizer,
648-you need to implement the \c %tokenize() function where:
649+To implement a \c Tokenizer,
650+you need to implement the \c %tokenize_string() function where:
651
652 <table>
653 <tr>
654@@ -115,9 +136,13 @@
655 </td>
656 </tr>
657 <tr>
658- <td>\c payload</td>
659+ <td>\c item</td>
660 <td>
661- Optional implementation-defined data.
662+ The \c Item whence this token came.
663+ If the token occurred within an element,
664+ the \c Item is the text node.
665+ If the token occurred within an attribute,
666+ the \c Item is the attribute node.
667 </td>
668 </tr>
669 </table>
670@@ -127,21 +152,30 @@
671 However,
672 the things a tokenizer should take into consideration include:
673
674- - Detecting sentence termination ('.', '?', and '!' characters).
675- - Handling floating-point numbers with possible thousands separators
676- in US and European formats, e.g. "98.7", "98,7", "10,000", etc.
677- - Distinguishing '.' used as a sentence terminator
678- from '.' used as a decimal point.
679- - Handling apostrophies, e.g., "men's".
680- - Handling acronyms, e.g., "AT&T".
681-
682-\subsection ft_paragraphs Paragraphs
683+- Detecting sentence termination ('.', '?', and '!' characters).
684+- Handling floating-point numbers with possible thousands separators
685+ in US and European formats, e.g. "98.7", "98,7", "10,000", etc.
686+- Distinguishing '.' used as a sentence terminator
687+ from '.' used as a decimal point.
688+- Handling apostrophes, e.g., "men's".
689+- Handling acronyms, e.g., "AT&T".
690+
691+The task of iterating over an XML element's child nodes
692+is done by \c tokenize_node_impl().
693+Its default implementation
694+treats XML elements, comments, and processing instructions
695+as token separators.
696+(See \ref ft_tokenizer_properties.)
697+If you want to change that,
698+you need to override \c tokenize_node_impl().
699+
700+\subsection ft_tokenizer_paragraphs Paragraphs
701
702 By default,
703 Zorba increments the current paragraph number once
704 for each XML element encountered.
705 However,
706-this doens't work well for mixed content.
707+this doesn't work well for mixed content.
708 For example, in the XHTML:
709 \code
710 <p>The <em>best</em> thing ever!</p>
711@@ -150,31 +184,65 @@
712 but Zorba will consider that 3 paragraphs by default.
713
714 Your tokenizer can take control over when the paragraph number is incremented
715-by passing the bitwise-or
716-of the \c ElementTraceOptions values
717-to the constructor
718-and overriding the \c element() function.
719-The \c element() function is passed the QName of the current XML element
720-and (depending on the initial value passed to the constructor)
721-one of \c trace_begin or \c trace_end.
722-Note that this function is called
723-only if the trace options value
724-passed to the constructor
725-was non-zero.
726+by overriding the \c item() function.
727+The \c item() function is passed the \c Item of the current XML element
728+and whether the item is being entered or exited.
729
730 For example,
731-the \c element() function for tokenizing XHTML
732+the \c item() function for tokenizing XHTML
733 would be along the lines of:
734 \code
735-void MyTokenizer::element( Item const &qname, int trace_options ) {
736- if ( trace_options & trace_end )
737- return;
738- String const name( qname.getLocalName() );
739- if ( /* qname is an XHTML block-level element */ )
740- ++numbers().para;
741+void MyTokenizer::item( Item const &item, bool entering ) {
742+ if ( entering && item.isNode() && item.getNodeKind() == store::StoreConsts::elementNode ) {
743+ Item qname;
744+ item.getNodeName( qname );
745+ if ( /* qname matches an XHTML block-level element's name */ )
746+ ++numbers().para;
747 }
748 \endcode
749
750+\subsection ft_tokenizer_properties Properties
751+
752+To implement a \c Tokenizer,
753+you need also to implement the \c %properties() function
754+that fills in the \c Properties struct where:
755+
756+<table>
757+ <tr>
758+ <td>\c comments_separate_tokens</td>
759+ <td>
760+ If \c true, XML comments separate tokens. For example,
761+ <code>net&lt;!-- --&gt;work</code> would be 2 tokens instead of 1.
762+ </td>
763+ </tr>
764+ <tr>
765+ <td>\c elements_separate_tokens</td>
766+ <td>
767+ If \c true, XML elements separate tokens. For example,
768+ <code>&lt;b&gt;B&lt;/b&gt;old</code> would be 2 tokens instead of 1.
769+ </td>
770+ </tr>
771+ <tr>
772+ <td>\c processing_instructions_separate_tokens</td>
773+ <td>
774+ If \c true, XML processing instructions separate tokens. For example,
775+ <code>net&lt;?PI pi?&gt;work</code> would be 2 tokens instead of 1.
776+ </td>
777+ </tr>
778+ <tr>
779+ <td>\c languages</td>
780+ <td>
781+ The list of languages supported by the tokenizer.
782+ </td>
783+ </tr>
784+ <tr>
785+ <td>\c uri</td>
786+ <td>
787+ The URI that uniquely identifies the %Tokenizer.
788+ </td>
789+ </tr>
790+</table>
791+
792 \section ft_class_tokenizer_provider The TokenizerProviderClass
793
794 In addition to a \c Tokenizer,
795@@ -185,20 +253,51 @@
796 class TokenizerProvider {
797 public:
798 virtual ~TokenizerProvider();
799- virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers &numbers ) const = 0;
800+ virtual bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *numbers = 0, Tokenizer::ptr* = 0 ) const = 0;
801 };
802 \endcode
803
804+Specifically, you need to implement the \c getTokenizer() function where:
805+
806+<table>
807+ <tr>
808+ <td>\c lang</td>
809+ <td>The language to tokenize.</td>
810+ </tr>
811+ <tr>
812+ <td>\c num</td>
813+ <td>
814+ The \c Numbers to use.
815+ If \c null,
816+ \a t is not set.
817+ </td>
818+ </tr>
819+ <tr>
820+ <td>\c t</td>
821+ <td>
822+ If not \c null,
823+ set to point to a Tokenizer for \a lang.
824+ </td>
825+ </tr>
826+</table>
827+
828 A simple \c TokenizerProvider for our tokenizer can be implemented as:
829
830 \code
831 class MyTokenizerProvider : public TokenizerProvider {
832 public:
833- Tokenizer::ptr getTokenizer( locale::iso639_1::type lang ) const;
834+ bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers* = 0, Tokenizer::ptr* = 0 ) const;
835 };
836
837-Tokenizer::ptr MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang const {
838- return Tokenizer::ptr( new MyTokenizer );
839+bool MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *num, Tokenizer::ptr *t ) const {
840+ switch ( lang ) {
841+ case iso639_1::en:
842+ if ( num && t )
843+ t->reset( new MyTokenizer );
844+ return true;
845+ default:
846+ return false;
847+ }
848 }
849 \endcode
850
851
852=== modified file 'include/zorba/locale.h'
853--- include/zorba/locale.h 2012-04-24 12:39:38 +0000
854+++ include/zorba/locale.h 2012-04-26 16:53:22 +0000
855@@ -22,24 +22,198 @@
856
857 ///////////////////////////////////////////////////////////////////////////
858
859+ /**
860+ * Defines constants for all ISO 639-1 language codes.
861+ */
862 namespace iso639_1 {
863 enum type {
864 unknown,
865- da, // Danish
866- de, // German
867- en, // English
868- es, // Spanish
869- fi, // Finnish
870- fr, // French
871- hu, // Hungarian
872- it, // Italian
873- nl, // Dutch
874- no, // Norwegian
875- pt, // Portuguese
876- ro, // Romanian
877- ru, // Russian
878- sv, // Swedish
879- tr, // Turkish
880+ aa, ///< Afar
881+ ab, ///< Abkhazian
882+ ae, ///< Avestan
883+ af, ///< Afrikaans
884+ ak, ///< Akan
885+ am, ///< Amharic
886+ an, ///< Aragonese
887+ ar, ///< Arabic
888+ as, ///< Assamese
889+ av, ///< Avaric
890+ ay, ///< Aymara
891+ az, ///< Azerbaijani
892+ ba, ///< Bashkir
893+ be, ///< Byelorussian
894+ bg, ///< Bulgarian
895+ bh, ///< Bihari
896+ bi, ///< Bislama
897+ bm, ///< Bambara
898+ bn, ///< Bengali; Bangla
899+ bo, ///< Tibetan
900+ br, ///< Breton
901+ bs, ///< Bosnian
902+ ca, ///< Catalan
903+ ce, ///< Chechen
904+ ch, ///< Chamorro
905+ co, ///< Corsican
906+ cr, ///< Cree
907+ cs, ///< Czech
908+ cu, ///< Church Slavic; Church Slavonic
909+ cv, ///< Chuvash
910+ cy, ///< Welsh
911+ da, ///< Danish
912+ de, ///< German
913+ dv, ///< Divehi
914+ dz, ///< Bhutani
915+ ee, ///< Ewe
916+ el, ///< Greek
917+ en, ///< English
918+ eo, ///< Esperanto
919+ es, ///< Spanish
920+ et, ///< Estonian
921+ eu, ///< Basque
922+ fa, ///< Persian
923+ ff, ///< Fulah
924+ fi, ///< Finnish
925+ fj, ///< Fiji
926+ fo, ///< Faroese
927+ fr, ///< French
928+ fy, ///< Frisian
929+ ga, ///< Irish
930+ gd, ///< Scots Gaelic
931+ gl, ///< Galician
932+ gn, ///< Guarani
933+ gu, ///< Gujarati
934+ gv, ///< Manx
935+ ha, ///< Hausa
936+ he, ///< Hebrew (formerly iw)
937+ hi, ///< Hindi
938+ ho, ///< Hiri Motu
939+ hr, ///< Croatian
940+ ht, ///< Haitian Creole
941+ hu, ///< Hungarian
942+ hy, ///< Armenian
943+ hz, ///< Herero
944+ ia, ///< Interlingua
945+ id, ///< Indonesian (formerly in)
946+ ie, ///< Interlingue
947+ ig, ///< Igbo
948+ ii, ///< Nuosu
949+ ik, ///< Inupiak
950+ io, ///< Ido
951+ is, ///< Icelandic
952+ it, ///< Italian
953+ iu, ///< Inuktitut
954+ ja, ///< Japanese
955+ jv, ///< Javanese
956+ ka, ///< Georgian
957+ kg, ///< Kongo
958+ ki, ///< Gikuyu
959+ kj, ///< Kuanyama
960+ kk, ///< Kazakh
961+ kl, ///< Greenlandic
962+ km, ///< Cambodian
963+ kn, ///< Kannada
964+ ko, ///< Korean
965+ kr, ///< Kanuri
966+ ks, ///< Kashmiri
967+ ku, ///< Kurdish
968+ kv, ///< Komi
969+ kw, ///< Cornish
970+ ky, ///< Kirghiz
971+ la, ///< Latin
972+ lb, ///< Letzeburgesch
973+ lg, ///< Ganda
974+ li, ///< Limburgan; Limburger; Limburgish
975+ ln, ///< Lingala
976+ lo, ///< Laothian
977+ lt, ///< Lithuanian
978+ lu, ///< Luba-Katanga
979+ lv, ///< Latvian
980+ mg, ///< Malagasy
981+ mh, ///< Marshallese
982+ mi, ///< Maori
983+ mk, ///< Macedonian
984+ ml, ///< Malayalam
985+ mn, ///< Mongolian
986+ mo, ///< Moldavian
987+ mr, ///< Marathi
988+ ms, ///< Malay
989+ mt, ///< Maltese
990+ my, ///< Burmese
991+ na, ///< Nauru
992+ nb, ///< Norwegian Bokmal
993+ nd, ///< Ndebele, North
994+ ne, ///< Nepali
995+ ng, ///< Ndonga
996+ nl, ///< Dutch
997+ nn, ///< Norwegian Nynorsk
998+ no, ///< Norwegian
999+ nr, ///< Ndebele, South
1000+ nv, ///< Navajo; Navaho
1001+ ny, ///< Chichewa; Chewa; Nyanja
1002+ oc, ///< Occitan
1003+ oj, ///< Ojibwa
1004+ om, ///< Oromo
1005+ or_, ///< Oriya
1006+ os, ///< Ossetian; Ossetic
1007+ pa, ///< Panjabi; Punjabi
1008+ pi, ///< Pali
1009+ pl, ///< Polish
1010+ ps, ///< Pashto, Pushto
1011+ pt, ///< Portuguese
1012+ qu, ///< Quechua
1013+ rm, ///< Romansh
1014+ rn, ///< Kirundi
1015+ ro, ///< Romanian
1016+ ru, ///< Russian
1017+ rw, ///< Kinyarwanda
1018+ sa, ///< Sanskrit
1019+ sc, ///< Sardinian
1020+ sd, ///< Sindhi
1021+ se, ///< Northern Sami
1022+ sg, ///< Sangho
1023+ sh, ///< Serbo-Croatian
1024+ si, ///< Sinhalese
1025+ sk, ///< Slovak
1026+ sl, ///< Slovenian
1027+ sm, ///< Samoan
1028+ sn, ///< Shona
1029+ so, ///< Somali
1030+ sq, ///< Albanian
1031+ sr, ///< Serbian
1032+ ss, ///< Siswati
1033+ st, ///< Sesotho
1034+ su, ///< Sundanese
1035+ sv, ///< Swedish
1036+ sw, ///< Swahili
1037+ ta, ///< Tamil
1038+ te, ///< Telugu
1039+ tg, ///< Tajik
1040+ th, ///< Thai
1041+ ti, ///< Tigrinya
1042+ tk, ///< Turkmen
1043+ tl, ///< Tagalog
1044+ tn, ///< Setswana
1045+ to, ///< Tonga
1046+ tr, ///< Turkish
1047+ ts, ///< Tsonga
1048+ tt, ///< Tatar
1049+ tw, ///< Twi
1050+ ty, ///< Tahitian
1051+ ug, ///< Uighur
1052+ uk, ///< Ukrainian
1053+ ur, ///< Urdu
1054+ uz, ///< Uzbek
1055+ ve, ///< Venda
1056+ vi, ///< Vietnamese
1057+ vo, ///< Volapuk
1058+ wa, ///< Walloon
1059+ wo, ///< Wolof
1060+ xh, ///< Xhosa
1061+ yi, ///< Yiddish
1062+ yo, ///< Yoruba
1063+ za, ///< Zhuang
1064+ zh, ///< Chinese
1065+ zu, ///< Zulu
1066 NUM_ENTRIES
1067 };
1068 }
1069
1070=== modified file 'include/zorba/pregenerated/diagnostic_list.h'
1071--- include/zorba/pregenerated/diagnostic_list.h 2012-04-24 12:39:38 +0000
1072+++ include/zorba/pregenerated/diagnostic_list.h 2012-04-26 16:53:22 +0000
1073@@ -454,6 +454,14 @@
1074 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8402_THESAURUS_ENDIANNESS_MISMATCH;
1075
1076 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR;
1077+
1078+extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED;
1079+
1080+extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED;
1081+
1082+extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED;
1083+
1084+extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED;
1085 #endif
1086
1087 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQD0001_PREFIX_NOT_DECLARED;
1088
1089=== modified file 'include/zorba/stemmer.h'
1090--- include/zorba/stemmer.h 2012-04-24 12:39:38 +0000
1091+++ include/zorba/stemmer.h 2012-04-26 16:53:22 +0000
1092@@ -52,6 +52,23 @@
1093 virtual void destroy() const = 0;
1094
1095 /**
1096+ * Various properties of this %Stemmer.
1097+ */
1098+ struct Properties {
1099+ /**
1100+ * The URI that uniquely identifies this %Stemmer.
1101+ */
1102+ char const *uri;
1103+ };
1104+
1105+ /**
1106+ * Gets the Properties of this %Stemmer.
1107+ *
1108+ * @param result The Properties to populate.
1109+ */
1110+ virtual void properties( Properties *result ) const = 0;
1111+
1112+ /**
1113 * Stems the given word.
1114 *
1115 * @param word The word to stem.
1116@@ -66,7 +83,7 @@
1117 };
1118
1119 /**
1120- * A %StemmerProvider, given an language, provies a stemmer for it.
1121+ * A %StemmerProvider, given a language, provides a Stemmer for it.
1122 */
1123 class ZORBA_DLL_PUBLIC StemmerProvider {
1124 public:
1125@@ -76,10 +93,12 @@
1126 * Gets a Stemmer for the given language.
1127 *
1128 * @param lang The language to get a Stemmer for.
1129- * @return The relevant Stemmer or \c NULL if no stemmer for the given
1130- * language is available.
1131+ * @param s If not \c null, set to point to a Stemmer for \a lang.
1132+ * @return Returns \c true only if this provider can provide a stemmer for
1133+ * \a lang.
1134 */
1135- virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0;
1136+ virtual bool getStemmer( locale::iso639_1::type lang,
1137+ Stemmer::ptr *s = 0 ) const = 0;
1138 };
1139
1140 ///////////////////////////////////////////////////////////////////////////////
1141
1142=== modified file 'include/zorba/thesaurus.h'
1143--- include/zorba/thesaurus.h 2012-04-24 12:39:38 +0000
1144+++ include/zorba/thesaurus.h 2012-04-26 16:53:22 +0000
1145@@ -32,25 +32,13 @@
1146 ///////////////////////////////////////////////////////////////////////////////
1147
1148 /**
1149- * Contains additional data for URIMappers and URLResolvers
1150- * when mapping/resolving a Thesaurus URI.
1151- */
1152-class ZORBA_DLL_PUBLIC ThesaurusEntityData : public EntityData {
1153-public:
1154- /**
1155- * Gets the language for which a thesaurus is being requested.
1156- *
1157- * @return said language.
1158- */
1159- virtual locale::iso639_1::type getLanguage() const = 0;
1160-};
1161-
1162-/**
1163- * A %Thesaurus is-a Resource for thesaurus implementations.
1164- */
1165-class ZORBA_DLL_PUBLIC Thesaurus : public Resource {
1166-public:
1167- typedef std::unique_ptr<Thesaurus,internal::ztd::destroy_delete<Thesaurus> >
1168+ * A %Thesaurus provides a way to look up related phrases for a given phrase.
1169+ */
1170+class ZORBA_DLL_PUBLIC Thesaurus {
1171+public:
1172+ typedef std::unique_ptr<
1173+ Thesaurus const,internal::ztd::destroy_delete<Thesaurus const>
1174+ >
1175 ptr;
1176
1177 /**
1178@@ -88,11 +76,11 @@
1179 * Destroys this %Thesaurus.
1180 * This function is called by Zorba when the %Thesaurus is no longer needed.
1181 *
1182- * If your URLResolver dynamically allocates %Thesaurus objects, then the
1183+ * If your implementation dynamically allocates %Thesaurus objects, then your
1184 * implementation can simply be (and usually is) <code>delete this</code>.
1185 *
1186- * If your URLResolver returns a pointer to a static %Thesaurus object, then
1187- * the implementation should do nothing.
1188+ * If your implementation returns a pointer to a static %Thesaurus object,
1189+ * then your implementation should do nothing.
1190 */
1191 virtual void destroy() const = 0;
1192
1193@@ -119,6 +107,32 @@
1194
1195 ///////////////////////////////////////////////////////////////////////////////
1196
1197+/**
1198+ * A %ThesaurusProvider is-a Resource for providing thesauri for a given
1199+ * language.
1200+ */
1201+class ZORBA_DLL_PUBLIC ThesaurusProvider : public Resource {
1202+public:
1203+ typedef std::unique_ptr<
1204+ ThesaurusProvider const,
1205+ internal::ztd::destroy_delete<ThesaurusProvider const>
1206+ >
1207+ ptr;
1208+
1209+ /**
1210+ * Gets a Thesaurus for the given language.
1211+ *
1212+ * @param lang The desired language of the thesaurus.
1213+ * @param t If not \c null, set to point to a Thesaurus for \a lang.
1214+ * @return Returns \c true only if this provider can provide a thesaurus for
1215+ * \a lang.
1216+ */
1217+ virtual bool getThesaurus( locale::iso639_1::type lang,
1218+ Thesaurus::ptr *t = 0 ) const = 0;
1219+};
1220+
1221+///////////////////////////////////////////////////////////////////////////////
1222+
1223 } // namespace zorba
1224 #endif /* ZORBA_NO_FULL_TEXT */
1225 #endif /* ZORBA_THESAURUS_API_H */
1226
1227=== modified file 'include/zorba/tokenizer.h'
1228--- include/zorba/tokenizer.h 2012-04-24 12:39:38 +0000
1229+++ include/zorba/tokenizer.h 2012-04-26 16:53:22 +0000
1230@@ -18,6 +18,8 @@
1231 #ifndef ZORBA_TOKENIZER_API_H
1232 #define ZORBA_TOKENIZER_API_H
1233
1234+#include <vector>
1235+
1236 #include <zorba/config.h>
1237 #include <zorba/locale.h>
1238 #include <zorba/internal/unique_ptr.h>
1239@@ -67,8 +69,6 @@
1240 * A %Callback is called once per token.
1241 * This is only internally by Zorba.
1242 * You do not need to derive from this class.
1243- * The only thing you need to do is call the callback's \c operator() once
1244- * for each token you parse in \c tokenize().
1245 */
1246 class Callback {
1247 public:
1248@@ -77,19 +77,75 @@
1249 virtual ~Callback();
1250
1251 /**
1252+ * This member-function is called whenever an item that is being tokenized
1253+ * is entered or exited.
1254+ *
1255+ * @param item The item being entered or exited.
1256+ * @param entering If \c true, the item is being entered; if \c false, the
1257+ * item is being exited.
1258+ */
1259+ virtual void item( Item const &item, bool entering );
1260+
1261+ /**
1262 * This member-function is called once per token.
1263 *
1264 * @param utf8_s The UTF-8 token string. It is not null-terminated.
1265 * @param utf8_len The number of bytes in the token string.
1266+ * @param lang The language of the token.
1267 * @param token_no The token number. Token numbers start at 0.
1268 * @param sent_no The sentence number. Sentence numbers start at 1.
1269 * @param para_no The paragraph number. Paragraph numbers start at 1.
1270- * @param payload Optional user-defined data.
1271- */
1272- virtual void operator()( char const *utf8_s, size_type utf8_len,
1273- size_type token_no, size_type sent_no,
1274- size_type para_no, void *payload = 0 ) = 0;
1275- };
1276+ * @param item The Item this token is from, if any.
1277+ */
1278+ virtual void token( char const *utf8_s, size_type utf8_len,
1279+ locale::iso639_1::type lang,
1280+ size_type token_no, size_type sent_no,
1281+ size_type para_no, Item const *item = 0 ) = 0;
1282+ };
1283+
1284+ /////////////////////////////////////////////////////////////////////////////
1285+
1286+ /**
1287+ * Various properties of this %Tokenizer.
1288+ */
1289+ struct Properties {
1290+ typedef std::vector<locale::iso639_1::type> languages_type;
1291+
1292+ /**
1293+ * If \c true, XML comments separate tokens. For example,
1294+ * \c net&lt;!----&gt;work would be 2 tokens instead of 1.
1295+ */
1296+ bool comments_separate_tokens;
1297+
1298+ /**
1299+ * If \c true, XML elements separate tokens. For example,
1300+ * \c &lt;b&gt;B&lt;/b&gt;old would be 2 tokens instead of 1.
1301+ */
1302+ bool elements_separate_tokens;
1303+
1304+ /**
1305+ * If \c true, XML processing instructions separate tokens. For example,
1306+ * <code>net&lt;?PI pi?&gt;work</code> would be 2 tokens instead of 1.
1307+ */
1308+ bool processing_instructions_separate_tokens;
1309+
1310+ /**
1311+ * The set of languages supported.
1312+ */
1313+ languages_type languages;
1314+
1315+ /**
1316+ * The URI that uniquely identifies this %Tokenizer.
1317+ */
1318+ char const* uri;
1319+ };
1320+
1321+ /**
1322+ * Gets the Properties of this %Tokenizer.
1323+ *
1324+ * @param result The Properties to populate.
1325+ */
1326+ virtual void properties( Properties *result ) const = 0;
1327
1328 /////////////////////////////////////////////////////////////////////////////
1329
1330@@ -106,39 +162,6 @@
1331 virtual void destroy() const = 0;
1332
1333 /**
1334- * Trace options for XML elements combined via bitwise-or.
1335- */
1336- enum ElementTraceOptions {
1337- trace_none = 0x0, ///< Trace no elements.
1338- trace_begin = 0x1, ///< Trace the beginning of elements.
1339- trace_end = 0x2 ///< Trace the ending of elements.
1340- };
1341-
1342- /**
1343- * Gets the trace options. If the value is \c trace_none, then the paragraph
1344- * number will be incremented upon entering an XML element; if the value is
1345- * anything other than \c trace_none, then the tokenizer assumes
1346- * responsibility for incrementing the paragraph number.
1347- *
1348- * @return Returns said options.
1349- */
1350- int trace_options() const {
1351- return trace_options_;
1352- }
1353-
1354- /**
1355- * This function is called whenever an XML element is entered during
1356- * tokenization. Note that this function is called only if \c
1357- * trace_options() returns non-zero.
1358- *
1359- * @param qname The element's QName.
1360- * @param trace_options The bitwise-or of the trace option(s) in effect for a
1361- * particular call.
1362- * @see trace_options()
1363- */
1364- virtual void element( Item const &qname, int trace_options );
1365-
1366- /**
1367 * Gets this %Tokenizer's associated Numbers.
1368 *
1369 * @return Returns said Numbers.
1370@@ -153,6 +176,16 @@
1371 Numbers const& numbers() const;
1372
1373 /**
1374+ * Tokenizes the given node.
1375+ *
1376+ * @param node The node to tokenize.
1377+ * @param lang The default language to use.
1378+ * @param callback The Callback to call once per token.
1379+ */
1380+ void tokenize_node( Item const &node, locale::iso639_1::type lang,
1381+ Callback &callback );
1382+
1383+ /**
1384 * Tokenizes the given string.
1385 *
1386 * @param utf8_s The UTF-8 string to tokenize. It need not be
1387@@ -162,11 +195,11 @@
1388 * @param wildcards If \c true, allows XQuery wildcard syntax characters to
1389 * be part of tokens.
1390 * @param callback The Callback to call once per token.
1391- * @param payload Optional user-defined data.
1392+ * @param item The Item this string is from, if any.
1393 */
1394- virtual void tokenize( char const *utf8_s, size_type utf8_len,
1395- locale::iso639_1::type lang, bool wildcards,
1396- Callback &callback, void *payload = 0 ) = 0;
1397+ virtual void tokenize_string( char const *utf8_s, size_type utf8_len,
1398+ locale::iso639_1::type lang, bool wildcards,
1399+ Callback &callback, Item const *item = 0 ) = 0;
1400
1401 /////////////////////////////////////////////////////////////////////////////
1402
1403@@ -175,27 +208,71 @@
1404 * Constructs a %Tokenizer.
1405 *
1406 * @param numbers the Numbers to use.
1407- * @param trace_options The bitwise-or of the available trace options, if
1408- * any.
1409 */
1410- Tokenizer( Numbers &numbers, int trace_options = trace_none );
1411+ Tokenizer( Numbers &numbers );
1412
1413 /**
1414 * Destroys a %Tokenizer.
1415 */
1416 virtual ~Tokenizer() = 0;
1417
1418+ /**
1419+ * Given an element, finds its \c xml:lang attribute, if any, and gets its
1420+ * value.
1421+ *
1422+ * @param element The element to check.
1423+ * @param lang A pointer to where to put the found language, if any.
1424+ * @return Returns \c true only if an \c xml:lang attribute is found and the
1425+ * value is a known language.
1426+ */
1427+ bool find_lang_attribute( Item const &element, locale::iso639_1::type *lang );
1428+
1429+ /**
1430+ * This member-function is called whenever an item that is being tokenized is
1431+ * entered or exited.
1432+ *
1433+ * @param item The item being entered or exited.
1434+ * @param entering If \c true, the item is being entered; if \c false, the
1435+ * item is being exited.
1436+ */
1437+ virtual void item( Item const &item, bool entering );
1438+
1439+ /**
1440+ * Tokenizes the given node and all of its child nodes, if any. For each
1441+ * node, it is required that this function call the item() member function of
1442+ * both this %Tokenizer and of the Callback twice, once each for entrance and
1443+ * exit.
1444+ *
1445+ * @param node The node to tokenize.
1446+ * @param lang The default language to use.
1447+ * @param callback The Callback to call per token.
1448+ * @param tokenize_acp If \c true, additionally tokenize all attribute,
1449+ * comment, and processing-instruction nodes encountered;
1450+ * if \c false, skip them.
1451+ */
1452+ virtual void tokenize_node_impl( Item const &node,
1453+ locale::iso639_1::type lang,
1454+ Callback &callback, bool tokenize_acp );
1455+
1456 private:
1457- int trace_options_;
1458- Numbers *no_;
1459+ Numbers *numbers_;
1460 };
1461
1462+inline Tokenizer::Tokenizer( Numbers &numbers ) : numbers_( &numbers ) {
1463+}
1464+
1465 inline Tokenizer::Numbers& Tokenizer::numbers() {
1466- return *no_;
1467+ return *numbers_;
1468 }
1469
1470 inline Tokenizer::Numbers const& Tokenizer::numbers() const {
1471- return *no_;
1472+ return *numbers_;
1473+}
1474+
1475+inline void Tokenizer::tokenize_node( Item const &item,
1476+ locale::iso639_1::type lang,
1477+ Callback &callback ) {
1478+ tokenize_node_impl( item, lang, callback, true );
1479 }
1480
1481 ///////////////////////////////////////////////////////////////////////////////
1482@@ -211,11 +288,14 @@
1483 * Creates a new %Tokenizer.
1484 *
1485 * @param lang The language of the text that the tokenizer will tokenize.
1486- * @param numbers The Numbers to use.
1487- * @return Returns said %Tokenizer.
1488+ * @param numbers The Numbers to use. If \c null, \a t is not set.
1489+ * @param t If not \c null, set to point to a Tokenizer for \a lang.
1490+ * @return Returns \c true only if this provider can provide a tokenizer for
1491+ * \a lang.
1492 */
1493- virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang,
1494- Tokenizer::Numbers &numbers ) const = 0;
1495+ virtual bool getTokenizer( locale::iso639_1::type lang,
1496+ Tokenizer::Numbers *numbers = 0,
1497+ Tokenizer::ptr *t = 0 ) const = 0;
1498 };
1499
1500 ///////////////////////////////////////////////////////////////////////////////
1501
1502=== modified file 'include/zorba/uri_resolvers.h'
1503--- include/zorba/uri_resolvers.h 2012-04-24 12:39:38 +0000
1504+++ include/zorba/uri_resolvers.h 2012-04-26 16:53:22 +0000
1505@@ -50,7 +50,8 @@
1506 class ZORBA_DLL_PUBLIC Resource
1507 {
1508 public:
1509- typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> > ptr;
1510+ typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> >
1511+ ptr;
1512
1513 virtual ~Resource() = 0;
1514
1515@@ -172,8 +173,8 @@
1516 * object itself will be discarded.
1517 *
1518 * In any case, if they create a Resource, Zorba will take memory
1519- * ownership of the Resource and delete it when it is no longer
1520- * needed.
1521+ * ownership of the Resource and delete it (by calling destroy() on it)
1522+ * when it is no longer needed.
1523 */
1524 virtual Resource* resolveURL(const zorba::String& aUrl,
1525 EntityData const* aEntityData) = 0;
1526
1527=== modified file 'modules/com/zorba-xquery/www/modules/CMakeLists.txt'
1528--- modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-24 12:39:38 +0000
1529+++ modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-26 16:53:22 +0000
1530@@ -72,6 +72,13 @@
1531 DECLARE_ZORBA_MODULE(FILE xqdoc.xq VERSION 2.0
1532 URI "http://www.zorba-xquery.com/modules/xqdoc")
1533
1534+IF(NOT ZORBA_NO_FULL_TEXT)
1535+ DECLARE_ZORBA_MODULE(FILE full-text.xq VERSION 2.0
1536+ URI "http://www.zorba-xquery.com/modules/full-text")
1537+ DECLARE_ZORBA_SCHEMA(FILE full-text.xsd
1538+ URI "http://www.zorba-xquery.com/modules/full-text")
1539+ENDIF(NOT ZORBA_NO_FULL_TEXT)
1540+
1541 # Subdirectories
1542 DECLARE_ZORBA_MODULE(FILE converters/base64.xq VERSION 2.0
1543 URI "http://www.zorba-xquery.com/modules/converters/base64")
1544
1545=== added file 'modules/com/zorba-xquery/www/modules/full-text.xq'
1546--- modules/com/zorba-xquery/www/modules/full-text.xq 1970-01-01 00:00:00 +0000
1547+++ modules/com/zorba-xquery/www/modules/full-text.xq 2012-04-26 16:53:22 +0000
1548@@ -0,0 +1,872 @@
1549+xquery version "3.0";
1550+
1551+(:
1552+ : Copyright 2006-2011 The FLWOR Foundation.
1553+ :
1554+ : Licensed under the Apache License, Version 2.0 (the "License");
1555+ : you may not use this file except in compliance with the License.
1556+ : You may obtain a copy of the License at
1557+ :
1558+ : http://www.apache.org/licenses/LICENSE-2.0
1559+ :
1560+ : Unless required by applicable law or agreed to in writing, software
1561+ : distributed under the License is distributed on an "AS IS" BASIS,
1562+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1563+ : See the License for the specific language governing permissions and
1564+ : limitations under the License.
1565+ :)
1566+
1567+(:===========================================================================:)
1568+
1569+(:~
1570+ : This module provides an XQuery API to full-text functions.
1571+ : For general information about Zorba's implementation of the
1572+ : <a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0 specification</a>
1573+ : as well as instructions for building and installing a thesaurus,
1574+ : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_thesaurus">Full Text Thesaurus documentation</a>.
1575+ : <h2>Notes on languages</h2>
1576+ : To refer to particular human languages,
1577+ : Zorba uses both the
1578+ : <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>
1579+ : and
1580+ : <a href="http://en.wikipedia.org/wiki/ISO_639-2">ISO 639-2</a>
1581+ : language codes.
1582+ : Note that Zorba supports only a subset of the
1583+ : <a href="http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">complete list of language codes</a>
1584+ : and not every function supports the same subset.
1585+ : <p/>
1586+ : Most functions in this module take a language as a parameter
1587+ : using the
1588+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>
1589+ : XML schema data type.
1590+ : <h2>Notes on stemming</h2>
1591+ : The <code>stem()</code> functions return the
1592+ : <a href="http://en.wikipedia.org/wiki/Word_stem">stem</a>
1593+ : of a word.
1594+ : In Zorba,
1595+ : the stem of a word itself, however, is not guaranteed to be a word.
1596+ : It is best to consider a stem as an opaque byte sequence.
1597+ : All that is guaranteed about a stem is that,
1598+ : for a given word,
1599+ : the stem of that word will always be the same byte sequence.
1600+ : Hence,
1601+ : you should never compare the result of one of the <code>stem()</code>
1602+ : functions against a non-stemmed string,
1603+ : for example:
1604+ : <pre>
1605+ : if ( ft:stem( "apples" ) eq "apple" ) ** WRONG **
1606+ : </pre>
1607+ : Instead do:
1608+ : <pre>
1609+ : if ( ft:stem( "apples" ) eq ft:stem( "apple" ) ) ** CORRECT **
1610+ : </pre>
1611+ : <h2>Notes on the thesaurus</h2>
1612+ : The <code>thesaurus-lookup()</code> functions have "levels"
1613+ : and "relationship" parameters.
1614+ : The values for these are implementation-defined.
1615+ : Zorba's default implementation uses the
1616+ : <a href="http://wordnet.princeton.edu/">WordNet lexical database</a>,
1617+ : version 3.0.
1618+ : <p/>
1619+ : In WordNet,
1620+ : the number of "levels" that two phrases are apart
1621+ : is how many hierarchical meanings apart they are.
1622+ : For example,
1623+ : "canary" is 5 levels away from "vertebrate"
1624+ : (canary &gt; finch &gt; oscine &gt; passerine &gt; bird &gt; vertebrate).
1625+ : <p/>
1626+ : When using the WordNet implementation,
1627+ : Zorba supports all of the relationships (and their abbreviations)
1628+ : specified by
1629+ : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a>
1630+ : and
1631+ : <a href="http://www.niso.org/kst/reports/standards?step=2&amp;gid=&amp;project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a>
1632+ : with the exceptions of "HN" (history note)
1633+ : and "X SN" (see scope note for).
1634+ : These relationships are:
1635+ : <table>
1636+ : <tr>
1637+ : <th>Rel.</th>
1638+ : <th>Meaning</th>
1639+ : <th>WordNet Rel.</th>
1640+ : </tr>
1641+ : <tr>
1642+ : <td>BT</td>
1643+ : <td>broader term</td>
1644+ : <td>hypernym</td>
1645+ : </tr>
1646+ : <tr>
1647+ : <td>BTG</td>
1648+ : <td>broader term generic</td>
1649+ : <td>hypernym</td>
1650+ : </tr>
1651+ : <tr>
1652+ : <td>BTI</td>
1653+ : <td>broader term instance</td>
1654+ : <td>instance hypernym</td>
1655+ : </tr>
1656+ : <tr>
1657+ : <td>BTP</td>
1658+ : <td>broader term partitive</td>
1659+ : <td>part meronym</td>
1660+ : </tr>
1661+ : <tr>
1662+ : <td>NT</td>
1663+ : <td>narrower term</td>
1664+ : <td>hyponym</td>
1665+ : </tr>
1666+ : <tr>
1667+ : <td>NTG</td>
1668+ : <td>narrower term generic</td>
1669+ : <td>hyponym</td>
1670+ : </tr>
1671+ : <tr>
1672+ : <td>NTI</td>
1673+ : <td>narrower term instance</td>
1674+ : <td>instance hyponym</td>
1675+ : </tr>
1676+ : <tr>
1677+ : <td>NTP</td>
1678+ : <td>narrower term partitive</td>
1679+ : <td>part holonym</td>
1680+ : </tr>
1681+ : <tr>
1682+ : <td>RT</td>
1683+ : <td>related term</td>
1684+ : <td>also see</td>
1685+ : </tr>
1686+ : <tr>
1687+ : <td>SN</td>
1688+ : <td>scope note</td>
1689+ : <td>n/a</td>
1690+ : </tr>
1691+ : <tr>
1692+ : <td>TT</td>
1693+ : <td>top term</td>
1694+ : <td>hypernym</td>
1695+ : </tr>
1696+ : <tr>
1697+ : <td>UF</td>
1698+ : <td>non-preferred term</td>
1699+ : <td>n/a</td>
1700+ : </tr>
1701+ : <tr>
1702+ : <td>USE</td>
1703+ : <td>preferred term</td>
1704+ : <td>n/a</td>
1705+ : </tr>
1706+ : </table>
1707+ : Note that you can specify relationships
1708+ : either by their abbreviation
1709+ : or their meaning.
1710+ : Relationships are case-insensitive.
1711+ :
1712+ : In addition to the
1713+ : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a>
1714+ : and
1715+ : <a href="http://www.niso.org/kst/reports/standards?step=2&amp;gid=&amp;project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a>
1716+ : relationships,
1717+ : Zorba also supports all of the relationships offered by WordNet.
1718+ : These relationships are:
1719+ : <table class="ft_rels">
1720+ : <tr>
1721+ : <th>Relationship</th>
1722+ : <th>Meaning</th>
1723+ : </tr>
1724+ : <tr>
1725+ : <td nowrap="nowrap">also see</td>
1726+ : <td>
1727+ : A word that is related to another,
1728+ : e.g., for "varnished" (furniture)
1729+ : one should <em>also see</em> "finished."
1730+ : </td>
1731+ : </tr>
1732+ : <tr>
1733+ : <td>antonym</td>
1734+ : <td>
1735+ : A word opposite in meaning to another,
1736+ : e.g., "light" is an <em>antonym</em> for "heavy."
1737+ : </td>
1738+ : </tr>
1739+ : <tr>
1740+ : <td>attribute</td>
1741+ : <td>
1742+ : A noun for which adjectives express values,
1743+ : e.g., "weight" is an <em>attribute</em>
1744+ : for which the adjectives "light" and "heavy"
1745+ : express values.
1746+ : </td>
1747+ : </tr>
1748+ : <tr>
1749+ : <td>cause</td>
1750+ : <td>
1751+ : A verb that causes another,
1752+ : e.g., "show" is a <em>cause</em> of "see."
1753+ : </td>
1754+ : </tr>
1755+ : <tr>
1756+ : <td nowrap="nowrap">derivationally related form</td>
1757+ : <td>
1758+ : A word that is derived from a root word,
1759+ : e.g., "metric" is a <em>derivationally related form</em> of "meter."
1760+ : </td>
1761+ : </tr>
1762+ : <tr>
1763+ : <td nowrap="nowrap">derived from adjective</td>
1764+ : <td>
1765+ : An adverb that is derived from an adjective,
1766+ : e.g., "correctly" is <em>derived from the adjective</em> "correct."
1767+ : </td>
1768+ : </tr>
1769+ : <tr>
1770+ : <td>entailment</td>
1771+ : <td>
1772+ : A verb that presupposes another,
1773+ : e.g., "snoring" <em>entails</em> "sleeping."
1774+ : </td>
1775+ : </tr>
1776+ : <tr>
1777+ : <td>hypernym</td>
1778+ : <td>
1779+ : A word with a broad meaning that more specific words fall under,
1780+ : e.g., "meal" is a <em>hypernym</em> of "breakfast."
1781+ : </td>
1782+ : </tr>
1783+ : <tr>
1784+ : <td>hyponym</td>
1785+ : <td>
1786+ : A word of more specific meaning than a general term applicable to it,
1787+ : e.g., "breakfast" is a <em>hyponym</em> of "meal."
1788+ : </td>
1789+ : </tr>
1790+ : <tr>
1791+ : <td nowrap="nowrap">instance hypernym</td>
1792+ : <td>
1793+ : A word that denotes a category of some specific instance,
1794+ : e.g., "author" is an <em>instance hypernym</em> of "Asimov."
1795+ : </td>
1796+ : </tr>
1797+ : <tr>
1798+ : <td nowrap="nowrap">instance hyponym</td>
1799+ : <td>
1800+ : A term that denotes a specific instance of some general category,
1801+ : e.g., "Asimov" is an <em>instance hyponym</em> of "author."
1802+ : </td>
1803+ : </tr>
1804+ : <tr>
1805+ : <td nowrap="nowrap">member holonym</td>
1806+ : <td>
1807+ : A word that denotes a collection of individuals,
1808+ : e.g., "faculty" is a <em>member holonym</em> of "professor."
1809+ : </td>
1810+ : </tr>
1811+ : <tr>
1812+ : <td nowrap="nowrap">member meronym</td>
1813+ : <td>
1814+ : A word that denotes a member of a larger group,
1815+ : e.g., a "person" is a <em>member meronym</em> of a "crowd."
1816+ : </td>
1817+ : </tr>
1818+ : <tr>
1819+ : <td nowrap="nowrap">part holonym</td>
1820+ : <td>
1821+ : A word that denotes a larger whole comprised of some part,
1822+ : e.g., "car" is a <em>part holonym</em> of "engine."
1823+ : </td>
1824+ : </tr>
1825+ : <tr>
1826+ : <td nowrap="nowrap">part meronym</td>
1827+ : <td>
1828+ : A word that denotes a part of a larger whole,
1829+ : e.g., an "engine" is a <em>part meronym</em> of a "car."
1830+ : </td>
1831+ : </tr>
1832+ : <tr>
1833+ : <td nowrap="nowrap">participle of verb</td>
1834+ : <td>
1835+ : An adjective that is the participle of some verb,
1836+ : e.g., "breaking" is the <em>participle of the verb</em> "break."
1837+ : </td>
1838+ : </tr>
1839+ : <tr>
1840+ : <td>pertainym</td>
1841+ : <td>
1842+ : An adjective that classifies its noun,
1843+ : e.g., "musical" is a <em>pertainym</em> in "musical instrument."
1844+ : </td>
1845+ : </tr>
1846+ : <tr>
1847+ : <td nowrap="nowrap">similar to</td>
1848+ : <td>
1849+ : Similar, though not necessarily interchangeable, adjectives.
1850+ : For example, "shiny" is <em>similar to</em> "bright",
1851+ : but they have subtle differences.
1852+ : </td>
1853+ : </tr>
1854+ : <tr>
1855+ : <td nowrap="nowrap">substance holonym</td>
1856+ : <td>
1857+ : A word that denotes a larger whole containing some constituent
1858+ : substance, e.g., "bread" is a <em>substance holonym</em> of "flour."
1859+ : </td>
1860+ : </tr>
1861+ : <tr>
1862+ : <td nowrap="nowrap">substance meronym</td>
1863+ : <td>
1864+ : A word that denotes a constituent substance of some larger whole,
1865+ : e.g., "flour" is a <em>substance meronym</em> of "bread."
1866+ : </td>
1867+ : </tr>
1868+ : <tr>
1869+ : <td nowrap="nowrap">verb group</td>
1870+ : <td>
1871+ : A verb that is a member of a group of similar verbs,
1872+ : e.g., "live" is in the <em>verb group</em>
1873+ : of "dwell", "live", "inhabit", etc.
1874+ : </td>
1875+ : </tr>
1876+ : </table>
1877+ : <h2>Notes on tokenization</h2>
1878+ : For general information about Zorba's implementation of tokenization,
1879+ : including what constitutes a token,
1880+ : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_tokenizer">Full Text Tokenizer</a> documentation.
1881+ :)
1882+
1883+(:===========================================================================:)
1884+
1885+module namespace ft = "http://www.zorba-xquery.com/modules/full-text";
1886+
1887+import schema namespace ft-schema =
1888+ "http://www.zorba-xquery.com/modules/full-text";
1889+
1890+declare namespace err = "http://www.w3.org/2005/xqt-errors";
1891+declare namespace zerr = "http://www.zorba-xquery.com/errors";
1892+
1893+declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
1894+declare option ver:module-version "2.0";
1895+
1896+(:===========================================================================:)
1897+
1898+(:~
1899+ : Predeclared constant for the Danish
1900+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1901+ :)
1902+declare variable $ft:lang-da as xs:language := xs:language("da");
1903+
1904+(:~
1905+ : Predeclared constant for the German
1906+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1907+ :)
1908+declare variable $ft:lang-de as xs:language := xs:language("de");
1909+
1910+(:~
1911+ : Predeclared constant for the English
1912+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1913+ :)
1914+declare variable $ft:lang-en as xs:language := xs:language("en");
1915+
1916+(:~
1917+ : Predeclared constant for the Spanish
1918+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1919+ :)
1920+declare variable $ft:lang-es as xs:language := xs:language("es");
1921+
1922+(:~
1923+ : Predeclared constant for the Finnish
1924+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1925+ :)
1926+declare variable $ft:lang-fi as xs:language := xs:language("fi");
1927+
1928+(:~
1929+ : Predeclared constant for the French
1930+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1931+ :)
1932+declare variable $ft:lang-fr as xs:language := xs:language("fr");
1933+
1934+(:~
1935+ : Predeclared constant for the Hungarian
1936+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1937+ :)
1938+declare variable $ft:lang-hu as xs:language := xs:language("hu");
1939+
1940+(:~
1941+ : Predeclared constant for the Italian
1942+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1943+ :)
1944+declare variable $ft:lang-it as xs:language := xs:language("it");
1945+
1946+(:~
1947+ : Predeclared constant for the Dutch
1948+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1949+ :)
1950+declare variable $ft:lang-nl as xs:language := xs:language("nl");
1951+
1952+(:~
1953+ : Predeclared constant for the Norwegian
1954+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1955+ :)
1956+declare variable $ft:lang-no as xs:language := xs:language("no");
1957+
1958+(:~
1959+ : Predeclared constant for the Portuguese
1960+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1961+ :)
1962+declare variable $ft:lang-pt as xs:language := xs:language("pt");
1963+
1964+(:~
1965+ : Predeclared constant for the Romanian
1966+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1967+ :)
1968+declare variable $ft:lang-ro as xs:language := xs:language("ro");
1969+
1970+(:~
1971+ : Predeclared constant for the Russian
1972+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1973+ :)
1974+declare variable $ft:lang-ru as xs:language := xs:language("ru");
1975+
1976+(:~
1977+ : Predeclared constant for the Swedish
1978+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1979+ :)
1980+declare variable $ft:lang-sv as xs:language := xs:language("sv");
1981+
1982+(:~
1983+ : Predeclared constant for the Turkish
1984+ : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
1985+ :)
1986+declare variable $ft:lang-tr as xs:language := xs:language("tr");
1987+
1988+(:===========================================================================:)
1989+
1990+(:~
1991+ : Gets the current
1992+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>:
1993+ : either the language specified by the
1994+ : <code><a href="http://www.w3.org/TR/xpath-full-text-10/#doc-xquery10-FTOptionDecl">declare ft-option using</a>
1995+ : <a href="http://www.w3.org/TR/xpath-full-text-10/#ftlanguageoption">language</a></code>
1996+ : statement (if any)
1997+ : or the one returned by <code>ft:host-lang()</code> (if none).
1998+ :
1999+ : @return said language.
2000+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq
2001+ :)
2002+declare function ft:current-lang()
2003+ as xs:language external;
2004+
2005+(:~
2006+ : Gets the host's current
2007+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>.
2008+ : The "host" is the computer on which Zorba is running.
2009+ : The host's current language is obtained as follows:
2010+ : <ul>
2011+ : <li>
2012+ : For *nix systems:
2013+ : <ol>
2014+ : <li>
2015+ : If <a href="http://www.cplusplus.com/reference/clibrary/clocale/setlocale/"><code>setlocale</code>(3)</a> returns non-null,
2016+ : the language corresponding to that locale is used.
2017+ : </li>
2018+ : <li>
2019+ : Else, if the <code>LANG</code> environment variable is set,
2020+ : that language is used.
2021+ : </li>
2022+ : <li>
2023+ : Otherwise, there is no default language.
2024+ : </li>
2025+ : </ol>
2026+ : </li>
2027+ : <li>
2028+ : For Windows systems,
2029+ : the language corresponding to the locale returned by the
2030+ : <a href="http://msdn.microsoft.com/en-us/library/windows/desktop/dd318101(v=vs.85).aspx"><code>GetLocaleInfo()</code></a>
2031+ : function is used.
2032+ : </li>
2033+ : </ul>
2034+ :
2035+ : @return said language.
2036+ :)
2037+declare function ft:host-lang()
2038+ as xs:language external;
2039+
2040+(:~
2041+ : Checks whether the given
2042+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2043+ : is supported for stemming.
2044+ :
2045+ : @param $lang The language to check.
2046+ : @return <code>true</code> only if the language is supported.
2047+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq
2048+ :)
2049+declare function ft:is-stem-lang-supported( $lang as xs:language )
2050+ as xs:boolean external;
2051+
2052+(:~
2053+ : Checks whether the given word is a stop-word.
2054+ :
2055+ : @param $word The word to check.
2056+ : @param $lang The
2057+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2058+ : of <code>$word</code>.
2059+ : @return <code>true</code> only if <code>$word</code> is a stop-word.
2060+ : @error zerr:ZXQP8405 if <code>$lang</code> is not supported for stop-words.
2061+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq
2062+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq
2063+ :)
2064+declare function ft:is-stop-word( $word as xs:string, $lang as xs:language )
2065+ as xs:boolean external;
2066+
2067+(:~
2068+ : Checks whether the given word is a stop-word.
2069+ :
2070+ : @param $word The word to check.
2071+ : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2072+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2073+ : @return <code>true</code> only if <code>$word</code> is a stop-word.
2074+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2075+ : general.
2076+ : @error zerr:ZXQP8405 if <code>ft:current-lang()</code> is not supported for
2077+ : stop-words specifically.
2078+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq
2079+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq
2080+ :)
2081+declare function ft:is-stop-word( $word as xs:string )
2082+ as xs:boolean external;
2083+
2084+(:~
2085+ : Checks whether the given
2086+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2087+ : is supported for stop words.
2088+ :
2089+ : @param $lang The language to check.
2090+ : @return <code>true</code> only if the language is supported.
2091+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq
2092+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq
2093+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq
2094+ :)
2095+declare function ft:is-stop-word-lang-supported( $lang as xs:language )
2096+ as xs:boolean external;
2097+
2098+(:~
2099+ : Checks whether the given
2100+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2101+ : is supported for look-up using the default thesaurus.
2102+ :
2103+ : @param $lang The language to check.
2104+ : @return <code>true</code> only if the language is supported.
2105+ :)
2106+declare function ft:is-thesaurus-lang-supported( $lang as xs:language )
2107+ as xs:boolean external;
2108+
2109+(:~
2110+ : Checks whether the given
2111+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2112+ : is supported for look-up using the thesaurus specified by the given URI.
2113+ :
2114+ : @param $uri The URI specifying the thesaurus to use.
2115+ : @param $lang The language to check.
2116+ : @return <code>true</code> only if the language is supported.
2117+ : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
2118+ : that is not found in the statically known thesauri.
2119+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq
2120+ :)
2121+declare function ft:is-thesaurus-lang-supported( $uri as xs:string,
2122+ $lang as xs:language )
2123+ as xs:boolean external;
2124+
2125+(:~
2126+ : Checks whether the given
2127+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2128+ : is supported for tokenization.
2129+ :
2130+ : @param $lang The language to check.
2131+ : @return <code>true</code> only if the language is supported.
2132+ :)
2133+declare function ft:is-tokenizer-lang-supported( $lang as xs:language )
2134+ as xs:boolean external;
2135+
2136+(:~
2137+ : Stems the given word.
2138+ :
2139+ : @param $word The word to stem.
2140+ : @param $lang The
2141+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2142+ : of <code>$word</code>.
2143+ : @return the stem of <code>$word</code>.
2144+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2145+ : @error zerr:ZXQP8404 if <code>$lang</code> is not supported for stemming
2146+ : specifically.
2147+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq
2148+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq
2149+ :)
2150+declare function ft:stem( $word as xs:string, $lang as xs:language )
2151+ as xs:string external;
2152+
2153+(:~
2154+ : Stems the given word.
2155+ :
2156+ : @param $word The word to stem.
2157+ : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2158+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2159+ : @return the stem of <code>$word</code>.
2160+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2161+ : general.
2162+ : @error zerr:ZXQP8404 if <code>ft:current-lang()</code> is not supported for
2163+ : stemming specifically.
2164+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq
2165+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq
2166+ :)
2167+declare function ft:stem( $word as xs:string )
2168+ as xs:string external;
2169+
2170+(:~
2171+ : Strips all diacritical marks from all characters.
2172+ :
2173+ : @param $string The string to strip diacritical marks from.
2174+ : @return <code>$string</code> with diacritical marks stripped.
2175+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq
2176+ :)
2177+declare function ft:strip-diacritics( $string as xs:string )
2178+ as xs:string external;
2179+
2180+(:~
2181+ : Looks up the given phrase in the default thesaurus.
2182+ :
2183+ : @param $phrase The phrase to look up.
2184+ : The phrase's
2185+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2186+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2187+ : @return the original and related phrases.
2188+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2189+ : general.
2190+ : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
2191+ : by the currently running version of Zorba.
2192+ : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
2193+ : that of the CPU on which Zorba is currently running.
2194+ : @error zerr:ZXQP8403 if there was an error reading the thesaurus data.
2195+ : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for
2196+ : thesaurus look-up specifically.
2197+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq
2198+ :)
2199+declare function ft:thesaurus-lookup( $phrase as xs:string )
2200+ as xs:string+ external;
2201+
2202+(:~
2203+ : Looks up the given phrase in the thesaurus specified by the given URI.
2204+ :
2205+ : @param $uri The URI specifying the thesaurus to use.
2206+ : @param $phrase The phrase to look up.
2207+ : @param $lang The
2208+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2209+ : of <code>$phrase</code>.
2210+ : @return the original and related phrases.
2211+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2212+ : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
2213+ : that is not found in the statically known thesauri.
2214+ : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
2215+ : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
2216+ : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
2217+ : by the currently running version of Zorba.
2218+ : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
2219+ : that of the CPU on which Zorba is currently running.
2220+ : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
2221+ : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
2222+ : look-up specifically.
2223+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq
2224+ :)
2225+declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
2226+ $lang as xs:language )
2227+ as xs:string+ external;
2228+
2229+(:~
2230+ : Looks up the given phrase in a thesaurus.
2231+ :
2232+ : @param $uri The URI specifying the thesaurus to use.
2233+ : @param $phrase The phrase to look up.
2234+ : The phrase's
2235+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2236+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2237+ : @return the original and related phrases.
2238+ : @error err:FTST0009 if <code>ft:current-lang()</code> is unsupported in
2239+ : general.
2240+ : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
2241+ : that is not found in the statically known thesauri.
2242+ : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
2243+ : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
2244+ : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
2245+ : by the currently running version of Zorba.
2246+ : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
2247+ : that of the CPU on which Zorba is currently running.
2248+ : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
2249+ : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for
2250+ : thesaurus look-up specifically.
2251+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq
2252+ :)
2253+declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string )
2254+ as xs:string+ external;
2255+
2256+(:~
2257+ : Looks up the given phrase in a thesaurus.
2258+ :
2259+ : @param $uri The URI specifying the thesaurus to use.
2260+ : @param $phrase The phrase to look up.
2261+ : @param $lang The
2262+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2263+ : of <code>$phrase</code>.
2264+ : @param $relationship The relationship the results are to have to
2265+ : <code>$phrase</code>.
2266+ : @return the original and related phrases.
2267+ : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
2268+ : that is not found in the statically known thesauri.
2269+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2270+ : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
2271+ : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
2272+ : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
2273+ : by the currently running version of Zorba.
2274+ : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
2275+ : that of the CPU on which Zorba is currently running.
2276+ : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
2277+ : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
2278+ : look-up specifically.
2279+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq
2280+ :)
2281+declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
2282+ $lang as xs:language,
2283+ $relationship as xs:string )
2284+ as xs:string+ external;
2285+
2286+(:~
2287+ : Looks up the given phrase in a thesaurus.
2288+ :
2289+ : @param $uri The URI specifying the thesaurus to use.
2290+ : @param $phrase The phrase to look up.
2291+ : @param $lang The
2292+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2293+ : of <code>$phrase</code>.
2294+ : @param $relationship The relationship the results are to have to
2295+ : <code>$phrase</code>.
2296+ : @param $level-least The minimum number of levels within the thesaurus to be
2297+ : traversed.
2298+ : @param $level-most The maximum number of levels within the thesaurus to be
2299+ : traversed.
2300+ : @return the original and related phrases.
2301+ : @error err:FOCA0003 if either <code>$level-least</code> or
2302+ : <code>$level-most</code> is either negative or too large.
2303+ : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
2304+ : that is not found in the statically known thesauri.
2305+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2306+ : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
2307+ : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
2308+ : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
2309+ : by the currently running version of Zorba.
2310+ : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
2311+ : that of the CPU on which Zorba is currently running.
2312+ : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
2313+ : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
2314+ : look-up specifically.
2315+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq
2316+ :)
2317+declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
2318+ $lang as xs:language,
2319+ $relationship as xs:string,
2320+ $level-least as xs:integer,
2321+ $level-most as xs:integer )
2322+ as xs:string+ external;
2323+
2324+(:~
2325+ : Tokenizes the given document.
2326+ :
2327+ : @param $node The node to tokenize.
2328+ : @param $lang The default
2329+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2330+ : of <code>$node</code>.
2331+ : @return a (possibly empty) sequence of tokens.
2332+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2333+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq
2334+ :)
2335+declare function ft:tokenize( $node as node(), $lang as xs:language )
2336+ as element(ft-schema:token)* external;
2337+
2338+(:~
2339+ : Tokenizes the given document.
2340+ :
2341+ : @param $node The node to tokenize.
2342+ : The document's default
2343+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2344+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2345+ : @return a (possibly empty) sequence of tokens.
2346+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2347+ : general.
2348+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq
2349+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq
2350+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq
2351+ :)
2352+declare function ft:tokenize( $node as node() )
2353+ as element(ft-schema:token)* external;
2354+
2355+(:~
2356+ : Tokenizes the given string.
2357+ :
2358+ : @param $string The string to tokenize.
2359+ : @param $lang The default
2360+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2361+ : of <code>$string</code>.
2362+ : @return a (possibly empty) sequence of tokens.
2363+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2364+ : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for
2365+ : tokenization specifically.
2366+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq
2367+ :)
2368+declare function ft:tokenize-string( $string as xs:string,
2369+ $lang as xs:language )
2370+ as xs:string* external;
2371+
2372+(:~
2373+ : Tokenizes the given string.
2374+ :
2375+ : @param $string The string to tokenize.
2376+ : The string's default
2377+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2378+ : is assumed to be the one returned by <code>ft:current-lang()</code>.
2379+ : @return a (possibly empty) sequence of tokens.
2380+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2381+ : general.
2382+ : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for
2383+ : tokenization specifically.
2384+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq
2385+ :)
2386+declare function ft:tokenize-string( $string as xs:string )
2387+ as xs:string* external;
2388+
2389+(:~
2390+ : Gets properties of the tokenizer for the given
2391+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>.
2392+ :
2393+ : @param $lang The language of the tokenizer to get the properties of.
2394+ : @return said properties.
2395+ : @error err:FTST0009 if <code>$lang</code> is not supported in general.
2396+ : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for
2397+ : tokenization specifically.
2398+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-1.xq
2399+ :)
2400+declare function ft:tokenizer-properties( $lang as xs:language )
2401+ as element(ft-schema:tokenizer-properties) external;
2402+
2403+(:~
2404+ : Gets properties of the tokenizer for the
2405+ : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
2406+ : returned by <code>ft:current-lang()</code>.
2407+ :
2408+ : @return said properties.
2409+ : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
2410+ : general.
2411+ : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for
2412+ : tokenization specifically.
2413+ : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-2.xq
2414+ :)
2415+declare function ft:tokenizer-properties()
2416+ as element(ft-schema:tokenizer-properties) external;
2417+
2418+(:===========================================================================:)
2419+
2420+(: vim:set et sw=2 ts=2: :)
2421
2422=== added file 'modules/com/zorba-xquery/www/modules/full-text.xsd'
2423--- modules/com/zorba-xquery/www/modules/full-text.xsd 1970-01-01 00:00:00 +0000
2424+++ modules/com/zorba-xquery/www/modules/full-text.xsd 2012-04-26 16:53:22 +0000
2425@@ -0,0 +1,134 @@
2426+<?xml version="1.0"?>
2427+<!--
2428+ ! Copyright 2006-2011 The FLWOR Foundation.
2429+ !
2430+ ! Licensed under the Apache License, Version 2.0 (the "License");
2431+ ! you may not use this file except in compliance with the License.
2432+ ! You may obtain a copy of the License at
2433+ !
2434+ ! http://www.apache.org/licenses/LICENSE-2.0
2435+ !
2436+ ! Unless required by applicable law or agreed to in writing, software
2437+ ! distributed under the License is distributed on an "AS IS" BASIS,
2438+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2439+ ! See the License for the specific language governing permissions and
2440+ ! limitations under the License.
2441+-->
2442+
2443+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
2444+ targetNamespace="http://www.zorba-xquery.com/modules/full-text"
2445+ xmlns="http://www.zorba-xquery.com/modules/full-text"
2446+ elementFormDefault="qualified"
2447+ attributeFormDefault="unqualified">
2448+
2449+ <!--======================================================================-->
2450+
2451+ <xs:element name="compare-options">
2452+ <xs:complexType>
2453+ <xs:attributeGroup ref="compare-attributes"/>
2454+ </xs:complexType>
2455+ </xs:element>
2456+
2457+ <xs:attributeGroup name="compare-attributes">
2458+ <xs:attribute name="case" type="sensitivity" default="insensitive"/>
2459+ <xs:attribute name="diacritics" type="sensitivity" default="insensitive"/>
2460+ <xs:attribute name="stem" type="yes-no-both" default="no"/>
2461+ </xs:attributeGroup>
2462+
2463+ <xs:simpleType name="sensitivity">
2464+ <xs:restriction base="xs:string">
2465+ <xs:enumeration value="insensitive"/>
2466+ <xs:enumeration value="sensitive"/>
2467+ <xs:enumeration value="both"/>
2468+ </xs:restriction>
2469+ </xs:simpleType>
2470+
2471+ <xs:simpleType name="yes-no-both">
2472+ <xs:restriction base="xs:string">
2473+ <xs:enumeration value="yes"/>
2474+ <xs:enumeration value="no"/>
2475+ <xs:enumeration value="both"/>
2476+ </xs:restriction>
2477+ </xs:simpleType>
2478+
2479+ <xs:complexType name="boolean-value">
2480+ <xs:attribute name="value" type="xs:boolean" use="required"/>
2481+ </xs:complexType>
2482+
2483+ <!--======================================================================-->
2484+
2485+ <xs:element name="token">
2486+ <xs:complexType>
2487+
2488+ <!-- The language of the token. -->
2489+ <xs:attribute name="lang" type="xs:language"/>
2490+
2491+ <!-- The sentence number. -->
2492+ <xs:attribute name="sentence" type="xs:nonNegativeInteger" use="required"/>
2493+
2494+ <!-- The paragraph number. -->
2495+ <xs:attribute name="paragraph" type="xs:nonNegativeInteger" use="required"/>
2496+
2497+ <!-- The token string value. -->
2498+ <xs:attribute name="value" type="xs:string" use="required"/>
2499+
2500+ <!--
2501+ ! A reference to the originating node. If the token occurred within an
2502+ ! element, the reference refers to the text node. If the token occurred
2503+ ! within an attribute, the reference refers to the attribute node.
2504+ -->
2505+ <xs:attribute name="node-ref" type="xs:anyURI"/>
2506+
2507+ </xs:complexType>
2508+ </xs:element>
2509+
2510+ <!--======================================================================-->
2511+
2512+ <xs:element name="tokenizer-properties">
2513+ <xs:complexType>
2514+ <xs:all>
2515+
2516+ <!--
2517+ ! If true, XML comments separate tokens. (No example can be provided
2518+ ! here because it is illegal to nest an XML comment inside an XML
2519+ ! comment.)
2520+ -->
2521+ <xs:element name="comments-separate-tokens" type="boolean-value"/>
2522+
2523+ <!--
2524+ ! If true, XML elements separate tokens. For example,
2525+ ! <b>B</b>old would be 2 tokens instead of 1.
2526+ -->
2527+ <xs:element name="elements-separate-tokens" type="boolean-value"/>
2528+
2529+ <!--
2530+ ! If true, XML processing instructions separate tokens. For example,
2531+ ! net<?PI pi?>work would be 2 tokens instead of 1.
2532+ -->
2533+ <xs:element name="processing-instructions-separate-tokens" type="boolean-value"/>
2534+
2535+ <!--
2536+ ! The list of languages that the tokenizer can tokenize.
2537+ -->
2538+ <xs:element name="supported-languages">
2539+ <xs:complexType>
2540+ <xs:sequence>
2541+ <xs:element name="lang" type="xs:language" maxOccurs="unbounded"/>
2542+ </xs:sequence>
2543+ </xs:complexType>
2544+ </xs:element>
2545+
2546+ </xs:all>
2547+
2548+ <!--
2549+ ! The tokenizer's identifying URI.
2550+ -->
2551+ <xs:attribute name="uri" type="xs:anyURI"/>
2552+
2553+ </xs:complexType>
2554+ </xs:element>
2555+
2556+ <!--======================================================================-->
2557+
2558+</xs:schema>
2559+<!-- vim:set et sw=2 ts=2: -->
2560
2561=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp'
2562--- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-24 12:39:38 +0000
2563+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-26 16:53:22 +0000
2564@@ -39,7 +39,6 @@
2565 theSerStream(NULL),
2566 thePost(NULL),
2567 theLast(NULL),
2568- theLastSerializerOptions(NULL),
2569 theIsHeadRequest(false)
2570 {
2571 theHeaderLists.push_back(NULL);
2572@@ -260,6 +259,7 @@
2573 void HttpRequestHandler::cleanUpBody()
2574 {
2575 delete theSerStream;
2576+ theSerStream = 0;
2577 theLastBodyHadContent = false;
2578 }
2579
2580
2581=== modified file 'modules/com/zorba-xquery/www/modules/pregenerated/errors.xq'
2582--- modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-24 12:39:38 +0000
2583+++ modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-26 16:53:22 +0000
2584@@ -188,6 +188,7 @@
2585
2586 (:~
2587 :
2588+ : The thesaurus data file's endianness does not match that of the CPU.
2589 :
2590 :)
2591 declare variable $zerr:ZXQP8402 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8402");
2592@@ -201,6 +202,22 @@
2593
2594 (:~
2595 :)
2596+declare variable $zerr:ZXQP8404 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8404");
2597+
2598+(:~
2599+:)
2600+declare variable $zerr:ZXQP8405 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8405");
2601+
2602+(:~
2603+:)
2604+declare variable $zerr:ZXQP8406 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8406");
2605+
2606+(:~
2607+:)
2608+declare variable $zerr:ZXQP8407 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8407");
2609+
2610+(:~
2611+:)
2612 declare variable $zerr:ZXQD0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQD0001");
2613
2614 (:~
2615
2616=== modified file 'modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq'
2617--- modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-24 12:39:38 +0000
2618+++ modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-26 16:53:22 +0000
2619@@ -839,9 +839,7 @@
2620 if(fn:matches($specLine, "Args:")) then
2621 let $arg_split := fn:substring-after($specLine, "-x")
2622 return
2623- if(fn:string-length($arg_split) eq 0) then
2624- fn:error($err:UE008, fn:concat("Unknown Args: in spec file for example <", $exampleSource,"> .
2625- Add the example input and expected output by hand in the example, in a commentary that should also include the word 'output'."))
2626+ if(fn:string-length($arg_split) eq 0) then string-join($specLines, " ")
2627 else
2628 let $var_value := fn:tokenize($arg_split, "=")
2629 let $var_name := fn:normalize-space(fn:replace($var_value[1], ":$", ""))
2630
2631=== modified file 'scripts/zt-wn-get'
2632--- scripts/zt-wn-get 2012-04-24 12:39:38 +0000
2633+++ scripts/zt-wn-get 2012-04-26 16:53:22 +0000
2634@@ -22,7 +22,7 @@
2635 echo 'Arguments: [--workdir <workdir>] [--builddir <builddir>]'
2636 echo ' [--thesaurusurl <thesaurusurl>]'
2637 echo ' <zorba_repository>'
2638- echo '<zorba_repository> is the top-level SVN working copy.'
2639+ echo '<zorba_repository> is the top-level BZR working copy.'
2640 echo '<workdir> is a temp directory to download and unzip XQTS (default: /tmp).'
2641 echo '<builddir> is the directory Zorba has been built in'
2642 echo ' (default: <zorba_repository>/build)'
2643@@ -71,8 +71,8 @@
2644 echo Build dir is at $BUILD
2645
2646 # Compile thesaurus to binary format
2647-mkdir -p $BUILD/test/rbkt/thesauri
2648-THESAURUS_DEST="$BUILD/test/rbkt/thesauri/wordnet-en.zth"
2649+mkdir -p $BUILD/LIB_PATH/edu/princeton/wordnet
2650+THESAURUS_DEST="$BUILD/LIB_PATH/edu/princeton/wordnet/wordnet-en.zth"
2651 echo "Compiling thesaurus to $THESAURUS_DEST..."
2652 untar_dir=`mktemp -d "$WORK/thesaurus.XXXXXX"`
2653 cd "$untar_dir"
2654
2655=== modified file 'src/api/CMakeLists.txt'
2656--- src/api/CMakeLists.txt 2012-04-24 12:39:38 +0000
2657+++ src/api/CMakeLists.txt 2012-04-26 16:53:22 +0000
2658@@ -62,8 +62,9 @@
2659 IF (NOT ZORBA_NO_FULL_TEXT)
2660 LIST(APPEND API_SRCS
2661 stemmer.cpp
2662- stemmer_wrapper.cpp
2663- thesaurus.cpp)
2664+ stemmer_wrappers.cpp
2665+ thesaurus.cpp
2666+ thesaurus_wrappers.cpp)
2667 ENDIF (NOT ZORBA_NO_FULL_TEXT)
2668
2669 ADD_SRC_SUBFOLDER(API_SRCS serialization API_SERIALIZATION_SRCS)
2670
2671=== modified file 'src/api/staticcontextimpl.cpp'
2672--- src/api/staticcontextimpl.cpp 2012-04-24 12:39:38 +0000
2673+++ src/api/staticcontextimpl.cpp 2012-04-26 16:53:22 +0000
2674@@ -42,8 +42,8 @@
2675 #include "context/static_context.h"
2676 #include "context/static_context_consts.h"
2677 #ifndef ZORBA_NO_FULL_TEXT
2678-#include "context/stemmer_wrappers.h"
2679-#include "context/thesaurus_wrappers.h"
2680+#include "stemmer_wrappers.h"
2681+#include "thesaurus_wrappers.h"
2682 #endif /* ZORBA_NO_FULL_TEXT */
2683 #include "uri_resolver_wrappers.h"
2684
2685@@ -65,7 +65,6 @@
2686
2687 namespace zorba {
2688
2689-
2690 /*******************************************************************************
2691 Create a StaticContextImpl obj as well as an internal static_context obj S.
2692 S is created as a child of the zorba root sctx. This constructor is used
2693
2694=== renamed file 'src/api/stemmer_wrapper.cpp' => 'src/api/stemmer_wrappers.cpp'
2695--- src/api/stemmer_wrapper.cpp 2012-04-24 12:39:38 +0000
2696+++ src/api/stemmer_wrappers.cpp 2012-04-26 16:53:22 +0000
2697@@ -23,7 +23,7 @@
2698 #include "diagnostics/assert.h"
2699 #include "util/cxx_util.h"
2700
2701-#include "stemmer_wrapper.h"
2702+#include "stemmer_wrappers.h"
2703
2704 using namespace zorba::locale;
2705
2706@@ -32,8 +32,8 @@
2707
2708 ///////////////////////////////////////////////////////////////////////////////
2709
2710-StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr p ) :
2711- api_stemmer_( std::move( p ) )
2712+StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr api_stemmer ) :
2713+ api_stemmer_( std::move( api_stemmer ) )
2714 {
2715 ZORBA_ASSERT( api_stemmer_.get() );
2716 }
2717@@ -42,6 +42,12 @@
2718 api_stemmer_.release()->destroy();
2719 }
2720
2721+void StemmerWrapper::properties( Properties *props ) const {
2722+ zorba::Stemmer::Properties api_props;
2723+ api_stemmer_->properties( &api_props );
2724+ props->uri = api_props.uri;
2725+}
2726+
2727 void StemmerWrapper::stem( zstring const &word, iso639_1::type lang,
2728 zstring *result ) const {
2729 String const api_word( Unmarshaller::newString( word ) );
2730@@ -52,19 +58,22 @@
2731 ///////////////////////////////////////////////////////////////////////////////
2732
2733 StemmerProviderWrapper::
2734-StemmerProviderWrapper( zorba::StemmerProvider const *p ) :
2735- api_stemmer_provider_( p )
2736+StemmerProviderWrapper( zorba::StemmerProvider const *api_stemmer_provider ) :
2737+ api_stemmer_provider_( api_stemmer_provider )
2738 {
2739 ZORBA_ASSERT( api_stemmer_provider_ );
2740 }
2741
2742-Stemmer::ptr
2743-StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const {
2744- zorba::Stemmer::ptr p( api_stemmer_provider_->getStemmer( lang ) );
2745- Stemmer::ptr result;
2746- if ( p.get() )
2747- result.reset( new StemmerWrapper( std::move( p ) ) );
2748- return std::move( result );
2749+bool StemmerProviderWrapper::getStemmer( iso639_1::type lang,
2750+ Stemmer::ptr *result ) const {
2751+ zorba::Stemmer::ptr api_ptr;
2752+ zorba::Stemmer::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr;
2753+ if ( api_stemmer_provider_->getStemmer( lang, api_ptr_ptr ) ) {
2754+ if ( result )
2755+ result->reset( new StemmerWrapper( std::move( api_ptr ) ) );
2756+ return true;
2757+ }
2758+ return false;
2759 }
2760
2761 ///////////////////////////////////////////////////////////////////////////////
2762
2763=== renamed file 'src/api/stemmer_wrapper.h' => 'src/api/stemmer_wrappers.h'
2764--- src/api/stemmer_wrapper.h 2012-04-24 12:39:38 +0000
2765+++ src/api/stemmer_wrappers.h 2012-04-26 16:53:22 +0000
2766@@ -35,6 +35,7 @@
2767
2768 // inherited
2769 void destroy() const;
2770+ void properties( Properties* ) const;
2771 void stem( zstring const &word, locale::iso639_1::type lang,
2772 zstring *result ) const;
2773 private:
2774@@ -50,7 +51,7 @@
2775 }
2776
2777 // inherited
2778- Stemmer::ptr get_stemmer( locale::iso639_1::type lang ) const;
2779+ bool getStemmer( locale::iso639_1::type, Stemmer::ptr* = 0 ) const;
2780 private:
2781 zorba::StemmerProvider const *const api_stemmer_provider_;
2782 };
2783
2784=== modified file 'src/api/thesaurus.cpp'
2785--- src/api/thesaurus.cpp 2012-04-24 12:39:38 +0000
2786+++ src/api/thesaurus.cpp 2012-04-26 16:53:22 +0000
2787@@ -25,9 +25,11 @@
2788 // out-of-line since it's virtual
2789 }
2790
2791-//Thesaurus::iterator::~iterator() {
2792-// // out-of-line since it's virtual
2793-//}
2794+#if 0
2795+Thesaurus::iterator::~iterator() {
2796+ // out-of-line since it's virtual
2797+}
2798+#endif
2799
2800 ///////////////////////////////////////////////////////////////////////////////
2801
2802
2803=== renamed file 'src/context/thesaurus_wrappers.cpp' => 'src/api/thesaurus_wrappers.cpp'
2804--- src/context/thesaurus_wrappers.cpp 2012-04-24 12:39:38 +0000
2805+++ src/api/thesaurus_wrappers.cpp 2012-04-26 16:53:22 +0000
2806@@ -87,6 +87,27 @@
2807
2808 ///////////////////////////////////////////////////////////////////////////////
2809
2810+ThesaurusProviderWrapper::
2811+ThesaurusProviderWrapper( zorba::ThesaurusProvider const *p ) :
2812+ api_thesaurus_provider_( p )
2813+{
2814+ ZORBA_ASSERT( api_thesaurus_provider_ );
2815+}
2816+
2817+bool ThesaurusProviderWrapper::getThesaurus( iso639_1::type lang,
2818+ Thesaurus::ptr *result ) const {
2819+ zorba::Thesaurus::ptr api_ptr;
2820+ zorba::Thesaurus::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr;
2821+ if ( api_thesaurus_provider_->getThesaurus( lang, api_ptr_ptr ) ) {
2822+ if ( result )
2823+ result->reset( new ThesaurusWrapper( std::move( api_ptr ) ) );
2824+ return true;
2825+ }
2826+ return false;
2827+}
2828+
2829+///////////////////////////////////////////////////////////////////////////////
2830+
2831 } // namespace internal
2832 } // namespace zorba
2833
2834
2835=== renamed file 'src/context/thesaurus_wrappers.h' => 'src/api/thesaurus_wrappers.h'
2836--- src/context/thesaurus_wrappers.h 2012-04-24 12:39:38 +0000
2837+++ src/api/thesaurus_wrappers.h 2012-04-26 16:53:22 +0000
2838@@ -22,6 +22,7 @@
2839 #ifndef ZORBA_NO_FULL_TEXT
2840
2841 #include <zorba/thesaurus.h>
2842+
2843 #include "runtime/full_text/thesaurus.h"
2844
2845 namespace zorba {
2846@@ -54,6 +55,17 @@
2847 zorba::Thesaurus::ptr api_thesaurus_;
2848 };
2849
2850+class ThesaurusProviderWrapper : public ThesaurusProvider {
2851+public:
2852+ ThesaurusProviderWrapper( zorba::ThesaurusProvider const* );
2853+
2854+ // inherited
2855+ bool getThesaurus( locale::iso639_1::type, Thesaurus::ptr* ) const;
2856+
2857+private:
2858+ zorba::ThesaurusProvider::ptr const api_thesaurus_provider_;
2859+};
2860+
2861 ///////////////////////////////////////////////////////////////////////////////
2862
2863 } // namespace internal
2864
2865=== modified file 'src/api/uri_resolver_wrappers.cpp'
2866--- src/api/uri_resolver_wrappers.cpp 2012-04-24 12:39:38 +0000
2867+++ src/api/uri_resolver_wrappers.cpp 2012-04-26 16:53:22 +0000
2868@@ -15,24 +15,20 @@
2869 */
2870 #include "stdafx.h"
2871
2872+#include <zorba/thesaurus.h>
2873+
2874+#include "runtime/full_text/thesaurus.h"
2875+
2876+#include "thesaurus_wrappers.h"
2877+#include "unmarshaller.h"
2878 #include "uri_resolver_wrappers.h"
2879 #include "uriresolverimpl.h"
2880-#include "unmarshaller.h"
2881-#include <zorba/thesaurus.h>
2882-#include <runtime/full_text/thesaurus.h>
2883-#include <context/thesaurus_wrappers.h>
2884
2885 namespace zorba
2886 {
2887 // "Convenience" class for passing an internal EntityData object to
2888- // external mappers/resolvers. This can serve as a plain EntityData or
2889- // a ThesaurusEntityData. However, when there's another EntityData subclass
2890- // in future, this won't work as EntityData becomes an ambiguous base class...
2891-#ifndef ZORBA_NO_FULL_TEXT
2892- class EntityDataWrapper : public ThesaurusEntityData
2893-#else
2894+ // external mappers/resolvers.
2895 class EntityDataWrapper : public EntityData
2896-#endif /* ZORBA_NO_FULL_TEXT */
2897 {
2898 public:
2899 static EntityDataWrapper const* create(internal::EntityData const* aData) {
2900@@ -45,12 +41,7 @@
2901 return new EntityDataWrapper(EntityData::SCHEMA);
2902 #ifndef ZORBA_NO_FULL_TEXT
2903 case internal::EntityData::THESAURUS:
2904- {
2905- EntityDataWrapper* retval = new EntityDataWrapper(EntityData::THESAURUS);
2906- retval->theThesaurusLang =
2907- dynamic_cast<const internal::ThesaurusEntityData*>(aData)->getLanguage();
2908- return retval;
2909- }
2910+ return new EntityDataWrapper(EntityData::THESAURUS);
2911 case internal::EntityData::STOP_WORDS:
2912 return new EntityDataWrapper(EntityData::STOP_WORDS);
2913 #endif /* ZORBA_NO_FULL_TEXT */
2914@@ -67,21 +58,12 @@
2915 return theKind;
2916 }
2917
2918-#ifndef ZORBA_NO_FULL_TEXT
2919- virtual zorba::locale::iso639_1::type getLanguage() const {
2920- return theThesaurusLang;
2921- }
2922-#endif /* ZORBA_NO_FULL_TEXT */
2923-
2924 private:
2925 EntityDataWrapper(EntityData::Kind aKind)
2926 : theKind(aKind)
2927 {}
2928
2929 EntityData::Kind const theKind;
2930-#ifndef ZORBA_NO_FULL_TEXT
2931- zorba::locale::iso639_1::type theThesaurusLang;
2932-#endif /* ZORBA_NO_FULL_TEXT */
2933 };
2934
2935 URIMapperWrapper::URIMapperWrapper(zorba::URIMapper& aUserMapper)
2936@@ -169,13 +151,13 @@
2937 }
2938 #ifndef ZORBA_NO_FULL_TEXT
2939 else {
2940- Thesaurus* lUserThesaurus = dynamic_cast<Thesaurus*>(lUserPtr.get());
2941- if (lUserThesaurus != NULL) {
2942- // Here we pass memory ownership of the actual Thesaurus to the
2943- // internal ThesaurusWrapper.
2944- lRetval = new internal::ThesaurusWrapper
2945- (Thesaurus::ptr(lUserThesaurus));
2946- lUserPtr.release();
2947+ ThesaurusProvider* lUserThesaurusProvider =
2948+ dynamic_cast<ThesaurusProvider*>(lUserPtr.get());
2949+ if (lUserThesaurusProvider) {
2950+ // Here we pass memory ownership of the actual ThesaurusProvider to
2951+ // the internal ThesaurusProviderWrapper.
2952+ lRetval = new internal::ThesaurusProviderWrapper
2953+ (lUserThesaurusProvider);
2954 }
2955 else {
2956 assert(false);
2957
2958=== modified file 'src/api/xmldatamanagerimpl.cpp'
2959--- src/api/xmldatamanagerimpl.cpp 2012-04-24 12:39:38 +0000
2960+++ src/api/xmldatamanagerimpl.cpp 2012-04-26 16:53:22 +0000
2961@@ -47,7 +47,7 @@
2962 #include "runtime/util/flowctl_exception.h"
2963
2964 #ifndef ZORBA_NO_FULL_TEXT
2965-#include "stemmer_wrapper.h"
2966+#include "stemmer_wrappers.h"
2967 #endif /* ZORBA_NO_FULL_TEXT */
2968
2969 namespace zorba {
2970
2971=== modified file 'src/api/xmldatamanagerimpl.h'
2972--- src/api/xmldatamanagerimpl.h 2012-04-24 12:39:38 +0000
2973+++ src/api/xmldatamanagerimpl.h 2012-04-26 16:53:22 +0000
2974@@ -27,7 +27,7 @@
2975 #include "util/singleton.h"
2976
2977 #ifndef ZORBA_NO_FULL_TEXT
2978-#include "stemmer_wrapper.h"
2979+#include "stemmer_wrappers.h"
2980 #endif /* ZORBA_NO_FULL_TEXT */
2981
2982 namespace zorba {
2983
2984=== modified file 'src/compiler/codegen/plan_visitor.cpp'
2985--- src/compiler/codegen/plan_visitor.cpp 2012-04-24 12:39:38 +0000
2986+++ src/compiler/codegen/plan_visitor.cpp 2012-04-26 16:53:22 +0000
2987@@ -250,7 +250,7 @@
2988 class plan_ftnode_visitor : public ftnode_visitor
2989 {
2990 public:
2991- typedef std::list<PlanIter_t> PlanIter_list_t;
2992+ typedef std::vector<PlanIter_t> PlanIter_list_t;
2993
2994 plan_ftnode_visitor( plan_visitor* v ) : plan_visitor_( v ) { }
2995
2996
2997=== modified file 'src/compiler/expression/expr_put.cpp'
2998--- src/compiler/expression/expr_put.cpp 2012-04-24 12:39:38 +0000
2999+++ src/compiler/expression/expr_put.cpp 2012-04-26 16:53:22 +0000
3000@@ -41,6 +41,7 @@
3001 #include "compiler/expression/function_item_expr.h"
3002 #include "compiler/parser/parse_constants.h"
3003
3004+#include "diagnostics/assert.h"
3005 #include "functions/function.h"
3006 #include "functions/udf.h"
3007
3008
3009=== modified file 'src/compiler/translator/translator.cpp'
3010--- src/compiler/translator/translator.cpp 2012-04-24 12:39:38 +0000
3011+++ src/compiler/translator/translator.cpp 2012-04-26 16:53:22 +0000
3012@@ -68,6 +68,8 @@
3013 #include "functions/signature.h"
3014 #include "functions/udf.h"
3015 #include "functions/external_function.h"
3016+#include "functions/func_ft_module.h"
3017+#include "functions/func_ft_module_impl.h"
3018
3019 #include "annotations/annotations.h"
3020
3021@@ -859,7 +861,7 @@
3022 {
3023 ZORBA_ASSERT(count >= 0);
3024
3025- ftnode *n = NULL;
3026+ ftnode *n = nullptr;
3027 while ( count-- > 0 )
3028 {
3029 ZORBA_FATAL( !theFTNodeStack.empty(), "" );
3030@@ -3294,6 +3296,41 @@
3031 qnameItem->getLocalName())));
3032 }
3033
3034+#ifndef ZORBA_NO_FULL_TEXT
3035+ if (qnameItem->getNamespace() == static_context::ZORBA_FULL_TEXT_FN_NS &&
3036+ (qnameItem->getLocalName() == "tokenizer-properties" ||
3037+ qnameItem->getLocalName() == "tokenize"))
3038+ {
3039+ FunctionConsts::FunctionKind kind;
3040+
3041+ if (qnameItem->getLocalName() == "tokenizer-properties")
3042+ {
3043+ assert(numParams <= 1);
3044+
3045+ if (numParams == 1)
3046+ kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1;
3047+ else
3048+ kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0;
3049+
3050+ f = new full_text_tokenizer_properties(f->getSignature(), kind);
3051+ }
3052+ else
3053+ {
3054+ assert(numParams == 1 || numParams == 2);
3055+
3056+ if (numParams == 2)
3057+ kind = FunctionConsts::FULL_TEXT_TOKENIZE_2;
3058+ else
3059+ kind = FunctionConsts::FULL_TEXT_TOKENIZE_1;
3060+
3061+ f = new full_text_tokenize(f->getSignature(), kind);
3062+ }
3063+
3064+ f->setStaticContext(theRootSctx);
3065+ bind_fn(f, numParams, loc);
3066+ }
3067+#endif /* ZORBA_NO_FULL_TEXT */
3068+
3069 f->setAnnotations(theAnnotations);
3070 theAnnotations = NULL; // important to reset
3071
3072@@ -12512,7 +12549,7 @@
3073 {
3074 TRACE_VISIT ();
3075 #ifndef ZORBA_NO_FULL_TEXT
3076- push_ftstack( NULL ); // sentinel
3077+ push_ftstack( nullptr ); // sentinel
3078 #endif /* ZORBA_NO_FULL_TEXT */
3079 return no_state;
3080 }
3081@@ -12756,7 +12793,7 @@
3082 void *begin_visit (const FTMildNot& v) {
3083 TRACE_VISIT ();
3084 #ifndef ZORBA_NO_FULL_TEXT
3085- push_ftstack( NULL ); // sentinel
3086+ push_ftstack( nullptr ); // sentinel
3087 #endif /* ZORBA_NO_FULL_TEXT */
3088 return no_state;
3089 }
3090@@ -12799,7 +12836,7 @@
3091 void *begin_visit (const FTOr& v) {
3092 TRACE_VISIT ();
3093 #ifndef ZORBA_NO_FULL_TEXT
3094- push_ftstack( NULL ); // sentinel
3095+ push_ftstack( nullptr ); // sentinel
3096 #endif /* ZORBA_NO_FULL_TEXT */
3097 return no_state;
3098 }
3099@@ -13058,7 +13095,7 @@
3100 levels = dynamic_cast<ftrange*>( pop_ftstack() );
3101 ZORBA_ASSERT( levels );
3102 } else
3103- levels = NULL;
3104+ levels = nullptr;
3105
3106 ftthesaurus_id *const tid = new ftthesaurus_id(
3107 loc, v.get_uri(), v.get_relationship(), levels
3108@@ -13070,7 +13107,7 @@
3109 void *begin_visit (const FTThesaurusOption& v) {
3110 TRACE_VISIT ();
3111 #ifndef ZORBA_NO_FULL_TEXT
3112- push_ftstack( NULL ); // sentinel
3113+ push_ftstack( nullptr ); // sentinel
3114 #endif /* ZORBA_NO_FULL_TEXT */
3115 return no_state;
3116 }
3117@@ -13078,10 +13115,8 @@
3118 void end_visit (const FTThesaurusOption& v, void* /*visit_state*/) {
3119 TRACE_VISIT_OUT ();
3120 #ifndef ZORBA_NO_FULL_TEXT
3121- ftthesaurus_id *default_tid = NULL;
3122- if ( v.includes_default() ) {
3123- default_tid = new ftthesaurus_id( loc, "##default" );
3124- }
3125+ ftthesaurus_id *const default_tid = v.includes_default() ?
3126+ new ftthesaurus_id( loc, "##default" ) : nullptr;
3127
3128 ftthesaurus_option::thesaurus_id_list_t list;
3129 while ( true ) {
3130
3131=== modified file 'src/context/CMakeLists.txt'
3132--- src/context/CMakeLists.txt 2012-04-24 12:39:38 +0000
3133+++ src/context/CMakeLists.txt 2012-04-26 16:53:22 +0000
3134@@ -32,11 +32,6 @@
3135 features.cpp
3136 )
3137
3138-IF (NOT ZORBA_NO_FULL_TEXT)
3139- LIST(APPEND CONTEXT_SRCS
3140- thesaurus_wrappers.cpp)
3141-ENDIF (NOT ZORBA_NO_FULL_TEXT)
3142-
3143 SET(CONTEXT_BUILD_SRCS
3144 ${CMAKE_CURRENT_BINARY_DIR}/context/root_static_context_init.cpp
3145 )
3146
3147=== modified file 'src/context/default_url_resolvers.cpp'
3148--- src/context/default_url_resolvers.cpp 2012-04-24 12:39:38 +0000
3149+++ src/context/default_url_resolvers.cpp 2012-04-26 16:53:22 +0000
3150@@ -17,6 +17,7 @@
3151
3152
3153 #include "context/default_url_resolvers.h"
3154+#include "util/cxx_util.h"
3155 #include "util/uri_util.h"
3156 #include "util/http_util.h"
3157 #include "util/fs_util.h"
3158@@ -41,8 +42,15 @@
3159 HTTPURLResolver::resolveURL
3160 (zstring const& aUrl, EntityData const* aEntityData)
3161 {
3162- if (aEntityData->getKind() == EntityData::COLLECTION)
3163- return NULL;
3164+ switch ( aEntityData->getKind() ) {
3165+ case EntityData::COLLECTION:
3166+#ifndef ZORBA_NO_FULL_TEXT
3167+ case EntityData::THESAURUS:
3168+#endif /* ZORBA_NO_FULL_TEXT */
3169+ return nullptr;
3170+ default:
3171+ break;
3172+ }
3173
3174 uri::scheme lScheme = uri::get_scheme(aUrl);
3175 switch (lScheme) {
3176@@ -82,8 +90,15 @@
3177 FileURLResolver::resolveURL
3178 (zstring const& aUrl, EntityData const* aEntityData)
3179 {
3180- if (aEntityData->getKind() == EntityData::COLLECTION)
3181- return NULL;
3182+ switch ( aEntityData->getKind() ) {
3183+ case EntityData::COLLECTION:
3184+#ifndef ZORBA_NO_FULL_TEXT
3185+ case EntityData::THESAURUS:
3186+#endif /* ZORBA_NO_FULL_TEXT */
3187+ return nullptr;
3188+ default:
3189+ break;
3190+ }
3191
3192 uri::scheme lScheme = uri::get_scheme(aUrl);
3193 if (lScheme != uri::file) {
3194@@ -111,7 +126,6 @@
3195 {
3196 if (aEntityData->getKind() != EntityData::COLLECTION)
3197 return NULL;
3198-
3199 store::Item_t lName;
3200 GENV_STORE.getItemFactory()->createQName(lName, aUrl.c_str(), "", "zorba-internal-name-for-w3c-collections");
3201 store::Collection_t lColl = GENV_STORE.getCollection(lName.getp(), true);
3202
3203=== modified file 'src/context/static_context.cpp'
3204--- src/context/static_context.cpp 2012-04-24 12:39:38 +0000
3205+++ src/context/static_context.cpp 2012-04-26 16:53:22 +0000
3206@@ -378,11 +378,16 @@
3207 static_context::ZORBA_XML_FN_NS =
3208 "http://www.zorba-xquery.com/modules/xml";
3209
3210+#ifndef ZORBA_NO_FULL_TEXT
3211+const char*
3212+static_context::ZORBA_FULL_TEXT_FN_NS =
3213+"http://www.zorba-xquery.com/modules/full-text";
3214+#endif /* ZORBA_NO_FULL_TEXT */
3215+
3216 const char*
3217 static_context::ZORBA_XML_FN_OPTIONS_NS =
3218 "http://www.zorba-xquery.com/modules/xml-options";
3219
3220-
3221 /***************************************************************************//**
3222 Target namespaces of zorba reserved modules
3223 ********************************************************************************/
3224@@ -451,8 +456,11 @@
3225 ns == ZORBA_JSON_FN_NS ||
3226 ns == ZORBA_FETCH_FN_NS ||
3227 ns == ZORBA_NODE_FN_NS ||
3228+#ifndef ZORBA_NO_FULL_TEXT
3229+ ns == ZORBA_FULL_TEXT_FN_NS ||
3230+#endif /* ZORBA_NO_FULL_TEXT */
3231 ns == ZORBA_XML_FN_NS);
3232- }
3233+ }
3234 else if (ns == W3C_FN_NS || ns == XQUERY_MATH_FN_NS)
3235 {
3236 return true;
3237@@ -1585,7 +1593,7 @@
3238 std::auto_ptr<internal::Resource>& oResource,
3239 zstring& oErrorMessage) const
3240 {
3241- oErrorMessage = "";
3242+ oErrorMessage.clear();
3243
3244 // Iterate through all candidate URLs...
3245 for (std::vector<zstring>::iterator url = aUrls.begin();
3246@@ -1621,7 +1629,7 @@
3247 }
3248 catch (const std::exception& e)
3249 {
3250- if (oErrorMessage == "")
3251+ if (oErrorMessage.empty())
3252 {
3253 // Really no point in saving anything more than the first message
3254 oErrorMessage = e.what();
3255
3256=== modified file 'src/context/static_context.h'
3257--- src/context/static_context.h 2012-04-24 12:39:38 +0000
3258+++ src/context/static_context.h 2012-04-26 16:53:22 +0000
3259@@ -471,6 +471,9 @@
3260 static const char* ZORBA_FETCH_FN_NS;
3261 static const char* ZORBA_NODE_FN_NS;
3262 static const char* ZORBA_XML_FN_NS;
3263+#ifndef ZORBA_NO_FULL_TEXT
3264+ static const char* ZORBA_FULL_TEXT_FN_NS;
3265+#endif /* ZORBA_NO_FULL_TEXT */
3266 static const char* ZORBA_XML_FN_OPTIONS_NS;
3267
3268 // Namespaces of virtual modules declaring zorba builtin functions
3269
3270=== removed file 'src/context/stemmer_wrappers.cpp'
3271--- src/context/stemmer_wrappers.cpp 2012-04-24 12:39:38 +0000
3272+++ src/context/stemmer_wrappers.cpp 1970-01-01 00:00:00 +0000
3273@@ -1,74 +0,0 @@
3274-/*
3275- * Copyright 2006-2008 The FLWOR Foundation.
3276- *
3277- * Licensed under the Apache License, Version 2.0 (the "License");
3278- * you may not use this file except in compliance with the License.
3279- * You may obtain a copy of the License at
3280- *
3281- * http://www.apache.org/licenses/LICENSE-2.0
3282- *
3283- * Unless required by applicable law or agreed to in writing, software
3284- * distributed under the License is distributed on an "AS IS" BASIS,
3285- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3286- * See the License for the specific language governing permissions and
3287- * limitations under the License.
3288- */
3289-#include "stdafx.h"
3290-
3291-#include <zorba/config.h>
3292-
3293-#ifndef ZORBA_NO_FULL_TEXT
3294-
3295-#include "api/unmarshaller.h"
3296-#include "diagnostics/assert.h"
3297-#include "util/cxx_util.h"
3298-
3299-#include "stemmer_wrappers.h"
3300-
3301-using namespace zorba::locale;
3302-
3303-namespace zorba {
3304-namespace internal {
3305-
3306-///////////////////////////////////////////////////////////////////////////////
3307-
3308-StemmerWrapper::StemmerWrapper( zorba::Stemmer const *s ) :
3309- api_stemmer_( s )
3310-{
3311- ZORBA_ASSERT( api_stemmer_ );
3312-}
3313-
3314-void StemmerWrapper::stem( zstring const &word, iso639_1::type lang,
3315- zstring *result ) const {
3316- String const api_word( Unmarshaller::newString( word ) );
3317- String api_result( Unmarshaller::newString( *result ) );
3318- api_stemmer_->stem( api_word, lang, &api_result );
3319-}
3320-
3321-///////////////////////////////////////////////////////////////////////////////
3322-
3323-StemmerProviderWrapper::
3324-StemmerProviderWrapper( zorba::StemmerProvider const *p ) :
3325- api_stemmer_provider_( p )
3326-{
3327- ZORBA_ASSERT( api_stemmer_provider_ );
3328-}
3329-
3330-Stemmer const*
3331-StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const {
3332- zorba::Stemmer const *const s = api_stemmer_provider_->getStemmer( lang );
3333- return s ? new StemmerWrapper( s ) : nullptr;
3334-}
3335-
3336-///////////////////////////////////////////////////////////////////////////////
3337-
3338-} // namespace internal
3339-} // namespace zorba
3340-
3341-#endif /* ZORBA_NO_FULL_TEXT */
3342-/*
3343- * Local variables:
3344- * mode: c++
3345- * End:
3346- */
3347-/* vim:set et sw=2 ts=2: */
3348
3349=== removed file 'src/context/stemmer_wrappers.h'
3350--- src/context/stemmer_wrappers.h 2012-04-24 12:39:38 +0000
3351+++ src/context/stemmer_wrappers.h 1970-01-01 00:00:00 +0000
3352@@ -1,63 +0,0 @@
3353-/*
3354- * Copyright 2006-2008 The FLWOR Foundation.
3355- *
3356- * Licensed under the Apache License, Version 2.0 (the "License");
3357- * you may not use this file except in compliance with the License.
3358- * You may obtain a copy of the License at
3359- *
3360- * http://www.apache.org/licenses/LICENSE-2.0
3361- *
3362- * Unless required by applicable law or agreed to in writing, software
3363- * distributed under the License is distributed on an "AS IS" BASIS,
3364- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3365- * See the License for the specific language governing permissions and
3366- * limitations under the License.
3367- */
3368-#pragma once
3369-#ifndef ZORBA_STEMMER_WRAPPERS_H
3370-#define ZORBA_STEMMER_WRAPPERS_H
3371-
3372-#include <zorba/config.h>
3373-
3374-#if 0
3375-#ifndef ZORBA_NO_FULL_TEXT
3376-
3377-#include <zorba/stemmer.h>
3378-#include "zorbautils/stemmer.h"
3379-
3380-namespace zorba {
3381-namespace internal {
3382-
3383-///////////////////////////////////////////////////////////////////////////////
3384-
3385-class StemmerWrapper : public Stemmer {
3386-public:
3387- StemmerWrapper( zorba::Stemmer const *api_stemmer );
3388- void stem( zstring const &word, locale::iso639_1::type lang,
3389- zstring *result ) const;
3390-private:
3391- zorba::Stemmer const *const api_stemmer_;
3392-};
3393-
3394-class StemmerProviderWrapper : public StemmerProvider {
3395-public:
3396- StemmerProviderWrapper( zorba::StemmerProvider const *p );
3397- Stemmer const* get_stemmer( locale::iso639_1::type lang ) const;
3398-private:
3399- zorba::StemmerProvider const *const api_stemmer_provider_;
3400-};
3401-
3402-///////////////////////////////////////////////////////////////////////////////
3403-
3404-} // namespace internal
3405-} // namespace zorba
3406-
3407-#endif /* ZORBA_NO_FULL_TEXT */
3408-#endif
3409-#endif /* ZORBA_STEMMER_WRAPPERS_H */
3410-/*
3411- * Local variables:
3412- * mode: c++
3413- * End:
3414- */
3415-/* vim:set et sw=2 ts=2: */
3416
3417=== modified file 'src/context/uri_resolver.cpp'
3418--- src/context/uri_resolver.cpp 2012-04-24 12:39:38 +0000
3419+++ src/context/uri_resolver.cpp 2012-04-26 16:53:22 +0000
3420@@ -117,19 +117,6 @@
3421 {
3422 }
3423
3424-#ifndef ZORBA_NO_FULL_TEXT
3425- ThesaurusEntityData::ThesaurusEntityData(locale::iso639_1::type aLang)
3426- : EntityData(EntityData::THESAURUS),
3427- theLang(aLang)
3428- {
3429- }
3430-
3431- locale::iso639_1::type ThesaurusEntityData::getLanguage() const
3432- {
3433- return theLang;
3434- }
3435-#endif /* ZORBA_NO_FULL_TEXT */
3436-
3437 /*************
3438 * URIMapper is an abstract class, but we have to define its vtbl and
3439 * base destructor somewhere.
3440
3441=== modified file 'src/context/uri_resolver.h'
3442--- src/context/uri_resolver.h 2012-04-24 12:39:38 +0000
3443+++ src/context/uri_resolver.h 2012-04-26 16:53:22 +0000
3444@@ -55,21 +55,21 @@
3445 /**
3446 * @brief Return the URL used to load this Resource.
3447 */
3448- zstring getUrl() { return theUrl; }
3449+ zstring const& getUrl() const { return theUrl; }
3450
3451 virtual ~Resource() = 0;
3452
3453- protected:
3454+protected:
3455
3456 Resource();
3457
3458- private:
3459+private:
3460
3461 /**
3462 * Used by static_context to populate the URL.
3463 */
3464+ void setUrl(zstring const &aUrl) { theUrl = aUrl; }
3465 friend class zorba::static_context;
3466- void setUrl(zstring aUrl) { theUrl = aUrl; }
3467
3468 zstring theUrl;
3469 };
3470@@ -193,25 +193,6 @@
3471 Kind const theKind;
3472 };
3473
3474-#ifndef ZORBA_NO_FULL_TEXT
3475-/**
3476- * @brief The class containing additional data for URIMappers and URLResolvers
3477- * when mapping/resolving a Thesaurus URI.
3478- */
3479-class ThesaurusEntityData : public EntityData
3480-{
3481-public:
3482- ThesaurusEntityData(locale::iso639_1::type aLang);
3483- /**
3484- * @brief Return the language for which a thesaurus is being requested.
3485- */
3486- virtual locale::iso639_1::type getLanguage() const;
3487-
3488-private:
3489- locale::iso639_1::type const theLang;
3490-};
3491-#endif /* ZORBA_NO_FULL_TEXT */
3492-
3493 /**
3494 * @brief Interface for URL resolving.
3495 *
3496
3497=== modified file 'src/diagnostics/assert.cpp'
3498--- src/diagnostics/assert.cpp 2012-04-24 12:39:38 +0000
3499+++ src/diagnostics/assert.cpp 2012-04-26 16:53:22 +0000
3500@@ -68,7 +68,7 @@
3501 file,
3502 line,
3503 zerr::ZXQP0002_ASSERT_FAILED,
3504- ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition ))
3505+ ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition ) )
3506 );
3507 }
3508
3509
3510=== modified file 'src/diagnostics/assert.h'
3511--- src/diagnostics/assert.h 2012-04-24 12:39:38 +0000
3512+++ src/diagnostics/assert.h 2012-04-26 16:53:22 +0000
3513@@ -20,6 +20,10 @@
3514 #ifndef ZORBA_ASSERT_H
3515 #define ZORBA_ASSERT_H
3516
3517+#include <sstream>
3518+
3519+#include "util/cxx_util.h"
3520+
3521 namespace zorba {
3522
3523 /**
3524@@ -35,7 +39,7 @@
3525 void assertion_failed( char const *condition,
3526 char const *file,
3527 int line,
3528- char const *msg = 0);
3529+ char const *msg = nullptr );
3530
3531 /**
3532 * Zorba version of the standard assert(3) macro.
3533
3534=== modified file 'src/diagnostics/diagnostic_en.xml'
3535--- src/diagnostics/diagnostic_en.xml 2012-04-24 12:39:38 +0000
3536+++ src/diagnostics/diagnostic_en.xml 2012-04-26 16:53:22 +0000
3537@@ -1746,7 +1746,7 @@
3538 <diagnostic code="ZXQP8401" name="THESAURUS_VERSION_MISMATCH"
3539 if="!defined(ZORBA_NO_FULL_TEXT)">
3540 <comment>
3541- The version of the thesaurus is not the expected version.
3542+ The version of the thesaurus is not the expected version.
3543 </comment>
3544 <value>"$1": wrong WordNet file version; should be "$2"</value>
3545 </diagnostic>
3546@@ -1754,19 +1754,39 @@
3547 <diagnostic code="ZXQP8402" name="THESAURUS_ENDIANNESS_MISMATCH"
3548 if="!defined(ZORBA_NO_FULL_TEXT)">
3549 <comment>
3550+ The thesaurus data file's endianness does not match that of the CPU.
3551 </comment>
3552 <value>thesaurus data endianness does not match CPU</value>
3553- The thesaurus data file's endianness does not match that of the CPU.
3554 </diagnostic>
3555
3556 <diagnostic code="ZXQP8403" name="THESAURUS_DATA_ERROR"
3557 if="!defined(ZORBA_NO_FULL_TEXT)">
3558 <comment>
3559- The thesaurus data contains an unexpected value.
3560+ The thesaurus data contains an unexpected value.
3561 </comment>
3562 <value>thesaurus data error${: 1}</value>
3563 </diagnostic>
3564
3565+ <diagnostic code="ZXQP8404" name="STEM_LANG_NOT_SUPPORTED"
3566+ if="!defined(ZORBA_NO_FULL_TEXT)">
3567+ <value>"$1": language not supported for stemming</value>
3568+ </diagnostic>
3569+
3570+ <diagnostic code="ZXQP8405" name="STOP_WORDS_LANG_NOT_SUPPORTED"
3571+ if="!defined(ZORBA_NO_FULL_TEXT)">
3572+ <value>"$1": language not supported for stop-words</value>
3573+ </diagnostic>
3574+
3575+ <diagnostic code="ZXQP8406" name="THESAURUS_LANG_NOT_SUPPORTED"
3576+ if="!defined(ZORBA_NO_FULL_TEXT)">
3577+ <value>"$1": language not supported for thesaurus</value>
3578+ </diagnostic>
3579+
3580+ <diagnostic code="ZXQP8407" name="TOKENIZER_LANG_NOT_SUPPORTED"
3581+ if="!defined(ZORBA_NO_FULL_TEXT)">
3582+ <value>"$1": language not supported for tokenizer</value>
3583+ </diagnostic>
3584+
3585 <diagnostic code="ZXQD0001" name="PREFIX_NOT_DECLARED">
3586 <value>"$1": prefix not declared when calling function "$2" from $3</value>
3587 </diagnostic>
3588
3589=== modified file 'src/diagnostics/pregenerated/diagnostic_list.cpp'
3590--- src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-24 12:39:38 +0000
3591+++ src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-26 16:53:22 +0000
3592@@ -660,6 +660,18 @@
3593
3594
3595 ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR( "ZXQP8403" );
3596+
3597+
3598+ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED( "ZXQP8404" );
3599+
3600+
3601+ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED( "ZXQP8405" );
3602+
3603+
3604+ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED( "ZXQP8406" );
3605+
3606+
3607+ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED( "ZXQP8407" );
3608 #endif
3609
3610
3611
3612=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
3613--- src/diagnostics/pregenerated/dict_en.cpp 2012-04-24 12:39:38 +0000
3614+++ src/diagnostics/pregenerated/dict_en.cpp 2012-04-26 16:53:22 +0000
3615@@ -434,6 +434,18 @@
3616 #if !defined(ZORBA_NO_FULL_TEXT)
3617 { "ZXQP8403", "thesaurus data error${: 1}" },
3618 #endif
3619+#if !defined(ZORBA_NO_FULL_TEXT)
3620+ { "ZXQP8404", "\"$1\": language not supported for stemming" },
3621+#endif
3622+#if !defined(ZORBA_NO_FULL_TEXT)
3623+ { "ZXQP8405", "\"$1\": language not supported for stop-words" },
3624+#endif
3625+#if !defined(ZORBA_NO_FULL_TEXT)
3626+ { "ZXQP8406", "\"$1\": language not supported for thesaurus" },
3627+#endif
3628+#if !defined(ZORBA_NO_FULL_TEXT)
3629+ { "ZXQP8407", "\"$1\": language not supported for tokenizer" },
3630+#endif
3631 { "~AllMatchesHasExcludes", "AllMatches contains StringExclude" },
3632 { "~AlreadySpecified", "already specified" },
3633 { "~ArithOpNotDefinedBetween_23", "arithmetic operation not defined between types \"$2\" and \"$3\"" },
3634
3635=== modified file 'src/functions/CMakeLists.txt'
3636--- src/functions/CMakeLists.txt 2012-04-24 12:39:38 +0000
3637+++ src/functions/CMakeLists.txt 2012-04-26 16:53:22 +0000
3638@@ -83,3 +83,7 @@
3639 func_apply.cpp
3640 func_serialize_impl.cpp
3641 )
3642+
3643+IF (NOT ZORBA_NO_FULL_TEXT)
3644+ LIST(APPEND FUNCTIONS_SRCS func_ft_module_impl.cpp)
3645+ENDIF (NOT ZORBA_NO_FULL_TEXT)
3646
3647=== modified file 'src/functions/external_function.cpp'
3648--- src/functions/external_function.cpp 2012-04-24 12:39:38 +0000
3649+++ src/functions/external_function.cpp 2012-04-26 16:53:22 +0000
3650@@ -45,12 +45,12 @@
3651 :
3652 function(sig, FunctionConsts::FN_UNKNOWN),
3653 theLoc(loc),
3654- theModuleSctx(modSctx),
3655 theNamespace(ns),
3656 theScriptingKind(scriptingType),
3657 theImpl(impl)
3658 {
3659 resetFlag(FunctionConsts::isBuiltin);
3660+ theModuleSctx = modSctx;
3661 }
3662
3663
3664@@ -62,7 +62,6 @@
3665 zorba::serialization::serialize_baseclass(ar, (function*)this);
3666
3667 ar & theLoc;
3668- ar & theModuleSctx;
3669 ar & theNamespace;
3670 ar & theScriptingKind;
3671
3672
3673=== modified file 'src/functions/external_function.h'
3674--- src/functions/external_function.h 2012-04-24 12:39:38 +0000
3675+++ src/functions/external_function.h 2012-04-26 16:53:22 +0000
3676@@ -29,7 +29,6 @@
3677
3678 /*******************************************************************************
3679 theLoc : The location of the declaration of this external function.
3680- theModuleContext : The root sctx of the module containing the declaration.
3681 theNamespace : The namespace of the module containing the declaration.
3682 theScriptingKind : Whether the external function is simple, updating, or
3683 sequential (this property is part of the declaration).
3684@@ -40,7 +39,6 @@
3685 {
3686 protected:
3687 QueryLoc theLoc;
3688- static_context * theModuleSctx;
3689 zstring theNamespace;
3690 short theScriptingKind;
3691 ExternalFunction * theImpl;
3692
3693=== added file 'src/functions/func_ft_module_impl.cpp'
3694--- src/functions/func_ft_module_impl.cpp 1970-01-01 00:00:00 +0000
3695+++ src/functions/func_ft_module_impl.cpp 2012-04-26 16:53:22 +0000
3696@@ -0,0 +1,128 @@
3697+/*
3698+ * Copyright 2006-2008 The FLWOR Foundation.
3699+ *
3700+ * Licensed under the Apache License, Version 2.0 (the "License");
3701+ * you may not use this file except in compliance with the License.
3702+ * You may obtain a copy of the License at
3703+ *
3704+ * http://www.apache.org/licenses/LICENSE-2.0
3705+ *
3706+ * Unless required by applicable law or agreed to in writing, software
3707+ * distributed under the License is distributed on an "AS IS" BASIS,
3708+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3709+ * See the License for the specific language governing permissions and
3710+ * limitations under the License.
3711+ */
3712+#include "functions/func_ft_module_impl.h"
3713+
3714+#include "runtime/full_text/ft_module.h"
3715+
3716+#define FT_MODULE_NS "http://www.zorba-xquery.com/modules/full-text"
3717+
3718+namespace zorba
3719+{
3720+
3721+#ifndef ZORBA_NO_FULL_TEXT
3722+
3723+SERIALIZABLE_CLASS_VERSIONS(full_text_tokenize)
3724+
3725+
3726+void full_text_tokenize::serialize(::zorba::serialization::Archiver& ar)
3727+{ // Serialization hook for the full-text:tokenize function object.
3728+ serialize_baseclass(ar, (function*)this); // only the `function` base-class part is archived; nothing class-specific is written here
3729+}
3730+
3731+
3732+PlanIter_t full_text_tokenize::codegen( // Creates the runtime iterator for a call to full-text:tokenize.
3733+ CompilerCB*, // unnamed, unused
3734+ static_context* sctx, // not used by this implementation (see the return statement)
3735+ const QueryLoc& loc, // forwarded to the iterator
3736+ std::vector<PlanIter_t>& argv, // argument iterators, forwarded to the iterator
3737+ expr& ann) const // unused
3738+{
3739+ return new TokenizeIterator(theModuleSctx, loc, argv); // passes the function's own theModuleSctx rather than the sctx argument. NOTE(review): the function constructor initialises theModuleSctx to NULL, so this presumably relies on setStaticContext() having been called beforehand -- confirm
3740+}
3741+
3742+
3743+SERIALIZABLE_CLASS_VERSIONS(full_text_tokenizer_properties)
3744+
3745+
3746+void full_text_tokenizer_properties::serialize(::zorba::serialization::Archiver& ar)
3747+{ // Serialization hook for the full-text:tokenizer-properties function object.
3748+ serialize_baseclass(ar, (function*)this); // only the `function` base-class part is archived; nothing class-specific is written here
3749+}
3750+
3751+
3752+PlanIter_t full_text_tokenizer_properties::codegen( // Creates the runtime iterator for a call to full-text:tokenizer-properties.
3753+ CompilerCB*, // unnamed, unused
3754+ static_context* sctx, // not used by this implementation (see the return statement)
3755+ const QueryLoc& loc, // forwarded to the iterator
3756+ std::vector<PlanIter_t>& argv, // argument iterators, forwarded to the iterator
3757+ expr& ann) const // unused
3758+{
3759+ return new TokenizerPropertiesIterator(theModuleSctx, loc, argv); // passes the function's own theModuleSctx rather than the sctx argument. NOTE(review): the function constructor initialises theModuleSctx to NULL, so this presumably relies on setStaticContext() having been called beforehand -- confirm
3760+}
3761+
3762+#endif // ZORBA_NO_FULL_TEXT
3763+
3764+
3765+///////////////////////////////////////////////////////////////////////////////
3766+
3767+void populate_context_ft_module_impl(static_context* sctx)
3768+{ // Declares full-text:tokenize and full-text:tokenizer-properties in sctx via DECL_WITH_KIND (macro defined elsewhere); the body is empty when ZORBA_NO_FULL_TEXT is defined.
3769+#ifndef ZORBA_NO_FULL_TEXT
3770+ // Return type shared by both tokenize overloads: element node named "token" in FT_MODULE_NS, quantifier QUANT_STAR.
3771+ xqtref_t tokenize_return_type =
3772+ GENV_TYPESYSTEM.create_node_type(store::StoreConsts::elementNode,
3773+ createQName(FT_MODULE_NS, "", "token"),
3774+ NULL,
3775+ TypeConstants::QUANT_STAR,
3776+ false,
3777+ false);
3778+ { // tokenize taking a single node (kind FULL_TEXT_TOKENIZE_1)
3779+ DECL_WITH_KIND(sctx,
3780+ full_text_tokenize,
3781+ (createQName(FT_MODULE_NS, "", "tokenize"),
3782+ GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
3783+ tokenize_return_type),
3784+ FunctionConsts::FULL_TEXT_TOKENIZE_1);
3785+ }
3786+ { // tokenize taking a node and a language (kind FULL_TEXT_TOKENIZE_2)
3787+ DECL_WITH_KIND(sctx,
3788+ full_text_tokenize,
3789+ (createQName( FT_MODULE_NS, "", "tokenize"),
3790+ GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
3791+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
3792+ tokenize_return_type),
3793+ FunctionConsts::FULL_TEXT_TOKENIZE_2);
3794+ }
3795+ // Return type shared by both tokenizer-properties overloads: one element node named "tokenizer-properties" in FT_MODULE_NS (QUANT_ONE).
3796+ xqtref_t tokenizer_properties_return_type =
3797+ GENV_TYPESYSTEM.create_node_type(store::StoreConsts::elementNode,
3798+ createQName(FT_MODULE_NS, "", "tokenizer-properties"),
3799+ NULL,
3800+ TypeConstants::QUANT_ONE,
3801+ false,
3802+ false);
3803+ { // tokenizer-properties with no argument (kind FULL_TEXT_TOKENIZER_PROPERTIES_0)
3804+ DECL_WITH_KIND(sctx,
3805+ full_text_tokenizer_properties,
3806+ (createQName(FT_MODULE_NS, "", "tokenizer-properties"),
3807+ tokenizer_properties_return_type),
3808+ FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0);
3809+ }
3810+ { // tokenizer-properties taking a language (kind FULL_TEXT_TOKENIZER_PROPERTIES_1)
3811+ DECL_WITH_KIND(sctx,
3812+ full_text_tokenizer_properties,
3813+ (createQName( FT_MODULE_NS, "", "tokenizer-properties"),
3814+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
3815+ tokenizer_properties_return_type),
3816+ FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1);
3817+ }
3818+#endif // ZORBA_NO_FULL_TEXT
3819+}
3820+
3821+
3822+
3823+} // namespace zorba
3824+/* vim:set et sw=2 ts=2: */
3825
3826=== added file 'src/functions/func_ft_module_impl.h'
3827--- src/functions/func_ft_module_impl.h 1970-01-01 00:00:00 +0000
3828+++ src/functions/func_ft_module_impl.h 2012-04-26 16:53:22 +0000
3829@@ -0,0 +1,81 @@
3830+/*
3831+ * Copyright 2006-2008 The FLWOR Foundation.
3832+ *
3833+ * Licensed under the Apache License, Version 2.0 (the "License");
3834+ * you may not use this file except in compliance with the License.
3835+ * You may obtain a copy of the License at
3836+ *
3837+ * http://www.apache.org/licenses/LICENSE-2.0
3838+ *
3839+ * Unless required by applicable law or agreed to in writing, software
3840+ * distributed under the License is distributed on an "AS IS" BASIS,
3841+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3842+ * See the License for the specific language governing permissions and
3843+ * limitations under the License.
3844+ */
3845+#ifndef ZORBA_FUNCTIONS_FT_MODULE_IMPL_H
3846+#define ZORBA_FUNCTIONS_FT_MODULE_IMPL_H
3847+
3848+#include "stdafx.h"
3849+
3850+#include "functions/function.h"
3851+#include "functions/function_impl.h"
3852+
3853+
3854+namespace zorba
3855+{
3856+
3857+#ifndef ZORBA_NO_FULL_TEXT
3858+
3859+// full-text:tokenize -- function object whose codegen() (see func_ft_module_impl.cpp) creates a TokenizeIterator.
3860+class full_text_tokenize : public function
3861+{
3862+public:
3863+ SERIALIZABLE_CLASS(full_text_tokenize);
3864+ SERIALIZABLE_CLASS_CONSTRUCTOR2(full_text_tokenize, function)
3865+ void serialize(::zorba::serialization::Archiver& ar); // defined in func_ft_module_impl.cpp; archives the function base class only
3866+
3867+public:
3868+ full_text_tokenize(const signature& sig, FunctionConsts::FunctionKind kind) // forwards the signature and kind to the function base class
3869+ :
3870+ function(sig, kind)
3871+ {
3872+
3873+ }
3874+
3875+ CODEGEN_DECL(); // presumably declares the codegen() member that func_ft_module_impl.cpp defines -- macro body not visible here
3876+};
3877+
3878+
3879+
3880+// full-text:tokenizer-properties -- function object whose codegen() (see func_ft_module_impl.cpp) creates a TokenizerPropertiesIterator.
3881+class full_text_tokenizer_properties : public function
3882+{
3883+public:
3884+ SERIALIZABLE_CLASS(full_text_tokenizer_properties);
3885+ SERIALIZABLE_CLASS_CONSTRUCTOR2(full_text_tokenizer_properties, function)
3886+ void serialize(::zorba::serialization::Archiver& ar); // defined in func_ft_module_impl.cpp; archives the function base class only
3887+
3888+public:
3889+ full_text_tokenizer_properties(const signature& sig, FunctionConsts::FunctionKind kind) // forwards the signature and kind to the function base class
3890+ :
3891+ function(sig, kind)
3892+ {
3893+
3894+ }
3895+
3896+ // Report the function as accessing the dynamic context so that it is
3897+ // never const-folded. Const-folding must be prevented because the function
3898+ // returns a node validated against a schema that may not be imported
3899+ // in the module from which the function is invoked.
3900+ bool accessesDynCtx() const { return true; }
3901+
3902+ CODEGEN_DECL(); // presumably declares the codegen() member that func_ft_module_impl.cpp defines -- macro body not visible here
3903+};
3904+
3905+#endif // ZORBA_NO_FULL_TEXT
3906+
3907+}
3908+
3909+#endif
3910+/* vim:set et sw=2 ts=2: */
3911
3912=== modified file 'src/functions/function.cpp'
3913--- src/functions/function.cpp 2012-04-24 12:39:38 +0000
3914+++ src/functions/function.cpp 2012-04-26 16:53:22 +0000
3915@@ -43,6 +43,7 @@
3916 theSignature(sig),
3917 theKind(kind),
3918 theFlags(0),
3919+ theModuleSctx(NULL),
3920 theXQueryVersion(StaticContextConsts::xquery_version_1_0)
3921 {
3922 setFlag(FunctionConsts::isBuiltin);
3923@@ -70,6 +71,7 @@
3924 SERIALIZE_ENUM(FunctionConsts::FunctionKind, theKind);
3925 ar & theFlags;
3926 ar & theAnnotationList;
3927+ ar & theModuleSctx;
3928 SERIALIZE_ENUM(StaticContextConsts::xquery_version_t, theXQueryVersion);
3929 }
3930
3931@@ -92,6 +94,7 @@
3932 return n == VARIADIC_SIG_SIZE || argv.size() == n;
3933 }
3934
3935+
3936 /*******************************************************************************
3937
3938 ********************************************************************************/
3939
3940=== modified file 'src/functions/function.h'
3941--- src/functions/function.h 2012-04-24 12:39:38 +0000
3942+++ src/functions/function.h 2012-04-26 16:53:22 +0000
3943@@ -42,7 +42,10 @@
3944
3945
3946 /*******************************************************************************
3947-
3948+ theModuleSctx:
3949+ --------------
3950+ The root sctx of the module containing the declaration. It is NULL for
3951+ functions that must be executed in the static context of the caller.
3952 ********************************************************************************/
3953 class function : public SimpleRCObject
3954 {
3955@@ -51,6 +54,7 @@
3956 FunctionConsts::FunctionKind theKind;
3957 uint32_t theFlags;
3958 AnnotationList_t theAnnotationList;
3959+ static_context * theModuleSctx;
3960
3961 StaticContextConsts::xquery_version_t theXQueryVersion;
3962
3963@@ -89,6 +93,10 @@
3964
3965 bool isVariadic() const { return theSignature.isVariadic(); }
3966
3967+ static_context* getStaticContext() const { return theModuleSctx; } // returns the stored module sctx pointer as-is; it may be NULL (the constructor initialises it to NULL)
3968+
3969+ void setStaticContext(static_context* sctx) { theModuleSctx = sctx; } // stores the pointer without any check; no ownership handling is visible here
3970+
3971 void setFlag(FunctionConsts::AnnotationFlags flag)
3972 {
3973 theFlags |= flag;
3974
3975=== modified file 'src/functions/function_consts.h'
3976--- src/functions/function_consts.h 2012-04-24 12:39:38 +0000
3977+++ src/functions/function_consts.h 2012-04-26 16:53:22 +0000
3978@@ -225,6 +225,13 @@
3979 OP_HOIST_1,
3980 OP_UNHOIST_1,
3981
3982+#ifndef ZORBA_NO_FULL_TEXT
3983+ FULL_TEXT_TOKENIZER_PROPERTIES_1,
3984+ FULL_TEXT_TOKENIZER_PROPERTIES_0,
3985+ FULL_TEXT_TOKENIZE_2,
3986+ FULL_TEXT_TOKENIZE_1,
3987+#endif
3988+
3989 #include "functions/function_enum.h"
3990
3991 FN_MAX_FUNC
3992
3993=== modified file 'src/functions/library.cpp'
3994--- src/functions/library.cpp 2012-04-24 12:39:38 +0000
3995+++ src/functions/library.cpp 2012-04-26 16:53:22 +0000
3996@@ -68,6 +68,10 @@
3997 #include "functions/func_reflection.h"
3998 #include "functions/func_apply.h"
3999 #include "functions/func_fetch.h"
4000+#ifndef ZORBA_NO_FULL_TEXT
4001+#include "functions/func_ft_module.h"
4002+#include "runtime/full_text/ft_module_impl.h"
4003+#endif /* ZORBA_NO_FULL_TEXT */
4004
4005 #include "functions/func_function_item_iter.h"
4006
4007@@ -144,6 +148,10 @@
4008 populate_context_apply(sctx);
4009
4010 populate_context_fetch(sctx);
4011+#ifndef ZORBA_NO_FULL_TEXT
4012+ populate_context_ft_module(sctx);
4013+ populate_context_ft_module_impl(sctx);
4014+#endif /* ZORBA_NO_FULL_TEXT */
4015
4016 ar.set_loading_hardcoded_objects(false);
4017 }
4018
4019=== added file 'src/functions/pregenerated/func_ft_module.cpp'
4020--- src/functions/pregenerated/func_ft_module.cpp 1970-01-01 00:00:00 +0000
4021+++ src/functions/pregenerated/func_ft_module.cpp 2012-04-26 16:53:22 +0000
4022@@ -0,0 +1,490 @@
4023+/*
4024+ * Copyright 2006-2008 The FLWOR Foundation.
4025+ *
4026+ * Licensed under the Apache License, Version 2.0 (the "License");
4027+ * you may not use this file except in compliance with the License.
4028+ * You may obtain a copy of the License at
4029+ *
4030+ * http://www.apache.org/licenses/LICENSE-2.0
4031+ *
4032+ * Unless required by applicable law or agreed to in writing, software
4033+ * distributed under the License is distributed on an "AS IS" BASIS,
4034+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
4035+ * See the License for the specific language governing permissions and
4036+ * limitations under the License.
4037+ */
4038+
4039+// ******************************************
4040+// * *
4041+// * THIS IS A GENERATED FILE. DO NOT EDIT! *
4042+// * SEE .xml FILE WITH SAME NAME *
4043+// * *
4044+// ******************************************
4045+
4046+
4047+#include "stdafx.h"
4048+#include "runtime/full_text/ft_module.h"
4049+#include "functions/func_ft_module.h"
4050+
4051+
4052+namespace zorba{
4053+
4054+
4055+#ifndef ZORBA_NO_FULL_TEXT
4056+PlanIter_t full_text_current_lang::codegen(
4057+ CompilerCB*,
4058+ static_context* sctx,
4059+ const QueryLoc& loc,
4060+ std::vector<PlanIter_t>& argv,
4061+ expr& ann) const
4062+{
4063+ return new CurrentLangIterator(sctx, loc, argv);
4064+}
4065+
4066+#endif
4067+#ifndef ZORBA_NO_FULL_TEXT
4068+PlanIter_t full_text_host_lang::codegen(
4069+ CompilerCB*,
4070+ static_context* sctx,
4071+ const QueryLoc& loc,
4072+ std::vector<PlanIter_t>& argv,
4073+ expr& ann) const
4074+{
4075+ return new HostLangIterator(sctx, loc, argv);
4076+}
4077+
4078+#endif
4079+#ifndef ZORBA_NO_FULL_TEXT
4080+PlanIter_t full_text_is_stem_lang_supported::codegen(
4081+ CompilerCB*,
4082+ static_context* sctx,
4083+ const QueryLoc& loc,
4084+ std::vector<PlanIter_t>& argv,
4085+ expr& ann) const
4086+{
4087+ return new IsStemLangSupportedIterator(sctx, loc, argv);
4088+}
4089+
4090+#endif
4091+#ifndef ZORBA_NO_FULL_TEXT
4092+PlanIter_t full_text_is_stop_word::codegen(
4093+ CompilerCB*,
4094+ static_context* sctx,
4095+ const QueryLoc& loc,
4096+ std::vector<PlanIter_t>& argv,
4097+ expr& ann) const
4098+{
4099+ return new IsStopWordIterator(sctx, loc, argv);
4100+}
4101+
4102+#endif
4103+#ifndef ZORBA_NO_FULL_TEXT
4104+PlanIter_t full_text_is_stop_word_lang_supported::codegen(
4105+ CompilerCB*,
4106+ static_context* sctx,
4107+ const QueryLoc& loc,
4108+ std::vector<PlanIter_t>& argv,
4109+ expr& ann) const
4110+{
4111+ return new IsStopWordLangSupportedIterator(sctx, loc, argv);
4112+}
4113+
4114+#endif
4115+#ifndef ZORBA_NO_FULL_TEXT
4116+PlanIter_t full_text_is_thesaurus_lang_supported::codegen(
4117+ CompilerCB*,
4118+ static_context* sctx,
4119+ const QueryLoc& loc,
4120+ std::vector<PlanIter_t>& argv,
4121+ expr& ann) const
4122+{
4123+ return new IsThesaurusLangSupportedIterator(sctx, loc, argv);
4124+}
4125+
4126+#endif
4127+#ifndef ZORBA_NO_FULL_TEXT
4128+PlanIter_t full_text_is_tokenizer_lang_supported::codegen(
4129+ CompilerCB*,
4130+ static_context* sctx,
4131+ const QueryLoc& loc,
4132+ std::vector<PlanIter_t>& argv,
4133+ expr& ann) const
4134+{
4135+ return new IsTokenizerLangSupportedIterator(sctx, loc, argv);
4136+}
4137+
4138+#endif
4139+#ifndef ZORBA_NO_FULL_TEXT
4140+PlanIter_t full_text_stem::codegen(
4141+ CompilerCB*,
4142+ static_context* sctx,
4143+ const QueryLoc& loc,
4144+ std::vector<PlanIter_t>& argv,
4145+ expr& ann) const
4146+{
4147+ return new StemIterator(sctx, loc, argv);
4148+}
4149+
4150+#endif
4151+#ifndef ZORBA_NO_FULL_TEXT
4152+PlanIter_t full_text_strip_diacritics::codegen(
4153+ CompilerCB*,
4154+ static_context* sctx,
4155+ const QueryLoc& loc,
4156+ std::vector<PlanIter_t>& argv,
4157+ expr& ann) const
4158+{
4159+ return new StripDiacriticsIterator(sctx, loc, argv);
4160+}
4161+
4162+#endif
4163+#ifndef ZORBA_NO_FULL_TEXT
4164+PlanIter_t full_text_thesaurus_lookup::codegen(
4165+ CompilerCB*,
4166+ static_context* sctx,
4167+ const QueryLoc& loc,
4168+ std::vector<PlanIter_t>& argv,
4169+ expr& ann) const
4170+{
4171+ return new ThesaurusLookupIterator(sctx, loc, argv);
4172+}
4173+
4174+#endif
4175+#ifndef ZORBA_NO_FULL_TEXT
4176+PlanIter_t full_text_tokenize_string::codegen(
4177+ CompilerCB*,
4178+ static_context* sctx,
4179+ const QueryLoc& loc,
4180+ std::vector<PlanIter_t>& argv,
4181+ expr& ann) const
4182+{
4183+ return new TokenizeStringIterator(sctx, loc, argv);
4184+}
4185+
4186+#endif
4187+
4188+void populate_context_ft_module(static_context* sctx)
4189+{
4190+
4191+#ifndef ZORBA_NO_FULL_TEXT
4192+ {
4193+
4194+
4195+ DECL_WITH_KIND(sctx, full_text_current_lang,
4196+ (createQName("http://www.zorba-xquery.com/modules/full-text","","current-lang"),
4197+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE),
4198+ FunctionConsts::FULL_TEXT_CURRENT_LANG_0);
4199+
4200+ }
4201+
4202+
4203+#endif
4204+
4205+
4206+#ifndef ZORBA_NO_FULL_TEXT
4207+ {
4208+
4209+
4210+ DECL_WITH_KIND(sctx, full_text_host_lang,
4211+ (createQName("http://www.zorba-xquery.com/modules/full-text","","host-lang"),
4212+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE),
4213+ FunctionConsts::FULL_TEXT_HOST_LANG_0);
4214+
4215+ }
4216+
4217+
4218+#endif
4219+
4220+
4221+#ifndef ZORBA_NO_FULL_TEXT
4222+ {
4223+
4224+
4225+ DECL_WITH_KIND(sctx, full_text_is_stem_lang_supported,
4226+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stem-lang-supported"),
4227+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4228+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4229+ FunctionConsts::FULL_TEXT_IS_STEM_LANG_SUPPORTED_1);
4230+
4231+ }
4232+
4233+
4234+#endif
4235+
4236+
4237+#ifndef ZORBA_NO_FULL_TEXT
4238+ {
4239+
4240+
4241+ DECL_WITH_KIND(sctx, full_text_is_stop_word,
4242+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"),
4243+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4244+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4245+ FunctionConsts::FULL_TEXT_IS_STOP_WORD_1);
4246+
4247+ }
4248+
4249+
4250+#endif
4251+
4252+
4253+#ifndef ZORBA_NO_FULL_TEXT
4254+ {
4255+
4256+
4257+ DECL_WITH_KIND(sctx, full_text_is_stop_word,
4258+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"),
4259+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4260+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4261+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4262+ FunctionConsts::FULL_TEXT_IS_STOP_WORD_2);
4263+
4264+ }
4265+
4266+
4267+#endif
4268+
4269+
4270+#ifndef ZORBA_NO_FULL_TEXT
4271+ {
4272+
4273+
4274+ DECL_WITH_KIND(sctx, full_text_is_stop_word_lang_supported,
4275+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word-lang-supported"),
4276+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4277+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4278+ FunctionConsts::FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1);
4279+
4280+ }
4281+
4282+
4283+#endif
4284+
4285+
4286+#ifndef ZORBA_NO_FULL_TEXT
4287+ {
4288+
4289+
4290+ DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported,
4291+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"),
4292+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4293+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4294+ FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1);
4295+
4296+ }
4297+
4298+
4299+#endif
4300+
4301+
4302+#ifndef ZORBA_NO_FULL_TEXT
4303+ {
4304+
4305+
4306+ DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported,
4307+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"),
4308+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4309+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4310+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4311+ FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2);
4312+
4313+ }
4314+
4315+
4316+#endif
4317+
4318+
4319+#ifndef ZORBA_NO_FULL_TEXT
4320+ {
4321+
4322+
4323+ DECL_WITH_KIND(sctx, full_text_is_tokenizer_lang_supported,
4324+ (createQName("http://www.zorba-xquery.com/modules/full-text","","is-tokenizer-lang-supported"),
4325+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4326+ GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
4327+ FunctionConsts::FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1);
4328+
4329+ }
4330+
4331+
4332+#endif
4333+
4334+
4335+#ifndef ZORBA_NO_FULL_TEXT
4336+ {
4337+
4338+
4339+ DECL_WITH_KIND(sctx, full_text_stem,
4340+ (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"),
4341+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4342+ GENV_TYPESYSTEM.STRING_TYPE_ONE),
4343+ FunctionConsts::FULL_TEXT_STEM_1);
4344+
4345+ }
4346+
4347+
4348+#endif
4349+
4350+
4351+#ifndef ZORBA_NO_FULL_TEXT
4352+ {
4353+
4354+
4355+ DECL_WITH_KIND(sctx, full_text_stem,
4356+ (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"),
4357+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4358+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4359+ GENV_TYPESYSTEM.STRING_TYPE_ONE),
4360+ FunctionConsts::FULL_TEXT_STEM_2);
4361+
4362+ }
4363+
4364+
4365+#endif
4366+
4367+
4368+#ifndef ZORBA_NO_FULL_TEXT
4369+ {
4370+
4371+
4372+ DECL_WITH_KIND(sctx, full_text_strip_diacritics,
4373+ (createQName("http://www.zorba-xquery.com/modules/full-text","","strip-diacritics"),
4374+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4375+ GENV_TYPESYSTEM.STRING_TYPE_ONE),
4376+ FunctionConsts::FULL_TEXT_STRIP_DIACRITICS_1);
4377+
4378+ }
4379+
4380+
4381+#endif
4382+
4383+
4384+#ifndef ZORBA_NO_FULL_TEXT
4385+ {
4386+
4387+
4388+ DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
4389+ (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
4390+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4391+ GENV_TYPESYSTEM.STRING_TYPE_PLUS),
4392+ FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_1);
4393+
4394+ }
4395+
4396+
4397+#endif
4398+
4399+
4400+#ifndef ZORBA_NO_FULL_TEXT
4401+ {
4402+
4403+
4404+ DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
4405+ (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
4406+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4407+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4408+ GENV_TYPESYSTEM.STRING_TYPE_PLUS),
4409+ FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_2);
4410+
4411+ }
4412+
4413+
4414+#endif
4415+
4416+
4417+#ifndef ZORBA_NO_FULL_TEXT
4418+ {
4419+
4420+
4421+ DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
4422+ (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
4423+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4424+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4425+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4426+ GENV_TYPESYSTEM.STRING_TYPE_PLUS),
4427+ FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_3);
4428+
4429+ }
4430+
4431+
4432+#endif
4433+
4434+
4435+#ifndef ZORBA_NO_FULL_TEXT
4436+ {
4437+
4438+
4439+ DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
4440+ (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
4441+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4442+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4443+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4444+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4445+ GENV_TYPESYSTEM.STRING_TYPE_PLUS),
4446+ FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_4);
4447+
4448+ }
4449+
4450+
4451+#endif
4452+
4453+
4454+#ifndef ZORBA_NO_FULL_TEXT
4455+ {
4456+
4457+
4458+ DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
4459+ (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
4460+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4461+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4462+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4463+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4464+ GENV_TYPESYSTEM.INTEGER_TYPE_ONE,
4465+ GENV_TYPESYSTEM.INTEGER_TYPE_ONE,
4466+ GENV_TYPESYSTEM.STRING_TYPE_PLUS),
4467+ FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_6);
4468+
4469+ }
4470+
4471+
4472+#endif
4473+
4474+
4475+#ifndef ZORBA_NO_FULL_TEXT
4476+ {
4477+
4478+
4479+ DECL_WITH_KIND(sctx, full_text_tokenize_string,
4480+ (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"),
4481+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4482+ GENV_TYPESYSTEM.STRING_TYPE_STAR),
4483+ FunctionConsts::FULL_TEXT_TOKENIZE_STRING_1);
4484+
4485+ }
4486+
4487+
4488+#endif
4489+
4490+
4491+#ifndef ZORBA_NO_FULL_TEXT
4492+ {
4493+
4494+
4495+ DECL_WITH_KIND(sctx, full_text_tokenize_string,
4496+ (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"),
4497+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
4498+ GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
4499+ GENV_TYPESYSTEM.STRING_TYPE_STAR),
4500+ FunctionConsts::FULL_TEXT_TOKENIZE_STRING_2);
4501+
4502+ }
4503+
4504+
4505+#endif
4506+}
4507+
4508+
4509+}
4510+
4511+
4512+
4513
4514=== added file 'src/functions/pregenerated/func_ft_module.h'
4515--- src/functions/pregenerated/func_ft_module.h 1970-01-01 00:00:00 +0000
4516+++ src/functions/pregenerated/func_ft_module.h 2012-04-26 16:53:22 +0000
4517@@ -0,0 +1,225 @@
4518+/*
4519+ * Copyright 2006-2008 The FLWOR Foundation.
4520+ *
4521+ * Licensed under the Apache License, Version 2.0 (the "License");
4522+ * you may not use this file except in compliance with the License.
4523+ * You may obtain a copy of the License at
4524+ *
4525+ * http://www.apache.org/licenses/LICENSE-2.0
4526+ *
4527+ * Unless required by applicable law or agreed to in writing, software
4528+ * distributed under the License is distributed on an "AS IS" BASIS,
4529+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
4530+ * See the License for the specific language governing permissions and
4531+ * limitations under the License.
4532+ */
4533+
4534+// ******************************************
4535+// * *
4536+// * THIS IS A GENERATED FILE. DO NOT EDIT! *
4537+// * SEE .xml FILE WITH SAME NAME *
4538+// * *
4539+// ******************************************
4540+
4541+
4542+#ifndef ZORBA_FUNCTIONS_FT_MODULE_H
4543+#define ZORBA_FUNCTIONS_FT_MODULE_H
4544+
4545+
4546+#include "common/shared_types.h"
4547+#include "functions/function_impl.h"
4548+
4549+
4550+namespace zorba {
4551+
4552+
4553+void populate_context_ft_module(static_context* sctx);
4554+
4555+
4556+#ifndef ZORBA_NO_FULL_TEXT
4557+
4558+//full-text:current-lang
4559+class full_text_current_lang : public function
4560+{
4561+public:
4562+ full_text_current_lang(const signature& sig, FunctionConsts::FunctionKind kind)
4563+ :
4564+ function(sig, kind)
4565+ {
4566+
4567+ }
4568+
4569+ CODEGEN_DECL();
4570+};
4571+#endif
4572+#ifndef ZORBA_NO_FULL_TEXT
4573+
4574+//full-text:host-lang
4575+class full_text_host_lang : public function
4576+{
4577+public:
4578+ full_text_host_lang(const signature& sig, FunctionConsts::FunctionKind kind)
4579+ :
4580+ function(sig, kind)
4581+ {
4582+
4583+ }
4584+
4585+ CODEGEN_DECL();
4586+};
4587+#endif
4588+#ifndef ZORBA_NO_FULL_TEXT
4589+
4590+//full-text:is-stem-lang-supported
4591+class full_text_is_stem_lang_supported : public function
4592+{
4593+public:
4594+ full_text_is_stem_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
4595+ :
4596+ function(sig, kind)
4597+ {
4598+
4599+ }
4600+
4601+ CODEGEN_DECL();
4602+};
4603+#endif
4604+#ifndef ZORBA_NO_FULL_TEXT
4605+
4606+//full-text:is-stop-word
4607+class full_text_is_stop_word : public function
4608+{
4609+public:
4610+ full_text_is_stop_word(const signature& sig, FunctionConsts::FunctionKind kind)
4611+ :
4612+ function(sig, kind)
4613+ {
4614+
4615+ }
4616+
4617+ CODEGEN_DECL();
4618+};
4619+#endif
4620+#ifndef ZORBA_NO_FULL_TEXT
4621+
4622+//full-text:is-stop-word-lang-supported
4623+class full_text_is_stop_word_lang_supported : public function
4624+{
4625+public:
4626+ full_text_is_stop_word_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
4627+ :
4628+ function(sig, kind)
4629+ {
4630+
4631+ }
4632+
4633+ CODEGEN_DECL();
4634+};
4635+#endif
4636+#ifndef ZORBA_NO_FULL_TEXT
4637+
4638+//full-text:is-thesaurus-lang-supported
4639+class full_text_is_thesaurus_lang_supported : public function
4640+{
4641+public:
4642+ full_text_is_thesaurus_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
4643+ :
4644+ function(sig, kind)
4645+ {
4646+
4647+ }
4648+
4649+ CODEGEN_DECL();
4650+};
4651+#endif
4652+#ifndef ZORBA_NO_FULL_TEXT
4653+
4654+//full-text:is-tokenizer-lang-supported
4655+class full_text_is_tokenizer_lang_supported : public function
4656+{
4657+public:
4658+ full_text_is_tokenizer_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
4659+ :
4660+ function(sig, kind)
4661+ {
4662+
4663+ }
4664+
4665+ CODEGEN_DECL();
4666+};
4667+#endif
4668+#ifndef ZORBA_NO_FULL_TEXT
4669+
4670+//full-text:stem
4671+class full_text_stem : public function
4672+{
4673+public:
4674+ full_text_stem(const signature& sig, FunctionConsts::FunctionKind kind)
4675+ :
4676+ function(sig, kind)
4677+ {
4678+
4679+ }
4680+
4681+ CODEGEN_DECL();
4682+};
4683+#endif
4684+#ifndef ZORBA_NO_FULL_TEXT
4685+
4686+//full-text:strip-diacritics
4687+class full_text_strip_diacritics : public function
4688+{
4689+public:
4690+ full_text_strip_diacritics(const signature& sig, FunctionConsts::FunctionKind kind)
4691+ :
4692+ function(sig, kind)
4693+ {
4694+
4695+ }
4696+
4697+ CODEGEN_DECL();
4698+};
4699+#endif
4700+#ifndef ZORBA_NO_FULL_TEXT
4701+
4702+//full-text:thesaurus-lookup
4703+class full_text_thesaurus_lookup : public function
4704+{
4705+public:
4706+ full_text_thesaurus_lookup(const signature& sig, FunctionConsts::FunctionKind kind)
4707+ :
4708+ function(sig, kind)
4709+ {
4710+
4711+ }
4712+
4713+ CODEGEN_DECL();
4714+};
4715+#endif
4716+#ifndef ZORBA_NO_FULL_TEXT
4717+
4718+//full-text:tokenize-string
4719+class full_text_tokenize_string : public function
4720+{
4721+public:
4722+ full_text_tokenize_string(const signature& sig, FunctionConsts::FunctionKind kind)
4723+ :
4724+ function(sig, kind)
4725+ {
4726+
4727+ }
4728+
4729+ CODEGEN_DECL();
4730+};
4731+#endif
4732+
4733+
4734+} //namespace zorba
4735+
4736+
4737+#endif
4738+/*
4739+ * Local variables:
4740+ * mode: c++
4741+ * End:
4742+ */
4743
4744=== modified file 'src/functions/pregenerated/function_enum.h'
4745--- src/functions/pregenerated/function_enum.h 2012-04-24 12:39:38 +0000
4746+++ src/functions/pregenerated/function_enum.h 2012-04-26 16:53:22 +0000
4747@@ -138,6 +138,25 @@
4748 FN_ZORBA_FETCH_CONTENT_2,
4749 FN_ZORBA_FETCH_CONTENT_TYPE_1,
4750 FN_PUT_2,
4751+ FULL_TEXT_CURRENT_LANG_0,
4752+ FULL_TEXT_HOST_LANG_0,
4753+ FULL_TEXT_IS_STEM_LANG_SUPPORTED_1,
4754+ FULL_TEXT_IS_STOP_WORD_1,
4755+ FULL_TEXT_IS_STOP_WORD_2,
4756+ FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1,
4757+ FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1,
4758+ FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2,
4759+ FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1,
4760+ FULL_TEXT_STEM_1,
4761+ FULL_TEXT_STEM_2,
4762+ FULL_TEXT_STRIP_DIACRITICS_1,
4763+ FULL_TEXT_THESAURUS_LOOKUP_1,
4764+ FULL_TEXT_THESAURUS_LOOKUP_2,
4765+ FULL_TEXT_THESAURUS_LOOKUP_3,
4766+ FULL_TEXT_THESAURUS_LOOKUP_4,
4767+ FULL_TEXT_THESAURUS_LOOKUP_6,
4768+ FULL_TEXT_TOKENIZE_STRING_1,
4769+ FULL_TEXT_TOKENIZE_STRING_2,
4770 FN_FUNCTION_NAME_1,
4771 FN_FUNCTION_ARITY_1,
4772 FN_PARTIAL_APPLY_2,
4773
4774=== modified file 'src/runtime/full_text/CMakeLists.txt'
4775--- src/runtime/full_text/CMakeLists.txt 2012-04-24 12:39:38 +0000
4776+++ src/runtime/full_text/CMakeLists.txt 2012-04-26 16:53:22 +0000
4777@@ -13,6 +13,7 @@
4778 # limitations under the License.
4779
4780 SET(FULLTEXT_SRCS
4781+ ft_util.cpp
4782 ft_match.cpp
4783 ft_query_item.cpp
4784 ft_single_token_iterator.cpp
4785@@ -40,6 +41,7 @@
4786 thesaurus.cpp
4787 tokenizer.cpp
4788 default_tokenizer.cpp
4789+ ft_module.cpp
4790 )
4791
4792 IF (ZORBA_NO_ICU)
4793@@ -51,5 +53,5 @@
4794 ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
4795
4796 IF (ZORBA_WITH_FILE_ACCESS)
4797- ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS)
4798+ ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS)
4799 ENDIF (ZORBA_WITH_FILE_ACCESS)
4800
4801=== modified file 'src/runtime/full_text/apply.cpp'
4802--- src/runtime/full_text/apply.cpp 2012-04-24 12:39:38 +0000
4803+++ src/runtime/full_text/apply.cpp 2012-04-26 16:53:22 +0000
4804@@ -26,13 +26,14 @@
4805 #include "diagnostics/dict.h"
4806 #include "diagnostics/xquery_diagnostics.h"
4807 #include "store/api/item.h"
4808+#include "store/api/item_factory.h"
4809 #include "store/api/store.h"
4810-#include "store/api/item_factory.h"
4811 #include "system/globalenv.h"
4812 #include "util/cxx_util.h"
4813 #include "util/indent.h"
4814 #include "util/stl_util.h"
4815 #include "zorbamisc/ns_consts.h"
4816+#include "zorbautils/locale.h"
4817
4818 #ifndef NDEBUG
4819 # include "system/properties.h"
4820@@ -1184,11 +1185,10 @@
4821 {
4822 }
4823
4824- void operator()( char const *utf8_s, size_type utf8_len, size_type,
4825- size_type, size_type, void* ) {
4826- FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ );
4827- tokens_.push_back( t );
4828- }
4829+ // inherited
4830+ void item( Item const&, bool );
4831+ void token( char const*, size_type, iso639_1::type, size_type, size_type,
4832+ size_type, Item const* );
4833
4834 private:
4835 FTTokenSeqIterator::FTTokens &tokens_;
4836@@ -1196,51 +1196,72 @@
4837 iso639_1::type const lang_;
4838 };
4839
4840+void thesaurus_callback::item( Item const&, bool ) {
4841+ // out-of-line since it's virtual
4842+}
4843+
4844+void thesaurus_callback::token( char const *utf8_s, size_type utf8_len,
4845+ iso639_1::type, size_type, size_type,
4846+ size_type, Item const* ) {
4847+ FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ );
4848+ tokens_.push_back( t );
4849+}
4850+
4851 } // anonymous namespace
4852
4853 void ftcontains_visitor::
4854-lookup_thesaurus( ftthesaurus_id const &tid, zstring const &query_phrase,
4855+lookup_thesaurus( ftthesaurus_id const &t_id, zstring const &query_phrase,
4856 FTToken const &qt0, query_item_star_t &result ) {
4857 ft_int at_least, at_most;
4858- if ( ftrange const *const levels = tid.get_levels() )
4859+ if ( ftrange const *const levels = t_id.get_levels() )
4860 eval_ftrange( *levels, &at_least, &at_most );
4861 else
4862 at_least = 0, at_most = numeric_limits<ft_int>::max();
4863
4864- zstring const &uri = tid.get_uri();
4865+ zstring const &uri = t_id.get_uri();
4866
4867 zstring error_msg;
4868 auto_ptr<internal::Resource> rsrc = static_ctx_.resolve_uri(
4869- uri, internal::ThesaurusEntityData( qt0.lang() ), error_msg
4870+ uri, internal::EntityData::THESAURUS, error_msg
4871 );
4872 if ( !rsrc.get() )
4873 throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) );
4874
4875- internal::Thesaurus::ptr thesaurus(
4876- dynamic_cast<internal::Thesaurus*>( rsrc.release() )
4877- );
4878- if ( !thesaurus )
4879- throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) );
4880-
4881- internal::Thesaurus::iterator::ptr tresult(
4882+ internal::ThesaurusProvider const *const t_provider =
4883+ dynamic_cast<internal::ThesaurusProvider const*>( rsrc.get() );
4884+ ZORBA_ASSERT( t_provider );
4885+
4886+ internal::Thesaurus::ptr thesaurus;
4887+ if ( !t_provider->getThesaurus( qt0.lang(), &thesaurus ) )
4888+ throw XQUERY_EXCEPTION(
4889+ zerr::ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED,
4890+ ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] )
4891+ );
4892+
4893+ internal::Thesaurus::iterator::ptr t_synonyms(
4894 thesaurus->lookup(
4895- query_phrase, tid.get_relationship(), at_least, at_most
4896+ query_phrase, t_id.get_relationship(), at_least, at_most
4897 )
4898 );
4899- if ( !tresult )
4900+ if ( !t_synonyms )
4901 return;
4902
4903 FTTokenSeqIterator::FTTokens synonyms;
4904 thesaurus_callback cb( qt0.pos(), qt0.lang(), synonyms );
4905
4906- Tokenizer::Numbers tno;
4907- Tokenizer::ptr tokenizer(
4908- GENV_STORE.getTokenizerProvider()->getTokenizer( qt0.lang(), tno )
4909- );
4910+ Tokenizer::Numbers t_num;
4911+ TokenizerProvider const *const provider = GENV_STORE.getTokenizerProvider();
4912+ ZORBA_ASSERT( provider );
4913+ Tokenizer::ptr tokenizer;
4914+ if ( !provider->getTokenizer( qt0.lang(), &t_num, &tokenizer ) )
4915+ throw XQUERY_EXCEPTION(
4916+ zerr::ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED,
4917+ ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] )
4918+ );
4919
4920- for ( zstring synonym; tresult->next( &synonym ); ) {
4921+ for ( zstring synonym; t_synonyms->next( &synonym ); ) {
4922 synonyms.clear();
4923- tokenizer->tokenize(
4924+ tokenizer->tokenize_string(
4925 synonym.data(), synonym.size(), qt0.lang(), false, cb
4926 );
4927 query_item_t const query_item( new FTTokenSeqIterator( synonyms ) );
4928
4929=== added file 'src/runtime/full_text/ft_module_impl.cpp'
4930--- src/runtime/full_text/ft_module_impl.cpp 1970-01-01 00:00:00 +0000
4931+++ src/runtime/full_text/ft_module_impl.cpp 2012-04-26 16:53:22 +0000
4932@@ -0,0 +1,843 @@
4933+/*
4934+ * Copyright 2006-2008 The FLWOR Foundation.
4935+ *
4936+ * Licensed under the Apache License, Version 2.0 (the "License");
4937+ * you may not use this file except in compliance with the License.
4938+ * You may obtain a copy of the License at
4939+ *
4940+ * http://www.apache.org/licenses/LICENSE-2.0
4941+ *
4942+ * Unless required by applicable law or agreed to in writing, software
4943+ * distributed under the License is distributed on an "AS IS" BASIS,
4944+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
4945+ * See the License for the specific language governing permissions and
4946+ * limitations under the License.
4947+ */
4948+
4949+#include <zorba/config.h>
4950+
4951+#ifndef ZORBA_NO_FULL_TEXT
4952+
4953+# include <limits>
4954+# include <typeinfo>
4955+
4956+# include <zorba/diagnostic_list.h>
4957+
4958+# include "api/unmarshaller.h"
4959+# include "context/namespace_context.h"
4960+# include "context/static_context.h"
4961+# include "diagnostics/assert.h"
4962+# include "diagnostics/xquery_diagnostics.h"
4963+# include "store/api/index.h"
4964+# include "store/api/item.h"
4965+# include "store/api/item_factory.h"
4966+# include "store/api/iterator.h"
4967+# include "store/api/store.h"
4968+# include "system/globalenv.h"
4969+# include "types/casting.h"
4970+# include "types/typeimpl.h"
4971+# include "types/typeops.h"
4972+# include "util/utf8_util.h"
4973+# include "zorbatypes/URI.h"
4974+# include "zorbautils/locale.h"
4975+
4976+# include "ft_stop_words_set.h"
4977+# include "ft_token_seq_iterator.h"
4978+# include "ft_util.h"
4979+# include "thesaurus.h"
4980+
4981+#endif /* ZORBA_NO_FULL_TEXT */
4982+
4983+#include "runtime/full_text/ft_module.h"
4984+
4985+using namespace std;
4986+using namespace zorba::locale;
4987+
4988+namespace zorba {
4989+
4990+///////////////////////////////////////////////////////////////////////////////
4991+
4992+#ifndef ZORBA_NO_FULL_TEXT
4993+inline iso639_1::type get_lang_from( static_context const *sctx ) {
4994+ iso639_1::type const lang = get_lang_from( sctx->get_match_options() );
4995+ return lang ? lang : get_host_lang();
4996+}
4997+
4998+static iso639_1::type get_lang_from( store::Item_t lang_item,
4999+ QueryLoc const &loc ) {
5000+ zstring lang_string;
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches