Merge lp:~zorba-coders/zorba/feature-ft_module into lp:zorba

Proposed by Paul J. Lucas
Status: Superseded
Proposed branch: lp:~zorba-coders/zorba/feature-ft_module
Merge into: lp:zorba
Diff against target: 12663 lines (+7591/-1375)
242 files modified
ChangeLog (+3/-0)
cmake_modules/FindICU.cmake (+2/-0)
doc/zorba/ft_intro.dox (+8/-8)
doc/zorba/ft_stemmer.dox (+25/-7)
doc/zorba/ft_thesaurus.dox (+134/-88)
doc/zorba/ft_tokenizer.dox (+160/-61)
include/zorba/locale.h (+189/-15)
include/zorba/pregenerated/diagnostic_list.h (+8/-0)
include/zorba/stemmer.h (+23/-4)
include/zorba/thesaurus.h (+36/-22)
include/zorba/tokenizer.h (+136/-56)
include/zorba/uri_resolvers.h (+4/-3)
modules/com/zorba-xquery/www/modules/CMakeLists.txt (+7/-0)
modules/com/zorba-xquery/www/modules/full-text.xq (+872/-0)
modules/com/zorba-xquery/www/modules/full-text.xsd (+134/-0)
modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp (+1/-1)
modules/com/zorba-xquery/www/modules/pregenerated/errors.xq (+17/-0)
modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq (+1/-3)
scripts/zt-wn-get (+3/-3)
src/api/CMakeLists.txt (+3/-2)
src/api/staticcontextimpl.cpp (+2/-3)
src/api/stemmer_wrappers.cpp (+21/-12)
src/api/stemmer_wrappers.h (+2/-1)
src/api/thesaurus.cpp (+5/-3)
src/api/thesaurus_wrappers.cpp (+21/-0)
src/api/thesaurus_wrappers.h (+12/-0)
src/api/uri_resolver_wrappers.cpp (+15/-33)
src/api/xmldatamanagerimpl.cpp (+1/-1)
src/api/xmldatamanagerimpl.h (+1/-1)
src/compiler/codegen/plan_visitor.cpp (+1/-1)
src/compiler/expression/expr_put.cpp (+1/-0)
src/compiler/translator/translator.cpp (+44/-10)
src/context/CMakeLists.txt (+0/-5)
src/context/default_url_resolvers.cpp (+19/-5)
src/context/static_context.cpp (+12/-4)
src/context/static_context.h (+3/-0)
src/context/stemmer_wrappers.cpp (+0/-74)
src/context/stemmer_wrappers.h (+0/-63)
src/context/uri_resolver.cpp (+0/-13)
src/context/uri_resolver.h (+4/-23)
src/diagnostics/assert.cpp (+1/-1)
src/diagnostics/assert.h (+5/-1)
src/diagnostics/diagnostic_en.xml (+23/-3)
src/diagnostics/pregenerated/diagnostic_list.cpp (+12/-0)
src/diagnostics/pregenerated/dict_en.cpp (+12/-0)
src/functions/CMakeLists.txt (+4/-0)
src/functions/external_function.cpp (+1/-2)
src/functions/func_ft_module_impl.cpp (+110/-0)
src/functions/function.cpp (+3/-0)
src/functions/function.h (+9/-1)
src/functions/library.cpp (+8/-0)
src/functions/pregenerated/func_ft_module.cpp (+496/-0)
src/functions/pregenerated/func_ft_module.h (+259/-0)
src/functions/pregenerated/function_enum.h (+23/-0)
src/runtime/full_text/CMakeLists.txt (+3/-1)
src/runtime/full_text/apply.cpp (+46/-25)
src/runtime/full_text/ft_module_impl.cpp (+843/-0)
src/runtime/full_text/ft_module_impl.h (+32/-0)
src/runtime/full_text/ft_query_item.h (+2/-1)
src/runtime/full_text/ft_single_token_iterator.h (+0/-2)
src/runtime/full_text/ft_stop_words_set.cpp (+32/-27)
src/runtime/full_text/ft_stop_words_set.h (+29/-14)
src/runtime/full_text/ft_token_matcher.cpp (+7/-7)
src/runtime/full_text/ft_token_matcher.h (+1/-1)
src/runtime/full_text/ft_token_seq_iterator.cpp (+7/-3)
src/runtime/full_text/ft_token_seq_iterator.h (+3/-0)
src/runtime/full_text/ft_token_span.h (+2/-2)
src/runtime/full_text/ft_util.cpp (+42/-0)
src/runtime/full_text/ft_util.h (+12/-1)
src/runtime/full_text/ftcontains_visitor.cpp (+0/-10)
src/runtime/full_text/full_text.h (+1/-1)
src/runtime/full_text/icu_tokenizer.cpp (+43/-14)
src/runtime/full_text/icu_tokenizer.h (+7/-5)
src/runtime/full_text/latin_tokenizer.cpp (+31/-13)
src/runtime/full_text/latin_tokenizer.h (+7/-5)
src/runtime/full_text/pregenerated/ft_module.cpp (+362/-0)
src/runtime/full_text/pregenerated/ft_module.h (+561/-0)
src/runtime/full_text/stemmer.cpp (+8/-2)
src/runtime/full_text/stemmer.h (+24/-5)
src/runtime/full_text/stemmer/sb_stemmer.cpp (+20/-13)
src/runtime/full_text/stemmer/sb_stemmer.h (+1/-0)
src/runtime/full_text/thesauri/wn_thesaurus.cpp (+79/-9)
src/runtime/full_text/thesauri/wn_thesaurus.h (+29/-5)
src/runtime/full_text/thesauri/xqftts_thesaurus.cpp (+28/-3)
src/runtime/full_text/thesauri/xqftts_thesaurus.h (+28/-4)
src/runtime/full_text/thesaurus.cpp (+34/-50)
src/runtime/full_text/thesaurus.h (+30/-4)
src/runtime/full_text/tokenizer.cpp (+86/-8)
src/runtime/spec/codegen-cpp.xq (+13/-5)
src/runtime/spec/codegen-h.xq (+1/-1)
src/runtime/spec/full_text/ft_module.xml (+247/-0)
src/runtime/spec/mappings.xml (+14/-2)
src/runtime/visitors/pregenerated/planiter_visitor.h (+91/-0)
src/runtime/visitors/pregenerated/printer_visitor.cpp (+196/-0)
src/runtime/visitors/pregenerated/printer_visitor.h (+65/-0)
src/store/naive/atomic_items.cpp (+10/-10)
src/store/naive/atomic_items.h (+8/-21)
src/store/naive/node_items.cpp (+39/-126)
src/store/naive/node_items.h (+17/-65)
src/unit_tests/stemmer.cpp (+11/-7)
src/unit_tests/string.cpp (+16/-0)
src/unit_tests/thesaurus.cpp (+52/-25)
src/unit_tests/tokenizer.cpp (+47/-28)
src/util/fs_util.h (+3/-0)
src/util/unicode_util.cpp (+14/-0)
src/util/unicode_util.h (+12/-0)
src/util/uri_util.h (+6/-6)
src/util/utf8_util.h (+2/-1)
src/util/utf8_util.tcc (+17/-10)
src/zorbatypes/ft_token.cpp (+1/-1)
src/zorbatypes/ft_token.h (+1/-1)
src/zorbatypes/numconversions.cpp (+18/-7)
src/zorbautils/locale.cpp (+385/-6)
src/zorbautils/locale.h (+375/-271)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-current-lang-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-stop-word-true-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-stem-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-strip-diacritics-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-thesaurus-lookup-5.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-3.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-4.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-1.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-string-2.xml.res (+1/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenizer-properties-1.xml.res (+112/-0)
test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenizer-properties-2.xml.res (+112/-0)
test/rbkt/Queries/CMakeLists.txt (+14/-4)
test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-da-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-de-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-en-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-fi-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-hu-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-it-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-nl-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-no-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-pt-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-ru-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-sv-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-false-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-da-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-de-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-es-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fi-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-fr-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-hu-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-it-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-nl-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-no-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-pt-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-sv-supported-true.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-1.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-2.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.spec (+1/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-false-3.xq (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-2.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq (+6/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq (+7/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq (+7/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.spec (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq (+8/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq (+18/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq (+18/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq (+8/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-1.xq (+3/-0)
test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-2.xq (+5/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.spec (+4/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-FOCA0003-1.xq (+10/-0)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-1.spec (+0/-3)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-2.spec (+0/-3)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-3.spec (+1/-1)
test/rbkt/Queries/zorba/fulltext/ft-thesaurus-true-4.spec (+1/-1)
test/rbkt/Scripts/w3c/import_w3c_full_text_testsuite.sh (+1/-1)
test/rbkt/testdriver.cpp (+1/-1)
To merge this branch: bzr merge lp:~zorba-coders/zorba/feature-ft_module
Reviewer Review Type Date Requested Status
Markos Zaharioudakis Pending
Matthias Brantner Pending
Review via email: mp+103378@code.launchpad.net

This proposal supersedes a proposal from 2012-04-21.

This proposal has been superseded by a proposal from 2012-04-24.

Commit message

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.

Description of the change

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements.

To post a comment you must log in.
Revision history for this message
Matthias Brantner (matthias-brantner) wrote : Posted in a previous version of this proposal

What's the change in modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp supposed to do? Isn't this introducing a memory leak?

review: Needs Information
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

The Remote Queue seems to have gotten itself into an infinite loop. I've notified Chris. PLEASE DO NOT SET THE PROPOSAL TO "APPROVED" TO START THE REMOTE QUEUE UNTIL THIS IS FIXED.

@Matthias: The "theSerStream" change was to fix a crash in the module. No, I don't remember why it's fixed in this branch. I think the change is OK as it is because theSerStream is deleted via cleanUpBody() that's called from endBody(). However, I changed the code to set theSerStream to null in cleanUpBody() and put the delete back in the destructor. (Deleting a null pointer is guaranteed to be harmless in C++.)

Revision history for this message
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal

What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : Posted in a previous version of this proposal
Revision history for this message
Paul J. Lucas (paul-lucas) wrote : Posted in a previous version of this proposal

On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:

> What seems to be happening with the queue is that the build/test is timing out after 40 minutes, which unfortunately doesn't log appropriately. I have increased the timeout to 60 minutes and we'll see if it runs. But before this gets merged, we should look and see if we can figure out why it's taking longer than normal to run with this build.

FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.

- Paul

Revision history for this message
Chris Hillery (ceejatec) wrote : Posted in a previous version of this proposal

Other branches have been ok on the RQ machine with a 40-minute timeout. Why
does this branch take so much longer? If it added a great many slow tests
then that's probably ok, but if something is causing everything to go
slower, then that's a problem. Maybe ask on zorba-dev if it would be
possible to run the pdash tests on this branch?
On Apr 22, 2012 3:31 PM, "Paul J. Lucas" <email address hidden> wrote:

> On Apr 21, 2012, at 10:44 PM, Chris Hillery wrote:
>
> > What seems to be happening with the queue is that the build/test is
> timing out after 40 minutes, which unfortunately doesn't log appropriately.
> I have increased the timeout to 60 minutes and we'll see if it runs. But
> before this gets merged, we should look and see if we can figure out why
> it's taking longer than normal to run with this build.
>
> FYI: On my machine, it took 73 minutes. The trunk build took 63 minutes.
>
> - Paul
>
>
> --
>
> https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/102972
> Your team Zorba Coders is subscribed to branch lp:zorba.
>
> --
> Mailing list: https://launchpad.net/~zorba-coders
> Post to : <email address hidden>
> Unsubscribe : https://launchpad.net/~zorba-coders
> More help : https://help.launchpad.net/ListHelp
>

Revision history for this message
Paul J. Lucas (paul-lucas) wrote :

I diff'd the output times between the trunk and this branch and nothing jumps out as being significantly longer. (Some tests actually run in less time.) My latest run of the test suite on my machine took 69 minutes.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.

10796. By Sorin Marian Nasoi <email address hidden>

Fix for lp:967284. Approved: Nicolae Brinza, Sorin Marian Nasoi

10797. By Ghislain Fourny

Fixing case where accessing an empty vector with position 0 in base64binary (causes assertion on Windows to be hit). Approved: Markos Zaharioudakis, Matthias Brantner

10798. By Markos Zaharioudakis

put back --compile-plan and --execute-plan options Approved: Markos Zaharioudakis

10799. By Juan Zacarias

Solves bug #928135
Segmentation fault on python Approved: Rodolfo Ochoa, Matthias Brantner

10800. By Matthias Brantner

fix for bug #988412 (date:current-dateTime daylight saving) Approved: William Candillon, Matthias Brantner

10801. By Paul J. Lucas

Better implementation of listing files in a directory. Approved: Rodolfo Ochoa, Cezar Andrei

10802. By Chris Hillery

Add zorba::Item::getTypeCode(), and make corresponding SchemaTypeCode enum part of the public API. Add zorba::ItemFactory::assignElementTypedValue() to allow construction of elements with typed values in the public API. As discussed, this API is not the prettiest, but it is the least intrusive while being the easiest for end-users to make use of. Approved: Markos Zaharioudakis, Chris Hillery, Matthias Brantner

10803. By Ghislain Fourny

Added an instruction to sort chained tests for deterministic order. Approved: Matthias Brantner, Markos Zaharioudakis

10804. By Paul J. Lucas

1. Added a new full-text module.
2. Fixed semi-broken Thesaurus API.
3. Now supporting many more languages for tokenization including Chinese.
4. Many other full-text improvements. Approved: Chris Hillery, Paul J. Lucas

10805. By Nicolae Brinza

Fixes for bugs #931501 and #866987 -- improved error messages for fn:format-number() Approved: Nicolae Brinza, Matthias Brantner

10806. By Rodolfo Ochoa

Collection Manager and Document Manager is ready on XQJ API. Approved: Cezar Andrei, Matthias Brantner

10807. By Chris Hillery

Fix remote queue failures due to change in 28msec.com. Approved: Till Westmann, Chris Hillery

10808. By Cezar Andrei <email address hidden>

Fix for Bug #857842 Assertion failed with simple content element with comments
Make use of XML_SCHEMA_NS definition from ns_consts.h. Approved: Matthias Brantner, Cezar Andrei

10809. By Sorin Marian Nasoi <email address hidden>

fix for lp:987830. Approved: Sorin Marian Nasoi, Markos Zaharioudakis

10810. By Sorin Marian Nasoi <email address hidden>

Added the STACK and QUEUE modules. Approved: Matthias Brantner, Sorin Marian Nasoi

10811. By Chris Hillery

Added note about re-running CMake if you add or remove .cpp files in your module source directory.
 Approved: Juan Zacarias, Chris Hillery

10812. By Nicolae Brinza

Added XQuery 3.0 Functions
path, has-children#0, nilled#0 Approved: Juan Zacarias, Matthias Brantner

10813. By Chris Hillery

Massively refactor, reformat, correct, and clean up Zorba's Build doc. Approved: William Candillon, Chris Hillery

10814. By Markos Zaharioudakis

1. Fixed memory leak in case of index truncation
2. Fixed bug in mergeUpdates() method
 Approved: Markos Zaharioudakis

10815. By Nicolae Brinza

Small optimization in the serializer to avoid a repeated string comparison Approved: Nicolae Brinza, David Graf

10816. By Markos Zaharioudakis

Fixed bug in MarkNodeCopyProps rule (static cast to replace_expr without changing the expr kind first) Approved: Markos Zaharioudakis

10817. By Markos Zaharioudakis

fixed bug #966706 (key uniqueness of value equality index not enforced) Approved: Markos Zaharioudakis, Till Westmann, Matthias Brantner

10818. By Markos Zaharioudakis

Fixed bug #862971 (no error upon duplicate function declarations) Approved: Markos Zaharioudakis

10819. By Paul J. Lucas

Fixed warnings and build error. Approved: David Graf, Paul J. Lucas

10820. By David Graf

fixing windows build Approved: Ghislain Fourny, David Graf

10821. By Till Westmann

enable blocking of internal modules by running through URI mapping (but not through URL resolution) during translation Approved: Matthias Brantner, Markos Zaharioudakis, Chris Hillery

10822. By Markos Zaharioudakis

fixed memory leak in population of value index Approved: Markos Zaharioudakis

10823. By Paul J. Lucas

Fixed typos. Approved: Matthias Brantner, Paul J. Lucas

10824. By Matthias Brantner

optimized ft:tokenize (no validation of tokens + factorized creation of qnames) Approved: Paul J. Lucas, Matthias Brantner

10825. By Chris Hillery

Removed note about JSONiq (not in 2.5 after all). Approved: Matthias Brantner, Chris Hillery

10826. By Markos Zaharioudakis

fixed bug #867170 Approved: Markos Zaharioudakis

10827. By Ghislain Fourny

Fixed bug 978254 (QName comparison in item sequence chainer ignored namespaces and took prefixes into account). Approved: Markos Zaharioudakis, Matthias Brantner

10828. By Paul J. Lucas

Fixes the stop-words core dump on 64-bit Linux. Approved: Matthias Brantner, Paul J. Lucas

10829. By Paul J. Lucas

1. Added fix for [\s] -- should now always throw FORX0002.
2. I think all regex tests "pass" using both pre- and post-4.0 of ICU. Approved: Ghislain Fourny, Paul J. Lucas

10830. By Matthias Brantner

no node copying during insertion into collection if the nodes are freshly constructed nodes Approved: Markos Zaharioudakis, Matthias Brantner

10831. By Matthias Brantner

- Add the ability to create a StreamableStringItem that re-uses the stream from another Streamable*Item (in a memory-ownership-safe way). Fixed bug #996084 (crash in Streamable*Item with file module)

- Add a base64:decode#2 function that also does transcoding to utf-8 Approved: Chris Hillery, Dennis Knochenwefel, William Candillon, Matthias Brantner

10832. By Paul J. Lucas

Added current-compare-options() function. Approved: Matthias Brantner, Paul J. Lucas

10833. By Matthias Brantner

note in the java documentation that it might be deprecated in favor of xqj Approved: Matthias Brantner, Rodolfo Ochoa

10834. By Paul J. Lucas

1. In transcoding streambufs, throwing std::invalid_argument for empty charsets.
2. In the HTTP code, now setting the charset to ISO-8859-1 in the constructor so it's set even when there's no Content-Type header. Approved: Dennis Knochenwefel, Paul J. Lucas

10835. By Paul J. Lucas

Documentation tweaks. Approved: Matthias Brantner, Paul J. Lucas

10836. By Nicolae Brinza

Documentation fixes. Approved: Matthias Brantner, Nicolae Brinza

10837. By Paul J. Lucas

Added link to full-text module.
Note that I have no way to test it. Approved: Matthias Brantner, Paul J. Lucas

10838. By Paul J. Lucas

Added check to see that JsonML (array form) really starts with '['. Approved: William Candillon, Paul J. Lucas

10839. By Sorin Marian Nasoi <email address hidden>

Updated the import scripts to use the W3C testsuites from 04-May-2012. Approved: Chris Hillery, Sorin Marian Nasoi

10840. By Rodolfo Ochoa

Complete XQJ Documentation Approved: Matthias Brantner, Cezar Andrei

10841. By Rodolfo Ochoa

BaseURI can now be cleared through a method.
When BaseUri is undefined it returns an empty string instead of asserting.
Fixed some compilation warnings for a cleaner compile.
Added #define stdafx.h to some files to fix the precompiled headers on Windows. Approved: Matthias Brantner, Chris Hillery

10842. By Matthias Brantner

use an instead of ann prefix in the documentation Approved: Matthias Brantner, Chris Hillery

10843. By Matthias Brantner

- no undo is done for collection truncate
- fix for bug #986377 "do not apply any updates on collection if it is to be truncated" Approved: Markos Zaharioudakis, Matthias Brantner

10844. By Nicolae Brinza

Documentation improvements. Fixed the type of the options parameter to the parse-fragment() function. Approved: Nicolae Brinza, Matthias Brantner

10845. By Paul J. Lucas

Renamed Tokenizer::Numbers to Tokenizer::State now (just prior to the 2.5 release) to give it a better name for the forthcoming addition of the ability to tokenize using include/exclude Item lists. At that time, State will most likely be expanded to include additional state information beyond just numbers, hence the name change.

(In the previous proposal, I had forgotten to update the documentation -- now done.) Approved: Matthias Brantner, Paul J. Lucas

10846. By Ghislain Fourny

Allowing general tree IDs (containing hexadecimal digits and dash) in structural URIs (test). Approved: Markos Zaharioudakis, Matthias Brantner

10847. By Matthias Brantner

replaced occurrences of XQuery version 1.1 with 3.0 Approved: Chris Hillery, Matthias Brantner

10848. By Chris Hillery

Fix (from Matthias) to ensure StaticContext used for invoke() lives as long as the ItemSequence returned from said invoke().
 Approved: Matthias Brantner, Chris Hillery

10849. By Matthias Brantner

- fixed itemfactory unit test on Windows
- disabled string unit test on Windows (because of bug #867271) Approved: Chris Hillery, Matthias Brantner

10850. By William Candillon

Remove dead links in the documentation. Approved: William Candillon, Matthias Brantner

10851. By Paul J. Lucas

Fixed the build error and also fixed several warnings that are new with Xcode 4.3.2. Approved: Matthias Brantner, Paul J. Lucas

10852. By Paul J. Lucas

Getting in another public API change for 2.5 for the full-text module since now's the time to do it. Renamed tokenize() to tokenize-node() for 2 reasons:

1. There already exists tokenize-string() and therefore tokenize-node() is a better name than just plain tokenize().

2. The forthcoming addition of the black & white tokenization function will most likely be called tokenize-nodes() -- plural. Approved: Matthias Brantner, Paul J. Lucas

10853. By William Candillon

Fix PHP build Approved: Paul J. Lucas, William Candillon

10854. By Sorin Marian Nasoi <email address hidden>

Update modules_svg generation target. Add mechanism for core build to detect whether a given non-core module exists (by URI). Use that method to check for graphviz before generating modules_svg. Approved: Sorin Marian Nasoi, Chris Hillery

10855. By Chris Hillery

Allow modules to specify their own libs for their Config file, in addition to the automatically-added libs from DECLARE_ZORBA_MODULE().
 Approved: Matthias Brantner, Chris Hillery

10856. By Matthias Brantner

- fetch:content-binary
- fetch:content#3 (with encoding parameter)
- StreamResource::isStreamSeekable to make sure the streamable strings returned by fetch are seekable.
- fixed two warnings in nodes_impl.cpp
- extended the C++ api to be symmetric to the fetch module Approved: Till Westmann, Chris Hillery, Matthias Brantner

10857. By Paul J. Lucas

Fixed a couple of warnings. Approved: Matthias Brantner, Paul J. Lucas

10858. By Matthias Brantner

fix for bug #1001463 (type not available during computation of function caching). Approved: Markos Zaharioudakis, Matthias Brantner

10859. By Matthias Brantner

Add support for function items test in the print xquery visitor. Approved: Matthias Brantner, William Candillon

10860. By Matthias Brantner

added stacks and queues to the data lifecycle documentation Approved: William Candillon, Matthias Brantner

10861. By Matthias Brantner

push-down of count(probe-index()) into the store Approved: Till Westmann, Matthias Brantner

10862. By Chris Hillery

Set CMAKE_INSTALL_RPATH_USE_LINK_PATH to TRUE to have correct RPATHs in installed binaries. Add comment about INSTALL_NAME_DIR. Approved: Matthias Brantner, Chris Hillery

10863. By Matthias Brantner

Update changelog, update module tags for Zorba 2.5. Approved: Matthias Brantner, Chris Hillery

10864. By Sorin Marian Nasoi <email address hidden>

Fixed bug lp:1001477. Approved: Rodolfo Ochoa, Sorin Marian Nasoi

10865. By Paul J. Lucas

No longer setting the charset of an overridden media type to a default value. Approved: David Graf, Matthias Brantner

10866. By Matthias Brantner

updated change log to reflect bug #1002867 Approved: Paul J. Lucas, Matthias Brantner

10867. By Matthias Brantner

added a cmake variable that allows to configure whether external jars are packaged or not (ZORBA_PACKAGE_EXTERNAL_JARS)
 Approved: Matthias Brantner, Chris Hillery

10868. By Paul J. Lucas

s/ZORBA_ASSERT/if/ Approved: Matthias Brantner, Paul J. Lucas

10869. By Paul J. Lucas

1. Fixed return type of ft:thesaurus-lookup().
2. Added a test to ensure that a look-up of a non-existent word works. Approved: Matthias Brantner, Paul J. Lucas

10870. By Rodolfo Ochoa

- Install added for XQJ bindings and documentation
- more documentation for all SWIG generated language bindings
- other installer related cleanups Approved: Matthias Brantner, Chris Hillery, Juan Zacarias

10871. By Markos Zaharioudakis

fixed bug #1006166 Approved: Markos Zaharioudakis

10872. By Markos Zaharioudakis

fixed bug 960083 (improper error handling of NaN comparisons) + small optimization of comparison operations Approved: Markos Zaharioudakis

10873. By Carlos Manuel Lopez

Implements new group by syntax, as defined in the XQuery 3.0 Spec since September 2011 Approved: Markos Zaharioudakis

10874. By Markos Zaharioudakis

Fixed bug #1003023 (optimizer problems due to common subexpression after var folding into if-then-else) Approved: Markos Zaharioudakis

10875. By Markos Zaharioudakis

fixed bug #854506 and partial fix for bug #867008 Approved: Markos Zaharioudakis

10876. By Markos Zaharioudakis

Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428) Approved: Markos Zaharioudakis

10877. By Markos Zaharioudakis

Fixed bug #991088 (raise XUST0001 in trycatch with mixed updating and simple clauses)
 Approved: Markos Zaharioudakis

10878. By Till Westmann

add location information to ZXQP0029_URI_ACCESS_DENIED Approved: Chris Hillery, Markos Zaharioudakis

10879. By Till Westmann

remove unnecessary annotations and clean-up indentation Approved: Ghislain Fourny, Till Westmann

10880. By Markos Zaharioudakis

Merged the JSONiq branch into the zorba trunk, with JSONiq deactivated. Approved: Matthias Brantner, Ghislain Fourny, Markos Zaharioudakis

10881. By Markos Zaharioudakis

renamed file test/unit/static_context.cpp to avoid conflict with src/context/static_context.cpp during debugging Approved: Markos Zaharioudakis

10882. By Markos Zaharioudakis

Fixed bug #1008082 (bug in transform expr when a copy var is not used anywhere) Approved: Markos Zaharioudakis

10883. By Markos Zaharioudakis

Fixed bug #932314 (non-comparable values must be treated as distinct by fn:distinct-values) Approved: Markos Zaharioudakis

10884. By Ghislain Fourny

The builtin schema type names, in the store, are now created directly from the pool (this is to prevent a cyclic dependency if another item factory than the simple item factory needs to access the store to produce the QNames, because at this point the store has not been initialized yet). Approved: Matthias Brantner, Markos Zaharioudakis

10885. By Markos Zaharioudakis

Plan serializer does not serialize expressions anymore. Approved: Markos Zaharioudakis

10886. By Markos Zaharioudakis

added serialize_csize function Approved: Markos Zaharioudakis

10887. By Rodolfo Ochoa

Error fixed on windows Approved: Chris Hillery, Cezar Andrei

10888. By Till Westmann

Added support for transient maps in the unordered-maps module. Approved: Matthias Brantner, Till Westmann

10889. By Nicolae Brinza

Improved parser error messages (bug #867357). Approved: David Graf, Matthias Brantner

10890. By Ghislain Fourny

Fixes csize serialization. Approved: Markos Zaharioudakis, Ghislain Fourny

10891. By Ghislain Fourny

Adding asserts in OrdPath::getLocalBitLength to prevent endless loops and possibly reproduce such a potential endless loop with more information. Approved: David Graf, Markos Zaharioudakis

10892. By Dennis Knochenwefel

Cleaning up attribute and element nodes' type handling and checking invariants, following an inconsistency discovered on Windows. Approved: Markos Zaharioudakis, Matthias Brantner, Dennis Knochenwefel

10893. By Nicolae Brinza

Fixed and enabled fn:parse-xml-fragment(). Approved: Juan Zacarias, Matthias Brantner

10894. By Paul J. Lucas

Added base64::streambuf class and replaced horribly inefficient base64 code. Approved: Matthias Brantner, Paul J. Lucas

10895. By Rodolfo Ochoa

Adding cacert.pem for Windows Approved: Cezar Andrei, Chris Hillery

10896. By Markos Zaharioudakis

A better fix for compilation problems involving integer types. Approved: Markos Zaharioudakis

10897. By Paul J. Lucas

Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas

10898. By Markos Zaharioudakis

Optimized hash sets used by fn:distinct-values and nodes-distinct Approved: Markos Zaharioudakis

10899. By Till Westmann

modify fn:path to add support for fragments Approved: Juan Zacarias, Matthias Brantner, Till Westmann

10900. By Paul J. Lucas

Added C++98 subset of C++11's unordered_map. The configure process checks for a working C++11 unordered_map first: if found, it will be used instead. The goal is to phase out the old hash*map* classes over time. Approved: Markos Zaharioudakis, Matthias Brantner

10901. By Till Westmann

add ref:has-node-reference and ref:assign-node-reference Approved: Markos Zaharioudakis, Matthias Brantner

10902. By Paul J. Lucas

Added the missing specialization for hash<unique_ptr<T,D>>. Approved: Matthias Brantner, Paul J. Lucas

10903. By Matthias Brantner

- added API function Item:isSeekable
- fixed a problem in the file module where all exceptions thrown in the body were caught and rethrown as file exceptions Approved: Chris Hillery, Matthias Brantner

10904. By David Graf

add positional skipping to access of static and dynamic collections Approved: Matthias Brantner, Till Westmann

10905. By Markos Zaharioudakis

Fixed bug #1018673 + renamed misleading isXXXNode() methods. Approved: Markos Zaharioudakis

10906. By Markos Zaharioudakis

Optimized hash function used for nodes (fixes bug #1010051) + some hashmap/hashset cleanup Approved: Markos Zaharioudakis

10907. By Markos Zaharioudakis

Fixed bug #1016429 (scoping of copy variables in transform expr) Approved: Markos Zaharioudakis

10908. By Paul J. Lucas

Added tokenize-nodes() function. Approved: Matthias Brantner, Paul J. Lucas

10909. By David Graf

windows build fix Approved: Dennis Knochenwefel, David Graf

10910. By Ghislain Fourny

Differentiating between two reasons why references are unregistered. Approved: Markos Zaharioudakis, Matthias Brantner

10911. By Ghislain Fourny

Fixed a bug that prevented Zorba from being compiled in single-threaded mode. Approved: Markos Zaharioudakis, Ghislain Fourny

10912. By Dennis Knochenwefel

fix for bug #1020953 (access of freed object) Approved: Ghislain Fourny, Nicolae Brinza

10913. By Ghislain Fourny

Adding a NOT ZORBA_HAVE_UNIQUE_PTR guard for adding the unique_ptr test, for consistency. There are guards for including the corresponding resources, so that the test was failing by lack of them.
 Approved: Markos Zaharioudakis, Till Westmann

10914. By Dennis Knochenwefel

fixed memory leak reading data of zstring after destruction. Encoded illegal char in exception to not cause any problems. Approved: Till Westmann, Ghislain Fourny

10915. By Dennis Knochenwefel

fixed CHECK_CXX_SOURCE_COMPILES doesn't work with backslash-n using cmake 2.8.8 on windows. Approved: Paul J. Lucas, Dennis Knochenwefel

10916. By Paul J. Lucas

Now checking data size before accessing vector[0]. Approved: Dennis Knochenwefel, Paul J. Lucas

10917. By David Graf

Unifying .bat and .vcproj generation. Bug #1013075 Approved: Rodolfo Ochoa, Dennis Knochenwefel

10918. By Chris Hillery

EXPECTED_FAILURE()s for two tests failing due to flworfound.org changes. Approved: Matthias Brantner, Chris Hillery

10919. By Paul J. Lucas

1. Added missing "lang" attribute to tokens generated from tokenize-nodes().
2. Added a test. Approved: Chris Hillery, Paul J. Lucas

10920. By Paul J. Lucas

Moved URI resolution for the thesaurus into the translator. Approved: Matthias Brantner, Paul J. Lucas

10921. By Paul J. Lucas

Fixed warnings. Approved: Matthias Brantner, Paul J. Lucas

10922. By Markos Zaharioudakis

Fixed bug #1022557 (subsequence function applied on window variable) Approved: Markos Zaharioudakis

10923. By Ghislain Fourny

Several modifications in the implementation of JSONiq to make it in sync with the new specification, in particular:
- No more pairs,
- Minor changes to the update syntax
- Synced returned error codes
- Support for collections and indices, including automatic maintenance
- Serializer extended to allow mixed JDM and XDM
- Updated C++ API Approved: Matthias Brantner, Ghislain Fourny, Chris Hillery, Markos Zaharioudakis

10924. By Markos Zaharioudakis

added auditing in zorba cmd + fixed bug in auditing the parse time Approved: Markos Zaharioudakis

10925. By Markos Zaharioudakis

Fixed bug #1002993 (bug during revalidation after update due to improper condition for calling TypeOps::get_atomic_type_code() from SchemaValidatorImpl::isPossibleSimpleContentRevalImpl()) Approved: Cezar Andrei, Markos Zaharioudakis

10926. By Ghislain Fourny

Adding two tests for allowing two errors upon sequence of several JSON items and an XML node. Approved: Till Westmann, Matthias Brantner

10927. By Markos Zaharioudakis

Avoid (if possible) treat expr for checking that the value of a non-external global variable conforms to the type declaration of the variable Approved: Markos Zaharioudakis

10928. By Nicolae Brinza

Implemented the new EQName syntax. Approved: Matthias Brantner, Nicolae Brinza

10929. By Paul J. Lucas

Reverted previous "fix." Approved: Matthias Brantner, Paul J. Lucas

10930. By Markos Zaharioudakis

Streaming execution for tumbling windows (also fixes bug #1010051) Approved: Markos Zaharioudakis

10931. By Ghislain Fourny

Fixed build errors occurring on Windows by exposing structured item types even if JSONiq is deactivated. Approved: Matthias Brantner, Markos Zaharioudakis

10932. By Markos Zaharioudakis

Incremental maintenance for general indexes. Approved: Markos Zaharioudakis

10933. By Paul J. Lucas

Fixed typo. Approved: Chris Hillery, Paul J. Lucas

10934. By Sorin Marian Nasoi <email address hidden>

Updated the F&O 1.1 and 3.0 documents used in the comparison of the functions and corrected the util:download-and-write-spec function. Approved: Chris Hillery, Sorin Marian Nasoi

10935. By Chris Hillery

Add test case demonstrating bug 1010728. Approved: Markos Zaharioudakis, Chris Hillery

10936. By David Graf

Removed internal debug info from fn:trace output by using the Zorba serializer instead of the internal show function. Approved: Chris Hillery, David Graf

10937. By Dennis Knochenwefel

Added case for DT_UNKNOWN. Approved: Dennis Knochenwefel, Paul J. Lucas

10938. By Rodolfo Ochoa

Fix for precompiled headers on Windows Approved: Rodolfo Ochoa, Chris Hillery

10939. By Cezar Andrei <email address hidden>

Integrate:
  Fix for bug 1023120 xs:include ignored in schemas.
  Added a new test for this case. Approved: Chris Hillery, David Graf

10940. By Till Westmann

fix position for elements and processing-instructions in fn:path Approved: Markos Zaharioudakis, Matthias Brantner

10941. By Nicolae Brinza

Added support for the unix shebang script launcher (#!/path/interpreter) Approved: Nicolae Brinza, David Graf

10942. By Paul J. Lucas

Now using enable_if for more functions to make overload resolution work better. Approved: Matthias Brantner, Paul J. Lucas

10943. By Rodolfo Ochoa

Disabling "print stack trace" for windows Approved: Chris Hillery, Rodolfo Ochoa

10944. By Paul J. Lucas

Now handling UTF-16 surrogate pairs. Approved: Dennis Knochenwefel, Paul J. Lucas

10945. By Nicolae Brinza

The parse-fragment function now allows a DOCTYPE declaration in the input. Approved: Nicolae Brinza, Matthias Brantner

10946. By Paul J. Lucas

Added functions to test for and create UTF-16 surrogate pairs.
These will probably be needed by whoever fixes bug #1025622. Approved: Dennis Knochenwefel, Paul J. Lucas

10947. By Ghislain Fourny

Fix that gives precedence to an array replacement over an array deletion, and adds tests about multiple updates with same selector on arrays and on objects. Approved: Markos Zaharioudakis, Matthias Brantner

10948. By Paul J. Lucas

Now doing proper JSON serialization. Approved: Chris Hillery, Dennis Knochenwefel, Paul J. Lucas

10949. By Paul J. Lucas

No longer checking captured subgroups in replacement string when 'q' flag is given. Approved: Matthias Brantner, Paul J. Lucas

10950. By Matthias Brantner

removed a non-core module dependency from the reuse-stream test Approved: Chris Hillery, Matthias Brantner

10951. By Markos Zaharioudakis

removed some debugging code, which causes Windows compilation problems Approved: Markos Zaharioudakis

10952. By Paul J. Lucas

Removed zorbatypes/transcoder.h & .cpp. Approved: Matthias Brantner, Paul J. Lucas

10953. By Paul J. Lucas

Now properly serializing JSON for JsonML. Approved: Chris Hillery, Paul J. Lucas

10954. By Paul J. Lucas

1. Tweaked equals().
2. Added "const&" to std_string function arguments. Approved: Matthias Brantner, Paul J. Lucas

10955. By Matthias Brantner

fix for bug #898066 (Stringstream & fn:trace) Approved: Chris Hillery, Matthias Brantner

10956. By Sorin Marian Nasoi <email address hidden>

- added information about the Zorba and XQTS versions that were used in generating the reports
- updated the README.txt with info regarding conformance reports generation Approved: William Candillon, Sorin Marian Nasoi

10957. By Ghislain Fourny

Activating ZORBA_WITH_JSON by default. Approved: Chris Hillery, Matthias Brantner

10958. By Rodolfo Ochoa

Documentation fixed for Zorba binary package with PHP. Approved: Cezar Andrei, Chris Hillery

10959. By Paul J. Lucas

s/0/npos/

This probably fixes some as-of-yet-undiscovered bug. Approved: Matthias Brantner, Paul J. Lucas

10960. By Rodolfo Ochoa

C# API Binding Approved: Cezar Andrei, Chris Hillery

10961. By Ghislain Fourny

Corrects bug 1029836. Approved: Chris Hillery, Matthias Brantner

10962. By Ghislain Fourny

Fixing Windows build with JSONiq. Approved: Chris Hillery, Matthias Brantner

10963. By Paul J. Lucas

Removed JsonML-object. Approved: Ghislain Fourny, Paul J. Lucas

10964. By Matthias Brantner

new ItemFactory function that allows creating dateTime items without timezone Approved: Chris Hillery, Matthias Brantner

10965. By Ghislain Fourny

Fixes bug 1032166 (critical memory bug). Approved: Chris Hillery, Matthias Brantner

10966. By Paul J. Lucas

1. s/take/swap/
2. Removed unnecessary assignment. Approved: Matthias Brantner, Paul J. Lucas

10967. By Juan Zacarias

Fix of bugs
Bug #1014979: Make thesaurus optional component on Windows
Bug #1014981: Make default thesaurus available as Ubuntu package
Also Fixed FindJNI.cmake to be found-able in the Ubuntu Installer Approved: Juan Zacarias, Chris Hillery

10968. By David Graf

Make testdriver_mt work with boost version >1.49. Approved: David Graf, Chris Hillery

10969. By Matthias Brantner

Added a keys() function to the index dml module. This fixes bug #900677. Approved: Matthias Brantner, David Graf

10970. By Chris Hillery

Test temporarily disabled while http-client is being updated. Also get HEAD of OAuth module rather than zorba-2.5 tag for some reason. Approved: Juan Zacarias, Chris Hillery

10971. By Chris Hillery

Split image into image + graphviz; split data-converters into csv + html. Approved: Juan Zacarias, Chris Hillery

10972. By William Candillon

Fix make doc target when multiple versions of the same module exist. Approved: Sorin Marian Nasoi, Matthias Brantner

10973. By Ghislain Fourny

Correct array insert expression syntax to include [] like append expressions. Approved: Matthias Brantner, Chris Hillery

10974. By Juan Zacarias

Fixed windows installer component options for c# Bindings Approved: Rodolfo Ochoa, Chris Hillery

10975. By Matthias Brantner

removed some hardcoded english words from error messages raised by the json parser Approved: Paul J. Lucas, Matthias Brantner

10976. By Chris Hillery

Regenerate scanner and parser with flex/bison 2.5 - apparently 2.6 creates code that Clang doesn't like. Approved: Matthias Brantner, Chris Hillery

10977. By Matthias Brantner

prepare ChangeLog for 2.6 release Approved: Chris Hillery, Matthias Brantner

10978. By Chris Hillery

Bump version number to 2.6; update tagged modules; add archive module. Approved: Cezar Andrei, Sorin Marian Nasoi, Chris Hillery

10979. By David Graf

Since OSX Mountain Lion, clang is the default c++ compiler on mac. Therefore, it is not enough to check if the c++ compiler executable is called clang. Approved: David Graf, Paul J. Lucas

10980. By Chris Hillery

Add "generation" of local copy of FlexLexer.h, to ensure it always stays in sync with the generated lexer .cpp file. Approved: Paul J. Lucas, Chris Hillery

10981. By Sorin Marian Nasoi <email address hidden>

Fixed the XQDoc documentation issue related to the formatting of the parameters for the higher order functions. Approved: William Candillon, Sorin Marian Nasoi

10982. By Chris Hillery

Try to find FlexLexer.h associated with the flex binary being used.
 Approved: David Graf, Chris Hillery

10983. By Markos Zaharioudakis

Fixed bug #1033407 (do not store var_expr rchandles in the static context) Approved: Markos Zaharioudakis

10984. By Markos Zaharioudakis

Fixed bugs #899364 and 899363 (throw XQST0103 in case of non-distinct window variables)
Fixed bug #899366 (enforce the type declaration of a window variable) Approved: Markos Zaharioudakis

10985. By Markos Zaharioudakis

Fixed bug #1024892 (index declaration references udf declared after the index) Approved: Markos Zaharioudakis

10986. By David Graf

remove clang warnings in auditing code Approved: Matthias Brantner, Till Westmann, David Graf

10987. By Chris Hillery

added the archive module to ExternalModules.conf and the ChangeLog Approved: Luis Rodriguez Gonzalez, Juan Zacarias, Chris Hillery, Matthias Brantner

10988. By David Graf

Because the jsoniq_emitter aggregates the xml_emitter, it needs to hand over all the parameters, including aEmitAttributes. Approved: Till Westmann, David Graf

10989. By luisrod <luisrod@LUISROD-LAP>

- Added code for bug#1025564 "Deprecate -f argument to zorbacmd"

10990. By Markos Zaharioudakis

Allow prolog variables to be referenced before they are declared (XQuery 3.0 feature) (fixes bug #900688) Approved: Markos Zaharioudakis

10991. By Chris Hillery

Revert unintentional commit r10989. Approved: Luis Rodriguez Gonzalez, Chris Hillery

10992. By Ghislain Fourny

Updated JSONiq tutorial. Approved: William Candillon, Matthias Brantner

10993. By Paul J. Lucas

Miscellaneous changes, some a prerequisite for LLVM that should be done anyway and not have to wait for the far-in-the-future LLVM branch merge. Approved: Matthias Brantner, Paul J. Lucas

10994. By Ghislain Fourny

Specifying collection and property upon ZDST0006. Approved: Till Westmann, Matthias Brantner

10995. By Paul J. Lucas

QueryLoc clean-up:
1. Added all-argument constructor (needed for LLVM).
2. Removed pointless copy constructor since default is fine.
3. Removed pointless virtual destructor (there are no virtual functions!). Approved: Matthias Brantner, Paul J. Lucas

10996. By Nicolae Brinza

Dynamically computed strings can now be cast to xs:QName. Fixes bug #898792 Approved: Nicolae Brinza, Chris Hillery

10997. By Till Westmann

move appending of ${requiredlibs-store} to requiredlibs to a place where it works Approved: Ghislain Fourny, Till Westmann

10998. By Ghislain Fourny

Fixes a bug that makes Zorba crash upon inserting more than one pair. Approved: Till Westmann, Matthias Brantner

10999. By Ghislain Fourny

Adds a method isEncoded to user-typed atomic items. Approved: Till Westmann, Matthias Brantner

11000. By Sorin Marian Nasoi <email address hidden>

fix for lp:969251. Approved: Ghislain Fourny, Sorin Marian Nasoi

11001. By Ghislain Fourny

Made URI computation lazy in StructuralAnyUri. Approved: Markos Zaharioudakis, Matthias Brantner

11002. By Ghislain Fourny

Fixes a Windows compiler error (bug 1040558). Approved: Luis Rodriguez Gonzalez, Juan Zacarias

11003. By William Candillon

Enable XML output from doxygen by default. Approved: Chris Hillery, Matthias Brantner

11004. By Markos Zaharioudakis

Fixed bug #1038410 (Memory leaks in parser, trace iterator, and general index) Approved: Markos Zaharioudakis

11005. By Markos Zaharioudakis

Fixed bug #1042840 (qname pool free-list corruption) Approved: Markos Zaharioudakis

11006. By Chris Hillery

Restoring execute bit to a bunch of scripts. Approved: Juan Zacarias, Matthias Brantner, Chris Hillery

11007. By Carlos Manuel Lopez

New memory management for compiler expressions (fixes bug #1036111) Approved: Markos Zaharioudakis

11008. By Ghislain Fourny

Simplified JSON items class and fixed some Xml Node static casts to handle JSON items as well. Approved: Markos Zaharioudakis, Matthias Brantner

11009. By Markos Zaharioudakis

Fixed bug #866984 (better error message for an eval error) Approved: Markos Zaharioudakis

11010. By Ghislain Fourny

Fixing a memory leak in append update primitive. Approved: Matthias Brantner, Till Westmann

11011. By William Candillon

Remove deprecated reference to the old sourceforge mailing-list. Approved: Matthias Brantner, William Candillon

11012. By Nicolae Brinza

Fixes for bugs #1023170, #1024033, #1027270 Approved: Chris Hillery, Matthias Brantner

11013. By Markos Zaharioudakis

rchandle cleanup Approved: Markos Zaharioudakis

11014. By Paul J. Lucas

Replaced UUID with thin layer over native platform implementation. Approved: Chris Hillery, Rodolfo Ochoa, Matthias Brantner, Paul J. Lucas

11015. By Chris Hillery

Corrected HTML serialization of empty elements. Added test cases for XHTML.
 Approved: Matthias Brantner, Chris Hillery

11016. By Chris Hillery

Fixes debug mode crash because of missing dictionary entries. Approved: William Candillon, Chris Hillery

11017. By Ghislain Fourny

Fixed bug 1041411 (prefixed true/false/null should be interpreted as name tests). Approved: Matthias Brantner, Chris Hillery

11018. By Ghislain Fourny

Removing superfluous store/naive prefixes in store includes. Approved: Till Westmann, Matthias Brantner

11019. By Paul J. Lucas

Suppressed warnings; moved gcc diagnostic push macros to config.h. Approved: Matthias Brantner, Paul J. Lucas

11020. By Till Westmann

add dependency on libuuid

11021. By Till Westmann

ensure deterministic test results for keys of index on unordered collection by sorting

11022. By Till Westmann

Some fixes in TreeID API and some include cleanup.

11023. By Matthias Brantner

Adding missing JSONiq library functions.

11024. By Matthias Brantner

Updated JSONiq tutorial.

11025. By Matthias Brantner

implementation of parse-json#2 allowing multiple top-level items

11026. By Matthias Brantner

more tests for jn:parse-json

11027. By Matthias Brantner

adapted changelog regarding jn:parse-json

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'ChangeLog'
--- ChangeLog 2012-04-24 12:39:38 +0000
+++ ChangeLog 2012-04-24 21:06:20 +0000
@@ -10,6 +10,7 @@
10 * fn:unparsed-text-available10 * fn:unparsed-text-available
11 * Extended API for Python, Java, PHP and Ruby.11 * Extended API for Python, Java, PHP and Ruby.
12 * Add jvm classpath to zorbacmd and to Zorba API. Tracked by #93181612 * Add jvm classpath to zorbacmd and to Zorba API. Tracked by #931816
13 * Added full-text module.
13 * Added support for NO_ICU (to not use ICU for unicode processing)14 * Added support for NO_ICU (to not use ICU for unicode processing)
14 * Added XQJ support.15 * Added XQJ support.
1516
@@ -88,6 +89,8 @@
88 * Fixed bug 867509 (Can not handle largest xs:unsignedLong values)89 * Fixed bug 867509 (Can not handle largest xs:unsignedLong values)
89 * Fixed bug 924063 (sentence is incorrectly incremented when token characters end without sentence terminator)90 * Fixed bug 924063 (sentence is incorrectly incremented when token characters end without sentence terminator)
90 * Fixed bug 909126 (bug in cloning of var_expr)91 * Fixed bug 909126 (bug in cloning of var_expr)
92 * Fixed bug 928631 (external builtin function were not executed in the module they
93 were declared)
91 * Fixed bug in destruction of exit_catcher_expr94 * Fixed bug in destruction of exit_catcher_expr
92 * Fixed bug #867024 (error messages)95 * Fixed bug #867024 (error messages)
93 * Fixed bug #957580 (stream read failure in StringToCodepointsIteartor)96 * Fixed bug #957580 (stream read failure in StringToCodepointsIteartor)
9497
=== modified file 'cmake_modules/FindICU.cmake'
--- cmake_modules/FindICU.cmake 2012-04-24 14:35:54 +0000
+++ cmake_modules/FindICU.cmake 2012-04-24 21:06:20 +0000
@@ -28,6 +28,8 @@
28# (note: in addition to ICU_LIBRARIES)28# (note: in addition to ICU_LIBRARIES)
29# ICU_DATA_LIBRARIES - Libraries to link against for ICU data29# ICU_DATA_LIBRARIES - Libraries to link against for ICU data
30#30#
31# ICU_VERSION - ICU's version number.
32#
3133
32# Look for the header file.34# Look for the header file.
33find_path(35find_path(
3436
=== modified file 'doc/zorba/ft_intro.dox'
--- doc/zorba/ft_intro.dox 2012-04-24 12:39:38 +0000
+++ doc/zorba/ft_intro.dox 2012-04-24 21:06:20 +0000
@@ -5,9 +5,9 @@
5specification.5specification.
6Additional documentation:6Additional documentation:
77
8 - \ref ft_stemmer8- \ref ft_stemmer
9 - \ref ft_thesaurus9- \ref ft_thesaurus
10 - \ref ft_tokenizer10- \ref ft_tokenizer
1111
12\section ft_unimplemented Unimplemented Features12\section ft_unimplemented Unimplemented Features
1313
@@ -16,11 +16,11 @@
16implemented.16implemented.
17The features that are not (completely) implemented are:17The features that are not (completely) implemented are:
1818
19 - The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a>19- The <a href="http://www.w3.org/TR/xpath-full-text-10/#ftignoreoption">Ignore Option</a>
20 (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187470">3187470</a>).20 (bug <a href="https://bugs.launchpad.net/zorba/+bug/866924">866924</a>).
21 - <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a>21- <a href="http://www.w3.org/TR/xpath-full-text-10/#section-score-variables">Score Variables</a>
22 and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a>22 and <a href="http://www.w3.org/TR/xpath-full-text-10/#section-using-weights">Using Weights Within a Scored FTContainsExpr</a>
23 (bug <a href="https://bugs.launchpad.net/zorba/+bug/sf-3187462">3187462</a>).23 (bug <a href="https://bugs.launchpad.net/zorba/+bug/866923">866923</a>).
2424
25*/25*/
26/* vim:set et sw=2 ts=2: */26/* vim:set et sw=2 ts=2: */
2727
=== modified file 'doc/zorba/ft_stemmer.dox'
--- doc/zorba/ft_stemmer.dox 2012-04-24 12:39:38 +0000
+++ doc/zorba/ft_stemmer.dox 2012-04-24 21:06:20 +0000
@@ -56,7 +56,12 @@
56public:56public:
57 typedef /* implementation-defined */ ptr;57 typedef /* implementation-defined */ ptr;
5858
59 struct Properties {
60 char const *uri;
61 };
62
59 virtual void destroy() const = 0;63 virtual void destroy() const = 0;
64 virtual void properties( Properties *result ) const = 0;
60 virtual void stem( String const &word, locale::iso639_1::type lang, String *result ) const = 0;65 virtual void stem( String const &word, locale::iso639_1::type lang, String *result ) const = 0;
61protected:66protected:
62 virtual ~Stemmer();67 virtual ~Stemmer();
@@ -89,6 +94,8 @@
89Note that \c result should always be set to something.94Note that \c result should always be set to something.
90If your stemmer doesn't know how to stem the given word,95If your stemmer doesn't know how to stem the given word,
91you should set \c result to \c word.96you should set \c result to \c word.
97You also need to implement the \c properties() function
98and set the identifying URI of your stemmer.
9299
93A very simple stemmer100A very simple stemmer
94that stems the word "foobar" to "foo"101that stems the word "foobar" to "foo"
@@ -98,6 +105,7 @@
98class MyStemmer : public Stemmer {105class MyStemmer : public Stemmer {
99public:106public:
100 void destroy() const;107 void destroy() const;
108 void properties( Properties *result ) const;
101 void stem( String const &word, locale::iso639_1::type lang, String *result ) const;109 void stem( String const &word, locale::iso639_1::type lang, String *result ) const;
102private:110private:
103 MyStemmer();111 MyStemmer();
@@ -108,6 +116,10 @@
108 // Do nothing since we statically allocate a singleton instance of our stemmer.116 // Do nothing since we statically allocate a singleton instance of our stemmer.
109}117}
110118
119void MyStemmer::properties( Properties *props ) const {
120 props->uri = "http://my.example.com/zorba/full-text/stemmer";
121}
122
111void MyStemmer::stem( String const &word, locale::iso639_1::type lang, String *result ) const {123void MyStemmer::stem( String const &word, locale::iso639_1::type lang, String *result ) const {
112 if ( word == "foobar" )124 if ( word == "foobar" )
113 *result = "foo";125 *result = "foo";
@@ -120,7 +132,6 @@
120or a dictionary look-up132or a dictionary look-up
121to stem many words,133to stem many words,
122of course.134of course.
123
124Although not used in this simple example,135Although not used in this simple example,
125\c lang can be used to allow a single stemmer instance136\c lang can be used to allow a single stemmer instance
126to stem words in more than one language.137to stem words in more than one language.
@@ -135,16 +146,24 @@
135class StemmerProvider {146class StemmerProvider {
136public:147public:
137 virtual ~StemmerProvider();148 virtual ~StemmerProvider();
138 virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0;149 virtual bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const = 0;
139};150};
140\endcode151\endcode
141152
153The \c getStemmer() function should return \c true
154only if it can provide a \c Stemmer
155for the given language; \c false otherwise.
156If the \c Stemmer::ptr argument is \c null,
157the caller wants to check only whether the provider
158can provide a stemmer for the given language
159and doesn't want a \c Stemmer instance created or returned.
160
142A simple \c StemmerProvider for our simple stemmer can be implemented as:161A simple \c StemmerProvider for our simple stemmer can be implemented as:
143162
144\code163\code
145class MyStemmerProvider : public StemmerProvider {164class MyStemmerProvider : public StemmerProvider {
146public:165public:
147 Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const;166 bool getStemmer( locale::iso639_1::type lang, Stemmer::ptr *s = 0 ) const;
148};167};
149168
150Stemmer::ptr MyStemmerProvider::getStemmer( locale::iso639_1::type lang ) const {169Stemmer::ptr MyStemmerProvider::getStemmer( locale::iso639_1::type lang ) const {
@@ -154,15 +173,14 @@
154 case iso639_1::en:173 case iso639_1::en:
155 case iso639_1::unknown: // Handle "unknown" language since, in many cases, the language is not known.174 case iso639_1::unknown: // Handle "unknown" language since, in many cases, the language is not known.
156 result.reset( &stemmer );175 result.reset( &stemmer );
157 break;176 return true;
158 default: 177 default:
159 //178 //
160 // We have no stemmer for the given language: leave the result as null to indicate this.179 // We have no stemmer for the given language: return false.
161 // Zorba will then use the built-in stemmer for the given language.180 // Zorba will then use the built-in stemmer for the given language.
162 //181 //
163 break;182 return false;
164 }183 }
165 resturn std::move( result );
166}184}
167\endcode185\endcode
168186
169187
=== modified file 'doc/zorba/ft_thesaurus.dox'
--- doc/zorba/ft_thesaurus.dox 2012-04-24 12:39:38 +0000
+++ doc/zorba/ft_thesaurus.dox 2012-04-24 21:06:20 +0000
@@ -44,16 +44,16 @@
44To download and install the WordNet database on a Unix-like system,44To download and install the WordNet database on a Unix-like system,
45follow these steps:45follow these steps:
4646
47 -# Download the WordNet database from47-# Download the WordNet database from
48 <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>.48 <a href="http://wordnet.princeton.edu/wordnet/download/">here</a>.
49 All you really need are just the database files49 All you really need are just the database files
50 (<code>WNdb-3.0.tar.gz</code>).50 (<code>WNdb-3.0.tar.gz</code>).
51 -# Un-gzip and untar the files.51-# Un-gzip and untar the files.
52 This will result in a directory dict containing the database files.52 This will result in a directory dict containing the database files.
53 -# Move the dict directory somewhere of your choosing,53-# Move the dict directory somewhere of your choosing,
54 e.g., <code>/usr/local/wordnet-3.0/dict</code>.54 e.g., <code>/usr/local/wordnet-3.0/dict</code>.
55 -# Compile the \c dict directory into a Zorba-compatible binary thesaurus55-# Compile the \c dict directory into a Zorba-compatible binary thesaurus
56 as described below.56 as described below.
5757
58To compile the WordNet database files,58To compile the WordNet database files,
59use the \c zt-wn-compile script59use the \c zt-wn-compile script
@@ -65,12 +65,12 @@
65zt-wn-compile [-v] wordnet_dict_dir [thesaurus_file]65zt-wn-compile [-v] wordnet_dict_dir [thesaurus_file]
66\endcode66\endcode
6767
68 - The \c -v option specifies verbose output.68- The \c -v option specifies verbose output.
69 - The \e wordnet_dict_dir specifies the full path69- The \e wordnet_dict_dir specifies the full path
70 of the WordNet \c dict directory.70 of the WordNet \c dict directory.
71 - The \e thesaurus_file specifies the name of the resulting binary file.71- The \e thesaurus_file specifies the name of the resulting binary file.
72 If none is given, it defaults to \c wordnet-en.zth72 If none is given, it defaults to \c wordnet-en.zth
73 ("en" for English and "zth" for "Zorba Thesaurus file").73 ("en" for English and "zth" for "Zorba Thesaurus file").
7474
75For example:75For example:
7676
@@ -78,33 +78,39 @@
78zt-wn-compile -v /usr/local/wordnet-3.0/dict78zt-wn-compile -v /usr/local/wordnet-3.0/dict
79\endcode79\endcode
8080
81Move the \c wordnet-en.zth file to a location of your choosing.81To install the \c wordnet-en.zth file,
82move it onto Zorba's <i>library path</i>:
83
84\code
85LIB_PATH/edu/princeton/wordnet/wordnet-en.zth
86\endcode
8287
83\subsection ft_thesaurus_precompiled Downloading a Precompiled WordNet Database88\subsection ft_thesaurus_precompiled Downloading a Precompiled WordNet Database
8489
85Alternatively,90Alternatively,
86you can download a precompiled WordNet database from91you can download a precompiled, little-endian (Intel) CPU WordNet database from
87<a href="http://www.zorba-xquery.com/downloads/WordNet-3.0/wordnet-en.zip">here</a>.92<a href="http://www.zorba-xquery.com/downloads/WordNet-3.0/wordnet-en.zip">here</a>.
8893
89\section ft_thesaurus_mappings Thesauri Mappings94\section ft_thesaurus_mappings Thesauri Mappings
9095
91In order to use thesauri,96In order to use thesauri,
92you need to specify where they are to the Zorba engine97you need to specify what symbolic URI(s) <i>map</i>
93via one or more thesaurus <i>mappings</i>.98to what thesauri.
94A <i>mapping</i> maps a symbolic URI to URI for an actual thesaurus.
95A mapping is of the form:99A mapping is of the form:
96100
97<i>from_uri</i><code>:=</code><b>[</b><i>implementation</i><code>|</code><b>]</b><i>to_uri</i>101<i>from_uri</i><code>:=</code><i>implementation-scheme</i><code>:</code><i>to_uri</i>
98102
99For example:103For example:
100104
101\code105\code
102http://wordnet.princeton.edu:=wordnet|/usr/local/zorba/thesauri/wordnet-en.zth106http://wordnet.princeton.edu:=wordnet://wordnet.princeton.edu
103\endcode107\endcode
104108
105says that the symbolic URI \c http://wordnet.princeton.edu109says that the symbolic URI \c http://wordnet.princeton.edu
106maps to the WordNet implementation110maps to the WordNet implementation
107having a database file at the given path.111having a database file at the given sub-path
112\c edu/princeton/wordnet
113on Zorba's library path.
108Once a mapping is established for a symbolic URI,114Once a mapping is established for a symbolic URI,
109it can be used in a query:115it can be used in a query:
110116
@@ -114,13 +120,8 @@
114 using thesaurus at "http://wordnet.princeton.edu"120 using thesaurus at "http://wordnet.princeton.edu"
115\endcode121\endcode
116122
117If the \e implementation is omitted,
118it defaults to \c wordnet.
119As a special-case,123As a special-case,
120the \e from_uri can be \c default or 124the \e from_uri can be \c default or \c ##default
121\code
122##default
123\endcode
124to allow for specifying the default thesaurus125to allow for specifying the default thesaurus
125as was done for the first example on this page.126as was done for the first example on this page.
126127
@@ -130,7 +131,7 @@
130use one or more –thesaurus options:131use one or more –thesaurus options:
131132
132\code133\code
133zorba --thesaurus default:=/usr/local/zorba/thesauri/wordnet-en.zth ...134zorba --thesaurus default:=wordnet://wordnet.princeton.edu ...
134\endcode135\endcode
135136
136\section ft_thesaurus_rels Thesaurus Relationships137\section ft_thesaurus_rels Thesaurus Relationships
@@ -423,25 +424,26 @@
423424
424If no levels are specified in a query,425If no levels are specified in a query,
425Zorba defaults the WordNet implementation to be 2 levels.426Zorba defaults the WordNet implementation to be 2 levels.
426The rationale can be found427(The rationale can be found
427<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>.428<a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=11444">here</a>.)
428429
429\section ft_thesaurus_providing Providing Your Own Thesaurus430\section ft_thesaurus_providing Providing Your Own Thesaurus
430431
431Using the Zorba C++ API,432Using the Zorba C++ API,
432you can provide your own thesaurus433you can provide your own thesaurus
433by deriving from three classes:434by deriving from four classes:
434\c Thesaurus,435\c Thesaurus,
435\c Thesaurus::iterator,436\c Thesaurus::iterator,
437\c ThesaurusProvider,
436and438and
437\c ThesaurusProvider.439\c URLResolver.
438440
439\subsection ft_class_thesaurus The Thesaurus Class441\subsection ft_class_thesaurus The Thesaurus Class
440442
441The \c Thesaurus class is:443The \c Thesaurus class is:
442444
443\code445\code
444class Thesaurus : public Resource {446class Thesaurus {
445public:447public:
446 typedef /* implementation-defined */ ptr;448 typedef /* implementation-defined */ ptr;
447 typedef /* implementation-defined */ range_type;449 typedef /* implementation-defined */ range_type;
@@ -457,15 +459,15 @@
457459
458 virtual iterator::ptr lookup( String const &phrase, String const &relationship, range_type at_least, range_type at_most ) const = 0;460 virtual iterator::ptr lookup( String const &phrase, String const &relationship, range_type at_least, range_type at_most ) const = 0;
459461
460 virtual void destroy() const = 0; // interited from Resource462 virtual void destroy() const = 0;
461protected:463protected:
462 virtual ~Thesaurus();464 virtual ~Thesaurus();
463};465};
464\endcode466\endcode
465467
466For details about the \c ptr type,468For details about the \c ptr types,
467the \c destroy() function,469the \c destroy() functions,
468and why the destructor is \c protected,470and why the destructors are \c protected,
469see the \ref memory_management document.471see the \ref memory_management document.
470472
471To implement the \c Thesaurus473To implement the \c Thesaurus
@@ -482,18 +484,19 @@
482 </tr>484 </tr>
483 <tr>485 <tr>
484 <td>\c at_least</td>486 <td>\c at_least</td>
485 <td>The The minimum number of levels within the thesaurus to be traversed.</td>487 <td>The minimum number of levels within the thesaurus to be traversed.</td>
486 </tr>488 </tr>
487 <tr>489 <tr>
488 <td>\c at_most</td>490 <td>\c at_most</td>
489 <td>The The maximum number of levels within the thesaurus to be traversed.</td>491 <td>The maximum number of levels within the thesaurus to be traversed.</td>
490 </tr>492 </tr>
491</table>493</table>
492494
493The \c lookup() function returns a pointer to an \c iterator495The \c lookup() function returns a pointer to an \c iterator
494that is used to iterate over the phrase's synonyms.496that is used to iterate over the phrase's synonyms.
495497You also need to implement an \c iterator.
496A very simple thesaurus498A very simple \c Thesaurus
499and its \c iterator
497can be implemented as:500can be implemented as:
498501
499\code502\code
@@ -505,53 +508,49 @@
505 //508 //
506 // Define a simple thesaurus data structure as a map from a phrase to a list of its synonyms.509 // Define a simple thesaurus data structure as a map from a phrase to a list of its synonyms.
507 //510 //
508 typedef std::list<String> synonyms_t;511 typedef std::list<String> synonyms_type;
509 typedef std::map<String,synonyms_t const*> thesaurus_t;512 typedef std::map<String,synonyms_type const*> thesaurus_data_type;
510513
511 static thesaurus_t const& get_thesaurus();514 static thesaurus_data_type const& get_thesaurus_data();
512515
513 class iterator : public Thesaurus::iterator {516 class iterator : public Thesaurus::iterator {
514 public:517 public:
515 iterator( synonyms_t const &s ) : synonyms_( s ), i_( s.begin() ) { }518 iterator( synonyms_type const &s ) : synonyms_( s ), i_( s.begin() ) { }
516 void destroy();519 void destroy();
517 bool next( String *synonym );520 bool next( String *synonym );
518 private:521 private:
519 synonyms_t const &synonyms_; // synonyms to iterate over522 synonyms_type const &synonyms_; // synonyms to iterate over
520 synonyms_t::const_iterator i_; // current iterator position523 synonyms_type::const_iterator i_; // current iterator position
521 };524 };
522};525};
523526
524void MyThesaurus::destroy() const {527void MyThesaurus::destroy() const {
525 // Do nothing since we statically allocate a singleton instance of our thesaurus.528 // Do nothing since we statically allocate a singleton instance of our Thesaurus.
526}529}
527530
528MyThesaurus::thesaurus_t const& MyThesaurus::get_thesaurus() {531MyThesaurus::thesaurus_data_type const& MyThesaurus::get_thesaurus_data() {
529 static thesaurus_t thesaurus;532 static thesaurus_data_type thesaurus_data;
530 if ( thesaurus.empty() ) {533 if ( thesaurus_data.empty() ) {
531 //534 //
532 // Construct a thesaurus "by hand" for this example. A real thesaurus would probably535 // Construct thesaurus data "by hand" for this example. A real thesaurus would probably be read from disk.
533 // be read from disk.
534 //
535 // Note that every list of synonyms must always include the original phrase.536 // Note that every list of synonyms must always include the original phrase.
536 //537 //
537 static synonyms_t synonyms;538 static synonyms_type synonyms;
538 synonyms.push_back( "foo" );539 synonyms.push_back( "foo" );
539 synonyms.push_back( "foobar" );540 synonyms.push_back( "foobar" );
540 thesaurus[ "foo" ] = &synonyms;541 thesaurus_data[ "foo" ] = &synonyms;
541 thesaurus[ "foobar" ] = &synonyms;542 thesaurus_data[ "foobar" ] = &synonyms;
542 }543 }
543 return thesaurus;544 return thesaurus_data;
544}545}
545\endcode
546546
547\code
548MyThesaurus::iterator::ptr MyThesaurus::lookup( String const &phrase, String const &relationship,547MyThesaurus::iterator::ptr MyThesaurus::lookup( String const &phrase, String const &relationship,
549 range_type at_least, range_type at_most ) const {548 range_type at_least, range_type at_most ) const {
550 static thesaurus_t const &thesaurus = get_thesaurus();549 static thesaurus_data_type const &thesaurus_data = get_thesaurus_data();
551 thesaurus_t::const_iterator const i = thesaurus.find( phrase );550 thesaurus_data_type::const_iterator const entry = thesaurus_data.find( phrase );
552 iterator::ptr result;551 iterator::ptr result;
553 if ( i != thesaurus.end() )552 if ( entry != thesaurus_data.end() )
554 result.reset( new iterator( *i->second ) );553 result.reset( new iterator( *entry->second ) );
555 return std::move( result );554 return std::move( result );
556}555}
557556
@@ -572,13 +571,71 @@
572A real thesaurus would load a large number of synonyms,571A real thesaurus would load a large number of synonyms,
573of course.572of course.
574573
574\subsection ft_class_thesaurus_provider The ThesaurusProvider Class
575
576The \c ThesaurusProvider class is:
577
578\code
579class ThesaurusProvider : public Resource {
580public:
581 typedef /* implementation-defined */ ptr;
582
583 virtual bool getThesaurus( locale::iso639_1::type lang, Thesaurus::ptr *thesaurus = 0 ) const = 0;
584 void destroy() const; // inherited from Resource
585};
586\endcode
587
588To implement a \c ThesaurusProvider,
589you need to implement the \c getThesaurus() function where:
590
591<table>
592 <tr>
593 <td>\c lang</td>
594 <td>The desired language of the thesaurus.</td>
595 </tr>
596 <tr>
597 <td>\c thesaurus</td>
598 <td>If not \c null, set to point to a thesaurus for \c lang.</td>
599 </tr>
600</table>
601
602The \c getThesaurus() function returns \c true
603only if it can provide a thesaurus for the given language.
604Continuing with the example,
605a very simple \c ThesaurusProvider
606can be implemented as:
607
608\code
609class MyThesaurusProvider : public ThesaurusProvider {
610public:
611 void destroy() const;
612 bool getThesaurus( iso639_1::type lang, Thesaurus::ptr* = 0 ) const;
613};
614
615void MyThesaurusProvider::destroy() const {
616 // Do nothing since we statically allocate a singleton instance of our ThesaurusProvider.
617}
618
619bool MyThesaurusProvider::getThesaurus( iso639_1::type lang, Thesaurus::ptr *result ) const {
620 //
621 // Since our tiny thesaurus contains only universally known words, we don't bother checking lang
622 // and always return true.
623 //
624 static MyThesaurus thesaurus;
625 if ( result )
626 result->reset( &thesaurus );
627 return true;
628}
629\endcode
630
575\subsection ft_class_thesaurus_resolver A Thesaurus URL Resolver Class631\subsection ft_class_thesaurus_resolver A Thesaurus URL Resolver Class
576632
577In addition to a \c Thesaurus,633In addition to a \c Thesaurus
634and \c ThesaurusProvider,
578you must also implement a "thesaurus resolver" class635you must also implement a "thesaurus resolver" class
579that,636that,
580given a URL and a language,637given a URI,
581provides a \c Thesaurus for that language.638provides a \c ThesaurusProvider for that URI.
582A simple \c ThesaurusURLResolver639A simple \c ThesaurusURLResolver
583for our simple thesaurus can be implemented as:640for our simple thesaurus can be implemented as:
584641
@@ -591,23 +648,12 @@
591 String const url_;648 String const url_;
592};649};
593650
594Resource*651Resource* ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const {
595ThesaurusURLResolver::resolveURL( String const &url, EntityData const *data ) const {652 if ( data->getKind() == EntityData::THESAURUS ) {
596 ThesaurusEntityData const *const t_data = dynamic_cast<ThesaurusEntityData const*>( data );653 static MyThesaurusProvider provider;
597 assert( t_data );654 if ( url == url_ )
598 static MyThesaurus thesaurus;655 return &provider;
599 if ( url == url_ )656 }
600 switch ( t_data->getLanguage() ) {
601 case locale::iso639_1::en:
602 case locale::iso639_1::unknown:
603 //
604 // Here, we could test to ensure that the language of our thesaurus matches the
605 // language sought, but in our case, we want our thesaurus to be used for all
606 // languages since "foo" and "foobar" are universal.
607 //
608 default:
609 return &thesaurus;
610 }
611 return 0;657 return 0;
612}658}
613\endcode659\endcode
614660
=== modified file 'doc/zorba/ft_tokenizer.dox'
--- doc/zorba/ft_tokenizer.dox 2012-04-24 12:39:38 +0000
+++ doc/zorba/ft_tokenizer.dox 2012-04-24 21:06:20 +0000
@@ -5,14 +5,25 @@
5The Zorba XQuery processor implements the5The Zorba XQuery processor implements the
6<a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0</a>6<a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0</a>
7specification that, among other things,7specification that, among other things,
8tokenizes a string into a sequence of tokens.8<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">tokenizes</a>
9See9a string into a sequence of tokens.
10<a href="http://www.w3.org/TR/xpath-full-text-10/#TokenizationSec">Tokenization</a>.10
1111\section ft_tokenizer_tokization Tokenization
12The initial implementation of the toknenizer12
13uses the one provided by the13Using the
14<a href="http://site.icu-project.org/">ICU library</a>.14<a href="http://site.icu-project.org/">ICU library</a>,
15However, you can provide your own tokenizer instead.15Zorba's implementation of tokenization
16considers only alpha-numeric sequences of characters to be part of a token;
17whitespace and punctuation characters are not
18and separate tokens.
19However, alpha-numeric sequences matching the regular expression
20<code>[0-9][.,][0-9]</code>
21are retained as part of a token, e.g.:
22"98.6" and "1,432.58" are tokens.
23
24Alternatively,
25you can implement your own tokenizer
26by deriving from the \c Tokenizer class.
1627
17\section ft_class_tokenizer The Tokenizer Class28\section ft_class_tokenizer The Tokenizer Class
1829
@@ -36,33 +47,43 @@
3647
37 class Callback {48 class Callback {
38 public:49 public:
39 typedef Tokenizer::size_type size_type;;50 typedef Tokenizer::size_type size_type;
4051
41 virtual ~Callback();52 virtual ~Callback();
4253
43 virtual void operator()( char const *utf8_s, size_type utf8_len,54 virtual void token( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,
44 size_type token_no, size_type sent_no, size_type para_no,55 size_type token_no, size_type sent_no, size_type para_no,
45 void *payload = 0 ) = 0;56 Item const *item = 0 ) = 0;
46 };57 };
4758
48 enum ElementTraceOptions {59 struct Properties {
49 trace_none = 0x0, // Trace no elements.60 typedef std::vector<locale::iso639_1::type> languages_type;
50 trace_begin = 0x1, // Trace the beginning of elements.61
51 trace_end = 0x2 // Trace the ending of elements.62 bool comments_separate_tokens;
52 };63 bool elements_separate_tokens;
64 bool processing_instructions_separate_tokens;
65 languages_type languages;
66 char const *uri;
67 };
68
69 virtual void properties( Properties *result ) const = 0;
5370
54 virtual void destroy() const = 0;71 virtual void destroy() const = 0;
55 virtual void element( Item const &qname, int trace_options );
56 Numbers& numbers();72 Numbers& numbers();
57 Numbers const& numbers() const;73 Numbers const& numbers() const;
58 int trace_options() const;74
5975 void tokenize_node( Item const &node, locale::iso639_1::type lang, Callback &callback );
60 virtual void tokenize( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,76
61 bool wildcards, Callback &callback, void *payload = 0 ) = 0;77 virtual void tokenize_string( char const *utf8_s, size_type utf8_len, locale::iso639_1::type lang,
78 bool wildcards, Callback &callback, Item const *item = 0 ) = 0;
6279
63protected:80protected:
64 Tokenizer( Numbers&, int trace_options = trace_none );81 Tokenizer( Numbers& );
65 virtual ~Tokenizer();82 virtual ~Tokenizer();
83
84 bool find_lang_attribute( Item const&, locale::iso639_1::type *lang );
85 virtual void item( Item const&, bool entering );
86 virtual void tokenize_node_impl( Item const&, locale::iso639_1::type, Callback&, bool tokenize_acp );
66};87};
67\endcode88\endcode
6889
@@ -76,8 +97,8 @@
76It simply keeps track of the current97It simply keeps track of the current
77token, sentence, and paragraph numbers.98token, sentence, and paragraph numbers.
7899
79To implement the \c Tokenizer,100To implement a \c Tokenizer,
80you need to implement the \c %tokenize() function where:101you need to implement the \c %tokenize_string() function where:
81102
82<table>103<table>
83 <tr>104 <tr>
@@ -115,9 +136,13 @@
115 </td>136 </td>
116 </tr>137 </tr>
117 <tr>138 <tr>
118 <td>\c payload</td>139 <td>\c item</td>
119 <td>140 <td>
120 Optional implementation-defined data.141 The \c Item whence this token came.
142 If the token occurred within an element,
143 the \c Item is the text node.
144 If the token occurred within an attribute,
145 the \c Item is the attribute node.
121 </td>146 </td>
122 </tr>147 </tr>
123</table>148</table>
@@ -127,21 +152,30 @@
127However,152However,
128the things a tokenizer should take into consideration include:153the things a tokenizer should take into consideration include:
129154
130 - Detecting sentence termination ('.', '?', and '!' characters).155- Detecting sentence termination ('.', '?', and '!' characters).
131 - Handling floating-point numbers with possible thousands separators156- Handling floating-point numbers with possible thousands separators
132 in US and European formats, e.g. "98.7", "98,7", "10,000", etc.157 in US and European formats, e.g. "98.7", "98,7", "10,000", etc.
133 - Distinguishing '.' used as a sentence terminator158- Distinguishing '.' used as a sentence terminator
134 from '.' used as a decimal point.159 from '.' used as a decimal point.
135 - Handling apostrophies, e.g., "men's".160- Handling apostrophes, e.g., "men's".
136 - Handling acronyms, e.g., "AT&T".161- Handling acronyms, e.g., "AT&T".
137162
138\subsection ft_paragraphs Paragraphs163The task of iterating over an XML element's child nodes
164is done by \c tokenize_node_impl().
165Its default implementation
166treats XML elements, comments, and processing instructions
167as token separators.
168(See \ref ft_tokenizer_properties.)
169If you want to change that,
170you need to override \c tokenize_node_impl().
171
172\subsection ft_tokenizer_paragraphs Paragraphs
139173
140By default,174By default,
141Zorba increments the current paragraph number once175Zorba increments the current paragraph number once
142for each XML element encountered.176for each XML element encountered.
143However,177However,
144this doens't work well for mixed content.178this doesn't work well for mixed content.
145For example, in the XHTML:179For example, in the XHTML:
146\code180\code
147<p>The <em>best</em> thing ever!</p>181<p>The <em>best</em> thing ever!</p>
@@ -150,31 +184,65 @@
150but Zorba will consider that 3 paragraphs by default.184but Zorba will consider that 3 paragraphs by default.
151185
152Your tokenizer can take control over when the paragraph number is incremented186Your tokenizer can take control over when the paragraph number is incremented
153by passing the bitwise-or187by overriding the \c item() function.
154of the \c ElementTraceOptions values188The \c item() function is passed the \c Item of the current XML element
155to the constructor189and whether the item is being entered or exited.
156and overriding the \c element() function.
157The \c element() function is passed the QName of the current XML element
158and (depending on the initial value passed to the constructor)
159one of \c trace_begin or \c trace_end.
160Note that this function is called
161only if the trace options value
162passed to the constructor
163was non-zero.
164190
165For example,191For example,
166the \c element() function for tokenizing XHTML192the \c item() function for tokenizing XHTML
167would be along the lines of:193would be along the lines of:
168\code194\code
169void MyTokenizer::element( Item const &qname, int trace_options ) {195void MyTokenizer::item( Item const &item, bool entering ) {
170 if ( trace_options & trace_end )196 if ( entering && item.isNode() && item.getNodeKind() == store::StoreConsts::elementNode ) {
171 return;197 Item qname;
172 String const name( qname.getLocalName() );198 item.getNodeName( qname );
173 if ( /* qname is an XHTML block-level element */ )199 if ( /* qname matches an XHTML block-level element's name */ )
174 ++numbers().para;200 ++numbers().para;
175}201}
176\endcode202\endcode
177203
204\subsection ft_tokenizer_properties Properties
205
206To implement a \c Tokenizer,
207you need also to implement the \c %properties() function
208that fills in the \c Properties struct where:
209
210<table>
211 <tr>
212 <td>\c comments_separate_tokens</td>
213 <td>
214 If \c true, XML comments separate tokens. For example,
215 <code>net&lt;!-- --&gt;work</code> would be 2 tokens instead of 1.
216 </td>
217 </tr>
218 <tr>
219 <td>\c elements_separate_tokens</td>
220 <td>
221 If \c true, XML elements separate tokens. For example,
222 <code>&lt;b&gt;B&lt;/b&gt;old</code> would be 2 tokens instead of 1.
223 </td>
224 </tr>
225 <tr>
226 <td>\c processing_instructions_separate_tokens</td>
227 <td>
228 If \c true, XML processing instructions separate tokens. For example,
229 <code>net&lt;?PI pi?&gt;work</code> would be 2 tokens instead of 1.
230 </td>
231 </tr>
232 <tr>
233 <td>\c languages</td>
234 <td>
235 The list of languages supported by the tokenizer.
236 </td>
237 </tr>
238 <tr>
239 <td>\c uri</td>
240 <td>
241 The URI that uniquely identifies the %Tokenizer.
242 </td>
243 </tr>
244</table>
245
178\section ft_class_tokenizer_provider The TokenizerProvider Class246\section ft_class_tokenizer_provider The TokenizerProvider Class
179247
180In addition to a \c Tokenizer,248In addition to a \c Tokenizer,
@@ -185,20 +253,51 @@
185class TokenizerProvider {253class TokenizerProvider {
186public:254public:
187 virtual ~TokenizerProvider();255 virtual ~TokenizerProvider();
188 virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers &numbers ) const = 0;256 virtual bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *numbers = 0, Tokenizer::ptr* = 0 ) const = 0;
189};257};
190\endcode258\endcode
191259
260Specifically, you need to implement the \c getTokenizer() function where:
261
262<table>
263 <tr>
264 <td>\c lang</td>
265 <td>The language to tokenize.</td>
266 </tr>
267 <tr>
268 <td>\c num</td>
269 <td>
270 The \c Numbers to use.
271 If \c null,
272 \a t is not set.
273 </td>
274 </tr>
275 <tr>
276 <td>\c t</td>
277 <td>
278 If not \c null,
279 set to point to a Tokenizer for \a lang.
280 </td>
281 </tr>
282</table>
283
192A simple \c TokenizerProvider for our tokenizer can be implemented as:284A simple \c TokenizerProvider for our tokenizer can be implemented as:
193285
194\code286\code
195class MyTokenizerProvider : public TokenizerProvider {287class MyTokenizerProvider : public TokenizerProvider {
196public:288public:
197 Tokenizer::ptr getTokenizer( locale::iso639_1::type lang ) const;289 bool getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers* = 0, Tokenizer::ptr* = 0 ) const;
198};290};
199291
200Tokenizer::ptr MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang const {292bool MyTokenizerProvider::getTokenizer( locale::iso639_1::type lang, Tokenizer::Numbers *num, Tokenizer::ptr *t ) const {
201 return Tokenizer::ptr( new MyTokenizer );293 switch ( lang ) {
294 case iso639_1::en:
295 if ( num && t )
296 t->reset( new MyTokenizer );
297 return true;
298 default:
299 return false;
300 }
202}301}
203\endcode302\endcode
204303
205304
=== modified file 'include/zorba/locale.h'
--- include/zorba/locale.h 2012-04-24 12:39:38 +0000
+++ include/zorba/locale.h 2012-04-24 21:06:20 +0000
@@ -22,24 +22,198 @@
2222
23 /////////////////////////////////////////////////////////////////////////// 23 ///////////////////////////////////////////////////////////////////////////
2424
25 /**
26 * Defines constants for all ISO 639-1 language codes.
27 */
25 namespace iso639_1 {28 namespace iso639_1 {
26 enum type {29 enum type {
27 unknown,30 unknown,
28 da, // Danish31 aa, ///< Afar
29 de, // German32 ab, ///< Abkhazian
30 en, // English33 ae, ///< Avestan
31 es, // Spanish34 af, ///< Afrikaans
32 fi, // Finnish35 ak, ///< Akan
33 fr, // French36 am, ///< Amharic
34 hu, // Hungarian37 an, ///< Aragonese
35 it, // Italian38 ar, ///< Arabic
36 nl, // Dutch39 as, ///< Assamese
37 no, // Norwegian40 av, ///< Avaric
38 pt, // Portuguese41 ay, ///< Aymara
39 ro, // Romanian42 az, ///< Azerbaijani
40 ru, // Russian43 ba, ///< Bashkir
41 sv, // Swedish44 be, ///< Byelorussian
42 tr, // Turkish45 bg, ///< Bulgarian
46 bh, ///< Bihari
47 bi, ///< Bislama
48 bm, ///< Bambara
49 bn, ///< Bengali; Bangla
50 bo, ///< Tibetan
51 br, ///< Breton
52 bs, ///< Bosnian
53 ca, ///< Catalan
54 ce, ///< Chechen
55 ch, ///< Chamorro
56 co, ///< Corsican
57 cr, ///< Cree
58 cs, ///< Czech
59 cu, ///< Church Slavic; Church Slavonic
60 cv, ///< Chuvash
61 cy, ///< Welsh
62 da, ///< Danish
63 de, ///< German
64 dv, ///< Divehi
65 dz, ///< Bhutani
66 ee, ///< Ewe
67 el, ///< Greek
68 en, ///< English
69 eo, ///< Esperanto
70 es, ///< Spanish
71 et, ///< Estonian
72 eu, ///< Basque
73 fa, ///< Persian
74 ff, ///< Fulah
75 fi, ///< Finnish
76 fj, ///< Fiji
77 fo, ///< Faroese
78 fr, ///< French
79 fy, ///< Frisian
80 ga, ///< Irish
81 gd, ///< Scots Gaelic
82 gl, ///< Galician
83 gn, ///< Guarani
84 gu, ///< Gujarati
85 gv, ///< Manx
86 ha, ///< Hausa
87 he, ///< Hebrew (formerly iw)
88 hi, ///< Hindi
89 ho, ///< Hiri Motu
90 hr, ///< Croatian
91 ht, ///< Haitian Creole
92 hu, ///< Hungarian
93 hy, ///< Armenian
94 hz, ///< Herero
95 ia, ///< Interlingua
96 id, ///< Indonesian (formerly in)
97 ie, ///< Interlingue
98 ig, ///< Igbo
99 ii, ///< Nuosu
100 ik, ///< Inupiak
101 io, ///< Ido
102 is, ///< Icelandic
103 it, ///< Italian
104 iu, ///< Inuktitut
105 ja, ///< Japanese
106 jv, ///< Javanese
107 ka, ///< Georgian
108 kg, ///< Kongo
109 ki, ///< Gikuyu
110 kj, ///< Kuanyama
111 kk, ///< Kazakh
112 kl, ///< Greenlandic
113 km, ///< Cambodian
114 kn, ///< Kannada
115 ko, ///< Korean
116 kr, ///< Kanuri
117 ks, ///< Kashmiri
118 ku, ///< Kurdish
119 kv, ///< Komi
120 kw, ///< Cornish
121 ky, ///< Kirghiz
122 la, ///< Latin
123 lb, ///< Letzeburgesch
124 lg, ///< Ganda
125 li, ///< Limburgan; Limburger; Limburgish
126 ln, ///< Lingala
127 lo, ///< Laothian
128 lt, ///< Lithuanian
129 lu, ///< Luba-Katanga
130 lv, ///< Latvian
131 mg, ///< Malagasy
132 mh, ///< Marshallese
133 mi, ///< Maori
134 mk, ///< Macedonian
135 ml, ///< Malayalam
136 mn, ///< Mongolian
137 mo, ///< Moldavian
138 mr, ///< Marathi
139 ms, ///< Malay
140 mt, ///< Maltese
141 my, ///< Burmese
142 na, ///< Nauru
143 nb, ///< Norwegian Bokmal
144 nd, ///< Ndebele, North
145 ne, ///< Nepali
146 ng, ///< Ndonga
147 nl, ///< Dutch
148 nn, ///< Norwegian Nynorsk
149 no, ///< Norwegian
150 nr, ///< Ndebele, South
151 nv, ///< Navajo; Navaho
152 ny, ///< Chichewa; Chewa; Nyanja
153 oc, ///< Occitan
154 oj, ///< Ojibwa
155 om, ///< Oromo
156 or_, ///< Oriya
157 os, ///< Ossetian; Ossetic
158 pa, ///< Panjabi; Punjabi
159 pi, ///< Pali
160 pl, ///< Polish
161 ps, ///< Pashto, Pushto
162 pt, ///< Portuguese
163 qu, ///< Quechua
164 rm, ///< Romansh
165 rn, ///< Kirundi
166 ro, ///< Romanian
167 ru, ///< Russian
168 rw, ///< Kinyarwanda
169 sa, ///< Sanskrit
170 sc, ///< Sardinian
171 sd, ///< Sindhi
172 se, ///< Northern Sami
173 sg, ///< Sangho
174 sh, ///< Serbo-Croatian
175 si, ///< Sinhalese
176 sk, ///< Slovak
177 sl, ///< Slovenian
178 sm, ///< Samoan
179 sn, ///< Shona
180 so, ///< Somali
181 sq, ///< Albanian
182 sr, ///< Serbian
183 ss, ///< Siswati
184 st, ///< Sesotho
185 su, ///< Sundanese
186 sv, ///< Swedish
187 sw, ///< Swahili
188 ta, ///< Tamil
189 te, ///< Telugu
190 tg, ///< Tajik
191 th, ///< Thai
192 ti, ///< Tigrinya
193 tk, ///< Turkmen
194 tl, ///< Tagalog
195 tn, ///< Setswana
196 to, ///< Tonga
197 tr, ///< Turkish
198 ts, ///< Tsonga
199 tt, ///< Tatar
200 tw, ///< Twi
201 ty, ///< Tahitian
202 ug, ///< Uighur
203 uk, ///< Ukrainian
204 ur, ///< Urdu
205 uz, ///< Uzbek
206 ve, ///< Venda
207 vi, ///< Vietnamese
208 vo, ///< Volapuk
209 wa, ///< Walloon
210 wo, ///< Wolof
211 xh, ///< Xhosa
212 yi, ///< Yiddish
213 yo, ///< Yoruba
214 za, ///< Zhuang
215 zh, ///< Chinese
216 zu, ///< Zulu
43 NUM_ENTRIES217 NUM_ENTRIES
44 };218 };
45 }219 }
46220
=== modified file 'include/zorba/pregenerated/diagnostic_list.h'
--- include/zorba/pregenerated/diagnostic_list.h 2012-04-24 12:39:38 +0000
+++ include/zorba/pregenerated/diagnostic_list.h 2012-04-24 21:06:20 +0000
@@ -454,6 +454,14 @@
454extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8402_THESAURUS_ENDIANNESS_MISMATCH;454extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8402_THESAURUS_ENDIANNESS_MISMATCH;
455455
456extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR;456extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR;
457
458extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED;
459
460extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED;
461
462extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED;
463
464extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED;
457#endif465#endif
458466
459extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQD0001_PREFIX_NOT_DECLARED;467extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZXQD0001_PREFIX_NOT_DECLARED;
460468
=== modified file 'include/zorba/stemmer.h'
--- include/zorba/stemmer.h 2012-04-24 12:39:38 +0000
+++ include/zorba/stemmer.h 2012-04-24 21:06:20 +0000
@@ -52,6 +52,23 @@
52 virtual void destroy() const = 0;52 virtual void destroy() const = 0;
5353
54 /**54 /**
55 * Various properties of this %Stemmer.
56 */
57 struct Properties {
58 /**
59 * The URI that uniquely identifies this %Stemmer.
60 */
61 char const *uri;
62 };
63
64 /**
65 * Gets the Properties of this %Stemmer.
66 *
67 * @param result The Properties to populate.
68 */
69 virtual void properties( Properties *result ) const = 0;
70
71 /**
55 * Stems the given word.72 * Stems the given word.
56 *73 *
57 * @param word The word to stem.74 * @param word The word to stem.
@@ -66,7 +83,7 @@
66};83};
6784
68/**85/**
69 * A %StemmerProvider, given an language, provies a stemmer for it.86 * A %StemmerProvider, given a language, provides a Stemmer for it.
70 */87 */
71class ZORBA_DLL_PUBLIC StemmerProvider {88class ZORBA_DLL_PUBLIC StemmerProvider {
72public:89public:
@@ -76,10 +93,12 @@
76 * Gets a Stemmer for the given language.93 * Gets a Stemmer for the given language.
77 *94 *
78 * @param lang The language to get a Stemmer for.95 * @param lang The language to get a Stemmer for.
79 * @return The relevant Stemmer or \c NULL if no stemmer for the given96 * @param s If not \c null, set to point to a Stemmer for \a lang.
80 * language is available.97 * @return Returns \c true only if this provider can provide a stemmer for
98 * \a lang.
81 */99 */
82 virtual Stemmer::ptr getStemmer( locale::iso639_1::type lang ) const = 0;100 virtual bool getStemmer( locale::iso639_1::type lang,
101 Stemmer::ptr *s = 0 ) const = 0;
83};102};
84103
85///////////////////////////////////////////////////////////////////////////////104///////////////////////////////////////////////////////////////////////////////
86105
=== modified file 'include/zorba/thesaurus.h'
--- include/zorba/thesaurus.h 2012-04-24 12:39:38 +0000
+++ include/zorba/thesaurus.h 2012-04-24 21:06:20 +0000
@@ -32,25 +32,13 @@
32///////////////////////////////////////////////////////////////////////////////32///////////////////////////////////////////////////////////////////////////////
3333
34/**34/**
35 * Contains additional data for URIMappers and URLResolvers35 * A %Thesaurus provides a way to look up related phrases for a given phrase.
36 * when mapping/resolving a Thesaurus URI.36 */
37 */37class ZORBA_DLL_PUBLIC Thesaurus {
38class ZORBA_DLL_PUBLIC ThesaurusEntityData : public EntityData {38public:
39public:39 typedef std::unique_ptr<
40 /**40 Thesaurus const,internal::ztd::destroy_delete<Thesaurus const>
41 * Gets the language for which a thesaurus is being requested.41 >
42 *
43 * @return said language.
44 */
45 virtual locale::iso639_1::type getLanguage() const = 0;
46};
47
48/**
49 * A %Thesaurus is-a Resource for thesaurus implementations.
50 */
51class ZORBA_DLL_PUBLIC Thesaurus : public Resource {
52public:
53 typedef std::unique_ptr<Thesaurus,internal::ztd::destroy_delete<Thesaurus> >
54 ptr;42 ptr;
5543
56 /**44 /**
@@ -88,11 +76,11 @@
88 * Destroys this %Thesaurus.76 * Destroys this %Thesaurus.
89 * This function is called by Zorba when the %Thesaurus is no longer needed.77 * This function is called by Zorba when the %Thesaurus is no longer needed.
90 *78 *
91 * If your URLResolver dynamically allocates %Thesaurus objects, then the79 * If your implementation dynamically allocates %Thesaurus objects, then your
92 * implementation can simply be (and usually is) <code>delete this</code>.80 * implementation can simply be (and usually is) <code>delete this</code>.
93 *81 *
94 * If your URLResolver returns a pointer to a static %Thesaurus object, then82 * If your implementation returns a pointer to a static %Thesaurus object,
95 * the implementation should do nothing.83 * then your implementation should do nothing.
96 */84 */
97 virtual void destroy() const = 0;85 virtual void destroy() const = 0;
9886
@@ -119,6 +107,32 @@
119107
120///////////////////////////////////////////////////////////////////////////////108///////////////////////////////////////////////////////////////////////////////
121109
110/**
111 * A %ThesaurusProvider is-a Resource for providing thesauri for a given
112 * language.
113 */
114class ZORBA_DLL_PUBLIC ThesaurusProvider : public Resource {
115public:
116 typedef std::unique_ptr<
117 ThesaurusProvider const,
118 internal::ztd::destroy_delete<ThesaurusProvider const>
119 >
120 ptr;
121
122 /**
123 * Gets a Thesaurus for the given language.
124 *
125 * @param lang The desired language of the thesaurus.
126 * @param t If not \c null, set to point to a Thesaurus for \a lang.
127 * @return Returns \c true only if this provider can provide a thesaurus for
128 * \a lang.
129 */
130 virtual bool getThesaurus( locale::iso639_1::type lang,
131 Thesaurus::ptr *t = 0 ) const = 0;
132};
133
134///////////////////////////////////////////////////////////////////////////////
135
122} // namespace zorba136} // namespace zorba
123#endif /* ZORBA_NO_FULL_TEXT */137#endif /* ZORBA_NO_FULL_TEXT */
124#endif /* ZORBA_THESAURUS_API_H */138#endif /* ZORBA_THESAURUS_API_H */
125139
=== modified file 'include/zorba/tokenizer.h'
--- include/zorba/tokenizer.h 2012-04-24 12:39:38 +0000
+++ include/zorba/tokenizer.h 2012-04-24 21:06:20 +0000
@@ -18,6 +18,8 @@
18#ifndef ZORBA_TOKENIZER_API_H18#ifndef ZORBA_TOKENIZER_API_H
19#define ZORBA_TOKENIZER_API_H19#define ZORBA_TOKENIZER_API_H
2020
21#include <vector>
22
21#include <zorba/config.h>23#include <zorba/config.h>
22#include <zorba/locale.h>24#include <zorba/locale.h>
23#include <zorba/internal/unique_ptr.h>25#include <zorba/internal/unique_ptr.h>
@@ -67,8 +69,6 @@
67 * A %Callback is called once per token.69 * A %Callback is called once per token.
68 * This is only used internally by Zorba.70 * This is only used internally by Zorba.
69 * You do not need to derive from this class.71 * You do not need to derive from this class.
70 * The only thing you need to do is call the callback's \c operator() once
71 * for each token you parse in \c tokenize().
72 */72 */
73 class Callback {73 class Callback {
74 public:74 public:
@@ -77,19 +77,75 @@
77 virtual ~Callback();77 virtual ~Callback();
7878
79 /**79 /**
80 * This member-function is called whenever an item that is being tokenized
81 * is entered or exited.
82 *
83 * @param item The item being entered or exited.
84 * @param entering If \c true, the item is being entered; if \c false, the
85 * item is being exited.
86 */
87 virtual void item( Item const &item, bool entering );
88
89 /**
80 * This member-function is called once per token.90 * This member-function is called once per token.
81 *91 *
82 * @param utf8_s The UTF-8 token string. It is not null-terminated.92 * @param utf8_s The UTF-8 token string. It is not null-terminated.
83 * @param utf8_len The number of bytes in the token string.93 * @param utf8_len The number of bytes in the token string.
94 * @param lang The language of the token.
84 * @param token_no The token number. Token numbers start at 0.95 * @param token_no The token number. Token numbers start at 0.
85 * @param sent_no The sentence number. Sentence numbers start at 1.96 * @param sent_no The sentence number. Sentence numbers start at 1.
86 * @param para_no The paragraph number. Paragraph numbers start at 1.97 * @param para_no The paragraph number. Paragraph numbers start at 1.
87 * @param payload Optional user-defined data.98 * @param item The Item this token is from, if any.
88 */99 */
89 virtual void operator()( char const *utf8_s, size_type utf8_len,100 virtual void token( char const *utf8_s, size_type utf8_len,
90 size_type token_no, size_type sent_no,101 locale::iso639_1::type lang,
91 size_type para_no, void *payload = 0 ) = 0;102 size_type token_no, size_type sent_no,
92 };103 size_type para_no, Item const *item = 0 ) = 0;
104 };
105
106 /////////////////////////////////////////////////////////////////////////////
107
108 /**
109 * Various properties of this %Tokenizer.
110 */
111 struct Properties {
112 typedef std::vector<locale::iso639_1::type> languages_type;
113
114 /**
115 * If \c true, XML comments separate tokens. For example,
116 * \c net&lt;!----&gt;work would be 2 tokens instead of 1.
117 */
118 bool comments_separate_tokens;
119
120 /**
121 * If \c true, XML elements separate tokens. For example,
122 * \c &lt;b&gt;B&lt;/b&gt;old would be 2 tokens instead of 1.
123 */
124 bool elements_separate_tokens;
125
126 /**
127 * If \c true, XML processing instructions separate tokens. For example,
128 * <code>net<?PI pi?>work</code> would be 2 tokens instead of 1.
129 */
130 bool processing_instructions_separate_tokens;
131
132 /**
133 * The set of languages supported.
134 */
135 languages_type languages;
136
137 /**
138 * The URI that uniquely identifies this %Tokenizer.
139 */
140 char const* uri;
141 };
142
143 /**
144 * Gets the Properties of this %Tokenizer.
145 *
146 * @param result The Properties to populate.
147 */
148 virtual void properties( Properties *result ) const = 0;
93149
94 /////////////////////////////////////////////////////////////////////////////150 /////////////////////////////////////////////////////////////////////////////
95151
@@ -106,39 +162,6 @@
106 virtual void destroy() const = 0;162 virtual void destroy() const = 0;
107163
108 /**164 /**
109 * Trace options for XML elements combined via bitwise-or.
110 */
111 enum ElementTraceOptions {
112 trace_none = 0x0, ///< Trace no elements.
113 trace_begin = 0x1, ///< Trace the beginning of elements.
114 trace_end = 0x2 ///< Trace the ending of elements.
115 };
116
117 /**
118 * Gets the trace options. If the value is \c trace_none, then the paragraph
119 * number will be incremented upon entering an XML element; if the value is
120 * anything other than \c trace_none, then the tokenizer assumes
121 * responsibility for incrementing the paragraph number.
122 *
123 * @return Returns said options.
124 */
125 int trace_options() const {
126 return trace_options_;
127 }
128
129 /**
130 * This function is called whenever an XML element is entered during
131 * tokenization. Note that this function is called only if \c
132 * trace_options() returns non-zero.
133 *
134 * @param qname The element's QName.
135 * @param trace_options The bitwise-or of the trace option(s) in effect for a
136 * particular call.
137 * @see trace_options()
138 */
139 virtual void element( Item const &qname, int trace_options );
140
141 /**
142 * Gets this %Tokenizer's associated Numbers.165 * Gets this %Tokenizer's associated Numbers.
143 *166 *
144 * @return Returns said Numbers.167 * @return Returns said Numbers.
@@ -153,6 +176,16 @@
153 Numbers const& numbers() const;176 Numbers const& numbers() const;
154177
155 /**178 /**
179 * Tokenizes the given node.
180 *
181 * @param node The node to tokenize.
182 * @param lang The default language to use.
183 * @param callback The Callback to call once per token.
184 */
185 void tokenize_node( Item const &node, locale::iso639_1::type lang,
186 Callback &callback );
187
188 /**
156 * Tokenizes the given string.189 * Tokenizes the given string.
157 *190 *
158 * @param utf8_s The UTF-8 string to tokenize. It need not be191 * @param utf8_s The UTF-8 string to tokenize. It need not be
@@ -162,11 +195,11 @@
162 * @param wildcards If \c true, allows XQuery wildcard syntax characters to195 * @param wildcards If \c true, allows XQuery wildcard syntax characters to
163 * be part of tokens.196 * be part of tokens.
164 * @param callback The Callback to call once per token.197 * @param callback The Callback to call once per token.
165 * @param payload Optional user-defined data.198 * @param item The Item this string is from, if any.
166 */199 */
167 virtual void tokenize( char const *utf8_s, size_type utf8_len,200 virtual void tokenize_string( char const *utf8_s, size_type utf8_len,
168 locale::iso639_1::type lang, bool wildcards,201 locale::iso639_1::type lang, bool wildcards,
169 Callback &callback, void *payload = 0 ) = 0;202 Callback &callback, Item const *item = 0 ) = 0;
170203
171 /////////////////////////////////////////////////////////////////////////////204 /////////////////////////////////////////////////////////////////////////////
172205
@@ -175,27 +208,71 @@
175 * Constructs a %Tokenizer.208 * Constructs a %Tokenizer.
176 *209 *
177 * @param numbers the Numbers to use.210 * @param numbers the Numbers to use.
178 * @param trace_options The bitwise-or of the available trace options, if
179 * any.
180 */211 */
181 Tokenizer( Numbers &numbers, int trace_options = trace_none );212 Tokenizer( Numbers &numbers );
182213
183 /**214 /**
184 * Destroys a %Tokenizer.215 * Destroys a %Tokenizer.
185 */216 */
186 virtual ~Tokenizer() = 0;217 virtual ~Tokenizer() = 0;
187218
219 /**
220 * Given an element, finds its \c xml:lang attribute, if any, and gets its
221 * value.
222 *
223 * @param element The element to check.
224 * @param lang A pointer to where to put the found language, if any.
225 * @return Returns \c true only if an \c xml:lang attribute is found and the
226 * value is a known language.
227 */
228 bool find_lang_attribute( Item const &element, locale::iso639_1::type *lang );
229
230 /**
231 * This member-function is called whenever an item that is being tokenized is
232 * entered or exited.
233 *
234 * @param item The item being entered or exited.
235 * @param entering If \c true, the item is being entered; if \c false, the
236 * item is being exited.
237 */
238 virtual void item( Item const &item, bool entering );
239
240 /**
241 * Tokenizes the given node and all of its child nodes, if any. For each
242 * node, it is required that this function call the item() member function of
243 * both this %Tokenizer and of the Callback twice, once each for entrance and
244 * exit.
245 *
246 * @param node The node to tokenize.
247 * @param lang The default language to use.
248 * @param callback The Callback to call per token.
249 * @param tokenize_acp If \c true, additionally tokenize all attribute,
250 * comment, and processing-instruction nodes encountered;
251 * if \c false, skip them.
252 */
253 virtual void tokenize_node_impl( Item const &node,
254 locale::iso639_1::type lang,
255 Callback &callback, bool tokenize_acp );
256
188private:257private:
189 int trace_options_;258 Numbers *numbers_;
190 Numbers *no_;
191};259};
192260
261inline Tokenizer::Tokenizer( Numbers &numbers ) : numbers_( &numbers ) {
262}
263
193inline Tokenizer::Numbers& Tokenizer::numbers() {264inline Tokenizer::Numbers& Tokenizer::numbers() {
194 return *no_;265 return *numbers_;
195}266}
196267
197inline Tokenizer::Numbers const& Tokenizer::numbers() const {268inline Tokenizer::Numbers const& Tokenizer::numbers() const {
198 return *no_;269 return *numbers_;
270}
271
272inline void Tokenizer::tokenize_node( Item const &item,
273 locale::iso639_1::type lang,
274 Callback &callback ) {
275 tokenize_node_impl( item, lang, callback, true );
199}276}
200277
201///////////////////////////////////////////////////////////////////////////////278///////////////////////////////////////////////////////////////////////////////
@@ -211,11 +288,14 @@
211 * Creates a new %Tokenizer.288 * Creates a new %Tokenizer.
212 *289 *
213 * @param lang The language of the text that the tokenizer will tokenize.290 * @param lang The language of the text that the tokenizer will tokenize.
214 * @param numbers The Numbers to use.291 * @param numbers The Numbers to use. If \c null, \a t is not set.
215 * @return Returns said %Tokenizer.292 * @param t If not \c null, set to point to a Tokenizer for \a lang.
293 * @return Returns \c true only if this provider can provide a tokenizer for
294 * \a lang.
216 */295 */
217 virtual Tokenizer::ptr getTokenizer( locale::iso639_1::type lang,296 virtual bool getTokenizer( locale::iso639_1::type lang,
218 Tokenizer::Numbers &numbers ) const = 0;297 Tokenizer::Numbers *numbers = 0,
298 Tokenizer::ptr *t = 0 ) const = 0;
219};299};
220300
221///////////////////////////////////////////////////////////////////////////////301///////////////////////////////////////////////////////////////////////////////
222302
=== modified file 'include/zorba/uri_resolvers.h'
--- include/zorba/uri_resolvers.h 2012-04-24 12:39:38 +0000
+++ include/zorba/uri_resolvers.h 2012-04-24 21:06:20 +0000
@@ -50,7 +50,8 @@
50class ZORBA_DLL_PUBLIC Resource50class ZORBA_DLL_PUBLIC Resource
51{51{
52public:52public:
53 typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> > ptr;53 typedef std::unique_ptr<Resource,internal::ztd::destroy_delete<Resource> >
54 ptr;
5455
55 virtual ~Resource() = 0;56 virtual ~Resource() = 0;
5657
@@ -172,8 +173,8 @@
172 * object itself will be discarded.173 * object itself will be discarded.
173 *174 *
174 * In any case, if they create a Resource, Zorba will take memory175 * In any case, if they create a Resource, Zorba will take memory
175 * ownership of the Resource and delete it when it is no longer176 * ownership of the Resource and delete it (by calling destroy() on it)
176 * needed.177 * when it is no longer needed.
177 */178 */
178 virtual Resource* resolveURL(const zorba::String& aUrl,179 virtual Resource* resolveURL(const zorba::String& aUrl,
179 EntityData const* aEntityData) = 0;180 EntityData const* aEntityData) = 0;
180181
=== modified file 'modules/com/zorba-xquery/www/modules/CMakeLists.txt'
--- modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-24 12:39:38 +0000
+++ modules/com/zorba-xquery/www/modules/CMakeLists.txt 2012-04-24 21:06:20 +0000
@@ -72,6 +72,13 @@
72DECLARE_ZORBA_MODULE(FILE xqdoc.xq VERSION 2.072DECLARE_ZORBA_MODULE(FILE xqdoc.xq VERSION 2.0
73 URI "http://www.zorba-xquery.com/modules/xqdoc")73 URI "http://www.zorba-xquery.com/modules/xqdoc")
7474
75IF(NOT ZORBA_NO_FULL_TEXT)
76 DECLARE_ZORBA_MODULE(FILE full-text.xq VERSION 2.0
77 URI "http://www.zorba-xquery.com/modules/full-text")
78 DECLARE_ZORBA_SCHEMA(FILE full-text.xsd
79 URI "http://www.zorba-xquery.com/modules/full-text")
80ENDIF(NOT ZORBA_NO_FULL_TEXT)
81
75# Subdirectories82# Subdirectories
76DECLARE_ZORBA_MODULE(FILE converters/base64.xq VERSION 2.083DECLARE_ZORBA_MODULE(FILE converters/base64.xq VERSION 2.0
77 URI "http://www.zorba-xquery.com/modules/converters/base64")84 URI "http://www.zorba-xquery.com/modules/converters/base64")
7885
=== added file 'modules/com/zorba-xquery/www/modules/full-text.xq'
--- modules/com/zorba-xquery/www/modules/full-text.xq 1970-01-01 00:00:00 +0000
+++ modules/com/zorba-xquery/www/modules/full-text.xq 2012-04-24 21:06:20 +0000
@@ -0,0 +1,872 @@
1xquery version "3.0";
2
3(:
4 : Copyright 2006-2011 The FLWOR Foundation.
5 :
6 : Licensed under the Apache License, Version 2.0 (the "License");
7 : you may not use this file except in compliance with the License.
8 : You may obtain a copy of the License at
9 :
10 : http://www.apache.org/licenses/LICENSE-2.0
11 :
12 : Unless required by applicable law or agreed to in writing, software
13 : distributed under the License is distributed on an "AS IS" BASIS,
14 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : See the License for the specific language governing permissions and
16 : limitations under the License.
17 :)
18
19(:===========================================================================:)
20
21(:~
22 : This module provides an XQuery API to full-text functions.
23 : For general information about Zorba's implementation of the
24 : <a href="http://www.w3.org/TR/xpath-full-text-10/">XQuery and XPath Full Text 1.0 specification</a>
25 : as well as instructions for building an installing a thesaurus,
26 : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_thesaurus">Full Text Thesaurus documentation</a>.
27 : <h2>Notes on languages</h2>
28 : To refer to particular human languages,
29 : Zorba uses both the
30 : <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>
31 : and
32 : <a href="http://en.wikipedia.org/wiki/ISO_639-2">ISO 639-2</a>
33 : language codes.
34 : Note that Zorba supports only a subset of the
35 : <a href="http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">complete list of language codes</a>
36 : and not every function supports the same subset.
37 : <p/>
38 : Most functions in this module take a language as a parameter
39 : using the
40 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>
41 : XML schema data type.
42 : <h2>Notes on stemming</h2>
43 : The <code>stem()</code> functions return the
44 : <a href="http://en.wikipedia.org/wiki/Word_stem">stem</a>
45 : of a word.
46 : In Zorba,
47 : the stem of a word itself, however, is not guaranteed to be a word.
48 : It is best to consider a stem as an opaque byte sequence.
49 : All that is guaranteed about a stem is that,
50 : for a given word,
51 : the stem of that word will always be the same byte sequence.
52 : Hence,
53 : you should never compare the result of one of the <code>stem()</code>
54 : functions against a non-stemmed string,
55 : for example:
56 : <pre>
57 : if ( ft:stem( "apples" ) eq "apple" ) ** WRONG **
58 : </pre>
59 : Instead do:
60 : <pre>
61 : if ( ft:stem( "apples" ) eq ft:stem( "apple" ) ) ** CORRECT **
62 : </pre>
63 : <h2>Notes on the thesaurus</h2>
64 : The <code>thesaurus-lookup()</code> functions have "levels"
65 : and "relationship" parameters.
66 : The values for these are implementation-defined.
67 : Zorba's default implementation uses the
68 : <a href="http://wordnet.princeton.edu/">WordNet lexical database</a>,
69 : version 3.0.
70 : <p/>
71 : In WordNet,
72 : the number of "levels" that two phrases are apart
73 : are how many hierarchical meanings apart they are.
74 : For example,
75 : "canary" is 5 levels away from "vertebrate"
76 : (canary &gt; finch &gt; oscine &gt; passerine &gt; bird &gt; vertebrate).
77 : <p/>
78 : When using the WordNet implementation,
79 : Zorba supports all of the relationships (and their abbreviations)
80 : specified by
81 : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a>
82 : and
83 : <a href="http://www.niso.org/kst/reports/standards?step=2&amp;gid=&amp;project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a>
84 : with the exceptions of "HN" (history note)
85 : and "X SN" (see scope note for).
86 : These relationships are:
87 : <table>
88 : <tr>
89 : <th>Rel.</th>
90 : <th>Meaning</th>
91 : <th>WordNet Rel.</th>
92 : </tr>
93 : <tr>
94 : <td>BT</td>
95 : <td>broader term</td>
96 : <td>hypernym</td>
97 : </tr>
98 : <tr>
99 : <td>BTG</td>
100 : <td>broader term generic</td>
101 : <td>hypernym</td>
102 : </tr>
103 : <tr>
104 : <td>BTI</td>
105 : <td>broader term instance</td>
106 : <td>instance hypernym</td>
107 : </tr>
108 : <tr>
109 : <td>BTP</td>
110 : <td>broader term partitive</td>
111 : <td>part meronym</td>
112 : </tr>
113 : <tr>
114 : <td>NT</td>
115 : <td>narrower term</td>
116 : <td>hyponym</td>
117 : </tr>
118 : <tr>
119 : <td>NTG</td>
120 : <td>narrower term generic</td>
121 : <td>hyponym</td>
122 : </tr>
123 : <tr>
124 : <td>NTI</td>
125 : <td>narrower term instance</td>
126 : <td>instance hyponym</td>
127 : </tr>
128 : <tr>
129 : <td>NTP</td>
130 : <td>narrower term partitive</td>
131 : <td>part holonym</td>
132 : </tr>
133 : <tr>
134 : <td>RT</td>
135 : <td>related term</td>
136 : <td>also see</td>
137 : </tr>
138 : <tr>
139 : <td>SN</td>
140 : <td>scope note</td>
141 : <td>n/a</td>
142 : </tr>
143 : <tr>
144 : <td>TT</td>
145 : <td>top term</td>
146 : <td>hypernym</td>
147 : </tr>
148 : <tr>
149 : <td>UF</td>
150 : <td>non-preferred term</td>
151 : <td>n/a</td>
152 : </tr>
153 : <tr>
154 : <td>USE</td>
155 : <td>preferred term</td>
156 : <td>n/a</td>
157 : </tr>
158 : </table>
159 : Note that you can specify relationships
160 : either by their abbreviation
161 : or their meaning.
162 : Relationships are case-insensitive.
163 :
164 : In addition to the
165 : <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=7776">ISO 2788</a>
166 : and
167 : <a href="http://www.niso.org/kst/reports/standards?step=2&amp;gid=&amp;project_key=7cc9b583cb5a62e8c15d3099e0bb46bbae9cf38a">ANSI/NISO Z39.19-2005</a>
168 : relationships,
169 : Zorba also supports all of the relationships offered by WordNet.
170 : These relationships are:
171 : <table class="ft_rels">
172 : <tr>
173 : <th>Relationship</th>
174 : <th>Meaning</th>
175 : </tr>
176 : <tr>
177 : <td nowrap="nowrap">also see</td>
178 : <td>
179 : A word that is related to another,
180 : e.g., for "varnished" (furniture)
181 : one should <em>also see</em> "finished."
182 : </td>
183 : </tr>
184 : <tr>
185 : <td>antonym</td>
186 : <td>
187 : A word opposite in meaning to another,
188 : e.g., "light" is an <em>antonym</em> for "heavy."
189 : </td>
190 : </tr>
191 : <tr>
192 : <td>attribute</td>
193 : <td>
194 : A noun for which adjectives express values,
195 : e.g., "weight" is an <em>attribute</em>
196 : for which the adjectives "light" and "heavy"
197 : express values.
198 : </td>
199 : </tr>
200 : <tr>
201 : <td>cause</td>
202 : <td>
203 : A verb that causes another,
204 : e.g., "show" is a <em>cause</em> of "see."
205 : </td>
206 : </tr>
207 : <tr>
208 : <td nowrap="nowrap">derivationally related form</td>
209 : <td>
210 : A word that is derived from a root word,
211 : e.g., "metric" is a <em>derivationally related form</em> of "meter."
212 : </td>
213 : </tr>
214 : <tr>
215 : <td nowrap="nowrap">derived from adjective</td>
216 : <td>
217 : An adverb that is derived from an adjective,
218 : e.g., "correctly" is <em>derived from the adjective</em> "correct."
219 : </td>
220 : </tr>
221 : <tr>
222 : <td>entailment</td>
223 : <td>
224 : A verb that presupposes another,
225 : e.g., "snoring" <em>entails</em> "sleeping."
226 : </td>
227 : </tr>
228 : <tr>
229 : <td>hypernym</td>
230 : <td>
231 : A word with a broad meaning that more specific words fall under,
232 : e.g., "meal" is a <em>hypernym</em> of "breakfast."
233 : </td>
234 : </tr>
235 : <tr>
236 : <td>hyponym</td>
237 : <td>
238 : A word of more specific meaning than a general term applicable to it,
239 : e.g., "breakfast" is a <em>hyponym</em> of "meal."
240 : </td>
241 : </tr>
242 : <tr>
243 : <td nowrap="nowrap">instance hypernym</td>
244 : <td>
245 : A word that denotes a category of some specific instance,
246 : e.g., "author" is an <em>instance hypernym</em> of "Asimov."
247 : </td>
248 : </tr>
249 : <tr>
250 : <td nowrap="nowrap">instance hyponym</td>
251 : <td>
252 : A term that denotes a specific instance of some general category,
253 : e.g., "Asimov" is an <em>instance hyponym</em> of "author."
254 : </td>
255 : </tr>
256 : <tr>
257 : <td nowrap="nowrap">member holonym</td>
258 : <td>
259 : A word that denotes a collection of individuals,
260 : e.g., "faculty" is a <em>member holonym</em> of "professor."
261 : </td>
262 : </tr>
263 : <tr>
264 : <td nowrap="nowrap">member meronym</td>
265 : <td>
266 : A word that denotes a member of a larger group,
267 : e.g., a "person" is a <em>member meronym</em> of a "crowd."
268 : </td>
269 : </tr>
270 : <tr>
271 : <td nowrap="nowrap">part holonym</td>
272 : <td>
273 : A word that denotes a larger whole comprised of some part,
274 : e.g., "car" is a <em>part holonym</em> of "engine."
275 : </td>
276 : </tr>
277 : <tr>
278 : <td nowrap="nowrap">part meronym</td>
279 : <td>
280 : A word that denotes a part of a larger whole,
281 : e.g., an "engine" is a <em>part meronym</em> of a "car."
282 : </td>
283 : </tr>
284 : <tr>
285 : <td nowrap="nowrap">participle of verb</td>
286 : <td>
287 : An adjective that is the participle of some verb,
288 : e.g., "breaking" is the <em>participle of the verb</em> "break."
289 : </td>
290 : </tr>
291 : <tr>
292 : <td>pertainym</td>
293 : <td>
294 : An adjective that classifies its noun,
295 : e.g., "musical" is a <em>pertainym</em> in "musical instrument."
296 : </td>
297 : </tr>
298 : <tr>
299 : <td nowrap="nowrap">similar to</td>
300 : <td>
301 : Similar, though not necessarily interchangeable, adjectives.
302 : For example, "shiny" is <em>similar to</em> "bright",
303 : but they have subtle differences.
304 : </td>
305 : </tr>
306 : <tr>
307 : <td nowrap="nowrap">substance holonym</td>
308 : <td>
309 : A word that denotes a larger whole containing some constituent
310 : substance, e.g., "bread" is a <em>substance holonym</em> of "flour."
311 : </td>
312 : </tr>
313 : <tr>
314 : <td nowrap="nowrap">substance meronym</td>
315 : <td>
316 : A word that denotes a constituent substance of some larger whole,
317 : e.g., "flour" is a <em>substance meronym</em> of "bread."
318 : </td>
319 : </tr>
320 : <tr>
321 : <td nowrap="nowrap">verb group</td>
322 : <td>
323 : A verb that is a member of a group of similar verbs,
324 : e.g., "live" is in the <em>verb group</em>
325 : of "dwell", "live", "inhabit", etc.
326 : </td>
327 : </tr>
328 : </table>
329 : <h2>Notes on tokenization</h2>
330 : For general information about Zorba's implementation of tokenization,
331 : including what constitutes a token,
332 : see the <a href="http://www.zorba-xquery.com/html/documentation/latest/zorba/ft_tokenizer">Full Text Tokenizer</a> documentation.
333 :)
334
335(:===========================================================================:)
336
337module namespace ft = "http://www.zorba-xquery.com/modules/full-text";
338
339import schema namespace ft-schema =
340 "http://www.zorba-xquery.com/modules/full-text";
341
342declare namespace err = "http://www.w3.org/2005/xqt-errors";
343declare namespace zerr = "http://www.zorba-xquery.com/errors";
344
345declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
346declare option ver:module-version "2.0";
347
348(:===========================================================================:)
349
350(:~
351 : Predeclared constant for the Danish
352 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
353 :)
354declare variable $ft:lang-da as xs:language := xs:language("da");
355
356(:~
357 : Predeclared constant for the German
358 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
359 :)
360declare variable $ft:lang-de as xs:language := xs:language("de");
361
362(:~
363 : Predeclared constant for the English
364 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
365 :)
366declare variable $ft:lang-en as xs:language := xs:language("en");
367
368(:~
369 : Predeclared constant for the Spanish
370 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
371 :)
372declare variable $ft:lang-es as xs:language := xs:language("es");
373
374(:~
375 : Predeclared constant for the Finnish
376 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
377 :)
378declare variable $ft:lang-fi as xs:language := xs:language("fi");
379
380(:~
381 : Predeclared constant for the French
382 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
383 :)
384declare variable $ft:lang-fr as xs:language := xs:language("fr");
385
386(:~
387 : Predeclared constant for the Hungarian
388 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
389 :)
390declare variable $ft:lang-hu as xs:language := xs:language("hu");
391
392(:~
393 : Predeclared constant for the Italian
394 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
395 :)
396declare variable $ft:lang-it as xs:language := xs:language("it");
397
398(:~
399 : Predeclared constant for the Dutch
400 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
401 :)
402declare variable $ft:lang-nl as xs:language := xs:language("nl");
403
404(:~
405 : Predeclared constant for the Norwegian
406 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
407 :)
408declare variable $ft:lang-no as xs:language := xs:language("no");
409
410(:~
411 : Predeclared constant for the Portuguese
412 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
413 :)
414declare variable $ft:lang-pt as xs:language := xs:language("pt");
415
416(:~
417 : Predeclared constant for the Romanian
418 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
419 :)
420declare variable $ft:lang-ro as xs:language := xs:language("ro");
421
422(:~
423 : Predeclared constant for the Russian
424 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
425 :)
426declare variable $ft:lang-ru as xs:language := xs:language("ru");
427
428(:~
429 : Predeclared constant for the Swedish
430 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
431 :)
432declare variable $ft:lang-sv as xs:language := xs:language("sv");
433
434(:~
435 : Predeclared constant for the Turkish
436 : <a href="http://www.w3.org/TR/xmlschema-2/#language"><code>xs:language</code></a>.
437 :)
438declare variable $ft:lang-tr as xs:language := xs:language("tr");
439
440(:===========================================================================:)
441
442(:~
443 : Gets the current
444 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>:
445 : either the language specified by the
446 : <code><a href="http://www.w3.org/TR/xpath-full-text-10/#doc-xquery10-FTOptionDecl">declare ft-option using</a>
447 : <a href="http://www.w3.org/TR/xpath-full-text-10/#ftlanguageoption">language</a></code>
448 : statement (if any)
449 : or the one returned by <code>ft:host-lang()</code> (if none).
450 :
451 : @return said language.
452 : @example test/rbkt/Queries/zorba/fulltext/ft-module-current-lang-true-1.xq
453 :)
454declare function ft:current-lang()
455 as xs:language external;
456
457(:~
458 : Gets the host's current
459 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>.
460 : The "host" is the computer on which Zorba is running.
461 : The host's current language is obtained as follows:
462 : <ul>
463 : <li>
464 : For *nix systems:
465 : <ol>
466 : <li>
467 : If <a href="http://www.cplusplus.com/reference/clibrary/clocale/setlocale/"><code>setlocale</code>(3)</a> returns non-null,
468 : the language corresponding to that locale is used.
469 : </li>
470 : <li>
471 : Else, if the <code>LANG</code> environment variable is set,
472 : that language is used.
473 : </li>
474 : <li>
475 : Otherwise, there is no default language.
476 : </li>
477 : </ol>
478 : </li>
479 : <li>
480 : For Windows systems,
481 : the language corresponding to the locale returned by the
482 : <a href="http://msdn.microsoft.com/en-us/library/windows/desktop/dd318101(v=vs.85).aspx"><code>GetLocaleInfo()</code></a>
483 : function is used.
484 : </li>
485 : </ul>
486 :
487 : @return said language.
488 :)
489declare function ft:host-lang()
490 as xs:language external;
491
492(:~
493 : Checks whether the given
494 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
495 : is supported for stemming.
496 :
497 : @param $lang The language to check.
498 : @return <code>true</code> only if the language is supported.
499 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stem-lang-es-supported-true.xq
500 :)
501declare function ft:is-stem-lang-supported( $lang as xs:language )
502 as xs:boolean external;
503
504(:~
505 : Checks whether the given word is a stop-word.
506 :
507 : @param $word The word to check.
508 : @param $lang The
509 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
510 : of <code>$word</code>.
511 : @return <code>true</code> only if <code>$word</code> is a stop-word.
512 : @error zerr:ZXQP8405 if <code>$lang</code> is not supported for stop-words.
513 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-1.xq
514 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-3.xq
515 :)
516declare function ft:is-stop-word( $word as xs:string, $lang as xs:language )
517 as xs:boolean external;
518
519(:~
520 : Checks whether the given word is a stop-word.
521 :
522 : @param $word The word to check.
523 : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
524 : is assumed to be the one returned by <code>ft:current-lang()</code>.
525 : @return <code>true</code> only if <code>$word</code> is a stop-word.
526 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
527 : general.
528 : @error zerr:ZXQP8405 if <code>ft:current-lang()</code> is not supported for
529 : stop-words specifically.
530 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-2.xq
531 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-true-4.xq
532 :)
533declare function ft:is-stop-word( $word as xs:string )
534 as xs:boolean external;
535
536(:~
537 : Checks whether the given
538 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
539 : is supported for stop words.
540 :
541 : @param $lang The language to check.
542 : @return <code>true</code> only if the language is supported.
543 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-en-supported-true.xq
544 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-1.xq
545 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-stop-word-lang-supported-false-2.xq
546 :)
547declare function ft:is-stop-word-lang-supported( $lang as xs:language )
548 as xs:boolean external;
549
550(:~
551 : Checks whether the given
552 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
553 : is supported for look-up using the default thesaurus.
554 :
555 : @param $lang The language to check.
556 : @return <code>true</code> only if the language is supported.
557 :)
558declare function ft:is-thesaurus-lang-supported( $lang as xs:language )
559 as xs:boolean external;
560
561(:~
562 : Checks whether the given
563 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
564 : is supported for look-up using the thesaurus specified by the given URI.
565 :
566 : @param $uri The URI specifying the thesaurus to use.
567 : @param $lang The language to check.
568 : @return <code>true</code> only if the language is supported.
569 : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
570 : that is not found in the statically known thesauri.
571 : @example test/rbkt/Queries/zorba/fulltext/ft-module-is-thesaurus-lang-supported-true-1.xq
572 :)
573declare function ft:is-thesaurus-lang-supported( $uri as xs:string,
574 $lang as xs:language )
575 as xs:boolean external;
576
577(:~
578 : Checks whether the given
579 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
580 : is supported for tokenization.
581 :
582 : @param $lang The language to check.
583 : @return <code>true</code> only if the language is supported.
584 :)
585declare function ft:is-tokenizer-lang-supported( $lang as xs:language )
586 as xs:boolean external;
587
588(:~
589 : Stems the given word.
590 :
591 : @param $word The word to stem.
592 : @param $lang The
593 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
594 : of <code>$word</code>.
595 : @return the stem of <code>$word</code>.
596 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
597 : @error zerr:ZXQP8404 if <code>$lang</code> is not supported for stemming
598 : specifically.
599 : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-1.xq
600 : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-2.xq
601 :)
602declare function ft:stem( $word as xs:string, $lang as xs:language )
603 as xs:string external;
604
605(:~
606 : Stems the given word.
607 :
608 : @param $word The word to stem.
609 : The word's <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
610 : is assumed to be the one returned by <code>ft:current-lang()</code>.
611 : @return the stem of <code>$word</code>.
612 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
613 : general.
614 : @error zerr:ZXQP8404 if <code>ft:current-lang()</code> is not supported for
615 : stemming specifically.
616 : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-3.xq
617 : @example test/rbkt/Queries/zorba/fulltext/ft-module-stem-4.xq
618 :)
619declare function ft:stem( $word as xs:string )
620 as xs:string external;
621
622(:~
623 : Strips all diacritical marks from all characters.
624 :
625 : @param $string The string to strip diacritical marks from.
626 : @return <code>$string</code> with diacritical marks stripped.
627 : @example test/rbkt/Queries/zorba/fulltext/ft-module-strip-diacritics-1.xq
628 :)
629declare function ft:strip-diacritics( $string as xs:string )
630 as xs:string external;
631
632(:~
633 : Looks-up the given phrase in the default thesaurus.
634 :
635 : @param $phrase The phrase to look up.
636 : The phrase's
637 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
638 : is assumed to be the one returned by <code>ft:current-lang()</code>.
639 : @return the original and related phrases.
640 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
641 : general.
642 : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
643 : by the currently running version of Zorba.
644 : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
645 : that of the CPU on which Zorba is currently running.
646 : @error zerr:ZXQP8403 if there was an error reading the thesaurus data.
647 : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for
648 : thesaurus look-up specifically.
649 : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-1.xq
650 :)
651declare function ft:thesaurus-lookup( $phrase as xs:string )
652 as xs:string+ external;
653
654(:~
655 : Looks-up the given phrase in the thesaurus specified by the given URI.
656 :
657 : @param $uri The URI specifying the thesaurus to use.
658 : @param $phrase The phrase to look up.
659 : @param $lang The
660 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
661 : of <code>$phrase</code>.
662 : @return the original and related phrases.
663 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
664 : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
665 : that is not found in the statically known thesauri.
666 : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
667 : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
668 : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
669 : by the currently running version of Zorba.
670 : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
671 : that of the CPU on which Zorba is currently running.
672 : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
673 : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
674 : look-up specifically.
675 : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-2.xq
676 :)
677declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
678 $lang as xs:language )
679 as xs:string+ external;
680
681(:~
682 : Looks-up the given phrase in a thesaurus.
683 :
684 : @param $uri The URI specifying the thesaurus to use.
685 : @param $phrase The phrase to look up.
686 : The phrase's
687 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
688 : is assumed to be the one returned by <code>ft:current-lang()</code>.
689 : @return the original and related phrases.
690 : @error err:FTST0009 if <code>ft:current-lang()</code> is unsupported in
691 : general.
692 : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
693 : that is not found in the statically known thesauri.
694 : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
695 : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
696 : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
697 : by the currently running version of Zorba.
698 : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
699 : that of the CPU on which Zorba is currently running.
700 : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
701 : @error zerr:ZXQP8406 if <code>ft:current-lang()</code> is not supported for
702 : thesaurus look-up specifically.
703 : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-3.xq
704 :)
705declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string )
706 as xs:string+ external;
707
708(:~
709 : Looks-up the given phrase in a thesaurus.
710 :
711 : @param $uri The URI specifying the thesaurus to use.
712 : @param $phrase The phrase to look up.
713 : @param $lang The
714 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
715 : of <code>$phrase</code>.
716 : @param $relationship The relationship the results are to have to
717 : <code>$phrase</code>.
718 : @return the original and related phrases.
719 : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
720 : that is not found in the statically known thesauri.
721 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
722 : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
723 : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
724 : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
725 : by the currently running version of Zorba.
726 : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
727 : that of the CPU on which Zorba is currently running.
728 : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
729 : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
730 : look-up specifically.
731 : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-4.xq
732 :)
733declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
734 $lang as xs:language,
735 $relationship as xs:string )
736 as xs:string+ external;
737
738(:~
739 : Looks-up the given phrase in a thesaurus.
740 :
741 : @param $uri The URI specifying the thesaurus to use.
742 : @param $phrase The phrase to look up.
743 : @param $lang The
744 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
745 : of <code>$phrase</code>.
746 : @param $relationship The relationship the results are to have to
747 : <code>$phrase</code>.
748 : @param $level-least The minimum number of levels within the thesaurus to be
749 : traversed.
750 : @param $level-most The maximum number of levels within the thesaurus to be
751 : traversed.
752 : @return the original and related phrases.
753 : @error err:FOCA0003 if either <code>$level-least</code> or
754 : <code>$level-most</code> is either negative or too large.
755 : @error err:FTST0018 if <code>$uri</code> refers to a thesaurus
756 : that is not found in the statically known thesauri.
757 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
758 : @error zerr:ZOSE0001 if the thesaurus data file could not be found.
759 : @error zerr:ZOSE0002 if the thesaurus data file is not a plain file.
760 : @error zerr:ZXQP8401 if the thesaurus data file's version is not supported
761 : by the currently running version of Zorba.
762 : @error zerr:ZXQP8402 if the thesaurus data file's endianness does not match
763 : that of the CPU on which Zorba is currently running.
764 : @error zerr:ZXQP8403 if there was an error reading the thesaurus data file.
765 : @error zerr:ZXQP8406 if <code>$lang</code> is not supported for thesaurus
766 : look-up specifically.
767 : @example test/rbkt/Queries/zorba/fulltext/ft-module-thesaurus-lookup-5.xq
768 :)
769declare function ft:thesaurus-lookup( $uri as xs:string, $phrase as xs:string,
770 $lang as xs:language,
771 $relationship as xs:string,
772 $level-least as xs:integer,
773 $level-most as xs:integer )
774 as xs:string+ external;
775
776(:~
777 : Tokenizes the given document.
778 :
779 : @param $node The node to tokenize.
780 : @param $lang The default
781 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
782 : of <code>$node</code>.
783 : @return a (possibly empty) sequence of tokens.
784 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
785 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq
786 :)
787declare function ft:tokenize( $node as node(), $lang as xs:language )
788 as element(ft-schema:token)* external;
789
790(:~
791 : Tokenizes the given document.
792 :
793 : @param $node The node to tokenize.
794 : The document's default
795 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
796 : is assumed to be the one returned by <code>ft:current-lang()</code>.
797 : @return a (possibly empty) sequence of tokens.
798 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
799 : general.
800 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq
801 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq
802 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq
803 :)
804declare function ft:tokenize( $node as node() )
805 as element(ft-schema:token)* external;
806
807(:~
808 : Tokenizes the given string.
809 :
810 : @param $string The string to tokenize.
811 : @param $lang The default
812 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
813 : of <code>$string</code>.
814 : @return a (possibly empty) sequence of tokens.
815 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
816 : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for
817 : tokenization specifically.
818 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-1.xq
819 :)
820declare function ft:tokenize-string( $string as xs:string,
821 $lang as xs:language )
822 as xs:string* external;
823
824(:~
825 : Tokenizes the given string.
826 :
827 : @param $string The string to tokenize.
828 : The string's default
829 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
830 : is assumed to be the one returned by <code>ft:current-lang()</code>.
831 : @return a (possibly empty) sequence of tokens.
832 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
833 : general.
834 : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for
835 : tokenization specifically.
836 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-string-2.xq
837 :)
838declare function ft:tokenize-string( $string as xs:string )
839 as xs:string* external;
840
841(:~
842 : Gets properties of the tokenizer for the given
843 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>.
844 :
845 : @param $lang The language of the tokenizer to get the properties of.
846 : @return said properties.
847 : @error err:FTST0009 if <code>$lang</code> is not supported in general.
848 : @error zerr:ZXQP8407 if <code>$lang</code> is not supported for
849 : tokenization specifically.
850 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-1.xq
851 :)
852declare function ft:tokenizer-properties( $lang as xs:language )
853 as element(ft-schema:tokenizer-properties) external;
854
855(:~
856 : Gets properties of the tokenizer for the
857 : <a href="http://www.w3.org/TR/xmlschema-2/#language">language</a>
858 : returned by <code>ft:current-lang()</code>.
859 :
860 : @return said properties.
861 : @error err:FTST0009 if <code>ft:current-lang()</code> is not supported in
862 : general.
863 : @error zerr:ZXQP8407 if <code>ft:current-lang()</code> is not supported for
864 : tokenization specifically.
865 : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenizer-properties-2.xq
866 :)
867declare function ft:tokenizer-properties()
868 as element(ft-schema:tokenizer-properties) external;
869
870(:===========================================================================:)
871
872(: vim:set et sw=2 ts=2: :)
0873
=== added file 'modules/com/zorba-xquery/www/modules/full-text.xsd'
--- modules/com/zorba-xquery/www/modules/full-text.xsd 1970-01-01 00:00:00 +0000
+++ modules/com/zorba-xquery/www/modules/full-text.xsd 2012-04-24 21:06:20 +0000
@@ -0,0 +1,134 @@
1<?xml version="1.0"?>
2<!--
3 ! Copyright 2006-2011 The FLWOR Foundation.
4 !
5 ! Licensed under the Apache License, Version 2.0 (the "License");
6 ! you may not use this file except in compliance with the License.
7 ! You may obtain a copy of the License at
8 !
9 ! http://www.apache.org/licenses/LICENSE-2.0
10 !
11 ! Unless required by applicable law or agreed to in writing, software
12 ! distributed under the License is distributed on an "AS IS" BASIS,
13 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ! See the License for the specific language governing permissions and
15 ! limitations under the License.
16-->
17
18<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
19 targetNamespace="http://www.zorba-xquery.com/modules/full-text"
20 xmlns="http://www.zorba-xquery.com/modules/full-text"
21 elementFormDefault="qualified"
22 attributeFormDefault="unqualified">
23
24 <!--======================================================================-->
25
26 <xs:element name="compare-options">
27 <xs:complexType>
28 <xs:attributeGroup ref="compare-attributes"/>
29 </xs:complexType>
30 </xs:element>
31
32 <xs:attributeGroup name="compare-attributes">
33 <xs:attribute name="case" type="sensitivity" default="insensitive"/>
34 <xs:attribute name="diacritics" type="sensitivity" default="insensitive"/>
35 <xs:attribute name="stem" type="yes-no-both" default="no"/>
36 </xs:attributeGroup>
37
38 <xs:simpleType name="sensitivity">
39 <xs:restriction base="xs:string">
40 <xs:enumeration value="insensitive"/>
41 <xs:enumeration value="sensitive"/>
42 <xs:enumeration value="both"/>
43 </xs:restriction>
44 </xs:simpleType>
45
46 <xs:simpleType name="yes-no-both">
47 <xs:restriction base="xs:string">
48 <xs:enumeration value="yes"/>
49 <xs:enumeration value="no"/>
50 <xs:enumeration value="both"/>
51 </xs:restriction>
52 </xs:simpleType>
53
54 <xs:complexType name="boolean-value">
55 <xs:attribute name="value" type="xs:boolean" use="required"/>
56 </xs:complexType>
57
58 <!--======================================================================-->
59
60 <xs:element name="token">
61 <xs:complexType>
62
63 <!-- The language of the token. -->
64 <xs:attribute name="lang" type="xs:language"/>
65
66 <!-- The sentence number. -->
67 <xs:attribute name="sentence" type="xs:nonNegativeInteger" use="required"/>
68
69 <!-- The paragraph number. -->
70 <xs:attribute name="paragraph" type="xs:nonNegativeInteger" use="required"/>
71
72 <!-- The token string value. -->
73 <xs:attribute name="value" type="xs:string" use="required"/>
74
75 <!--
76 ! A reference to the originating node. If the token occurred within an
77 ! element, the reference refers to the text node. If the token occurred
78 ! within an attribute, the reference refers to the attribute node.
79 -->
80 <xs:attribute name="node-ref" type="xs:anyURI"/>
81
82 </xs:complexType>
83 </xs:element>
84
85 <!--======================================================================-->
86
87 <xs:element name="tokenizer-properties">
88 <xs:complexType>
89 <xs:all>
90
91 <!--
92 ! If true, XML comments separate tokens. (No example can be provided
93 ! here because it is illegal to nest an XML comment inside an XML
94 ! comment.)
95 -->
96 <xs:element name="comments-separate-tokens" type="boolean-value"/>
97
98 <!--
99 ! If true, XML elements separate tokens. For example,
100 ! <b>B</b>old would be 2 tokens instead of 1.
101 -->
102 <xs:element name="elements-separate-tokens" type="boolean-value"/>
103
104 <!--
105 ! If true, XML processing instructions separate tokens. For example,
106 ! net<?PI pi?>work would be 2 tokens instead of 1.
107 -->
108 <xs:element name="processing-instructions-separate-tokens" type="boolean-value"/>
109
110 <!--
111 ! The list of languages that the tokenizer can tokenize.
112 -->
113 <xs:element name="supported-languages">
114 <xs:complexType>
115 <xs:sequence>
116 <xs:element name="lang" type="xs:language" maxOccurs="unbounded"/>
117 </xs:sequence>
118 </xs:complexType>
119 </xs:element>
120
121 </xs:all>
122
123 <!--
124 ! The tokenizer's identifying URI.
125 -->
126 <xs:attribute name="uri" type="xs:anyURI"/>
127
128 </xs:complexType>
129 </xs:element>
130
131 <!--======================================================================-->
132
133</xs:schema>
134<!-- vim:set et sw=2 ts=2: -->
0135
=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-24 12:39:38 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_request_handler.cpp 2012-04-24 21:06:20 +0000
@@ -39,7 +39,6 @@
39 theSerStream(NULL),39 theSerStream(NULL),
40 thePost(NULL),40 thePost(NULL),
41 theLast(NULL),41 theLast(NULL),
42 theLastSerializerOptions(NULL),
43 theIsHeadRequest(false)42 theIsHeadRequest(false)
44 {43 {
45 theHeaderLists.push_back(NULL);44 theHeaderLists.push_back(NULL);
@@ -260,6 +259,7 @@
260 void HttpRequestHandler::cleanUpBody()259 void HttpRequestHandler::cleanUpBody()
261 {260 {
262 delete theSerStream;261 delete theSerStream;
262 theSerStream = 0;
263 theLastBodyHadContent = false;263 theLastBodyHadContent = false;
264 }264 }
265265
266266
=== modified file 'modules/com/zorba-xquery/www/modules/pregenerated/errors.xq'
--- modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-24 12:39:38 +0000
+++ modules/com/zorba-xquery/www/modules/pregenerated/errors.xq 2012-04-24 21:06:20 +0000
@@ -188,6 +188,7 @@
188188
189(:~189(:~
190 :190 :
191 : The thesaurus data file's endianness does not match that of the CPU.
191 : 192 :
192:)193:)
193declare variable $zerr:ZXQP8402 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8402");194declare variable $zerr:ZXQP8402 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8402");
@@ -201,6 +202,22 @@
201202
202(:~203(:~
203:)204:)
205declare variable $zerr:ZXQP8404 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8404");
206
207(:~
208:)
209declare variable $zerr:ZXQP8405 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8405");
210
211(:~
212:)
213declare variable $zerr:ZXQP8406 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8406");
214
215(:~
216:)
217declare variable $zerr:ZXQP8407 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQP8407");
218
219(:~
220:)
204declare variable $zerr:ZXQD0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQD0001");221declare variable $zerr:ZXQD0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZXQD0001");
205222
206(:~223(:~
207224
=== modified file 'modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq'
--- modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-24 12:39:38 +0000
+++ modules/com/zorba-xquery/www/modules/xqdoc2xhtml/index.xq 2012-04-24 21:06:20 +0000
@@ -839,9 +839,7 @@
839 if(fn:matches($specLine, "Args:")) then839 if(fn:matches($specLine, "Args:")) then
840 let $arg_split := fn:substring-after($specLine, "-x")840 let $arg_split := fn:substring-after($specLine, "-x")
841 return841 return
842 if(fn:string-length($arg_split) eq 0) then842 if(fn:string-length($arg_split) eq 0) then string-join($specLines, " ")
843 fn:error($err:UE008, fn:concat("Unknown Args: in spec file for example <", $exampleSource,"> .
844 Add the example input and expected output by hand in the example, in a commentary that should also include the word 'output'."))
845 else843 else
846 let $var_value := fn:tokenize($arg_split, "=")844 let $var_value := fn:tokenize($arg_split, "=")
847 let $var_name := fn:normalize-space(fn:replace($var_value[1], ":$", ""))845 let $var_name := fn:normalize-space(fn:replace($var_value[1], ":$", ""))
848846
=== modified file 'scripts/zt-wn-get'
--- scripts/zt-wn-get 2012-04-24 12:39:38 +0000
+++ scripts/zt-wn-get 2012-04-24 21:06:20 +0000
@@ -22,7 +22,7 @@
22 echo 'Arguments: [--workdir <workdir>] [--builddir <builddir>]'22 echo 'Arguments: [--workdir <workdir>] [--builddir <builddir>]'
23 echo ' [--thesaurusurl <thesaurusurl>]'23 echo ' [--thesaurusurl <thesaurusurl>]'
24 echo ' <zorba_repository>'24 echo ' <zorba_repository>'
25 echo '<zorba_repository> is the top-level SVN working copy.'25 echo '<zorba_repository> is the top-level BZR working copy.'
26 echo '<workdir> is a temp directory to download and unzip XQTS (default: /tmp).'26 echo '<workdir> is a temp directory to download and unzip XQTS (default: /tmp).'
27 echo '<builddir> is the directory Zorba has been built in'27 echo '<builddir> is the directory Zorba has been built in'
28 echo ' (default: <zorba_repository>/build)'28 echo ' (default: <zorba_repository>/build)'
@@ -71,8 +71,8 @@
71echo Build dir is at $BUILD71echo Build dir is at $BUILD
7272
73# Compile thesaurus to binary format73# Compile thesaurus to binary format
74mkdir -p $BUILD/test/rbkt/thesauri74mkdir -p $BUILD/LIB_PATH/edu/princeton/wordnet
75THESAURUS_DEST="$BUILD/test/rbkt/thesauri/wordnet-en.zth"75THESAURUS_DEST="$BUILD/LIB_PATH/edu/princeton/wordnet/wordnet-en.zth"
76echo "Compiling thesaurus to $THESAURUS_DEST..."76echo "Compiling thesaurus to $THESAURUS_DEST..."
77untar_dir=`mktemp -d "$WORK/thesaurus.XXXXXX"`77untar_dir=`mktemp -d "$WORK/thesaurus.XXXXXX"`
78cd "$untar_dir"78cd "$untar_dir"
7979
=== modified file 'src/api/CMakeLists.txt'
--- src/api/CMakeLists.txt 2012-04-24 12:39:38 +0000
+++ src/api/CMakeLists.txt 2012-04-24 21:06:20 +0000
@@ -62,8 +62,9 @@
62IF (NOT ZORBA_NO_FULL_TEXT)62IF (NOT ZORBA_NO_FULL_TEXT)
63 LIST(APPEND API_SRCS63 LIST(APPEND API_SRCS
64 stemmer.cpp64 stemmer.cpp
65 stemmer_wrapper.cpp65 stemmer_wrappers.cpp
66 thesaurus.cpp)66 thesaurus.cpp
67 thesaurus_wrappers.cpp)
67ENDIF (NOT ZORBA_NO_FULL_TEXT)68ENDIF (NOT ZORBA_NO_FULL_TEXT)
6869
69ADD_SRC_SUBFOLDER(API_SRCS serialization API_SERIALIZATION_SRCS)70ADD_SRC_SUBFOLDER(API_SRCS serialization API_SERIALIZATION_SRCS)
7071
=== modified file 'src/api/staticcontextimpl.cpp'
--- src/api/staticcontextimpl.cpp 2012-04-24 12:39:38 +0000
+++ src/api/staticcontextimpl.cpp 2012-04-24 21:06:20 +0000
@@ -42,8 +42,8 @@
42#include "context/static_context.h"42#include "context/static_context.h"
43#include "context/static_context_consts.h"43#include "context/static_context_consts.h"
44#ifndef ZORBA_NO_FULL_TEXT44#ifndef ZORBA_NO_FULL_TEXT
45#include "context/stemmer_wrappers.h"45#include "stemmer_wrappers.h"
46#include "context/thesaurus_wrappers.h"46#include "thesaurus_wrappers.h"
47#endif /* ZORBA_NO_FULL_TEXT */47#endif /* ZORBA_NO_FULL_TEXT */
48#include "uri_resolver_wrappers.h"48#include "uri_resolver_wrappers.h"
4949
@@ -65,7 +65,6 @@
6565
66namespace zorba {66namespace zorba {
6767
68
69/*******************************************************************************68/*******************************************************************************
70 Create a StaticContextImpl obj as well as an internal static_context obj S.69 Create a StaticContextImpl obj as well as an internal static_context obj S.
71 S is created as a child of the zorba root sctx. This constructor is used70 S is created as a child of the zorba root sctx. This constructor is used
7271
=== renamed file 'src/api/stemmer_wrapper.cpp' => 'src/api/stemmer_wrappers.cpp'
--- src/api/stemmer_wrapper.cpp 2012-04-24 12:39:38 +0000
+++ src/api/stemmer_wrappers.cpp 2012-04-24 21:06:20 +0000
@@ -23,7 +23,7 @@
23#include "diagnostics/assert.h"23#include "diagnostics/assert.h"
24#include "util/cxx_util.h"24#include "util/cxx_util.h"
2525
26#include "stemmer_wrapper.h"26#include "stemmer_wrappers.h"
2727
28using namespace zorba::locale;28using namespace zorba::locale;
2929
@@ -32,8 +32,8 @@
3232
33///////////////////////////////////////////////////////////////////////////////33///////////////////////////////////////////////////////////////////////////////
3434
35StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr p ) :35StemmerWrapper::StemmerWrapper( zorba::Stemmer::ptr api_stemmer ) :
36 api_stemmer_( std::move( p ) )36 api_stemmer_( std::move( api_stemmer ) )
37{37{
38 ZORBA_ASSERT( api_stemmer_.get() );38 ZORBA_ASSERT( api_stemmer_.get() );
39}39}
@@ -42,6 +42,12 @@
42 api_stemmer_.release()->destroy();42 api_stemmer_.release()->destroy();
43}43}
4444
45void StemmerWrapper::properties( Properties *props ) const {
46 zorba::Stemmer::Properties api_props;
47 api_stemmer_->properties( &api_props );
48 props->uri = api_props.uri;
49}
50
45void StemmerWrapper::stem( zstring const &word, iso639_1::type lang,51void StemmerWrapper::stem( zstring const &word, iso639_1::type lang,
46 zstring *result ) const {52 zstring *result ) const {
47 String const api_word( Unmarshaller::newString( word ) );53 String const api_word( Unmarshaller::newString( word ) );
@@ -52,19 +58,22 @@
52///////////////////////////////////////////////////////////////////////////////58///////////////////////////////////////////////////////////////////////////////
5359
54StemmerProviderWrapper::60StemmerProviderWrapper::
55StemmerProviderWrapper( zorba::StemmerProvider const *p ) :61StemmerProviderWrapper( zorba::StemmerProvider const *api_stemmer_provider ) :
56 api_stemmer_provider_( p )62 api_stemmer_provider_( api_stemmer_provider )
57{63{
58 ZORBA_ASSERT( api_stemmer_provider_ );64 ZORBA_ASSERT( api_stemmer_provider_ );
59}65}
6066
61Stemmer::ptr67bool StemmerProviderWrapper::getStemmer( iso639_1::type lang,
62StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const {68 Stemmer::ptr *result ) const {
63 zorba::Stemmer::ptr p( api_stemmer_provider_->getStemmer( lang ) );69 zorba::Stemmer::ptr api_ptr;
64 Stemmer::ptr result;70 zorba::Stemmer::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr;
65 if ( p.get() )71 if ( api_stemmer_provider_->getStemmer( lang, api_ptr_ptr ) ) {
66 result.reset( new StemmerWrapper( std::move( p ) ) );72 if ( result )
67 return std::move( result );73 result->reset( new StemmerWrapper( std::move( api_ptr ) ) );
74 return true;
75 }
76 return false;
68}77}
6978
70///////////////////////////////////////////////////////////////////////////////79///////////////////////////////////////////////////////////////////////////////
7180
=== renamed file 'src/api/stemmer_wrapper.h' => 'src/api/stemmer_wrappers.h'
--- src/api/stemmer_wrapper.h 2012-04-24 12:39:38 +0000
+++ src/api/stemmer_wrappers.h 2012-04-24 21:06:20 +0000
@@ -35,6 +35,7 @@
3535
36 // inherited36 // inherited
37 void destroy() const;37 void destroy() const;
38 void properties( Properties* ) const;
38 void stem( zstring const &word, locale::iso639_1::type lang,39 void stem( zstring const &word, locale::iso639_1::type lang,
39 zstring *result ) const;40 zstring *result ) const;
40private:41private:
@@ -50,7 +51,7 @@
50 }51 }
5152
52 // inherited53 // inherited
53 Stemmer::ptr get_stemmer( locale::iso639_1::type lang ) const;54 bool getStemmer( locale::iso639_1::type, Stemmer::ptr* = 0 ) const;
54private:55private:
55 zorba::StemmerProvider const *const api_stemmer_provider_;56 zorba::StemmerProvider const *const api_stemmer_provider_;
56};57};
5758
=== modified file 'src/api/thesaurus.cpp'
--- src/api/thesaurus.cpp 2012-04-24 12:39:38 +0000
+++ src/api/thesaurus.cpp 2012-04-24 21:06:20 +0000
@@ -25,9 +25,11 @@
25 // out-of-line since it's virtual25 // out-of-line since it's virtual
26}26}
2727
28//Thesaurus::iterator::~iterator() {28#if 0
29// // out-of-line since it's virtual29Thesaurus::iterator::~iterator() {
30//}30 // out-of-line since it's virtual
31}
32#endif
3133
32///////////////////////////////////////////////////////////////////////////////34///////////////////////////////////////////////////////////////////////////////
3335
3436
=== renamed file 'src/context/thesaurus_wrappers.cpp' => 'src/api/thesaurus_wrappers.cpp'
--- src/context/thesaurus_wrappers.cpp 2012-04-24 12:39:38 +0000
+++ src/api/thesaurus_wrappers.cpp 2012-04-24 21:06:20 +0000
@@ -87,6 +87,27 @@
8787
88///////////////////////////////////////////////////////////////////////////////88///////////////////////////////////////////////////////////////////////////////
8989
90ThesaurusProviderWrapper::
91ThesaurusProviderWrapper( zorba::ThesaurusProvider const *p ) :
92 api_thesaurus_provider_( p )
93{
94 ZORBA_ASSERT( api_thesaurus_provider_ );
95}
96
97bool ThesaurusProviderWrapper::getThesaurus( iso639_1::type lang,
98 Thesaurus::ptr *result ) const {
99 zorba::Thesaurus::ptr api_ptr;
100 zorba::Thesaurus::ptr *const api_ptr_ptr = result ? &api_ptr : nullptr;
101 if ( api_thesaurus_provider_->getThesaurus( lang, api_ptr_ptr ) ) {
102 if ( result )
103 result->reset( new ThesaurusWrapper( std::move( api_ptr ) ) );
104 return true;
105 }
106 return false;
107}
108
109///////////////////////////////////////////////////////////////////////////////
110
90} // namespace internal111} // namespace internal
91} // namespace zorba112} // namespace zorba
92113
93114
=== renamed file 'src/context/thesaurus_wrappers.h' => 'src/api/thesaurus_wrappers.h'
--- src/context/thesaurus_wrappers.h 2012-04-24 12:39:38 +0000
+++ src/api/thesaurus_wrappers.h 2012-04-24 21:06:20 +0000
@@ -22,6 +22,7 @@
22#ifndef ZORBA_NO_FULL_TEXT22#ifndef ZORBA_NO_FULL_TEXT
2323
24#include <zorba/thesaurus.h>24#include <zorba/thesaurus.h>
25
25#include "runtime/full_text/thesaurus.h"26#include "runtime/full_text/thesaurus.h"
2627
27namespace zorba {28namespace zorba {
@@ -54,6 +55,17 @@
54 zorba::Thesaurus::ptr api_thesaurus_;55 zorba::Thesaurus::ptr api_thesaurus_;
55};56};
5657
58class ThesaurusProviderWrapper : public ThesaurusProvider {
59public:
60 ThesaurusProviderWrapper( zorba::ThesaurusProvider const* );
61
62 // inherited
63 bool getThesaurus( locale::iso639_1::type, Thesaurus::ptr* ) const;
64
65private:
66 zorba::ThesaurusProvider::ptr const api_thesaurus_provider_;
67};
68
57///////////////////////////////////////////////////////////////////////////////69///////////////////////////////////////////////////////////////////////////////
5870
59} // namespace internal71} // namespace internal
6072
=== modified file 'src/api/uri_resolver_wrappers.cpp'
--- src/api/uri_resolver_wrappers.cpp 2012-04-24 12:39:38 +0000
+++ src/api/uri_resolver_wrappers.cpp 2012-04-24 21:06:20 +0000
@@ -15,24 +15,20 @@
15 */15 */
16#include "stdafx.h"16#include "stdafx.h"
1717
18#include <zorba/thesaurus.h>
19
20#include "runtime/full_text/thesaurus.h"
21
22#include "thesaurus_wrappers.h"
23#include "unmarshaller.h"
18#include "uri_resolver_wrappers.h"24#include "uri_resolver_wrappers.h"
19#include "uriresolverimpl.h"25#include "uriresolverimpl.h"
20#include "unmarshaller.h"
21#include <zorba/thesaurus.h>
22#include <runtime/full_text/thesaurus.h>
23#include <context/thesaurus_wrappers.h>
2426
25namespace zorba27namespace zorba
26{28{
27 // "Convenience" class for passing an internal EntityData object to29 // "Convenience" class for passing an internal EntityData object to
28 // external mappers/resolvers. This can serve as a plain EntityData or30 // external mappers/resolvers.
29 // a ThesaurusEntityData. However, when there's another EntityData subclass
30 // in future, this won't work as EntityData becomes an ambiguous base class...
31#ifndef ZORBA_NO_FULL_TEXT
32 class EntityDataWrapper : public ThesaurusEntityData
33#else
34 class EntityDataWrapper : public EntityData31 class EntityDataWrapper : public EntityData
35#endif /* ZORBA_NO_FULL_TEXT */
36 {32 {
37 public:33 public:
38 static EntityDataWrapper const* create(internal::EntityData const* aData) {34 static EntityDataWrapper const* create(internal::EntityData const* aData) {
@@ -45,12 +41,7 @@
45 return new EntityDataWrapper(EntityData::SCHEMA);41 return new EntityDataWrapper(EntityData::SCHEMA);
46#ifndef ZORBA_NO_FULL_TEXT42#ifndef ZORBA_NO_FULL_TEXT
47 case internal::EntityData::THESAURUS:43 case internal::EntityData::THESAURUS:
48 {44 return new EntityDataWrapper(EntityData::THESAURUS);
49 EntityDataWrapper* retval = new EntityDataWrapper(EntityData::THESAURUS);
50 retval->theThesaurusLang =
51 dynamic_cast<const internal::ThesaurusEntityData*>(aData)->getLanguage();
52 return retval;
53 }
54 case internal::EntityData::STOP_WORDS:45 case internal::EntityData::STOP_WORDS:
55 return new EntityDataWrapper(EntityData::STOP_WORDS);46 return new EntityDataWrapper(EntityData::STOP_WORDS);
56#endif /* ZORBA_NO_FULL_TEXT */47#endif /* ZORBA_NO_FULL_TEXT */
@@ -67,21 +58,12 @@
67 return theKind;58 return theKind;
68 }59 }
6960
70#ifndef ZORBA_NO_FULL_TEXT
71 virtual zorba::locale::iso639_1::type getLanguage() const {
72 return theThesaurusLang;
73 }
74#endif /* ZORBA_NO_FULL_TEXT */
75
76 private:61 private:
77 EntityDataWrapper(EntityData::Kind aKind)62 EntityDataWrapper(EntityData::Kind aKind)
78 : theKind(aKind)63 : theKind(aKind)
79 {}64 {}
8065
81 EntityData::Kind const theKind;66 EntityData::Kind const theKind;
82#ifndef ZORBA_NO_FULL_TEXT
83 zorba::locale::iso639_1::type theThesaurusLang;
84#endif /* ZORBA_NO_FULL_TEXT */
85 };67 };
8668
87 URIMapperWrapper::URIMapperWrapper(zorba::URIMapper& aUserMapper)69 URIMapperWrapper::URIMapperWrapper(zorba::URIMapper& aUserMapper)
@@ -169,13 +151,13 @@
169 }151 }
170#ifndef ZORBA_NO_FULL_TEXT152#ifndef ZORBA_NO_FULL_TEXT
171 else {153 else {
172 Thesaurus* lUserThesaurus = dynamic_cast<Thesaurus*>(lUserPtr.get());154 ThesaurusProvider* lUserThesaurusProvider =
173 if (lUserThesaurus != NULL) {155 dynamic_cast<ThesaurusProvider*>(lUserPtr.get());
174 // Here we pass memory ownership of the actual Thesaurus to the156 if (lUserThesaurusProvider) {
175 // internal ThesaurusWrapper.157 // Here we pass memory ownership of the actual ThesaurusProvider to
176 lRetval = new internal::ThesaurusWrapper158 // the internal ThesaurusWrapper.
177 (Thesaurus::ptr(lUserThesaurus));159 lRetval = new internal::ThesaurusProviderWrapper
178 lUserPtr.release();160 (lUserThesaurusProvider);
179 }161 }
180 else {162 else {
181 assert(false);163 assert(false);
182164
=== modified file 'src/api/xmldatamanagerimpl.cpp'
--- src/api/xmldatamanagerimpl.cpp 2012-04-24 12:39:38 +0000
+++ src/api/xmldatamanagerimpl.cpp 2012-04-24 21:06:20 +0000
@@ -47,7 +47,7 @@
47#include "runtime/util/flowctl_exception.h"47#include "runtime/util/flowctl_exception.h"
4848
49#ifndef ZORBA_NO_FULL_TEXT49#ifndef ZORBA_NO_FULL_TEXT
50#include "stemmer_wrapper.h"50#include "stemmer_wrappers.h"
51#endif /* ZORBA_NO_FULL_TEXT */51#endif /* ZORBA_NO_FULL_TEXT */
5252
53namespace zorba {53namespace zorba {
5454
=== modified file 'src/api/xmldatamanagerimpl.h'
--- src/api/xmldatamanagerimpl.h 2012-04-24 12:39:38 +0000
+++ src/api/xmldatamanagerimpl.h 2012-04-24 21:06:20 +0000
@@ -27,7 +27,7 @@
27#include "util/singleton.h"27#include "util/singleton.h"
2828
29#ifndef ZORBA_NO_FULL_TEXT29#ifndef ZORBA_NO_FULL_TEXT
30#include "stemmer_wrapper.h"30#include "stemmer_wrappers.h"
31#endif /* ZORBA_NO_FULL_TEXT */31#endif /* ZORBA_NO_FULL_TEXT */
3232
33namespace zorba {33namespace zorba {
3434
=== modified file 'src/compiler/codegen/plan_visitor.cpp'
--- src/compiler/codegen/plan_visitor.cpp 2012-04-24 12:39:38 +0000
+++ src/compiler/codegen/plan_visitor.cpp 2012-04-24 21:06:20 +0000
@@ -250,7 +250,7 @@
250class plan_ftnode_visitor : public ftnode_visitor 250class plan_ftnode_visitor : public ftnode_visitor
251{251{
252public:252public:
253 typedef std::list<PlanIter_t> PlanIter_list_t;253 typedef std::vector<PlanIter_t> PlanIter_list_t;
254254
255 plan_ftnode_visitor( plan_visitor* v ) : plan_visitor_( v ) { }255 plan_ftnode_visitor( plan_visitor* v ) : plan_visitor_( v ) { }
256256
257257
=== modified file 'src/compiler/expression/expr_put.cpp'
--- src/compiler/expression/expr_put.cpp 2012-04-24 12:39:38 +0000
+++ src/compiler/expression/expr_put.cpp 2012-04-24 21:06:20 +0000
@@ -41,6 +41,7 @@
41#include "compiler/expression/function_item_expr.h"41#include "compiler/expression/function_item_expr.h"
42#include "compiler/parser/parse_constants.h"42#include "compiler/parser/parse_constants.h"
4343
44#include "diagnostics/assert.h"
44#include "functions/function.h"45#include "functions/function.h"
45#include "functions/udf.h"46#include "functions/udf.h"
4647
4748
=== modified file 'src/compiler/translator/translator.cpp'
--- src/compiler/translator/translator.cpp 2012-04-24 12:39:38 +0000
+++ src/compiler/translator/translator.cpp 2012-04-24 21:06:20 +0000
@@ -68,6 +68,7 @@
68#include "functions/signature.h"68#include "functions/signature.h"
69#include "functions/udf.h"69#include "functions/udf.h"
70#include "functions/external_function.h"70#include "functions/external_function.h"
71#include "functions/func_ft_module.h"
7172
72#include "annotations/annotations.h"73#include "annotations/annotations.h"
7374
@@ -859,7 +860,7 @@
859{860{
860 ZORBA_ASSERT(count >= 0);861 ZORBA_ASSERT(count >= 0);
861862
862 ftnode *n = NULL;863 ftnode *n = nullptr;
863 while ( count-- > 0 )864 while ( count-- > 0 )
864 {865 {
865 ZORBA_FATAL( !theFTNodeStack.empty(), "" );866 ZORBA_FATAL( !theFTNodeStack.empty(), "" );
@@ -3294,6 +3295,41 @@
3294 qnameItem->getLocalName())));3295 qnameItem->getLocalName())));
3295 }3296 }
32963297
3298#ifndef ZORBA_NO_FULL_TEXT
3299 if (qnameItem->getNamespace() == static_context::ZORBA_FULL_TEXT_FN_NS &&
3300 (qnameItem->getLocalName() == "tokenizer-properties" ||
3301 qnameItem->getLocalName() == "tokenize"))
3302 {
3303 FunctionConsts::FunctionKind kind;
3304
3305 if (qnameItem->getLocalName() == "tokenizer-properties")
3306 {
3307 assert(numParams <= 1);
3308
3309 if (numParams == 1)
3310 kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1;
3311 else
3312 kind = FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0;
3313
3314 f = new full_text_tokenizer_properties(f->getSignature(), kind);
3315 }
3316 else
3317 {
3318 assert(numParams == 1 || numParams == 2);
3319
3320 if (numParams == 2)
3321 kind = FunctionConsts::FULL_TEXT_TOKENIZE_2;
3322 else
3323 kind = FunctionConsts::FULL_TEXT_TOKENIZE_1;
3324
3325 f = new full_text_tokenize(f->getSignature(), kind);
3326 }
3327
3328 f->setStaticContext(theRootSctx);
3329 bind_fn(f, numParams, loc);
3330 }
3331#endif /* ZORBA_NO_FULL_TEXT */
3332
3297 f->setAnnotations(theAnnotations);3333 f->setAnnotations(theAnnotations);
3298 theAnnotations = NULL; // important to reset3334 theAnnotations = NULL; // important to reset
32993335
@@ -12512,7 +12548,7 @@
12512{12548{
12513 TRACE_VISIT ();12549 TRACE_VISIT ();
12514#ifndef ZORBA_NO_FULL_TEXT12550#ifndef ZORBA_NO_FULL_TEXT
12515 push_ftstack( NULL ); // sentinel12551 push_ftstack( nullptr ); // sentinel
12516#endif /* ZORBA_NO_FULL_TEXT */12552#endif /* ZORBA_NO_FULL_TEXT */
12517 return no_state;12553 return no_state;
12518}12554}
@@ -12756,7 +12792,7 @@
12756void *begin_visit (const FTMildNot& v) {12792void *begin_visit (const FTMildNot& v) {
12757 TRACE_VISIT ();12793 TRACE_VISIT ();
12758#ifndef ZORBA_NO_FULL_TEXT12794#ifndef ZORBA_NO_FULL_TEXT
12759 push_ftstack( NULL ); // sentinel12795 push_ftstack( nullptr ); // sentinel
12760#endif /* ZORBA_NO_FULL_TEXT */12796#endif /* ZORBA_NO_FULL_TEXT */
12761 return no_state;12797 return no_state;
12762}12798}
@@ -12799,7 +12835,7 @@
12799void *begin_visit (const FTOr& v) {12835void *begin_visit (const FTOr& v) {
12800 TRACE_VISIT ();12836 TRACE_VISIT ();
12801#ifndef ZORBA_NO_FULL_TEXT12837#ifndef ZORBA_NO_FULL_TEXT
12802 push_ftstack( NULL ); // sentinel12838 push_ftstack( nullptr ); // sentinel
12803#endif /* ZORBA_NO_FULL_TEXT */12839#endif /* ZORBA_NO_FULL_TEXT */
12804 return no_state;12840 return no_state;
12805}12841}
@@ -13058,7 +13094,7 @@
13058 levels = dynamic_cast<ftrange*>( pop_ftstack() );13094 levels = dynamic_cast<ftrange*>( pop_ftstack() );
13059 ZORBA_ASSERT( levels );13095 ZORBA_ASSERT( levels );
13060 } else13096 } else
13061 levels = NULL;13097 levels = nullptr;
1306213098
13063 ftthesaurus_id *const tid = new ftthesaurus_id(13099 ftthesaurus_id *const tid = new ftthesaurus_id(
13064 loc, v.get_uri(), v.get_relationship(), levels13100 loc, v.get_uri(), v.get_relationship(), levels
@@ -13070,7 +13106,7 @@
13070void *begin_visit (const FTThesaurusOption& v) {13106void *begin_visit (const FTThesaurusOption& v) {
13071 TRACE_VISIT ();13107 TRACE_VISIT ();
13072#ifndef ZORBA_NO_FULL_TEXT13108#ifndef ZORBA_NO_FULL_TEXT
13073 push_ftstack( NULL ); // sentinel13109 push_ftstack( nullptr ); // sentinel
13074#endif /* ZORBA_NO_FULL_TEXT */13110#endif /* ZORBA_NO_FULL_TEXT */
13075 return no_state;13111 return no_state;
13076}13112}
@@ -13078,10 +13114,8 @@
13078void end_visit (const FTThesaurusOption& v, void* /*visit_state*/) {13114void end_visit (const FTThesaurusOption& v, void* /*visit_state*/) {
13079 TRACE_VISIT_OUT ();13115 TRACE_VISIT_OUT ();
13080#ifndef ZORBA_NO_FULL_TEXT13116#ifndef ZORBA_NO_FULL_TEXT
13081 ftthesaurus_id *default_tid = NULL;13117 ftthesaurus_id *const default_tid = v.includes_default() ?
13082 if ( v.includes_default() ) {13118 new ftthesaurus_id( loc, "##default" ) : nullptr;
13083 default_tid = new ftthesaurus_id( loc, "##default" );
13084 }
1308513119
13086 ftthesaurus_option::thesaurus_id_list_t list;13120 ftthesaurus_option::thesaurus_id_list_t list;
13087 while ( true ) {13121 while ( true ) {
1308813122
=== modified file 'src/context/CMakeLists.txt'
--- src/context/CMakeLists.txt 2012-04-24 12:39:38 +0000
+++ src/context/CMakeLists.txt 2012-04-24 21:06:20 +0000
@@ -32,11 +32,6 @@
32 features.cpp32 features.cpp
33 )33 )
3434
35IF (NOT ZORBA_NO_FULL_TEXT)
36 LIST(APPEND CONTEXT_SRCS
37 thesaurus_wrappers.cpp)
38ENDIF (NOT ZORBA_NO_FULL_TEXT)
39
40SET(CONTEXT_BUILD_SRCS35SET(CONTEXT_BUILD_SRCS
41 ${CMAKE_CURRENT_BINARY_DIR}/context/root_static_context_init.cpp36 ${CMAKE_CURRENT_BINARY_DIR}/context/root_static_context_init.cpp
42 )37 )
4338
=== modified file 'src/context/default_url_resolvers.cpp'
--- src/context/default_url_resolvers.cpp 2012-04-24 12:39:38 +0000
+++ src/context/default_url_resolvers.cpp 2012-04-24 21:06:20 +0000
@@ -17,6 +17,7 @@
1717
1818
19#include "context/default_url_resolvers.h"19#include "context/default_url_resolvers.h"
20#include "util/cxx_util.h"
20#include "util/uri_util.h"21#include "util/uri_util.h"
21#include "util/http_util.h"22#include "util/http_util.h"
22#include "util/fs_util.h"23#include "util/fs_util.h"
@@ -41,8 +42,15 @@
41HTTPURLResolver::resolveURL42HTTPURLResolver::resolveURL
42(zstring const& aUrl, EntityData const* aEntityData)43(zstring const& aUrl, EntityData const* aEntityData)
43{44{
44 if (aEntityData->getKind() == EntityData::COLLECTION)45 switch ( aEntityData->getKind() ) {
45 return NULL;46 case EntityData::COLLECTION:
47#ifndef ZORBA_NO_FULL_TEXT
48 case EntityData::THESAURUS:
49#endif /* ZORBA_NO_FULL_TEXT */
50 return nullptr;
51 default:
52 break;
53 }
4654
47 uri::scheme lScheme = uri::get_scheme(aUrl);55 uri::scheme lScheme = uri::get_scheme(aUrl);
48 switch (lScheme) {56 switch (lScheme) {
@@ -82,8 +90,15 @@
82FileURLResolver::resolveURL90FileURLResolver::resolveURL
83(zstring const& aUrl, EntityData const* aEntityData)91(zstring const& aUrl, EntityData const* aEntityData)
84{92{
85 if (aEntityData->getKind() == EntityData::COLLECTION)93 switch ( aEntityData->getKind() ) {
86 return NULL;94 case EntityData::COLLECTION:
95#ifndef ZORBA_NO_FULL_TEXT
96 case EntityData::THESAURUS:
97#endif /* ZORBA_NO_FULL_TEXT */
98 return nullptr;
99 default:
100 break;
101 }
87102
88 uri::scheme lScheme = uri::get_scheme(aUrl);103 uri::scheme lScheme = uri::get_scheme(aUrl);
89 if (lScheme != uri::file) {104 if (lScheme != uri::file) {
@@ -111,7 +126,6 @@
111{126{
112 if (aEntityData->getKind() != EntityData::COLLECTION)127 if (aEntityData->getKind() != EntityData::COLLECTION)
113 return NULL;128 return NULL;
114
115 store::Item_t lName;129 store::Item_t lName;
116 GENV_STORE.getItemFactory()->createQName(lName, aUrl.c_str(), "", "zorba-internal-name-for-w3c-collections");130 GENV_STORE.getItemFactory()->createQName(lName, aUrl.c_str(), "", "zorba-internal-name-for-w3c-collections");
117 store::Collection_t lColl = GENV_STORE.getCollection(lName.getp(), true);131 store::Collection_t lColl = GENV_STORE.getCollection(lName.getp(), true);
118132
=== modified file 'src/context/static_context.cpp'
--- src/context/static_context.cpp 2012-04-24 12:39:38 +0000
+++ src/context/static_context.cpp 2012-04-24 21:06:20 +0000
@@ -378,11 +378,16 @@
378static_context::ZORBA_XML_FN_NS =378static_context::ZORBA_XML_FN_NS =
379"http://www.zorba-xquery.com/modules/xml";379"http://www.zorba-xquery.com/modules/xml";
380380
381#ifndef ZORBA_NO_FULL_TEXT
382const char*
383static_context::ZORBA_FULL_TEXT_FN_NS =
384"http://www.zorba-xquery.com/modules/full-text";
385#endif /* ZORBA_NO_FULL_TEXT */
386
381const char*387const char*
382static_context::ZORBA_XML_FN_OPTIONS_NS =388static_context::ZORBA_XML_FN_OPTIONS_NS =
383"http://www.zorba-xquery.com/modules/xml-options";389"http://www.zorba-xquery.com/modules/xml-options";
384390
385
386/***************************************************************************//**391/***************************************************************************//**
387 Target namespaces of zorba reserved modules392 Target namespaces of zorba reserved modules
388********************************************************************************/393********************************************************************************/
@@ -451,8 +456,11 @@
451 ns == ZORBA_JSON_FN_NS ||456 ns == ZORBA_JSON_FN_NS ||
452 ns == ZORBA_FETCH_FN_NS ||457 ns == ZORBA_FETCH_FN_NS ||
453 ns == ZORBA_NODE_FN_NS ||458 ns == ZORBA_NODE_FN_NS ||
459#ifndef ZORBA_NO_FULL_TEXT
460 ns == ZORBA_FULL_TEXT_FN_NS ||
461#endif /* ZORBA_NO_FULL_TEXT */
454 ns == ZORBA_XML_FN_NS);462 ns == ZORBA_XML_FN_NS);
455 }463 }
456 else if (ns == W3C_FN_NS || ns == XQUERY_MATH_FN_NS)464 else if (ns == W3C_FN_NS || ns == XQUERY_MATH_FN_NS)
457 {465 {
458 return true;466 return true;
@@ -1585,7 +1593,7 @@
1585 std::auto_ptr<internal::Resource>& oResource,1593 std::auto_ptr<internal::Resource>& oResource,
1586 zstring& oErrorMessage) const1594 zstring& oErrorMessage) const
1587{1595{
1588 oErrorMessage = "";1596 oErrorMessage.clear();
15891597
1590 // Iterate through all candidate URLs...1598 // Iterate through all candidate URLs...
1591 for (std::vector<zstring>::iterator url = aUrls.begin();1599 for (std::vector<zstring>::iterator url = aUrls.begin();
@@ -1621,7 +1629,7 @@
1621 }1629 }
1622 catch (const std::exception& e)1630 catch (const std::exception& e)
1623 {1631 {
1624 if (oErrorMessage == "")1632 if (oErrorMessage.empty())
1625 {1633 {
1626 // Really no point in saving anything more than the first message1634 // Really no point in saving anything more than the first message
1627 oErrorMessage = e.what();1635 oErrorMessage = e.what();
16281636
=== modified file 'src/context/static_context.h'
--- src/context/static_context.h 2012-04-24 12:39:38 +0000
+++ src/context/static_context.h 2012-04-24 21:06:20 +0000
@@ -471,6 +471,9 @@
471 static const char* ZORBA_FETCH_FN_NS;471 static const char* ZORBA_FETCH_FN_NS;
472 static const char* ZORBA_NODE_FN_NS;472 static const char* ZORBA_NODE_FN_NS;
473 static const char* ZORBA_XML_FN_NS;473 static const char* ZORBA_XML_FN_NS;
474#ifndef ZORBA_NO_FULL_TEXT
475 static const char* ZORBA_FULL_TEXT_FN_NS;
476#endif /* ZORBA_NO_FULL_TEXT */
474 static const char* ZORBA_XML_FN_OPTIONS_NS;477 static const char* ZORBA_XML_FN_OPTIONS_NS;
475478
476 // Namespaces of virtual modules declaring zorba builtin functions479 // Namespaces of virtual modules declaring zorba builtin functions
477480
=== removed file 'src/context/stemmer_wrappers.cpp'
--- src/context/stemmer_wrappers.cpp 2012-04-24 12:39:38 +0000
+++ src/context/stemmer_wrappers.cpp 1970-01-01 00:00:00 +0000
@@ -1,74 +0,0 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include "stdafx.h"
17
18#include <zorba/config.h>
19
20#ifndef ZORBA_NO_FULL_TEXT
21
22#include "api/unmarshaller.h"
23#include "diagnostics/assert.h"
24#include "util/cxx_util.h"
25
26#include "stemmer_wrappers.h"
27
28using namespace zorba::locale;
29
30namespace zorba {
31namespace internal {
32
33///////////////////////////////////////////////////////////////////////////////
34
35StemmerWrapper::StemmerWrapper( zorba::Stemmer const *s ) :
36 api_stemmer_( s )
37{
38 ZORBA_ASSERT( api_stemmer_ );
39}
40
41void StemmerWrapper::stem( zstring const &word, iso639_1::type lang,
42 zstring *result ) const {
43 String const api_word( Unmarshaller::newString( word ) );
44 String api_result( Unmarshaller::newString( *result ) );
45 api_stemmer_->stem( api_word, lang, &api_result );
46}
47
48///////////////////////////////////////////////////////////////////////////////
49
50StemmerProviderWrapper::
51StemmerProviderWrapper( zorba::StemmerProvider const *p ) :
52 api_stemmer_provider_( p )
53{
54 ZORBA_ASSERT( api_stemmer_provider_ );
55}
56
57Stemmer const*
58StemmerProviderWrapper::get_stemmer( iso639_1::type lang ) const {
59 zorba::Stemmer const *const s = api_stemmer_provider_->getStemmer( lang );
60 return s ? new StemmerWrapper( s ) : nullptr;
61}
62
63///////////////////////////////////////////////////////////////////////////////
64
65} // namespace internal
66} // namespace zorba
67
68#endif /* ZORBA_NO_FULL_TEXT */
69/*
70 * Local variables:
71 * mode: c++
72 * End:
73 */
74/* vim:set et sw=2 ts=2: */
750
=== removed file 'src/context/stemmer_wrappers.h'
--- src/context/stemmer_wrappers.h 2012-04-24 12:39:38 +0000
+++ src/context/stemmer_wrappers.h 1970-01-01 00:00:00 +0000
@@ -1,63 +0,0 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#pragma once
17#ifndef ZORBA_STEMMER_WRAPPERS_H
18#define ZORBA_STEMMER_WRAPPERS_H
19
20#include <zorba/config.h>
21
22#if 0
23#ifndef ZORBA_NO_FULL_TEXT
24
25#include <zorba/stemmer.h>
26#include "zorbautils/stemmer.h"
27
28namespace zorba {
29namespace internal {
30
31///////////////////////////////////////////////////////////////////////////////
32
/**
 * Adapter that derives from the Stemmer class visible in this namespace and
 * holds a pointer to a public-API zorba::Stemmer.
 *
 * NOTE(review): the base class is presumably the internal stemmer interface
 * from "zorbautils/stemmer.h" -- confirm. This declaration sits inside an
 * "#if 0" region, so it is currently not compiled.
 */
33class StemmerWrapper : public Stemmer {
34public:
// Takes the public-API stemmer that this wrapper holds.
35 StemmerWrapper( zorba::Stemmer const *api_stemmer );
// Stems word for language lang; result is the output argument.
36 void stem( zstring const &word, locale::iso639_1::type lang,
37 zstring *result ) const;
38private:
// The wrapped public-API stemmer; the pointer itself is const.
39 zorba::Stemmer const *const api_stemmer_;
40};
41
/**
 * Adapter that derives from the StemmerProvider class visible in this
 * namespace and holds a pointer to a public-API zorba::StemmerProvider.
 *
 * NOTE(review): the base class is presumably the internal provider interface
 * from "zorbautils/stemmer.h" -- confirm. This declaration sits inside an
 * "#if 0" region, so it is currently not compiled.
 */
42class StemmerProviderWrapper : public StemmerProvider {
43public:
// Takes the public-API provider that this wrapper holds.
44 StemmerProviderWrapper( zorba::StemmerProvider const *p );
// Returns a stemmer for language lang.
45 Stemmer const* get_stemmer( locale::iso639_1::type lang ) const;
46private:
// The wrapped public-API provider; the pointer itself is const.
47 zorba::StemmerProvider const *const api_stemmer_provider_;
48};
49
50///////////////////////////////////////////////////////////////////////////////
51
52} // namespace internal
53} // namespace zorba
54
55#endif /* ZORBA_NO_FULL_TEXT */
56#endif
57#endif /* ZORBA_STEMMER_WRAPPERS_H */
58/*
59 * Local variables:
60 * mode: c++
61 * End:
62 */
63/* vim:set et sw=2 ts=2: */
640
=== modified file 'src/context/uri_resolver.cpp'
--- src/context/uri_resolver.cpp 2012-04-24 12:39:38 +0000
+++ src/context/uri_resolver.cpp 2012-04-24 21:06:20 +0000
@@ -117,19 +117,6 @@
117 {117 {
118 }118 }
119119
120#ifndef ZORBA_NO_FULL_TEXT
121 ThesaurusEntityData::ThesaurusEntityData(locale::iso639_1::type aLang)
122 : EntityData(EntityData::THESAURUS),
123 theLang(aLang)
124 {
125 }
126
127 locale::iso639_1::type ThesaurusEntityData::getLanguage() const
128 {
129 return theLang;
130 }
131#endif /* ZORBA_NO_FULL_TEXT */
132
133/*************120/*************
134 * URIMapper is an abstract class, but we have to define its vtbl and121 * URIMapper is an abstract class, but we have to define its vtbl and
135 * base destructor somewhere.122 * base destructor somewhere.
136123
=== modified file 'src/context/uri_resolver.h'
--- src/context/uri_resolver.h 2012-04-24 12:39:38 +0000
+++ src/context/uri_resolver.h 2012-04-24 21:06:20 +0000
@@ -55,21 +55,21 @@
55 /**55 /**
56 * @brief Return the URL used to load this Resource.56 * @brief Return the URL used to load this Resource.
57 */57 */
58 zstring getUrl() { return theUrl; }58 zstring const& getUrl() const { return theUrl; }
5959
60 virtual ~Resource() = 0;60 virtual ~Resource() = 0;
6161
62 protected:62protected:
6363
64 Resource();64 Resource();
6565
66 private:66private:
6767
68 /**68 /**
69 * Used by static_context to populate the URL.69 * Used by static_context to populate the URL.
70 */70 */
71 void setUrl(zstring const &aUrl) { theUrl = aUrl; }
71 friend class zorba::static_context;72 friend class zorba::static_context;
72 void setUrl(zstring aUrl) { theUrl = aUrl; }
7373
74 zstring theUrl;74 zstring theUrl;
75};75};
@@ -193,25 +193,6 @@
193 Kind const theKind;193 Kind const theKind;
194};194};
195195
196#ifndef ZORBA_NO_FULL_TEXT
197/**
198 * @brief The class containing additional data for URIMappers and URLResolvers
199 * when mapping/resolving a Thesaurus URI.
200 */
201class ThesaurusEntityData : public EntityData
202{
203public:
204 ThesaurusEntityData(locale::iso639_1::type aLang);
205 /**
206 * @brief Return the language for which a thesaurus is being requested.
207 */
208 virtual locale::iso639_1::type getLanguage() const;
209
210private:
211 locale::iso639_1::type const theLang;
212};
213#endif /* ZORBA_NO_FULL_TEXT */
214
215/**196/**
216 * @brief Interface for URL resolving.197 * @brief Interface for URL resolving.
217 *198 *
218199
=== modified file 'src/diagnostics/assert.cpp'
--- src/diagnostics/assert.cpp 2012-04-24 12:39:38 +0000
+++ src/diagnostics/assert.cpp 2012-04-24 21:06:20 +0000
@@ -68,7 +68,7 @@
68 file, 68 file,
69 line, 69 line,
70 zerr::ZXQP0002_ASSERT_FAILED, 70 zerr::ZXQP0002_ASSERT_FAILED,
71 ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition ))71 ( msg ? ERROR_PARAMS( condition, msg ) : ERROR_PARAMS( condition ) )
72 );72 );
73}73}
7474
7575
=== modified file 'src/diagnostics/assert.h'
--- src/diagnostics/assert.h 2012-04-24 12:39:38 +0000
+++ src/diagnostics/assert.h 2012-04-24 21:06:20 +0000
@@ -20,6 +20,10 @@
20#ifndef ZORBA_ASSERT_H20#ifndef ZORBA_ASSERT_H
21#define ZORBA_ASSERT_H21#define ZORBA_ASSERT_H
2222
23#include <sstream>
24
25#include "util/cxx_util.h"
26
23namespace zorba {27namespace zorba {
2428
25/**29/**
@@ -35,7 +39,7 @@
35void assertion_failed( char const *condition,39void assertion_failed( char const *condition,
36 char const *file, 40 char const *file,
37 int line, 41 int line,
38 char const *msg = 0);42 char const *msg = nullptr );
3943
40/**44/**
41 * Zorba version of the standard assert(3) macro.45 * Zorba version of the standard assert(3) macro.
4246
=== modified file 'src/diagnostics/diagnostic_en.xml'
--- src/diagnostics/diagnostic_en.xml 2012-04-24 12:39:38 +0000
+++ src/diagnostics/diagnostic_en.xml 2012-04-24 21:06:20 +0000
@@ -1746,7 +1746,7 @@
1746 <diagnostic code="ZXQP8401" name="THESAURUS_VERSION_MISMATCH"1746 <diagnostic code="ZXQP8401" name="THESAURUS_VERSION_MISMATCH"
1747 if="!defined(ZORBA_NO_FULL_TEXT)">1747 if="!defined(ZORBA_NO_FULL_TEXT)">
1748 <comment>1748 <comment>
1749 The version of the thesaurus is not the expected version.1749 The version of the thesaurus is not the expected version.
1750 </comment>1750 </comment>
1751 <value>"$1": wrong WordNet file version; should be "$2"</value>1751 <value>"$1": wrong WordNet file version; should be "$2"</value>
1752 </diagnostic>1752 </diagnostic>
@@ -1754,19 +1754,39 @@
1754 <diagnostic code="ZXQP8402" name="THESAURUS_ENDIANNESS_MISMATCH"1754 <diagnostic code="ZXQP8402" name="THESAURUS_ENDIANNESS_MISMATCH"
1755 if="!defined(ZORBA_NO_FULL_TEXT)">1755 if="!defined(ZORBA_NO_FULL_TEXT)">
1756 <comment>1756 <comment>
1757 The thesaurus data file's endianness does not match that of the CPU.
1757 </comment>1758 </comment>
1758 <value>thesaurus data endianness does not match CPU</value>1759 <value>thesaurus data endianness does not match CPU</value>
1759 The thesaurus data file's endianness does not match that of the CPU.
1760 </diagnostic>1760 </diagnostic>
17611761
1762 <diagnostic code="ZXQP8403" name="THESAURUS_DATA_ERROR"1762 <diagnostic code="ZXQP8403" name="THESAURUS_DATA_ERROR"
1763 if="!defined(ZORBA_NO_FULL_TEXT)">1763 if="!defined(ZORBA_NO_FULL_TEXT)">
1764 <comment>1764 <comment>
1765 The thesaurus data contains an unexpected value.1765 The thesaurus data contains an unexpected value.
1766 </comment>1766 </comment>
1767 <value>thesaurus data error${: 1}</value>1767 <value>thesaurus data error${: 1}</value>
1768 </diagnostic>1768 </diagnostic>
17691769
1770 <diagnostic code="ZXQP8404" name="STEM_LANG_NOT_SUPPORTED"
1771 if="!defined(ZORBA_NO_FULL_TEXT)">
1772 <value>"$1": language not supported for stemming</value>
1773 </diagnostic>
1774
1775 <diagnostic code="ZXQP8405" name="STOP_WORDS_LANG_NOT_SUPPORTED"
1776 if="!defined(ZORBA_NO_FULL_TEXT)">
1777 <value>"$1": language not supported for stop-words</value>
1778 </diagnostic>
1779
1780 <diagnostic code="ZXQP8406" name="THESAURUS_LANG_NOT_SUPPORTED"
1781 if="!defined(ZORBA_NO_FULL_TEXT)">
1782 <value>"$1": language not supported for thesaurus</value>
1783 </diagnostic>
1784
1785 <diagnostic code="ZXQP8407" name="TOKENIZER_LANG_NOT_SUPPORTED"
1786 if="!defined(ZORBA_NO_FULL_TEXT)">
1787 <value>"$1": language not supported for tokenizer</value>
1788 </diagnostic>
1789
1770 <diagnostic code="ZXQD0001" name="PREFIX_NOT_DECLARED">1790 <diagnostic code="ZXQD0001" name="PREFIX_NOT_DECLARED">
1771 <value>"$1": prefix not declared when calling function "$2" from $3</value>1791 <value>"$1": prefix not declared when calling function "$2" from $3</value>
1772 </diagnostic>1792 </diagnostic>
17731793
=== modified file 'src/diagnostics/pregenerated/diagnostic_list.cpp'
--- src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-24 12:39:38 +0000
+++ src/diagnostics/pregenerated/diagnostic_list.cpp 2012-04-24 21:06:20 +0000
@@ -660,6 +660,18 @@
660660
661661
662ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR( "ZXQP8403" );662ZorbaErrorCode ZXQP8403_THESAURUS_DATA_ERROR( "ZXQP8403" );
663
664
665ZorbaErrorCode ZXQP8404_STEM_LANG_NOT_SUPPORTED( "ZXQP8404" );
666
667
668ZorbaErrorCode ZXQP8405_STOP_WORDS_LANG_NOT_SUPPORTED( "ZXQP8405" );
669
670
671ZorbaErrorCode ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED( "ZXQP8406" );
672
673
674ZorbaErrorCode ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED( "ZXQP8407" );
663#endif675#endif
664676
665677
666678
=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
--- src/diagnostics/pregenerated/dict_en.cpp 2012-04-24 12:39:38 +0000
+++ src/diagnostics/pregenerated/dict_en.cpp 2012-04-24 21:06:20 +0000
@@ -434,6 +434,18 @@
434#if !defined(ZORBA_NO_FULL_TEXT)434#if !defined(ZORBA_NO_FULL_TEXT)
435 { "ZXQP8403", "thesaurus data error${: 1}" },435 { "ZXQP8403", "thesaurus data error${: 1}" },
436#endif436#endif
437#if !defined(ZORBA_NO_FULL_TEXT)
438 { "ZXQP8404", "\"$1\": langauge not supported for stemming" },
439#endif
440#if !defined(ZORBA_NO_FULL_TEXT)
441 { "ZXQP8405", "\"$1\": langauge not supported for stop-words" },
442#endif
443#if !defined(ZORBA_NO_FULL_TEXT)
444 { "ZXQP8406", "\"$1\": langauge not supported for thesaurus" },
445#endif
446#if !defined(ZORBA_NO_FULL_TEXT)
447 { "ZXQP8407", "\"$1\": langauge not supported for tokenizer" },
448#endif
437 { "~AllMatchesHasExcludes", "AllMatches contains StringExclude" },449 { "~AllMatchesHasExcludes", "AllMatches contains StringExclude" },
438 { "~AlreadySpecified", "already specified" },450 { "~AlreadySpecified", "already specified" },
439 { "~ArithOpNotDefinedBetween_23", "arithmetic operation not defined between types \"$2\" and \"$3\"" },451 { "~ArithOpNotDefinedBetween_23", "arithmetic operation not defined between types \"$2\" and \"$3\"" },
440452
=== modified file 'src/functions/CMakeLists.txt'
--- src/functions/CMakeLists.txt 2012-04-24 12:39:38 +0000
+++ src/functions/CMakeLists.txt 2012-04-24 21:06:20 +0000
@@ -83,3 +83,7 @@
83 func_apply.cpp83 func_apply.cpp
84 func_serialize_impl.cpp84 func_serialize_impl.cpp
85)85)
86
87IF (NOT ZORBA_NO_FULL_TEXT)
88 LIST(APPEND FUNCTIONS_SRCS func_ft_module_impl.cpp)
89ENDIF (NOT ZORBA_NO_FULL_TEXT)
8690
=== modified file 'src/functions/external_function.cpp'
--- src/functions/external_function.cpp 2012-04-24 12:39:38 +0000
+++ src/functions/external_function.cpp 2012-04-24 21:06:20 +0000
@@ -45,12 +45,12 @@
45 :45 :
46 function(sig, FunctionConsts::FN_UNKNOWN),46 function(sig, FunctionConsts::FN_UNKNOWN),
47 theLoc(loc),47 theLoc(loc),
48 theModuleSctx(modSctx),
49 theNamespace(ns),48 theNamespace(ns),
50 theScriptingKind(scriptingType),49 theScriptingKind(scriptingType),
51 theImpl(impl)50 theImpl(impl)
52{51{
53 resetFlag(FunctionConsts::isBuiltin);52 resetFlag(FunctionConsts::isBuiltin);
53 theModuleSctx = modSctx;
54}54}
5555
5656
@@ -62,7 +62,6 @@
62 zorba::serialization::serialize_baseclass(ar, (function*)this);62 zorba::serialization::serialize_baseclass(ar, (function*)this);
6363
64 ar & theLoc;64 ar & theLoc;
65 ar & theModuleSctx;
66 ar & theNamespace;65 ar & theNamespace;
67 ar & theScriptingKind;66 ar & theScriptingKind;
6867
6968
=== added file 'src/functions/func_ft_module_impl.cpp'
--- src/functions/func_ft_module_impl.cpp 1970-01-01 00:00:00 +0000
+++ src/functions/func_ft_module_impl.cpp 2012-04-24 21:06:20 +0000
@@ -0,0 +1,110 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include "stdafx.h"
17
18#include "functions/func_ft_module.h"
19
20#include "runtime/full_text/ft_module.h"
21
22#define FT_MODULE_NS "http://www.zorba-xquery.com/modules/full-text"
23
24namespace zorba {
25
26///////////////////////////////////////////////////////////////////////////////
27
/**
 * Declares, in the given static context, the full-text module functions
 * whose declarations are written by hand in this file: two declarations of
 * "tokenize" and two of "tokenizer-properties", all in the FT_MODULE_NS
 * namespace.
 *
 * @param sctx The static context passed to every DECL_WITH_KIND invocation.
 */
28void populate_context_ft_module_impl( static_context *sctx ) {
29
// Return type shared by both "tokenize" declarations: an element node type
// named "token" in FT_MODULE_NS with quantifier QUANT_STAR.
// NOTE(review): the meaning of the NULL and the two false arguments depends
// on create_node_type()'s signature, which is not visible here.
30 xqtref_t tokenize_return_type =
31 GENV_TYPESYSTEM.create_node_type(
32 store::StoreConsts::elementNode,
33 createQName( FT_MODULE_NS, "", "token" ),
34 NULL,
35 TypeConstants::QUANT_STAR,
36 false,
37 false
38 );
// tokenize declared with ANY_NODE_TYPE_ONE (kind FULL_TEXT_TOKENIZE_1).
39 {
40 DECL_WITH_KIND( sctx, full_text_tokenize,
41 (createQName( FT_MODULE_NS, "", "tokenize"),
42 GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
43 tokenize_return_type),
44 FunctionConsts::FULL_TEXT_TOKENIZE_1
45 );
46 }
// tokenize declared with ANY_NODE_TYPE_ONE and LANGUAGE_TYPE_ONE
// (kind FULL_TEXT_TOKENIZE_2).
47 {
48 DECL_WITH_KIND( sctx, full_text_tokenize,
49 (createQName( FT_MODULE_NS, "", "tokenize"),
50 GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
51 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
52 tokenize_return_type),
53 FunctionConsts::FULL_TEXT_TOKENIZE_2
54 );
55 }
56
// Return type shared by both "tokenizer-properties" declarations: an element
// node type named "tokenizer-properties" in FT_MODULE_NS with quantifier
// QUANT_ONE.
57 xqtref_t tokenizer_properties_return_type =
58 GENV_TYPESYSTEM.create_node_type(
59 store::StoreConsts::elementNode,
60 createQName( FT_MODULE_NS, "", "tokenizer-properties" ),
61 NULL,
62 TypeConstants::QUANT_ONE,
63 false,
64 false
65 );
// tokenizer-properties declared with only the return type
// (kind FULL_TEXT_TOKENIZER_PROPERTIES_0).
66 {
67 DECL_WITH_KIND( sctx, full_text_tokenizer_properties,
68 (createQName( FT_MODULE_NS, "", "tokenizer-properties"),
69 tokenizer_properties_return_type),
70 FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_0
71 );
72 }
// tokenizer-properties declared with LANGUAGE_TYPE_ONE
// (kind FULL_TEXT_TOKENIZER_PROPERTIES_1).
73 {
74 DECL_WITH_KIND( sctx, full_text_tokenizer_properties,
75 (createQName( FT_MODULE_NS, "", "tokenizer-properties"),
76 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
77 tokenizer_properties_return_type),
78 FunctionConsts::FULL_TEXT_TOKENIZER_PROPERTIES_1
79 );
80 }
81
82}
83
84///////////////////////////////////////////////////////////////////////////////
85
/**
 * Creates the runtime iterator for the "tokenizer-properties" function.
 *
 * The iterator is constructed with theModuleSctx, not with the sctx
 * argument; the CompilerCB, sctx and ann arguments are not used.
 *
 * @param loc The query location passed to the iterator.
 * @param argv The argument iterators passed to the iterator.
 * @return A newly allocated TokenizerPropertiesIterator.
 *
 * NOTE(review): function's constructor initialises theModuleSctx to NULL, so
 * presumably something calls setStaticContext() on this function before
 * codegen runs -- confirm, otherwise the iterator receives a null context.
 */
86PlanIter_t full_text_tokenizer_properties::codegen(
87 CompilerCB*,
88 static_context* sctx,
89 const QueryLoc& loc,
90 std::vector<PlanIter_t>& argv,
91 expr& ann) const
92{
93 return new TokenizerPropertiesIterator(theModuleSctx, loc, argv);
94}
95
96
/**
 * Creates the runtime iterator for the "tokenize" function.
 *
 * The iterator is constructed with theModuleSctx, not with the sctx
 * argument; the CompilerCB, sctx and ann arguments are not used.
 *
 * @param loc The query location passed to the iterator.
 * @param argv The argument iterators passed to the iterator.
 * @return A newly allocated TokenizeIterator.
 *
 * NOTE(review): function's constructor initialises theModuleSctx to NULL, so
 * presumably something calls setStaticContext() on this function before
 * codegen runs -- confirm, otherwise the iterator receives a null context.
 */
97PlanIter_t full_text_tokenize::codegen(
98 CompilerCB*,
99 static_context* sctx,
100 const QueryLoc& loc,
101 std::vector<PlanIter_t>& argv,
102 expr& ann) const
103{
104 return new TokenizeIterator(theModuleSctx, loc, argv);
105}
106
107///////////////////////////////////////////////////////////////////////////////
108
109} // namespace zorba
110/* vim:set et sw=2 ts=2: */
0111
=== modified file 'src/functions/function.cpp'
--- src/functions/function.cpp 2012-04-24 12:39:38 +0000
+++ src/functions/function.cpp 2012-04-24 21:06:20 +0000
@@ -43,6 +43,7 @@
43 theSignature(sig),43 theSignature(sig),
44 theKind(kind),44 theKind(kind),
45 theFlags(0),45 theFlags(0),
46 theModuleSctx(NULL),
46 theXQueryVersion(StaticContextConsts::xquery_version_1_0)47 theXQueryVersion(StaticContextConsts::xquery_version_1_0)
47{48{
48 setFlag(FunctionConsts::isBuiltin);49 setFlag(FunctionConsts::isBuiltin);
@@ -70,6 +71,7 @@
70 SERIALIZE_ENUM(FunctionConsts::FunctionKind, theKind);71 SERIALIZE_ENUM(FunctionConsts::FunctionKind, theKind);
71 ar & theFlags;72 ar & theFlags;
72 ar & theAnnotationList;73 ar & theAnnotationList;
74 ar & theModuleSctx;
73 SERIALIZE_ENUM(StaticContextConsts::xquery_version_t, theXQueryVersion);75 SERIALIZE_ENUM(StaticContextConsts::xquery_version_t, theXQueryVersion);
74}76}
7577
@@ -92,6 +94,7 @@
92 return n == VARIADIC_SIG_SIZE || argv.size() == n;94 return n == VARIADIC_SIG_SIZE || argv.size() == n;
93}95}
9496
97
95/*******************************************************************************98/*******************************************************************************
9699
97********************************************************************************/100********************************************************************************/
98101
=== modified file 'src/functions/function.h'
--- src/functions/function.h 2012-04-24 12:39:38 +0000
+++ src/functions/function.h 2012-04-24 21:06:20 +0000
@@ -42,7 +42,10 @@
4242
4343
44/*******************************************************************************44/*******************************************************************************
4545 theModuleSctx:
46 --------------
47 The root sctx of the module containing the declaration. It is NULL for
48 functions that must be executed in the static context of the caller.
46********************************************************************************/49********************************************************************************/
47class function : public SimpleRCObject50class function : public SimpleRCObject
48{51{
@@ -51,6 +54,7 @@
51 FunctionConsts::FunctionKind theKind;54 FunctionConsts::FunctionKind theKind;
52 uint32_t theFlags;55 uint32_t theFlags;
53 AnnotationList_t theAnnotationList;56 AnnotationList_t theAnnotationList;
57 static_context * theModuleSctx;
5458
55 StaticContextConsts::xquery_version_t theXQueryVersion;59 StaticContextConsts::xquery_version_t theXQueryVersion;
5660
@@ -89,6 +93,10 @@
8993
90 bool isVariadic() const { return theSignature.isVariadic(); }94 bool isVariadic() const { return theSignature.isVariadic(); }
9195
96 static_context* getStaticContext() const { return theModuleSctx; }
97
98 void setStaticContext(static_context* sctx) { theModuleSctx = sctx; }
99
92 void setFlag(FunctionConsts::AnnotationFlags flag)100 void setFlag(FunctionConsts::AnnotationFlags flag)
93 {101 {
94 theFlags |= flag;102 theFlags |= flag;
95103
=== modified file 'src/functions/library.cpp'
--- src/functions/library.cpp 2012-04-24 12:39:38 +0000
+++ src/functions/library.cpp 2012-04-24 21:06:20 +0000
@@ -68,6 +68,10 @@
68#include "functions/func_reflection.h"68#include "functions/func_reflection.h"
69#include "functions/func_apply.h"69#include "functions/func_apply.h"
70#include "functions/func_fetch.h"70#include "functions/func_fetch.h"
71#ifndef ZORBA_NO_FULL_TEXT
72#include "functions/func_ft_module.h"
73#include "runtime/full_text/ft_module_impl.h"
74#endif /* ZORBA_NO_FULL_TEXT */
7175
72#include "functions/func_function_item_iter.h"76#include "functions/func_function_item_iter.h"
7377
@@ -144,6 +148,10 @@
144 populate_context_apply(sctx);148 populate_context_apply(sctx);
145149
146 populate_context_fetch(sctx);150 populate_context_fetch(sctx);
151#ifndef ZORBA_NO_FULL_TEXT
152 populate_context_ft_module(sctx);
153 populate_context_ft_module_impl(sctx);
154#endif /* ZORBA_NO_FULL_TEXT */
147155
148 ar.set_loading_hardcoded_objects(false);156 ar.set_loading_hardcoded_objects(false);
149}157}
150158
=== added file 'src/functions/pregenerated/func_ft_module.cpp'
--- src/functions/pregenerated/func_ft_module.cpp 1970-01-01 00:00:00 +0000
+++ src/functions/pregenerated/func_ft_module.cpp 2012-04-24 21:06:20 +0000
@@ -0,0 +1,496 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// ******************************************
18// * *
19// * THIS IS A GENERATED FILE. DO NOT EDIT! *
20// * SEE .xml FILE WITH SAME NAME *
21// * *
22// ******************************************
23
24
25#include "stdafx.h"
26#include "runtime/full_text/ft_module.h"
27#include "functions/func_ft_module.h"
28
29
30namespace zorba{
31
32
33#ifndef ZORBA_NO_FULL_TEXT
34PlanIter_t full_text_current_lang::codegen(
35 CompilerCB*,
36 static_context* sctx,
37 const QueryLoc& loc,
38 std::vector<PlanIter_t>& argv,
39 expr& ann) const
40{
41 return new CurrentLangIterator(sctx, loc, argv);
42}
43
44#endif
45#ifndef ZORBA_NO_FULL_TEXT
46PlanIter_t full_text_host_lang::codegen(
47 CompilerCB*,
48 static_context* sctx,
49 const QueryLoc& loc,
50 std::vector<PlanIter_t>& argv,
51 expr& ann) const
52{
53 return new HostLangIterator(sctx, loc, argv);
54}
55
56#endif
57#ifndef ZORBA_NO_FULL_TEXT
58PlanIter_t full_text_is_stem_lang_supported::codegen(
59 CompilerCB*,
60 static_context* sctx,
61 const QueryLoc& loc,
62 std::vector<PlanIter_t>& argv,
63 expr& ann) const
64{
65 return new IsStemLangSupportedIterator(sctx, loc, argv);
66}
67
68#endif
69#ifndef ZORBA_NO_FULL_TEXT
70PlanIter_t full_text_is_stop_word::codegen(
71 CompilerCB*,
72 static_context* sctx,
73 const QueryLoc& loc,
74 std::vector<PlanIter_t>& argv,
75 expr& ann) const
76{
77 return new IsStopWordIterator(sctx, loc, argv);
78}
79
80#endif
81#ifndef ZORBA_NO_FULL_TEXT
82PlanIter_t full_text_is_stop_word_lang_supported::codegen(
83 CompilerCB*,
84 static_context* sctx,
85 const QueryLoc& loc,
86 std::vector<PlanIter_t>& argv,
87 expr& ann) const
88{
89 return new IsStopWordLangSupportedIterator(sctx, loc, argv);
90}
91
92#endif
93#ifndef ZORBA_NO_FULL_TEXT
94PlanIter_t full_text_is_thesaurus_lang_supported::codegen(
95 CompilerCB*,
96 static_context* sctx,
97 const QueryLoc& loc,
98 std::vector<PlanIter_t>& argv,
99 expr& ann) const
100{
101 return new IsThesaurusLangSupportedIterator(sctx, loc, argv);
102}
103
104#endif
105#ifndef ZORBA_NO_FULL_TEXT
106PlanIter_t full_text_is_tokenizer_lang_supported::codegen(
107 CompilerCB*,
108 static_context* sctx,
109 const QueryLoc& loc,
110 std::vector<PlanIter_t>& argv,
111 expr& ann) const
112{
113 return new IsTokenizerLangSupportedIterator(sctx, loc, argv);
114}
115
116#endif
117#ifndef ZORBA_NO_FULL_TEXT
118PlanIter_t full_text_stem::codegen(
119 CompilerCB*,
120 static_context* sctx,
121 const QueryLoc& loc,
122 std::vector<PlanIter_t>& argv,
123 expr& ann) const
124{
125 return new StemIterator(sctx, loc, argv);
126}
127
128#endif
129#ifndef ZORBA_NO_FULL_TEXT
130PlanIter_t full_text_strip_diacritics::codegen(
131 CompilerCB*,
132 static_context* sctx,
133 const QueryLoc& loc,
134 std::vector<PlanIter_t>& argv,
135 expr& ann) const
136{
137 return new StripDiacriticsIterator(sctx, loc, argv);
138}
139
140#endif
141#ifndef ZORBA_NO_FULL_TEXT
142PlanIter_t full_text_thesaurus_lookup::codegen(
143 CompilerCB*,
144 static_context* sctx,
145 const QueryLoc& loc,
146 std::vector<PlanIter_t>& argv,
147 expr& ann) const
148{
149 return new ThesaurusLookupIterator(sctx, loc, argv);
150}
151
152#endif
153#ifndef ZORBA_NO_FULL_TEXT
154
155#endif
156#ifndef ZORBA_NO_FULL_TEXT
157
158#endif
159#ifndef ZORBA_NO_FULL_TEXT
160PlanIter_t full_text_tokenize_string::codegen(
161 CompilerCB*,
162 static_context* sctx,
163 const QueryLoc& loc,
164 std::vector<PlanIter_t>& argv,
165 expr& ann) const
166{
167 return new TokenizeStringIterator(sctx, loc, argv);
168}
169
170#endif
171
172void populate_context_ft_module(static_context* sctx)
173{
174
175#ifndef ZORBA_NO_FULL_TEXT
176 {
177
178
179 DECL_WITH_KIND(sctx, full_text_current_lang,
180 (createQName("http://www.zorba-xquery.com/modules/full-text","","current-lang"),
181 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE),
182 FunctionConsts::FULL_TEXT_CURRENT_LANG_0);
183
184 }
185
186
187#endif
188
189
190#ifndef ZORBA_NO_FULL_TEXT
191 {
192
193
194 DECL_WITH_KIND(sctx, full_text_host_lang,
195 (createQName("http://www.zorba-xquery.com/modules/full-text","","host-lang"),
196 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE),
197 FunctionConsts::FULL_TEXT_HOST_LANG_0);
198
199 }
200
201
202#endif
203
204
205#ifndef ZORBA_NO_FULL_TEXT
206 {
207
208
209 DECL_WITH_KIND(sctx, full_text_is_stem_lang_supported,
210 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stem-lang-supported"),
211 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
212 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
213 FunctionConsts::FULL_TEXT_IS_STEM_LANG_SUPPORTED_1);
214
215 }
216
217
218#endif
219
220
221#ifndef ZORBA_NO_FULL_TEXT
222 {
223
224
225 DECL_WITH_KIND(sctx, full_text_is_stop_word,
226 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"),
227 GENV_TYPESYSTEM.STRING_TYPE_ONE,
228 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
229 FunctionConsts::FULL_TEXT_IS_STOP_WORD_1);
230
231 }
232
233
234#endif
235
236
237#ifndef ZORBA_NO_FULL_TEXT
238 {
239
240
241 DECL_WITH_KIND(sctx, full_text_is_stop_word,
242 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word"),
243 GENV_TYPESYSTEM.STRING_TYPE_ONE,
244 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
245 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
246 FunctionConsts::FULL_TEXT_IS_STOP_WORD_2);
247
248 }
249
250
251#endif
252
253
254#ifndef ZORBA_NO_FULL_TEXT
255 {
256
257
258 DECL_WITH_KIND(sctx, full_text_is_stop_word_lang_supported,
259 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-stop-word-lang-supported"),
260 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
261 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
262 FunctionConsts::FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1);
263
264 }
265
266
267#endif
268
269
270#ifndef ZORBA_NO_FULL_TEXT
271 {
272
273
274 DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported,
275 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"),
276 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
277 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
278 FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1);
279
280 }
281
282
283#endif
284
285
286#ifndef ZORBA_NO_FULL_TEXT
287 {
288
289
290 DECL_WITH_KIND(sctx, full_text_is_thesaurus_lang_supported,
291 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-thesaurus-lang-supported"),
292 GENV_TYPESYSTEM.STRING_TYPE_ONE,
293 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
294 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
295 FunctionConsts::FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2);
296
297 }
298
299
300#endif
301
302
303#ifndef ZORBA_NO_FULL_TEXT
304 {
305
306
307 DECL_WITH_KIND(sctx, full_text_is_tokenizer_lang_supported,
308 (createQName("http://www.zorba-xquery.com/modules/full-text","","is-tokenizer-lang-supported"),
309 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
310 GENV_TYPESYSTEM.BOOLEAN_TYPE_ONE),
311 FunctionConsts::FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1);
312
313 }
314
315
316#endif
317
318
319#ifndef ZORBA_NO_FULL_TEXT
320 {
321
322
323 DECL_WITH_KIND(sctx, full_text_stem,
324 (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"),
325 GENV_TYPESYSTEM.STRING_TYPE_ONE,
326 GENV_TYPESYSTEM.STRING_TYPE_ONE),
327 FunctionConsts::FULL_TEXT_STEM_1);
328
329 }
330
331
332#endif
333
334
335#ifndef ZORBA_NO_FULL_TEXT
336 {
337
338
339 DECL_WITH_KIND(sctx, full_text_stem,
340 (createQName("http://www.zorba-xquery.com/modules/full-text","","stem"),
341 GENV_TYPESYSTEM.STRING_TYPE_ONE,
342 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
343 GENV_TYPESYSTEM.STRING_TYPE_ONE),
344 FunctionConsts::FULL_TEXT_STEM_2);
345
346 }
347
348
349#endif
350
351
352#ifndef ZORBA_NO_FULL_TEXT
353 {
354
355
356 DECL_WITH_KIND(sctx, full_text_strip_diacritics,
357 (createQName("http://www.zorba-xquery.com/modules/full-text","","strip-diacritics"),
358 GENV_TYPESYSTEM.STRING_TYPE_ONE,
359 GENV_TYPESYSTEM.STRING_TYPE_ONE),
360 FunctionConsts::FULL_TEXT_STRIP_DIACRITICS_1);
361
362 }
363
364
365#endif
366
367
368#ifndef ZORBA_NO_FULL_TEXT
369 {
370
371
372 DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
373 (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
374 GENV_TYPESYSTEM.STRING_TYPE_ONE,
375 GENV_TYPESYSTEM.STRING_TYPE_PLUS),
376 FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_1);
377
378 }
379
380
381#endif
382
383
384#ifndef ZORBA_NO_FULL_TEXT
385 {
386
387
388 DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
389 (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
390 GENV_TYPESYSTEM.STRING_TYPE_ONE,
391 GENV_TYPESYSTEM.STRING_TYPE_ONE,
392 GENV_TYPESYSTEM.STRING_TYPE_PLUS),
393 FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_2);
394
395 }
396
397
398#endif
399
400
401#ifndef ZORBA_NO_FULL_TEXT
402 {
403
404
405 DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
406 (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
407 GENV_TYPESYSTEM.STRING_TYPE_ONE,
408 GENV_TYPESYSTEM.STRING_TYPE_ONE,
409 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
410 GENV_TYPESYSTEM.STRING_TYPE_PLUS),
411 FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_3);
412
413 }
414
415
416#endif
417
418
419#ifndef ZORBA_NO_FULL_TEXT
420 {
421
422
423 DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
424 (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
425 GENV_TYPESYSTEM.STRING_TYPE_ONE,
426 GENV_TYPESYSTEM.STRING_TYPE_ONE,
427 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
428 GENV_TYPESYSTEM.STRING_TYPE_ONE,
429 GENV_TYPESYSTEM.STRING_TYPE_PLUS),
430 FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_4);
431
432 }
433
434
435#endif
436
437
438#ifndef ZORBA_NO_FULL_TEXT
439 {
440
441
442 DECL_WITH_KIND(sctx, full_text_thesaurus_lookup,
443 (createQName("http://www.zorba-xquery.com/modules/full-text","","thesaurus-lookup"),
444 GENV_TYPESYSTEM.STRING_TYPE_ONE,
445 GENV_TYPESYSTEM.STRING_TYPE_ONE,
446 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
447 GENV_TYPESYSTEM.STRING_TYPE_ONE,
448 GENV_TYPESYSTEM.INTEGER_TYPE_ONE,
449 GENV_TYPESYSTEM.INTEGER_TYPE_ONE,
450 GENV_TYPESYSTEM.STRING_TYPE_PLUS),
451 FunctionConsts::FULL_TEXT_THESAURUS_LOOKUP_6);
452
453 }
454
455
456#endif
457
458
459#ifndef ZORBA_NO_FULL_TEXT
460 {
461
462
463 DECL_WITH_KIND(sctx, full_text_tokenize_string,
464 (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"),
465 GENV_TYPESYSTEM.STRING_TYPE_ONE,
466 GENV_TYPESYSTEM.STRING_TYPE_STAR),
467 FunctionConsts::FULL_TEXT_TOKENIZE_STRING_1);
468
469 }
470
471
472#endif
473
474
475#ifndef ZORBA_NO_FULL_TEXT
476 {
477
478
479 DECL_WITH_KIND(sctx, full_text_tokenize_string,
480 (createQName("http://www.zorba-xquery.com/modules/full-text","","tokenize-string"),
481 GENV_TYPESYSTEM.STRING_TYPE_ONE,
482 GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
483 GENV_TYPESYSTEM.STRING_TYPE_STAR),
484 FunctionConsts::FULL_TEXT_TOKENIZE_STRING_2);
485
486 }
487
488
489#endif
490}
491
492
493}
494
495
496
0497
=== added file 'src/functions/pregenerated/func_ft_module.h'
--- src/functions/pregenerated/func_ft_module.h 1970-01-01 00:00:00 +0000
+++ src/functions/pregenerated/func_ft_module.h 2012-04-24 21:06:20 +0000
@@ -0,0 +1,259 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// ******************************************
18// * *
19// * THIS IS A GENERATED FILE. DO NOT EDIT! *
20// * SEE .xml FILE WITH SAME NAME *
21// * *
22// ******************************************
23
24
25#ifndef ZORBA_FUNCTIONS_FT_MODULE_H
26#define ZORBA_FUNCTIONS_FT_MODULE_H
27
28
29#include "common/shared_types.h"
30#include "functions/function_impl.h"
31
32
33namespace zorba {
34
35
36void populate_context_ft_module(static_context* sctx);
37
38
39#ifndef ZORBA_NO_FULL_TEXT
40
41//full-text:current-lang
42class full_text_current_lang : public function
43{
44public:
45 full_text_current_lang(const signature& sig, FunctionConsts::FunctionKind kind)
46 :
47 function(sig, kind)
48 {
49
50 }
51
52 CODEGEN_DECL();
53};
54#endif
55#ifndef ZORBA_NO_FULL_TEXT
56
57//full-text:host-lang
58class full_text_host_lang : public function
59{
60public:
61 full_text_host_lang(const signature& sig, FunctionConsts::FunctionKind kind)
62 :
63 function(sig, kind)
64 {
65
66 }
67
68 CODEGEN_DECL();
69};
70#endif
71#ifndef ZORBA_NO_FULL_TEXT
72
73//full-text:is-stem-lang-supported
74class full_text_is_stem_lang_supported : public function
75{
76public:
77 full_text_is_stem_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
78 :
79 function(sig, kind)
80 {
81
82 }
83
84 CODEGEN_DECL();
85};
86#endif
87#ifndef ZORBA_NO_FULL_TEXT
88
89//full-text:is-stop-word
90class full_text_is_stop_word : public function
91{
92public:
93 full_text_is_stop_word(const signature& sig, FunctionConsts::FunctionKind kind)
94 :
95 function(sig, kind)
96 {
97
98 }
99
100 CODEGEN_DECL();
101};
102#endif
103#ifndef ZORBA_NO_FULL_TEXT
104
105//full-text:is-stop-word-lang-supported
106class full_text_is_stop_word_lang_supported : public function
107{
108public:
109 full_text_is_stop_word_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
110 :
111 function(sig, kind)
112 {
113
114 }
115
116 CODEGEN_DECL();
117};
118#endif
119#ifndef ZORBA_NO_FULL_TEXT
120
121//full-text:is-thesaurus-lang-supported
122class full_text_is_thesaurus_lang_supported : public function
123{
124public:
125 full_text_is_thesaurus_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
126 :
127 function(sig, kind)
128 {
129
130 }
131
132 CODEGEN_DECL();
133};
134#endif
135#ifndef ZORBA_NO_FULL_TEXT
136
137//full-text:is-tokenizer-lang-supported
138class full_text_is_tokenizer_lang_supported : public function
139{
140public:
141 full_text_is_tokenizer_lang_supported(const signature& sig, FunctionConsts::FunctionKind kind)
142 :
143 function(sig, kind)
144 {
145
146 }
147
148 CODEGEN_DECL();
149};
150#endif
151#ifndef ZORBA_NO_FULL_TEXT
152
153//full-text:stem
154class full_text_stem : public function
155{
156public:
157 full_text_stem(const signature& sig, FunctionConsts::FunctionKind kind)
158 :
159 function(sig, kind)
160 {
161
162 }
163
164 CODEGEN_DECL();
165};
166#endif
167#ifndef ZORBA_NO_FULL_TEXT
168
169//full-text:strip-diacritics
170class full_text_strip_diacritics : public function
171{
172public:
173 full_text_strip_diacritics(const signature& sig, FunctionConsts::FunctionKind kind)
174 :
175 function(sig, kind)
176 {
177
178 }
179
180 CODEGEN_DECL();
181};
182#endif
183#ifndef ZORBA_NO_FULL_TEXT
184
185//full-text:thesaurus-lookup
186class full_text_thesaurus_lookup : public function
187{
188public:
189 full_text_thesaurus_lookup(const signature& sig, FunctionConsts::FunctionKind kind)
190 :
191 function(sig, kind)
192 {
193
194 }
195
196 CODEGEN_DECL();
197};
198#endif
199#ifndef ZORBA_NO_FULL_TEXT
200
201//full-text:tokenize
202class full_text_tokenize : public function
203{
204public:
205 full_text_tokenize(const signature& sig, FunctionConsts::FunctionKind kind)
206 :
207 function(sig, kind)
208 {
209
210 }
211
212 CODEGEN_DECL();
213};
214#endif
215#ifndef ZORBA_NO_FULL_TEXT
216
217//full-text:tokenizer-properties
218class full_text_tokenizer_properties : public function
219{
220public:
221 full_text_tokenizer_properties(const signature& sig, FunctionConsts::FunctionKind kind)
222 :
223 function(sig, kind)
224 {
225
226 }
227
228 bool accessesDynCtx() const { return true; }
229
230 CODEGEN_DECL();
231};
232#endif
233#ifndef ZORBA_NO_FULL_TEXT
234
235//full-text:tokenize-string
236class full_text_tokenize_string : public function
237{
238public:
239 full_text_tokenize_string(const signature& sig, FunctionConsts::FunctionKind kind)
240 :
241 function(sig, kind)
242 {
243
244 }
245
246 CODEGEN_DECL();
247};
248#endif
249
250
251} //namespace zorba
252
253
254#endif
255/*
256 * Local variables:
257 * mode: c++
258 * End:
259 */
0260
=== modified file 'src/functions/pregenerated/function_enum.h'
--- src/functions/pregenerated/function_enum.h 2012-04-24 12:39:38 +0000
+++ src/functions/pregenerated/function_enum.h 2012-04-24 21:06:20 +0000
@@ -138,6 +138,29 @@
138 FN_ZORBA_FETCH_CONTENT_2,138 FN_ZORBA_FETCH_CONTENT_2,
139 FN_ZORBA_FETCH_CONTENT_TYPE_1,139 FN_ZORBA_FETCH_CONTENT_TYPE_1,
140 FN_PUT_2,140 FN_PUT_2,
141 FULL_TEXT_CURRENT_LANG_0,
142 FULL_TEXT_HOST_LANG_0,
143 FULL_TEXT_IS_STEM_LANG_SUPPORTED_1,
144 FULL_TEXT_IS_STOP_WORD_1,
145 FULL_TEXT_IS_STOP_WORD_2,
146 FULL_TEXT_IS_STOP_WORD_LANG_SUPPORTED_1,
147 FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_1,
148 FULL_TEXT_IS_THESAURUS_LANG_SUPPORTED_2,
149 FULL_TEXT_IS_TOKENIZER_LANG_SUPPORTED_1,
150 FULL_TEXT_STEM_1,
151 FULL_TEXT_STEM_2,
152 FULL_TEXT_STRIP_DIACRITICS_1,
153 FULL_TEXT_THESAURUS_LOOKUP_1,
154 FULL_TEXT_THESAURUS_LOOKUP_2,
155 FULL_TEXT_THESAURUS_LOOKUP_3,
156 FULL_TEXT_THESAURUS_LOOKUP_4,
157 FULL_TEXT_THESAURUS_LOOKUP_6,
158 FULL_TEXT_TOKENIZE_1,
159 FULL_TEXT_TOKENIZE_2,
160 FULL_TEXT_TOKENIZER_PROPERTIES_0,
161 FULL_TEXT_TOKENIZER_PROPERTIES_1,
162 FULL_TEXT_TOKENIZE_STRING_1,
163 FULL_TEXT_TOKENIZE_STRING_2,
141 FN_FUNCTION_NAME_1,164 FN_FUNCTION_NAME_1,
142 FN_FUNCTION_ARITY_1,165 FN_FUNCTION_ARITY_1,
143 FN_PARTIAL_APPLY_2,166 FN_PARTIAL_APPLY_2,
144167
=== modified file 'src/runtime/full_text/CMakeLists.txt'
--- src/runtime/full_text/CMakeLists.txt 2012-04-24 12:39:38 +0000
+++ src/runtime/full_text/CMakeLists.txt 2012-04-24 21:06:20 +0000
@@ -13,6 +13,7 @@
13# limitations under the License.13# limitations under the License.
1414
15SET(FULLTEXT_SRCS15SET(FULLTEXT_SRCS
16 ft_util.cpp
16 ft_match.cpp17 ft_match.cpp
17 ft_query_item.cpp18 ft_query_item.cpp
18 ft_single_token_iterator.cpp19 ft_single_token_iterator.cpp
@@ -40,6 +41,7 @@
40 thesaurus.cpp41 thesaurus.cpp
41 tokenizer.cpp42 tokenizer.cpp
42 default_tokenizer.cpp43 default_tokenizer.cpp
44 ft_module.cpp
43 )45 )
4446
45IF (ZORBA_NO_ICU)47IF (ZORBA_NO_ICU)
@@ -51,5 +53,5 @@
51ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)53ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
5254
53IF (ZORBA_WITH_FILE_ACCESS)55IF (ZORBA_WITH_FILE_ACCESS)
54 ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS)56 ADD_SRC_SUBFOLDER(FULLTEXT_SRCS thesauri THESAURUS_SRCS)
55ENDIF (ZORBA_WITH_FILE_ACCESS)57ENDIF (ZORBA_WITH_FILE_ACCESS)
5658
=== modified file 'src/runtime/full_text/apply.cpp'
--- src/runtime/full_text/apply.cpp 2012-04-24 12:39:38 +0000
+++ src/runtime/full_text/apply.cpp 2012-04-24 21:06:20 +0000
@@ -26,13 +26,14 @@
26#include "diagnostics/dict.h"26#include "diagnostics/dict.h"
27#include "diagnostics/xquery_diagnostics.h"27#include "diagnostics/xquery_diagnostics.h"
28#include "store/api/item.h"28#include "store/api/item.h"
29#include "store/api/item_factory.h"
29#include "store/api/store.h"30#include "store/api/store.h"
30#include "store/api/item_factory.h"
31#include "system/globalenv.h"31#include "system/globalenv.h"
32#include "util/cxx_util.h"32#include "util/cxx_util.h"
33#include "util/indent.h"33#include "util/indent.h"
34#include "util/stl_util.h"34#include "util/stl_util.h"
35#include "zorbamisc/ns_consts.h"35#include "zorbamisc/ns_consts.h"
36#include "zorbautils/locale.h"
3637
37#ifndef NDEBUG38#ifndef NDEBUG
38# include "system/properties.h"39# include "system/properties.h"
@@ -1184,11 +1185,10 @@
1184 {1185 {
1185 }1186 }
11861187
1187 void operator()( char const *utf8_s, size_type utf8_len, size_type,1188 // inherited
1188 size_type, size_type, void* ) {1189 void item( Item const&, bool );
1189 FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ );1190 void token( char const*, size_type, iso639_1::type, size_type, size_type,
1190 tokens_.push_back( t );1191 size_type, Item const* );
1191 }
11921192
1193private:1193private:
1194 FTTokenSeqIterator::FTTokens &tokens_;1194 FTTokenSeqIterator::FTTokens &tokens_;
@@ -1196,51 +1196,72 @@
1196 iso639_1::type const lang_;1196 iso639_1::type const lang_;
1197};1197};
11981198
1199void thesaurus_callback::item( Item const&, bool ) {
1200 // out-of-line since it's virtual
1201}
1202
1203void thesaurus_callback::token( char const *utf8_s, size_type utf8_len,
1204 iso639_1::type, size_type, size_type,
1205 size_type, Item const* ) {
1206 FTToken const t( utf8_s, (int)utf8_len, token_no_, lang_ );
1207 tokens_.push_back( t );
1208}
1209
1199} // anonymous namespace1210} // anonymous namespace
12001211
1201void ftcontains_visitor::1212void ftcontains_visitor::
1202lookup_thesaurus( ftthesaurus_id const &tid, zstring const &query_phrase,1213lookup_thesaurus( ftthesaurus_id const &t_id, zstring const &query_phrase,
1203 FTToken const &qt0, query_item_star_t &result ) {1214 FTToken const &qt0, query_item_star_t &result ) {
1204 ft_int at_least, at_most;1215 ft_int at_least, at_most;
1205 if ( ftrange const *const levels = tid.get_levels() )1216 if ( ftrange const *const levels = t_id.get_levels() )
1206 eval_ftrange( *levels, &at_least, &at_most );1217 eval_ftrange( *levels, &at_least, &at_most );
1207 else1218 else
1208 at_least = 0, at_most = numeric_limits<ft_int>::max();1219 at_least = 0, at_most = numeric_limits<ft_int>::max();
12091220
1210 zstring const &uri = tid.get_uri();1221 zstring const &uri = t_id.get_uri();
12111222
1212 zstring error_msg;1223 zstring error_msg;
1213 auto_ptr<internal::Resource> rsrc = static_ctx_.resolve_uri(1224 auto_ptr<internal::Resource> rsrc = static_ctx_.resolve_uri(
1214 uri, internal::ThesaurusEntityData( qt0.lang() ), error_msg1225 uri, internal::EntityData::THESAURUS, error_msg
1215 );1226 );
1216 if ( !rsrc.get() )1227 if ( !rsrc.get() )
1217 throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) );1228 throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) );
12181229
1219 internal::Thesaurus::ptr thesaurus(1230 internal::ThesaurusProvider const *const t_provider =
1220 dynamic_cast<internal::Thesaurus*>( rsrc.release() )1231 dynamic_cast<internal::ThesaurusProvider const*>( rsrc.get() );
1221 );1232 ZORBA_ASSERT( t_provider );
1222 if ( !thesaurus )1233
1223 throw XQUERY_EXCEPTION( err::FTST0018, ERROR_PARAMS( uri ) );1234 internal::Thesaurus::ptr thesaurus;
12241235 if ( !t_provider->getThesaurus( qt0.lang(), &thesaurus ) )
1225 internal::Thesaurus::iterator::ptr tresult(1236 throw XQUERY_EXCEPTION(
1237 zerr::ZXQP8406_THESAURUS_LANG_NOT_SUPPORTED,
1238 ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] )
1239 );
1240
1241 internal::Thesaurus::iterator::ptr t_synonyms(
1226 thesaurus->lookup(1242 thesaurus->lookup(
1227 query_phrase, tid.get_relationship(), at_least, at_most1243 query_phrase, t_id.get_relationship(), at_least, at_most
1228 )1244 )
1229 );1245 );
1230 if ( !tresult )1246 if ( !t_synonyms )
1231 return;1247 return;
12321248
1233 FTTokenSeqIterator::FTTokens synonyms;1249 FTTokenSeqIterator::FTTokens synonyms;
1234 thesaurus_callback cb( qt0.pos(), qt0.lang(), synonyms );1250 thesaurus_callback cb( qt0.pos(), qt0.lang(), synonyms );
12351251
1236 Tokenizer::Numbers tno;1252 Tokenizer::Numbers t_num;
1237 Tokenizer::ptr tokenizer(1253 TokenizerProvider const *const provider = GENV_STORE.getTokenizerProvider();
1238 GENV_STORE.getTokenizerProvider()->getTokenizer( qt0.lang(), tno )1254 ZORBA_ASSERT( provider );
1239 );1255 Tokenizer::ptr tokenizer;
1256 if ( !provider->getTokenizer( qt0.lang(), &t_num, &tokenizer ) )
1257 throw XQUERY_EXCEPTION(
1258 zerr::ZXQP8407_TOKENIZER_LANG_NOT_SUPPORTED,
1259 ERROR_PARAMS( iso639_1::string_of[ qt0.lang() ] )
1260 );
12401261
1241 for ( zstring synonym; tresult->next( &synonym ); ) {1262 for ( zstring synonym; t_synonyms->next( &synonym ); ) {
1242 synonyms.clear();1263 synonyms.clear();
1243 tokenizer->tokenize(1264 tokenizer->tokenize_string(
1244 synonym.data(), synonym.size(), qt0.lang(), false, cb1265 synonym.data(), synonym.size(), qt0.lang(), false, cb
1245 );1266 );
1246 query_item_t const query_item( new FTTokenSeqIterator( synonyms ) );1267 query_item_t const query_item( new FTTokenSeqIterator( synonyms ) );
12471268
=== added file 'src/runtime/full_text/ft_module_impl.cpp'
--- src/runtime/full_text/ft_module_impl.cpp 1970-01-01 00:00:00 +0000
+++ src/runtime/full_text/ft_module_impl.cpp 2012-04-24 21:06:20 +0000
@@ -0,0 +1,843 @@
1/*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <zorba/config.h>
18
19#ifndef ZORBA_NO_FULL_TEXT
20
21# include <limits>
22# include <typeinfo>
23
24# include <zorba/diagnostic_list.h>
25
26# include "api/unmarshaller.h"
27# include "context/namespace_context.h"
28# include "context/static_context.h"
29# include "diagnostics/assert.h"
30# include "diagnostics/xquery_diagnostics.h"
31# include "store/api/index.h"
32# include "store/api/item.h"
33# include "store/api/item_factory.h"
34# include "store/api/iterator.h"
35# include "store/api/store.h"
36# include "system/globalenv.h"
37# include "types/casting.h"
38# include "types/typeimpl.h"
39# include "types/typeops.h"
40# include "util/utf8_util.h"
41# include "zorbatypes/URI.h"
42# include "zorbautils/locale.h"
43
44# include "ft_stop_words_set.h"
45# include "ft_token_seq_iterator.h"
46# include "ft_util.h"
47# include "thesaurus.h"
48
49#endif /* ZORBA_NO_FULL_TEXT */
50
51#include "runtime/full_text/ft_module.h"
52
53using namespace std;
54using namespace zorba::locale;
55
56namespace zorba {
57
58///////////////////////////////////////////////////////////////////////////////
59
60#ifndef ZORBA_NO_FULL_TEXT
61inline iso639_1::type get_lang_from( static_context const *sctx ) {
62 iso639_1::type const lang = get_lang_from( sctx->get_match_options() );
63 return lang ? lang : get_host_lang();
64}
65
66static iso639_1::type get_lang_from( store::Item_t lang_item,
67 QueryLoc const &loc ) {
68 zstring lang_string;
69 lang_item->getStringValue2( lang_string );
70
71 if ( !GenericCast::instance()->castableToLanguage( lang_string ) )
72 throw XQUERY_EXCEPTION(
73 err::XPTY0004,
74 ERROR_PARAMS(
75 ZED( BadType_23o ), lang_string, ZED( NoCastTo_45o ), "xs:language"
76 ),
77 ERROR_LOC( loc )
78 );
79 if ( iso639_1::type const lang = find_lang( lang_string.c_str() ) )
80 return lang;
81 throw XQUERY_EXCEPTION(
82 err::FTST0009, ERROR_PARAMS( lang_string ), ERROR_LOC( loc )
83 );
84}
85#endif /* ZORBA_NO_FULL_TEXT */
86
87///////////////////////////////////////////////////////////////////////////////
88
89bool CurrentLangIterator::nextImpl( store::Item_t &result,
90 PlanState &plan_state ) const {
91#ifdef ZORBA_NO_FULL_TEXT
92 return false;
93#else
94 iso639_1::type const lang = get_lang_from( getStaticContext() );
95 zstring lang_string( iso639_1::string_of[ lang ] );
96
97 PlanIteratorState *state;
98 DEFAULT_STACK_INIT( PlanIteratorState, state, plan_state );
99
100 GENV_ITEMFACTORY->createLanguage( result, lang_string );
101 STACK_PUSH( true, state );
102
103 STACK_END( state );
104#endif /* ZORBA_NO_FULL_TEXT */
105}
106
107///////////////////////////////////////////////////////////////////////////////
108
109bool HostLangIterator::nextImpl( store::Item_t &result,
110 PlanState &plan_state ) const {
111#ifdef ZORBA_NO_FULL_TEXT
112 return false;
113#else
114 iso639_1::type const lang = get_host_lang();
115 zstring lang_string = iso639_1::string_of[ lang ];
116
117 PlanIteratorState *state;
118 DEFAULT_STACK_INIT( PlanIteratorState, state, plan_state );
119
120 GENV_ITEMFACTORY->createLanguage( result, lang_string );
121 STACK_PUSH( true, state );
122
123 STACK_END( state );
124#endif /* ZORBA_NO_FULL_TEXT */
125}
126
127///////////////////////////////////////////////////////////////////////////////
128
129bool IsStemLangSupportedIterator::nextImpl( store::Item_t &result,
130 PlanState &plan_state ) const {
131#ifdef ZORBA_NO_FULL_TEXT
132 return false;
133#else
134 bool is_supported;
135 store::Item_t item;
136
137 PlanIteratorState *state;
138 DEFAULT_STACK_INIT( PlanIteratorState, state, plan_state );
139
140 consumeNext( item, theChildren[0], plan_state );
141 try {
142 internal::StemmerProvider const *const provider =
143 GENV_STORE.getStemmerProvider();
144 is_supported = provider->getStemmer( get_lang_from( item, loc ) );
145 }
146 catch ( XQueryException const &e ) {
147 if ( e.diagnostic() != err::FTST0009 )
148 throw;
149 is_supported = false;
150 }
151
152 GENV_ITEMFACTORY->createBoolean( result, is_supported );
153 STACK_PUSH( true, state );
154
155 STACK_END( state );
156#endif /* ZORBA_NO_FULL_TEXT */
157}
158
159///////////////////////////////////////////////////////////////////////////////
160
161bool IsStopWordIterator::nextImpl( store::Item_t &result,
162 PlanState &plan_state ) const {
163#ifdef ZORBA_NO_FULL_TEXT
164 return false;
165#else
166 store::Item_t item;
167 iso639_1::type lang;
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches