Merge lp:~zorba-coders/zorba/update3.0_data-cleaning-module into lp:zorba/data-cleaning-module

Proposed by Juan Zacarias
Status: Merged
Merged at revision: 48
Proposed branch: lp:~zorba-coders/zorba/update3.0_data-cleaning-module
Merge into: lp:zorba/data-cleaning-module
Diff against target: 2691 lines (+636/-710)
81 files modified
src/CMakeLists.txt (+8/-8)
src/character-based-string-similarity.xq (+42/-49)
src/com/CMakeLists.txt (+0/-17)
src/com/zorba-xquery/CMakeLists.txt (+0/-17)
src/com/zorba-xquery/www/CMakeLists.txt (+0/-17)
src/com/zorba-xquery/www/modules/CMakeLists.txt (+0/-17)
src/consolidation.xq (+212/-212)
src/conversion.xq (+36/-36)
src/hybrid-string-similarity.xq (+70/-70)
src/normalization.xq (+41/-40)
src/phonetic-string-similarity.xq (+32/-32)
src/set-similarity.xq (+49/-49)
src/token-based-string-similarity.xq (+78/-78)
test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq (+1/-1)
test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq (+1/-1)
test/Queries/data-cleaning/character-based-string-similarity/jaro.xq (+1/-1)
test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-attributes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-distinct-elements.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-elements.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-nodes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq (+1/-1)
test/Queries/data-cleaning/consolidation/least-tokens.xq (+1/-1)
test/Queries/data-cleaning/consolidation/leastfrequent_1.xq (+1/-1)
test/Queries/data-cleaning/consolidation/longest_1.xq (+1/-1)
test/Queries/data-cleaning/consolidation/matching_1.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-attributes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-distinct-elements.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-elements.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-frequent.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-nodes.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq (+1/-1)
test/Queries/data-cleaning/consolidation/most-tokens.xq (+1/-1)
test/Queries/data-cleaning/consolidation/shortest_1.xq (+1/-1)
test/Queries/data-cleaning/consolidation/superstring_1.xq (+1/-1)
test/Queries/data-cleaning/conversion/address-from-geocode.xq (+1/-1)
test/Queries/data-cleaning/conversion/address-from-phone.xq (+1/-1)
test/Queries/data-cleaning/conversion/address-from-user.xq (+1/-1)
test/Queries/data-cleaning/conversion/currency-convert.xq (+1/-1)
test/Queries/data-cleaning/conversion/geocode-from-address.xq (+1/-1)
test/Queries/data-cleaning/conversion/phone-from-address.xq (+1/-1)
test/Queries/data-cleaning/conversion/phone-from-user.xq (+1/-1)
test/Queries/data-cleaning/conversion/unit-convert.xq (+1/-1)
test/Queries/data-cleaning/conversion/user-from-address.xq (+1/-1)
test/Queries/data-cleaning/conversion/user-from-phone.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq (+1/-1)
test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq (+1/-1)
test/Queries/data-cleaning/normalization/normalize-address.xq (+1/-1)
test/Queries/data-cleaning/normalization/to-date.xq (+1/-1)
test/Queries/data-cleaning/normalization/to-dateTime.spec (+1/-1)
test/Queries/data-cleaning/normalization/to-dateTime.xq (+1/-1)
test/Queries/data-cleaning/normalization/to-time.spec (+1/-1)
test/Queries/data-cleaning/normalization/to-time.xq (+1/-1)
test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq (+1/-1)
test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq (+1/-1)
test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq (+1/-1)
test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/deep-intersect.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/deep-union.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/dice.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/distinct.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/jaccard.xq (+1/-1)
test/Queries/data-cleaning/set-similarity/overlap.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/cosine.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq (+1/-1)
test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq (+1/-1)
To merge this branch: bzr merge lp:~zorba-coders/zorba/update3.0_data-cleaning-module
Reviewer Review Type Date Requested Status
Bruno Martins Approve
Juan Zacarias Approve
Chris Hillery Approve
Review via email: mp+170213@code.launchpad.net

Commit message

Changes for Update 3.0

Description of the change

Changes for Update 3.0

To post a comment you must log in.
Revision history for this message
Chris Hillery (ceejatec) wrote :

1. Need to put <p/> tags between doc paragraphs, including after leading short description. (Remove <br/> tags.)

2. The error in currency-convert() should be in the module's own namespace/prefix, with ALL-CAPS name.

3. Based on the test cases, some other functions also raise errors, but those are not documented.

review: Needs Fixing
48. By Juan Zacarias

Reverted update 3.0 changes.

49. By Juan Zacarias

Updated module to 3.0, updated documentation and changed module's namespace.

50. By Juan Zacarias

Added error to documentation.

51. By Juan Zacarias

Changed error name to caps

Revision history for this message
Juan Zacarias (juan457) wrote :

Changes done

Revision history for this message
Chris Hillery (ceejatec) :
review: Approve
Revision history for this message
Juan Zacarias (juan457) :
review: Approve
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue result for https://code.launchpad.net/~zorba-coders/zorba/update3.0_data-cleaning-module/+merge/170213

Stage "TestZorbaUbuntu" failed.
2 tests failed (8345 total tests run).

Check test results at http://jenkins.lambda.nu/job/TestZorbaUbuntu/59/testReport/ to view the results.

52. By Chris Hillery

Fixed <p> typos.

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue succeeded - proposal merged!

Revision history for this message
Bruno Martins (bgmartins) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== removed file 'src/CMakeLists.txt'
2--- src/CMakeLists.txt 2011-07-26 10:43:16 +0000
3+++ src/CMakeLists.txt 1970-01-01 00:00:00 +0000
4@@ -1,20 +0,0 @@
5-# Copyright 2006-2008 The FLWOR Foundation.
6-#
7-# Licensed under the Apache License, Version 2.0 (the "License");
8-# you may not use this file except in compliance with the License.
9-# You may obtain a copy of the License at
10-#
11-# http://www.apache.org/licenses/LICENSE-2.0
12-#
13-# Unless required by applicable law or agreed to in writing, software
14-# distributed under the License is distributed on an "AS IS" BASIS,
15-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16-# See the License for the specific language governing permissions and
17-# limitations under the License.
18-
19-# all external module libraries are generated in the directory
20-# of the corresponding .xq file
21-MESSAGE(STATUS "Add com")
22-ADD_SUBDIRECTORY(com)
23-
24-MESSAGE(STATUS "End modules")
25
26=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/CMakeLists.txt' => 'src/CMakeLists.txt'
27--- src/com/zorba-xquery/www/modules/data-cleaning/CMakeLists.txt 2011-08-07 20:36:50 +0000
28+++ src/CMakeLists.txt 2013-07-10 00:51:26 +0000
29@@ -12,29 +12,29 @@
30 # See the License for the specific language governing permissions and
31 # limitations under the License.
32
33-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"
34+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/character-based-string-similarity"
35 VERSION 2.0 FILE "character-based-string-similarity.xq")
36
37-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"
38+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/consolidation"
39 VERSION 2.0 FILE "consolidation.xq")
40
41 DECLARE_ZORBA_SCHEMA( FILE whitepages_schema.xsd
42 URI "http://api.whitepages.com/schema/")
43
44-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/conversion"
45+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/conversion"
46 VERSION 2.0 FILE "conversion.xq")
47
48-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"
49+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"
50 VERSION 2.0 FILE "hybrid-string-similarity.xq")
51
52-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/normalization"
53+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/normalization"
54 VERSION 2.0 FILE "normalization.xq")
55
56-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"
57+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"
58 VERSION 2.0 FILE "phonetic-string-similarity.xq")
59
60-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"
61+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/set-similarity"
62 VERSION 2.0 FILE "set-similarity.xq")
63
64-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"
65+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/token-based-string-similarity"
66 VERSION 2.0 FILE "token-based-string-similarity.xq")
67
68=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/character-based-string-similarity.xq' => 'src/character-based-string-similarity.xq'
69--- src/com/zorba-xquery/www/modules/data-cleaning/character-based-string-similarity.xq 2012-09-28 13:34:20 +0000
70+++ src/character-based-string-similarity.xq 2013-07-10 00:51:26 +0000
71@@ -2,7 +2,7 @@
72
73 (:
74 : Copyright 2006-2009 The FLWOR Foundation.
75- :
76+ :
77 : Licensed under the Apache License, Version 2.0 (the "License");
78 : you may not use this file except in compliance with the License.
79 : You may obtain a copy of the License at
80@@ -17,35 +17,32 @@
81 :)
82
83 (:~
84- : This library module provides character-based string similarity functions
85+ : <p>This library module provides character-based string similarity functions
86 : that view strings as sequences of characters, generally computing a similarity score
87 : that corresponds to the cost of transforming one string into another.
88- :
89 : These functions are particularly useful for matching near duplicate strings
90- : in the presence of typographical errors.
91- :
92- : The logic contained in this module is not specific to any particular XQuery implementation.
93+ : in the presence of typographical errors. </p>
94+ : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
95 :
96 : @author Bruno Martins and Diogo Simões
97 : @project Zorba/Data Cleaning/Character-Based String Similarity
98 :)
99
100-module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
101+module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
102
103 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
104 declare option ver:module-version "2.0";
105
106 (:~
107- : Returns the edit distance between two strings.
108- :
109- : This distance, also refered to as the Levenshtein distance, is defined as the minimum number
110+ : <p>Returns the edit distance between two strings.</p>
111+ : <p/>
112+ : <p>This distance, also referred to as the Levenshtein distance, is defined as the minimum number
113 : of edits needed to transform one string into the other, with the allowable edit operations
114- : being insertion, deletion, or substitution of a single character.
115- :
116- : <br/>
117- : Example usage : <code>edit-distance("FLWOR", "FLOWER")</code>
118- : <br/>
119- : The function invocation in the example above returns : <code>2</code>
120+ : being insertion, deletion, or substitution of a single character.</p>
121+ : <p/>
122+ : <p>Example usage : <code>edit-distance("FLWOR", "FLOWER")</code></p>
123+ : <p/>
124+ : <p>The function invocation in the example above returns : <code>2</code></p>
125 :
126 : @param $s1 The first string.
127 : @param $s2 The second string.
128@@ -63,17 +60,16 @@
129 };
130
131 (:~
132- : Returns the Jaro similarity coefficient between two strings.
133- :
134- : This similarity coefficient is based on the number of transposed characters and on a
135+ : <p>Returns the Jaro similarity coefficient between two strings.</p>
136+ : <p/>
137+ : <p>This similarity coefficient is based on the number of transposed characters and on a
138 : weighted sum of the percentage of matched characters held within the strings. The higher
139 : the Jaro-Winkler value is, the more similar the strings are. The coefficient is
140- : normalized such that 0 equates to no similarity and 1 is an exact match.
141- :
142- : <br/>
143- : Example usage : <code>jaro("FLWOR Found.", "FLWOR Foundation")</code>
144- : <br/>
145- : The function invocation in the example above returns : <code>0.5853174603174603</code>
146+ : normalized such that 0 equates to no similarity and 1 is an exact match.</p>
147+ : <p/>
148+ : <p>Example usage : <code>jaro("FLWOR Found.", "FLWOR Foundation")</code></p>
149+ : <p/>
150+ : <p>The function invocation in the example above returns : <code>0.5853174603174603</code></p>
151 :
152 : @param $s1 The first string.
153 : @param $s2 The second string.
154@@ -97,15 +93,14 @@
155 };
156
157 (:~
158- : Returns the Jaro-Winkler similarity coefficient between two strings.
159- :
160- : This similarity coefficient corresponds to an extension of the Jaro similarity coefficient that weights or
161- : penalizes strings based on their similarity at the beginning of the string, up to a given prefix size.
162- :
163- : <br/>
164- : Example usage : <code>jaro-winkler("DWAYNE", "DUANE", 4, 0.1 )</code>
165- : <br/>
166- : The function invocation in the example above returns : <code>0.8577777777777778</code>
167+ : <p>Returns the Jaro-Winkler similarity coefficient between two strings.</p>
168+ : <p/>
169+ : <p>This similarity coefficient corresponds to an extension of the Jaro similarity coefficient that weights or
170+ : penalizes strings based on their similarity at the beginning of the string, up to a given prefix size.</p>
171+ : <p/>
172+ : <p>Example usage : <code>jaro-winkler("DWAYNE", "DUANE", 4, 0.1 )</code></p>
173+ : <p/>
174+ : <p>The function invocation in the example above returns : <code>0.8577777777777778</code></p>
175 :
176 : @param $s1 The first string.
177 : @param $s2 The second string.
178@@ -122,16 +117,15 @@
179 };
180
181 (:~
182- : Returns the Needleman-Wunsch distance between two strings.
183- :
184- : The Needleman-Wunsch distance is similar to the basic edit distance metric, adding a
185+ : <p>Returns the Needleman-Wunsch distance between two strings.</p>
186+ : <p/>
187+ : <p>The Needleman-Wunsch distance is similar to the basic edit distance metric, adding a
188 : variable cost adjustment to the cost of a gap (i.e., an insertion or deletion) in the
189- : distance metric.
190- :
191- : <br/>
192- : Example usage : <code>needleman-wunsch("KAK", "KQRK", 1, 1)</code>
193- : <br/>
194- : The function invocation in the example above returns : <code>0</code>
195+ : distance metric.</p>
196+ : <p/>
197+ : <p>Example usage : <code>needleman-wunsch("KAK", "KQRK", 1, 1)</code></p>
198+ : <p/>
199+ : <p>The function invocation in the example above returns : <code>0</code></p>
200 :
201 : @param $s1 The first string.
202 : @param $s2 The second string.
203@@ -152,12 +146,11 @@
204 };
205
206 (:~
207- : Returns the Smith-Waterman distance between two strings.
208- :
209- : <br/>
210- : Example usage : <code>smith-waterman("ACACACTA", "AGCACACA", 2, 1)</code>
211- : <br/>
212- : The function invocation in the example above returns : <code>12</code>
213+ : <p>Returns the Smith-Waterman distance between two strings.</p>
214+ : <p/>
215+ : <p>Example usage : <code>smith-waterman("ACACACTA", "AGCACACA", 2, 1)</code></p>
216+ : <p/>
217+ : <p>The function invocation in the example above returns : <code>12</code></p>
218 :
219 : @param $s1 The first string.
220 : @param $s2 The second string.
221
222=== removed directory 'src/com'
223=== removed file 'src/com/CMakeLists.txt'
224--- src/com/CMakeLists.txt 2011-10-06 08:17:41 +0000
225+++ src/com/CMakeLists.txt 1970-01-01 00:00:00 +0000
226@@ -1,17 +0,0 @@
227-# Copyright 2006-2008 The FLWOR Foundation.
228-#
229-# Licensed under the Apache License, Version 2.0 (the "License");
230-# you may not use this file except in compliance with the License.
231-# You may obtain a copy of the License at
232-#
233-# http://www.apache.org/licenses/LICENSE-2.0
234-#
235-# Unless required by applicable law or agreed to in writing, software
236-# distributed under the License is distributed on an "AS IS" BASIS,
237-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
238-# See the License for the specific language governing permissions and
239-# limitations under the License.
240-
241-# all external module libraries are generated in the directory
242-# of the corresponding .xq file
243-ADD_SUBDIRECTORY(zorba-xquery)
244
245=== removed directory 'src/com/zorba-xquery'
246=== removed file 'src/com/zorba-xquery/CMakeLists.txt'
247--- src/com/zorba-xquery/CMakeLists.txt 2011-10-06 08:17:41 +0000
248+++ src/com/zorba-xquery/CMakeLists.txt 1970-01-01 00:00:00 +0000
249@@ -1,17 +0,0 @@
250-# Copyright 2006-2008 The FLWOR Foundation.
251-#
252-# Licensed under the Apache License, Version 2.0 (the "License");
253-# you may not use this file except in compliance with the License.
254-# You may obtain a copy of the License at
255-#
256-# http://www.apache.org/licenses/LICENSE-2.0
257-#
258-# Unless required by applicable law or agreed to in writing, software
259-# distributed under the License is distributed on an "AS IS" BASIS,
260-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
261-# See the License for the specific language governing permissions and
262-# limitations under the License.
263-
264-# all external module libraries are generated in the directory
265-# of the corresponding .xq file
266-ADD_SUBDIRECTORY(www)
267
268=== removed directory 'src/com/zorba-xquery/www'
269=== removed file 'src/com/zorba-xquery/www/CMakeLists.txt'
270--- src/com/zorba-xquery/www/CMakeLists.txt 2011-10-06 08:17:41 +0000
271+++ src/com/zorba-xquery/www/CMakeLists.txt 1970-01-01 00:00:00 +0000
272@@ -1,17 +0,0 @@
273-# Copyright 2006-2008 The FLWOR Foundation.
274-#
275-# Licensed under the Apache License, Version 2.0 (the "License");
276-# you may not use this file except in compliance with the License.
277-# You may obtain a copy of the License at
278-#
279-# http://www.apache.org/licenses/LICENSE-2.0
280-#
281-# Unless required by applicable law or agreed to in writing, software
282-# distributed under the License is distributed on an "AS IS" BASIS,
283-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
284-# See the License for the specific language governing permissions and
285-# limitations under the License.
286-
287-# all external module libraries are generated in the directory
288-# of the corresponding .xq file
289-ADD_SUBDIRECTORY(modules)
290
291=== removed directory 'src/com/zorba-xquery/www/modules'
292=== removed file 'src/com/zorba-xquery/www/modules/CMakeLists.txt'
293--- src/com/zorba-xquery/www/modules/CMakeLists.txt 2011-10-06 08:17:41 +0000
294+++ src/com/zorba-xquery/www/modules/CMakeLists.txt 1970-01-01 00:00:00 +0000
295@@ -1,17 +0,0 @@
296-# Copyright 2006-2008 The FLWOR Foundation.
297-#
298-# Licensed under the Apache License, Version 2.0 (the "License");
299-# you may not use this file except in compliance with the License.
300-# You may obtain a copy of the License at
301-#
302-# http://www.apache.org/licenses/LICENSE-2.0
303-#
304-# Unless required by applicable law or agreed to in writing, software
305-# distributed under the License is distributed on an "AS IS" BASIS,
306-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
307-# See the License for the specific language governing permissions and
308-# limitations under the License.
309-
310-# all external module libraries are generated in the directory
311-# of the corresponding .xq file
312-ADD_SUBDIRECTORY(data-cleaning)
313
314=== removed directory 'src/com/zorba-xquery/www/modules/data-cleaning'
315=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/consolidation.xq' => 'src/consolidation.xq'
316--- src/com/zorba-xquery/www/modules/data-cleaning/consolidation.xq 2012-11-28 18:23:07 +0000
317+++ src/consolidation.xq 2013-07-10 00:51:26 +0000
318@@ -17,33 +17,33 @@
319 :)
320
321 (:~
322- : This library module provides data consolidation functions that generally take as input a sequence of XML nodes
323- : and apply some rule in order do decide which node is better suited to represent the entire sequence.
324- :
325- : The logic contained in this module is not specific to any particular XQuery implementation,
326+ : <p>This library module provides data consolidation functions that generally take as input a sequence of XML nodes
327+ : and apply some rule in order to decide which node is better suited to represent the entire sequence.</p>
328+ : <p/>
329+ : <p>The logic contained in this module is not specific to any particular XQuery implementation,
330 : although the consolidation functions based on matching sequences against XPath expressions require
331- : some form of dynamic evaluation for XPath expressions.
332+ : some form of dynamic evaluation for XPath expressions.</p>
333 :
334 : @author Bruno Martins
335 : @project Zorba/Data Cleaning/Consolidation
336 :)
337
338-module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
339+module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
340
341-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
342-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
343+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
344+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
345
346 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
347 declare option ver:module-version "2.0";
348
349 (:~
350- : Returns the single most frequent node in a sequence of nodes provided as input.
351- : If more then one answer is possible, returns the first node according to the order of the input sequence.
352- :
353- : <br/>
354- : Example usage : <code>most-frequent( ( "a", "a", "b") )</code>
355- : <br/>
356- : The function invocation in the example above returns : <code>("a")</code>
357+ : <p>Returns the single most frequent node in a sequence of nodes provided as input.</p>
358+ : <p>If more than one answer is possible, returns the first node according to the order of the input sequence.</p>
359+ : <p/>
360+ :
361+ : <p>Example usage : <code>most-frequent( ( "a", "a", "b") )</code></p>
362+ : <p/>
363+ : <p>The function invocation in the example above returns : <code>("a")</code></p>
364 :
365 : @param $s A sequence of nodes.
366 : @return The most frequent node in the input sequence.
367@@ -54,13 +54,13 @@
368 };
369
370 (:~
371- : Returns the single less frequent node in a sequence of nodes provided as input.
372- : If more then one answer is possible, return the first node according to the order of the input sequence.
373- :
374- : <br/>
375- : Example usage : <code>least-frequent( ( "a", "a", "b") )</code>
376- : <br/>
377- : The function invocation in the example above returns : <code>("b")</code>
378+ : <p>Returns the single least frequent node in a sequence of nodes provided as input.</p>
379+ : <p>If more than one answer is possible, returns the first node according to the order of the input sequence.</p>
380+ : <p/>
381+ :
382+ : <p>Example usage : <code>least-frequent( ( "a", "a", "b") )</code></p>
383+ : <p/>
384+ : <p>The function invocation in the example above returns : <code>("b")</code></p>
385 :
386 : @param $s A sequence of nodes.
387 : @return The least frequent node in the input sequence.
388@@ -72,13 +72,13 @@
389 };
390
391 (:~
392- : Returns the single longest string, in terms of the number of characters, in a sequence of strings provided as input.
393- : If more then one answer is possible, return the first string according to the order of the input sequence.
394- :
395- : <br/>
396- : Example usage : <code>con:longest( ( "a", "aa", "aaa") )</code>
397- : <br/>
398- : The function invocation in the example above returns : <code>("aaa")</code>
399+ : <p>Returns the single longest string, in terms of the number of characters, in a sequence of strings provided as input.</p>
400+ : <p>If more than one answer is possible, returns the first string according to the order of the input sequence.</p>
401+ : <p/>
402+ :
403+ : <p>Example usage : <code>con:longest( ( "a", "aa", "aaa") )</code></p>
404+ : <p/>
405+ : <p>The function invocation in the example above returns : <code>("aaa")</code></p>
406 :
407 : @param $s A sequence of strings.
408 : @return The longest string in the input sequence.
409@@ -90,13 +90,13 @@
410 };
411
412 (:~
413- : Returns the single shortest string, in terms of the number of characters, in a sequence of strings provided as input.
414- : If more then one answer is possible, return the first string according to the order of the input sequence.
415- :
416- : <br/>
417- : Example usage : <code>shortest( ( "a", "aa", "aaa") )</code>
418- : <br/>
419- : The function invocation in the example above returns : <code>("a")</code>
420+ : <p>Returns the single shortest string, in terms of the number of characters, in a sequence of strings provided as input.</p>
421+ : <p>If more than one answer is possible, returns the first string according to the order of the input sequence.</p>
422+ : <p/>
423+ :
424+ : <p>Example usage : <code>shortest( ( "a", "aa", "aaa") )</code></p>
425+ : <p/>
426+ : <p>The function invocation in the example above returns : <code>("a")</code></p>
427 :
428 : @param $s A sequence of strings.
429 : @return The shortest string in the input sequence.
430@@ -108,13 +108,13 @@
431 };
432
433 (:~
434- : Returns the single longest string, in terms of the number of tokens, in a sequence of strings provided as input.
435- : If more then one answer is possible, return the first string according to the order of the input sequence.
436- :
437- : <br/>
438- : Example usage : <code>most-tokens( ( "a b c", "a b", "a"), " +" )</code>
439- : <br/>
440- : The function invocation in the example above returns : <code>("a b c")</code>
441+ : <p>Returns the single longest string, in terms of the number of tokens, in a sequence of strings provided as input.</p>
442+ : <p>If more than one answer is possible, returns the first string according to the order of the input sequence.</p>
443+ : <p/>
444+ :
445+ : <p>Example usage : <code>most-tokens( ( "a b c", "a b", "a"), " +" )</code></p>
446+ : <p/>
447+ : <p>The function invocation in the example above returns : <code>("a b c")</code></p>
448 :
449 : @param $s A sequence of strings.
450 : @param $r A regular expression forming the delimiter character(s) which mark the boundaries between adjacent tokens.
451@@ -127,13 +127,13 @@
452 };
453
454 (:~
455- : Returns the single shortest string, in terms of the number of tokens, in a sequence of strings provided as input.
456- : If more then one answer is possible, return the first string according to the order of the input sequence.
457- :
458- : <br/>
459- : Example usage : <code>least-tokens( ( "a b c", "a b", "a"), " +" )</code>
460- : <br/>
461- : The function invocation in the example above returns : <code>("a")</code>
462+ : <p>Returns the single shortest string, in terms of the number of tokens, in a sequence of strings provided as input.</p>
463+ : <p>If more than one answer is possible, returns the first string according to the order of the input sequence.</p>
464+ : <p/>
465+ :
466+ : <p>Example usage : <code>least-tokens( ( "a b c", "a b", "a"), " +" )</code></p>
467+ : <p/>
468+ : <p>The function invocation in the example above returns : <code>("a")</code></p>
469 :
470 : @param $s A sequence of strings.
471 : @param $r A regular expression forming the delimiter character(s) which mark the boundaries between adjacent tokens.
472@@ -146,12 +146,12 @@
473 };
474
475 (:~
476- : Returns the strings from an input sequence of strings that match a particular regular expression.
477- :
478- : <br/>
479- : Example usage : <code>matching( ( "a A b", "c AAA d", "e BB f"), "A+" )</code>
480- : <br/>
481- : The function invocation in the example above returns : <code>( "a A b", "c AAA d")</code>
482+ : <p>Returns the strings from an input sequence of strings that match a particular regular expression.</p>
483+ : <p/>
484+ :
485+ : <p>Example usage : <code>matching( ( "a A b", "c AAA d", "e BB f"), "A+" )</code></p>
486+ : <p/>
487+ : <p>The function invocation in the example above returns : <code>( "a A b", "c AAA d")</code></p>
488 :
489 : @param $s A sequence of strings.
490 : @param $r The regular expression to be used in the matching.
491@@ -163,14 +163,14 @@
492 };
493
494 (:~
495- : Returns the single string, from an input sequence of strings, that appears more frequently as part
496- : of the other strings in the sequence. If no such string exists, the function returns an empty sequence.
497- : If more then one answer is possible, the function returns the first string according to the order of the input sequence.
498- :
499- : <br/>
500- : Example usage : <code>super-string( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) )</code>
501- : <br/>
502- : The function invocation in the example above returns : <code>( "aaa bbb" )</code>
503+ : <p>Returns the single string, from an input sequence of strings, that appears more frequently as part
504+ : of the other strings in the sequence. If no such string exists, the function returns an empty sequence.</p>
505+ : <p>If more than one answer is possible, the function returns the first string according to the order of the input sequence.</p>
506+ : <p/>
507+ :
508+ : <p>Example usage : <code>super-string( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) )</code></p>
509+ : <p/>
510+ : <p>The function invocation in the example above returns : <code>( "aaa bbb" )</code></p>
511 :
512 : @param $s A sequence of strings.
513 : @return The string that appears more frequently as part of the other strings in the sequence.
514@@ -187,15 +187,15 @@
515 };
516
517 (:~
518- : Returns the single most similar string, in terms of the edit distance metric towards an input string,
519+ : <p>Returns the single most similar string, in terms of the edit distance metric towards an input string,
520 : in a sequence of strings provided as input. If more than one string has a maximum similarity (a minimum
521 : value for the edit distance metric), the function return the first string according to the order of the
522- : input sequence.
523- :
524- : <br/>
525- : Example usage : <code>most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code>
526- : <br/>
527- : The function invocation in the example above returns : <code>( "aaabbb" )</code>
528+ : input sequence.</p>
529+ : <p/>
530+ :
531+ : <p>Example usage : <code>most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code></p>
532+ : <p/>
533+ : <p>The function invocation in the example above returns : <code>( "aaabbb" )</code></p>
534 :
535 : @param $s A sequence of strings.
536 : @param $m The string towards which we want to measure the edit distance.
537@@ -208,14 +208,14 @@
538 };
539
540 (:~
541- : Returns the single least similar string, in terms of the edit distance metric towards an input string,
542+ : <p>Returns the single least similar string, in terms of the edit distance metric towards an input string,
543 : in a sequence of strings provided as input. If more than one string has a minimum similarity (a maximum
544- : value for the edit distance metric), return the first string according to the order of the input sequence.
545- :
546- : <br/>
547- : Example usage : <code>least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code>
548- : <br/>
549- : The function invocation in the example above returns : <code>( "eeefff" )</code>
550+ : value for the edit distance metric), return the first string according to the order of the input sequence.</p>
551+ : <p/>
552+ :
553+ : <p>Example usage : <code>least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code></p>
554+ : <p/>
555+ : <p>The function invocation in the example above returns : <code>( "eeefff" )</code></p>
556 :
557 : @param $s A sequence of strings.
558 : @param $m The string towards which we want to measure the edit distance.
559@@ -228,14 +228,14 @@
560 };
561
562 (:~
563- : Returns the single node having the largest number of descending elements (sub-elements at any given depth)
564- : in a sequence of nodes provided as input.
565- : If more then one answer is possible, return the first node according to the order of the input sequence.
566- :
567- : <br/>
568- : Example usage : <code>most-elements( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;a/&gt;, &lt;b/&gt;) )</code>
569- : <br/>
570- : The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code>
571+ : <p>Returns the single node having the largest number of descending elements (sub-elements at any given depth)
572+ : in a sequence of nodes provided as input.</p>
573+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
574+ : <p/>
575+ :
576+ : <p>Example usage : <code>most-elements( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;a/&gt;, &lt;b/&gt;) )</code></p>
577+ : <p/>
578+ : <p>The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code></p>
579 :
580 : @param $s A sequence of nodes.
581 : @return The node having the largest number of descending elements in the input sequence.
582@@ -246,14 +246,14 @@
583 };
584
585 (:~
586- : Returns the single node having the largest number of descending attributes (attributes at any given depth)
587- : in a sequence of nodes provided as input.
588- : If more then one answer is possible, return the first node according to the order of the input sequence.
589- :
590- : <br/>
591- : Example usage : <code>most-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) )</code>
592- : <br/>
593- : The function invocation in the example above returns : <code>(&lt;a att1="a1" att2="a2"/&gt;)</code>
594+ : <p>Returns the single node having the largest number of descending attributes (attributes at any given depth)
595+ : in a sequence of nodes provided as input.</p>
596+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
597+ : <p/>
598+ :
599+ : <p>Example usage : <code>most-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) )</code></p>
600+ : <p/>
601+ : <p>The function invocation in the example above returns : <code>(&lt;a att1="a1" att2="a2"/&gt;)</code></p>
602 :
603 : @param $s A sequence of nodes.
604 : @return The node having the largest number of descending attributes in the input sequence.
605@@ -264,14 +264,14 @@
606 };
607
608 (:~
609- : Returns the single node having the largest number of descending nodes (sub-nodes at any given depth) in a
610- : sequence of nodes provided as input.
611- : If more then one answer is possible, return the first node according to the order of the input sequence.
612- :
613- : <br/>
614- : Example usage : <code>most-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;a/&gt;, &lt;b/&gt;) )</code>
615- : <br/>
616- : The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code>
617+ : <p>Returns the single node having the largest number of descending nodes (sub-nodes at any given depth) in a
618+ : sequence of nodes provided as input.</p>
619+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
620+ : <p/>
621+ :
622+ : <p>Example usage : <code>most-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;a/&gt;, &lt;b/&gt;) )</code></p>
623+ : <p/>
624+ : <p>The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code></p>
625 :
626 : @param $s A sequence of nodes.
627 : @return The node having the largest number of descending nodes in the input sequence.
628@@ -282,14 +282,14 @@
629 };
630
631 (:~
632- : Returns the single node having the smallest number of descending elements (sub-elements at any given depth)
633- : in a sequence of nodes provided as input.
634- : If more then one answer is possible, return the first node according to the order of the input sequence.
635- :
636- : <br/>
637- : Example usage : <code>least-elements( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) )</code>
638- : <br/>
639- : The function invocation in the example above returns : <code>(&lt;d/&gt;)</code>
640+ : <p>Returns the single node having the smallest number of descending elements (sub-elements at any given depth)
641+ : in a sequence of nodes provided as input.</p>
642+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
643+ : <p/>
644+ :
645+ : <p>Example usage : <code>least-elements( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) )</code></p>
646+ : <p/>
647+ : <p>The function invocation in the example above returns : <code>(&lt;d/&gt;)</code></p>
648 :
649 : @param $s A sequence of nodes.
650 : @return The node having the smallest number of descending elements in the input sequence.
651@@ -300,14 +300,14 @@
652 };
653
654 (:~
655- : Returns the single node having the smallest number of descending attributes (attributes at any given depth)
656- : in a sequence of nodes provided as input.
657- : If more then one answer is possible, return the first node according to the order of the input sequence.
658- :
659- : <br/>
660- : Example usage : <code>least-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) )</code>
661- : <br/>
662- : The function invocation in the example above returns : <code>(&lt;c/&gt;)</code>
663+ : <p>Returns the single node having the smallest number of descending attributes (attributes at any given depth)
664+ : in a sequence of nodes provided as input.</p>
665+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
666+ : <p/>
667+ :
668+ : <p>Example usage : <code>least-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) )</code></p>
669+ : <p/>
670+ : <p>The function invocation in the example above returns : <code>(&lt;c/&gt;)</code></p>
671 :
672 : @param $s A sequence of nodes.
673 : @return The node having the smallest number of descending attributes in the input sequence.
674@@ -318,14 +318,14 @@
675 };
676
677 (:~
678- : Returns the single node having the smallest number of descending nodes (sub-nodes at any given depth)
679- : in a sequence of nodes provided as input.
680- : If more then one answer is possible, return the first node according to the order of the input sequence.
681- :
682- : <br/>
683- : Example usage : <code>least-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) )</code>
684- : <br/>
685- : The function invocation in the example above returns : <code>(&lt;d/&gt;)</code>
686+ : <p>Returns the single node having the smallest number of descending nodes (sub-nodes at any given depth)
687+ : in a sequence of nodes provided as input.</p>
688+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
689+ : <p/>
690+ :
691+ : <p>Example usage : <code>least-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) )</code></p>
692+ : <p/>
693+ : <p>The function invocation in the example above returns : <code>(&lt;d/&gt;)</code></p>
694 :
695 : @param $s A sequence of nodes.
696 : @return The node having the smallest number of descending nodes in the input sequence.
697@@ -336,14 +336,14 @@
698 };
699
700 (:~
701- : Returns the single node having the largest number of distinct descending elements (sub-elements at any
702- : given depth) in a sequence of nodes provided as input.
703- : If more then one answer is possible, return the first node according to the order of the input sequence.
704- :
705- : <br/>
706- : Example usage : <code>most-distinct-elements( ( &lt;a&gt;&lt;b/&gt;&lt;c/&gt;&lt;d/&gt;&lt;/a&gt;, &lt;a&gt;&lt;b/&gt;&lt;b/&gt;&lt;c/&gt;&lt;/a&gt;, &lt;a/&gt; ) )</code>
707- : <br/>
708- : The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;c/&gt;&lt;d/&gt;&lt;/a&gt;)</code>
709+ : <p>Returns the single node having the largest number of distinct descending elements (sub-elements at any
710+ : given depth) in a sequence of nodes provided as input.</p>
711+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
712+ : <p/>
713+ :
714+ : <p>Example usage : <code>most-distinct-elements( ( &lt;a&gt;&lt;b/&gt;&lt;c/&gt;&lt;d/&gt;&lt;/a&gt;, &lt;a&gt;&lt;b/&gt;&lt;b/&gt;&lt;c/&gt;&lt;/a&gt;, &lt;a/&gt; ) )</code></p>
715+ : <p/>
716+ : <p>The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;c/&gt;&lt;d/&gt;&lt;/a&gt;)</code></p>
717 :
718 : @param $s A sequence of nodes.
719 : @return The node having the largest number of distinct descending elements in the input sequence.
720@@ -354,14 +354,14 @@
721 };
722
723 (:~
724- : Returns the single node having the largest number of distinct descending attributes (attributes at any
725- : given depth) in a sequence of nodes provided as input.
726- : If more then one answer is possible, return the first node according to the order of the input sequence.
727- :
728- : <br/>
729- : Example usage : <code>most-distinct-attributes( ( &lt;a att1="a1" att2="a2" att3="a3"/&gt;, &lt;a att1="a1" att2="a2"&gt;&lt;b att2="a2" /&gt;&lt;/a&gt;, &lt;c/&gt; ) )</code>
730- : <br/>
731- : The function invocation in the example above returns : <code>(&lt;a att1="a1" att2="a2" att3="a3"/&gt;)</code>
732+ : <p>Returns the single node having the largest number of distinct descending attributes (attributes at any
733+ : given depth) in a sequence of nodes provided as input.</p>
734+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
735+ : <p/>
736+ :
737+ : <p>Example usage : <code>most-distinct-attributes( ( &lt;a att1="a1" att2="a2" att3="a3"/&gt;, &lt;a att1="a1" att2="a2"&gt;&lt;b att2="a2" /&gt;&lt;/a&gt;, &lt;c/&gt; ) )</code></p>
738+ : <p/>
739+ : <p>The function invocation in the example above returns : <code>(&lt;a att1="a1" att2="a2" att3="a3"/&gt;)</code></p>
740 :
741 : @param $s A sequence of nodes.
742 : @return The node having the largest number of distinct descending attributes in the input sequence.
743@@ -372,14 +372,14 @@
744 };
745
746 (:~
747- : Returns the single node having the largest number of distinct descending nodes (sub-nodes at any given depth) in
748- : a sequence of nodes provided as input.
749- : If more then one answer is possible, return the first node according to the order of the input sequence.
750- :
751- : <br/>
752- : Example usage : <code>most-distinct-nodes( ( &lt;a>&lt;b/>&lt;/a&gt;, &lt;a>&lt;a/&gt;&lt;/a&gt;, &lt;b/&gt;) )</code>
753- : <br/>
754- : The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code>
755+ : <p>Returns the single node having the largest number of distinct descending nodes (sub-nodes at any given depth) in
756+ : a sequence of nodes provided as input.</p>
757+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
758+ : <p/>
759+ :
760+ : <p>Example usage : <code>most-distinct-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;a&gt;&lt;a/&gt;&lt;/a&gt;, &lt;b/&gt;) )</code></p>
761+ : <p/>
762+ : <p>The function invocation in the example above returns : <code>(&lt;a&gt;&lt;b/&gt;&lt;/a&gt;)</code></p>
763 :
764 : @param $s A sequence of nodes.
765 : @return The node having the largest number of distinct descending nodes in the input sequence.
766@@ -390,14 +390,14 @@
767 };
768
769 (:~
770- : Returns the single node having the smallest number of distinct descending elements (sub-elements at any
771- : given depth) in a sequence of nodes provided as input.
772- : If more then one answer is possible, return the first node according to the order of the input sequence.
773- :
774- : <br/>
775- : Example usage : <code> least-distinct-elements( ( &lt;a>&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) ) </code>
776- : <br/>
777- : The function invocation in the example above returns : <code> (&lt;d/&gt;) </code>
778+ : <p>Returns the single node having the smallest number of distinct descending elements (sub-elements at any
779+ : given depth) in a sequence of nodes provided as input.</p>
780+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
781+ : <p/>
782+ :
783+ : <p>Example usage : <code> least-distinct-elements( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) ) </code></p>
784+ : <p/>
785+ : <p>The function invocation in the example above returns : <code> (&lt;d/&gt;) </code></p>
786 :
787 : @param $s A sequence of nodes.
788 : @return The node having the smallest number of distinct descending elements in the input sequence.
789@@ -408,14 +408,14 @@
790 };
791
792 (:~
793- : Returns the single node having the smallest number of distinct descending attributes (attributes at any
794- : given depth) in a sequence of nodes provided as input.
795- : If more then one answer is possible, return the first node according to the order of the input sequence.
796- :
797- : <br/>
798- : Example usage : <code> least-distinct-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) ) </code>
799- : <br/>
800- : The function invocation in the example above returns : <code> (&lt;c/&gt;) </code>
801+ : <p>Returns the single node having the smallest number of distinct descending attributes (attributes at any
802+ : given depth) in a sequence of nodes provided as input.</p>
803+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
804+ : <p/>
805+ :
806+ : <p>Example usage : <code> least-distinct-attributes( ( &lt;a att1="a1" att2="a2"/&gt;, &lt;b att1="a1" /&gt;, &lt;c/&gt; ) ) </code></p>
807+ : <p/>
808+ : <p>The function invocation in the example above returns : <code> (&lt;c/&gt;) </code></p>
809 :
810 : @param $s A sequence of nodes.
811 : @return The node having the smallest number of distinct descending attributes in the input sequence.
812@@ -426,14 +426,14 @@
813 };
814
815 (:~
816- : Returns the single node having the smallest number of distinct descending nodes (sub-nodes at any given depth)
817- : in a sequence of nodes provided as input.
818- : If more then one answer is possible, return the first node according to the order of the input sequence.
819- :
820- : <br/>
821- : Example usage : <code> least-distinct-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) ) </code>
822- : <br/>
823- : The function invocation in the example above returns : <code> (&lt;d/&gt;) </code>
824+ : <p>Returns the single node having the smallest number of distinct descending nodes (sub-nodes at any given depth)
825+ : in a sequence of nodes provided as input.</p>
826+ : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p>
827+ : <p/>
828+ :
829+ : <p>Example usage : <code> least-distinct-nodes( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;b&gt;&lt;c/&gt;&lt;/b&gt;, &lt;d/&gt;) ) </code></p>
830+ : <p/>
831+ : <p>The function invocation in the example above returns : <code> (&lt;d/&gt;) </code></p>
832 :
833 : @param $s A sequence of nodes.
834 : @return The node having the smallest number of distinct descending nodes in the input sequence.
835@@ -444,13 +444,13 @@
836 };
837
838 (:~
839- : Returns the elements from an input sequence of elements that, when matched to a given set of XPath expressions,
840- : produce a non-empty set of nodes in all the cases.
841- :
842- : <br/>
843- : Example usage : <code> all-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;c&gt;&lt;d/&gt;&lt;/c&gt;, &lt;d/&gt;), (".//b") ) </code>
844- : <br/>
845- : The function invocation in the example above returns : <code> (&lt;a&gt;&lt;b/&gt;&lt;/a&gt;) </code>
846+ : <p>Returns the elements from an input sequence of elements that, when matched to a given set of XPath expressions,
847+ : produce a non-empty set of nodes in all the cases.</p>
848+ : <p/>
849+ :
850+ : <p>Example usage : <code> all-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;c&gt;&lt;d/&gt;&lt;/c&gt;, &lt;d/&gt;), (".//b") ) </code></p>
851+ : <p/>
852+ : <p>The function invocation in the example above returns : <code> (&lt;a&gt;&lt;b/&gt;&lt;/a&gt;) </code></p>
853 :
854 : @param $s A sequence of elements.
855 : @param $paths A sequence of strings denoting XPath expressions.
856@@ -470,13 +470,13 @@
857 };
858
859 (:~
860- : Returns the elements from a sequence of elements that, when matched to a given set of XPath expressions,
861- : produce a non-empty set of nodes for some of the cases.
862- :
863- : <br/>
864- : Example usage : <code> some-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;/d&gt;, &lt;d/&gt;), (".//b", ".//c") ) </code>
865- : <br/>
866- : The function invocation in the example above returns : <code> ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt; , &lt;d&gt;&lt;c/&gt;&lt;/d&gt; ) </code>
867+ : <p>Returns the elements from a sequence of elements that, when matched to a given set of XPath expressions,
868+ : produce a non-empty set of nodes for some of the cases.</p>
869+ : <p/>
870+ :
871+ : <p>Example usage : <code> some-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;/d&gt;, &lt;d/&gt;), (".//b", ".//c") ) </code></p>
872+ : <p/>
873+ : <p>The function invocation in the example above returns : <code> ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt; , &lt;d&gt;&lt;c/&gt;&lt;/d&gt; ) </code></p>
874 :
875 : @param $s A sequence of elements.
876 : @param $paths A sequence of strings denoting XPath expressions.
877@@ -497,14 +497,14 @@
878 };
879
880 (:~
881- : Returns the single element from an input sequence of elements that matches the largest number of
882- : XPath expressions from a given set, producing a non-empty set of nodes.
883- : If more then one answer is possible, return the first element according to the order of the input sequence.
884- :
885- : <br/>
886- : Example usage : <code> most-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt;, &lt;d/&gt;) , (".//b", ".//c") ) </code>
887- : <br/>
888- : The function invocation in the example above returns : <code> ( &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt; ) </code>
889+ : <p>Returns the single element from an input sequence of elements that matches the largest number of
890+ : XPath expressions from a given set, producing a non-empty set of nodes.</p>
891+ : <p>If more than one answer is possible, return the first element according to the order of the input sequence.</p>
892+ : <p/>
893+ :
894+ : <p>Example usage : <code> most-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt;, &lt;d/&gt;) , (".//b", ".//c") ) </code></p>
895+ : <p/>
896+ : <p>The function invocation in the example above returns : <code> ( &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt; ) </code></p>
897 :
898 : @param $s A sequence of elements.
899 : @param $paths A sequence of strings denoting XPath expressions.
900@@ -524,18 +524,18 @@
901 return $str
902 )[1]
903 :)
904- ""
905+ ""
906 };
907
908 (:~
909- : Returns the single element from an input sequence of elements that matches the smallest number of
910- : XPath expressions from a given set, producing a non-empty set of nodes.
911- : If more then one answer is possible, return the first element according to the order of the input sequence.
912- :
913- : <br/>
914- : Example usage : <code> least-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt;, &lt;d/&gt;) , (".//b", ".//c") ) </code>
915- : <br/>
916- : The function invocation in the example above returns : <code> ( $lt;d/&gt; ) </code>
917+ : <p>Returns the single element from an input sequence of elements that matches the smallest number of
918+ : XPath expressions from a given set, producing a non-empty set of nodes.</p>
919+ : <p>If more than one answer is possible, return the first element according to the order of the input sequence.</p>
920+ : <p/>
921+ :
922+ : <p>Example usage : <code> least-xpaths( ( &lt;a&gt;&lt;b/&gt;&lt;/a&gt;, &lt;d&gt;&lt;c/&gt;&lt;b/&gt;&lt;/d&gt;, &lt;d/&gt;) , (".//b", ".//c") ) </code></p>
923+ : <p/>
924+ : <p>The function invocation in the example above returns : <code> ( &lt;d/&gt; ) </code></p>
925 :
926 : @param $s A sequence of elements.
927 : @param $paths A sequence of strings denoting XPath expressions.
928@@ -559,18 +559,18 @@
929 };
930
931 (:~
932- : Returns the nodes from an input sequence of nodes that validate against a given XML Schema.
933- :
934- : <br/>
935- : Example usage : <code> validating-schema ( ( &lt;a/&gt; , &lt;b/&gt; ), &lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"&gt;&lt;xs:element name="a" /&gt;&lt;/xs:schema&gt; ) </code>
936- : <br/>
937- : The function invocation in the example above returns : <code> ( &lt;a/&gt; ) </code>
938+ : <p>Returns the nodes from an input sequence of nodes that validate against a given XML Schema.</p>
939+ : <p/>
940+ :
941+ : <p>Example usage : <code> validating-schema ( ( &lt;a/&gt; , &lt;b/&gt; ), &lt;xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"&gt;&lt;xs:element name="a" /&gt;&lt;/xs:schema&gt; ) </code></p>
942+ : <p/>
943+ : <p>The function invocation in the example above returns : <code> ( &lt;a/&gt; ) </code></p>
944 :
945 : @param $s A sequence of elements.
946 : @param $schema An element encoding an XML Schema.
947 : @return The nodes that validate against the XML Schema.
948 :
949- : <br/><br/><b> Attention : This function is still not implemented. </b> <br/>
950+ : <p><b> Attention : This function is still not implemented. </b></p>
951 :
952 :)
953 declare function con:validating-schema ( $s as element()*, $schema as element() ) as element()*{
954
955=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/conversion.xq' => 'src/conversion.xq'
956--- src/com/zorba-xquery/www/modules/data-cleaning/conversion.xq 2012-09-28 13:34:20 +0000
957+++ src/conversion.xq 2013-07-10 00:51:26 +0000
958@@ -17,16 +17,16 @@
959 :)
960
961 (:~
962- : This library module provides data conversion functions for processing calendar dates,
963- : temporal values, currency values, units of measurement, location names and postal addresses.
964- :
965- : The logic contained in this module is not specific to any particular XQuery implementation.
966+ : <p>This library module provides data conversion functions for processing calendar dates,
967+ : temporal values, currency values, units of measurement, location names and postal addresses.</p>
968+ : <p/>
969+ : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
970 :
971 : @author Bruno Martins and Diogo Simões
972 : @project Zorba/Data Cleaning/Conversion
973 :)
974
975-module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
976+module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
977
978 declare namespace exref = "http://www.ecb.int/vocabulary/2002-08-01/eurofxref";
979 declare namespace an = "http://www.zorba-xquery.com/annotations";
980@@ -40,12 +40,12 @@
981 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
982 declare option ver:module-version "2.0";
983
984-(:~ The key to be used when accessing the White Pages Web service :)
985+(:~ <p>The key to be used when accessing the White Pages Web service</p> :)
986 declare variable $conversion:key := "06ea2f21cc15602b6a3e242e3225a81a";
987
988 (:~
989- : Uses a White-pages Web service to discover information about a given name,
990- : returning a sequence of strings for the phone numbers associated to the name.
991+ : <p>Uses a White-pages Web service to discover information about a given name,
992+ : returning a sequence of strings for the phone numbers associated to the name.</p>
993 :
994 :
995 : @param $name The name of person or organization.
996@@ -61,8 +61,8 @@
997 };
998
999 (:~
1000- : Uses a White-pages Web service to discover information about a given name,
1001- : returning a sequence of strings for the addresses associated to the name.
1002+ : <p>Uses a White-pages Web service to discover information about a given name,
1003+ : returning a sequence of strings for the addresses associated to the name.</p>
1004 :
1005 :
1006 : @param $name The name of person or organization.
1007@@ -83,8 +83,8 @@
1008
1009
1010 (:~
1011- : Uses a White-pages Web service to discover information about a given phone number,
1012- : returning a sequence of strings for the name associated to the phone number.
1013+ : <p>Uses a White-pages Web service to discover information about a given phone number,
1014+ : returning a sequence of strings for the name associated to the phone number.</p>
1015 :
1016 :
1017 : @param $phone-number A string with 10 digits corresponding to the phone number.
1018@@ -98,8 +98,8 @@
1019 };
1020
1021 (:~
1022- : Uses a White-pages Web service to discover information about a given phone number,
1023- : returning a string for the address associated to the phone number.
1024+ : <p>Uses a White-pages Web service to discover information about a given phone number,
1025+ : returning a string for the address associated to the phone number.</p>
1026 :
1027 :
1028 : @param $phone-number A string with 10 digits corresponding to the phone number.
1029@@ -120,8 +120,8 @@
1030 };
1031
1032 (:~
1033- : Uses a White-pages Web service to discover information about a given address,
1034- : returning a sequence of strings for the names associated to the address.
1035+ : <p>Uses a White-pages Web service to discover information about a given address,
1036+ : returning a sequence of strings for the names associated to the address.</p>
1037 :
1038 :
1039 : @param $address A string corresponding to the address (ex: 5655 E Gaskill Rd, Willcox, AZ, US).
1040@@ -146,8 +146,8 @@
1041 };
1042
1043 (:~
1044- : Uses a White-pages Web service to discover information about a given address,
1045- : returning a sequence of strings for the phone number associated to the address.
1046+ : <p>Uses a White-pages Web service to discover information about a given address,
1047+ : returning a sequence of strings for the phone number associated to the address.</p>
1048 :
1049 :
1050 : @param $address A string corresponding to the address (ex: 5655 E Gaskill Rd, Willcox, AZ, US).
1051@@ -180,8 +180,8 @@
1052 };
1053
1054 (:~
1055- : Conversion function for units of measurement, acting as a wrapper over the CuppaIT WebService.
1056- : <br/>
1057+ : <p>Conversion function for units of measurement, acting as a wrapper over the CuppaIT WebService.</p>
1058+ :
1059 :
1060 :
1061 : @param $v The amount we wish to convert.
1062@@ -296,7 +296,7 @@
1063 };
1064
1065 (:~
1066- : Placename to geospatial coordinates converter, acting as a wrapper over the Yahoo! geocoder service.
1067+ : <p>Placename to geospatial coordinates converter, acting as a wrapper over the Yahoo! geocoder service.</p>
1068 :
1069 :
1070 : @param $q A sequence of strings corresponding to the different components (e.g., street, city, country, etc.) of the place name.
1071@@ -313,7 +313,7 @@
1072 };
1073
1074 (:~
1075- : Geospatial coordinates to placename converter, acting as a wrapper over the Yahoo! reverse geocoder service.
1076+ : <p>Geospatial coordinates to placename converter, acting as a wrapper over the Yahoo! reverse geocoder service.</p>
1077 :
1078 :
1079 : @param $lat Geospatial latitude.
1080@@ -337,9 +337,9 @@
1081 };
1082
1083 (:~
1084- : Currency conversion function, acting as a wrapper over the WebService from the European Central Bank.
1085- :
1086- : WebService documentation at http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html
1087+ : <p>Currency conversion function, acting as a wrapper over the WebService from the European Central Bank.</p>
1088+ : <p/>
1089+ : <p>WebService documentation at <a href="http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html">http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html</a></p>
1090 :
1091 :
1092 : @param $v The amount we wish to convert.
1093@@ -347,7 +347,7 @@
1094 : @param $m2 The target currency (e.g., "USD").
1095 : @param $date The reference date.
1096 : @return The value resulting from the conversion.
1097- : @error conversion:notsupported if the date, the source currency type or the target currency type are not known to the service.
1098+ : @error conversion:NOTSUPPORTED if the date, the source currency type or the target currency type are not known to the service.
1099 : @see http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html
1100 : @example test/Queries/data-cleaning/conversion/currency-convert.xq
1101 :)
1102@@ -361,17 +361,17 @@
1103 let $fromEUR := if ( $m2="EUR" ) then (xs:double(1.0)) else ( $doc//exref:Cube[xs:string(@currency)=$m2]/xs:double(@rate) )
1104 let $result := ($v div $toEUR) * $fromEUR
1105 return if (matches(string($result),"-?[0-9]+(\.[0-9]+)?")) then ($result)
1106- else (error(QName('http://www.zorba-xquery.com/modules/data-cleaning/conversion', 'conversion:notsupported'), data($result)))
1107+ else (error(QName('http://zorba.io/modules/data-cleaning/conversion', 'conversion:NOTSUPPORTED'), data($result)))
1108 };
1109
1110 (:~
1111- : Uses a whois service to discover information about a given domain name, returning a sequence of strings
1112- : for the phone numbers associated to the name.
1113+ : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings
1114+ : for the phone numbers associated to the name.</p>
1115 :
1116 : @param $domain A string with the domain.
1117 : @return A sequence of strings for the phone numbers associated to the domain name.
1118 :
1119- : <br/><br/><b> Attention : This function is still not implemented. </b> <br/>
1120+ : <p><b> Attention : This function is still not implemented. </b></p>
1121 :
1122 :)
1123 declare function conversion:phone-from-domain ( $domain as xs:string ) as xs:string*{
1124@@ -379,13 +379,13 @@
1125 };
1126
1127 (:~
1128- : Uses a whois service to discover information about a given domain name, returning a sequence of strings
1129- : for the addresses associated to the name.
1130+ : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings
1131+ : for the addresses associated to the name.</p>
1132 :
1133 : @param $domain A string with the domain.
1134 : @return A sequence of strings for the addresses associated to the domain name.
1135 :
1136- : <br/><br/><b> Attention : This function is still not implemented. </b> <br/>
1137+ : <p><b> Attention : This function is still not implemented. </b></p>
1138 :
1139 :)
1140 declare function conversion:address-from-domain ( $domain as xs:string ) as xs:string*{
1141@@ -393,13 +393,13 @@
1142 };
1143
1144 (:~
1145- : Uses a whois service to discover information about a given domain name, returning a sequence of strings
1146- : for the person or organization names associated to the name.
1147+ : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings
1148+ : for the person or organization names associated to the name.</p>
1149 :
1150 : @param $domain A string with the domain.
1151 : @return A sequence of strings for the person or organization names associated to the domain name.
1152 :
1153- : <br/><br/><b> Attention : This function is still not implemented. </b> <br/>
1154+ : <p><b> Attention : This function is still not implemented. </b></p>
1155 :
1156 :)
1157 declare function conversion:name-from-domain ( $domain as xs:string ) as xs:string*{
1158
1159=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/hybrid-string-similarity.xq' => 'src/hybrid-string-similarity.xq'
1160--- src/com/zorba-xquery/www/modules/data-cleaning/hybrid-string-similarity.xq 2012-09-28 13:34:20 +0000
1161+++ src/hybrid-string-similarity.xq 2013-07-10 00:51:26 +0000
1162@@ -17,43 +17,43 @@
1163 :)
1164
1165 (:~
1166- : This library module provides hybrid string similarity functions, combining the properties of
1167- : character-based string similarity functions and token-based string similarity functions.
1168- :
1169- : The logic contained in this module is not specific to any particular XQuery implementation,
1170+ : <p>This library module provides hybrid string similarity functions, combining the properties of
1171+ : character-based string similarity functions and token-based string similarity functions.</p>
1172+ : <p/>
1173+ : <p>The logic contained in this module is not specific to any particular XQuery implementation,
1174 : although the module requires the trigonometric functions of XQuery 3.0 or a math extension
1175- : function such as sqrt($x as numeric) for computing the square root.
1176+ : function such as sqrt($x as numeric) for computing the square root.</p>
1177 :
1178 : @author Bruno Martins and Diogo Simões
1179 : @project Zorba/Data Cleaning/Hybrid String Similarity
1180 :)
1181
1182-module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
1183+module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
1184
1185-(: In the QizX os Saxon XQuery engines, it is possible to call external functions from the Java math library :)
1186-(: declare namespace math = "java:java.lang.Math"; :)
1187+(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :)
1188+(: declare namespace math = "java:java.lang.Math"; :)
1189 declare namespace math = "http://www.w3.org/2005/xpath-functions/math";
1190
1191-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
1192-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
1193-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
1194-import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
1195+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
1196+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
1197+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
1198+import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
1199
1200 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
1201 declare option ver:module-version "2.0";
1202
1203 (:~
1204- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings.
1205- :
1206- : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the
1207- : term-frequency heuristic from Information Retrieval).
1208- : The Soundex phonetic similarity function is used to discover token identity, which is equivalent to saying that
1209- : this function returns the cosine similarity coefficient between sets of Soundex keys.
1210- :
1211- : <br/>
1212- : Example usage : <code> soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +") </code>
1213- : <br/>
1214- : The function invocation in the example above returns : <code> 1.0 </code>
1215+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p>
1216+ : <p/>
1217+ : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the
1218+ : term-frequency heuristic from Information Retrieval).</p>
1219+ : <p>The Soundex phonetic similarity function is used to discover token identity, which is equivalent to saying that
1220+ : this function returns the cosine similarity coefficient between sets of Soundex keys.</p>
1221+ : <p/>
1222+ :
1223+ : <p>Example usage : <code> soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +") </code></p>
1224+ : <p/>
1225+ : <p>The function invocation in the example above returns : <code> 1.0 </code></p>
1226 :
1227 : @param $s1 The first string.
1228 : @param $s2 The second string.
1229@@ -68,16 +68,16 @@
1230 };
1231
1232 (:~
1233- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings.
1234- : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the
1235- : term-frequency heuristic from Information Retrieval).
1236- : The Metaphone phonetic similarity function is used to discover token identity, which is equivalent to saying that
1237- : this function returns the cosine similarity coefficient between sets of Metaphone keys.
1238- :
1239- : <br/>
1240- : Example usage : <code> soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" ) </code>
1241- : <br/>
1242- : The function invocation in the example above returns : <code> 1.0 </code>
1243+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p>
1244+ : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the
1245+ : term-frequency heuristic from Information Retrieval).</p>
1246+ : <p>The Metaphone phonetic similarity function is used to discover token identity, which is equivalent to saying that
1247+ : this function returns the cosine similarity coefficient between sets of Metaphone keys.</p>
1248+ : <p/>
1249+ :
1250+ : <p>Example usage : <code> soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" ) </code></p>
1251+ : <p/>
1252+ : <p>The function invocation in the example above returns : <code> 1.0 </code></p>
1253 :
1254 : @param $s1 The first string.
1255 : @param $s2 The second string.
1256@@ -92,16 +92,16 @@
1257 };
1258
1259 (:~
1260- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings.
1261- : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the
1262- : term-frequency heuristic from Information Retrieval).
1263- : The Edit Distance similarity function is used to discover token identity, and tokens having an edit distance
1264- : bellow a given threshold are considered as matching tokens.
1265- :
1266- : <br/>
1267- : Example usage : <code> soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 ) </code>
1268- : <br/>
1269- : The function invocation in the example above returns : <code> 0.408248290463863 </code>
1270+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p>
1271+ : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the
1272+ : term-frequency heuristic from Information Retrieval).</p>
1273+ : <p>The Edit Distance similarity function is used to discover token identity, and tokens having an edit distance
1274+ : below a given threshold are considered as matching tokens.</p>
1275+ : <p/>
1276+ :
1277+ : <p>Example usage : <code> soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 ) </code></p>
1278+ : <p/>
1279+ : <p>The function invocation in the example above returns : <code> 0.408248290463863 </code></p>
1280 :
1281 : @param $s1 The first string.
1282 : @param $s2 The second string.
1283@@ -128,16 +128,16 @@
1284 };
1285
1286 (:~
1287- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings.
1288- : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the
1289- : term-frequency heuristic from Information Retrieval).
1290- : The Jaro similarity function is used to discover token identity, and tokens having a Jaro similarity above
1291- : a given threshold are considered as matching tokens.
1292- :
1293- : <br/>
1294- : Example usage : <code> soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 ) </code>
1295- : <br/>
1296- : The function invocation in the example above returns : <code> 0.5 </code>
1297+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p>
1298+ : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the
1299+ : term-frequency heuristic from Information Retrieval).</p>
1300+ : <p>The Jaro similarity function is used to discover token identity, and tokens having a Jaro similarity above
1301+ : a given threshold are considered as matching tokens.</p>
1302+ : <p/>
1303+ :
1304+ : <p>Example usage : <code> soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 ) </code></p>
1305+ : <p/>
1306+ : <p>The function invocation in the example above returns : <code> 0.5 </code></p>
1307 :
1308 : @param $s1 The first string.
1309 : @param $s2 The second string.
1310@@ -162,16 +162,16 @@
1311 };
1312
1313 (:~
1314- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings.
1315- : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the
1316- : term-frequency heuristic from Information Retrieval).
1317- : The Jaro-Winkler similarity function is used to discover token identity, and tokens having a Jaro-Winkler
1318- : similarity above a given threshold are considered as matching tokens.
1319- :
1320- : <br/>
1321- : Example usage : <code> soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 ) </code>
1322- : <br/>
1323- : The function invocation in the example above returns : <code> 0.45 </code>
1324+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p>
1325+ : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the
1326+ : term-frequency heuristic from Information Retrieval).</p>
1327+ : <p>The Jaro-Winkler similarity function is used to discover token identity, and tokens having a Jaro-Winkler
1328+ : similarity above a given threshold are considered as matching tokens.</p>
1329+ : <p/>
1330+ :
1331+ : <p>Example usage : <code> soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 ) </code></p>
1332+ : <p/>
1333+ : <p>The function invocation in the example above returns : <code> 0.45 </code></p>
1334 :
1335 : @param $s1 The first string.
1336 : @param $s2 The second string.
1337@@ -198,13 +198,13 @@
1338 };
1339
1340 (:~
1341- : Returns the Monge-Elkan similarity coefficient between two strings, using the Jaro-Winkler
1342- : similarity function to discover token identity.
1343- :
1344- : <br/>
1345- : Example usage : <code> monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1) </code>
1346- : <br/>
1347- : The function invocation in the example above returns : <code> 0.992 </code>
1348+ : <p>Returns the Monge-Elkan similarity coefficient between two strings, using the Jaro-Winkler
1349+ : similarity function to discover token identity.</p>
1350+ : <p/>
1351+ :
1352+ : <p>Example usage : <code> monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1) </code></p>
1353+ : <p/>
1354+ : <p>The function invocation in the example above returns : <code> 0.992 </code></p>
1355 :
1356 : @param $s1 The first string.
1357 : @param $s2 The second string.
1358
1359=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq' => 'src/normalization.xq'
1360--- src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq 2012-09-28 13:34:20 +0000
1361+++ src/normalization.xq 2013-07-10 00:51:26 +0000
1362@@ -17,18 +17,17 @@
1363 :)
1364
1365 (:~
1366- : This library module provides data normalization functions for processing calendar dates,
1367+ : <p>This library module provides data normalization functions for processing calendar dates,
1368 : temporal values, currency values, units of measurement, location names and postal addresses.
1369- :
1370- : These functions are particularly useful for converting different data representations into cannonical formats.
1371- :
1372- : The logic contained in this module is not specific to any particular XQuery implementation.
1373+ : These functions are particularly useful for converting different data representations into canonical formats.</p>
1374+ :
1375+ : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
1376 :
1377 : @author Bruno Martins and Diogo Simões
1378 : @project Zorba/Data Cleaning/Normalization
1379 :)
1380
1381-module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
1382+module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";
1383
1384 import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
1385
1386@@ -37,8 +36,8 @@
1387 declare option ver:module-version "2.0";
1388
1389 (:~
1390- : Converts a given string representation of a date value into a date representation valid according
1391- : to the corresponding XML Schema type.
1392+ : <p>Converts a given string representation of a date value into a date representation valid according
1393+ : to the corresponding XML Schema type.</p>
1394 :
1395 :
1396 : @param $sd The string representation for the date
1397@@ -47,18 +46,18 @@
1398 : by a single letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion
1399 : specification is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
1400 : <pre>
1401- : '%b' Abbreviated month name in the current locale.<br/>
1402- : '%B' Full month name in the current locale.<br/>
1403- : '%d' Day of the month as decimal number (01-31).<br/>
1404- : '%m' Month as decimal number (01-12).<br/>
1405- : '%x' Date, locale-specific.<br/>
1406- : '%y' Year without century (00-99).<br/>
1407- : '%Y' Year with century.<br/>
1408- : '%C' Century (00-99): the integer part of the year divided by 100.<br/>
1409- : '%D' Locale-specific date format such as '%m/%d/%y'.<br/>
1410- : '%e' Day of the month as decimal number (1-31), with a leading pace for a single-digit number.<br/>
1411- : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format).<br/>
1412- : '%h' Equivalent to '%b'.<br/>
1413+ : '%b' Abbreviated month name in the current locale.
1414+ : '%B' Full month name in the current locale.
1415+ : '%d' Day of the month as decimal number (01-31).
1416+ : '%m' Month as decimal number (01-12).
1417+ : '%x' Date, locale-specific.
1418+ : '%y' Year without century (00-99).
1419+ : '%Y' Year with century.
1420+ : '%C' Century (00-99): the integer part of the year divided by 100.
1421+ : '%D' Locale-specific date format such as '%m/%d/%y'.
1422+ : '%e' Day of the month as decimal number (1-31), with a leading space for a single-digit number.
1423+ : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format).
1424+ : '%h' Equivalent to '%b'.
1425 :</pre>
1426 :
1427 : @return The date value resulting from the conversion.
1428@@ -147,8 +146,8 @@
1429 };
1430
1431 (:~
1432- : Converts a given string representation of a time value into a time representation valid according to
1433- : the corresponding XML Schema type.
1434+ : <p>Converts a given string representation of a time value into a time representation valid according to
1435+ : the corresponding XML Schema type.</p>
1436 :
1437 :
1438 : @param $sd The string representation for the time.
1439@@ -156,13 +155,13 @@
1440 : conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single
1441 : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification
1442 : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
1443- :
1444+ : <p/>
1445 : <pre class="ace-static">
1446 : '%H' Hours as decimal number (00-23).
1447 : '%I' Hours as decimal number (01-12).
1448 : '%M' Minute as decimal number (00-59).
1449 : '%p' AM/PM indicator in the locale. Used in conjunction with '%I' and *not* with '%H'.
1450- : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds.<br/>
1451+ : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds.
1452 : '%X' Time, locale-specific.
1453 : '%z' Offset from Greenwich, so '-0900' is 9 hours west of Greenwich.
1454 : '%Z' Time zone as a character string.
1455@@ -173,6 +172,7 @@
1456 : '%T' Equivalent to '%H:%M:%S'.
1457 :</pre>
1458 :
1459+ : @error normalization:NOTSUPPORTED if the date type is not known to the service.
1460 : @return The time value resulting from the conversion.
1461 : @example test/Queries/data-cleaning/normalization/to-time.xq
1462 :)
1463@@ -524,8 +524,8 @@
1464 };
1465
1466 (:~
1467- : Converts a given string representation of a dateTime value into a dateTime representation
1468- : valid according to the corresponding XML Schema type.
1469+ : <p>Converts a given string representation of a dateTime value into a dateTime representation
1470+ : valid according to the corresponding XML Schema type.</p>
1471 :
1472 :
1473 : @param $sd The string representation for the dateTime.
1474@@ -533,7 +533,7 @@
1475 : of conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single
1476 : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification
1477 : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
1478- :
1479+ : <p/>
1480 : <pre class="ace-static">
1481 : '%b' Abbreviated month name in the current locale.
1482 : '%B' Full month name in the current locale.
1483@@ -566,6 +566,7 @@
1484 : '%T' Equivalent to '%H:%M:%S'.
1485 :</pre>
1486 :
1487+ : @error normalization:NOTSUPPORTED if the dateTime type is not known to the service.
1488 : @return The dateTime value resulting from the conversion.
1489 : @example test/Queries/data-cleaning/normalization/to-dateTime.xq
1490 :)
1491@@ -1183,8 +1184,8 @@
1492 };
1493
1494 (:~
1495- : Uses an address normalization Web service to convert a postal address given as input into a
1496- : cannonical representation format.
1497+ : <p>Uses an address normalization Web service to convert a postal address given as input into a
1498+ : canonical representation format.</p>
1499 :
1500 :
1501 : @param $addr A sequence of strings encoding an address, where each string in the sequence corresponds to a different component (e.g., street, city, country, etc.) of the address.
1502@@ -1208,13 +1209,13 @@
1503 };
1504
1505 (:~
1506- : Uses an phone number normalization Web service to convert a phone number given as input into a
1507- : cannonical representation.
1508+ : <p>Uses an phone number normalization Web service to convert a phone number given as input into a
1509+ : cannonical representation.</p>
1510 :
1511 : @param $phone A strings encoding a phone number.
1512 : @return A strings with the phone number encoded in a cannonical format.
1513- :
1514- : <br/><br/><b> Attention : This function is still not implemented. </b> <br/>
1515+ : <p/>
1516+ : <p><b> Attention : This function is still not implemented. </b></p>
1517 :
1518 :)
1519 declare function normalization:normalize-phone ( $addr as xs:string* ) as xs:string* {
1520@@ -1222,8 +1223,8 @@
1521 };
1522
1523 (:~
1524- : Internal auxiliary function that returns an XML representation for a dictionary that contains the
1525- : time-shift value associated to different time-zone abbreviations.
1526+ : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains the
1527+ : time-shift value associated to different time-zone abbreviations.</p>
1528 :)
1529 declare %private function normalization:timeZone-dictionary() as element(){
1530 let $result :=
1531@@ -1453,8 +1454,8 @@
1532 };
1533
1534 (:~
1535- : Internal auxiliary function that returns an XML representation for a dictionary that contains a
1536- : numeric value associated to different month name abbreviations.
1537+ : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains a
1538+ : numeric value associated to different month name abbreviations.</p>
1539 :)
1540 declare %private function normalization:month-dictionary() as element(){
1541 let $dictionary :=
1542@@ -1523,7 +1524,7 @@
1543 };
1544
1545 (:~
1546- : Internal auxiliary function that checks if a string is in xs:dateTime format
1547+ : <p>Internal auxiliary function that checks if a string is in xs:dateTime format</p>
1548 :
1549 :
1550 : @param $dateTime The string representation for the dateTime.
1551@@ -1534,7 +1535,7 @@
1552 };
1553
1554 (:~
1555- : Internal auxiliary function that checks if a string is in xs:date format
1556+ : <p>Internal auxiliary function that checks if a string is in xs:date format</p>
1557 :
1558 :
1559 : @param $dateTime The string representation for the date.
1560@@ -1545,7 +1546,7 @@
1561 };
1562
1563 (:~
1564- : Internal auxiliary function that checks if a string is in xs:time format
1565+ : <p>Internal auxiliary function that checks if a string is in xs:time format</p>
1566 :
1567 :
1568 : @param $dateTime The string representation for the time.
1569
1570=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/phonetic-string-similarity.xq' => 'src/phonetic-string-similarity.xq'
1571--- src/com/zorba-xquery/www/modules/data-cleaning/phonetic-string-similarity.xq 2013-05-18 00:36:01 +0000
1572+++ src/phonetic-string-similarity.xq 2013-07-10 00:51:26 +0000
1573@@ -17,29 +17,29 @@
1574 :)
1575
1576 (:~
1577- : This library module provides phonetic string similarity functions, comparing strings with basis on how they sound.
1578- :
1579- : These metrics are particularly effective in matching names, since names are often spelled in different
1580- : ways that sound the same.
1581- :
1582- : The logic contained in this module is not specific to any particular XQuery implementation.
1583+ : <p>This library module provides phonetic string similarity functions, comparing strings with basis on how they sound.</p>
1584+ : <p/>
1585+ : <p>These metrics are particularly effective in matching names, since names are often spelled in different
1586+ : ways that sound the same.</p>
1587+ : <p/>
1588+ : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
1589 :
1590 : @author Bruno Martins
1591 : @project Zorba/Data Cleaning/Phonetic String Similarity
1592 :)
1593
1594-module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
1595+module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
1596
1597 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
1598 declare option ver:module-version "2.0";
1599
1600 (:~
1601- : Returns the Soundex key for a given string.
1602- :
1603- : <br/>
1604- : Example usage : <code>soundex-key("Robert")</code>
1605- : <br/>
1606- : The function invocation in the example above returns : <code>"R163"</code>
1607+ : <p>Returns the Soundex key for a given string.</p>
1608+ : <p/>
1609+ :
1610+ : <p>Example usage : <code>soundex-key("Robert")</code></p>
1611+ : <p/>
1612+ : <p>The function invocation in the example above returns : <code>"R163"</code></p>
1613 :
1614 : @param $s1 The string.
1615 : @return The Soundex key for the given input string.
1616@@ -55,12 +55,12 @@
1617 };
1618
1619 (:~
1620- : Checks if two strings have the same Soundex key.
1621- :
1622- : <br/>
1623- : Example usage : <code>soundex( "Robert" , "Rupert" )</code>
1624- : <br/>
1625- : The function invocation in the example above returns : <code>true</code>
1626+ : <p>Checks if two strings have the same Soundex key.</p>
1627+ : <p/>
1628+ :
1629+ : <p>Example usage : <code>soundex( "Robert" , "Rupert" )</code></p>
1630+ : <p/>
1631+ : <p>The function invocation in the example above returns : <code>true</code></p>
1632 :
1633 : @param $s1 The first string.
1634 : @param $s2 The second string.
1635@@ -72,13 +72,13 @@
1636 };
1637
1638 (:~
1639- : Returns the Metaphone key for a given string.
1640- : The Metaphone algorithm produces variable length keys as its output, as opposed to Soundex's fixed-length keys.
1641- :
1642- : <br/>
1643- : Example usage : <code>metaphone-key("ALEKSANDER")</code>
1644- : <br/>
1645- : The function invocation in the example above returns : <code>"ALKSNTR"</code>
1646+ : <p>Returns the Metaphone key for a given string.</p>
1647+ : <p>The Metaphone algorithm produces variable length keys as its output, as opposed to Soundex's fixed-length keys.</p>
1648+ : <p/>
1649+ :
1650+ : <p>Example usage : <code>metaphone-key("ALEKSANDER")</code></p>
1651+ : <p/>
1652+ : <p>The function invocation in the example above returns : <code>"ALKSNTR"</code></p>
1653 :
1654 : @param $s1 The string.
1655 : @return The Metaphone key for the given input string.
1656@@ -99,12 +99,12 @@
1657 };
1658
1659 (:~
1660- : Checks if two strings have the same Metaphone key.
1661- :
1662- : <br/>
1663- : Example usage : <code>metaphone("ALEKSANDER", "ALEXANDRE")</code>
1664- : <br/>
1665- : The function invocation in the example above returns : <code>true</code>
1666+ : <p>Checks if two strings have the same Metaphone key.</p>
1667+ : <p/>
1668+ :
1669+ : <p>Example usage : <code>metaphone("ALEKSANDER", "ALEXANDRE")</code></p>
1670+ : <p/>
1671+ : <p>The function invocation in the example above returns : <code>true</code></p>
1672 :
1673 : @param $s1 The first string.
1674 : @param $s2 The second string.
1675
1676=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/set-similarity.xq' => 'src/set-similarity.xq'
1677--- src/com/zorba-xquery/www/modules/data-cleaning/set-similarity.xq 2012-09-28 13:34:20 +0000
1678+++ src/set-similarity.xq 2013-07-10 00:51:26 +0000
1679@@ -17,29 +17,29 @@
1680 :)
1681
1682 (:~
1683- : This library module provides similarity functions for comparing sets of XML
1684- : nodes (e.g., sets of XML elements, attributes or atomic values).
1685- :
1686- : These functions are particularly useful for matching near duplicate sets of XML nodes.
1687- :
1688- : The logic contained in this module is not specific to any particular XQuery implementation.
1689+ : <p>This library module provides similarity functions for comparing sets of XML
1690+ : nodes (e.g., sets of XML elements, attributes or atomic values).</p>
1691+ : <p/>
1692+ : <p>These functions are particularly useful for matching near duplicate sets of XML nodes.</p>
1693+ : <p/>
1694+ : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
1695 :
1696 : @author Bruno Martins
1697 : @project Zorba/Data Cleaning/Set Similarity
1698 :)
1699
1700-module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
1701+module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
1702
1703 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
1704 declare option ver:module-version "2.0";
1705
1706 (:~
1707- : Returns the union between two sets, using the deep-equal() function to compare the XML nodes from the sets.
1708- :
1709- : <br/>
1710- : Example usage : <code> deep-union ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code>
1711- : <br/>
1712- : The function invocation in the example above returns : <code> ("a", "b", "c", <d/> ) </code>
1713+ : <p>Returns the union between two sets, using the deep-equal() function to compare the XML nodes from the sets.</p>
1714+ : <p/>
1715+ :
1716+ : <p>Example usage : <code> deep-union ( ( "a", "b", "c") , ( "a", "a", &lt;d/&gt; ) ) </code></p>
1717+ : <p/>
1718+ : <p>The function invocation in the example above returns : <code> ("a", "b", "c", &lt;d/&gt; ) </code></p>
1719 :
1720 : @param $s1 The first set.
1721 : @param $s2 The second set.
1722@@ -54,12 +54,12 @@
1723 };
1724
1725 (:~
1726- : Returns the intersection between two sets, using the deep-equal() function to compare the XML nodes from the sets.
1727- :
1728- : <br/>
1729- : Example usage : <code> deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code>
1730- : <br/>
1731- : The function invocation in the example above returns : <code> ("a") </code>
1732+ : <p>Returns the intersection between two sets, using the deep-equal() function to compare the XML nodes from the sets.</p>
1733+ : <p/>
1734+ :
1735+ : <p>Example usage : <code> deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code></p>
1736+ : <p/>
1737+ : <p>The function invocation in the example above returns : <code> ("a") </code></p>
1738 :
1739 : @param $s1 The first set.
1740 : @param $s2 The second set.
1741@@ -75,12 +75,12 @@
1742 };
1743
1744 (:~
1745- : Removes exact duplicates from a set, using the deep-equal() function to compare the XML nodes from the sets.
1746- :
1747- : <br/>
1748- : Example usage : <code> distinct ( ( "a", "a", <b/> ) ) </code>
1749- : <br/>
1750- : The function invocation in the example above returns : <code> ("a", <b/> ) </code>
1751+ : <p>Removes exact duplicates from a set, using the deep-equal() function to compare the XML nodes from the sets.</p>
1752+ : <p/>
1753+ :
1754+ : <p>Example usage : <code> distinct ( ( "a", "a", <b/> ) ) </code></p>
1755+ : <p/>
1756+ : <p>The function invocation in the example above returns : <code> ("a", <b/> ) </code></p>
1757 :
1758 : @param $s A set.
1759 : @return The set provided as input without the exact duplicates (i.e., returns the distinct nodes from the set provided as input).
1760@@ -93,14 +93,14 @@
1761 };
1762
1763 (:~
1764- : Returns the overlap coefficient between two sets of XML nodes.
1765- : The overlap coefficient is defined as the shared information between the input sets
1766- : (i.e., the size of the intersection) over the size of the smallest input set.
1767- :
1768- : <br/>
1769- : Example usage : <code> overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) ) </code>
1770- : <br/>
1771- : The function invocation in the example above returns : <code> 1.0 </code>
1772+ : <p>Returns the overlap coefficient between two sets of XML nodes.</p>
1773+ : <p>The overlap coefficient is defined as the shared information between the input sets
1774+ : (i.e., the size of the intersection) over the size of the smallest input set.</p>
1775+ : <p/>
1776+ :
1777+ : <p>Example usage : <code> overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) ) </code></p>
1778+ : <p/>
1779+ : <p>The function invocation in the example above returns : <code> 1.0 </code></p>
1780 :
1781 : @param $s1 The first set.
1782 : @param $s2 The second set.
1783@@ -112,14 +112,14 @@
1784 };
1785
1786 (:~
1787- : Returns the Dice similarity coefficient between two sets of XML nodes.
1788- : The Dice coefficient is defined as defined as twice the shared information between the input sets
1789- : (i.e., the size of the intersection) over the sum of the cardinalities for the input sets.
1790- :
1791- : <br/>
1792- : Example usage : <code> dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code>
1793- : <br/>
1794- : The function invocation in the example above returns : <code> 0.4 </code>
1795+ : <p>Returns the Dice similarity coefficient between two sets of XML nodes.</p>
1796+ : <p>The Dice coefficient is defined as twice the shared information between the input sets
1797+ : (i.e., the size of the intersection) over the sum of the cardinalities for the input sets.</p>
1798+ : <p/>
1799+ :
1800+ : <p>Example usage : <code> dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code></p>
1801+ : <p/>
1802+ : <p>The function invocation in the example above returns : <code> 0.4 </code></p>
1803 :
1804 : @param $s1 The first set.
1805 : @param $s2 The second set.
1806@@ -131,14 +131,14 @@
1807 };
1808
1809 (:~
1810- : Returns the Jaccard similarity coefficient between two sets of XML nodes.
1811- : The Jaccard coefficient is defined as the size of the intersection divided by the size of the
1812- : union of the input sets.
1813- :
1814- : <br/>
1815- : Example usage : <code> jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code>
1816- : <br/>
1817- : The function invocation in the example above returns : <code> 0.25 </code>
1818+ : <p>Returns the Jaccard similarity coefficient between two sets of XML nodes.</p>
1819+ : <p>The Jaccard coefficient is defined as the size of the intersection divided by the size of the
1820+ : union of the input sets.</p>
1821+ : <p/>
1822+ :
1823+ : <p>Example usage : <code> jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code></p>
1824+ : <p/>
1825+ : <p>The function invocation in the example above returns : <code> 0.25 </code></p>
1826 :
1827 : @param $s1 The first set.
1828 : @param $s2 The second set.
1829
1830=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/token-based-string-similarity.xq' => 'src/token-based-string-similarity.xq'
1831--- src/com/zorba-xquery/www/modules/data-cleaning/token-based-string-similarity.xq 2012-09-28 13:34:20 +0000
1832+++ src/token-based-string-similarity.xq 2013-07-10 00:51:26 +0000
1833@@ -17,40 +17,40 @@
1834 :)
1835
1836 (:~
1837- : This library module provides token-based string similarity functions that view strings
1838- : as sets or multi-sets of tokens and use set-related properties to compute similarity scores.
1839- : The tokens correspond to groups of characters extracted from the strings being compared, such as
1840- : individual words or character n-grams.
1841- :
1842- : These functions are particularly useful for matching near duplicate strings in cases where
1843- : typographical conventions often lead to rearrangement of words (e.g., "John Smith" versus "Smith, John").
1844- :
1845- : The logic contained in this module is not specific to any particular XQuery implementation,
1846+ : <p>This library module provides token-based string similarity functions that view strings
1847+ : as sets or multi-sets of tokens and use set-related properties to compute similarity scores.</p>
1848+ : <p>The tokens correspond to groups of characters extracted from the strings being compared, such as
1849+ : individual words or character n-grams.</p>
1850+ : <p/>
1851+ : <p>These functions are particularly useful for matching near duplicate strings in cases where
1852+ : typographical conventions often lead to rearrangement of words (e.g., "John Smith" versus "Smith, John").</p>
1853+ : <p/>
1854+ : <p>The logic contained in this module is not specific to any particular XQuery implementation,
1855 : although the module requires the trigonometic functions of XQuery 3.0 or a math extension
1856- : function such as sqrt($x as numeric) for computing the square root.
1857+ : function such as sqrt($x as numeric) for computing the square root.</p>
1858 :
1859 : @author Bruno Martins
1860 : @project Zorba/Data Cleaning/Token Based String Similarity
1861 :)
1862
1863-module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
1864+module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
1865
1866-(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :)
1867-(: declare namespace math = "java:java.lang.Math"; :)
1868+(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :)
1869+(: declare namespace math = "java:java.lang.Math"; :)
1870 declare namespace math = "http://www.w3.org/2005/xpath-functions/math";
1871
1872-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
1873+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
1874
1875 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
1876 declare option ver:module-version "2.0";
1877
1878 (:~
1879- : Returns the individual character n-grams forming a string.
1880- :
1881- : <br/>
1882- : Example usage : <code> ngrams("FLWOR", 2 ) </code>
1883- : <br/>
1884- : The function invocation in the example above returns : <code> ("_F" , "FL" , "LW" , "WO" , "LW" , "WO" , "OR" , "R_") </code>
1885+ : <p>Returns the individual character n-grams forming a string.</p>
1886+ : <p/>
1887+ :
1888+ : <p>Example usage : <code> ngrams("FLWOR", 2 ) </code></p>
1889+ : <p/>
1890+ : <p>The function invocation in the example above returns : <code> ("_F" , "FL" , "LW" , "WO" , "LW" , "WO" , "OR" , "R_") </code></p>
1891 :
1892 : @param $s The input string.
1893 : @param $n The number of characters to consider when extracting n-grams.
1894@@ -73,13 +73,13 @@
1895 };
1896
1897 (:~
1898- : Auxiliary function for computing the cosine similarity coefficient between strings,
1899- : using stringdescriptors based on sets of character n-grams or sets of tokens extracted from two strings.
1900- :
1901- : <br/>
1902- : Example usage : <code> cosine( ("aa","bb") , ("bb","aa")) </code>
1903- : <br/>
1904- : The function invocation in the example above returns : <code> 1.0 </code>
1905+ : <p>Auxiliary function for computing the cosine similarity coefficient between strings,
1906+ : using string descriptors based on sets of character n-grams or sets of tokens extracted from two strings.</p>
1907+ : <p/>
1908+ :
1909+ : <p>Example usage : <code> cosine( ("aa","bb") , ("bb","aa")) </code></p>
1910+ : <p/>
1911+ : <p>The function invocation in the example above returns : <code> 1.0 </code></p>
1912 :
1913 : @param $desc1 The descriptor for the first string.
1914 : @param $desc2 The descriptor for the second string.
1915@@ -97,12 +97,12 @@
1916 };
1917
1918 (:~
1919- : Returns the Dice similarity coefficient between sets of character n-grams extracted from two strings.
1920- :
1921- : <br/>
1922- : Example usage : <code> dice-ngrams("DWAYNE", "DUANE", 2 ) </code>
1923- : <br/>
1924- : The function invocation in the example above returns : <code> 0.4615384615384616 </code>
1925+ : <p>Returns the Dice similarity coefficient between sets of character n-grams extracted from two strings.</p>
1926+ : <p/>
1927+ :
1928+ : <p>Example usage : <code> dice-ngrams("DWAYNE", "DUANE", 2 ) </code></p>
1929+ : <p/>
1930+ : <p>The function invocation in the example above returns : <code> 0.4615384615384616 </code></p>
1931 :
1932 : @param $s1 The first string.
1933 : @param $s2 The second string.
1934@@ -115,12 +115,12 @@
1935 };
1936
1937 (:~
1938- : Returns the overlap similarity coefficient between sets of character n-grams extracted from two strings.
1939- :
1940- : <br/>
1941- : Example usage : <code> overlap-ngrams("DWAYNE", "DUANE", 2 ) </code>
1942- : <br/>
1943- : The function invocation in the example above returns : <code> 0.5 </code>
1944+ : <p>Returns the overlap similarity coefficient between sets of character n-grams extracted from two strings.</p>
1945+ : <p/>
1946+ :
1947+ : <p>Example usage : <code> overlap-ngrams("DWAYNE", "DUANE", 2 ) </code></p>
1948+ : <p/>
1949+ : <p>The function invocation in the example above returns : <code> 0.5 </code></p>
1950 :
1951 : @param $s1 The first string.
1952 : @param $s2 The second string.
1953@@ -133,12 +133,12 @@
1954 };
1955
1956 (:~
1957- : Returns the Jaccard similarity coefficient between sets of character n-grams extracted from two strings.
1958- :
1959- : <br/>
1960- : Example usage : <code> jaccard-ngrams("DWAYNE", "DUANE", 2 ) </code>
1961- : <br/>
1962- : The function invocation in the example above returns : <code> 0.3 </code>
1963+ : <p>Returns the Jaccard similarity coefficient between sets of character n-grams extracted from two strings.</p>
1964+ : <p/>
1965+ :
1966+ : <p>Example usage : <code> jaccard-ngrams("DWAYNE", "DUANE", 2 ) </code></p>
1967+ : <p/>
1968+ : <p>The function invocation in the example above returns : <code> 0.3 </code></p>
1969 :
1970 : @param $s1 The first string.
1971 : @param $s2 The second string.
1972@@ -151,14 +151,14 @@
1973 };
1974
1975 (:~
1976- : Returns the cosine similarity coefficient between sets of character n-grams extracted from two strings.
1977- : The n-grams from each string are weighted according to their occurence frequency (i.e., weighted according to
1978- : the term-frequency heuristic from Information Retrieval).
1979- :
1980- : <br/>
1981- : Example usage : <code> cosine-ngrams("DWAYNE", "DUANE", 2 ) </code>
1982- : <br/>
1983- : The function invocation in the example above returns : <code> 0.2401922307076307 </code>
1984+ : <p>Returns the cosine similarity coefficient between sets of character n-grams extracted from two strings.</p>
1985+ : <p>The n-grams from each string are weighted according to their occurrence frequency (i.e., weighted according to
1986+ : the term-frequency heuristic from Information Retrieval).</p>
1987+ : <p/>
1988+ :
1989+ : <p>Example usage : <code> cosine-ngrams("DWAYNE", "DUANE", 2 ) </code></p>
1990+ : <p/>
1991+ : <p>The function invocation in the example above returns : <code> 0.2401922307076307 </code></p>
1992 :
1993 : @param $s1 The first string.
1994 : @param $s2 The second string.
1995@@ -173,12 +173,12 @@
1996 };
1997
1998 (:~
1999- : Returns the Dice similarity coefficient between sets of tokens extracted from two strings.
2000- :
2001- : <br/>
2002- : Example usage : <code> dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code>
2003- : <br/>
2004- : The function invocation in the example above returns : <code> 0.4 </code>
2005+ : <p>Returns the Dice similarity coefficient between sets of tokens extracted from two strings.</p>
2006+ : <p/>
2007+ :
2008+ : <p>Example usage : <code> dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p>
2009+ : <p/>
2010+ : <p>The function invocation in the example above returns : <code> 0.4 </code></p>
2011 :
2012 : @param $s1 The first string.
2013 : @param $s2 The second string.
2014@@ -191,12 +191,12 @@
2015 };
2016
2017 (:~
2018- : Returns the overlap similarity coefficient between sets of tokens extracted from two strings.
2019- :
2020- : <br/>
2021- : Example usage : <code> overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code>
2022- : <br/>
2023- : The function invocation in the example above returns : <code> 0.5 </code>
2024+ : <p>Returns the overlap similarity coefficient between sets of tokens extracted from two strings.</p>
2025+ : <p/>
2026+ :
2027+ : <p>Example usage : <code> overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p>
2028+ : <p/>
2029+ : <p>The function invocation in the example above returns : <code> 0.5 </code></p>
2030 :
2031 : @param $s1 The first string.
2032 : @param $s2 The second string.
2033@@ -209,12 +209,12 @@
2034 };
2035
2036 (:~
2037- : Returns the Jaccard similarity coefficient between sets of tokens extracted from two strings.
2038- :
2039- : <br/>
2040- : Example usage : <code> jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code>
2041- : <br/>
2042- : The function invocation in the example above returns : <code> 0.25 </code>
2043+ : <p>Returns the Jaccard similarity coefficient between sets of tokens extracted from two strings.</p>
2044+ : <p/>
2045+ :
2046+ : <p>Example usage : <code> jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p>
2047+ : <p/>
2048+ : <p>The function invocation in the example above returns : <code> 0.25 </code></p>
2049 :
2050 : @param $s1 The first string.
2051 : @param $s2 The second string.
2052@@ -227,14 +227,14 @@
2053 };
2054
2055 (:~
2056- : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. The tokens
2057+ : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings. The tokens
2058 : from each string are weighted according to their occurence frequency (i.e., weighted according to the
2059- : term-frequency heuristic from Information Retrieval).
2060- :
2061- : <br/>
2062- : Example usage : <code> cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code>
2063- : <br/>
2064- : The function invocation in the example above returns : <code> 0.408248290463863 </code>
2065+ : term-frequency heuristic from Information Retrieval).</p>
2066+ : <p/>
2067+ :
2068+ : <p>Example usage : <code> cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p>
2069+ : <p/>
2070+ : <p>The function invocation in the example above returns : <code> 0.408248290463863 </code></p>
2071 :
2072 : @param $s1 The first string.
2073 : @param $s2 The second string.
2074
2075=== renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/whitepages_schema.xsd' => 'src/whitepages_schema.xsd'
2076=== modified file 'test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq'
2077--- test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq 2011-07-26 20:10:34 +0000
2078+++ test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq 2013-07-10 00:51:26 +0000
2079@@ -1,3 +1,3 @@
2080-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
2081+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
2082
2083 simc:edit-distance("FLWOR", "FLOWER")
2084
2085=== modified file 'test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq'
2086--- test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq 2011-07-26 20:10:34 +0000
2087+++ test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq 2013-07-10 00:51:26 +0000
2088@@ -1,3 +1,3 @@
2089-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
2090+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
2091
2092 simc:jaro-winkler("DWAYNE", "DUANE", 4, 0.1 )
2093
2094=== modified file 'test/Queries/data-cleaning/character-based-string-similarity/jaro.xq'
2095--- test/Queries/data-cleaning/character-based-string-similarity/jaro.xq 2011-07-26 20:10:34 +0000
2096+++ test/Queries/data-cleaning/character-based-string-similarity/jaro.xq 2013-07-10 00:51:26 +0000
2097@@ -1,3 +1,3 @@
2098-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
2099+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
2100
2101 simc:jaro("FLWOR Found.", "FLWOR Foundation")
2102
2103=== modified file 'test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq'
2104--- test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq 2011-07-26 20:10:34 +0000
2105+++ test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq 2013-07-10 00:51:26 +0000
2106@@ -1,3 +1,3 @@
2107-import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity";
2108+import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity";
2109
2110 simc:needleman-wunsch("KAK", "KQRK", 1, 1)
2111
2112=== modified file 'test/Queries/data-cleaning/consolidation/least-attributes.xq'
2113--- test/Queries/data-cleaning/consolidation/least-attributes.xq 2011-07-19 19:12:03 +0000
2114+++ test/Queries/data-cleaning/consolidation/least-attributes.xq 2013-07-10 00:51:26 +0000
2115@@ -1,3 +1,3 @@
2116-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2117+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2118
2119 con:least-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )
2120
2121=== modified file 'test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq'
2122--- test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq 2011-07-19 19:12:03 +0000
2123+++ test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq 2013-07-10 00:51:26 +0000
2124@@ -1,3 +1,3 @@
2125-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2126+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2127
2128 con:least-distinct-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )
2129
2130=== modified file 'test/Queries/data-cleaning/consolidation/least-distinct-elements.xq'
2131--- test/Queries/data-cleaning/consolidation/least-distinct-elements.xq 2011-07-19 19:12:03 +0000
2132+++ test/Queries/data-cleaning/consolidation/least-distinct-elements.xq 2013-07-10 00:51:26 +0000
2133@@ -1,3 +1,3 @@
2134-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2135+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2136
2137 con:least-distinct-elements( ( <a><b/></a>, <b><c/></b>, <d/>) )
2138
2139=== modified file 'test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq'
2140--- test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq 2011-07-19 19:12:03 +0000
2141+++ test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq 2013-07-10 00:51:26 +0000
2142@@ -1,3 +1,3 @@
2143-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2144+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2145
2146 con:least-distinct-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) )
2147
2148=== modified file 'test/Queries/data-cleaning/consolidation/least-elements.xq'
2149--- test/Queries/data-cleaning/consolidation/least-elements.xq 2011-07-19 19:12:03 +0000
2150+++ test/Queries/data-cleaning/consolidation/least-elements.xq 2013-07-10 00:51:26 +0000
2151@@ -1,3 +1,3 @@
2152-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2153+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2154
2155 con:least-elements( ( <a><b/></a>, <b><c/></b>, <d/>) )
2156
2157=== modified file 'test/Queries/data-cleaning/consolidation/least-nodes.xq'
2158--- test/Queries/data-cleaning/consolidation/least-nodes.xq 2011-07-19 19:12:03 +0000
2159+++ test/Queries/data-cleaning/consolidation/least-nodes.xq 2013-07-10 00:51:26 +0000
2160@@ -1,3 +1,3 @@
2161-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2162+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2163
2164 con:least-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) )
2165
2166=== modified file 'test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq'
2167--- test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq 2011-07-19 19:12:03 +0000
2168+++ test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq 2013-07-10 00:51:26 +0000
2169@@ -1,3 +1,3 @@
2170-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2171+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2172
2173 con:least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )
2174
2175=== modified file 'test/Queries/data-cleaning/consolidation/least-tokens.xq'
2176--- test/Queries/data-cleaning/consolidation/least-tokens.xq 2011-07-19 19:12:03 +0000
2177+++ test/Queries/data-cleaning/consolidation/least-tokens.xq 2013-07-10 00:51:26 +0000
2178@@ -1,3 +1,3 @@
2179-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2180+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2181
2182 con:least-tokens( ( "a b c", "a b", "a"), " +" )
2183
2184=== modified file 'test/Queries/data-cleaning/consolidation/leastfrequent_1.xq'
2185--- test/Queries/data-cleaning/consolidation/leastfrequent_1.xq 2011-07-19 19:12:03 +0000
2186+++ test/Queries/data-cleaning/consolidation/leastfrequent_1.xq 2013-07-10 00:51:26 +0000
2187@@ -1,3 +1,3 @@
2188-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2189+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2190
2191 con:least-frequent( ( "a", "a", "b") )
2192
2193=== modified file 'test/Queries/data-cleaning/consolidation/longest_1.xq'
2194--- test/Queries/data-cleaning/consolidation/longest_1.xq 2011-07-19 19:12:03 +0000
2195+++ test/Queries/data-cleaning/consolidation/longest_1.xq 2013-07-10 00:51:26 +0000
2196@@ -1,3 +1,3 @@
2197-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2198+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2199
2200 con:longest( ( "a", "aa", "aaa") )
2201
2202=== modified file 'test/Queries/data-cleaning/consolidation/matching_1.xq'
2203--- test/Queries/data-cleaning/consolidation/matching_1.xq 2011-07-19 19:12:03 +0000
2204+++ test/Queries/data-cleaning/consolidation/matching_1.xq 2013-07-10 00:51:26 +0000
2205@@ -1,3 +1,3 @@
2206-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2207+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2208
2209 con:matching( ( "a A b", "c AAA d", "e BB f"), "A+" )
2210
2211=== modified file 'test/Queries/data-cleaning/consolidation/most-attributes.xq'
2212--- test/Queries/data-cleaning/consolidation/most-attributes.xq 2011-07-19 19:12:03 +0000
2213+++ test/Queries/data-cleaning/consolidation/most-attributes.xq 2013-07-10 00:51:26 +0000
2214@@ -1,3 +1,3 @@
2215-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2216+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2217
2218 con:most-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )
2219
2220=== modified file 'test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq'
2221--- test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq 2011-07-19 19:12:03 +0000
2222+++ test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq 2013-07-10 00:51:26 +0000
2223@@ -1,3 +1,3 @@
2224-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2225+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2226
2227 con:most-distinct-attributes( ( <a att1="a1" att2="a2" att3="a3"/>, <a att1="a1" att2="a2"><b att2="a2" /></a>, <c/> ) )
2228
2229=== modified file 'test/Queries/data-cleaning/consolidation/most-distinct-elements.xq'
2230--- test/Queries/data-cleaning/consolidation/most-distinct-elements.xq 2011-07-19 19:12:03 +0000
2231+++ test/Queries/data-cleaning/consolidation/most-distinct-elements.xq 2013-07-10 00:51:26 +0000
2232@@ -1,3 +1,3 @@
2233-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2234+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2235
2236 con:most-distinct-elements( ( <a><b/><c/><d/></a>, <a><b/><b/><c/></a>, <a/> ) )
2237
2238=== modified file 'test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq'
2239--- test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq 2011-07-19 19:12:03 +0000
2240+++ test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq 2013-07-10 00:51:26 +0000
2241@@ -1,3 +1,3 @@
2242-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2243+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2244
2245 con:most-distinct-nodes( ( <a><b/></a>, <a><a/></a>, <b/>) )
2246
2247=== modified file 'test/Queries/data-cleaning/consolidation/most-elements.xq'
2248--- test/Queries/data-cleaning/consolidation/most-elements.xq 2011-07-19 19:12:03 +0000
2249+++ test/Queries/data-cleaning/consolidation/most-elements.xq 2013-07-10 00:51:26 +0000
2250@@ -1,3 +1,3 @@
2251-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2252+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2253
2254 con:most-elements( ( <a><b/></a>, <a/>, <b/>) )
2255
2256=== modified file 'test/Queries/data-cleaning/consolidation/most-frequent.xq'
2257--- test/Queries/data-cleaning/consolidation/most-frequent.xq 2011-07-19 19:12:03 +0000
2258+++ test/Queries/data-cleaning/consolidation/most-frequent.xq 2013-07-10 00:51:26 +0000
2259@@ -1,3 +1,3 @@
2260-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2261+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2262
2263 con:most-frequent( ( "a", "a", "b") )
2264
2265=== modified file 'test/Queries/data-cleaning/consolidation/most-nodes.xq'
2266--- test/Queries/data-cleaning/consolidation/most-nodes.xq 2011-07-19 19:12:03 +0000
2267+++ test/Queries/data-cleaning/consolidation/most-nodes.xq 2013-07-10 00:51:26 +0000
2268@@ -1,3 +1,3 @@
2269-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2270+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2271
2272 con:most-nodes( ( <a><b/></a>, <a/>, <b/>) )
2273
2274=== modified file 'test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq'
2275--- test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq 2011-07-19 19:12:03 +0000
2276+++ test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq 2013-07-10 00:51:26 +0000
2277@@ -1,3 +1,3 @@
2278-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2279+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2280
2281 con:most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )
2282
2283=== modified file 'test/Queries/data-cleaning/consolidation/most-tokens.xq'
2284--- test/Queries/data-cleaning/consolidation/most-tokens.xq 2011-07-19 19:12:03 +0000
2285+++ test/Queries/data-cleaning/consolidation/most-tokens.xq 2013-07-10 00:51:26 +0000
2286@@ -1,3 +1,3 @@
2287-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2288+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2289
2290 con:most-tokens( ( "a b c", "a b", "a"), " +" )
2291
2292=== modified file 'test/Queries/data-cleaning/consolidation/shortest_1.xq'
2293--- test/Queries/data-cleaning/consolidation/shortest_1.xq 2011-07-19 19:12:03 +0000
2294+++ test/Queries/data-cleaning/consolidation/shortest_1.xq 2013-07-10 00:51:26 +0000
2295@@ -1,3 +1,3 @@
2296-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2297+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2298
2299 con:shortest( ( "a", "aa", "aaa") )
2300
2301=== modified file 'test/Queries/data-cleaning/consolidation/superstring_1.xq'
2302--- test/Queries/data-cleaning/consolidation/superstring_1.xq 2011-07-19 19:12:03 +0000
2303+++ test/Queries/data-cleaning/consolidation/superstring_1.xq 2013-07-10 00:51:26 +0000
2304@@ -1,3 +1,3 @@
2305-import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation";
2306+import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation";
2307
2308 con:superstring( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) )
2309
2310=== modified file 'test/Queries/data-cleaning/conversion/address-from-geocode.xq'
2311--- test/Queries/data-cleaning/conversion/address-from-geocode.xq 2012-10-03 13:53:12 +0000
2312+++ test/Queries/data-cleaning/conversion/address-from-geocode.xq 2013-07-10 00:51:26 +0000
2313@@ -1,4 +1,4 @@
2314-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2315+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2316
2317 for $s in conversion:address-from-geocode ( 38.725735 , -9.15021 )
2318 return fn:lower-case($s)
2319
2320=== modified file 'test/Queries/data-cleaning/conversion/address-from-phone.xq'
2321--- test/Queries/data-cleaning/conversion/address-from-phone.xq 2012-02-05 22:52:01 +0000
2322+++ test/Queries/data-cleaning/conversion/address-from-phone.xq 2013-07-10 00:51:26 +0000
2323@@ -1,4 +1,4 @@
2324-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2325+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2326
2327 let $arg := conversion:address-from-phone('8654582358')[1]
2328 let $result :=
2329
2330=== modified file 'test/Queries/data-cleaning/conversion/address-from-user.xq'
2331--- test/Queries/data-cleaning/conversion/address-from-user.xq 2012-02-05 22:52:01 +0000
2332+++ test/Queries/data-cleaning/conversion/address-from-user.xq 2013-07-10 00:51:26 +0000
2333@@ -1,4 +1,4 @@
2334-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2335+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2336
2337 let $arg := conversion:address-from-user('Maria Lurdes')[1]
2338 let $result :=
2339
2340=== modified file 'test/Queries/data-cleaning/conversion/currency-convert.xq'
2341--- test/Queries/data-cleaning/conversion/currency-convert.xq 2011-07-19 19:12:03 +0000
2342+++ test/Queries/data-cleaning/conversion/currency-convert.xq 2013-07-10 00:51:26 +0000
2343@@ -1,3 +1,3 @@
2344-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2345+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2346
2347 conversion:currency-convert ( 1, "USD", "EUR", "2011-01-18" )
2348
2349=== modified file 'test/Queries/data-cleaning/conversion/geocode-from-address.xq'
2350--- test/Queries/data-cleaning/conversion/geocode-from-address.xq 2011-11-08 21:16:29 +0000
2351+++ test/Queries/data-cleaning/conversion/geocode-from-address.xq 2013-07-10 00:51:26 +0000
2352@@ -1,4 +1,4 @@
2353-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2354+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2355
2356 let $geocode := conversion:geocode-from-address ( ("Lisboa", "Portugal") )
2357 for $result in $geocode
2358
2359=== modified file 'test/Queries/data-cleaning/conversion/phone-from-address.xq'
2360--- test/Queries/data-cleaning/conversion/phone-from-address.xq 2012-02-05 22:52:01 +0000
2361+++ test/Queries/data-cleaning/conversion/phone-from-address.xq 2013-07-10 00:51:26 +0000
2362@@ -1,4 +1,4 @@
2363-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2364+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2365
2366 let $arg := conversion:phone-from-address('5655 E Gaskill Rd, Willcox, AZ, US')[1]
2367 let $result :=
2368
2369=== modified file 'test/Queries/data-cleaning/conversion/phone-from-user.xq'
2370--- test/Queries/data-cleaning/conversion/phone-from-user.xq 2012-02-05 22:52:01 +0000
2371+++ test/Queries/data-cleaning/conversion/phone-from-user.xq 2013-07-10 00:51:26 +0000
2372@@ -1,4 +1,4 @@
2373-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2374+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2375
2376 let $arg := conversion:phone-from-user ('Maria Lurdes')[1]
2377 let $result :=
2378
2379=== modified file 'test/Queries/data-cleaning/conversion/unit-convert.xq'
2380--- test/Queries/data-cleaning/conversion/unit-convert.xq 2011-07-19 19:12:03 +0000
2381+++ test/Queries/data-cleaning/conversion/unit-convert.xq 2013-07-10 00:51:26 +0000
2382@@ -1,3 +1,3 @@
2383-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2384+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2385
2386 conversion:unit-convert ( 1 , "Distance", "mile", "kilometer" )
2387
2388=== modified file 'test/Queries/data-cleaning/conversion/user-from-address.xq'
2389--- test/Queries/data-cleaning/conversion/user-from-address.xq 2012-02-05 22:52:01 +0000
2390+++ test/Queries/data-cleaning/conversion/user-from-address.xq 2013-07-10 00:51:26 +0000
2391@@ -1,4 +1,4 @@
2392-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2393+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2394
2395 let $arg := conversion:user-from-address('5655 E Gaskill Rd, Willcox, AZ, US')[1]
2396 let $result :=
2397
2398=== modified file 'test/Queries/data-cleaning/conversion/user-from-phone.xq'
2399--- test/Queries/data-cleaning/conversion/user-from-phone.xq 2012-02-05 22:52:01 +0000
2400+++ test/Queries/data-cleaning/conversion/user-from-phone.xq 2013-07-10 00:51:26 +0000
2401@@ -1,4 +1,4 @@
2402-import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion";
2403+import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion";
2404
2405 let $arg := conversion:user-from-phone ('8654582358')[1]
2406 let $result :=
2407
2408=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq'
2409--- test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq 2011-07-19 19:12:03 +0000
2410+++ test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq 2013-07-10 00:51:26 +0000
2411@@ -1,3 +1,3 @@
2412-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2413+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2414
2415 simh:monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1)
2416
2417=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq'
2418--- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq 2011-07-19 19:12:03 +0000
2419+++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq 2013-07-10 00:51:26 +0000
2420@@ -1,3 +1,3 @@
2421-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2422+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2423
2424 simh:soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 )
2425
2426=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq'
2427--- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq 2011-07-19 19:12:03 +0000
2428+++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq 2013-07-10 00:51:26 +0000
2429@@ -1,3 +1,3 @@
2430-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2431+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2432
2433 simh:soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 )
2434
2435=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq'
2436--- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq 2011-07-19 19:12:03 +0000
2437+++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq 2013-07-10 00:51:26 +0000
2438@@ -1,3 +1,3 @@
2439-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2440+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2441
2442 simh:soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 )
2443
2444=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq'
2445--- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq 2011-07-19 19:12:03 +0000
2446+++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq 2013-07-10 00:51:26 +0000
2447@@ -1,3 +1,3 @@
2448-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2449+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2450
2451 simh:soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" )
2452
2453=== modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq'
2454--- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq 2011-07-19 19:12:03 +0000
2455+++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq 2013-07-10 00:51:26 +0000
2456@@ -1,3 +1,3 @@
2457-import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity";
2458+import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity";
2459
2460 simh:soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +")
2461
2462=== modified file 'test/Queries/data-cleaning/normalization/normalize-address.xq'
2463--- test/Queries/data-cleaning/normalization/normalize-address.xq 2011-07-19 19:12:03 +0000
2464+++ test/Queries/data-cleaning/normalization/normalize-address.xq 2013-07-10 00:51:26 +0000
2465@@ -1,3 +1,3 @@
2466-import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
2467+import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";
2468
2469 normalization:normalize-address ( ( 'Marques de Pombal' , 'Lisboa' ) )
2470
2471=== modified file 'test/Queries/data-cleaning/normalization/to-date.xq'
2472--- test/Queries/data-cleaning/normalization/to-date.xq 2011-07-19 19:12:03 +0000
2473+++ test/Queries/data-cleaning/normalization/to-date.xq 2013-07-10 00:51:26 +0000
2474@@ -1,3 +1,3 @@
2475-import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
2476+import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";
2477
2478 normalization:to-date ( "24OCT2002" , "%d%b%Y" )
2479
2480=== modified file 'test/Queries/data-cleaning/normalization/to-dateTime.spec'
2481--- test/Queries/data-cleaning/normalization/to-dateTime.spec 2011-07-26 20:10:34 +0000
2482+++ test/Queries/data-cleaning/normalization/to-dateTime.spec 2013-07-10 00:51:26 +0000
2483@@ -1,1 +1,1 @@
2484-Error: http://www.zorba-xquery.com/modules/data-cleaning/normalization:notsupported
2485+Error: http://zorba.io/modules/data-cleaning/normalization:NOTSUPPORTED
2486
2487=== modified file 'test/Queries/data-cleaning/normalization/to-dateTime.xq'
2488--- test/Queries/data-cleaning/normalization/to-dateTime.xq 2011-07-26 20:10:34 +0000
2489+++ test/Queries/data-cleaning/normalization/to-dateTime.xq 2013-07-10 00:51:26 +0000
2490@@ -1,3 +1,3 @@
2491-import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
2492+import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";
2493
2494 normalization:to-dateTime( "24OCT2002 21:22" , "%d%b%Y %H%M" )
2495
2496=== modified file 'test/Queries/data-cleaning/normalization/to-time.spec'
2497--- test/Queries/data-cleaning/normalization/to-time.spec 2011-07-19 19:12:03 +0000
2498+++ test/Queries/data-cleaning/normalization/to-time.spec 2013-07-10 00:51:26 +0000
2499@@ -1,1 +1,1 @@
2500-Error: http://www.zorba-xquery.com/modules/data-cleaning/normalization:notsupported
2501+Error: http://zorba.io/modules/data-cleaning/normalization:NOTSUPPORTED
2502
2503=== modified file 'test/Queries/data-cleaning/normalization/to-time.xq'
2504--- test/Queries/data-cleaning/normalization/to-time.xq 2011-07-19 19:12:03 +0000
2505+++ test/Queries/data-cleaning/normalization/to-time.xq 2013-07-10 00:51:26 +0000
2506@@ -1,3 +1,3 @@
2507-import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
2508+import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";
2509
2510 normalization:to-time ( "09 hours 10 minutes" , "%H hours %M minutes" )
2511
2512=== modified file 'test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq'
2513--- test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq 2011-07-19 19:12:03 +0000
2514+++ test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq 2013-07-10 00:51:26 +0000
2515@@ -1,3 +1,3 @@
2516-import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
2517+import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
2518
2519 simp:metaphone-key("ALEKSANDER")
2520
2521=== modified file 'test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq'
2522--- test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq 2011-07-19 19:12:03 +0000
2523+++ test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq 2013-07-10 00:51:26 +0000
2524@@ -1,3 +1,3 @@
2525-import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
2526+import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
2527
2528 simp:metaphone-key("ALEKSANDER")
2529
2530=== modified file 'test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq'
2531--- test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq 2013-05-18 00:38:53 +0000
2532+++ test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq 2013-07-10 00:51:26 +0000
2533@@ -1,4 +1,4 @@
2534-import module namespace simpl = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
2535+import module namespace simpl = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
2536
2537 simpl:soundex-key("Robert"),
2538 simpl:soundex-key("BARHAM") eq "B650" and
2539
2540=== modified file 'test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq'
2541--- test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq 2011-07-19 19:12:03 +0000
2542+++ test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq 2013-07-10 00:51:26 +0000
2543@@ -1,3 +1,3 @@
2544-import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity";
2545+import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity";
2546
2547 simp:soundex( "Robert" , "Rupert" )
2548
2549=== modified file 'test/Queries/data-cleaning/set-similarity/deep-intersect.xq'
2550--- test/Queries/data-cleaning/set-similarity/deep-intersect.xq 2011-07-19 19:12:03 +0000
2551+++ test/Queries/data-cleaning/set-similarity/deep-intersect.xq 2013-07-10 00:51:26 +0000
2552@@ -1,3 +1,3 @@
2553-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2554+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2555
2556 set:deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) )
2557
2558=== modified file 'test/Queries/data-cleaning/set-similarity/deep-union.xq'
2559--- test/Queries/data-cleaning/set-similarity/deep-union.xq 2011-07-19 19:12:03 +0000
2560+++ test/Queries/data-cleaning/set-similarity/deep-union.xq 2013-07-10 00:51:26 +0000
2561@@ -1,3 +1,3 @@
2562-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2563+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2564
2565 set:deep-union ( ( "a", "b", "c") , ( "a", "a", <d/> ) )
2566
2567=== modified file 'test/Queries/data-cleaning/set-similarity/dice.xq'
2568--- test/Queries/data-cleaning/set-similarity/dice.xq 2011-07-19 19:12:03 +0000
2569+++ test/Queries/data-cleaning/set-similarity/dice.xq 2013-07-10 00:51:26 +0000
2570@@ -1,3 +1,3 @@
2571-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2572+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2573
2574 set:dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") )
2575
2576=== modified file 'test/Queries/data-cleaning/set-similarity/distinct.xq'
2577--- test/Queries/data-cleaning/set-similarity/distinct.xq 2011-07-19 19:12:03 +0000
2578+++ test/Queries/data-cleaning/set-similarity/distinct.xq 2013-07-10 00:51:26 +0000
2579@@ -1,3 +1,3 @@
2580-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2581+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2582
2583 set:distinct (( "a", "a", <b/> ))
2584
2585=== modified file 'test/Queries/data-cleaning/set-similarity/jaccard.xq'
2586--- test/Queries/data-cleaning/set-similarity/jaccard.xq 2011-07-19 19:12:03 +0000
2587+++ test/Queries/data-cleaning/set-similarity/jaccard.xq 2013-07-10 00:51:26 +0000
2588@@ -1,3 +1,3 @@
2589-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2590+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2591
2592 set:jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") )
2593
2594=== modified file 'test/Queries/data-cleaning/set-similarity/overlap.xq'
2595--- test/Queries/data-cleaning/set-similarity/overlap.xq 2011-07-19 19:12:03 +0000
2596+++ test/Queries/data-cleaning/set-similarity/overlap.xq 2013-07-10 00:51:26 +0000
2597@@ -1,3 +1,3 @@
2598-import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity";
2599+import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity";
2600
2601 set:overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) )
2602
2603=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq'
2604--- test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq 2011-07-19 19:12:03 +0000
2605+++ test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq 2013-07-10 00:51:26 +0000
2606@@ -1,3 +1,3 @@
2607-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2608+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2609
2610 simt:cosine-ngrams("DWAYNE", "DUANE", 2 )
2611
2612=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq'
2613--- test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq 2011-07-19 19:12:03 +0000
2614+++ test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq 2013-07-10 00:51:26 +0000
2615@@ -1,3 +1,3 @@
2616-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2617+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2618
2619 simt:cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" )
2620
2621=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine.xq'
2622--- test/Queries/data-cleaning/token-based-string-similarity/cosine.xq 2011-07-19 19:12:03 +0000
2623+++ test/Queries/data-cleaning/token-based-string-similarity/cosine.xq 2013-07-10 00:51:26 +0000
2624@@ -1,3 +1,3 @@
2625-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2626+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2627
2628 simt:cosine( ("aa","bb") , ("bb","aa"))
2629
2630=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq'
2631--- test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq 2011-07-19 19:12:03 +0000
2632+++ test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq 2013-07-10 00:51:26 +0000
2633@@ -1,3 +1,3 @@
2634-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2635+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2636
2637 simt:dice-ngrams("DWAYNE", "DUANE", 2 )
2638
2639=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq'
2640--- test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq 2011-07-19 19:12:03 +0000
2641+++ test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq 2013-07-10 00:51:26 +0000
2642@@ -1,3 +1,3 @@
2643-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2644+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2645
2646 simt:dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" )
2647
2648=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq'
2649--- test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq 2011-07-19 19:12:03 +0000
2650+++ test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq 2013-07-10 00:51:26 +0000
2651@@ -1,3 +1,3 @@
2652-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2653+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2654
2655 simt:jaccard-ngrams("DWAYNE", "DUANE", 2 )
2656
2657=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq'
2658--- test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq 2011-07-19 19:12:03 +0000
2659+++ test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq 2013-07-10 00:51:26 +0000
2660@@ -1,3 +1,3 @@
2661-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2662+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2663
2664 simt:jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" )
2665
2666=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq'
2667--- test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq 2011-07-19 19:12:03 +0000
2668+++ test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq 2013-07-10 00:51:26 +0000
2669@@ -1,3 +1,3 @@
2670-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2671+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2672
2673 simt:ngrams("FLWOR", 2 )
2674
2675=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq'
2676--- test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq 2011-07-19 19:12:03 +0000
2677+++ test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq 2013-07-10 00:51:26 +0000
2678@@ -1,3 +1,3 @@
2679-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2680+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2681
2682 simt:overlap-ngrams("DWAYNE", "DUANE", 2 )
2683
2684=== modified file 'test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq'
2685--- test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq 2011-07-19 19:12:03 +0000
2686+++ test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq 2013-07-10 00:51:26 +0000
2687@@ -1,3 +1,3 @@
2688-import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity";
2689+import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity";
2690
2691 simt:overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" )

Subscribers

People subscribed via source and target branches

to all changes: