Merge lp:~zorba-coders/zorba/update3.0_data-cleaning-module into lp:zorba/data-cleaning-module
- update3.0_data-cleaning-module
- Merge into data-cleaning-module
Field | Value |
---|---|
Status: | Merged |
Merged at revision: | 48 |
Proposed branch: | lp:~zorba-coders/zorba/update3.0_data-cleaning-module |
Merge into: | lp:zorba/data-cleaning-module |
Diff against target: |
2691 lines (+636/-710) 81 files modified
src/CMakeLists.txt (+8/-8) src/character-based-string-similarity.xq (+42/-49) src/com/CMakeLists.txt (+0/-17) src/com/zorba-xquery/CMakeLists.txt (+0/-17) src/com/zorba-xquery/www/CMakeLists.txt (+0/-17) src/com/zorba-xquery/www/modules/CMakeLists.txt (+0/-17) src/consolidation.xq (+212/-212) src/conversion.xq (+36/-36) src/hybrid-string-similarity.xq (+70/-70) src/normalization.xq (+41/-40) src/phonetic-string-similarity.xq (+32/-32) src/set-similarity.xq (+49/-49) src/token-based-string-similarity.xq (+78/-78) test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq (+1/-1) test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq (+1/-1) test/Queries/data-cleaning/character-based-string-similarity/jaro.xq (+1/-1) test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-attributes.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-distinct-elements.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-elements.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-nodes.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq (+1/-1) test/Queries/data-cleaning/consolidation/least-tokens.xq (+1/-1) test/Queries/data-cleaning/consolidation/leastfrequent_1.xq (+1/-1) test/Queries/data-cleaning/consolidation/longest_1.xq (+1/-1) test/Queries/data-cleaning/consolidation/matching_1.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-attributes.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-distinct-elements.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-elements.xq (+1/-1) 
test/Queries/data-cleaning/consolidation/most-frequent.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-nodes.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq (+1/-1) test/Queries/data-cleaning/consolidation/most-tokens.xq (+1/-1) test/Queries/data-cleaning/consolidation/shortest_1.xq (+1/-1) test/Queries/data-cleaning/consolidation/superstring_1.xq (+1/-1) test/Queries/data-cleaning/conversion/address-from-geocode.xq (+1/-1) test/Queries/data-cleaning/conversion/address-from-phone.xq (+1/-1) test/Queries/data-cleaning/conversion/address-from-user.xq (+1/-1) test/Queries/data-cleaning/conversion/currency-convert.xq (+1/-1) test/Queries/data-cleaning/conversion/geocode-from-address.xq (+1/-1) test/Queries/data-cleaning/conversion/phone-from-address.xq (+1/-1) test/Queries/data-cleaning/conversion/phone-from-user.xq (+1/-1) test/Queries/data-cleaning/conversion/unit-convert.xq (+1/-1) test/Queries/data-cleaning/conversion/user-from-address.xq (+1/-1) test/Queries/data-cleaning/conversion/user-from-phone.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq (+1/-1) test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq (+1/-1) test/Queries/data-cleaning/normalization/normalize-address.xq (+1/-1) test/Queries/data-cleaning/normalization/to-date.xq (+1/-1) test/Queries/data-cleaning/normalization/to-dateTime.spec (+1/-1) test/Queries/data-cleaning/normalization/to-dateTime.xq (+1/-1) test/Queries/data-cleaning/normalization/to-time.spec (+1/-1) test/Queries/data-cleaning/normalization/to-time.xq (+1/-1) 
test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq (+1/-1) test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq (+1/-1) test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq (+1/-1) test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq (+1/-1) test/Queries/data-cleaning/set-similarity/deep-intersect.xq (+1/-1) test/Queries/data-cleaning/set-similarity/deep-union.xq (+1/-1) test/Queries/data-cleaning/set-similarity/dice.xq (+1/-1) test/Queries/data-cleaning/set-similarity/distinct.xq (+1/-1) test/Queries/data-cleaning/set-similarity/jaccard.xq (+1/-1) test/Queries/data-cleaning/set-similarity/overlap.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/cosine.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq (+1/-1) test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq (+1/-1) |
||||
To merge this branch: | bzr merge lp:~zorba-coders/zorba/update3.0_data-cleaning-module | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Bruno Martins | Approve | ||
Juan Zacarias | Approve | ||
Chris Hillery | Approve | ||
Review via email: mp+170213@code.launchpad.net |
Commit message
Changes for Update 3.0
Description of the change
Changes for Update 3.0
- 48. By Juan Zacarias: Reverted update 3.0 changes.
- 49. By Juan Zacarias: Updated module to 3.0, updated documentation and changed module's namespace.
- 50. By Juan Zacarias: Added error to documentation.
- 51. By Juan Zacarias: Changed error name to caps.
Juan Zacarias (juan457) wrote : | # |
Changes done
Chris Hillery (ceejatec) : | # |
Juan Zacarias (juan457) : | # |
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for the following merge proposals:
https://code.launchpad.net/~zorba-coders/zorba/update3.0_data-cleaning-module/+merge/170213
Progress dashboard at http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue result for https://code.launchpad.net/~zorba-coders/zorba/update3.0_data-cleaning-module/+merge/170213
Stage "TestZorbaUbuntu" failed.
2 tests failed (8345 total tests run).
Check test results at http://
- 52. By Chris Hillery: Fixed <p> typos.
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue starting for the following merge proposals:
https://code.launchpad.net/~zorba-coders/zorba/update3.0_data-cleaning-module/+merge/170213
Progress dashboard at http://
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue succeeded - proposal merged!
Bruno Martins (bgmartins) : | # |
Preview Diff
1 | === removed file 'src/CMakeLists.txt' |
2 | --- src/CMakeLists.txt 2011-07-26 10:43:16 +0000 |
3 | +++ src/CMakeLists.txt 1970-01-01 00:00:00 +0000 |
4 | @@ -1,20 +0,0 @@ |
5 | -# Copyright 2006-2008 The FLWOR Foundation. |
6 | -# |
7 | -# Licensed under the Apache License, Version 2.0 (the "License"); |
8 | -# you may not use this file except in compliance with the License. |
9 | -# You may obtain a copy of the License at |
10 | -# |
11 | -# http://www.apache.org/licenses/LICENSE-2.0 |
12 | -# |
13 | -# Unless required by applicable law or agreed to in writing, software |
14 | -# distributed under the License is distributed on an "AS IS" BASIS, |
15 | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | -# See the License for the specific language governing permissions and |
17 | -# limitations under the License. |
18 | - |
19 | -# all external module libraries are generated in the directory |
20 | -# of the corresponding .xq file |
21 | -MESSAGE(STATUS "Add com") |
22 | -ADD_SUBDIRECTORY(com) |
23 | - |
24 | -MESSAGE(STATUS "End modules") |
25 | |
26 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/CMakeLists.txt' => 'src/CMakeLists.txt' |
27 | --- src/com/zorba-xquery/www/modules/data-cleaning/CMakeLists.txt 2011-08-07 20:36:50 +0000 |
28 | +++ src/CMakeLists.txt 2013-07-10 00:51:26 +0000 |
29 | @@ -12,29 +12,29 @@ |
30 | # See the License for the specific language governing permissions and |
31 | # limitations under the License. |
32 | |
33 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity" |
34 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/character-based-string-similarity" |
35 | VERSION 2.0 FILE "character-based-string-similarity.xq") |
36 | |
37 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/consolidation" |
38 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/consolidation" |
39 | VERSION 2.0 FILE "consolidation.xq") |
40 | |
41 | DECLARE_ZORBA_SCHEMA( FILE whitepages_schema.xsd |
42 | URI "http://api.whitepages.com/schema/") |
43 | |
44 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/conversion" |
45 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/conversion" |
46 | VERSION 2.0 FILE "conversion.xq") |
47 | |
48 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity" |
49 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/hybrid-string-similarity" |
50 | VERSION 2.0 FILE "hybrid-string-similarity.xq") |
51 | |
52 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/normalization" |
53 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/normalization" |
54 | VERSION 2.0 FILE "normalization.xq") |
55 | |
56 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity" |
57 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/phonetic-string-similarity" |
58 | VERSION 2.0 FILE "phonetic-string-similarity.xq") |
59 | |
60 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity" |
61 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/set-similarity" |
62 | VERSION 2.0 FILE "set-similarity.xq") |
63 | |
64 | -DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity" |
65 | +DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/data-cleaning/token-based-string-similarity" |
66 | VERSION 2.0 FILE "token-based-string-similarity.xq") |
67 | |
68 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/character-based-string-similarity.xq' => 'src/character-based-string-similarity.xq' |
69 | --- src/com/zorba-xquery/www/modules/data-cleaning/character-based-string-similarity.xq 2012-09-28 13:34:20 +0000 |
70 | +++ src/character-based-string-similarity.xq 2013-07-10 00:51:26 +0000 |
71 | @@ -2,7 +2,7 @@ |
72 | |
73 | (: |
74 | : Copyright 2006-2009 The FLWOR Foundation. |
75 | - : |
76 | + : |
77 | : Licensed under the Apache License, Version 2.0 (the "License"); |
78 | : you may not use this file except in compliance with the License. |
79 | : You may obtain a copy of the License at |
80 | @@ -17,35 +17,32 @@ |
81 | :) |
82 | |
83 | (:~ |
84 | - : This library module provides character-based string similarity functions |
85 | + : <p>This library module provides character-based string similarity functions |
86 | : that view strings as sequences of characters, generally computing a similarity score |
87 | : that corresponds to the cost of transforming one string into another. |
88 | - : |
89 | : These functions are particularly useful for matching near duplicate strings |
90 | - : in the presence of typographical errors. |
91 | - : |
92 | - : The logic contained in this module is not specific to any particular XQuery implementation. |
93 | + : in the presence of typographical errors. </p> |
94 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p> |
95 | : |
96 | : @author Bruno Martins and Diogo Simões |
97 | : @project Zorba/Data Cleaning/Character-Based String Similarity |
98 | :) |
99 | |
100 | -module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
101 | +module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
102 | |
103 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
104 | declare option ver:module-version "2.0"; |
105 | |
106 | (:~ |
107 | - : Returns the edit distance between two strings. |
108 | - : |
109 | - : This distance, also refered to as the Levenshtein distance, is defined as the minimum number |
110 | + : <p>Returns the edit distance between two strings.</p> |
111 | + : <p/> |
112 | + : <p>This distance, also refered to as the Levenshtein distance, is defined as the minimum number |
113 | : of edits needed to transform one string into the other, with the allowable edit operations |
114 | - : being insertion, deletion, or substitution of a single character. |
115 | - : |
116 | - : <br/> |
117 | - : Example usage : <code>edit-distance("FLWOR", "FLOWER")</code> |
118 | - : <br/> |
119 | - : The function invocation in the example above returns : <code>2</code> |
120 | + : being insertion, deletion, or substitution of a single character.</p> |
121 | + : <p/> |
122 | + : <p>Example usage : <code>edit-distance("FLWOR", "FLOWER")</code></p> |
123 | + : <p/> |
124 | + : <p>The function invocation in the example above returns : <code>2</code></p> |
125 | : |
126 | : @param $s1 The first string. |
127 | : @param $s2 The second string. |
128 | @@ -63,17 +60,16 @@ |
129 | }; |
130 | |
131 | (:~ |
132 | - : Returns the Jaro similarity coefficient between two strings. |
133 | - : |
134 | - : This similarity coefficient is based on the number of transposed characters and on a |
135 | + : <p>Returns the Jaro similarity coefficient between two strings.</p> |
136 | + : <p/> |
137 | + : <p>This similarity coefficient is based on the number of transposed characters and on a |
138 | : weighted sum of the percentage of matched characters held within the strings. The higher |
139 | : the Jaro-Winkler value is, the more similar the strings are. The coefficient is |
140 | - : normalized such that 0 equates to no similarity and 1 is an exact match. |
141 | - : |
142 | - : <br/> |
143 | - : Example usage : <code>jaro("FLWOR Found.", "FLWOR Foundation")</code> |
144 | - : <br/> |
145 | - : The function invocation in the example above returns : <code>0.5853174603174603</code> |
146 | + : normalized such that 0 equates to no similarity and 1 is an exact match.</p> |
147 | + : <p/> |
148 | + : <p>Example usage : <code>jaro("FLWOR Found.", "FLWOR Foundation")</code></p> |
149 | + : <p/> |
150 | + : <p>The function invocation in the example above returns : <code>0.5853174603174603</code></p> |
151 | : |
152 | : @param $s1 The first string. |
153 | : @param $s2 The second string. |
154 | @@ -97,15 +93,14 @@ |
155 | }; |
156 | |
157 | (:~ |
158 | - : Returns the Jaro-Winkler similarity coefficient between two strings. |
159 | - : |
160 | - : This similarity coefficient corresponds to an extension of the Jaro similarity coefficient that weights or |
161 | - : penalizes strings based on their similarity at the beginning of the string, up to a given prefix size. |
162 | - : |
163 | - : <br/> |
164 | - : Example usage : <code>jaro-winkler("DWAYNE", "DUANE", 4, 0.1 )</code> |
165 | - : <br/> |
166 | - : The function invocation in the example above returns : <code>0.8577777777777778</code> |
167 | + : <p>Returns the Jaro-Winkler similarity coefficient between two strings.</p> |
168 | + : <p/> |
169 | + : <p>This similarity coefficient corresponds to an extension of the Jaro similarity coefficient that weights or |
170 | + : penalizes strings based on their similarity at the beginning of the string, up to a given prefix size.</p> |
171 | + : <p/> |
172 | + : <p>Example usage : <code>jaro-winkler("DWAYNE", "DUANE", 4, 0.1 )</code></p> |
173 | + : <p/> |
174 | + : <p>The function invocation in the example above returns : <code>0.8577777777777778</code></p> |
175 | : |
176 | : @param $s1 The first string. |
177 | : @param $s2 The second string. |
178 | @@ -122,16 +117,15 @@ |
179 | }; |
180 | |
181 | (:~ |
182 | - : Returns the Needleman-Wunsch distance between two strings. |
183 | - : |
184 | - : The Needleman-Wunsch distance is similar to the basic edit distance metric, adding a |
185 | + : <p>Returns the Needleman-Wunsch distance between two strings.</p> |
186 | + : <p/> |
187 | + : <p>The Needleman-Wunsch distance is similar to the basic edit distance metric, adding a |
188 | : variable cost adjustment to the cost of a gap (i.e., an insertion or deletion) in the |
189 | - : distance metric. |
190 | - : |
191 | - : <br/> |
192 | - : Example usage : <code>needleman-wunsch("KAK", "KQRK", 1, 1)</code> |
193 | - : <br/> |
194 | - : The function invocation in the example above returns : <code>0</code> |
195 | + : distance metric.</p> |
196 | + : <p/> |
197 | + : <p>Example usage : <code>needleman-wunsch("KAK", "KQRK", 1, 1)</code></p> |
198 | + : <p/> |
199 | + : <p>The function invocation in the example above returns : <code>0</code></p> |
200 | : |
201 | : @param $s1 The first string. |
202 | : @param $s2 The second string. |
203 | @@ -152,12 +146,11 @@ |
204 | }; |
205 | |
206 | (:~ |
207 | - : Returns the Smith-Waterman distance between two strings. |
208 | - : |
209 | - : <br/> |
210 | - : Example usage : <code>smith-waterman("ACACACTA", "AGCACACA", 2, 1)</code> |
211 | - : <br/> |
212 | - : The function invocation in the example above returns : <code>12</code> |
213 | + : <p>Returns the Smith-Waterman distance between two strings.</p> |
214 | + : <p/> |
215 | + : <p>Example usage : <code>smith-waterman("ACACACTA", "AGCACACA", 2, 1)</code></p> |
216 | + : <p/> |
217 | + : <p>The function invocation in the example above returns : <code>12</code></p> |
218 | : |
219 | : @param $s1 The first string. |
220 | : @param $s2 The second string. |
221 | |
222 | === removed directory 'src/com' |
223 | === removed file 'src/com/CMakeLists.txt' |
224 | --- src/com/CMakeLists.txt 2011-10-06 08:17:41 +0000 |
225 | +++ src/com/CMakeLists.txt 1970-01-01 00:00:00 +0000 |
226 | @@ -1,17 +0,0 @@ |
227 | -# Copyright 2006-2008 The FLWOR Foundation. |
228 | -# |
229 | -# Licensed under the Apache License, Version 2.0 (the "License"); |
230 | -# you may not use this file except in compliance with the License. |
231 | -# You may obtain a copy of the License at |
232 | -# |
233 | -# http://www.apache.org/licenses/LICENSE-2.0 |
234 | -# |
235 | -# Unless required by applicable law or agreed to in writing, software |
236 | -# distributed under the License is distributed on an "AS IS" BASIS, |
237 | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
238 | -# See the License for the specific language governing permissions and |
239 | -# limitations under the License. |
240 | - |
241 | -# all external module libraries are generated in the directory |
242 | -# of the corresponding .xq file |
243 | -ADD_SUBDIRECTORY(zorba-xquery) |
244 | |
245 | === removed directory 'src/com/zorba-xquery' |
246 | === removed file 'src/com/zorba-xquery/CMakeLists.txt' |
247 | --- src/com/zorba-xquery/CMakeLists.txt 2011-10-06 08:17:41 +0000 |
248 | +++ src/com/zorba-xquery/CMakeLists.txt 1970-01-01 00:00:00 +0000 |
249 | @@ -1,17 +0,0 @@ |
250 | -# Copyright 2006-2008 The FLWOR Foundation. |
251 | -# |
252 | -# Licensed under the Apache License, Version 2.0 (the "License"); |
253 | -# you may not use this file except in compliance with the License. |
254 | -# You may obtain a copy of the License at |
255 | -# |
256 | -# http://www.apache.org/licenses/LICENSE-2.0 |
257 | -# |
258 | -# Unless required by applicable law or agreed to in writing, software |
259 | -# distributed under the License is distributed on an "AS IS" BASIS, |
260 | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
261 | -# See the License for the specific language governing permissions and |
262 | -# limitations under the License. |
263 | - |
264 | -# all external module libraries are generated in the directory |
265 | -# of the corresponding .xq file |
266 | -ADD_SUBDIRECTORY(www) |
267 | |
268 | === removed directory 'src/com/zorba-xquery/www' |
269 | === removed file 'src/com/zorba-xquery/www/CMakeLists.txt' |
270 | --- src/com/zorba-xquery/www/CMakeLists.txt 2011-10-06 08:17:41 +0000 |
271 | +++ src/com/zorba-xquery/www/CMakeLists.txt 1970-01-01 00:00:00 +0000 |
272 | @@ -1,17 +0,0 @@ |
273 | -# Copyright 2006-2008 The FLWOR Foundation. |
274 | -# |
275 | -# Licensed under the Apache License, Version 2.0 (the "License"); |
276 | -# you may not use this file except in compliance with the License. |
277 | -# You may obtain a copy of the License at |
278 | -# |
279 | -# http://www.apache.org/licenses/LICENSE-2.0 |
280 | -# |
281 | -# Unless required by applicable law or agreed to in writing, software |
282 | -# distributed under the License is distributed on an "AS IS" BASIS, |
283 | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
284 | -# See the License for the specific language governing permissions and |
285 | -# limitations under the License. |
286 | - |
287 | -# all external module libraries are generated in the directory |
288 | -# of the corresponding .xq file |
289 | -ADD_SUBDIRECTORY(modules) |
290 | |
291 | === removed directory 'src/com/zorba-xquery/www/modules' |
292 | === removed file 'src/com/zorba-xquery/www/modules/CMakeLists.txt' |
293 | --- src/com/zorba-xquery/www/modules/CMakeLists.txt 2011-10-06 08:17:41 +0000 |
294 | +++ src/com/zorba-xquery/www/modules/CMakeLists.txt 1970-01-01 00:00:00 +0000 |
295 | @@ -1,17 +0,0 @@ |
296 | -# Copyright 2006-2008 The FLWOR Foundation. |
297 | -# |
298 | -# Licensed under the Apache License, Version 2.0 (the "License"); |
299 | -# you may not use this file except in compliance with the License. |
300 | -# You may obtain a copy of the License at |
301 | -# |
302 | -# http://www.apache.org/licenses/LICENSE-2.0 |
303 | -# |
304 | -# Unless required by applicable law or agreed to in writing, software |
305 | -# distributed under the License is distributed on an "AS IS" BASIS, |
306 | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
307 | -# See the License for the specific language governing permissions and |
308 | -# limitations under the License. |
309 | - |
310 | -# all external module libraries are generated in the directory |
311 | -# of the corresponding .xq file |
312 | -ADD_SUBDIRECTORY(data-cleaning) |
313 | |
314 | === removed directory 'src/com/zorba-xquery/www/modules/data-cleaning' |
315 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/consolidation.xq' => 'src/consolidation.xq' |
316 | --- src/com/zorba-xquery/www/modules/data-cleaning/consolidation.xq 2012-11-28 18:23:07 +0000 |
317 | +++ src/consolidation.xq 2013-07-10 00:51:26 +0000 |
318 | @@ -17,33 +17,33 @@ |
319 | :) |
320 | |
321 | (:~ |
322 | - : This library module provides data consolidation functions that generally take as input a sequence of XML nodes |
323 | - : and apply some rule in order do decide which node is better suited to represent the entire sequence. |
324 | - : |
325 | - : The logic contained in this module is not specific to any particular XQuery implementation, |
326 | + : <p>This library module provides data consolidation functions that generally take as input a sequence of XML nodes |
327 | + : and apply some rule in order do decide which node is better suited to represent the entire sequence.</p> |
328 | + : <p/> |
329 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation, |
330 | : although the consolidation functions based on matching sequences against XPath expressions require |
331 | - : some form of dynamic evaluation for XPath expressions. |
332 | + : some form of dynamic evaluation for XPath expressions.</p> |
333 | : |
334 | : @author Bruno Martins |
335 | : @project Zorba/Data Cleaning/Consolidation |
336 | :) |
337 | |
338 | -module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
339 | +module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
340 | |
341 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
342 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
343 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
344 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
345 | |
346 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
347 | declare option ver:module-version "2.0"; |
348 | |
349 | (:~ |
350 | - : Returns the single most frequent node in a sequence of nodes provided as input. |
351 | - : If more then one answer is possible, returns the first node according to the order of the input sequence. |
352 | - : |
353 | - : <br/> |
354 | - : Example usage : <code>most-frequent( ( "a", "a", "b") )</code> |
355 | - : <br/> |
356 | - : The function invocation in the example above returns : <code>("a")</code> |
357 | + : <p>Returns the single most frequent node in a sequence of nodes provided as input.</p> |
358 | + : <p>If more then one answer is possible, returns the first node according to the order of the input sequence.</p> |
359 | + : <p/> |
360 | + : |
361 | + : <p>Example usage : <code>most-frequent( ( "a", "a", "b") )</code></p> |
362 | + : <p/> |
363 | + : <p>The function invocation in the example above returns : <code>("a")</code></p> |
364 | : |
365 | : @param $s A sequence of nodes. |
366 | : @return The most frequent node in the input sequence. |
367 | @@ -54,13 +54,13 @@ |
368 | }; |
369 | |
370 | (:~ |
371 | - : Returns the single less frequent node in a sequence of nodes provided as input. |
372 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
373 | - : |
374 | - : <br/> |
375 | - : Example usage : <code>least-frequent( ( "a", "a", "b") )</code> |
376 | - : <br/> |
377 | - : The function invocation in the example above returns : <code>("b")</code> |
378 | + : <p>Returns the single less frequent node in a sequence of nodes provided as input.</p> |
379 | + : <p>If more then one answer is possible, return the first node according to the order of the input sequence.</p> |
380 | + : <p/> |
381 | + : |
382 | + : <p>Example usage : <code>least-frequent( ( "a", "a", "b") )</code></p> |
383 | + : <p/> |
384 | + : <p>The function invocation in the example above returns : <code>("b")</code></p> |
385 | : |
386 | : @param $s A sequence of nodes. |
387 | : @return The least frequent node in the input sequence. |
388 | @@ -72,13 +72,13 @@ |
389 | }; |
390 | |
391 | (:~ |
392 | - : Returns the single longest string, in terms of the number of characters, in a sequence of strings provided as input. |
393 | - : If more then one answer is possible, return the first string according to the order of the input sequence. |
394 | - : |
395 | - : <br/> |
396 | - : Example usage : <code>con:longest( ( "a", "aa", "aaa") )</code> |
397 | - : <br/> |
398 | - : The function invocation in the example above returns : <code>("aaa")</code> |
399 | + : <p>Returns the single longest string, in terms of the number of characters, in a sequence of strings provided as input.</p> |
400 | + : <p>If more then one answer is possible, return the first string according to the order of the input sequence.</p> |
401 | + : <p/> |
402 | + : |
403 | + : <p>Example usage : <code>con:longest( ( "a", "aa", "aaa") )</code></p> |
404 | + : <p/> |
405 | + : <p>The function invocation in the example above returns : <code>("aaa")</code></p> |
406 | : |
407 | : @param $s A sequence of strings. |
408 | : @return The longest string in the input sequence. |
409 | @@ -90,13 +90,13 @@ |
410 | }; |
411 | |
412 | (:~ |
413 | - : Returns the single shortest string, in terms of the number of characters, in a sequence of strings provided as input. |
414 | - : If more then one answer is possible, return the first string according to the order of the input sequence. |
415 | - : |
416 | - : <br/> |
417 | - : Example usage : <code>shortest( ( "a", "aa", "aaa") )</code> |
418 | - : <br/> |
419 | - : The function invocation in the example above returns : <code>("a")</code> |
420 | + : <p>Returns the single shortest string, in terms of the number of characters, in a sequence of strings provided as input.</p> |
421 | + : <p>If more then one answer is possible, return the first string according to the order of the input sequence.</p> |
422 | + : <p/> |
423 | + : |
424 | + : <p>Example usage : <code>shortest( ( "a", "aa", "aaa") )</code></p> |
425 | + : <p/> |
426 | + : <p>The function invocation in the example above returns : <code>("a")</code></p> |
427 | : |
428 | : @param $s A sequence of strings. |
429 | : @return The shortest string in the input sequence. |
430 | @@ -108,13 +108,13 @@ |
431 | }; |
432 | |
433 | (:~ |
434 | - : Returns the single longest string, in terms of the number of tokens, in a sequence of strings provided as input. |
435 | - : If more then one answer is possible, return the first string according to the order of the input sequence. |
436 | - : |
437 | - : <br/> |
438 | - : Example usage : <code>most-tokens( ( "a b c", "a b", "a"), " +" )</code> |
439 | - : <br/> |
440 | - : The function invocation in the example above returns : <code>("a b c")</code> |
441 | + : <p>Returns the single longest string, in terms of the number of tokens, in a sequence of strings provided as input.</p> |
442 | + : <p>If more then one answer is possible, return the first string according to the order of the input sequence.</p> |
443 | + : <p/> |
444 | + : |
445 | + : <p>Example usage : <code>most-tokens( ( "a b c", "a b", "a"), " +" )</code></p> |
446 | + : <p/> |
447 | + : <p>The function invocation in the example above returns : <code>("a b c")</code></p> |
448 | : |
449 | : @param $s A sequence of strings. |
450 | : @param $r A regular expression forming the delimiter character(s) which mark the boundaries between adjacent tokens. |
451 | @@ -127,13 +127,13 @@ |
452 | }; |
453 | |
454 | (:~ |
455 | - : Returns the single shortest string, in terms of the number of tokens, in a sequence of strings provided as input. |
456 | - : If more then one answer is possible, return the first string according to the order of the input sequence. |
457 | - : |
458 | - : <br/> |
459 | - : Example usage : <code>least-tokens( ( "a b c", "a b", "a"), " +" )</code> |
460 | - : <br/> |
461 | - : The function invocation in the example above returns : <code>("a")</code> |
462 | + : <p>Returns the single shortest string, in terms of the number of tokens, in a sequence of strings provided as input.</p> |
463 | + : <p>If more then one answer is possible, return the first string according to the order of the input sequence.</p> |
464 | + : <p/> |
465 | + : |
466 | + : <p>Example usage : <code>least-tokens( ( "a b c", "a b", "a"), " +" )</code></p> |
467 | + : <p/> |
468 | + : <p>The function invocation in the example above returns : <code>("a")</code></p> |
469 | : |
470 | : @param $s A sequence of strings. |
471 | : @param $r A regular expression forming the delimiter character(s) which mark the boundaries between adjacent tokens. |
472 | @@ -146,12 +146,12 @@ |
473 | }; |
474 | |
475 | (:~ |
476 | - : Returns the strings from an input sequence of strings that match a particular regular expression. |
477 | - : |
478 | - : <br/> |
479 | - : Example usage : <code>matching( ( "a A b", "c AAA d", "e BB f"), "A+" )</code> |
480 | - : <br/> |
481 | - : The function invocation in the example above returns : <code>( "a A b", "c AAA d")</code> |
482 | + : <p>Returns the strings from an input sequence of strings that match a particular regular expression.</p> |
483 | + : <p/> |
484 | + : |
485 | + : <p>Example usage : <code>matching( ( "a A b", "c AAA d", "e BB f"), "A+" )</code></p> |
486 | + : <p/> |
487 | + : <p>The function invocation in the example above returns : <code>( "a A b", "c AAA d")</code></p> |
488 | : |
489 | : @param $s A sequence of strings. |
490 | : @param $r The regular expression to be used in the matching. |
491 | @@ -163,14 +163,14 @@ |
492 | }; |
493 | |
494 | (:~ |
495 | - : Returns the single string, from an input sequence of strings, that appears more frequently as part |
496 | - : of the other strings in the sequence. If no such string exists, the function returns an empty sequence. |
497 | - : If more then one answer is possible, the function returns the first string according to the order of the input sequence. |
498 | - : |
499 | - : <br/> |
500 | - : Example usage : <code>super-string( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) )</code> |
501 | - : <br/> |
502 | - : The function invocation in the example above returns : <code>( "aaa bbb" )</code> |
503 | + : <p>Returns the single string, from an input sequence of strings, that appears most frequently as part |
504 | + : of the other strings in the sequence. If no such string exists, the function returns an empty sequence.</p> |
505 | + : <p>If more than one answer is possible, the function returns the first string according to the order of the input sequence.</p> |
506 | + : <p/> |
507 | + : |
508 | + : <p>Example usage : <code>super-string( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) )</code></p> |
509 | + : <p/> |
510 | + : <p>The function invocation in the example above returns : <code>( "aaa bbb" )</code></p> |
511 | : |
512 | : @param $s A sequence of strings. |
513 | : @return The string that appears more frequently as part of the other strings in the sequence. |
514 | @@ -187,15 +187,15 @@ |
515 | }; |
516 | |
517 | (:~ |
518 | - : Returns the single most similar string, in terms of the edit distance metric towards an input string, |
519 | + : <p>Returns the single most similar string, in terms of the edit distance metric towards an input string, |
520 | : in a sequence of strings provided as input. If more than one string has a maximum similarity (a minimum |
521 | : value for the edit distance metric), the function returns the first string according to the order of the |
522 | - : input sequence. |
523 | - : |
524 | - : <br/> |
525 | - : Example usage : <code>most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code> |
526 | - : <br/> |
527 | - : The function invocation in the example above returns : <code>( "aaabbb" )</code> |
528 | + : input sequence.</p> |
529 | + : <p/> |
530 | + : |
531 | + : <p>Example usage : <code>most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code></p> |
532 | + : <p/> |
533 | + : <p>The function invocation in the example above returns : <code>( "aaabbb" )</code></p> |
534 | : |
535 | : @param $s A sequence of strings. |
536 | : @param $m The string towards which we want to measure the edit distance. |
537 | @@ -208,14 +208,14 @@ |
538 | }; |
539 | |
540 | (:~ |
541 | - : Returns the single least similar string, in terms of the edit distance metric towards an input string, |
542 | + : <p>Returns the single least similar string, in terms of the edit distance metric towards an input string, |
543 | : in a sequence of strings provided as input. If more than one string has a minimum similarity (a maximum |
544 | - : value for the edit distance metric), return the first string according to the order of the input sequence. |
545 | - : |
546 | - : <br/> |
547 | - : Example usage : <code>least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code> |
548 | - : <br/> |
549 | - : The function invocation in the example above returns : <code>( "eeefff" )</code> |
550 | + : value for the edit distance metric), return the first string according to the order of the input sequence.</p> |
551 | + : <p/> |
552 | + : |
553 | + : <p>Example usage : <code>least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" )</code></p> |
554 | + : <p/> |
555 | + : <p>The function invocation in the example above returns : <code>( "eeefff" )</code></p> |
556 | : |
557 | : @param $s A sequence of strings. |
558 | : @param $m The string towards which we want to measure the edit distance. |
559 | @@ -228,14 +228,14 @@ |
560 | }; |
561 | |
562 | (:~ |
563 | - : Returns the single node having the largest number of descending elements (sub-elements at any given depth) |
564 | - : in a sequence of nodes provided as input. |
565 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
566 | - : |
567 | - : <br/> |
568 | - : Example usage : <code>most-elements( ( <a><b/></a>, <a/>, <b/>) )</code> |
569 | - : <br/> |
570 | - : The function invocation in the example above returns : <code>(<a><b/></a>)</code> |
571 | + : <p>Returns the single node having the largest number of descending elements (sub-elements at any given depth) |
572 | + : in a sequence of nodes provided as input.</p> |
573 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
574 | + : <p/> |
575 | + : |
576 | + : <p>Example usage : <code>most-elements( ( <a><b/></a>, <a/>, <b/>) )</code></p> |
577 | + : <p/> |
578 | + : <p>The function invocation in the example above returns : <code>(<a><b/></a>)</code></p> |
579 | : |
580 | : @param $s A sequence of nodes. |
581 | : @return The node having the largest number of descending elements in the input sequence. |
582 | @@ -246,14 +246,14 @@ |
583 | }; |
584 | |
585 | (:~ |
586 | - : Returns the single node having the largest number of descending attributes (attributes at any given depth) |
587 | - : in a sequence of nodes provided as input. |
588 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
589 | - : |
590 | - : <br/> |
591 | - : Example usage : <code>most-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )</code> |
592 | - : <br/> |
593 | - : The function invocation in the example above returns : <code>(<a att1="a1" att2="a2"/>)</code> |
594 | + : <p>Returns the single node having the largest number of descending attributes (attributes at any given depth) |
595 | + : in a sequence of nodes provided as input.</p> |
596 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
597 | + : <p/> |
598 | + : |
599 | + : <p>Example usage : <code>most-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )</code></p> |
600 | + : <p/> |
601 | + : <p>The function invocation in the example above returns : <code>(<a att1="a1" att2="a2"/>)</code></p> |
602 | : |
603 | : @param $s A sequence of nodes. |
604 | : @return The node having the largest number of descending attributes in the input sequence. |
605 | @@ -264,14 +264,14 @@ |
606 | }; |
607 | |
608 | (:~ |
609 | - : Returns the single node having the largest number of descending nodes (sub-nodes at any given depth) in a |
610 | - : sequence of nodes provided as input. |
611 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
612 | - : |
613 | - : <br/> |
614 | - : Example usage : <code>most-nodes( ( <a><b/></a>, <a/>, <b/>) )</code> |
615 | - : <br/> |
616 | - : The function invocation in the example above returns : <code>(<a><b/></a>)</code> |
617 | + : <p>Returns the single node having the largest number of descending nodes (sub-nodes at any given depth) in a |
618 | + : sequence of nodes provided as input.</p> |
619 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
620 | + : <p/> |
621 | + : |
622 | + : <p>Example usage : <code>most-nodes( ( <a><b/></a>, <a/>, <b/>) )</code></p> |
623 | + : <p/> |
624 | + : <p>The function invocation in the example above returns : <code>(<a><b/></a>)</code></p> |
625 | : |
626 | : @param $s A sequence of nodes. |
627 | : @return The node having the largest number of descending nodes in the input sequence. |
628 | @@ -282,14 +282,14 @@ |
629 | }; |
630 | |
631 | (:~ |
632 | - : Returns the single node having the smallest number of descending elements (sub-elements at any given depth) |
633 | - : in a sequence of nodes provided as input. |
634 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
635 | - : |
636 | - : <br/> |
637 | - : Example usage : <code>least-elements( ( <a><b/></a>, <b><c/></b>, <d/>) )</code> |
638 | - : <br/> |
639 | - : The function invocation in the example above returns : <code>(<d/>)</code> |
640 | + : <p>Returns the single node having the smallest number of descending elements (sub-elements at any given depth) |
641 | + : in a sequence of nodes provided as input.</p> |
642 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
643 | + : <p/> |
644 | + : |
645 | + : <p>Example usage : <code>least-elements( ( <a><b/></a>, <b><c/></b>, <d/>) )</code></p> |
646 | + : <p/> |
647 | + : <p>The function invocation in the example above returns : <code>(<d/>)</code></p> |
648 | : |
649 | : @param $s A sequence of nodes. |
650 | : @return The node having the smallest number of descending elements in the input sequence. |
651 | @@ -300,14 +300,14 @@ |
652 | }; |
653 | |
654 | (:~ |
655 | - : Returns the single node having the smallest number of descending attributes (attributes at any given depth) |
656 | - : in a sequence of nodes provided as input. |
657 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
658 | - : |
659 | - : <br/> |
660 | - : Example usage : <code>least-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )</code> |
661 | - : <br/> |
662 | - : The function invocation in the example above returns : <code>(<c/>)</code> |
663 | + : <p>Returns the single node having the smallest number of descending attributes (attributes at any given depth) |
664 | + : in a sequence of nodes provided as input.</p> |
665 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
666 | + : <p/> |
667 | + : |
668 | + : <p>Example usage : <code>least-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) )</code></p> |
669 | + : <p/> |
670 | + : <p>The function invocation in the example above returns : <code>(<c/>)</code></p> |
671 | : |
672 | : @param $s A sequence of nodes. |
673 | : @return The node having the smallest number of descending attributes in the input sequence. |
674 | @@ -318,14 +318,14 @@ |
675 | }; |
676 | |
677 | (:~ |
678 | - : Returns the single node having the smallest number of descending nodes (sub-nodes at any given depth) |
679 | - : in a sequence of nodes provided as input. |
680 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
681 | - : |
682 | - : <br/> |
683 | - : Example usage : <code>least-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) )</code> |
684 | - : <br/> |
685 | - : The function invocation in the example above returns : <code>(<d/>)</code> |
686 | + : <p>Returns the single node having the smallest number of descending nodes (sub-nodes at any given depth) |
687 | + : in a sequence of nodes provided as input.</p> |
688 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
689 | + : <p/> |
690 | + : |
691 | + : <p>Example usage : <code>least-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) )</code></p> |
692 | + : <p/> |
693 | + : <p>The function invocation in the example above returns : <code>(<d/>)</code></p> |
694 | : |
695 | : @param $s A sequence of nodes. |
696 | : @return The node having the smallest number of descending nodes in the input sequence. |
697 | @@ -336,14 +336,14 @@ |
698 | }; |
699 | |
700 | (:~ |
701 | - : Returns the single node having the largest number of distinct descending elements (sub-elements at any |
702 | - : given depth) in a sequence of nodes provided as input. |
703 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
704 | - : |
705 | - : <br/> |
706 | - : Example usage : <code>most-distinct-elements( ( <a><b/><c/><d/></a>, <a><b/><b/><c/></a>, <a/> ) )</code> |
707 | - : <br/> |
708 | - : The function invocation in the example above returns : <code>(<a><b/><c/><d/></a>)</code> |
709 | + : <p>Returns the single node having the largest number of distinct descending elements (sub-elements at any |
710 | + : given depth) in a sequence of nodes provided as input.</p> |
711 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
712 | + : <p/> |
713 | + : |
714 | + : <p>Example usage : <code>most-distinct-elements( ( <a><b/><c/><d/></a>, <a><b/><b/><c/></a>, <a/> ) )</code></p> |
715 | + : <p/> |
716 | + : <p>The function invocation in the example above returns : <code>(<a><b/><c/><d/></a>)</code></p> |
717 | : |
718 | : @param $s A sequence of nodes. |
719 | : @return The node having the largest number of distinct descending elements in the input sequence. |
720 | @@ -354,14 +354,14 @@ |
721 | }; |
722 | |
723 | (:~ |
724 | - : Returns the single node having the largest number of distinct descending attributes (attributes at any |
725 | - : given depth) in a sequence of nodes provided as input. |
726 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
727 | - : |
728 | - : <br/> |
729 | - : Example usage : <code>most-distinct-attributes( ( <a att1="a1" att2="a2" att3="a3"/>, <a att1="a1" att2="a2"><b att2="a2" /></a>, <c/> ) )</code> |
730 | - : <br/> |
731 | - : The function invocation in the example above returns : <code>(<a att1="a1" att2="a2" att3="a3"/>)</code> |
732 | + : <p>Returns the single node having the largest number of distinct descending attributes (attributes at any |
733 | + : given depth) in a sequence of nodes provided as input.</p> |
734 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
735 | + : <p/> |
736 | + : |
737 | + : <p>Example usage : <code>most-distinct-attributes( ( <a att1="a1" att2="a2" att3="a3"/>, <a att1="a1" att2="a2"><b att2="a2" /></a>, <c/> ) )</code></p> |
738 | + : <p/> |
739 | + : <p>The function invocation in the example above returns : <code>(<a att1="a1" att2="a2" att3="a3"/>)</code></p> |
740 | : |
741 | : @param $s A sequence of nodes. |
742 | : @return The node having the largest number of distinct descending attributes in the input sequence. |
743 | @@ -372,14 +372,14 @@ |
744 | }; |
745 | |
746 | (:~ |
747 | - : Returns the single node having the largest number of distinct descending nodes (sub-nodes at any given depth) in |
748 | - : a sequence of nodes provided as input. |
749 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
750 | - : |
751 | - : <br/> |
752 | - : Example usage : <code>most-distinct-nodes( ( <a><b/></a>, <a><a/></a>, <b/>) )</code> |
753 | - : <br/> |
754 | - : The function invocation in the example above returns : <code>(<a><b/></a>)</code> |
755 | + : <p>Returns the single node having the largest number of distinct descending nodes (sub-nodes at any given depth) in |
756 | + : a sequence of nodes provided as input.</p> |
757 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
758 | + : <p/> |
759 | + : |
760 | + : <p>Example usage : <code>most-distinct-nodes( ( <a><b/></a>, <a><a/></a>, <b/>) )</code></p> |
761 | + : <p/> |
762 | + : <p>The function invocation in the example above returns : <code>(<a><b/></a>)</code></p> |
763 | : |
764 | : @param $s A sequence of nodes. |
765 | : @return The node having the largest number of distinct descending nodes in the input sequence. |
766 | @@ -390,14 +390,14 @@ |
767 | }; |
768 | |
769 | (:~ |
770 | - : Returns the single node having the smallest number of distinct descending elements (sub-elements at any |
771 | - : given depth) in a sequence of nodes provided as input. |
772 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
773 | - : |
774 | - : <br/> |
775 | - : Example usage : <code> least-distinct-elements( ( <a><b/></a>, <b><c/></b>, <d/>) ) </code> |
776 | - : <br/> |
777 | - : The function invocation in the example above returns : <code> (<d/>) </code> |
778 | + : <p>Returns the single node having the smallest number of distinct descending elements (sub-elements at any |
779 | + : given depth) in a sequence of nodes provided as input.</p> |
780 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
781 | + : <p/> |
782 | + : |
783 | + : <p>Example usage : <code> least-distinct-elements( ( <a><b/></a>, <b><c/></b>, <d/>) ) </code></p> |
784 | + : <p/> |
785 | + : <p>The function invocation in the example above returns : <code> (<d/>) </code></p> |
786 | : |
787 | : @param $s A sequence of nodes. |
788 | : @return The node having the smallest number of distinct descending elements in the input sequence. |
789 | @@ -408,14 +408,14 @@ |
790 | }; |
791 | |
792 | (:~ |
793 | - : Returns the single node having the smallest number of distinct descending attributes (attributes at any |
794 | - : given depth) in a sequence of nodes provided as input. |
795 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
796 | - : |
797 | - : <br/> |
798 | - : Example usage : <code> least-distinct-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) ) </code> |
799 | - : <br/> |
800 | - : The function invocation in the example above returns : <code> (<c/>) </code> |
801 | + : <p>Returns the single node having the smallest number of distinct descending attributes (attributes at any |
802 | + : given depth) in a sequence of nodes provided as input.</p> |
803 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
804 | + : <p/> |
805 | + : |
806 | + : <p>Example usage : <code> least-distinct-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) ) </code></p> |
807 | + : <p/> |
808 | + : <p>The function invocation in the example above returns : <code> (<c/>) </code></p> |
809 | : |
810 | : @param $s A sequence of nodes. |
811 | : @return The node having the smallest number of distinct descending attributes in the input sequence. |
812 | @@ -426,14 +426,14 @@ |
813 | }; |
814 | |
815 | (:~ |
816 | - : Returns the single node having the smallest number of distinct descending nodes (sub-nodes at any given depth) |
817 | - : in a sequence of nodes provided as input. |
818 | - : If more then one answer is possible, return the first node according to the order of the input sequence. |
819 | - : |
820 | - : <br/> |
821 | - : Example usage : <code> least-distinct-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) ) </code> |
822 | - : <br/> |
823 | - : The function invocation in the example above returns : <code> (<d/>) </code> |
824 | + : <p>Returns the single node having the smallest number of distinct descending nodes (sub-nodes at any given depth) |
825 | + : in a sequence of nodes provided as input.</p> |
826 | + : <p>If more than one answer is possible, return the first node according to the order of the input sequence.</p> |
827 | + : <p/> |
828 | + : |
829 | + : <p>Example usage : <code> least-distinct-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) ) </code></p> |
830 | + : <p/> |
831 | + : <p>The function invocation in the example above returns : <code> (<d/>) </code></p> |
832 | : |
833 | : @param $s A sequence of nodes. |
834 | : @return The node having the smallest number of distinct descending nodes in the input sequence. |
835 | @@ -444,13 +444,13 @@ |
836 | }; |
837 | |
838 | (:~ |
839 | - : Returns the elements from an input sequence of elements that, when matched to a given set of XPath expressions, |
840 | - : produce a non-empty set of nodes in all the cases. |
841 | - : |
842 | - : <br/> |
843 | - : Example usage : <code> all-xpaths( ( <a><b/></a>, <c><d/></c>, <d/>), (".//b") ) </code> |
844 | - : <br/> |
845 | - : The function invocation in the example above returns : <code> (<a><b/></a>) </code> |
846 | + : <p>Returns the elements from an input sequence of elements that, when matched to a given set of XPath expressions, |
847 | + : produce a non-empty set of nodes in all the cases.</p> |
848 | + : <p/> |
849 | + : |
850 | + : <p>Example usage : <code> all-xpaths( ( <a><b/></a>, <c><d/></c>, <d/>), (".//b") ) </code></p> |
851 | + : <p/> |
852 | + : <p>The function invocation in the example above returns : <code> (<a><b/></a>) </code></p> |
853 | : |
854 | : @param $s A sequence of elements. |
855 | : @param $paths A sequence of strings denoting XPath expressions. |
856 | @@ -470,13 +470,13 @@ |
857 | }; |
858 | |
859 | (:~ |
860 | - : Returns the elements from a sequence of elements that, when matched to a given set of XPath expressions, |
861 | - : produce a non-empty set of nodes for some of the cases. |
862 | - : |
863 | - : <br/> |
864 | - : Example usage : <code> some-xpaths( ( <a><b/></a>, <d><c/></d>, <d/>), (".//b", ".//c") ) </code> |
865 | - : <br/> |
866 | - : The function invocation in the example above returns : <code> ( <a><b/></a> , <d><c/></d> ) </code> |
867 | + : <p>Returns the elements from a sequence of elements that, when matched to a given set of XPath expressions, |
868 | + : produce a non-empty set of nodes for some of the cases.</p> |
869 | + : <p/> |
870 | + : |
871 | + : <p>Example usage : <code> some-xpaths( ( <a><b/></a>, <d><c/></d>, <d/>), (".//b", ".//c") ) </code></p> |
872 | + : <p/> |
873 | + : <p>The function invocation in the example above returns : <code> ( <a><b/></a> , <d><c/></d> ) </code></p> |
874 | : |
875 | : @param $s A sequence of elements. |
876 | : @param $paths A sequence of strings denoting XPath expressions. |
877 | @@ -497,14 +497,14 @@ |
878 | }; |
879 | |
880 | (:~ |
881 | - : Returns the single element from an input sequence of elements that matches the largest number of |
882 | - : XPath expressions from a given set, producing a non-empty set of nodes. |
883 | - : If more then one answer is possible, return the first element according to the order of the input sequence. |
884 | - : |
885 | - : <br/> |
886 | - : Example usage : <code> most-xpaths( ( <a><b/></a>, <d><c/><b/></d>, <d/>) , (".//b", ".//c") ) </code> |
887 | - : <br/> |
888 | - : The function invocation in the example above returns : <code> ( <d><c/><b/></d> ) </code> |
889 | + : <p>Returns the single element from an input sequence of elements that matches the largest number of |
890 | + : XPath expressions from a given set, producing a non-empty set of nodes.</p> |
891 | + : <p>If more than one answer is possible, return the first element according to the order of the input sequence.</p> |
892 | + : <p/> |
893 | + : |
894 | + : <p>Example usage : <code> most-xpaths( ( <a><b/></a>, <d><c/><b/></d>, <d/>) , (".//b", ".//c") ) </code></p> |
895 | + : <p/> |
896 | + : <p>The function invocation in the example above returns : <code> ( <d><c/><b/></d> ) </code></p> |
897 | : |
898 | : @param $s A sequence of elements. |
899 | : @param $paths A sequence of strings denoting XPath expressions. |
900 | @@ -524,18 +524,18 @@ |
901 | return $str |
902 | )[1] |
903 | :) |
904 | - "" |
905 | + "" |
906 | }; |
907 | |
908 | (:~ |
909 | - : Returns the single element from an input sequence of elements that matches the smallest number of |
910 | - : XPath expressions from a given set, producing a non-empty set of nodes. |
911 | - : If more then one answer is possible, return the first element according to the order of the input sequence. |
912 | - : |
913 | - : <br/> |
914 | - : Example usage : <code> least-xpaths( ( <a><b/></a>, <d><c/><b/></d>, <d/>) , (".//b", ".//c") ) </code> |
915 | - : <br/> |
916 | - : The function invocation in the example above returns : <code> ( $lt;d/> ) </code> |
917 | + : <p>Returns the single element from an input sequence of elements that matches the smallest number of |
918 | + : XPath expressions from a given set, producing a non-empty set of nodes.</p> |
919 | + : <p>If more than one answer is possible, return the first element according to the order of the input sequence.</p> |
920 | + : <p/> |
921 | + : |
922 | + : <p>Example usage : <code> least-xpaths( ( <a><b/></a>, <d><c/><b/></d>, <d/>) , (".//b", ".//c") ) </code></p> |
923 | + : <p/> |
924 | + : <p>The function invocation in the example above returns : <code> ( <d/> ) </code></p> |
925 | : |
926 | : @param $s A sequence of elements. |
927 | : @param $paths A sequence of strings denoting XPath expressions. |
928 | @@ -559,18 +559,18 @@ |
929 | }; |
930 | |
931 | (:~ |
932 | - : Returns the nodes from an input sequence of nodes that validate against a given XML Schema. |
933 | - : |
934 | - : <br/> |
935 | - : Example usage : <code> validating-schema ( ( <a/> , <b/> ), <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"><xs:element name="a" /></xs:schema> ) </code> |
936 | - : <br/> |
937 | - : The function invocation in the example above returns : <code> ( <a/> ) </code> |
938 | + : <p>Returns the nodes from an input sequence of nodes that validate against a given XML Schema.</p> |
939 | + : <p/> |
940 | + : |
941 | + : <p>Example usage : <code> validating-schema ( ( <a/> , <b/> ), <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"><xs:element name="a" /></xs:schema> ) </code></p> |
942 | + : <p/> |
943 | + : <p>The function invocation in the example above returns : <code> ( <a/> ) </code></p> |
944 | : |
945 | : @param $s A sequence of elements. |
946 | : @param $schema An element encoding an XML Schema. |
947 | : @return The nodes that validate against the XML Schema. |
948 | : |
949 | - : <br/><br/><b> Attention : This function is still not implemented. </b> <br/> |
950 | + : <b> Attention : This function is still not implemented. </b> |
951 | : |
952 | :) |
953 | declare function con:validating-schema ( $s as element()*, $schema as element() ) as element()*{ |
954 | |
955 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/conversion.xq' => 'src/conversion.xq' |
956 | --- src/com/zorba-xquery/www/modules/data-cleaning/conversion.xq 2012-09-28 13:34:20 +0000 |
957 | +++ src/conversion.xq 2013-07-10 00:51:26 +0000 |
958 | @@ -17,16 +17,16 @@ |
959 | :) |
960 | |
961 | (:~ |
962 | - : This library module provides data conversion functions for processing calendar dates, |
963 | - : temporal values, currency values, units of measurement, location names and postal addresses. |
964 | - : |
965 | - : The logic contained in this module is not specific to any particular XQuery implementation. |
966 | + : <p>This library module provides data conversion functions for processing calendar dates, |
967 | + : temporal values, currency values, units of measurement, location names and postal addresses.</p> |
968 | + : <p/> |
969 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p> |
970 | : |
971 | : @author Bruno Martins and Diogo Simões |
972 | : @project Zorba/Data Cleaning/Conversion |
973 | :) |
974 | |
975 | -module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
976 | +module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
977 | |
978 | declare namespace exref = "http://www.ecb.int/vocabulary/2002-08-01/eurofxref"; |
979 | declare namespace an = "http://www.zorba-xquery.com/annotations"; |
980 | @@ -40,12 +40,12 @@ |
981 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
982 | declare option ver:module-version "2.0"; |
983 | |
984 | -(:~ The key to be used when accessing the White Pages Web service :) |
985 | +(:~ <p>The key to be used when accessing the White Pages Web service</p> :) |
986 | declare variable $conversion:key := "06ea2f21cc15602b6a3e242e3225a81a"; |
987 | |
988 | (:~ |
989 | - : Uses a White-pages Web service to discover information about a given name, |
990 | - : returning a sequence of strings for the phone numbers associated to the name. |
991 | + : <p>Uses a White-pages Web service to discover information about a given name, |
992 | + : returning a sequence of strings for the phone numbers associated to the name.</p> |
993 | : |
994 | : |
995 | : @param $name The name of a person or organization. |
996 | @@ -61,8 +61,8 @@ |
997 | }; |
998 | |
999 | (:~ |
1000 | - : Uses a White-pages Web service to discover information about a given name, |
1001 | - : returning a sequence of strings for the addresses associated to the name. |
1002 | + : <p>Uses a White-pages Web service to discover information about a given name, |
1003 | + : returning a sequence of strings for the addresses associated to the name.</p> |
1004 | : |
1005 | : |
1006 | : @param $name The name of a person or organization. |
1007 | @@ -83,8 +83,8 @@ |
1008 | |
1009 | |
1010 | (:~ |
1011 | - : Uses a White-pages Web service to discover information about a given phone number, |
1012 | - : returning a sequence of strings for the name associated to the phone number. |
1013 | + : <p>Uses a White-pages Web service to discover information about a given phone number, |
1014 | + : returning a sequence of strings for the name associated to the phone number.</p> |
1015 | : |
1016 | : |
1017 | : @param $phone-number A string with 10 digits corresponding to the phone number. |
1018 | @@ -98,8 +98,8 @@ |
1019 | }; |
1020 | |
1021 | (:~ |
1022 | - : Uses a White-pages Web service to discover information about a given phone number, |
1023 | - : returning a string for the address associated to the phone number. |
1024 | + : <p>Uses a White-pages Web service to discover information about a given phone number, |
1025 | + : returning a string for the address associated to the phone number.</p> |
1026 | : |
1027 | : |
1028 | : @param $phone-number A string with 10 digits corresponding to the phone number. |
1029 | @@ -120,8 +120,8 @@ |
1030 | }; |
1031 | |
1032 | (:~ |
1033 | - : Uses a White-pages Web service to discover information about a given address, |
1034 | - : returning a sequence of strings for the names associated to the address. |
1035 | + : <p>Uses a White-pages Web service to discover information about a given address, |
1036 | + : returning a sequence of strings for the names associated to the address.</p> |
1037 | : |
1038 | : |
1039 | : @param $address A string corresponding to the address (ex: 5655 E Gaskill Rd, Willcox, AZ, US). |
1040 | @@ -146,8 +146,8 @@ |
1041 | }; |
1042 | |
1043 | (:~ |
1044 | - : Uses a White-pages Web service to discover information about a given address, |
1045 | - : returning a sequence of strings for the phone number associated to the address. |
1046 | + : <p>Uses a White-pages Web service to discover information about a given address, |
1047 | + : returning a sequence of strings for the phone number associated to the address.</p> |
1048 | : |
1049 | : |
1050 | : @param $address A string corresponding to the address (ex: 5655 E Gaskill Rd, Willcox, AZ, US). |
1051 | @@ -180,8 +180,8 @@ |
1052 | }; |
1053 | |
1054 | (:~ |
1055 | - : Conversion function for units of measurement, acting as a wrapper over the CuppaIT WebService. |
1056 | - : <br/> |
1057 | + : <p>Conversion function for units of measurement, acting as a wrapper over the CuppaIT WebService.</p> |
1058 | + : |
1059 | : |
1060 | : |
1061 | : @param $v The amount we wish to convert. |
1062 | @@ -296,7 +296,7 @@ |
1063 | }; |
1064 | |
1065 | (:~ |
1066 | - : Placename to geospatial coordinates converter, acting as a wrapper over the Yahoo! geocoder service. |
1067 | + : <p>Placename to geospatial coordinates converter, acting as a wrapper over the Yahoo! geocoder service.</p> |
1068 | : |
1069 | : |
1070 | : @param $q A sequence of strings corresponding to the different components (e.g., street, city, country, etc.) of the place name. |
1071 | @@ -313,7 +313,7 @@ |
1072 | }; |
1073 | |
1074 | (:~ |
1075 | - : Geospatial coordinates to placename converter, acting as a wrapper over the Yahoo! reverse geocoder service. |
1076 | + : <p>Geospatial coordinates to placename converter, acting as a wrapper over the Yahoo! reverse geocoder service.</p> |
1077 | : |
1078 | : |
1079 | : @param $lat Geospatial latitude. |
1080 | @@ -337,9 +337,9 @@ |
1081 | }; |
1082 | |
1083 | (:~ |
1084 | - : Currency conversion function, acting as a wrapper over the WebService from the European Central Bank. |
1085 | - : |
1086 | - : WebService documentation at http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html |
1087 | + : <p>Currency conversion function, acting as a wrapper over the WebService from the European Central Bank.</p> |
1088 | + : <p/> |
1089 | + : <p>WebService documentation at <a href="http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html">http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html</a></p> |
1090 | : |
1091 | : |
1092 | : @param $v The amount we wish to convert. |
1093 | @@ -347,7 +347,7 @@ |
1094 | : @param $m2 The target currency (e.g., "USD"). |
1095 | : @param $date The reference date. |
1096 | : @return The value resulting from the conversion. |
1097 | - : @error conversion:notsupported if the date, the source currency type or the target currency type are not known to the service. |
1098 | + : @error conversion:NOTSUPPORTED if the date, the source currency type or the target currency type are not known to the service. |
1099 | : @see http://www.ecb.int/stats/exchange/eurofxref/html/index.en.html |
1100 | : @example test/Queries/data-cleaning/conversion/currency-convert.xq |
1101 | :) |
1102 | @@ -361,17 +361,17 @@ |
1103 | let $fromEUR := if ( $m2="EUR" ) then (xs:double(1.0)) else ( $doc//exref:Cube[xs:string(@currency)=$m2]/xs:double(@rate) ) |
1104 | let $result := ($v div $toEUR) * $fromEUR |
1105 | return if (matches(string($result),"-?[0-9]+(\.[0-9]+)?")) then ($result) |
1106 | - else (error(QName('http://www.zorba-xquery.com/modules/data-cleaning/conversion', 'conversion:notsupported'), data($result))) |
1107 | + else (error(QName('http://zorba.io/modules/data-cleaning/conversion', 'conversion:NOTSUPPORTED'), data($result))) |
1108 | }; |
1109 | |
1110 | (:~ |
1111 | - : Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1112 | - : for the phone numbers associated to the name. |
1113 | + : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1114 | + : for the phone numbers associated to the name.</p> |
1115 | : |
1116 | : @param $domain A string with the domain. |
1117 | : @return A sequence of strings for the phone numbers associated to the domain name. |
1118 | : |
1119 | - : <br/><br/><b> Attention : This function is still not implemented. </b> <br/> |
1120 | + : <p><b> Attention : This function is still not implemented. </b></p> |
1121 | : |
1122 | :) |
1123 | declare function conversion:phone-from-domain ( $domain as xs:string ) as xs:string*{ |
1124 | @@ -379,13 +379,13 @@ |
1125 | }; |
1126 | |
1127 | (:~ |
1128 | - : Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1129 | - : for the addresses associated to the name. |
1130 | + : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1131 | + : for the addresses associated to the name.</p> |
1132 | : |
1133 | : @param $domain A string with the domain. |
1134 | : @return A sequence of strings for the addresses associated to the domain name. |
1135 | : |
1136 | - : <br/><br/><b> Attention : This function is still not implemented. </b> <br/> |
1137 | + : <p><b> Attention : This function is still not implemented. </b></p> |
1138 | : |
1139 | :) |
1140 | declare function conversion:address-from-domain ( $domain as xs:string ) as xs:string*{ |
1141 | @@ -393,13 +393,13 @@ |
1142 | }; |
1143 | |
1144 | (:~ |
1145 | - : Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1146 | - : for the person or organization names associated to the name. |
1147 | + : <p>Uses a whois service to discover information about a given domain name, returning a sequence of strings |
1148 | + : for the person or organization names associated to the name.</p> |
1149 | : |
1150 | : @param $domain A string with the domain. |
1151 | : @return A sequence of strings for the person or organization names associated to the domain name. |
1152 | : |
1153 | - : <br/><br/><b> Attention : This function is still not implemented. </b> <br/> |
1154 | + : <p><b> Attention : This function is still not implemented. </b></p> |
1155 | : |
1156 | :) |
1157 | declare function conversion:name-from-domain ( $domain as xs:string ) as xs:string*{ |
1158 | |
1159 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/hybrid-string-similarity.xq' => 'src/hybrid-string-similarity.xq' |
1160 | --- src/com/zorba-xquery/www/modules/data-cleaning/hybrid-string-similarity.xq 2012-09-28 13:34:20 +0000 |
1161 | +++ src/hybrid-string-similarity.xq 2013-07-10 00:51:26 +0000 |
1162 | @@ -17,43 +17,43 @@ |
1163 | :) |
1164 | |
1165 | (:~ |
1166 | - : This library module provides hybrid string similarity functions, combining the properties of |
1167 | - : character-based string similarity functions and token-based string similarity functions. |
1168 | - : |
1169 | - : The logic contained in this module is not specific to any particular XQuery implementation, |
1170 | + : <p>This library module provides hybrid string similarity functions, combining the properties of |
1171 | + : character-based string similarity functions and token-based string similarity functions.</p> |
1172 | + : <p/> |
1173 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation, |
1174 | : although the module requires the trigonometric functions of XQuery 3.0 or a math extension |
1175 | - : function such as sqrt($x as numeric) for computing the square root. |
1176 | + : function such as sqrt($x as numeric) for computing the square root.</p> |
1177 | : |
1178 | : @author Bruno Martins and Diogo Simões |
1179 | : @project Zorba/Data Cleaning/Hybrid String Similarity |
1180 | :) |
1181 | |
1182 | -module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
1183 | +module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
1184 | |
1185 | -(: In the QizX os Saxon XQuery engines, it is possible to call external functions from the Java math library :) |
1186 | -(: declare namespace math = "java:java.lang.Math"; :) |
1187 | +(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :) |
1188 | +(: declare namespace math = "java:java.lang.Math"; :) |
1189 | declare namespace math = "http://www.w3.org/2005/xpath-functions/math"; |
1190 | |
1191 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
1192 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
1193 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
1194 | -import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
1195 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
1196 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
1197 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
1198 | +import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
1199 | |
1200 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
1201 | declare option ver:module-version "2.0"; |
1202 | |
1203 | (:~ |
1204 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. |
1205 | - : |
1206 | - : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the |
1207 | - : term-frequency heuristic from Information Retrieval). |
1208 | - : The Soundex phonetic similarity function is used to discover token identity, which is equivalent to saying that |
1209 | - : this function returns the cosine similarity coefficient between sets of Soundex keys. |
1210 | - : |
1211 | - : <br/> |
1212 | - : Example usage : <code> soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +") </code> |
1213 | - : <br/> |
1214 | - : The function invocation in the example above returns : <code> 1.0 </code> |
1215 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p> |
1216 | + : <p/> |
1217 | + : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the |
1218 | + : term-frequency heuristic from Information Retrieval).</p> |
1219 | + : <p>The Soundex phonetic similarity function is used to discover token identity, which is equivalent to saying that |
1220 | + : this function returns the cosine similarity coefficient between sets of Soundex keys.</p> |
1221 | + : <p/> |
1222 | + : |
1223 | + : <p>Example usage : <code> soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +") </code></p> |
1224 | + : <p/> |
1225 | + : <p>The function invocation in the example above returns : <code> 1.0 </code></p> |
1226 | : |
1227 | : @param $s1 The first string. |
1228 | : @param $s2 The second string. |
1229 | @@ -68,16 +68,16 @@ |
1230 | }; |
1231 | |
1232 | (:~ |
1233 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. |
1234 | - : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the |
1235 | - : term-frequency heuristic from Information Retrieval). |
1236 | - : The Metaphone phonetic similarity function is used to discover token identity, which is equivalent to saying that |
1237 | - : this function returns the cosine similarity coefficient between sets of Metaphone keys. |
1238 | - : |
1239 | - : <br/> |
1240 | - : Example usage : <code> soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" ) </code> |
1241 | - : <br/> |
1242 | - : The function invocation in the example above returns : <code> 1.0 </code> |
1243 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p> |
1244 | + : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the |
1245 | + : term-frequency heuristic from Information Retrieval).</p> |
1246 | + : <p>The Metaphone phonetic similarity function is used to discover token identity, which is equivalent to saying that |
1247 | + : this function returns the cosine similarity coefficient between sets of Metaphone keys.</p> |
1248 | + : <p/> |
1249 | + : |
1250 | + : <p>Example usage : <code> soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" ) </code></p> |
1251 | + : <p/> |
1252 | + : <p>The function invocation in the example above returns : <code> 1.0 </code></p> |
1253 | : |
1254 | : @param $s1 The first string. |
1255 | : @param $s2 The second string. |
1256 | @@ -92,16 +92,16 @@ |
1257 | }; |
1258 | |
1259 | (:~ |
1260 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. |
1261 | - : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the |
1262 | - : term-frequency heuristic from Information Retrieval). |
1263 | - : The Edit Distance similarity function is used to discover token identity, and tokens having an edit distance |
1264 | - : bellow a given threshold are considered as matching tokens. |
1265 | - : |
1266 | - : <br/> |
1267 | - : Example usage : <code> soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 ) </code> |
1268 | - : <br/> |
1269 | - : The function invocation in the example above returns : <code> 0.408248290463863 </code> |
1270 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p> |
1271 | + : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the |
1272 | + : term-frequency heuristic from Information Retrieval).</p> |
1273 | + : <p>The Edit Distance similarity function is used to discover token identity, and tokens having an edit distance |
1274 | + : below a given threshold are considered as matching tokens.</p> |
1275 | + : <p/> |
1276 | + : |
1277 | + : <p>Example usage : <code> soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 ) </code></p> |
1278 | + : <p/> |
1279 | + : <p>The function invocation in the example above returns : <code> 0.408248290463863 </code></p> |
1280 | : |
1281 | : @param $s1 The first string. |
1282 | : @param $s2 The second string. |
1283 | @@ -128,16 +128,16 @@ |
1284 | }; |
1285 | |
1286 | (:~ |
1287 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. |
1288 | - : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the |
1289 | - : term-frequency heuristic from Information Retrieval). |
1290 | - : The Jaro similarity function is used to discover token identity, and tokens having a Jaro similarity above |
1291 | - : a given threshold are considered as matching tokens. |
1292 | - : |
1293 | - : <br/> |
1294 | - : Example usage : <code> soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 ) </code> |
1295 | - : <br/> |
1296 | - : The function invocation in the example above returns : <code> 0.5 </code> |
1297 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p> |
1298 | + : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the |
1299 | + : term-frequency heuristic from Information Retrieval).</p> |
1300 | + : <p>The Jaro similarity function is used to discover token identity, and tokens having a Jaro similarity above |
1301 | + : a given threshold are considered as matching tokens.</p> |
1302 | + : <p/> |
1303 | + : |
1304 | + : <p>Example usage : <code> soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 ) </code></p> |
1305 | + : <p/> |
1306 | + : <p>The function invocation in the example above returns : <code> 0.5 </code></p> |
1307 | : |
1308 | : @param $s1 The first string. |
1309 | : @param $s2 The second string. |
1310 | @@ -162,16 +162,16 @@ |
1311 | }; |
1312 | |
1313 | (:~ |
1314 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. |
1315 | - : The tokens from each string are weighted according to their occurence frequency (i.e., weighted according to the |
1316 | - : term-frequency heuristic from Information Retrieval). |
1317 | - : The Jaro-Winkler similarity function is used to discover token identity, and tokens having a Jaro-Winkler |
1318 | - : similarity above a given threshold are considered as matching tokens. |
1319 | - : |
1320 | - : <br/> |
1321 | - : Example usage : <code> soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 ) </code> |
1322 | - : <br/> |
1323 | - : The function invocation in the example above returns : <code> 0.45 </code> |
1324 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings.</p> |
1325 | + : <p>The tokens from each string are weighted according to their occurrence frequency (i.e., weighted according to the |
1326 | + : term-frequency heuristic from Information Retrieval).</p> |
1327 | + : <p>The Jaro-Winkler similarity function is used to discover token identity, and tokens having a Jaro-Winkler |
1328 | + : similarity above a given threshold are considered as matching tokens.</p> |
1329 | + : <p/> |
1330 | + : |
1331 | + : <p>Example usage : <code> soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 ) </code></p> |
1332 | + : <p/> |
1333 | + : <p>The function invocation in the example above returns : <code> 0.45 </code></p> |
1334 | : |
1335 | : @param $s1 The first string. |
1336 | : @param $s2 The second string. |
1337 | @@ -198,13 +198,13 @@ |
1338 | }; |
1339 | |
1340 | (:~ |
1341 | - : Returns the Monge-Elkan similarity coefficient between two strings, using the Jaro-Winkler |
1342 | - : similarity function to discover token identity. |
1343 | - : |
1344 | - : <br/> |
1345 | - : Example usage : <code> monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1) </code> |
1346 | - : <br/> |
1347 | - : The function invocation in the example above returns : <code> 0.992 </code> |
1348 | + : <p>Returns the Monge-Elkan similarity coefficient between two strings, using the Jaro-Winkler |
1349 | + : similarity function to discover token identity.</p> |
1350 | + : <p/> |
1351 | + : |
1352 | + : <p>Example usage : <code> monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1) </code></p> |
1353 | + : <p/> |
1354 | + : <p>The function invocation in the example above returns : <code> 0.992 </code></p> |
1355 | : |
1356 | : @param $s1 The first string. |
1357 | : @param $s2 The second string. |
1358 | |
1359 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq' => 'src/normalization.xq' |
1360 | --- src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq 2012-09-28 13:34:20 +0000 |
1361 | +++ src/normalization.xq 2013-07-10 00:51:26 +0000 |
1362 | @@ -17,18 +17,17 @@ |
1363 | :) |
1364 | |
1365 | (:~ |
1366 | - : This library module provides data normalization functions for processing calendar dates, |
1367 | + : <p>This library module provides data normalization functions for processing calendar dates, |
1368 | : temporal values, currency values, units of measurement, location names and postal addresses. |
1369 | - : |
1370 | - : These functions are particularly useful for converting different data representations into cannonical formats. |
1371 | - : |
1372 | - : The logic contained in this module is not specific to any particular XQuery implementation. |
1373 | + : These functions are particularly useful for converting different data representations into canonical formats.</p> |
1374 | + : |
1375 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p> |
1376 | : |
1377 | : @author Bruno Martins and Diogo Simões |
1378 | : @project Zorba/Data Cleaning/Normalization |
1379 | :) |
1380 | |
1381 | -module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization"; |
1382 | +module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization"; |
1383 | |
1384 | import module namespace http = "http://www.zorba-xquery.com/modules/http-client"; |
1385 | |
1386 | @@ -37,8 +36,8 @@ |
1387 | declare option ver:module-version "2.0"; |
1388 | |
1389 | (:~ |
1390 | - : Converts a given string representation of a date value into a date representation valid according |
1391 | - : to the corresponding XML Schema type. |
1392 | + : <p>Converts a given string representation of a date value into a date representation valid according |
1393 | + : to the corresponding XML Schema type.</p> |
1394 | : |
1395 | : |
1396 | : @param $sd The string representation for the date |
1397 | @@ -47,18 +46,18 @@ |
1398 | : by a single letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion |
1399 | : specification is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows: |
1400 | : <pre> |
1401 | - : '%b' Abbreviated month name in the current locale.<br/> |
1402 | - : '%B' Full month name in the current locale.<br/> |
1403 | - : '%d' Day of the month as decimal number (01-31).<br/> |
1404 | - : '%m' Month as decimal number (01-12).<br/> |
1405 | - : '%x' Date, locale-specific.<br/> |
1406 | - : '%y' Year without century (00-99).<br/> |
1407 | - : '%Y' Year with century.<br/> |
1408 | - : '%C' Century (00-99): the integer part of the year divided by 100.<br/> |
1409 | - : '%D' Locale-specific date format such as '%m/%d/%y'.<br/> |
1410 | - : '%e' Day of the month as decimal number (1-31), with a leading pace for a single-digit number.<br/> |
1411 | - : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format).<br/> |
1412 | - : '%h' Equivalent to '%b'.<br/> |
1413 | + : '%b' Abbreviated month name in the current locale. |
1414 | + : '%B' Full month name in the current locale. |
1415 | + : '%d' Day of the month as decimal number (01-31). |
1416 | + : '%m' Month as decimal number (01-12). |
1417 | + : '%x' Date, locale-specific. |
1418 | + : '%y' Year without century (00-99). |
1419 | + : '%Y' Year with century. |
1420 | + : '%C' Century (00-99): the integer part of the year divided by 100. |
1421 | + : '%D' Locale-specific date format such as '%m/%d/%y'. |
1422 | + : '%e' Day of the month as decimal number (1-31), with a leading space for a single-digit number. |
1423 | + : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format). |
1424 | + : '%h' Equivalent to '%b'. |
1425 | :</pre> |
1426 | : |
1427 | : @return The date value resulting from the conversion. |
1428 | @@ -147,8 +146,8 @@ |
1429 | }; |
1430 | |
1431 | (:~ |
1432 | - : Converts a given string representation of a time value into a time representation valid according to |
1433 | - : the corresponding XML Schema type. |
1434 | + : <p>Converts a given string representation of a time value into a time representation valid according to |
1435 | + : the corresponding XML Schema type.</p> |
1436 | : |
1437 | : |
1438 | : @param $sd The string representation for the time. |
1439 | @@ -156,13 +155,13 @@ |
1440 | : conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single |
1441 | : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification |
1442 | : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows: |
1443 | - : |
1444 | + : <p/> |
1445 | : <pre class="ace-static"> |
1446 | : '%H' Hours as decimal number (00-23). |
1447 | : '%I' Hours as decimal number (01-12). |
1448 | : '%M' Minute as decimal number (00-59). |
1449 | : '%p' AM/PM indicator in the locale. Used in conjunction with '%I' and *not* with '%H'. |
1450 | - : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds.<br/> |
1451 | + : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds. |
1452 | : '%X' Time, locale-specific. |
1453 | : '%z' Offset from Greenwich, so '-0900' is 9 hours west of Greenwich. |
1454 | : '%Z' Time zone as a character string. |
1455 | @@ -173,6 +172,7 @@ |
1456 | : '%T' Equivalent to '%H:%M:%S'. |
1457 | :</pre> |
1458 | : |
1459 | + : @error normalization:NOTSUPPORTED if the date type is not known to the service. |
1460 | : @return The time value resulting from the conversion. |
1461 | : @example test/Queries/data-cleaning/normalization/to-time.xq |
1462 | :) |
1463 | @@ -524,8 +524,8 @@ |
1464 | }; |
1465 | |
1466 | (:~ |
1467 | - : Converts a given string representation of a dateTime value into a dateTime representation |
1468 | - : valid according to the corresponding XML Schema type. |
1469 | + : <p>Converts a given string representation of a dateTime value into a dateTime representation |
1470 | + : valid according to the corresponding XML Schema type.</p> |
1471 | : |
1472 | : |
1473 | : @param $sd The string representation for the dateTime. |
1474 | @@ -533,7 +533,7 @@ |
1475 | : of conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single |
1476 | : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification |
1477 | : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows: |
1478 | - : |
1479 | + : <p/> |
1480 | : <pre class="ace-static"> |
1481 | : '%b' Abbreviated month name in the current locale. |
1482 | : '%B' Full month name in the current locale. |
1483 | @@ -566,6 +566,7 @@ |
1484 | : '%T' Equivalent to '%H:%M:%S'. |
1485 | :</pre> |
1486 | : |
1487 | + : @error normalization:NOTSUPPORTED if the dateTime type is not known to the service. |
1488 | : @return The dateTime value resulting from the conversion. |
1489 | : @example test/Queries/data-cleaning/normalization/to-dateTime.xq |
1490 | :) |
1491 | @@ -1183,8 +1184,8 @@ |
1492 | }; |
1493 | |
1494 | (:~ |
1495 | - : Uses an address normalization Web service to convert a postal address given as input into a |
1496 | - : cannonical representation format. |
1497 | + : <p>Uses an address normalization Web service to convert a postal address given as input into a |
1498 | + : canonical representation format.</p> |
1499 | : |
1500 | : |
1501 | : @param $addr A sequence of strings encoding an address, where each string in the sequence corresponds to a different component (e.g., street, city, country, etc.) of the address. |
1502 | @@ -1208,13 +1209,13 @@ |
1503 | }; |
1504 | |
1505 | (:~ |
1506 | - : Uses an phone number normalization Web service to convert a phone number given as input into a |
1507 | - : cannonical representation. |
1508 | + : <p>Uses a phone number normalization Web service to convert a phone number given as input into a |
1509 | + : canonical representation.</p> |
1510 | : |
1511 | : @param $addr A string encoding a phone number. |
1512 | : @return A string with the phone number encoded in a canonical format. |
1513 | - : |
1514 | - : <br/><br/><b> Attention : This function is still not implemented. </b> <br/> |
1515 | + : <p/> |
1516 | + : <p><b> Attention : This function is still not implemented. </b></p> |
1517 | : |
1518 | :) |
1519 | declare function normalization:normalize-phone ( $addr as xs:string* ) as xs:string* { |
1520 | @@ -1222,8 +1223,8 @@ |
1521 | }; |
1522 | |
1523 | (:~ |
1524 | - : Internal auxiliary function that returns an XML representation for a dictionary that contains the |
1525 | - : time-shift value associated to different time-zone abbreviations. |
1526 | + : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains the |
1527 | + : time-shift value associated to different time-zone abbreviations.</p> |
1528 | :) |
1529 | declare %private function normalization:timeZone-dictionary() as element(){ |
1530 | let $result := |
1531 | @@ -1453,8 +1454,8 @@ |
1532 | }; |
1533 | |
1534 | (:~ |
1535 | - : Internal auxiliary function that returns an XML representation for a dictionary that contains a |
1536 | - : numeric value associated to different month name abbreviations. |
1537 | + : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains a |
1538 | + : numeric value associated to different month name abbreviations.</p> |
1539 | :) |
1540 | declare %private function normalization:month-dictionary() as element(){ |
1541 | let $dictionary := |
1542 | @@ -1523,7 +1524,7 @@ |
1543 | }; |
1544 | |
1545 | (:~ |
1546 | - : Internal auxiliary function that checks if a string is in xs:dateTime format |
1547 | + : <p>Internal auxiliary function that checks if a string is in xs:dateTime format</p> |
1548 | : |
1549 | : |
1550 | : @param $dateTime The string representation for the dateTime. |
1551 | @@ -1534,7 +1535,7 @@ |
1552 | }; |
1553 | |
1554 | (:~ |
1555 | - : Internal auxiliary function that checks if a string is in xs:date format |
1556 | + : <p>Internal auxiliary function that checks if a string is in xs:date format</p> |
1557 | : |
1558 | : |
1559 | : @param $dateTime The string representation for the date. |
1560 | @@ -1545,7 +1546,7 @@ |
1561 | }; |
1562 | |
1563 | (:~ |
1564 | - : Internal auxiliary function that checks if a string is in xs:time format |
1565 | + : <p>Internal auxiliary function that checks if a string is in xs:time format</p> |
1566 | : |
1567 | : |
1568 | : @param $dateTime The string representation for the time. |
1569 | |
1570 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/phonetic-string-similarity.xq' => 'src/phonetic-string-similarity.xq' |
1571 | --- src/com/zorba-xquery/www/modules/data-cleaning/phonetic-string-similarity.xq 2013-05-18 00:36:01 +0000 |
1572 | +++ src/phonetic-string-similarity.xq 2013-07-10 00:51:26 +0000 |
1573 | @@ -17,29 +17,29 @@ |
1574 | :) |
1575 | |
1576 | (:~ |
1577 | - : This library module provides phonetic string similarity functions, comparing strings with basis on how they sound. |
1578 | - : |
1579 | - : These metrics are particularly effective in matching names, since names are often spelled in different |
1580 | - : ways that sound the same. |
1581 | - : |
1582 | - : The logic contained in this module is not specific to any particular XQuery implementation. |
1583 | + : <p>This library module provides phonetic string similarity functions, comparing strings based on how they sound.</p> |
1584 | + : <p/> |
1585 | + : <p>These metrics are particularly effective in matching names, since names are often spelled in different |
1586 | + : ways that sound the same.</p> |
1587 | + : <p/> |
1588 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p> |
1589 | : |
1590 | : @author Bruno Martins |
1591 | : @project Zorba/Data Cleaning/Phonetic String Similarity |
1592 | :) |
1593 | |
1594 | -module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
1595 | +module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
1596 | |
1597 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
1598 | declare option ver:module-version "2.0"; |
1599 | |
1600 | (:~ |
1601 | - : Returns the Soundex key for a given string. |
1602 | - : |
1603 | - : <br/> |
1604 | - : Example usage : <code>soundex-key("Robert")</code> |
1605 | - : <br/> |
1606 | - : The function invocation in the example above returns : <code>"R163"</code> |
1607 | + : <p>Returns the Soundex key for a given string.</p> |
1608 | + : <p/> |
1609 | + : |
1610 | + : <p>Example usage : <code>soundex-key("Robert")</code></p> |
1611 | + : <p/> |
1612 | + : <p>The function invocation in the example above returns : <code>"R163"</code></p> |
1613 | : |
1614 | : @param $s1 The string. |
1615 | : @return The Soundex key for the given input string. |
1616 | @@ -55,12 +55,12 @@ |
1617 | }; |
1618 | |
1619 | (:~ |
1620 | - : Checks if two strings have the same Soundex key. |
1621 | - : |
1622 | - : <br/> |
1623 | - : Example usage : <code>soundex( "Robert" , "Rupert" )</code> |
1624 | - : <br/> |
1625 | - : The function invocation in the example above returns : <code>true</code> |
1626 | + : <p>Checks if two strings have the same Soundex key.</p> |
1627 | + : <p/> |
1628 | + : |
1629 | + : <p>Example usage : <code>soundex( "Robert" , "Rupert" )</code></p> |
1630 | + : <p/> |
1631 | + : <p>The function invocation in the example above returns : <code>true</code></p> |
1632 | : |
1633 | : @param $s1 The first string. |
1634 | : @param $s2 The second string. |
1635 | @@ -72,13 +72,13 @@ |
1636 | }; |
1637 | |
1638 | (:~ |
1639 | - : Returns the Metaphone key for a given string. |
1640 | - : The Metaphone algorithm produces variable length keys as its output, as opposed to Soundex's fixed-length keys. |
1641 | - : |
1642 | - : <br/> |
1643 | - : Example usage : <code>metaphone-key("ALEKSANDER")</code> |
1644 | - : <br/> |
1645 | - : The function invocation in the example above returns : <code>"ALKSNTR"</code> |
1646 | + : <p>Returns the Metaphone key for a given string.</p> |
1647 | + : <p>The Metaphone algorithm produces variable length keys as its output, as opposed to Soundex's fixed-length keys.</p> |
1648 | + : <p/> |
1649 | + : |
1650 | + : <p>Example usage : <code>metaphone-key("ALEKSANDER")</code></p> |
1651 | + : <p/> |
1652 | + : <p>The function invocation in the example above returns : <code>"ALKSNTR"</code></p> |
1653 | : |
1654 | : @param $s1 The string. |
1655 | : @return The Metaphone key for the given input string. |
1656 | @@ -99,12 +99,12 @@ |
1657 | }; |
1658 | |
1659 | (:~ |
1660 | - : Checks if two strings have the same Metaphone key. |
1661 | - : |
1662 | - : <br/> |
1663 | - : Example usage : <code>metaphone("ALEKSANDER", "ALEXANDRE")</code> |
1664 | - : <br/> |
1665 | - : The function invocation in the example above returns : <code>true</code> |
1666 | + : <p>Checks if two strings have the same Metaphone key.</p> |
1667 | + : <p/> |
1668 | + : |
1669 | + : <p>Example usage : <code>metaphone("ALEKSANDER", "ALEXANDRE")</code></p> |
1670 | + : <p/> |
1671 | + : <p>The function invocation in the example above returns : <code>true</code></p> |
1672 | : |
1673 | : @param $s1 The first string. |
1674 | : @param $s2 The second string. |
1675 | |
1676 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/set-similarity.xq' => 'src/set-similarity.xq' |
1677 | --- src/com/zorba-xquery/www/modules/data-cleaning/set-similarity.xq 2012-09-28 13:34:20 +0000 |
1678 | +++ src/set-similarity.xq 2013-07-10 00:51:26 +0000 |
1679 | @@ -17,29 +17,29 @@ |
1680 | :) |
1681 | |
1682 | (:~ |
1683 | - : This library module provides similarity functions for comparing sets of XML |
1684 | - : nodes (e.g., sets of XML elements, attributes or atomic values). |
1685 | - : |
1686 | - : These functions are particularly useful for matching near duplicate sets of XML nodes. |
1687 | - : |
1688 | - : The logic contained in this module is not specific to any particular XQuery implementation. |
1689 | + : <p>This library module provides similarity functions for comparing sets of XML |
1690 | + : nodes (e.g., sets of XML elements, attributes or atomic values).</p> |
1691 | + : <p/> |
1692 | + : <p>These functions are particularly useful for matching near duplicate sets of XML nodes.</p> |
1693 | + : <p/> |
1694 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p> |
1695 | : |
1696 | : @author Bruno Martins |
1697 | : @project Zorba/Data Cleaning/Set Similarity |
1698 | :) |
1699 | |
1700 | -module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
1701 | +module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
1702 | |
1703 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
1704 | declare option ver:module-version "2.0"; |
1705 | |
1706 | (:~ |
1707 | - : Returns the union between two sets, using the deep-equal() function to compare the XML nodes from the sets. |
1708 | - : |
1709 | - : <br/> |
1710 | - : Example usage : <code> deep-union ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code> |
1711 | - : <br/> |
1712 | - : The function invocation in the example above returns : <code> ("a", "b", "c", <d/> ) </code> |
1713 | + : <p>Returns the union between two sets, using the deep-equal() function to compare the XML nodes from the sets.</p> |
1714 | + : <p/> |
1715 | + : |
1716 | + : <p>Example usage : <code> deep-union ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code></p> |
1717 | + : <p/> |
1718 | + : <p>The function invocation in the example above returns : <code> ("a", "b", "c", <d/> ) </code></p> |
1719 | : |
1720 | : @param $s1 The first set. |
1721 | : @param $s2 The second set. |
1722 | @@ -54,12 +54,12 @@ |
1723 | }; |
1724 | |
1725 | (:~ |
1726 | - : Returns the intersection between two sets, using the deep-equal() function to compare the XML nodes from the sets. |
1727 | - : |
1728 | - : <br/> |
1729 | - : Example usage : <code> deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code> |
1730 | - : <br/> |
1731 | - : The function invocation in the example above returns : <code> ("a") </code> |
1732 | + : <p>Returns the intersection between two sets, using the deep-equal() function to compare the XML nodes from the sets.</p> |
1733 | + : <p/> |
1734 | + : |
1735 | + : <p>Example usage : <code> deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) </code></p> |
1736 | + : <p/> |
1737 | + : <p>The function invocation in the example above returns : <code> ("a") </code></p> |
1738 | : |
1739 | : @param $s1 The first set. |
1740 | : @param $s2 The second set. |
1741 | @@ -75,12 +75,12 @@ |
1742 | }; |
1743 | |
1744 | (:~ |
1745 | - : Removes exact duplicates from a set, using the deep-equal() function to compare the XML nodes from the sets. |
1746 | - : |
1747 | - : <br/> |
1748 | - : Example usage : <code> distinct ( ( "a", "a", <b/> ) ) </code> |
1749 | - : <br/> |
1750 | - : The function invocation in the example above returns : <code> ("a", <b/> ) </code> |
1751 | + : <p>Removes exact duplicates from a set, using the deep-equal() function to compare the XML nodes from the sets.</p> |
1752 | + : <p/> |
1753 | + : |
1754 | + : <p>Example usage : <code> distinct ( ( "a", "a", <b/> ) ) </code></p> |
1755 | + : <p/> |
1756 | + : <p>The function invocation in the example above returns : <code> ("a", <b/> ) </code></p> |
1757 | : |
1758 | : @param $s A set. |
1759 | : @return The set provided as input without the exact duplicates (i.e., returns the distinct nodes from the set provided as input). |
1760 | @@ -93,14 +93,14 @@ |
1761 | }; |
1762 | |
1763 | (:~ |
1764 | - : Returns the overlap coefficient between two sets of XML nodes. |
1765 | - : The overlap coefficient is defined as the shared information between the input sets |
1766 | - : (i.e., the size of the intersection) over the size of the smallest input set. |
1767 | - : |
1768 | - : <br/> |
1769 | - : Example usage : <code> overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) ) </code> |
1770 | - : <br/> |
1771 | - : The function invocation in the example above returns : <code> 1.0 </code> |
1772 | + : <p>Returns the overlap coefficient between two sets of XML nodes.</p> |
1773 | + : <p>The overlap coefficient is defined as the shared information between the input sets |
1774 | + : (i.e., the size of the intersection) over the size of the smallest input set.</p> |
1775 | + : <p/> |
1776 | + : |
1777 | + : <p>Example usage : <code> overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) ) </code></p> |
1778 | + : <p/> |
1779 | + : <p>The function invocation in the example above returns : <code> 1.0 </code></p> |
1780 | : |
1781 | : @param $s1 The first set. |
1782 | : @param $s2 The second set. |
1783 | @@ -112,14 +112,14 @@ |
1784 | }; |
1785 | |
1786 | (:~ |
1787 | - : Returns the Dice similarity coefficient between two sets of XML nodes. |
1788 | - : The Dice coefficient is defined as defined as twice the shared information between the input sets |
1789 | - : (i.e., the size of the intersection) over the sum of the cardinalities for the input sets. |
1790 | - : |
1791 | - : <br/> |
1792 | - : Example usage : <code> dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code> |
1793 | - : <br/> |
1794 | - : The function invocation in the example above returns : <code> 0.4 </code> |
1795 | + : <p>Returns the Dice similarity coefficient between two sets of XML nodes.</p> |
1796 | + : <p>The Dice coefficient is defined as twice the shared information between the input sets |
1797 | + : (i.e., the size of the intersection) over the sum of the cardinalities for the input sets.</p> |
1798 | + : <p/> |
1799 | + : |
1800 | + : <p>Example usage : <code> dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code></p> |
1801 | + : <p/> |
1802 | + : <p>The function invocation in the example above returns : <code> 0.4 </code></p> |
1803 | : |
1804 | : @param $s1 The first set. |
1805 | : @param $s2 The second set. |
1806 | @@ -131,14 +131,14 @@ |
1807 | }; |
1808 | |
1809 | (:~ |
1810 | - : Returns the Jaccard similarity coefficient between two sets of XML nodes. |
1811 | - : The Jaccard coefficient is defined as the size of the intersection divided by the size of the |
1812 | - : union of the input sets. |
1813 | - : |
1814 | - : <br/> |
1815 | - : Example usage : <code> jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code> |
1816 | - : <br/> |
1817 | - : The function invocation in the example above returns : <code> 0.25 </code> |
1818 | + : <p>Returns the Jaccard similarity coefficient between two sets of XML nodes.</p> |
1819 | + : <p>The Jaccard coefficient is defined as the size of the intersection divided by the size of the |
1820 | + : union of the input sets.</p> |
1821 | + : <p/> |
1822 | + : |
1823 | + : <p>Example usage : <code> jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) </code></p> |
1824 | + : <p/> |
1825 | + : <p>The function invocation in the example above returns : <code> 0.25 </code></p> |
1826 | : |
1827 | : @param $s1 The first set. |
1828 | : @param $s2 The second set. |
1829 | |
1830 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/token-based-string-similarity.xq' => 'src/token-based-string-similarity.xq' |
1831 | --- src/com/zorba-xquery/www/modules/data-cleaning/token-based-string-similarity.xq 2012-09-28 13:34:20 +0000 |
1832 | +++ src/token-based-string-similarity.xq 2013-07-10 00:51:26 +0000 |
1833 | @@ -17,40 +17,40 @@ |
1834 | :) |
1835 | |
1836 | (:~ |
1837 | - : This library module provides token-based string similarity functions that view strings |
1838 | - : as sets or multi-sets of tokens and use set-related properties to compute similarity scores. |
1839 | - : The tokens correspond to groups of characters extracted from the strings being compared, such as |
1840 | - : individual words or character n-grams. |
1841 | - : |
1842 | - : These functions are particularly useful for matching near duplicate strings in cases where |
1843 | - : typographical conventions often lead to rearrangement of words (e.g., "John Smith" versus "Smith, John"). |
1844 | - : |
1845 | - : The logic contained in this module is not specific to any particular XQuery implementation, |
1846 | + : <p>This library module provides token-based string similarity functions that view strings |
1847 | + : as sets or multi-sets of tokens and use set-related properties to compute similarity scores.</p> |
1848 | + : <p>The tokens correspond to groups of characters extracted from the strings being compared, such as |
1849 | + : individual words or character n-grams.</p> |
1850 | + : <p/> |
1851 | + : <p>These functions are particularly useful for matching near duplicate strings in cases where |
1852 | + : typographical conventions often lead to rearrangement of words (e.g., "John Smith" versus "Smith, John").</p> |
1853 | + : <p/> |
1854 | + : <p>The logic contained in this module is not specific to any particular XQuery implementation, |
1855 | : although the module requires the trigonometic functions of XQuery 3.0 or a math extension |
1856 | - : function such as sqrt($x as numeric) for computing the square root. |
1857 | + : function such as sqrt($x as numeric) for computing the square root.</p> |
1858 | : |
1859 | : @author Bruno Martins |
1860 | : @project Zorba/Data Cleaning/Token Based String Similarity |
1861 | :) |
1862 | |
1863 | -module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
1864 | +module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
1865 | |
1866 | -(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :) |
1867 | -(: declare namespace math = "java:java.lang.Math"; :) |
1868 | +(: In the QizX or Saxon XQuery engines, it is possible to call external functions from the Java math library :) |
1869 | +(: declare namespace math = "java:java.lang.Math"; :) |
1870 | declare namespace math = "http://www.w3.org/2005/xpath-functions/math"; |
1871 | |
1872 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
1873 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
1874 | |
1875 | declare namespace ver = "http://www.zorba-xquery.com/options/versioning"; |
1876 | declare option ver:module-version "2.0"; |
1877 | |
1878 | (:~ |
1879 | - : Returns the individual character n-grams forming a string. |
1880 | - : |
1881 | - : <br/> |
1882 | - : Example usage : <code> ngrams("FLWOR", 2 ) </code> |
1883 | - : <br/> |
1884 | - : The function invocation in the example above returns : <code> ("_F" , "FL" , "LW" , "WO" , "LW" , "WO" , "OR" , "R_") </code> |
1885 | + : <p>Returns the individual character n-grams forming a string.</p> |
1886 | + : <p/> |
1887 | + : |
1888 | + : <p>Example usage : <code> ngrams("FLWOR", 2 ) </code></p> |
1889 | + : <p/> |
1890 | + : <p>The function invocation in the example above returns : <code> ("_F" , "FL" , "LW" , "WO" , "LW" , "WO" , "OR" , "R_") </code></p> |
1891 | : |
1892 | : @param $s The input string. |
1893 | : @param $n The number of characters to consider when extracting n-grams. |
1894 | @@ -73,13 +73,13 @@ |
1895 | }; |
1896 | |
1897 | (:~ |
1898 | - : Auxiliary function for computing the cosine similarity coefficient between strings, |
1899 | - : using stringdescriptors based on sets of character n-grams or sets of tokens extracted from two strings. |
1900 | - : |
1901 | - : <br/> |
1902 | - : Example usage : <code> cosine( ("aa","bb") , ("bb","aa")) </code> |
1903 | - : <br/> |
1904 | - : The function invocation in the example above returns : <code> 1.0 </code> |
1905 | + : <p>Auxiliary function for computing the cosine similarity coefficient between strings, |
1906 | + : using string descriptors based on sets of character n-grams or sets of tokens extracted from two strings.</p> |
1907 | + : <p/> |
1908 | + : |
1909 | + : <p>Example usage : <code> cosine( ("aa","bb") , ("bb","aa")) </code></p> |
1910 | + : <p/> |
1911 | + : <p>The function invocation in the example above returns : <code> 1.0 </code></p> |
1912 | : |
1913 | : @param $desc1 The descriptor for the first string. |
1914 | : @param $desc2 The descriptor for the second string. |
1915 | @@ -97,12 +97,12 @@ |
1916 | }; |
1917 | |
1918 | (:~ |
1919 | - : Returns the Dice similarity coefficient between sets of character n-grams extracted from two strings. |
1920 | - : |
1921 | - : <br/> |
1922 | - : Example usage : <code> dice-ngrams("DWAYNE", "DUANE", 2 ) </code> |
1923 | - : <br/> |
1924 | - : The function invocation in the example above returns : <code> 0.4615384615384616 </code> |
1925 | + : <p>Returns the Dice similarity coefficient between sets of character n-grams extracted from two strings.</p> |
1926 | + : <p/> |
1927 | + : |
1928 | + : <p>Example usage : <code> dice-ngrams("DWAYNE", "DUANE", 2 ) </code></p> |
1929 | + : <p/> |
1930 | + : <p>The function invocation in the example above returns : <code> 0.4615384615384616 </code></p> |
1931 | : |
1932 | : @param $s1 The first string. |
1933 | : @param $s2 The second string. |
1934 | @@ -115,12 +115,12 @@ |
1935 | }; |
1936 | |
1937 | (:~ |
1938 | - : Returns the overlap similarity coefficient between sets of character n-grams extracted from two strings. |
1939 | - : |
1940 | - : <br/> |
1941 | - : Example usage : <code> overlap-ngrams("DWAYNE", "DUANE", 2 ) </code> |
1942 | - : <br/> |
1943 | - : The function invocation in the example above returns : <code> 0.5 </code> |
1944 | + : <p>Returns the overlap similarity coefficient between sets of character n-grams extracted from two strings.</p> |
1945 | + : <p/> |
1946 | + : |
1947 | + : <p>Example usage : <code> overlap-ngrams("DWAYNE", "DUANE", 2 ) </code></p> |
1948 | + : <p/> |
1949 | + : <p>The function invocation in the example above returns : <code> 0.5 </code></p> |
1950 | : |
1951 | : @param $s1 The first string. |
1952 | : @param $s2 The second string. |
1953 | @@ -133,12 +133,12 @@ |
1954 | }; |
1955 | |
1956 | (:~ |
1957 | - : Returns the Jaccard similarity coefficient between sets of character n-grams extracted from two strings. |
1958 | - : |
1959 | - : <br/> |
1960 | - : Example usage : <code> jaccard-ngrams("DWAYNE", "DUANE", 2 ) </code> |
1961 | - : <br/> |
1962 | - : The function invocation in the example above returns : <code> 0.3 </code> |
1963 | + : <p>Returns the Jaccard similarity coefficient between sets of character n-grams extracted from two strings.</p> |
1964 | + : <p/> |
1965 | + : |
1966 | + : <p>Example usage : <code> jaccard-ngrams("DWAYNE", "DUANE", 2 ) </code></p> |
1967 | + : <p/> |
1968 | + : <p>The function invocation in the example above returns : <code> 0.3 </code></p> |
1969 | : |
1970 | : @param $s1 The first string. |
1971 | : @param $s2 The second string. |
1972 | @@ -151,14 +151,14 @@ |
1973 | }; |
1974 | |
1975 | (:~ |
1976 | - : Returns the cosine similarity coefficient between sets of character n-grams extracted from two strings. |
1977 | - : The n-grams from each string are weighted according to their occurence frequency (i.e., weighted according to |
1978 | - : the term-frequency heuristic from Information Retrieval). |
1979 | - : |
1980 | - : <br/> |
1981 | - : Example usage : <code> cosine-ngrams("DWAYNE", "DUANE", 2 ) </code> |
1982 | - : <br/> |
1983 | - : The function invocation in the example above returns : <code> 0.2401922307076307 </code> |
1984 | + : <p>Returns the cosine similarity coefficient between sets of character n-grams extracted from two strings.</p> |
1985 | + : <p>The n-grams from each string are weighted according to their occurrence frequency (i.e., weighted according to |
1986 | + : the term-frequency heuristic from Information Retrieval).</p> |
1987 | + : <p/> |
1988 | + : |
1989 | + : <p>Example usage : <code> cosine-ngrams("DWAYNE", "DUANE", 2 ) </code></p> |
1990 | + : <p/> |
1991 | + : <p>The function invocation in the example above returns : <code> 0.2401922307076307 </code></p> |
1992 | : |
1993 | : @param $s1 The first string. |
1994 | : @param $s2 The second string. |
1995 | @@ -173,12 +173,12 @@ |
1996 | }; |
1997 | |
1998 | (:~ |
1999 | - : Returns the Dice similarity coefficient between sets of tokens extracted from two strings. |
2000 | - : |
2001 | - : <br/> |
2002 | - : Example usage : <code> dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code> |
2003 | - : <br/> |
2004 | - : The function invocation in the example above returns : <code> 0.4 </code> |
2005 | + : <p>Returns the Dice similarity coefficient between sets of tokens extracted from two strings.</p> |
2006 | + : <p/> |
2007 | + : |
2008 | + : <p>Example usage : <code> dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p> |
2009 | + : <p/> |
2010 | + : <p>The function invocation in the example above returns : <code> 0.4 </code></p> |
2011 | : |
2012 | : @param $s1 The first string. |
2013 | : @param $s2 The second string. |
2014 | @@ -191,12 +191,12 @@ |
2015 | }; |
2016 | |
2017 | (:~ |
2018 | - : Returns the overlap similarity coefficient between sets of tokens extracted from two strings. |
2019 | - : |
2020 | - : <br/> |
2021 | - : Example usage : <code> overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code> |
2022 | - : <br/> |
2023 | - : The function invocation in the example above returns : <code> 0.5 </code> |
2024 | + : <p>Returns the overlap similarity coefficient between sets of tokens extracted from two strings.</p> |
2025 | + : <p/> |
2026 | + : |
2027 | + : <p>Example usage : <code> overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p> |
2028 | + : <p/> |
2029 | + : <p>The function invocation in the example above returns : <code> 0.5 </code></p> |
2030 | : |
2031 | : @param $s1 The first string. |
2032 | : @param $s2 The second string. |
2033 | @@ -209,12 +209,12 @@ |
2034 | }; |
2035 | |
2036 | (:~ |
2037 | - : Returns the Jaccard similarity coefficient between sets of tokens extracted from two strings. |
2038 | - : |
2039 | - : <br/> |
2040 | - : Example usage : <code> jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code> |
2041 | - : <br/> |
2042 | - : The function invocation in the example above returns : <code> 0.25 </code> |
2043 | + : <p>Returns the Jaccard similarity coefficient between sets of tokens extracted from two strings.</p> |
2044 | + : <p/> |
2045 | + : |
2046 | + : <p>Example usage : <code> jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p> |
2047 | + : <p/> |
2048 | + : <p>The function invocation in the example above returns : <code> 0.25 </code></p> |
2049 | : |
2050 | : @param $s1 The first string. |
2051 | : @param $s2 The second string. |
2052 | @@ -227,14 +227,14 @@ |
2053 | }; |
2054 | |
2055 | (:~ |
2056 | - : Returns the cosine similarity coefficient between sets of tokens extracted from two strings. The tokens |
2057 | + : <p>Returns the cosine similarity coefficient between sets of tokens extracted from two strings. The tokens |
2058 | : from each string are weighted according to their occurence frequency (i.e., weighted according to the |
2059 | - : term-frequency heuristic from Information Retrieval). |
2060 | - : |
2061 | - : <br/> |
2062 | - : Example usage : <code> cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code> |
2063 | - : <br/> |
2064 | - : The function invocation in the example above returns : <code> 0.408248290463863 </code> |
2065 | + : term-frequency heuristic from Information Retrieval).</p> |
2066 | + : <p/> |
2067 | + : |
2068 | + : <p>Example usage : <code> cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) </code></p> |
2069 | + : <p/> |
2070 | + : <p>The function invocation in the example above returns : <code> 0.408248290463863 </code></p> |
2071 | : |
2072 | : @param $s1 The first string. |
2073 | : @param $s2 The second string. |
2074 | |
2075 | === renamed file 'src/com/zorba-xquery/www/modules/data-cleaning/whitepages_schema.xsd' => 'src/whitepages_schema.xsd' |
2076 | === modified file 'test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq' |
2077 | --- test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq 2011-07-26 20:10:34 +0000 |
2078 | +++ test/Queries/data-cleaning/character-based-string-similarity/edit-distance.xq 2013-07-10 00:51:26 +0000 |
2079 | @@ -1,3 +1,3 @@ |
2080 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
2081 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
2082 | |
2083 | simc:edit-distance("FLWOR", "FLOWER") |
2084 | |
2085 | === modified file 'test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq' |
2086 | --- test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq 2011-07-26 20:10:34 +0000 |
2087 | +++ test/Queries/data-cleaning/character-based-string-similarity/jaro-winkler.xq 2013-07-10 00:51:26 +0000 |
2088 | @@ -1,3 +1,3 @@ |
2089 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
2090 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
2091 | |
2092 | simc:jaro-winkler("DWAYNE", "DUANE", 4, 0.1 ) |
2093 | |
2094 | === modified file 'test/Queries/data-cleaning/character-based-string-similarity/jaro.xq' |
2095 | --- test/Queries/data-cleaning/character-based-string-similarity/jaro.xq 2011-07-26 20:10:34 +0000 |
2096 | +++ test/Queries/data-cleaning/character-based-string-similarity/jaro.xq 2013-07-10 00:51:26 +0000 |
2097 | @@ -1,3 +1,3 @@ |
2098 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
2099 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
2100 | |
2101 | simc:jaro("FLWOR Found.", "FLWOR Foundation") |
2102 | |
2103 | === modified file 'test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq' |
2104 | --- test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq 2011-07-26 20:10:34 +0000 |
2105 | +++ test/Queries/data-cleaning/character-based-string-similarity/needleman-wunsch.xq 2013-07-10 00:51:26 +0000 |
2106 | @@ -1,3 +1,3 @@ |
2107 | -import module namespace simc = "http://www.zorba-xquery.com/modules/data-cleaning/character-based-string-similarity"; |
2108 | +import module namespace simc = "http://zorba.io/modules/data-cleaning/character-based-string-similarity"; |
2109 | |
2110 | simc:needleman-wunsch("KAK", "KQRK", 1, 1) |
2111 | |
2112 | === modified file 'test/Queries/data-cleaning/consolidation/least-attributes.xq' |
2113 | --- test/Queries/data-cleaning/consolidation/least-attributes.xq 2011-07-19 19:12:03 +0000 |
2114 | +++ test/Queries/data-cleaning/consolidation/least-attributes.xq 2013-07-10 00:51:26 +0000 |
2115 | @@ -1,3 +1,3 @@ |
2116 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2117 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2118 | |
2119 | con:least-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) ) |
2120 | |
2121 | === modified file 'test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq' |
2122 | --- test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq 2011-07-19 19:12:03 +0000 |
2123 | +++ test/Queries/data-cleaning/consolidation/least-distinct-attributes.xq 2013-07-10 00:51:26 +0000 |
2124 | @@ -1,3 +1,3 @@ |
2125 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2126 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2127 | |
2128 | con:least-distinct-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) ) |
2129 | |
2130 | === modified file 'test/Queries/data-cleaning/consolidation/least-distinct-elements.xq' |
2131 | --- test/Queries/data-cleaning/consolidation/least-distinct-elements.xq 2011-07-19 19:12:03 +0000 |
2132 | +++ test/Queries/data-cleaning/consolidation/least-distinct-elements.xq 2013-07-10 00:51:26 +0000 |
2133 | @@ -1,3 +1,3 @@ |
2134 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2135 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2136 | |
2137 | con:least-distinct-elements( ( <a><b/></a>, <b><c/></b>, <d/>) ) |
2138 | |
2139 | === modified file 'test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq' |
2140 | --- test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq 2011-07-19 19:12:03 +0000 |
2141 | +++ test/Queries/data-cleaning/consolidation/least-distinct-nodes.xq 2013-07-10 00:51:26 +0000 |
2142 | @@ -1,3 +1,3 @@ |
2143 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2144 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2145 | |
2146 | con:least-distinct-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) ) |
2147 | |
2148 | === modified file 'test/Queries/data-cleaning/consolidation/least-elements.xq' |
2149 | --- test/Queries/data-cleaning/consolidation/least-elements.xq 2011-07-19 19:12:03 +0000 |
2150 | +++ test/Queries/data-cleaning/consolidation/least-elements.xq 2013-07-10 00:51:26 +0000 |
2151 | @@ -1,3 +1,3 @@ |
2152 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2153 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2154 | |
2155 | con:least-elements( ( <a><b/></a>, <b><c/></b>, <d/>) ) |
2156 | |
2157 | === modified file 'test/Queries/data-cleaning/consolidation/least-nodes.xq' |
2158 | --- test/Queries/data-cleaning/consolidation/least-nodes.xq 2011-07-19 19:12:03 +0000 |
2159 | +++ test/Queries/data-cleaning/consolidation/least-nodes.xq 2013-07-10 00:51:26 +0000 |
2160 | @@ -1,3 +1,3 @@ |
2161 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2162 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2163 | |
2164 | con:least-nodes( ( <a><b/></a>, <b><c/></b>, <d/>) ) |
2165 | |
2166 | === modified file 'test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq' |
2167 | --- test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq 2011-07-19 19:12:03 +0000 |
2168 | +++ test/Queries/data-cleaning/consolidation/least-similar-edit-distance.xq 2013-07-10 00:51:26 +0000 |
2169 | @@ -1,3 +1,3 @@ |
2170 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2171 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2172 | |
2173 | con:least-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" ) |
2174 | |
2175 | === modified file 'test/Queries/data-cleaning/consolidation/least-tokens.xq' |
2176 | --- test/Queries/data-cleaning/consolidation/least-tokens.xq 2011-07-19 19:12:03 +0000 |
2177 | +++ test/Queries/data-cleaning/consolidation/least-tokens.xq 2013-07-10 00:51:26 +0000 |
2178 | @@ -1,3 +1,3 @@ |
2179 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2180 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2181 | |
2182 | con:least-tokens( ( "a b c", "a b", "a"), " +" ) |
2183 | |
2184 | === modified file 'test/Queries/data-cleaning/consolidation/leastfrequent_1.xq' |
2185 | --- test/Queries/data-cleaning/consolidation/leastfrequent_1.xq 2011-07-19 19:12:03 +0000 |
2186 | +++ test/Queries/data-cleaning/consolidation/leastfrequent_1.xq 2013-07-10 00:51:26 +0000 |
2187 | @@ -1,3 +1,3 @@ |
2188 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2189 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2190 | |
2191 | con:least-frequent( ( "a", "a", "b") ) |
2192 | |
2193 | === modified file 'test/Queries/data-cleaning/consolidation/longest_1.xq' |
2194 | --- test/Queries/data-cleaning/consolidation/longest_1.xq 2011-07-19 19:12:03 +0000 |
2195 | +++ test/Queries/data-cleaning/consolidation/longest_1.xq 2013-07-10 00:51:26 +0000 |
2196 | @@ -1,3 +1,3 @@ |
2197 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2198 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2199 | |
2200 | con:longest( ( "a", "aa", "aaa") ) |
2201 | |
2202 | === modified file 'test/Queries/data-cleaning/consolidation/matching_1.xq' |
2203 | --- test/Queries/data-cleaning/consolidation/matching_1.xq 2011-07-19 19:12:03 +0000 |
2204 | +++ test/Queries/data-cleaning/consolidation/matching_1.xq 2013-07-10 00:51:26 +0000 |
2205 | @@ -1,3 +1,3 @@ |
2206 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2207 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2208 | |
2209 | con:matching( ( "a A b", "c AAA d", "e BB f"), "A+" ) |
2210 | |
2211 | === modified file 'test/Queries/data-cleaning/consolidation/most-attributes.xq' |
2212 | --- test/Queries/data-cleaning/consolidation/most-attributes.xq 2011-07-19 19:12:03 +0000 |
2213 | +++ test/Queries/data-cleaning/consolidation/most-attributes.xq 2013-07-10 00:51:26 +0000 |
2214 | @@ -1,3 +1,3 @@ |
2215 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2216 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2217 | |
2218 | con:most-attributes( ( <a att1="a1" att2="a2"/>, <b att1="a1" />, <c/> ) ) |
2219 | |
2220 | === modified file 'test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq' |
2221 | --- test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq 2011-07-19 19:12:03 +0000 |
2222 | +++ test/Queries/data-cleaning/consolidation/most-distinct-attributes.xq 2013-07-10 00:51:26 +0000 |
2223 | @@ -1,3 +1,3 @@ |
2224 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2225 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2226 | |
2227 | con:most-distinct-attributes( ( <a att1="a1" att2="a2" att3="a3"/>, <a att1="a1" att2="a2"><b att2="a2" /></a>, <c/> ) ) |
2228 | |
2229 | === modified file 'test/Queries/data-cleaning/consolidation/most-distinct-elements.xq' |
2230 | --- test/Queries/data-cleaning/consolidation/most-distinct-elements.xq 2011-07-19 19:12:03 +0000 |
2231 | +++ test/Queries/data-cleaning/consolidation/most-distinct-elements.xq 2013-07-10 00:51:26 +0000 |
2232 | @@ -1,3 +1,3 @@ |
2233 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2234 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2235 | |
2236 | con:most-distinct-elements( ( <a><b/><c/><d/></a>, <a><b/><b/><c/></a>, <a/> ) ) |
2237 | |
2238 | === modified file 'test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq' |
2239 | --- test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq 2011-07-19 19:12:03 +0000 |
2240 | +++ test/Queries/data-cleaning/consolidation/most-distinct-nodes.xq 2013-07-10 00:51:26 +0000 |
2241 | @@ -1,3 +1,3 @@ |
2242 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2243 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2244 | |
2245 | con:most-distinct-nodes( ( <a><b/></a>, <a><a/></a>, <b/>) ) |
2246 | |
2247 | === modified file 'test/Queries/data-cleaning/consolidation/most-elements.xq' |
2248 | --- test/Queries/data-cleaning/consolidation/most-elements.xq 2011-07-19 19:12:03 +0000 |
2249 | +++ test/Queries/data-cleaning/consolidation/most-elements.xq 2013-07-10 00:51:26 +0000 |
2250 | @@ -1,3 +1,3 @@ |
2251 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2252 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2253 | |
2254 | con:most-elements( ( <a><b/></a>, <a/>, <b/>) ) |
2255 | |
2256 | === modified file 'test/Queries/data-cleaning/consolidation/most-frequent.xq' |
2257 | --- test/Queries/data-cleaning/consolidation/most-frequent.xq 2011-07-19 19:12:03 +0000 |
2258 | +++ test/Queries/data-cleaning/consolidation/most-frequent.xq 2013-07-10 00:51:26 +0000 |
2259 | @@ -1,3 +1,3 @@ |
2260 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2261 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2262 | |
2263 | con:most-frequent( ( "a", "a", "b") ) |
2264 | |
2265 | === modified file 'test/Queries/data-cleaning/consolidation/most-nodes.xq' |
2266 | --- test/Queries/data-cleaning/consolidation/most-nodes.xq 2011-07-19 19:12:03 +0000 |
2267 | +++ test/Queries/data-cleaning/consolidation/most-nodes.xq 2013-07-10 00:51:26 +0000 |
2268 | @@ -1,3 +1,3 @@ |
2269 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2270 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2271 | |
2272 | con:most-nodes( ( <a><b/></a>, <a/>, <b/>) ) |
2273 | |
2274 | === modified file 'test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq' |
2275 | --- test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq 2011-07-19 19:12:03 +0000 |
2276 | +++ test/Queries/data-cleaning/consolidation/most-similar-edit-distance.xq 2013-07-10 00:51:26 +0000 |
2277 | @@ -1,3 +1,3 @@ |
2278 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2279 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2280 | |
2281 | con:most-similar-edit-distance( ( "aaabbbccc", "aaabbb", "eeefff" ), "aaab" ) |
2282 | |
2283 | === modified file 'test/Queries/data-cleaning/consolidation/most-tokens.xq' |
2284 | --- test/Queries/data-cleaning/consolidation/most-tokens.xq 2011-07-19 19:12:03 +0000 |
2285 | +++ test/Queries/data-cleaning/consolidation/most-tokens.xq 2013-07-10 00:51:26 +0000 |
2286 | @@ -1,3 +1,3 @@ |
2287 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2288 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2289 | |
2290 | con:most-tokens( ( "a b c", "a b", "a"), " +" ) |
2291 | |
2292 | === modified file 'test/Queries/data-cleaning/consolidation/shortest_1.xq' |
2293 | --- test/Queries/data-cleaning/consolidation/shortest_1.xq 2011-07-19 19:12:03 +0000 |
2294 | +++ test/Queries/data-cleaning/consolidation/shortest_1.xq 2013-07-10 00:51:26 +0000 |
2295 | @@ -1,3 +1,3 @@ |
2296 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2297 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2298 | |
2299 | con:shortest( ( "a", "aa", "aaa") ) |
2300 | |
2301 | === modified file 'test/Queries/data-cleaning/consolidation/superstring_1.xq' |
2302 | --- test/Queries/data-cleaning/consolidation/superstring_1.xq 2011-07-19 19:12:03 +0000 |
2303 | +++ test/Queries/data-cleaning/consolidation/superstring_1.xq 2013-07-10 00:51:26 +0000 |
2304 | @@ -1,3 +1,3 @@ |
2305 | -import module namespace con = "http://www.zorba-xquery.com/modules/data-cleaning/consolidation"; |
2306 | +import module namespace con = "http://zorba.io/modules/data-cleaning/consolidation"; |
2307 | |
2308 | con:superstring( ( "aaa bbb ccc", "aaa bbb", "aaa ddd", "eee fff" ) ) |
2309 | |
2310 | === modified file 'test/Queries/data-cleaning/conversion/address-from-geocode.xq' |
2311 | --- test/Queries/data-cleaning/conversion/address-from-geocode.xq 2012-10-03 13:53:12 +0000 |
2312 | +++ test/Queries/data-cleaning/conversion/address-from-geocode.xq 2013-07-10 00:51:26 +0000 |
2313 | @@ -1,4 +1,4 @@ |
2314 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2315 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2316 | |
2317 | for $s in conversion:address-from-geocode ( 38.725735 , -9.15021 ) |
2318 | return fn:lower-case($s) |
2319 | |
2320 | === modified file 'test/Queries/data-cleaning/conversion/address-from-phone.xq' |
2321 | --- test/Queries/data-cleaning/conversion/address-from-phone.xq 2012-02-05 22:52:01 +0000 |
2322 | +++ test/Queries/data-cleaning/conversion/address-from-phone.xq 2013-07-10 00:51:26 +0000 |
2323 | @@ -1,4 +1,4 @@ |
2324 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2325 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2326 | |
2327 | let $arg := conversion:address-from-phone('8654582358')[1] |
2328 | let $result := |
2329 | |
2330 | === modified file 'test/Queries/data-cleaning/conversion/address-from-user.xq' |
2331 | --- test/Queries/data-cleaning/conversion/address-from-user.xq 2012-02-05 22:52:01 +0000 |
2332 | +++ test/Queries/data-cleaning/conversion/address-from-user.xq 2013-07-10 00:51:26 +0000 |
2333 | @@ -1,4 +1,4 @@ |
2334 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2335 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2336 | |
2337 | let $arg := conversion:address-from-user('Maria Lurdes')[1] |
2338 | let $result := |
2339 | |
2340 | === modified file 'test/Queries/data-cleaning/conversion/currency-convert.xq' |
2341 | --- test/Queries/data-cleaning/conversion/currency-convert.xq 2011-07-19 19:12:03 +0000 |
2342 | +++ test/Queries/data-cleaning/conversion/currency-convert.xq 2013-07-10 00:51:26 +0000 |
2343 | @@ -1,3 +1,3 @@ |
2344 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2345 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2346 | |
2347 | conversion:currency-convert ( 1, "USD", "EUR", "2011-01-18" ) |
2348 | |
2349 | === modified file 'test/Queries/data-cleaning/conversion/geocode-from-address.xq' |
2350 | --- test/Queries/data-cleaning/conversion/geocode-from-address.xq 2011-11-08 21:16:29 +0000 |
2351 | +++ test/Queries/data-cleaning/conversion/geocode-from-address.xq 2013-07-10 00:51:26 +0000 |
2352 | @@ -1,4 +1,4 @@ |
2353 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2354 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2355 | |
2356 | let $geocode := conversion:geocode-from-address ( ("Lisboa", "Portugal") ) |
2357 | for $result in $geocode |
2358 | |
2359 | === modified file 'test/Queries/data-cleaning/conversion/phone-from-address.xq' |
2360 | --- test/Queries/data-cleaning/conversion/phone-from-address.xq 2012-02-05 22:52:01 +0000 |
2361 | +++ test/Queries/data-cleaning/conversion/phone-from-address.xq 2013-07-10 00:51:26 +0000 |
2362 | @@ -1,4 +1,4 @@ |
2363 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2364 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2365 | |
2366 | let $arg := conversion:phone-from-address('5655 E Gaskill Rd, Willcox, AZ, US')[1] |
2367 | let $result := |
2368 | |
2369 | === modified file 'test/Queries/data-cleaning/conversion/phone-from-user.xq' |
2370 | --- test/Queries/data-cleaning/conversion/phone-from-user.xq 2012-02-05 22:52:01 +0000 |
2371 | +++ test/Queries/data-cleaning/conversion/phone-from-user.xq 2013-07-10 00:51:26 +0000 |
2372 | @@ -1,4 +1,4 @@ |
2373 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2374 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2375 | |
2376 | let $arg := conversion:phone-from-user ('Maria Lurdes')[1] |
2377 | let $result := |
2378 | |
2379 | === modified file 'test/Queries/data-cleaning/conversion/unit-convert.xq' |
2380 | --- test/Queries/data-cleaning/conversion/unit-convert.xq 2011-07-19 19:12:03 +0000 |
2381 | +++ test/Queries/data-cleaning/conversion/unit-convert.xq 2013-07-10 00:51:26 +0000 |
2382 | @@ -1,3 +1,3 @@ |
2383 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2384 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2385 | |
2386 | conversion:unit-convert ( 1 , "Distance", "mile", "kilometer" ) |
2387 | |
2388 | === modified file 'test/Queries/data-cleaning/conversion/user-from-address.xq' |
2389 | --- test/Queries/data-cleaning/conversion/user-from-address.xq 2012-02-05 22:52:01 +0000 |
2390 | +++ test/Queries/data-cleaning/conversion/user-from-address.xq 2013-07-10 00:51:26 +0000 |
2391 | @@ -1,4 +1,4 @@ |
2392 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2393 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2394 | |
2395 | let $arg := conversion:user-from-address('5655 E Gaskill Rd, Willcox, AZ, US')[1] |
2396 | let $result := |
2397 | |
2398 | === modified file 'test/Queries/data-cleaning/conversion/user-from-phone.xq' |
2399 | --- test/Queries/data-cleaning/conversion/user-from-phone.xq 2012-02-05 22:52:01 +0000 |
2400 | +++ test/Queries/data-cleaning/conversion/user-from-phone.xq 2013-07-10 00:51:26 +0000 |
2401 | @@ -1,4 +1,4 @@ |
2402 | -import module namespace conversion = "http://www.zorba-xquery.com/modules/data-cleaning/conversion"; |
2403 | +import module namespace conversion = "http://zorba.io/modules/data-cleaning/conversion"; |
2404 | |
2405 | let $arg := conversion:user-from-phone ('8654582358')[1] |
2406 | let $result := |
2407 | |
2408 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq' |
2409 | --- test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq 2011-07-19 19:12:03 +0000 |
2410 | +++ test/Queries/data-cleaning/hybrid-string-similarity/monge-elkan-jaro-winkler.xq 2013-07-10 00:51:26 +0000 |
2411 | @@ -1,3 +1,3 @@ |
2412 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2413 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2414 | |
2415 | simh:monge-elkan-jaro-winkler("Comput. Sci. and Eng. Dept., University of California, San Diego", "Department of Computer Scinece, Univ. Calif., San Diego", 4, 0.1) |
2416 | |
2417 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq' |
2418 | --- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq 2011-07-19 19:12:03 +0000 |
2419 | +++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-edit-distance.xq 2013-07-10 00:51:26 +0000 |
2420 | @@ -1,3 +1,3 @@ |
2421 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2422 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2423 | |
2424 | simh:soft-cosine-tokens-edit-distance("The FLWOR Foundation", "FLWOR Found.", " +", 0 ) |
2425 | |
2426 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq' |
2427 | --- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq 2011-07-19 19:12:03 +0000 |
2428 | +++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro-winkler.xq 2013-07-10 00:51:26 +0000 |
2429 | @@ -1,3 +1,3 @@ |
2430 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2431 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2432 | |
2433 | simh:soft-cosine-tokens-jaro-winkler("The FLWOR Foundation", "FLWOR Found.", " +", 1, 4, 0.1 ) |
2434 | |
2435 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq' |
2436 | --- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq 2011-07-19 19:12:03 +0000 |
2437 | +++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-jaro.xq 2013-07-10 00:51:26 +0000 |
2438 | @@ -1,3 +1,3 @@ |
2439 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2440 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2441 | |
2442 | simh:soft-cosine-tokens-jaro("The FLWOR Foundation", "FLWOR Found.", " +", 1 ) |
2443 | |
2444 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq' |
2445 | --- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq 2011-07-19 19:12:03 +0000 |
2446 | +++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-metaphone.xq 2013-07-10 00:51:26 +0000 |
2447 | @@ -1,3 +1,3 @@ |
2448 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2449 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2450 | |
2451 | simh:soft-cosine-tokens-metaphone("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +" ) |
2452 | |
2453 | === modified file 'test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq' |
2454 | --- test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq 2011-07-19 19:12:03 +0000 |
2455 | +++ test/Queries/data-cleaning/hybrid-string-similarity/soft-cosine-tokens-soundex.xq 2013-07-10 00:51:26 +0000 |
2456 | @@ -1,3 +1,3 @@ |
2457 | -import module namespace simh = "http://www.zorba-xquery.com/modules/data-cleaning/hybrid-string-similarity"; |
2458 | +import module namespace simh = "http://zorba.io/modules/data-cleaning/hybrid-string-similarity"; |
2459 | |
2460 | simh:soft-cosine-tokens-soundex("ALEKSANDER SMITH", "ALEXANDER SMYTH", " +") |
2461 | |
2462 | === modified file 'test/Queries/data-cleaning/normalization/normalize-address.xq' |
2463 | --- test/Queries/data-cleaning/normalization/normalize-address.xq 2011-07-19 19:12:03 +0000 |
2464 | +++ test/Queries/data-cleaning/normalization/normalize-address.xq 2013-07-10 00:51:26 +0000 |
2465 | @@ -1,3 +1,3 @@ |
2466 | -import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization"; |
2467 | +import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization"; |
2468 | |
2469 | normalization:normalize-address ( ( 'Marques de Pombal' , 'Lisboa' ) ) |
2470 | |
2471 | === modified file 'test/Queries/data-cleaning/normalization/to-date.xq' |
2472 | --- test/Queries/data-cleaning/normalization/to-date.xq 2011-07-19 19:12:03 +0000 |
2473 | +++ test/Queries/data-cleaning/normalization/to-date.xq 2013-07-10 00:51:26 +0000 |
2474 | @@ -1,3 +1,3 @@ |
2475 | -import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization"; |
2476 | +import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization"; |
2477 | |
2478 | normalization:to-date ( "24OCT2002" , "%d%b%Y" ) |
2479 | |
2480 | === modified file 'test/Queries/data-cleaning/normalization/to-dateTime.spec' |
2481 | --- test/Queries/data-cleaning/normalization/to-dateTime.spec 2011-07-26 20:10:34 +0000 |
2482 | +++ test/Queries/data-cleaning/normalization/to-dateTime.spec 2013-07-10 00:51:26 +0000 |
2483 | @@ -1,1 +1,1 @@ |
2484 | -Error: http://www.zorba-xquery.com/modules/data-cleaning/normalization:notsupported |
2485 | +Error: http://zorba.io/modules/data-cleaning/normalization:NOTSUPPORTED |
2486 | |
2487 | === modified file 'test/Queries/data-cleaning/normalization/to-dateTime.xq' |
2488 | --- test/Queries/data-cleaning/normalization/to-dateTime.xq 2011-07-26 20:10:34 +0000 |
2489 | +++ test/Queries/data-cleaning/normalization/to-dateTime.xq 2013-07-10 00:51:26 +0000 |
2490 | @@ -1,3 +1,3 @@ |
2491 | -import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization"; |
2492 | +import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization"; |
2493 | |
2494 | normalization:to-dateTime( "24OCT2002 21:22" , "%d%b%Y %H%M" ) |
2495 | |
2496 | === modified file 'test/Queries/data-cleaning/normalization/to-time.spec' |
2497 | --- test/Queries/data-cleaning/normalization/to-time.spec 2011-07-19 19:12:03 +0000 |
2498 | +++ test/Queries/data-cleaning/normalization/to-time.spec 2013-07-10 00:51:26 +0000 |
2499 | @@ -1,1 +1,1 @@ |
2500 | -Error: http://www.zorba-xquery.com/modules/data-cleaning/normalization:notsupported |
2501 | +Error: http://zorba.io/modules/data-cleaning/normalization:NOTSUPPORTED |
2502 | |
2503 | === modified file 'test/Queries/data-cleaning/normalization/to-time.xq' |
2504 | --- test/Queries/data-cleaning/normalization/to-time.xq 2011-07-19 19:12:03 +0000 |
2505 | +++ test/Queries/data-cleaning/normalization/to-time.xq 2013-07-10 00:51:26 +0000 |
2506 | @@ -1,3 +1,3 @@ |
2507 | -import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization"; |
2508 | +import module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization"; |
2509 | |
2510 | normalization:to-time ( "09 hours 10 minutes" , "%H hours %M minutes" ) |
2511 | |
2512 | === modified file 'test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq' |
2513 | --- test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq 2011-07-19 19:12:03 +0000 |
2514 | +++ test/Queries/data-cleaning/phonetic-string-similarity/metaphone-key.xq 2013-07-10 00:51:26 +0000 |
2515 | @@ -1,3 +1,3 @@ |
2516 | -import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
2517 | +import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
2518 | |
2519 | simp:metaphone-key("ALEKSANDER") |
2520 | |
2521 | === modified file 'test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq' |
2522 | --- test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq 2011-07-19 19:12:03 +0000 |
2523 | +++ test/Queries/data-cleaning/phonetic-string-similarity/metaphone.xq 2013-07-10 00:51:26 +0000 |
2524 | @@ -1,3 +1,3 @@ |
2525 | -import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
2526 | +import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
2527 | |
2528 | simp:metaphone-key("ALEKSANDER") |
2529 | |
2530 | === modified file 'test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq' |
2531 | --- test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq 2013-05-18 00:38:53 +0000 |
2532 | +++ test/Queries/data-cleaning/phonetic-string-similarity/soundex-key.xq 2013-07-10 00:51:26 +0000 |
2533 | @@ -1,4 +1,4 @@ |
2534 | -import module namespace simpl = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
2535 | +import module namespace simpl = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
2536 | |
2537 | simpl:soundex-key("Robert"), |
2538 | simpl:soundex-key("BARHAM") eq "B650" and |
2539 | |
2540 | === modified file 'test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq' |
2541 | --- test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq 2011-07-19 19:12:03 +0000 |
2542 | +++ test/Queries/data-cleaning/phonetic-string-similarity/soundex.xq 2013-07-10 00:51:26 +0000 |
2543 | @@ -1,3 +1,3 @@ |
2544 | -import module namespace simp = "http://www.zorba-xquery.com/modules/data-cleaning/phonetic-string-similarity"; |
2545 | +import module namespace simp = "http://zorba.io/modules/data-cleaning/phonetic-string-similarity"; |
2546 | |
2547 | simp:soundex( "Robert" , "Rupert" ) |
2548 | |
2549 | === modified file 'test/Queries/data-cleaning/set-similarity/deep-intersect.xq' |
2550 | --- test/Queries/data-cleaning/set-similarity/deep-intersect.xq 2011-07-19 19:12:03 +0000 |
2551 | +++ test/Queries/data-cleaning/set-similarity/deep-intersect.xq 2013-07-10 00:51:26 +0000 |
2552 | @@ -1,3 +1,3 @@ |
2553 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2554 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2555 | |
2556 | set:deep-intersect ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) |
2557 | |
2558 | === modified file 'test/Queries/data-cleaning/set-similarity/deep-union.xq' |
2559 | --- test/Queries/data-cleaning/set-similarity/deep-union.xq 2011-07-19 19:12:03 +0000 |
2560 | +++ test/Queries/data-cleaning/set-similarity/deep-union.xq 2013-07-10 00:51:26 +0000 |
2561 | @@ -1,3 +1,3 @@ |
2562 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2563 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2564 | |
2565 | set:deep-union ( ( "a", "b", "c") , ( "a", "a", <d/> ) ) |
2566 | |
2567 | === modified file 'test/Queries/data-cleaning/set-similarity/dice.xq' |
2568 | --- test/Queries/data-cleaning/set-similarity/dice.xq 2011-07-19 19:12:03 +0000 |
2569 | +++ test/Queries/data-cleaning/set-similarity/dice.xq 2013-07-10 00:51:26 +0000 |
2570 | @@ -1,3 +1,3 @@ |
2571 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2572 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2573 | |
2574 | set:dice ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) |
2575 | |
2576 | === modified file 'test/Queries/data-cleaning/set-similarity/distinct.xq' |
2577 | --- test/Queries/data-cleaning/set-similarity/distinct.xq 2011-07-19 19:12:03 +0000 |
2578 | +++ test/Queries/data-cleaning/set-similarity/distinct.xq 2013-07-10 00:51:26 +0000 |
2579 | @@ -1,3 +1,3 @@ |
2580 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2581 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2582 | |
2583 | set:distinct (( "a", "a", <b/> )) |
2584 | |
2585 | === modified file 'test/Queries/data-cleaning/set-similarity/jaccard.xq' |
2586 | --- test/Queries/data-cleaning/set-similarity/jaccard.xq 2011-07-19 19:12:03 +0000 |
2587 | +++ test/Queries/data-cleaning/set-similarity/jaccard.xq 2013-07-10 00:51:26 +0000 |
2588 | @@ -1,3 +1,3 @@ |
2589 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2590 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2591 | |
2592 | set:jaccard ( ( "a", "b", <c/> ) , ( "a", "a", "d") ) |
2593 | |
2594 | === modified file 'test/Queries/data-cleaning/set-similarity/overlap.xq' |
2595 | --- test/Queries/data-cleaning/set-similarity/overlap.xq 2011-07-19 19:12:03 +0000 |
2596 | +++ test/Queries/data-cleaning/set-similarity/overlap.xq 2013-07-10 00:51:26 +0000 |
2597 | @@ -1,3 +1,3 @@ |
2598 | -import module namespace set = "http://www.zorba-xquery.com/modules/data-cleaning/set-similarity"; |
2599 | +import module namespace set = "http://zorba.io/modules/data-cleaning/set-similarity"; |
2600 | |
2601 | set:overlap ( ( "a", "b", <c/> ) , ( "a", "a", "b" ) ) |
2602 | |
2603 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq' |
2604 | --- test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq 2011-07-19 19:12:03 +0000 |
2605 | +++ test/Queries/data-cleaning/token-based-string-similarity/cosine-ngrams.xq 2013-07-10 00:51:26 +0000 |
2606 | @@ -1,3 +1,3 @@ |
2607 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2608 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2609 | |
2610 | simt:cosine-ngrams("DWAYNE", "DUANE", 2 ) |
2611 | |
2612 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq' |
2613 | --- test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq 2011-07-19 19:12:03 +0000 |
2614 | +++ test/Queries/data-cleaning/token-based-string-similarity/cosine-tokens.xq 2013-07-10 00:51:26 +0000 |
2615 | @@ -1,3 +1,3 @@ |
2616 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2617 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2618 | |
2619 | simt:cosine-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) |
2620 | |
2621 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/cosine.xq' |
2622 | --- test/Queries/data-cleaning/token-based-string-similarity/cosine.xq 2011-07-19 19:12:03 +0000 |
2623 | +++ test/Queries/data-cleaning/token-based-string-similarity/cosine.xq 2013-07-10 00:51:26 +0000 |
2624 | @@ -1,3 +1,3 @@ |
2625 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2626 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2627 | |
2628 | simt:cosine( ("aa","bb") , ("bb","aa")) |
2629 | |
2630 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq' |
2631 | --- test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq 2011-07-19 19:12:03 +0000 |
2632 | +++ test/Queries/data-cleaning/token-based-string-similarity/dice-ngrams.xq 2013-07-10 00:51:26 +0000 |
2633 | @@ -1,3 +1,3 @@ |
2634 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2635 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2636 | |
2637 | simt:dice-ngrams("DWAYNE", "DUANE", 2 ) |
2638 | |
2639 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq' |
2640 | --- test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq 2011-07-19 19:12:03 +0000 |
2641 | +++ test/Queries/data-cleaning/token-based-string-similarity/dice-tokens.xq 2013-07-10 00:51:26 +0000 |
2642 | @@ -1,3 +1,3 @@ |
2643 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2644 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2645 | |
2646 | simt:dice-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) |
2647 | |
2648 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq' |
2649 | --- test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq 2011-07-19 19:12:03 +0000 |
2650 | +++ test/Queries/data-cleaning/token-based-string-similarity/jaccard-ngrams.xq 2013-07-10 00:51:26 +0000 |
2651 | @@ -1,3 +1,3 @@ |
2652 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2653 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2654 | |
2655 | simt:jaccard-ngrams("DWAYNE", "DUANE", 2 ) |
2656 | |
2657 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq' |
2658 | --- test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq 2011-07-19 19:12:03 +0000 |
2659 | +++ test/Queries/data-cleaning/token-based-string-similarity/jaccard-tokens.xq 2013-07-10 00:51:26 +0000 |
2660 | @@ -1,3 +1,3 @@ |
2661 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2662 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2663 | |
2664 | simt:jaccard-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) |
2665 | |
2666 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq' |
2667 | --- test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq 2011-07-19 19:12:03 +0000 |
2668 | +++ test/Queries/data-cleaning/token-based-string-similarity/ngrams.xq 2013-07-10 00:51:26 +0000 |
2669 | @@ -1,3 +1,3 @@ |
2670 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2671 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2672 | |
2673 | simt:ngrams("FLWOR", 2 ) |
2674 | |
2675 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq' |
2676 | --- test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq 2011-07-19 19:12:03 +0000 |
2677 | +++ test/Queries/data-cleaning/token-based-string-similarity/overlap-ngrams.xq 2013-07-10 00:51:26 +0000 |
2678 | @@ -1,3 +1,3 @@ |
2679 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2680 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2681 | |
2682 | simt:overlap-ngrams("DWAYNE", "DUANE", 2 ) |
2683 | |
2684 | === modified file 'test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq' |
2685 | --- test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq 2011-07-19 19:12:03 +0000 |
2686 | +++ test/Queries/data-cleaning/token-based-string-similarity/overlap-tokens.xq 2013-07-10 00:51:26 +0000 |
2687 | @@ -1,3 +1,3 @@ |
2688 | -import module namespace simt = "http://www.zorba-xquery.com/modules/data-cleaning/token-based-string-similarity"; |
2689 | +import module namespace simt = "http://zorba.io/modules/data-cleaning/token-based-string-similarity"; |
2690 | |
2691 | simt:overlap-tokens("The FLWOR Foundation", "FLWOR Found.", " +" ) |
1. Need to put <p/> tags between doc paragraphs, including after the leading short description. (Remove the <br/> tags.)
2. The error in currency-convert() should be in the module's own namespace/prefix, with ALL-CAPS name.
3. Based on the test cases, some other functions also raise errors, but those are not documented.