Merge lp:~jpakkane/libcolumbus/hud-rework into lp:libcolumbus
Status: Merged
Approved by: Pete Woods
Approved revision: 485
Merged at revision: 461
Proposed branch: lp:~jpakkane/libcolumbus/hud-rework
Merge into: lp:libcolumbus
Diff against target: 468 lines (+291/-19), 9 files modified:
  CMakeLists.txt (+1/-1)
  debian/changelog (+6/-0)
  include/Matcher.hh (+11/-0)
  include/WordStore.hh (+1/-0)
  src/MatchResults.cc (+4/-1)
  src/Matcher.cc (+71/-12)
  src/WordStore.cc (+8/-4)
  test/MatcherTest.cc (+171/-1)
  test/TrieTest.cc (+18/-0)
To merge this branch: bzr merge lp:~jpakkane/libcolumbus/hud-rework
Related bugs: none

Reviewer | Review Type | Date Requested | Status
---|---|---|---
Pete Woods (community) | | | Approve
PS Jenkins bot (community) | continuous-integration | | Approve

Review via email:
Commit message
Add new online search mode to get better performance in HUD.
Description of the change
Add new online search mode to get better performance in HUD.
483. By Jussi Pakkanen

Bumped version number.

Pete Woods (pete-woods) wrote:

PS Jenkins bot (ps-jenkins) wrote:
PASSED: Continuous integration, rev:483
http://
Executed test runs:
SUCCESS: http://
SUCCESS: http://
SUCCESS: http://
Click here to trigger a rebuild:
http://
484. By Jussi Pakkanen

Some test cleanup.

Jussi Pakkanen (jpakkane) wrote:
Fixed.

PS Jenkins bot (ps-jenkins) wrote:
FAILED: Continuous integration, rev:484
http://
Executed test runs:
FAILURE: http://
FAILURE: http://
FAILURE: http://
Click here to trigger a rebuild:
http://
485. By Jussi Pakkanen

Blah.

PS Jenkins bot (ps-jenkins) wrote:
PASSED: Continuous integration, rev:485
http://
Executed test runs:
SUCCESS: http://
SUCCESS: http://
SUCCESS: http://
Click here to trigger a rebuild:
http://

Pete Woods (pete-woods) wrote:
Looks good to me! Tried with the dependent branch of HUD (lp:~pete-woods/hud/tweak-search-parameters).
Preview Diff
=== modified file 'CMakeLists.txt'
--- CMakeLists.txt	2013-08-09 19:35:42 +0000
+++ CMakeLists.txt	2014-02-28 14:46:51 +0000
@@ -38,7 +38,7 @@
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
 
 set(SO_VERSION_MAJOR "1")
-set(SO_VERSION_MINOR "0")
+set(SO_VERSION_MINOR "1")
 set(SO_VERSION_PATCH "0")
 
 set(SO_VERSION "${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH}")
 
=== modified file 'debian/changelog'
--- debian/changelog	2014-01-20 19:43:49 +0000
+++ debian/changelog	2014-02-28 14:46:51 +0000
@@ -1,3 +1,9 @@
+libcolumbus (1.1.0-0ubuntu1) UNRELEASED; urgency=medium
+
+  * New online search mode
+
+ -- Jussi Pakkanen <jussi.pakkanen@ubuntu.com>  Fri, 28 Feb 2014 15:44:25 +0200
+
 libcolumbus (1.0.0+14.04.20140120-0ubuntu1) trusty; urgency=low
 
   * New rebuild forced
 
=== modified file 'include/Matcher.hh'
--- include/Matcher.hh	2013-08-07 12:20:52 +0000
+++ include/Matcher.hh	2014-02-28 14:46:51 +0000
@@ -60,6 +60,17 @@
     void index(const Corpus &c);
     ErrorValues& getErrorValues();
     IndexWeights& getIndexWeights();
+    /*
+     * This function is optimized for online matches, that is, queries
+     * that are live updated during typing. It uses slightly different
+     * search heuristics to ensure results that "feel good" to humans.
+     *
+     * The second argument is the field that should be the primary focus.
+     * Usually it means having the text that will be shown to the user.
+     * As an example, in the HUD, this field would contain the command
+     * (and nothing else) that will be executed.
+     */
+    MatchResults onlineMatch(const WordList &query, const Word &primaryIndex);
 };
 
 COL_NAMESPACE_END
 
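For readers unfamiliar with the API, a minimal usage sketch of the new entry point, assembled from this header and the tests later in this diff. The include set (in particular `Corpus.hh` and `Word.hh`) is an assumption; `splitToWords()` comes from ColumbusHelpers.hh as used in test/MatcherTest.cc.

```cpp
#include "Matcher.hh"
#include "Corpus.hh"          // assumed header name
#include "Document.hh"
#include "Word.hh"            // assumed header name
#include "WordList.hh"
#include "MatchResults.hh"
#include "ColumbusHelpers.hh" // provides splitToWords()
#include <cstdio>

using namespace Columbus;

int main() {
    // Build and index a corpus with a "name" field, as the tests below do.
    Corpus c;
    Document d(1);
    Word primaryField("name");
    d.addText(primaryField, "Fit Canvas to Layers");
    c.addDocument(d);

    Matcher m;
    m.index(c);

    // Re-run the query on every keystroke; onlineMatch is tuned for that.
    WordList query = splitToWords("fit canv");
    MatchResults results = m.onlineMatch(query, primaryField);
    for(size_t i = 0; i < results.size(); i++) {
        // Results come back ordered by descending relevancy.
        printf("doc %u relevancy %f\n",
                (unsigned) results.getDocumentID(i), results.getRelevancy(i));
    }
    return 0;
}
```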
=== modified file 'include/WordStore.hh'
--- include/WordStore.hh	2013-06-14 12:26:10 +0000
+++ include/WordStore.hh	2014-02-28 14:46:51 +0000
@@ -49,6 +49,7 @@
 
 
     WordID getID(const Word &w);
+    bool hasWord(const Word &w) const;
    Word getWord(const WordID id) const;
     bool hasWord(const WordID id) const;
 };
 
=== modified file 'src/MatchResults.cc'
--- src/MatchResults.cc	2013-06-20 12:25:11 +0000
+++ src/MatchResults.cc	2014-02-28 14:46:51 +0000
@@ -96,7 +96,10 @@
     if(p->sorted)
         return;
     MatchResults *me = const_cast<MatchResults*>(this);
-    sort(me->p->results.rbegin(), me->p->results.rend());
+    stable_sort(me->p->results.rbegin(), me->p->results.rend(),
+            [](const pair<double, DocumentID> &a, const pair<double, DocumentID> &b) -> bool{
+        return a.first < b.first;
+    });
     me->p->sorted = true;
 }
 
 
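A note on the idiom, since it is easy to misread: sorting the *reverse* view ascending orders the underlying vector descending by relevancy, and `stable_sort` with a comparator that looks only at `first` keeps equal-relevancy results in their insertion order, which `pair`'s default `operator<` (which also compares the `DocumentID`) did not guarantee. A standalone sketch of the behaviour; the `DocumentID` typedef here is a stand-in for the real one in libcolumbus:

```cpp
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

using namespace std;

int main() {
    typedef unsigned int DocumentID; // stand-in for the libcolumbus typedef
    vector<pair<double, DocumentID>> results = {
        {0.5, 10}, {2.0, 20}, {0.5, 30}, // two ties at relevancy 0.5
    };
    // Ascending sort of the reverse view == descending sort of the vector;
    // stable_sort keeps the 0.5 ties in insertion order (doc 10 before 30).
    stable_sort(results.rbegin(), results.rend(),
            [](const pair<double, DocumentID> &a, const pair<double, DocumentID> &b) {
        return a.first < b.first;
    });
    for(const auto &r : results)
        printf("%.1f doc %u\n", r.first, r.second);
    // Prints: 2.0 doc 20, 0.5 doc 10, 0.5 doc 30
    return 0;
}
```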
=== modified file 'src/Matcher.cc'
--- src/Matcher.cc	2013-08-07 12:20:52 +0000
+++ src/Matcher.cc	2014-02-28 14:46:51 +0000
@@ -36,6 +36,7 @@
 #include <stdexcept>
 #include <map>
 #include <vector>
+#include <algorithm>
 
 #ifdef HAS_SPARSE_HASH
 #include <google/sparse_hash_map>
@@ -98,6 +99,7 @@
     IndexWeights weights;
     MatcherStatistics stats;
     WordStore store;
+    map<pair<DocumentID, WordID>, size_t> originalSizes; // Lengths of original documents.
 };
 
 void ReverseIndex::add(const WordID wordID, const WordID indexID, const DocumentID id) {
@@ -230,15 +232,6 @@
     }
 }
 
-static void expandQuery(const WordList &query, WordList &expandedQuery) {
-    for(size_t i=0; i<query.size(); i++)
-        expandedQuery.addWord(query[i]);
-
-    for(size_t i=0; i<query.size()-1; i++) {
-        expandedQuery.addWord(query[i].join(query[i+1]));
-    }
-}
-
 static bool subtermsMatch(MatcherPrivate *p, const ResultFilter &filter, size_t term, DocumentID id) {
     for(size_t subTerm=0; subTerm < filter.numSubTerms(term); subTerm++) {
         const Word &filterName = filter.getField(term, subTerm);
@@ -286,6 +279,10 @@
         const Word &fieldName = textNames[ti];
         const WordID fieldID = p->store.getID(fieldName);
         const WordList &text = d.getText(fieldName);
+        pair<DocumentID, WordID> lengths;
+        lengths.first = d.getID();
+        lengths.second = fieldID;
+        p->originalSizes[lengths] = text.size();
         for(size_t wi=0; wi<text.size(); wi++) {
             const Word &word = text[wi];
             const WordID wordID = p->store.getID(word);
@@ -336,16 +333,14 @@
     const int maxIterations = 1;
     const int increment = LevenshteinIndex::getDefaultError();
     const size_t minMatches = 10;
-    WordList expandedQuery;
     MatchResults allMatches;
 
     if(query.size() == 0)
         return matchedDocuments;
-    expandQuery(query, expandedQuery);
     // Try to search with ever growing error until we find enough matches.
     for(int i=0; i<maxIterations; i++) {
         MatchResults matches;
-        matchWithRelevancy(expandedQuery, params, i*increment, matches);
+        matchWithRelevancy(query, params, i*increment, matches);
         if(matches.size() >= minMatches || i == maxIterations-1) {
             allMatches.addResults(matches);
             break;
@@ -392,5 +387,69 @@
     return p->weights;
 }
 
+static map<DocumentID, size_t> countExacts(MatcherPrivate *p, const WordList &query, const WordID indexID) {
+    map<DocumentID, size_t> matchCounts;
+    for(size_t i=0; i<query.size(); i++) {
+        const Word &w = query[i];
+        if(w.length() == 0 || !p->store.hasWord(w)) {
+            continue;
+        }
+        WordID curWord = p->store.getID(w);
+        vector<DocumentID> exacts;
+        p->reverseIndex.findDocuments(curWord, indexID, exacts);
+        for(const auto &i : exacts) {
+            matchCounts[i]++; // Default is zero initialisation.
+        }
+    }
+    return matchCounts;
+}
+
+struct DocCount {
+    DocumentID id;
+    size_t matches;
+};
+
+MatchResults Matcher::onlineMatch(const WordList &query, const Word &primaryIndex) {
+    MatchResults results;
+    set<DocumentID> exactMatched;
+    map<DocumentID, double> accumulator;
+    if(!p->store.hasWord(primaryIndex)) {
+        string msg("Index named ");
+        msg += primaryIndex.asUtf8();
+        msg += " is not known";
+        throw invalid_argument(msg);
+    }
+    WordID indexID = p->store.getID(primaryIndex);
+    // How many times each document matched with zero error.
+    vector<DocCount> stats;
+    for(const auto &i : countExacts(p, query, indexID)) {
+        DocCount c;
+        pair<DocumentID, WordID> key;
+        exactMatched.insert(i.first);
+        key.first = i.first;
+        key.second = indexID;
+        c.id = i.first;
+        c.matches = i.second;
+        stats.push_back(c);
+    }
+    for(const auto &i: stats) {
+        accumulator[i.id] = 2*i.matches;
+        if(i.matches == query.size()
+                && i.matches == p->originalSizes[make_pair(i.id, indexID)]) { // Perfect match.
+            accumulator[i.id] += 100;
+        }
+    }
+    // Merge in fuzzy matches.
+    MatchResults fuzzyResults = match(query);
+    for(size_t i = 0; i<fuzzyResults.size(); i++) {
+        DocumentID docid = fuzzyResults.getDocumentID(i);
+        accumulator[docid] += fuzzyResults.getRelevancy(i);
+    }
+    for(const auto &i : accumulator) {
+        results.addResult(i.first, i.second);
+    }
+    return results;
+}
+
 COL_NAMESPACE_END
 
 
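To make the scoring concrete, here is a simplified model of what onlineMatch computes for the testPerfect case further down in this diff: every exact word hit in the primary field is worth 2 points, a document whose primary field is exactly the query (every query word matched and the field has no extra words) earns a +100 bonus, and the ordinary fuzzy match() relevancy is then added on top (omitted here). The exact-match counts assume the indexer's usual case folding.

```cpp
#include <cstddef>
#include <cstdio>
#include <map>

using namespace std;

typedef unsigned int DocumentID; // stand-in for the libcolumbus typedef

int main() {
    // Exact-match counts and field lengths as countExacts/originalSizes
    // would hold them for the query "save" against testPerfect's documents.
    struct { DocumentID id; size_t matches; size_t fieldLen; } docs[] = {
        {1, 1, 2}, // "Save as"
        {0, 1, 1}, // "Save"  <- matches == query size == field length
        {2, 1, 2}, // "Save yourself"
        {3, 1, 3}, // "Save the whales"
    };
    const size_t querySize = 1;
    map<DocumentID, double> accumulator;
    for(const auto &d : docs) {
        accumulator[d.id] = 2.0*d.matches;    // 2 points per exact word hit
        if(d.matches == querySize && d.matches == d.fieldLen)
            accumulator[d.id] += 100;         // perfect-match bonus
        // (the real code then adds each document's fuzzy match() relevancy)
    }
    for(const auto &a : accumulator)
        printf("doc %u -> %.0f\n", a.first, a.second);
    // Prints: doc 0 -> 102, docs 1-3 -> 2, so "Save" wins decisively.
    return 0;
}
```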
=== modified file 'src/WordStore.cc'
--- src/WordStore.cc	2013-01-31 10:23:45 +0000
+++ src/WordStore.cc	2014-02-28 14:46:51 +0000
@@ -53,15 +53,19 @@
 }
 
 WordID WordStore::getID(const Word &w) {
-    TrieOffset node = p->words.findWord(w);
-    if(node)
-        return p->words.getWordID(node);
-    node = p->words.insertWord(w, p->wordIndex.size());
+    if(p->words.hasWord(w)) {
+        return p->words.getWordID(p->words.findWord(w));
+    }
+    TrieOffset node = p->words.insertWord(w, p->wordIndex.size());
     p->wordIndex.push_back(node);
     WordID result = p->wordIndex.size()-1;
     return result;
 }
 
+bool WordStore::hasWord(const Word &w) const {
+    return p->words.hasWord(w);
+}
+
 Word WordStore::getWord(const WordID id) const {
     if(!hasWord(id)) {
         throw out_of_range("Tried to access non-existing WordID in WordStore.");
 
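Worth noting: getID() interns, that is, it inserts unknown words as a side effect, so query-time code must not call it blindly or arbitrary user-typed terms would grow the store. That is what the new hasWord() overload is for, and why countExacts() above probes hasWord() before calling getID(). A sketch of the caller-side pattern; the `Word.hh` header name is an assumption:

```cpp
#include "WordStore.hh"
#include "Word.hh" // assumed header name

using namespace Columbus;

// Look up a word's ID without ever mutating the store.
bool lookupOnly(WordStore &store, const Word &w, WordID &idOut) {
    if(!store.hasWord(w))   // read-only probe, no insertion
        return false;
    idOut = store.getID(w); // safe now: the word exists, so this only looks it up
    return true;
}
```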
=== modified file 'test/MatcherTest.cc'
--- test/MatcherTest.cc	2013-06-20 12:10:40 +0000
+++ test/MatcherTest.cc	2014-02-28 14:46:51 +0000
@@ -23,6 +23,7 @@
 #include "WordList.hh"
 #include "Document.hh"
 #include "MatchResults.hh"
+#include "ColumbusHelpers.hh"
 #include <cassert>
 
 using namespace Columbus;
@@ -123,7 +124,169 @@
     c.addDocument(d2);
     m.index(c);
 
-    matches = m.match("Sara Michell Geller");
+    matches = m.match("Sari Michell Geller");
+    assert(matches.getDocumentID(0) == correct);
+}
+
+void testSentence() {
+    Corpus c;
+    DocumentID correct = 1;
+    DocumentID wrong = 0;
+    Document d1(correct);
+    Document d2(wrong);
+    Word fieldName("name");
+    Word secondName("context");
+    Matcher m;
+    MatchResults matches;
+
+    d1.addText(fieldName, "Fit Canvas to Layers");
+    d1.addText(secondName, "View Zoom (100%)");
+    d2.addText(fieldName, "Fit image in Window");
+    d2.addText(secondName, "Image");
+
+    c.addDocument(d1);
+    c.addDocument(d2);
+
+    m.index(c);
+    matches = m.match("fit canvas to layers");
+    assert(matches.getDocumentID(0) == correct);
+}
+
+void testExactOrder() {
+    Corpus c;
+    DocumentID correct = 1;
+    DocumentID wrong = 0;
+    DocumentID moreWrong = 100;
+    Document d1(correct);
+    Document d2(wrong);
+    Document d3(moreWrong);
+    Word fieldName("name");
+    Word secondName("context");
+    Matcher m;
+    MatchResults matches;
+    WordList q = splitToWords("fit canvas to layers");
+    d1.addText(fieldName, "Fit Canvas to Layers");
+    d1.addText(secondName, "View Zoom (100%)");
+    d2.addText(fieldName, "Fit image in Window");
+    d2.addText(secondName, "Image");
+    d3.addText(fieldName, "Not matching.");
+    d3.addText(secondName, "fit canvas to layers");
+    c.addDocument(d1);
+    c.addDocument(d2);
+    c.addDocument(d3);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() >= 1);
+    assert(matches.getDocumentID(0) == correct);
+}
+
+void testSmallestMatch() {
+    Corpus c;
+    DocumentID correct = 1;
+    DocumentID wrong = 0;
+    Document d1(correct);
+    Document d2(wrong);
+    Word fieldName("name");
+    Word field2("dummy");
+    Matcher m;
+    MatchResults matches;
+    WordList q = splitToWords("save");
+    d1.addText(fieldName, "save");
+    d1.addText(field2, "lots of text to ensure statistics of this field are ignored");
+    d2.addText(fieldName, "save as");
+    c.addDocument(d1);
+    c.addDocument(d2);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() == 2);
+    assert(matches.getDocumentID(0) == correct);
+}
+
+void noCommonMatch() {
+    Corpus c;
+    DocumentID correct = 1;
+    Document d1(correct);
+    Word fieldName("name");
+    Word field2("dummy");
+    Matcher m;
+    MatchResults matches;
+    WordList q = splitToWords("fit canvas to selection");
+    d1.addText(fieldName, "Preparing your Images for the Web");
+    d1.addText(fieldName, "Help user manual");
+    c.addDocument(d1);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() == 0);
+}
+
+void emptyMatch() {
+    Corpus c;
+    DocumentID correct = 1;
+    Document d1(correct);
+    Word fieldName("name");
+    Word field2("dummy");
+    Matcher m;
+    MatchResults matches;
+    WordList q;
+    d1.addText(fieldName, "Preparing your Images for the Web");
+    d1.addText(fieldName, "Help user manual");
+    c.addDocument(d1);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() == 0);
+}
+
+void testMatchCount() {
+    Corpus c;
+    DocumentID correct = 1;
+    DocumentID wrong = 0;
+    Document d1(correct);
+    Document d2(wrong);
+    Word fieldName("name");
+    Word secondName("context");
+    Matcher m;
+    MatchResults matches;
+    WordList q = splitToWords("fit canvas to selection");
+    d1.addText(fieldName, "Fit Canvas to Layers");
+    d1.addText(secondName, "View Zoom (100%)");
+    d2.addText(fieldName, "Selection editor");
+    d2.addText(secondName, "Windows dockable dialogs");
+    c.addDocument(d1);
+    c.addDocument(d2);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() == 2);
+    assert(matches.getDocumentID(0) == correct);
+}
+
+void testPerfect() {
+    Corpus c;
+    DocumentID correct = 0;
+    Document d1(1);
+    Document d2(correct);
+    Document d3(2);
+    Document d4(3);
+    Word fieldName("name");
+    Matcher m;
+    MatchResults matches;
+    WordList q = splitToWords("save");
+    d1.addText(fieldName, "Save as");
+    d2.addText(fieldName, "Save");
+    d3.addText(fieldName, "Save yourself");
+    d4.addText(fieldName, "Save the whales");
+    c.addDocument(d1);
+    c.addDocument(d2);
+    c.addDocument(d3);
+    c.addDocument(d4);
+
+    m.index(c);
+    matches = m.onlineMatch(q, fieldName);
+    assert(matches.size() >= 1);
     assert(matches.getDocumentID(0) == correct);
 }
 
@@ -132,6 +295,13 @@
     testMatcher();
     testRelevancy();
     testMultiWord();
+    testSentence();
+    testExactOrder();
+    testSmallestMatch();
+    noCommonMatch();
+    emptyMatch();
+    testMatchCount();
+    testPerfect();
 } catch(const std::exception &e) {
     fprintf(stderr, "Fail: %s\n", e.what());
     return 666;
 
=== modified file 'test/TrieTest.cc'
--- test/TrieTest.cc	2013-04-03 13:50:54 +0000
+++ test/TrieTest.cc	2014-02-28 14:46:51 +0000
@@ -46,10 +46,28 @@
     assert(result == w2);
 }
 
+void testHas() {
+    Trie t;
+    Word w1("abc");
+    Word w2("abd");
+    Word w3("a");
+    Word w4("x");
+    Word result;
+
+    WordID i1 = 1;
+
+    assert(t.numWords() == 0);
+    t.insertWord(w1, i1);
+    assert(t.hasWord(w1));
+    assert(!t.hasWord(w2));
+    assert(!t.hasWord(w3));
+    assert(!t.hasWord(w4));
+}
 
 int main(int /*argc*/, char **/*argv*/) {
     // Move basic tests from levtrietest here.
     testWordBuilding();
+    testHas();
     return 0;
 }
 
Requests:
* Increment the Debian version number
* Add some more "save foo" noise (before and after "save") to the save test (see the sketch below)
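For the second request, a sketch of what the extended test might look like, with extra "save foo" documents both before and after the perfect match so it has to win regardless of insertion order; document IDs and wording here are illustrative, not part of the branch. For the first request, running `dch -i` in the package root would bump the changelog version.

```cpp
// Hypothetical extension of testPerfect in test/MatcherTest.cc.
void testPerfectWithNoise() {
    Corpus c;
    DocumentID correct = 0;
    Document d1(1), d2(correct), d3(2), d4(3), d5(4);
    Word fieldName("name");
    Matcher m;
    MatchResults matches;
    WordList q = splitToWords("save");
    d1.addText(fieldName, "Save as");         // noise before the perfect match
    d5.addText(fieldName, "Save a copy");     // more noise before
    d2.addText(fieldName, "Save");            // the perfect match
    d3.addText(fieldName, "Save yourself");   // noise after
    d4.addText(fieldName, "Save the whales"); // noise after
    c.addDocument(d1);
    c.addDocument(d5);
    c.addDocument(d2);
    c.addDocument(d3);
    c.addDocument(d4);

    m.index(c);
    matches = m.onlineMatch(q, fieldName);
    assert(matches.size() >= 1);
    assert(matches.getDocumentID(0) == correct);
}
```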