Merge lp:~jpakkane/libcolumbus/hud-rework into lp:libcolumbus

Proposed by Jussi Pakkanen
Status: Merged
Approved by: Pete Woods
Approved revision: 485
Merged at revision: 461
Proposed branch: lp:~jpakkane/libcolumbus/hud-rework
Merge into: lp:libcolumbus
Diff against target: 468 lines (+291/-19)
9 files modified
CMakeLists.txt (+1/-1)
debian/changelog (+6/-0)
include/Matcher.hh (+11/-0)
include/WordStore.hh (+1/-0)
src/MatchResults.cc (+4/-1)
src/Matcher.cc (+71/-12)
src/WordStore.cc (+8/-4)
test/MatcherTest.cc (+171/-1)
test/TrieTest.cc (+18/-0)
To merge this branch: bzr merge lp:~jpakkane/libcolumbus/hud-rework
Reviewer Review Type Date Requested Status
Pete Woods (community) Approve
PS Jenkins bot (community) continuous-integration Approve
Review via email: mp+208799@code.launchpad.net

Commit message

Add new online search mode to get better performance in HUD.

Description of the change

Add new online search mode to get better performance in HUD.

To post a comment you must log in.
lp:~jpakkane/libcolumbus/hud-rework updated
483. By Jussi Pakkanen

Bumped version number.

Revision history for this message
Pete Woods (pete-woods) wrote :

Requests:

* Increment the debian version number
* Add some more "save foo" noise (before and after "save") to the save test

Revision history for this message
PS Jenkins bot (ps-jenkins) wrote :
review: Approve (continuous-integration)
lp:~jpakkane/libcolumbus/hud-rework updated
484. By Jussi Pakkanen

Some test cleanup.

Revision history for this message
Jussi Pakkanen (jpakkane) wrote :

Fixed.

Revision history for this message
PS Jenkins bot (ps-jenkins) wrote :
review: Needs Fixing (continuous-integration)
lp:~jpakkane/libcolumbus/hud-rework updated
485. By Jussi Pakkanen

Blah.

Revision history for this message
PS Jenkins bot (ps-jenkins) wrote :
review: Approve (continuous-integration)
Revision history for this message
Pete Woods (pete-woods) wrote :

Looks good to me! Tried with the dependent branch of HUD (lp:~pete-woods/hud/tweak-search-parameters).

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'CMakeLists.txt'
--- CMakeLists.txt 2013-08-09 19:35:42 +0000
+++ CMakeLists.txt 2014-02-28 14:46:51 +0000
@@ -38,7 +38,7 @@
38set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")38set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
3939
40set(SO_VERSION_MAJOR "1")40set(SO_VERSION_MAJOR "1")
41set(SO_VERSION_MINOR "0")41set(SO_VERSION_MINOR "1")
42set(SO_VERSION_PATCH "0")42set(SO_VERSION_PATCH "0")
4343
44set(SO_VERSION "${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH}")44set(SO_VERSION "${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH}")
4545
=== modified file 'debian/changelog'
--- debian/changelog 2014-01-20 19:43:49 +0000
+++ debian/changelog 2014-02-28 14:46:51 +0000
@@ -1,3 +1,9 @@
1libcolumbus (1.1.0-0ubuntu1) UNRELEASED; urgency=medium
2
3 * New online search mode
4
5 -- Jussi Pakkanen <jussi.pakkanen@ubuntu.com> Fri, 28 Feb 2014 15:44:25 +0200
6
1libcolumbus (1.0.0+14.04.20140120-0ubuntu1) trusty; urgency=low7libcolumbus (1.0.0+14.04.20140120-0ubuntu1) trusty; urgency=low
28
3 * New rebuild forced9 * New rebuild forced
410
=== modified file 'include/Matcher.hh'
--- include/Matcher.hh 2013-08-07 12:20:52 +0000
+++ include/Matcher.hh 2014-02-28 14:46:51 +0000
@@ -60,6 +60,17 @@
60 void index(const Corpus &c);60 void index(const Corpus &c);
61 ErrorValues& getErrorValues();61 ErrorValues& getErrorValues();
62 IndexWeights& getIndexWeights();62 IndexWeights& getIndexWeights();
63 /*
64 * This function is optimized for online matches, that is, queries
65 * that are live updated during typing. It uses slightly different
66 * search heuristics to ensure results that "feel good" to humans.
67 *
68 * The second argument is the field that should be the primary focus.
69 * Usually it means having the text that will be shown to the user.
70 * As an example, in the HUD, this field would contain the command
71 * (and nothing else) that will be executed.
72 */
73 MatchResults onlineMatch(const WordList &query, const Word &primaryIndex);
63};74};
6475
65COL_NAMESPACE_END76COL_NAMESPACE_END
6677
=== modified file 'include/WordStore.hh'
--- include/WordStore.hh 2013-06-14 12:26:10 +0000
+++ include/WordStore.hh 2014-02-28 14:46:51 +0000
@@ -49,6 +49,7 @@
4949
5050
51 WordID getID(const Word &w);51 WordID getID(const Word &w);
52 bool hasWord(const Word &w) const;
52 Word getWord(const WordID id) const;53 Word getWord(const WordID id) const;
53 bool hasWord(const WordID id) const;54 bool hasWord(const WordID id) const;
54};55};
5556
=== modified file 'src/MatchResults.cc'
--- src/MatchResults.cc 2013-06-20 12:25:11 +0000
+++ src/MatchResults.cc 2014-02-28 14:46:51 +0000
@@ -96,7 +96,10 @@
96 if(p->sorted)96 if(p->sorted)
97 return;97 return;
98 MatchResults *me = const_cast<MatchResults*>(this);98 MatchResults *me = const_cast<MatchResults*>(this);
99 sort(me->p->results.rbegin(), me->p->results.rend());99 stable_sort(me->p->results.rbegin(), me->p->results.rend(),
100 [](const pair<double, DocumentID> &a, const pair<double, DocumentID> &b) -> bool{
101 return a.first < b.first;
102 });
100 me->p->sorted = true;103 me->p->sorted = true;
101}104}
102105
103106
=== modified file 'src/Matcher.cc'
--- src/Matcher.cc 2013-08-07 12:20:52 +0000
+++ src/Matcher.cc 2014-02-28 14:46:51 +0000
@@ -36,6 +36,7 @@
36#include <stdexcept>36#include <stdexcept>
37#include <map>37#include <map>
38#include <vector>38#include <vector>
39#include <algorithm>
3940
40#ifdef HAS_SPARSE_HASH41#ifdef HAS_SPARSE_HASH
41#include <google/sparse_hash_map>42#include <google/sparse_hash_map>
@@ -98,6 +99,7 @@
98 IndexWeights weights;99 IndexWeights weights;
99 MatcherStatistics stats;100 MatcherStatistics stats;
100 WordStore store;101 WordStore store;
102 map<pair<DocumentID, WordID>, size_t> originalSizes; // Lengths of original documents.
101};103};
102104
103void ReverseIndex::add(const WordID wordID, const WordID indexID, const DocumentID id) {105void ReverseIndex::add(const WordID wordID, const WordID indexID, const DocumentID id) {
@@ -230,15 +232,6 @@
230 }232 }
231}233}
232234
233static void expandQuery(const WordList &query, WordList &expandedQuery) {
234 for(size_t i=0; i<query.size(); i++)
235 expandedQuery.addWord(query[i]);
236
237 for(size_t i=0; i<query.size()-1; i++) {
238 expandedQuery.addWord(query[i].join(query[i+1]));
239 }
240}
241
242static bool subtermsMatch(MatcherPrivate *p, const ResultFilter &filter, size_t term, DocumentID id) {235static bool subtermsMatch(MatcherPrivate *p, const ResultFilter &filter, size_t term, DocumentID id) {
243 for(size_t subTerm=0; subTerm < filter.numSubTerms(term); subTerm++) {236 for(size_t subTerm=0; subTerm < filter.numSubTerms(term); subTerm++) {
244 const Word &filterName = filter.getField(term, subTerm);237 const Word &filterName = filter.getField(term, subTerm);
@@ -286,6 +279,10 @@
286 const Word &fieldName = textNames[ti];279 const Word &fieldName = textNames[ti];
287 const WordID fieldID = p->store.getID(fieldName);280 const WordID fieldID = p->store.getID(fieldName);
288 const WordList &text = d.getText(fieldName);281 const WordList &text = d.getText(fieldName);
282 pair<DocumentID, WordID> lengths;
283 lengths.first = d.getID();
284 lengths.second = fieldID;
285 p->originalSizes[lengths] = text.size();
289 for(size_t wi=0; wi<text.size(); wi++) {286 for(size_t wi=0; wi<text.size(); wi++) {
290 const Word &word = text[wi];287 const Word &word = text[wi];
291 const WordID wordID = p->store.getID(word);288 const WordID wordID = p->store.getID(word);
@@ -336,16 +333,14 @@
336 const int maxIterations = 1;333 const int maxIterations = 1;
337 const int increment = LevenshteinIndex::getDefaultError();334 const int increment = LevenshteinIndex::getDefaultError();
338 const size_t minMatches = 10;335 const size_t minMatches = 10;
339 WordList expandedQuery;
340 MatchResults allMatches;336 MatchResults allMatches;
341337
342 if(query.size() == 0)338 if(query.size() == 0)
343 return matchedDocuments;339 return matchedDocuments;
344 expandQuery(query, expandedQuery);
345 // Try to search with ever growing error until we find enough matches.340 // Try to search with ever growing error until we find enough matches.
346 for(int i=0; i<maxIterations; i++) {341 for(int i=0; i<maxIterations; i++) {
347 MatchResults matches;342 MatchResults matches;
348 matchWithRelevancy(expandedQuery, params, i*increment, matches);343 matchWithRelevancy(query, params, i*increment, matches);
349 if(matches.size() >= minMatches || i == maxIterations-1) {344 if(matches.size() >= minMatches || i == maxIterations-1) {
350 allMatches.addResults(matches);345 allMatches.addResults(matches);
351 break;346 break;
@@ -392,5 +387,69 @@
392 return p->weights;387 return p->weights;
393}388}
394389
390static map<DocumentID, size_t> countExacts(MatcherPrivate *p, const WordList &query, const WordID indexID) {
391 map<DocumentID, size_t> matchCounts;
392 for(size_t i=0; i<query.size(); i++) {
393 const Word &w = query[i];
394 if(w.length() == 0 || !p->store.hasWord(w)) {
395 continue;
396 }
397 WordID curWord = p->store.getID(w);
398 vector<DocumentID> exacts;
399 p->reverseIndex.findDocuments(curWord, indexID, exacts);
400 for(const auto &i : exacts) {
401 matchCounts[i]++; // Default is zero initialisation.
402 }
403 }
404 return matchCounts;
405}
406
407struct DocCount {
408 DocumentID id;
409 size_t matches;
410};
411
412MatchResults Matcher::onlineMatch(const WordList &query, const Word &primaryIndex) {
413 MatchResults results;
414 set<DocumentID> exactMatched;
415 map<DocumentID, double> accumulator;
416 if(!p->store.hasWord(primaryIndex)) {
417 string msg("Index named ");
418 msg += primaryIndex.asUtf8();
419 msg += " is not known";
420 throw invalid_argument(msg);
421 }
422 WordID indexID = p->store.getID(primaryIndex);
423 // How many times each document matched with zero error.
424 vector<DocCount> stats;
425 for(const auto &i : countExacts(p, query, indexID)) {
426 DocCount c;
427 pair<DocumentID, WordID> key;
428 exactMatched.insert(i.first);
429 key.first = i.first;
430 key.second = indexID;
431 c.id = i.first;
432 c.matches = i.second;
433 stats.push_back(c);
434 }
435 for(const auto &i: stats) {
436 accumulator[i.id] = 2*i.matches;
437 if(i.matches == query.size()
438 && i.matches == p->originalSizes[make_pair(i.id, indexID)]) { // Perfect match.
439 accumulator[i.id] += 100;
440 }
441 }
442 // Merge in fuzzy matches.
443 MatchResults fuzzyResults = match(query);
444 for(size_t i = 0; i<fuzzyResults.size(); i++) {
445 DocumentID docid = fuzzyResults.getDocumentID(i);
446 accumulator[docid] += fuzzyResults.getRelevancy(i);
447 }
448 for(const auto &i : accumulator) {
449 results.addResult(i.first, i.second);
450 }
451 return results;
452}
453
395COL_NAMESPACE_END454COL_NAMESPACE_END
396455
397456
=== modified file 'src/WordStore.cc'
--- src/WordStore.cc 2013-01-31 10:23:45 +0000
+++ src/WordStore.cc 2014-02-28 14:46:51 +0000
@@ -53,15 +53,19 @@
53}53}
5454
55WordID WordStore::getID(const Word &w) {55WordID WordStore::getID(const Word &w) {
56 TrieOffset node = p->words.findWord(w);56 if(p->words.hasWord(w)) {
57 if(node)57 return p->words.getWordID(p->words.findWord(w));
58 return p->words.getWordID(node);58 }
59 node = p->words.insertWord(w, p->wordIndex.size());59 TrieOffset node = p->words.insertWord(w, p->wordIndex.size());
60 p->wordIndex.push_back(node);60 p->wordIndex.push_back(node);
61 WordID result = p->wordIndex.size()-1;61 WordID result = p->wordIndex.size()-1;
62 return result;62 return result;
63}63}
6464
65bool WordStore::hasWord(const Word &w) const {
66 return p->words.hasWord(w);
67}
68
65Word WordStore::getWord(const WordID id) const {69Word WordStore::getWord(const WordID id) const {
66 if(!hasWord(id)) {70 if(!hasWord(id)) {
67 throw out_of_range("Tried to access non-existing WordID in WordStore.");71 throw out_of_range("Tried to access non-existing WordID in WordStore.");
6872
=== modified file 'test/MatcherTest.cc'
--- test/MatcherTest.cc 2013-06-20 12:10:40 +0000
+++ test/MatcherTest.cc 2014-02-28 14:46:51 +0000
@@ -23,6 +23,7 @@
23#include "WordList.hh"23#include "WordList.hh"
24#include "Document.hh"24#include "Document.hh"
25#include "MatchResults.hh"25#include "MatchResults.hh"
26#include "ColumbusHelpers.hh"
26#include <cassert>27#include <cassert>
2728
28using namespace Columbus;29using namespace Columbus;
@@ -123,7 +124,169 @@
123 c.addDocument(d2);124 c.addDocument(d2);
124 m.index(c);125 m.index(c);
125126
126 matches = m.match("Sara Michell Geller");127 matches = m.match("Sari Michell Geller");
128 assert(matches.getDocumentID(0) == correct);
129}
130
131void testSentence() {
132 Corpus c;
133 DocumentID correct = 1;
134 DocumentID wrong = 0;
135 Document d1(correct);
136 Document d2(wrong);
137 Word fieldName("name");
138 Word secondName("context");
139 Matcher m;
140 MatchResults matches;
141
142 d1.addText(fieldName, "Fit Canvas to Layers");
143 d1.addText(secondName, "View Zoom (100%)");
144 d2.addText(fieldName, "Fit image in Window");
145 d2.addText(secondName, "Image");
146
147 c.addDocument(d1);
148 c.addDocument(d2);
149
150 m.index(c);
151 matches = m.match("fit canvas to layers");
152 assert(matches.getDocumentID(0) == correct);
153}
154
155void testExactOrder() {
156 Corpus c;
157 DocumentID correct = 1;
158 DocumentID wrong = 0;
159 DocumentID moreWrong = 100;
160 Document d1(correct);
161 Document d2(wrong);
162 Document d3(moreWrong);
163 Word fieldName("name");
164 Word secondName("context");
165 Matcher m;
166 MatchResults matches;
167 WordList q = splitToWords("fit canvas to layers");
168 d1.addText(fieldName, "Fit Canvas to Layers");
169 d1.addText(secondName, "View Zoom (100%)");
170 d2.addText(fieldName, "Fit image in Window");
171 d2.addText(secondName, "Image");
172 d3.addText(fieldName, "Not matching.");
173 d3.addText(secondName, "fit canvas to layers");
174 c.addDocument(d1);
175 c.addDocument(d2);
176 c.addDocument(d3);
177
178 m.index(c);
179 matches = m.onlineMatch(q, fieldName);
180 assert(matches.size() >= 1);
181 assert(matches.getDocumentID(0) == correct);
182}
183
184void testSmallestMatch() {
185 Corpus c;
186 DocumentID correct = 1;
187 DocumentID wrong = 0;
188 Document d1(correct);
189 Document d2(wrong);
190 Word fieldName("name");
191 Word field2("dummy");
192 Matcher m;
193 MatchResults matches;
194 WordList q = splitToWords("save");
195 d1.addText(fieldName, "save");
196 d1.addText(field2, "lots of text to ensure statistics of this field are ignored");
197 d2.addText(fieldName, "save as");
198 c.addDocument(d1);
199 c.addDocument(d2);
200
201 m.index(c);
202 matches = m.onlineMatch(q, fieldName);
203 assert(matches.size() == 2);
204 assert(matches.getDocumentID(0) == correct);
205}
206
207void noCommonMatch() {
208 Corpus c;
209 DocumentID correct = 1;
210 Document d1(correct);
211 Word fieldName("name");
212 Word field2("dummy");
213 Matcher m;
214 MatchResults matches;
215 WordList q = splitToWords("fit canvas to selection");
216 d1.addText(fieldName, "Preparing your Images for the Web");
217 d1.addText(fieldName, "Help user manual");
218 c.addDocument(d1);
219
220 m.index(c);
221 matches = m.onlineMatch(q, fieldName);
222 assert(matches.size() == 0);
223}
224
225void emptyMatch() {
226 Corpus c;
227 DocumentID correct = 1;
228 Document d1(correct);
229 Word fieldName("name");
230 Word field2("dummy");
231 Matcher m;
232 MatchResults matches;
233 WordList q;
234 d1.addText(fieldName, "Preparing your Images for the Web");
235 d1.addText(fieldName, "Help user manual");
236 c.addDocument(d1);
237
238 m.index(c);
239 matches = m.onlineMatch(q, fieldName);
240 assert(matches.size() == 0);
241}
242
243void testMatchCount() {
244 Corpus c;
245 DocumentID correct = 1;
246 DocumentID wrong = 0;
247 Document d1(correct);
248 Document d2(wrong);
249 Word fieldName("name");
250 Word secondName("context");
251 Matcher m;
252 MatchResults matches;
253 WordList q = splitToWords("fit canvas to selection");
254 d1.addText(fieldName, "Fit Canvas to Layers");
255 d1.addText(secondName, "View Zoom (100%)");
256 d2.addText(fieldName, "Selection editor");
257 d2.addText(secondName, "Windows dockable dialogs");
258 c.addDocument(d1);
259 c.addDocument(d2);
260
261 m.index(c);
262 matches = m.onlineMatch(q, fieldName);
263 assert(matches.size() == 2);
264 assert(matches.getDocumentID(0) == correct);
265}
266
267void testPerfect() {
268 Corpus c;
269 DocumentID correct = 0;
270 Document d1(1);
271 Document d2(correct);
272 Document d3(2);
273 Document d4(3);
274 Word fieldName("name");
275 Matcher m;
276 MatchResults matches;
277 WordList q = splitToWords("save");
278 d1.addText(fieldName, "Save as");
279 d2.addText(fieldName, "Save");
280 d3.addText(fieldName, "Save yourself");
281 d4.addText(fieldName, "Save the whales");
282 c.addDocument(d1);
283 c.addDocument(d2);
284 c.addDocument(d3);
285 c.addDocument(d4);
286
287 m.index(c);
288 matches = m.onlineMatch(q, fieldName);
289 assert(matches.size() >= 1);
127 assert(matches.getDocumentID(0) == correct);290 assert(matches.getDocumentID(0) == correct);
128}291}
129292
@@ -132,6 +295,13 @@
132 testMatcher();295 testMatcher();
133 testRelevancy();296 testRelevancy();
134 testMultiWord();297 testMultiWord();
298 testSentence();
299 testExactOrder();
300 testSmallestMatch();
301 noCommonMatch();
302 emptyMatch();
303 testMatchCount();
304 testPerfect();
135 } catch(const std::exception &e) {305 } catch(const std::exception &e) {
136 fprintf(stderr, "Fail: %s\n", e.what());306 fprintf(stderr, "Fail: %s\n", e.what());
137 return 666;307 return 666;
138308
=== modified file 'test/TrieTest.cc'
--- test/TrieTest.cc 2013-04-03 13:50:54 +0000
+++ test/TrieTest.cc 2014-02-28 14:46:51 +0000
@@ -46,10 +46,28 @@
46 assert(result == w2);46 assert(result == w2);
47}47}
4848
49void testHas() {
50 Trie t;
51 Word w1("abc");
52 Word w2("abd");
53 Word w3("a");
54 Word w4("x");
55 Word result;
56
57 WordID i1 = 1;
58
59 assert(t.numWords() == 0);
60 t.insertWord(w1, i1);
61 assert(t.hasWord(w1));
62 assert(!t.hasWord(w2));
63 assert(!t.hasWord(w3));
64 assert(!t.hasWord(w4));
65}
4966
50int main(int /*argc*/, char **/*argv*/) {67int main(int /*argc*/, char **/*argv*/) {
51 // Move basic tests from levtrietest here.68 // Move basic tests from levtrietest here.
52 testWordBuilding();69 testWordBuilding();
70 testHas();
53 return 0;71 return 0;
54}72}
5573

Subscribers

People subscribed via source and target branches

to all changes: