Merge lp:~mhr3/zeitgeist/fts-extras into lp:~zeitgeist/zeitgeist/bluebird
- fts-extras
- Merge into bluebird
Proposed by
Michal Hruby
Status: | Merged |
---|---|
Merged at revision: | 391 |
Proposed branch: | lp:~mhr3/zeitgeist/fts-extras |
Merge into: | lp:~zeitgeist/zeitgeist/bluebird |
Prerequisite: | lp:~zeitgeist/zeitgeist/fts++ |
Diff against target: |
1057 lines (+678/-45) 15 files modified
configure.ac (+37/-0) extensions/fts++/Makefile.am (+5/-0) extensions/fts++/fts.cpp (+30/-0) extensions/fts++/fts.h (+13/-0) extensions/fts++/fts.vapi (+10/-0) extensions/fts++/indexer.cpp (+210/-40) extensions/fts++/indexer.h (+16/-1) extensions/fts++/stringutils.cpp (+87/-1) extensions/fts++/stringutils.h (+8/-0) extensions/fts++/test/Makefile.am (+5/-0) extensions/fts++/test/test-indexer.cpp (+89/-0) extensions/fts++/test/test-stringutils.cpp (+95/-0) extensions/fts++/zeitgeist-fts.vala (+17/-0) extensions/fts.vala (+49/-3) src/remote.vala (+7/-0) |
To merge this branch: | bzr merge lp:~mhr3/zeitgeist/fts-extras |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Siegfried Gevatter | Approve | ||
Review via email: mp+92430@code.launchpad.net |
Commit message
Description of the change
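Adds a few more features to FTS: a search API that also returns per-result relevancies, preprocessing of indexed strings (underscore removal, un-camelcasing, ASCII folding), and an optional dee-icu dependency used for the ASCII folding.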
To post a comment you must log in.
lp:~mhr3/zeitgeist/fts-extras
updated
- 438. By Michal Hruby
-
Lower prio of the timeout source
- 439. By Michal Hruby
-
Add more string utils
- 440. By Michal Hruby
-
Preprocess everything we index
- 441. By Michal Hruby
-
Few more fixes
- 442. By Michal Hruby
-
Add more tests
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'configure.ac' | |||
2 | --- configure.ac 2012-02-08 18:54:58 +0000 | |||
3 | +++ configure.ac 2012-02-10 12:11:19 +0000 | |||
4 | @@ -40,6 +40,30 @@ | |||
5 | 40 | AC_SUBST(ZEITGEIST_LIBS) | 40 | AC_SUBST(ZEITGEIST_LIBS) |
6 | 41 | 41 | ||
7 | 42 | ################################################# | 42 | ################################################# |
8 | 43 | # Dee-ICU check | ||
9 | 44 | ################################################# | ||
10 | 45 | DEE_ICU_REQUIRED=1.0.2 | ||
11 | 46 | |||
12 | 47 | AC_ARG_WITH([dee-icu], | ||
13 | 48 | AS_HELP_STRING([--with-dee-icu[=@<:@no/auto/yes@:>@]], | ||
14 | 49 | [Build the FTS extension with dee-icu]), | ||
15 | 50 | [with_dee_icu=$withval], | ||
16 | 51 | [with_dee_icu="auto"]) | ||
17 | 52 | |||
18 | 53 | if test "x$with_dee_icu" = "xauto" ; then | ||
19 | 54 | PKG_CHECK_EXISTS([dee-icu-1.0 >= $DEE_ICU_REQUIRED], | ||
20 | 55 | with_dee_icu="yes", | ||
21 | 56 | with_dee_icu="no") | ||
22 | 57 | fi | ||
23 | 58 | |||
24 | 59 | if test "x$with_dee_icu" = "xyes" ; then | ||
25 | 60 | PKG_CHECK_MODULES(DEE_ICU, dee-icu-1.0 >= $DEE_ICU_REQUIRED) | ||
26 | 61 | AC_DEFINE(HAVE_DEE_ICU, 1, [Have dee-icu]) | ||
27 | 62 | fi | ||
28 | 63 | |||
29 | 64 | AM_CONDITIONAL(HAVE_DEE_ICU, test "x$with_dee_icu" = "xyes") | ||
30 | 65 | |||
31 | 66 | ################################################# | ||
32 | 43 | # DBus service | 67 | # DBus service |
33 | 44 | ################################################# | 68 | ################################################# |
34 | 45 | 69 | ||
35 | @@ -88,3 +112,16 @@ | |||
36 | 88 | fi | 112 | fi |
37 | 89 | 113 | ||
38 | 90 | AC_OUTPUT | 114 | AC_OUTPUT |
39 | 115 | |||
40 | 116 | cat <<EOF | ||
41 | 117 | |||
42 | 118 | ${PACKAGE}-${VERSION} | ||
43 | 119 | |||
44 | 120 | Build Environment | ||
45 | 121 | Install Prefix: ${prefix} | ||
46 | 122 | |||
47 | 123 | Optional dependencies | ||
48 | 124 | dee-icu: ${with_dee_icu} | ||
49 | 125 | |||
50 | 126 | EOF | ||
51 | 127 | |||
52 | 91 | 128 | ||
53 | === modified file 'extensions/fts++/Makefile.am' | |||
54 | --- extensions/fts++/Makefile.am 2012-02-08 18:54:58 +0000 | |||
55 | +++ extensions/fts++/Makefile.am 2012-02-10 12:11:19 +0000 | |||
56 | @@ -76,6 +76,11 @@ | |||
57 | 76 | -lxapian \ | 76 | -lxapian \ |
58 | 77 | $(NULL) | 77 | $(NULL) |
59 | 78 | 78 | ||
60 | 79 | if HAVE_DEE_ICU | ||
61 | 80 | AM_CPPFLAGS += $(DEE_ICU_CFLAGS) | ||
62 | 81 | zeitgeist_fts_LDADD += $(DEE_ICU_LIBS) | ||
63 | 82 | endif | ||
64 | 83 | |||
65 | 79 | BUILT_SOURCES = \ | 84 | BUILT_SOURCES = \ |
66 | 80 | zeitgeist-internal.stamp \ | 85 | zeitgeist-internal.stamp \ |
67 | 81 | zeitgeist-fts_vala.stamp \ | 86 | zeitgeist-fts_vala.stamp \ |
68 | 82 | 87 | ||
69 | === modified file 'extensions/fts++/fts.cpp' | |||
70 | --- extensions/fts++/fts.cpp 2012-02-09 09:32:33 +0000 | |||
71 | +++ extensions/fts++/fts.cpp 2012-02-10 12:11:19 +0000 | |||
72 | @@ -84,6 +84,36 @@ | |||
73 | 84 | return results; | 84 | return results; |
74 | 85 | } | 85 | } |
75 | 86 | 86 | ||
76 | 87 | GPtrArray* | ||
77 | 88 | zeitgeist_indexer_search_with_relevancies (ZeitgeistIndexer *indexer, | ||
78 | 89 | const gchar *search_string, | ||
79 | 90 | ZeitgeistTimeRange *time_range, | ||
80 | 91 | GPtrArray *templates, | ||
81 | 92 | guint offset, | ||
82 | 93 | guint count, | ||
83 | 94 | ZeitgeistResultType result_type, | ||
84 | 95 | gdouble **relevancies, | ||
85 | 96 | gint *relevancies_size, | ||
86 | 97 | guint *matches, | ||
87 | 98 | GError **error) | ||
88 | 99 | { | ||
89 | 100 | GPtrArray *results; | ||
90 | 101 | ZeitgeistFTS::Controller *_indexer; | ||
91 | 102 | |||
92 | 103 | g_return_val_if_fail (indexer != NULL, NULL); | ||
93 | 104 | g_return_val_if_fail (search_string != NULL, NULL); | ||
94 | 105 | g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL); | ||
95 | 106 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); | ||
96 | 107 | |||
97 | 108 | _indexer = (ZeitgeistFTS::Controller*) indexer; | ||
98 | 109 | |||
99 | 110 | results = _indexer->indexer->SearchWithRelevancies ( | ||
100 | 111 | search_string, time_range, templates, offset, count, result_type, | ||
101 | 112 | relevancies, relevancies_size, matches, error); | ||
102 | 113 | |||
103 | 114 | return results; | ||
104 | 115 | } | ||
105 | 116 | |||
106 | 87 | void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, | 117 | void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, |
107 | 88 | GPtrArray *events) | 118 | GPtrArray *events) |
108 | 89 | { | 119 | { |
109 | 90 | 120 | ||
110 | === modified file 'extensions/fts++/fts.h' | |||
111 | --- extensions/fts++/fts.h 2012-02-09 09:32:33 +0000 | |||
112 | +++ extensions/fts++/fts.h 2012-02-10 12:11:19 +0000 | |||
113 | @@ -43,6 +43,19 @@ | |||
114 | 43 | guint *matches, | 43 | guint *matches, |
115 | 44 | GError **error); | 44 | GError **error); |
116 | 45 | 45 | ||
117 | 46 | GPtrArray* zeitgeist_indexer_search_with_relevancies | ||
118 | 47 | (ZeitgeistIndexer *indexer, | ||
119 | 48 | const gchar *search_string, | ||
120 | 49 | ZeitgeistTimeRange *time_range, | ||
121 | 50 | GPtrArray *templates, | ||
122 | 51 | guint offset, | ||
123 | 52 | guint count, | ||
124 | 53 | ZeitgeistResultType result_type, | ||
125 | 54 | gdouble **relevancies, | ||
126 | 55 | gint *relevancies_size, | ||
127 | 56 | guint *matches, | ||
128 | 57 | GError **error); | ||
129 | 58 | |||
130 | 46 | void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, | 59 | void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, |
131 | 47 | GPtrArray *events); | 60 | GPtrArray *events); |
132 | 48 | 61 | ||
133 | 49 | 62 | ||
134 | === modified file 'extensions/fts++/fts.vapi' | |||
135 | --- extensions/fts++/fts.vapi 2012-02-07 17:02:30 +0000 | |||
136 | +++ extensions/fts++/fts.vapi 2012-02-10 12:11:19 +0000 | |||
137 | @@ -14,6 +14,16 @@ | |||
138 | 14 | ResultType result_type, | 14 | ResultType result_type, |
139 | 15 | out uint matches) throws GLib.Error; | 15 | out uint matches) throws GLib.Error; |
140 | 16 | 16 | ||
141 | 17 | public GLib.GenericArray<Event> search_with_relevancies ( | ||
142 | 18 | string search_string, | ||
143 | 19 | TimeRange time_range, | ||
144 | 20 | GLib.GenericArray<Event> templates, | ||
145 | 21 | uint offset, | ||
146 | 22 | uint count, | ||
147 | 23 | ResultType result_type, | ||
148 | 24 | out double[] relevancies, | ||
149 | 25 | out uint matches) throws GLib.Error; | ||
150 | 26 | |||
151 | 17 | public void index_events (GLib.GenericArray<Event> events); | 27 | public void index_events (GLib.GenericArray<Event> events); |
152 | 18 | 28 | ||
153 | 19 | public void delete_events (uint[] event_ids); | 29 | public void delete_events (uint[] event_ids); |
154 | 20 | 30 | ||
155 | === modified file 'extensions/fts++/indexer.cpp' | |||
156 | --- extensions/fts++/indexer.cpp 2012-02-09 09:37:48 +0000 | |||
157 | +++ extensions/fts++/indexer.cpp 2012-02-10 12:11:19 +0000 | |||
158 | @@ -356,10 +356,40 @@ | |||
159 | 356 | } | 356 | } |
160 | 357 | } | 357 | } |
161 | 358 | 358 | ||
162 | 359 | std::string Indexer::PreprocessString (std::string const& input) | ||
163 | 360 | { | ||
164 | 361 | if (input.empty ()) return input; | ||
165 | 362 | |||
166 | 363 | std::string result (StringUtils::RemoveUnderscores (input)); | ||
167 | 364 | // a simple heuristic for the uncamelcaser | ||
168 | 365 | size_t num_digits = StringUtils::CountDigits (result); | ||
169 | 366 | if (result.length () > 3 && num_digits < result.length () / 2) | ||
170 | 367 | { | ||
171 | 368 | // FIXME: process digits?, atm they stay attached to the text | ||
172 | 369 | result = StringUtils::UnCamelcase (result); | ||
173 | 370 | } | ||
174 | 371 | |||
175 | 372 | std::string folded (StringUtils::AsciiFold (result)); | ||
176 | 373 | if (!folded.empty ()) | ||
177 | 374 | { | ||
178 | 375 | result += ' '; | ||
179 | 376 | result += folded; | ||
180 | 377 | } | ||
181 | 378 | |||
182 | 379 | #ifdef DEBUG_PREPROCESSING | ||
183 | 380 | if (input != result) | ||
184 | 381 | g_debug ("processed: %s\n-> %s", input.c_str (), result.c_str ()); | ||
185 | 382 | #endif | ||
186 | 383 | |||
187 | 384 | return result; | ||
188 | 385 | } | ||
189 | 386 | |||
190 | 359 | void Indexer::IndexText (std::string const& text) | 387 | void Indexer::IndexText (std::string const& text) |
191 | 360 | { | 388 | { |
192 | 361 | // FIXME: ascii folding! | ||
193 | 362 | tokenizer->index_text (text, 5); | 389 | tokenizer->index_text (text, 5); |
194 | 390 | // this is by definition already a human readable display string, | ||
195 | 391 | // so it shouldn't need removal of underscores and uncamelcase | ||
196 | 392 | tokenizer->index_text (StringUtils::AsciiFold (text), 5); | ||
197 | 363 | } | 393 | } |
198 | 364 | 394 | ||
199 | 365 | void Indexer::IndexUri (std::string const& uri, std::string const& origin) | 395 | void Indexer::IndexUri (std::string const& uri, std::string const& origin) |
200 | @@ -403,9 +433,10 @@ | |||
201 | 403 | gchar *pn = g_file_get_parse_name (f); | 433 | gchar *pn = g_file_get_parse_name (f); |
202 | 404 | gchar *basename = g_path_get_basename (pn); | 434 | gchar *basename = g_path_get_basename (pn); |
203 | 405 | 435 | ||
207 | 406 | // FIXME: remove unscores, CamelCase and process digits | 436 | // remove unscores, CamelCase and process digits |
208 | 407 | tokenizer->index_text (basename, 5); | 437 | std::string processed (PreprocessString (basename)); |
209 | 408 | tokenizer->index_text (basename, 5, "N"); | 438 | tokenizer->index_text (processed, 5); |
210 | 439 | tokenizer->index_text (processed, 5, "N"); | ||
211 | 409 | 440 | ||
212 | 410 | g_free (basename); | 441 | g_free (basename); |
213 | 411 | // limit the directory indexing to just a few levels | 442 | // limit the directory indexing to just a few levels |
214 | @@ -420,17 +451,17 @@ | |||
215 | 420 | g_free (dir); | 451 | g_free (dir); |
216 | 421 | g_free (pn); | 452 | g_free (pn); |
217 | 422 | 453 | ||
219 | 423 | while (path_component.length () > 2 && | 454 | while (path_component.length () > 2 && |
220 | 424 | weight_index < G_N_ELEMENTS (path_weights)) | 455 | weight_index < G_N_ELEMENTS (path_weights)) |
221 | 425 | { | 456 | { |
222 | 426 | // if this is already home directory we don't want it | 457 | // if this is already home directory we don't want it |
225 | 427 | if (path_component.length () == home_dir_path.length () && | 458 | if (path_component == home_dir_path) return; |
224 | 428 | path_component == home_dir_path) return; | ||
226 | 429 | 459 | ||
227 | 430 | gchar *name = g_path_get_basename (path_component.c_str ()); | 460 | gchar *name = g_path_get_basename (path_component.c_str ()); |
228 | 431 | 461 | ||
231 | 432 | // FIXME: un-underscore, uncamelcase, ascii fold | 462 | // un-underscore, uncamelcase, ascii fold |
232 | 433 | tokenizer->index_text (name, path_weights[weight_index++]); | 463 | processed = PreprocessString (name); |
233 | 464 | tokenizer->index_text (processed, path_weights[weight_index++]); | ||
234 | 434 | 465 | ||
235 | 435 | dir = g_path_get_dirname (path_component.c_str ()); | 466 | dir = g_path_get_dirname (path_component.c_str ()); |
236 | 436 | path_component = dir; | 467 | path_component = dir; |
237 | @@ -471,9 +502,10 @@ | |||
238 | 471 | 502 | ||
239 | 472 | if (g_utf8_validate (unescaped_basename, -1, NULL)) | 503 | if (g_utf8_validate (unescaped_basename, -1, NULL)) |
240 | 473 | { | 504 | { |
244 | 474 | // FIXME: remove unscores, CamelCase and process digits | 505 | // remove unscores, CamelCase and process digits |
245 | 475 | tokenizer->index_text (unescaped_basename, 5); | 506 | std::string processed (PreprocessString (unescaped_basename)); |
246 | 476 | tokenizer->index_text (unescaped_basename, 5, "N"); | 507 | tokenizer->index_text (processed, 5); |
247 | 508 | tokenizer->index_text (processed, 5, "N"); | ||
248 | 477 | } | 509 | } |
249 | 478 | 510 | ||
250 | 479 | // and also index hostname (taken from origin field if possible) | 511 | // and also index hostname (taken from origin field if possible) |
251 | @@ -505,6 +537,7 @@ | |||
252 | 505 | { | 537 | { |
253 | 506 | // we *really* don't want to index anything with this scheme | 538 | // we *really* don't want to index anything with this scheme |
254 | 507 | } | 539 | } |
255 | 540 | // how about special casing (s)ftp and ssh? | ||
256 | 508 | else | 541 | else |
257 | 509 | { | 542 | { |
258 | 510 | std::string authority, path, query; | 543 | std::string authority, path, query; |
259 | @@ -593,12 +626,11 @@ | |||
260 | 593 | unsigned name_weight = is_subject ? 5 : 2; | 626 | unsigned name_weight = is_subject ? 5 : 2; |
261 | 594 | unsigned comment_weight = 2; | 627 | unsigned comment_weight = 2; |
262 | 595 | 628 | ||
263 | 596 | // FIXME: ascii folding somewhere | ||
264 | 597 | |||
265 | 598 | val = g_app_info_get_display_name (ai); | 629 | val = g_app_info_get_display_name (ai); |
266 | 599 | if (val && val[0] != '\0') | 630 | if (val && val[0] != '\0') |
267 | 600 | { | 631 | { |
269 | 601 | std::string display_name (val); | 632 | std::string display_name (PreprocessString (val)); |
270 | 633 | |||
271 | 602 | tokenizer->index_text (display_name, name_weight); | 634 | tokenizer->index_text (display_name, name_weight); |
272 | 603 | tokenizer->index_text (display_name, name_weight, "A"); | 635 | tokenizer->index_text (display_name, name_weight, "A"); |
273 | 604 | } | 636 | } |
274 | @@ -606,9 +638,14 @@ | |||
275 | 606 | val = g_desktop_app_info_get_generic_name (dai); | 638 | val = g_desktop_app_info_get_generic_name (dai); |
276 | 607 | if (val && val[0] != '\0') | 639 | if (val && val[0] != '\0') |
277 | 608 | { | 640 | { |
278 | 641 | // this shouldn't need uncamelcasing | ||
279 | 609 | std::string generic_name (val); | 642 | std::string generic_name (val); |
280 | 643 | std::string generic_name_folded (StringUtils::AsciiFold (generic_name)); | ||
281 | 644 | |||
282 | 610 | tokenizer->index_text (generic_name, name_weight); | 645 | tokenizer->index_text (generic_name, name_weight); |
283 | 611 | tokenizer->index_text (generic_name, name_weight, "A"); | 646 | tokenizer->index_text (generic_name, name_weight, "A"); |
284 | 647 | tokenizer->index_text (generic_name_folded, name_weight); | ||
285 | 648 | tokenizer->index_text (generic_name_folded, name_weight, "A"); | ||
286 | 612 | } | 649 | } |
287 | 613 | 650 | ||
288 | 614 | if (!is_subject) return true; | 651 | if (!is_subject) return true; |
289 | @@ -642,7 +679,35 @@ | |||
290 | 642 | return true; | 679 | return true; |
291 | 643 | } | 680 | } |
292 | 644 | 681 | ||
294 | 645 | GPtrArray* Indexer::Search (const gchar *search_string, | 682 | std::string Indexer::CompileQueryString (const gchar *search_string, |
295 | 683 | ZeitgeistTimeRange *time_range, | ||
296 | 684 | GPtrArray *templates) | ||
297 | 685 | { | ||
298 | 686 | std::string query_string (search_string); | ||
299 | 687 | |||
300 | 688 | if (templates && templates->len > 0) | ||
301 | 689 | { | ||
302 | 690 | std::string filters (CompileEventFilterQuery (templates)); | ||
303 | 691 | query_string = "(" + query_string + ") AND (" + filters + ")"; | ||
304 | 692 | } | ||
305 | 693 | |||
306 | 694 | if (time_range) | ||
307 | 695 | { | ||
308 | 696 | gint64 start_time = zeitgeist_time_range_get_start (time_range); | ||
309 | 697 | gint64 end_time = zeitgeist_time_range_get_end (time_range); | ||
310 | 698 | |||
311 | 699 | if (start_time > 0 || end_time < G_MAXINT64) | ||
312 | 700 | { | ||
313 | 701 | std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time)); | ||
314 | 702 | query_string = "(" + query_string + ") AND (" + time_filter + ")"; | ||
315 | 703 | } | ||
316 | 704 | } | ||
317 | 705 | |||
318 | 706 | g_debug ("query: %s", query_string.c_str ()); | ||
319 | 707 | return query_string; | ||
320 | 708 | } | ||
321 | 709 | |||
322 | 710 | GPtrArray* Indexer::Search (const gchar *search, | ||
323 | 646 | ZeitgeistTimeRange *time_range, | 711 | ZeitgeistTimeRange *time_range, |
324 | 647 | GPtrArray *templates, | 712 | GPtrArray *templates, |
325 | 648 | guint offset, | 713 | guint offset, |
326 | @@ -654,28 +719,22 @@ | |||
327 | 654 | GPtrArray *results = NULL; | 719 | GPtrArray *results = NULL; |
328 | 655 | try | 720 | try |
329 | 656 | { | 721 | { |
352 | 657 | std::string query_string(search_string); | 722 | std::string query_string (CompileQueryString (search, time_range, templates)); |
353 | 658 | 723 | ||
354 | 659 | if (templates && templates->len > 0) | 724 | // When sorting by some result types, we need to fetch some extra events |
355 | 660 | { | 725 | // from the Xapian index because the final result set will be coalesced |
356 | 661 | std::string filters (CompileEventFilterQuery (templates)); | 726 | // on some property of the event |
357 | 662 | query_string = "(" + query_string + ") AND (" + filters + ")"; | 727 | guint maxhits; |
358 | 663 | } | 728 | if (result_type == 100 || |
359 | 664 | 729 | result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS || | |
360 | 665 | if (time_range) | 730 | result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS) |
361 | 666 | { | 731 | { |
362 | 667 | gint64 start_time = zeitgeist_time_range_get_start (time_range); | 732 | maxhits = count; |
363 | 668 | gint64 end_time = zeitgeist_time_range_get_end (time_range); | 733 | } |
364 | 669 | 734 | else | |
365 | 670 | if (start_time > 0 || end_time < G_MAXINT64) | 735 | { |
366 | 671 | { | 736 | maxhits = count * 3; |
367 | 672 | std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time)); | 737 | } |
346 | 673 | query_string = "(" + query_string + ") AND (" + time_filter + ")"; | ||
347 | 674 | } | ||
348 | 675 | } | ||
349 | 676 | |||
350 | 677 | // FIXME: which result types coalesce? | ||
351 | 678 | guint maxhits = count * 3; | ||
368 | 679 | 738 | ||
369 | 680 | if (result_type == 100) | 739 | if (result_type == 100) |
370 | 681 | { | 740 | { |
371 | @@ -686,7 +745,6 @@ | |||
372 | 686 | enquire->set_sort_by_value (VALUE_TIMESTAMP, true); | 745 | enquire->set_sort_by_value (VALUE_TIMESTAMP, true); |
373 | 687 | } | 746 | } |
374 | 688 | 747 | ||
375 | 689 | g_debug ("query: %s", query_string.c_str ()); | ||
376 | 690 | Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); | 748 | Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); |
377 | 691 | enquire->set_query (q); | 749 | enquire->set_query (q); |
378 | 692 | Xapian::MSet hits (enquire->get_mset (offset, maxhits)); | 750 | Xapian::MSet hits (enquire->get_mset (offset, maxhits)); |
379 | @@ -753,7 +811,119 @@ | |||
380 | 753 | } | 811 | } |
381 | 754 | catch (Xapian::Error const& e) | 812 | catch (Xapian::Error const& e) |
382 | 755 | { | 813 | { |
384 | 756 | g_warning ("Failed to index event: %s", e.get_msg ().c_str ()); | 814 | g_warning ("Failed to search index: %s", e.get_msg ().c_str ()); |
385 | 815 | g_set_error_literal (error, | ||
386 | 816 | ZEITGEIST_ENGINE_ERROR, | ||
387 | 817 | ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, | ||
388 | 818 | e.get_msg ().c_str ()); | ||
389 | 819 | } | ||
390 | 820 | |||
391 | 821 | return results; | ||
392 | 822 | } | ||
393 | 823 | |||
394 | 824 | GPtrArray* Indexer::SearchWithRelevancies (const gchar *search, | ||
395 | 825 | ZeitgeistTimeRange *time_range, | ||
396 | 826 | GPtrArray *templates, | ||
397 | 827 | guint offset, | ||
398 | 828 | guint count, | ||
399 | 829 | ZeitgeistResultType result_type, | ||
400 | 830 | gdouble **relevancies, | ||
401 | 831 | gint *relevancies_size, | ||
402 | 832 | guint *matches, | ||
403 | 833 | GError **error) | ||
404 | 834 | { | ||
405 | 835 | GPtrArray *results = NULL; | ||
406 | 836 | try | ||
407 | 837 | { | ||
408 | 838 | std::string query_string (CompileQueryString (search, time_range, templates)); | ||
409 | 839 | |||
410 | 840 | guint maxhits = count; | ||
411 | 841 | |||
412 | 842 | if (result_type == 100) | ||
413 | 843 | { | ||
414 | 844 | enquire->set_sort_by_relevance (); | ||
415 | 845 | } | ||
416 | 846 | else | ||
417 | 847 | { | ||
418 | 848 | enquire->set_sort_by_value (VALUE_TIMESTAMP, true); | ||
419 | 849 | } | ||
420 | 850 | |||
421 | 851 | Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); | ||
422 | 852 | enquire->set_query (q); | ||
423 | 853 | Xapian::MSet hits (enquire->get_mset (offset, maxhits)); | ||
424 | 854 | Xapian::doccount hitcount = hits.get_matches_estimated (); | ||
425 | 855 | |||
426 | 856 | if (result_type == 100) | ||
427 | 857 | { | ||
428 | 858 | std::vector<unsigned> event_ids; | ||
429 | 859 | std::vector<gdouble> relevancy_arr; | ||
430 | 860 | Xapian::MSetIterator iter, end; | ||
431 | 861 | for (iter = hits.begin (), end = hits.end (); iter != end; ++iter) | ||
432 | 862 | { | ||
433 | 863 | Xapian::Document doc(iter.get_document ()); | ||
434 | 864 | double unserialized = | ||
435 | 865 | Xapian::sortable_unserialise (doc.get_value (VALUE_EVENT_ID)); | ||
436 | 866 | unsigned event_id = static_cast<unsigned>(unserialized); | ||
437 | 867 | event_ids.push_back (event_id); | ||
438 | 868 | |||
439 | 869 | double rank = iter.get_percent () / 100.; | ||
440 | 870 | relevancy_arr.push_back (rank); | ||
441 | 871 | } | ||
442 | 872 | |||
443 | 873 | results = zeitgeist_db_reader_get_events (zg_reader, | ||
444 | 874 | &event_ids[0], | ||
445 | 875 | event_ids.size (), | ||
446 | 876 | NULL, | ||
447 | 877 | error); | ||
448 | 878 | |||
449 | 879 | if (results->len != relevancy_arr.size ()) | ||
450 | 880 | { | ||
451 | 881 | g_warning ("Results don't match relevancies!"); | ||
452 | 882 | g_set_error_literal (error, | ||
453 | 883 | ZEITGEIST_ENGINE_ERROR, | ||
454 | 884 | ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, | ||
455 | 885 | "Internal database error"); | ||
456 | 886 | return NULL; | ||
457 | 887 | } | ||
458 | 888 | |||
459 | 889 | if (relevancies) | ||
460 | 890 | { | ||
461 | 891 | *relevancies = (gdouble*) g_memdup (&relevancy_arr[0], | ||
462 | 892 | sizeof (gdouble) * results->len); | ||
463 | 893 | } | ||
464 | 894 | if (relevancies_size) | ||
465 | 895 | { | ||
466 | 896 | *relevancies_size = relevancy_arr.size (); | ||
467 | 897 | } | ||
468 | 898 | } | ||
469 | 899 | else | ||
470 | 900 | { | ||
471 | 901 | g_set_error_literal (error, | ||
472 | 902 | ZEITGEIST_ENGINE_ERROR, | ||
473 | 903 | ZEITGEIST_ENGINE_ERROR_INVALID_ARGUMENT, | ||
474 | 904 | "Only RELEVANCY result type is supported"); | ||
475 | 905 | /* | ||
476 | 906 | * perhaps something like this could be used here? | ||
477 | 907 | std::map<unsigned, gdouble> relevancy_map; | ||
478 | 908 | foreach (...) | ||
479 | 909 | { | ||
480 | 910 | double rank = iter.get_percent () / 100.; | ||
481 | 911 | if (rank > relevancy_map[event_id]) | ||
482 | 912 | { | ||
483 | 913 | relevancy_map[event_id] = rank; | ||
484 | 914 | } | ||
485 | 915 | } | ||
486 | 916 | */ | ||
487 | 917 | } | ||
488 | 918 | |||
489 | 919 | if (matches) | ||
490 | 920 | { | ||
491 | 921 | *matches = hitcount; | ||
492 | 922 | } | ||
493 | 923 | } | ||
494 | 924 | catch (Xapian::Error const& e) | ||
495 | 925 | { | ||
496 | 926 | g_warning ("Failed to search index: %s", e.get_msg ().c_str ()); | ||
497 | 757 | g_set_error_literal (error, | 927 | g_set_error_literal (error, |
498 | 758 | ZEITGEIST_ENGINE_ERROR, | 928 | ZEITGEIST_ENGINE_ERROR, |
499 | 759 | ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, | 929 | ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, |
500 | 760 | 930 | ||
501 | === modified file 'extensions/fts++/indexer.h' | |||
502 | --- extensions/fts++/indexer.h 2012-02-09 09:37:48 +0000 | |||
503 | +++ extensions/fts++/indexer.h 2012-02-10 12:11:19 +0000 | |||
504 | @@ -77,7 +77,7 @@ | |||
505 | 77 | void DeleteEvent (guint32 event_id); | 77 | void DeleteEvent (guint32 event_id); |
506 | 78 | void SetDbMetadata (std::string const& key, std::string const& value); | 78 | void SetDbMetadata (std::string const& key, std::string const& value); |
507 | 79 | 79 | ||
509 | 80 | GPtrArray* Search (const gchar *search_string, | 80 | GPtrArray* Search (const gchar *search, |
510 | 81 | ZeitgeistTimeRange *time_range, | 81 | ZeitgeistTimeRange *time_range, |
511 | 82 | GPtrArray *templates, | 82 | GPtrArray *templates, |
512 | 83 | guint offset, | 83 | guint offset, |
513 | @@ -85,11 +85,26 @@ | |||
514 | 85 | ZeitgeistResultType result_type, | 85 | ZeitgeistResultType result_type, |
515 | 86 | guint *matches, | 86 | guint *matches, |
516 | 87 | GError **error); | 87 | GError **error); |
517 | 88 | GPtrArray* SearchWithRelevancies (const gchar *search, | ||
518 | 89 | ZeitgeistTimeRange *time_range, | ||
519 | 90 | GPtrArray *templates, | ||
520 | 91 | guint offset, | ||
521 | 92 | guint count, | ||
522 | 93 | ZeitgeistResultType result_type, | ||
523 | 94 | gdouble **relevancies, | ||
524 | 95 | gint *relevancies_size, | ||
525 | 96 | guint *matches, | ||
526 | 97 | GError **error); | ||
527 | 88 | 98 | ||
528 | 89 | private: | 99 | private: |
529 | 90 | std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri); | 100 | std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri); |
530 | 91 | std::string CompileEventFilterQuery (GPtrArray *templates); | 101 | std::string CompileEventFilterQuery (GPtrArray *templates); |
531 | 92 | std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end); | 102 | std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end); |
532 | 103 | std::string CompileQueryString (const gchar *search, | ||
533 | 104 | ZeitgeistTimeRange *time_range, | ||
534 | 105 | GPtrArray *templates); | ||
535 | 106 | |||
536 | 107 | std::string PreprocessString (std::string const& input); | ||
537 | 93 | 108 | ||
538 | 94 | void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc); | 109 | void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc); |
539 | 95 | void IndexText (std::string const& text); | 110 | void IndexText (std::string const& text); |
540 | 96 | 111 | ||
541 | === modified file 'extensions/fts++/stringutils.cpp' | |||
542 | --- extensions/fts++/stringutils.cpp 2012-02-09 09:32:33 +0000 | |||
543 | +++ extensions/fts++/stringutils.cpp 2012-02-10 12:11:19 +0000 | |||
544 | @@ -17,9 +17,14 @@ | |||
545 | 17 | * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> | 17 | * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> |
546 | 18 | * | 18 | * |
547 | 19 | */ | 19 | */ |
548 | 20 | |||
549 | 21 | #include "stringutils.h" | ||
550 | 20 | #include <string> | 22 | #include <string> |
551 | 23 | #include <algorithm> | ||
552 | 21 | 24 | ||
554 | 22 | #include "stringutils.h" | 25 | #ifdef HAVE_DEE_ICU |
555 | 26 | #include <dee-icu.h> | ||
556 | 27 | #endif | ||
557 | 23 | 28 | ||
558 | 24 | using namespace std; | 29 | using namespace std; |
559 | 25 | 30 | ||
560 | @@ -123,6 +128,87 @@ | |||
561 | 123 | } | 128 | } |
562 | 124 | } | 129 | } |
563 | 125 | 130 | ||
564 | 131 | string RemoveUnderscores (string const &input) | ||
565 | 132 | { | ||
566 | 133 | string result (input); | ||
567 | 134 | std::replace (result.begin (), result.end (), '_', ' '); | ||
568 | 135 | |||
569 | 136 | return result; | ||
570 | 137 | } | ||
571 | 138 | |||
572 | 139 | static bool is_digit (char c) { return c >= '0' && c <= '9'; } | ||
573 | 140 | |||
574 | 141 | size_t CountDigits (string const &input) | ||
575 | 142 | { | ||
576 | 143 | return std::count_if (input.begin (), input.end (), is_digit); | ||
577 | 144 | } | ||
578 | 145 | |||
579 | 146 | static GRegex *camelcase_matcher = NULL; | ||
580 | 147 | |||
581 | 148 | static gboolean | ||
582 | 149 | matcher_cb (const GMatchInfo *match_info, GString *result, gpointer user_data) | ||
583 | 150 | { | ||
584 | 151 | gint start_pos; | ||
585 | 152 | g_match_info_fetch_pos (match_info, 0, &start_pos, NULL); | ||
586 | 153 | if (start_pos != 0) g_string_append_c (result, ' '); | ||
587 | 154 | gchar *word = g_match_info_fetch (match_info, 0); | ||
588 | 155 | g_string_append (result, word); | ||
589 | 156 | g_free (word); | ||
590 | 157 | |||
591 | 158 | return FALSE; | ||
592 | 159 | } | ||
593 | 160 | |||
594 | 161 | string UnCamelcase (string const &input) | ||
595 | 162 | { | ||
596 | 163 | if (camelcase_matcher == NULL) | ||
597 | 164 | { | ||
598 | 165 | camelcase_matcher = g_regex_new ("(?<=^|[[:lower:]])[[:upper:]]+[^[:upper:]]+", G_REGEX_OPTIMIZE, (GRegexMatchFlags) 0, NULL); | ||
599 | 166 | if (camelcase_matcher == NULL) g_critical ("Unable to create matcher!"); | ||
600 | 167 | } | ||
601 | 168 | |||
602 | 169 | gchar *result = g_regex_replace_eval (camelcase_matcher, input.c_str (), | ||
603 | 170 | input.length (), 0, | ||
604 | 171 | (GRegexMatchFlags) 0, | ||
605 | 172 | matcher_cb, NULL, NULL); | ||
606 | 173 | |||
607 | 174 | string ret (result); | ||
608 | 175 | g_free (result); | ||
609 | 176 | return ret; | ||
610 | 177 | } | ||
611 | 178 | |||
612 | 179 | #ifdef HAVE_DEE_ICU | ||
613 | 180 | static DeeICUTermFilter *icu_filter = NULL; | ||
614 | 181 | |||
615 | 182 | /** | ||
616 | 183 | * Use ascii folding filter on the input text and return folded version | ||
617 | 184 | * of the original string. | ||
618 | 185 | * | ||
619 | 186 | * Note that if the folded version is exactly the same as the original | ||
620 | 187 | * empty string will be returned. | ||
621 | 188 | */ | ||
622 | 189 | string AsciiFold (string const& input) | ||
623 | 190 | { | ||
624 | 191 | if (icu_filter == NULL) | ||
625 | 192 | { | ||
626 | 193 | icu_filter = dee_icu_term_filter_new_ascii_folder (); | ||
627 | 194 | if (icu_filter == NULL) return ""; | ||
628 | 195 | } | ||
629 | 196 | |||
630 | 197 | // FIXME: check first if the input contains any non-ascii chars? | ||
631 | 198 | |||
632 | 199 | gchar *folded = dee_icu_term_filter_apply (icu_filter, input.c_str ()); | ||
633 | 200 | string result (folded); | ||
634 | 201 | g_free (folded); | ||
635 | 202 | |||
636 | 203 | return result == input ? "" : result; | ||
637 | 204 | } | ||
638 | 205 | #else | ||
639 | 206 | string AsciiFold (string const& input) | ||
640 | 207 | { | ||
641 | 208 | return ""; | ||
642 | 209 | } | ||
643 | 210 | #endif | ||
644 | 211 | |||
645 | 126 | } /* namespace StringUtils */ | 212 | } /* namespace StringUtils */ |
646 | 127 | 213 | ||
647 | 128 | } /* namespace ZeitgeistFTS */ | 214 | } /* namespace ZeitgeistFTS */ |
648 | 129 | 215 | ||
649 | === modified file 'extensions/fts++/stringutils.h' | |||
650 | --- extensions/fts++/stringutils.h 2012-02-09 09:32:33 +0000 | |||
651 | +++ extensions/fts++/stringutils.h 2012-02-10 12:11:19 +0000 | |||
652 | @@ -37,6 +37,14 @@ | |||
653 | 37 | std::string &path, | 37 | std::string &path, |
654 | 38 | std::string &basename); | 38 | std::string &basename); |
655 | 39 | 39 | ||
// Returns a copy of input with underscores turned into spaces
// (per the unit tests in this change: "a_b_c_d" -> "a b c d").
656 | 40 | std::string RemoveUnderscores (std::string const &input); | ||
657 | 41 | |||
// Returns how many digit characters input contains
// (per the unit tests in this change: "PCN30129.JPG" -> 5).
658 | 42 | size_t CountDigits (std::string const &input); | ||
659 | 43 | |||
// Returns input with CamelCase words separated by spaces
// (per the unit tests in this change: "pyKaraoke" -> "py Karaoke",
// while "ABCD" and "ABcd" stay unchanged).
660 | 44 | std::string UnCamelcase (std::string const &input); | ||
661 | 45 | |||
// Returns the ASCII-folded version of input, or an empty string when the
// folded text equals the input. Builds without HAVE_DEE_ICU always get "".
662 | 46 | std::string AsciiFold (std::string const& input); | ||
663 | 47 | |||
664 | 40 | } /* namespace StringUtils */ | 48 | } /* namespace StringUtils */ |
665 | 41 | 49 | ||
666 | 42 | } /* namespace ZeitgeistFTS */ | 50 | } /* namespace ZeitgeistFTS */ |
667 | 43 | 51 | ||
668 | === modified file 'extensions/fts++/test/Makefile.am' | |||
669 | --- extensions/fts++/test/Makefile.am 2012-02-08 18:54:58 +0000 | |||
670 | +++ extensions/fts++/test/Makefile.am 2012-02-10 12:11:19 +0000 | |||
671 | @@ -25,3 +25,8 @@ | |||
672 | 25 | -lxapian \ | 25 | -lxapian \ |
673 | 26 | $(NULL) | 26 | $(NULL) |
674 | 27 | 27 | ||
# Only when the HAVE_DEE_ICU conditional is set: add the dee-icu compiler
# flags and link the test_fts binary against the dee-icu libraries.
675 | 28 | if HAVE_DEE_ICU | ||
676 | 29 | AM_CPPFLAGS += $(DEE_ICU_CFLAGS) | ||
677 | 30 | test_fts_LDADD += $(DEE_ICU_LIBS) | ||
678 | 31 | endif | ||
679 | 32 | |||
680 | 28 | 33 | ||
681 | === modified file 'extensions/fts++/test/test-indexer.cpp' | |||
682 | --- extensions/fts++/test/test-indexer.cpp 2012-02-09 09:32:33 +0000 | |||
683 | +++ extensions/fts++/test/test-indexer.cpp 2012-02-10 12:11:19 +0000 | |||
684 | @@ -145,6 +145,26 @@ | |||
685 | 145 | return event; | 145 | return event; |
686 | 146 | } | 146 | } |
687 | 147 | 147 | ||
// Builds a test event with a single subject: a C++ source file whose
// basename is CamelCase ("GLibSignalImplementation.cpp"). The event is a
// "create" event with gedit as the actor.
// Returns the newly created event.
688 | 148 | static ZeitgeistEvent* create_test_event5 (void) | ||
689 | 149 | { | ||
690 | 150 | ZeitgeistEvent *event = zeitgeist_event_new (); | ||
691 | 151 | ZeitgeistSubject *subject = zeitgeist_subject_new (); | ||
692 | 152 | |||
693 | 153 | zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_SOURCE_CODE); | ||
694 | 154 | zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_FILE_DATA_OBJECT); | ||
695 | 155 | zeitgeist_subject_set_uri (subject, "file:///home/username/projects/GLibSignalImplementation.cpp"); | ||
696 | 156 | zeitgeist_subject_set_text (subject, "Because c++ is awesome"); | ||
697 | 157 | zeitgeist_subject_set_mimetype (subject, "text/x-c++src"); | ||
698 | 158 | |||
699 | 159 | zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_CREATE_EVENT); | ||
700 | 160 | zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); | ||
701 | 161 | zeitgeist_event_set_actor (event, "application://gedit.desktop"); | ||
702 | 162 | zeitgeist_event_add_subject (event, subject); | ||
703 | 163 | |||
// Drop the local reference once the subject has been added to the event.
704 | 164 | g_object_unref (subject); | ||
705 | 165 | return event; | ||
706 | 166 | } | ||
707 | 167 | |||
708 | 148 | // Steals the event, ref it if you want to keep it | 168 | // Steals the event, ref it if you want to keep it |
709 | 149 | static guint | 169 | static guint |
710 | 150 | index_event (Fixture *fix, ZeitgeistEvent *event) | 170 | index_event (Fixture *fix, ZeitgeistEvent *event) |
711 | @@ -426,6 +446,71 @@ | |||
712 | 426 | } | 446 | } |
713 | 427 | 447 | ||
// Indexes test events 1-4 and checks that the prefix query "fabulo*"
// returns exactly one result, and that it is event 4.
// NOTE(review): create_test_event4 is not visible here; presumably its text
// contains underscore-joined words - confirm.
// NOTE(review): `subject` is declared but never used, and `results` is not
// released in this function.
714 | 428 | static void | 448 | static void |
715 | 449 | test_simple_underscores (Fixture *fix, gconstpointer data) | ||
716 | 450 | { | ||
717 | 451 | guint matches; | ||
718 | 452 | guint event_id; | ||
719 | 453 | ZeitgeistEvent* event; | ||
720 | 454 | ZeitgeistSubject *subject; | ||
721 | 455 | |||
722 | 456 | // add test events to DBs | ||
723 | 457 | index_event (fix, create_test_event1 ()); | ||
724 | 458 | index_event (fix, create_test_event2 ()); | ||
725 | 459 | index_event (fix, create_test_event3 ()); | ||
726 | 460 | event_id = index_event (fix, create_test_event4 ()); | ||
727 | 461 | |||
// Search over any time, with an empty template list, for at most 10 results
// starting at offset 0.
728 | 462 | GPtrArray *results = | ||
729 | 463 | zeitgeist_indexer_search (fix->indexer, | ||
730 | 464 | "fabulo*", | ||
731 | 465 | zeitgeist_time_range_new_anytime (), | ||
732 | 466 | g_ptr_array_new (), | ||
733 | 467 | 0, | ||
734 | 468 | 10, | ||
735 | 469 | ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, | ||
736 | 470 | &matches, | ||
737 | 471 | NULL); | ||
738 | 472 | |||
739 | 473 | g_assert_cmpuint (matches, >, 0); | ||
740 | 474 | g_assert_cmpuint (results->len, ==, 1); | ||
741 | 475 | |||
// The single hit must be the event indexed last (event 4).
742 | 476 | event = (ZeitgeistEvent*) results->pdata[0]; | ||
743 | 477 | g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); | ||
744 | 478 | } | ||
745 | 479 | |||
// Indexes test events 1-5 and checks that the query "signal" returns exactly
// one result, and that it is event 5. Event 5's subject URI ends in
// "GLibSignalImplementation.cpp", so the word only occurs inside a CamelCase
// name there.
// NOTE(review): `subject` is declared but never used, and `results` is not
// released in this function.
746 | 480 | static void | ||
747 | 481 | test_simple_camelcase (Fixture *fix, gconstpointer data) | ||
748 | 482 | { | ||
749 | 483 | guint matches; | ||
750 | 484 | guint event_id; | ||
751 | 485 | ZeitgeistEvent* event; | ||
752 | 486 | ZeitgeistSubject *subject; | ||
753 | 487 | |||
754 | 488 | // add test events to DBs | ||
755 | 489 | index_event (fix, create_test_event1 ()); | ||
756 | 490 | index_event (fix, create_test_event2 ()); | ||
757 | 491 | index_event (fix, create_test_event3 ()); | ||
758 | 492 | index_event (fix, create_test_event4 ()); | ||
759 | 493 | event_id = index_event (fix, create_test_event5 ()); | ||
760 | 494 | |||
// Search over any time, with an empty template list, for at most 10 results
// starting at offset 0.
761 | 495 | GPtrArray *results = | ||
762 | 496 | zeitgeist_indexer_search (fix->indexer, | ||
763 | 497 | "signal", | ||
764 | 498 | zeitgeist_time_range_new_anytime (), | ||
765 | 499 | g_ptr_array_new (), | ||
766 | 500 | 0, | ||
767 | 501 | 10, | ||
768 | 502 | ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, | ||
769 | 503 | &matches, | ||
770 | 504 | NULL); | ||
771 | 505 | |||
772 | 506 | g_assert_cmpuint (matches, >, 0); | ||
773 | 507 | g_assert_cmpuint (results->len, ==, 1); | ||
774 | 508 | |||
// The single hit must be the event indexed last (event 5).
775 | 509 | event = (ZeitgeistEvent*) results->pdata[0]; | ||
776 | 510 | g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); | ||
777 | 511 | } | ||
778 | 512 | |||
779 | 513 | static void | ||
780 | 429 | test_simple_cjk (Fixture *fix, gconstpointer data) | 514 | test_simple_cjk (Fixture *fix, gconstpointer data) |
781 | 430 | { | 515 | { |
782 | 431 | guint matches; | 516 | guint matches; |
783 | @@ -517,6 +602,10 @@ | |||
784 | 517 | setup, test_simple_noexpand, teardown); | 602 | setup, test_simple_noexpand, teardown); |
785 | 518 | g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0, | 603 | g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0, |
786 | 519 | setup, test_simple_noexpand_valid, teardown); | 604 | setup, test_simple_noexpand_valid, teardown); |
787 | 605 | g_test_add ("/Zeitgeist/FTS/Indexer/SimpleUnderscores", Fixture, 0, | ||
788 | 606 | setup, test_simple_underscores, teardown); | ||
789 | 607 | g_test_add ("/Zeitgeist/FTS/Indexer/SimpleCamelcase", Fixture, 0, | ||
790 | 608 | setup, test_simple_camelcase, teardown); | ||
791 | 520 | g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0, | 609 | g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0, |
792 | 521 | setup, test_simple_url_unescape, teardown); | 610 | setup, test_simple_url_unescape, teardown); |
793 | 522 | g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0, | 611 | g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0, |
794 | 523 | 612 | ||
795 | === modified file 'extensions/fts++/test/test-stringutils.cpp' | |||
796 | --- extensions/fts++/test/test-stringutils.cpp 2012-02-09 09:32:33 +0000 | |||
797 | +++ extensions/fts++/test/test-stringutils.cpp 2012-02-10 12:11:19 +0000 | |||
798 | @@ -163,6 +163,91 @@ | |||
799 | 163 | g_assert_cmpstr ("type=A", ==, query.c_str ()); | 163 | g_assert_cmpstr ("type=A", ==, query.c_str ()); |
800 | 164 | } | 164 | } |
801 | 165 | 165 | ||
// Checks StringUtils::AsciiFold: inputs that are already plain ASCII (and
// the empty string) must give "", while inputs containing "å" must come
// back with every "å" replaced by "a".
802 | 166 | static void | ||
803 | 167 | test_ascii_fold (Fixture *fix, gconstpointer data) | ||
804 | 168 | { | ||
805 | 169 | std::string folded; | ||
806 | 170 | |||
807 | 171 | folded = StringUtils::AsciiFold (""); | ||
808 | 172 | g_assert_cmpstr ("", ==, folded.c_str ()); | ||
809 | 173 | |||
810 | 174 | // if the original matches the folded version, AsciiFold returns "" | ||
811 | 175 | folded = StringUtils::AsciiFold ("a"); | ||
812 | 176 | g_assert_cmpstr ("", ==, folded.c_str ()); | ||
813 | 177 | |||
814 | 178 | folded = StringUtils::AsciiFold ("abcdef"); | ||
815 | 179 | g_assert_cmpstr ("", ==, folded.c_str ()); | ||
816 | 180 | |||
// Non-ASCII input: the folded text differs from the input, so it is returned.
817 | 181 | folded = StringUtils::AsciiFold ("å"); | ||
818 | 182 | g_assert_cmpstr ("a", ==, folded.c_str ()); | ||
819 | 183 | |||
820 | 184 | folded = StringUtils::AsciiFold ("åå"); | ||
821 | 185 | g_assert_cmpstr ("aa", ==, folded.c_str ()); | ||
822 | 186 | |||
823 | 187 | folded = StringUtils::AsciiFold ("aåaåa"); | ||
824 | 188 | g_assert_cmpstr ("aaaaa", ==, folded.c_str ()); | ||
825 | 189 | } | ||
826 | 190 | |||
// Checks StringUtils::RemoveUnderscores: strings without underscores must
// come back unchanged, and underscores must come back as spaces, including
// at the start and end of the string.
827 | 191 | static void | ||
828 | 192 | test_underscores (Fixture *fix, gconstpointer data) | ||
829 | 193 | { | ||
830 | 194 | g_assert_cmpstr ("", ==, StringUtils::RemoveUnderscores ("").c_str ()); | ||
831 | 195 | |||
832 | 196 | g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("_").c_str ()); | ||
833 | 197 | |||
834 | 198 | g_assert_cmpstr (" ", ==, StringUtils::RemoveUnderscores ("___").c_str ()); | ||
835 | 199 | |||
836 | 200 | g_assert_cmpstr ("abcd", ==, StringUtils::RemoveUnderscores ("abcd").c_str ()); | ||
837 | 201 | |||
838 | 202 | g_assert_cmpstr (" abcd ", ==, StringUtils::RemoveUnderscores ("_abcd_").c_str ()); | ||
839 | 203 | |||
840 | 204 | g_assert_cmpstr ("a b c d", ==, StringUtils::RemoveUnderscores ("a_b_c_d").c_str ()); | ||
841 | 205 | } | ||
842 | 206 | |||
// Checks StringUtils::UnCamelcase: a space is expected wherever a lowercase
// letter is directly followed by an uppercase-led word, while all-lowercase
// text, all-uppercase text, leading uppercase runs ("ABcd") and text that
// already contains spaces must come back unchanged.
843 | 207 | static void | ||
844 | 208 | test_uncamelcase (Fixture *fix, gconstpointer data) | ||
845 | 209 | { | ||
846 | 210 | g_assert_cmpstr ("", ==, StringUtils::UnCamelcase ("").c_str ()); | ||
847 | 211 | |||
848 | 212 | g_assert_cmpstr ("abcd", ==, StringUtils::UnCamelcase ("abcd").c_str ()); | ||
849 | 213 | |||
850 | 214 | g_assert_cmpstr ("Abcd", ==, StringUtils::UnCamelcase ("Abcd").c_str ()); | ||
851 | 215 | |||
852 | 216 | g_assert_cmpstr ("ABCD", ==, StringUtils::UnCamelcase ("ABCD").c_str ()); | ||
853 | 217 | |||
854 | 218 | g_assert_cmpstr ("ABcd", ==, StringUtils::UnCamelcase ("ABcd").c_str ()); | ||
855 | 219 | |||
856 | 220 | g_assert_cmpstr ("Abcd Ef", ==, StringUtils::UnCamelcase ("AbcdEf").c_str ()); | ||
857 | 221 | |||
858 | 222 | g_assert_cmpstr ("Text Editor", ==, StringUtils::UnCamelcase ("Text Editor").c_str ()); | ||
859 | 223 | |||
860 | 224 | g_assert_cmpstr ("py Karaoke", ==, StringUtils::UnCamelcase ("pyKaraoke").c_str ()); | ||
861 | 225 | |||
862 | 226 | g_assert_cmpstr ("Zeitgeist Project", ==, StringUtils::UnCamelcase ("ZeitgeistProject").c_str ()); | ||
863 | 227 | |||
864 | 228 | g_assert_cmpstr ("Very Nice Camel Case Text", ==, StringUtils::UnCamelcase ("VeryNiceCamelCaseText").c_str ()); | ||
865 | 229 | |||
// Upper/lowercase detection must also work for non-ASCII (UTF-8) letters.
866 | 230 | g_assert_cmpstr ("Ňeedš Ťo Wórk Óń Útf Čhářacters As WelL", ==, | ||
867 | 231 | StringUtils::UnCamelcase ("ŇeedšŤoWórkÓńÚtfČhářactersAsWelL").c_str ()); | ||
868 | 232 | } | ||
869 | 233 | |||
// Checks StringUtils::CountDigits: the result must be the number of digit
// characters in the string, wherever they appear, and 0 for the empty string
// and for text without digits.
870 | 234 | static void | ||
871 | 235 | test_count_digits (Fixture *fix, gconstpointer data) | ||
872 | 236 | { | ||
873 | 237 | g_assert_cmpuint (0, ==, StringUtils::CountDigits ("")); | ||
874 | 238 | |||
875 | 239 | g_assert_cmpuint (0, ==, StringUtils::CountDigits ("abcdefghijklmnopqrstuvwxyz")); | ||
876 | 240 | |||
877 | 241 | g_assert_cmpuint (10, ==, StringUtils::CountDigits ("0123456789")); | ||
878 | 242 | |||
879 | 243 | g_assert_cmpuint (1, ==, StringUtils::CountDigits ("abc3")); | ||
880 | 244 | |||
881 | 245 | g_assert_cmpuint (3, ==, StringUtils::CountDigits ("::123__poa//weee")); | ||
882 | 246 | |||
883 | 247 | g_assert_cmpuint (5, ==, StringUtils::CountDigits ("PCN30129.JPG")); | ||
884 | 248 | |||
885 | 249 | } | ||
886 | 250 | |||
887 | 166 | G_BEGIN_DECLS | 251 | G_BEGIN_DECLS |
888 | 167 | 252 | ||
889 | 168 | void test_stringutils_create_suite (void) | 253 | void test_stringutils_create_suite (void) |
890 | @@ -173,6 +258,16 @@ | |||
891 | 173 | setup, test_mangle, teardown); | 258 | setup, test_mangle, teardown); |
892 | 174 | g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, | 259 | g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, |
893 | 175 | setup, test_split, teardown); | 260 | setup, test_split, teardown); |
894 | 261 | g_test_add ("/Zeitgeist/FTS/StringUtils/RemoveUnderscores", Fixture, 0, | ||
895 | 262 | setup, test_underscores, teardown); | ||
896 | 263 | g_test_add ("/Zeitgeist/FTS/StringUtils/UnCamelcase", Fixture, 0, | ||
897 | 264 | setup, test_uncamelcase, teardown); | ||
898 | 265 | g_test_add ("/Zeitgeist/FTS/StringUtils/CountDigits", Fixture, 0, | ||
899 | 266 | setup, test_count_digits, teardown); | ||
900 | 267 | #ifdef HAVE_DEE_ICU | ||
901 | 268 | g_test_add ("/Zeitgeist/FTS/StringUtils/AsciiFold", Fixture, 0, | ||
902 | 269 | setup, test_ascii_fold, teardown); | ||
903 | 270 | #endif | ||
904 | 176 | } | 271 | } |
905 | 177 | 272 | ||
906 | 178 | G_END_DECLS | 273 | G_END_DECLS |
907 | 179 | 274 | ||
908 | === modified file 'extensions/fts++/zeitgeist-fts.vala' | |||
909 | --- extensions/fts++/zeitgeist-fts.vala 2012-02-09 09:32:33 +0000 | |||
910 | +++ extensions/fts++/zeitgeist-fts.vala 2012-02-10 12:11:19 +0000 | |||
911 | @@ -132,6 +132,23 @@ | |||
912 | 132 | events = Events.to_variant (results); | 132 | events = Events.to_variant (results); |
913 | 133 | } | 133 | } |
914 | 134 | 134 | ||
// Same flow as the search method above it in this hunk, but additionally
// hands back the `relevancies` array filled in by the indexer.
// Decodes the time range and filter templates from their Variant form,
// forwards the query to instance.indexer.search_with_relevancies (), and
// packs the returned events back into a Variant.
// NOTE(review): presumably relevancies holds one value per returned event -
// confirm against the indexer.
915 | 135 | public async void search_with_relevancies ( | ||
916 | 136 | string query_string, Variant time_range, | ||
917 | 137 | Variant filter_templates, | ||
918 | 138 | uint offset, uint count, uint result_type, | ||
919 | 139 | out Variant events, out double[] relevancies, | ||
920 | 140 | out uint matches) | ||
921 | 141 | throws Error | ||
922 | 142 | { | ||
923 | 143 | var tr = new TimeRange.from_variant (time_range); | ||
924 | 144 | var templates = Events.from_variant (filter_templates); | ||
925 | 145 | var results = instance.indexer.search_with_relevancies ( | ||
926 | 146 | query_string, tr, templates, offset, count, | ||
927 | 147 | (ResultType) result_type, out relevancies, out matches); | ||
928 | 148 | |||
929 | 149 | events = Events.to_variant (results); | ||
930 | 150 | } | ||
931 | 151 | |||
932 | 135 | private static void name_acquired_callback (DBusConnection conn) | 152 | private static void name_acquired_callback (DBusConnection conn) |
933 | 136 | { | 153 | { |
934 | 137 | name_acquired = true; | 154 | name_acquired = true; |
935 | 138 | 155 | ||
936 | === modified file 'extensions/fts.vala' | |||
937 | --- extensions/fts.vala 2012-02-07 12:47:44 +0000 | |||
938 | +++ extensions/fts.vala 2012-02-10 12:11:19 +0000 | |||
939 | @@ -31,6 +31,14 @@ | |||
940 | 31 | uint offset, uint count, uint result_type, | 31 | uint offset, uint count, uint result_type, |
941 | 32 | [DBus (signature = "a(asaasay)")] out Variant events, | 32 | [DBus (signature = "a(asaasay)")] out Variant events, |
942 | 33 | out uint matches) throws Error; | 33 | out uint matches) throws Error; |
// Interface declaration of the search variant that also returns relevancies.
// The time range travels over D-Bus as "(xx)"; the filter templates and the
// returned events travel as "a(asaasay)".
943 | 34 | public abstract async void search_with_relevancies ( | ||
944 | 35 | string query_string, | ||
945 | 36 | [DBus (signature = "(xx)")] Variant time_range, | ||
946 | 37 | [DBus (signature = "a(asaasay)")] Variant filter_templates, | ||
947 | 38 | uint offset, uint count, uint result_type, | ||
948 | 39 | [DBus (signature = "a(asaasay)")] out Variant events, | ||
949 | 40 | out double[] relevancies, | ||
950 | 41 | out uint matches) throws Error; | ||
951 | 34 | } | 42 | } |
952 | 35 | 43 | ||
953 | 36 | /* Because of a Vala bug we have to define the proxy interface outside of | 44 | /* Because of a Vala bug we have to define the proxy interface outside of |
954 | @@ -55,6 +63,7 @@ | |||
955 | 55 | private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer"; | 63 | private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer"; |
956 | 56 | 64 | ||
957 | 57 | private RemoteSimpleIndexer siin; | 65 | private RemoteSimpleIndexer siin; |
958 | 66 | private bool siin_connection_failed = false; | ||
959 | 58 | private uint registration_id; | 67 | private uint registration_id; |
960 | 59 | private MonitorManager? notifier; | 68 | private MonitorManager? notifier; |
961 | 60 | 69 | ||
962 | @@ -67,6 +76,8 @@ | |||
963 | 67 | { | 76 | { |
964 | 68 | if (Utils.using_in_memory_database ()) return; | 77 | if (Utils.using_in_memory_database ()) return; |
965 | 69 | 78 | ||
966 | 79 | // FIXME: check dbus and see if fts is installed? | ||
967 | 80 | |||
968 | 70 | // installing a monitor from the daemon will ensure that we don't | 81 | // installing a monitor from the daemon will ensure that we don't |
969 | 71 | // miss any notifications that would be emitted in between | 82 | // miss any notifications that would be emitted in between |
970 | 72 | // zeitgeist start and fts daemon start | 83 | // zeitgeist start and fts daemon start |
971 | @@ -109,23 +120,40 @@ | |||
972 | 109 | try | 120 | try |
973 | 110 | { | 121 | { |
974 | 111 | siin = conn.get_proxy.end<RemoteSimpleIndexer> (res); | 122 | siin = conn.get_proxy.end<RemoteSimpleIndexer> (res); |
975 | 123 | siin_connection_failed = false; | ||
976 | 112 | } | 124 | } |
977 | 113 | catch (IOError err) | 125 | catch (IOError err) |
978 | 114 | { | 126 | { |
979 | 127 | siin_connection_failed = true; | ||
980 | 115 | warning ("%s", err.message); | 128 | warning ("%s", err.message); |
981 | 116 | } | 129 | } |
982 | 117 | } | 130 | } |
983 | 118 | 131 | ||
// wait_for_proxy: waits for the `siin` proxy to become available.
// While siin is null and the connection attempt has not been flagged as
// failed, it schedules a 250 ms timeout (at idle priority) that resumes this
// method, and yields; this is repeated at most 6 times.
// Afterwards it throws EngineError.DATABASE_ERROR if siin is still null or
// is not a DBusProxy.
// (The first rows below also show the old `search` signature lines that this
// diff moved further down.)
987 | 119 | public async void search (string query_string, Variant time_range, | 132 | public async void wait_for_proxy () throws Error |
985 | 120 | Variant filter_templates, uint offset, uint count, uint result_type, | ||
986 | 121 | out Variant events, out uint matches) throws Error | ||
988 | 122 | { | 133 | { |
989 | 134 | int i = 0; | ||
990 | 135 | while (this.siin == null && i < 6 && !siin_connection_failed) | ||
991 | 136 | { | ||
// Resume this coroutine from a timeout source after 250 ms.
992 | 137 | Timeout.add_full (Priority.DEFAULT_IDLE, 250, | ||
993 | 138 | wait_for_proxy.callback); | ||
994 | 139 | i++; | ||
995 | 140 | yield; | ||
996 | 141 | } | ||
997 | 142 | |||
998 | 123 | if (siin == null || !(siin is DBusProxy)) | 143 | if (siin == null || !(siin is DBusProxy)) |
999 | 124 | { | 144 | { |
1000 | 125 | // FIXME: queue until we have the proxy | 145 | // FIXME: queue until we have the proxy |
1001 | 126 | throw new EngineError.DATABASE_ERROR ( | 146 | throw new EngineError.DATABASE_ERROR ( |
1002 | 127 | "Not connected to SimpleIndexer"); | 147 | "Not connected to SimpleIndexer"); |
1003 | 128 | } | 148 | } |
1004 | 149 | } | ||
1005 | 150 | |||
1006 | 151 | public async void search (string query_string, Variant time_range, | ||
1007 | 152 | Variant filter_templates, uint offset, uint count, uint result_type, | ||
1008 | 153 | out Variant events, out uint matches) throws Error | ||
1009 | 154 | { | ||
1010 | 155 | if (siin == null) yield wait_for_proxy (); | ||
1011 | 156 | |||
1012 | 129 | var timer = new Timer (); | 157 | var timer = new Timer (); |
1013 | 130 | yield siin.search (query_string, time_range, filter_templates, | 158 | yield siin.search (query_string, time_range, filter_templates, |
1014 | 131 | offset, count, result_type, | 159 | offset, count, result_type, |
1015 | @@ -134,6 +162,24 @@ | |||
1016 | 134 | (uint) events.n_children (), matches, timer.elapsed ()); | 162 | (uint) events.n_children (), matches, timer.elapsed ()); |
1017 | 135 | } | 163 | } |
1018 | 136 | 164 | ||
// Forwards a search to the remote indexer proxy (`siin`) and also returns
// the relevancies it reports.
// If the proxy is not set yet, waits for it first via wait_for_proxy (),
// which throws when the proxy does not become available.
// Logs the number of returned events, the total match count and the elapsed
// time of the remote call.
1019 | 165 | public async void search_with_relevancies ( | ||
1020 | 166 | string query_string, Variant time_range, | ||
1021 | 167 | Variant filter_templates, uint offset, uint count, uint result_type, | ||
1022 | 168 | out Variant events, out double[] relevancies, out uint matches) | ||
1023 | 169 | throws Error | ||
1024 | 170 | { | ||
1025 | 171 | if (siin == null) yield wait_for_proxy (); | ||
1026 | 172 | |||
1027 | 173 | var timer = new Timer (); | ||
1028 | 174 | yield siin.search_with_relevancies ( | ||
1029 | 175 | query_string, time_range, filter_templates, | ||
1030 | 176 | offset, count, result_type, | ||
1031 | 177 | out events, out relevancies, out matches); | ||
1032 | 178 | |||
1033 | 179 | debug ("Got %u[/%u] results from indexer (in %f seconds)", | ||
1034 | 180 | (uint) events.n_children (), matches, timer.elapsed ()); | ||
1035 | 181 | } | ||
1036 | 182 | |||
1037 | 137 | } | 183 | } |
1038 | 138 | 184 | ||
1039 | 139 | [ModuleInit] | 185 | [ModuleInit] |
1040 | 140 | 186 | ||
1041 | === modified file 'src/remote.vala' | |||
1042 | --- src/remote.vala 2012-02-05 14:52:13 +0000 | |||
1043 | +++ src/remote.vala 2012-02-10 12:11:19 +0000 | |||
1044 | @@ -121,6 +121,13 @@ | |||
1045 | 121 | uint offset, uint count, uint result_type, | 121 | uint offset, uint count, uint result_type, |
1046 | 122 | [DBus (signature = "a(asaasay)")] out Variant events, | 122 | [DBus (signature = "a(asaasay)")] out Variant events, |
1047 | 123 | out uint matches) throws Error; | 123 | out uint matches) throws Error; |
// Interface declaration of the search variant that also returns relevancies.
// The time range travels over D-Bus as "(xx)"; the filter templates and the
// returned events travel as "a(asaasay)".
1048 | 124 | public abstract async void search_with_relevancies ( | ||
1049 | 125 | string query_string, | ||
1050 | 126 | [DBus (signature = "(xx)")] Variant time_range, | ||
1051 | 127 | [DBus (signature = "a(asaasay)")] Variant filter_templates, | ||
1052 | 128 | uint offset, uint count, uint result_type, | ||
1053 | 129 | [DBus (signature = "a(asaasay)")] out Variant events, | ||
1054 | 130 | out double[] relevancies, out uint matches) throws Error; | ||
1055 | 124 | } | 131 | } |
1056 | 125 | 132 | ||
1057 | 126 | /* FIXME: Remove this! Only here because of a bug in Vala (see ext-fts) */ | 133 | /* FIXME: Remove this! Only here because of a bug in Vala (see ext-fts) */ |
Awesome.