Merge lp:~rainct/zeitgeist/collapse-uri into lp:~zeitgeist/zeitgeist/bluebird

Proposed by Siegfried Gevatter
Status: Merged
Merge reported by: Michal Hruby
Merged at revision: not available
Proposed branch: lp:~rainct/zeitgeist/collapse-uri
Merge into: lp:~zeitgeist/zeitgeist/bluebird
Diff against target: 115 lines (+37/-1)
2 files modified
extensions/fts++/indexer.cpp (+33/-1)
extensions/fts++/indexer.h (+4/-0)
To merge this branch: bzr merge lp:~rainct/zeitgeist/collapse-uri
Reviewer Review Type Date Requested Status
Michal Hruby (community) Needs Fixing
Review via email: mp+95994@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Michal Hruby (mhr3) wrote :

Looks really good, a couple of comments:

8 +#include <cassert>
25 + assert (g_checksum_type_get_length (G_CHECKSUM_MD5) == 16);

We have glib for that (g_assert) ;)
Let's also turn all the magic "16"s into a const / #define.

88 +#include <glib/gchecksum.h>

Looks like some private header to me, docs say to use <glib.h>.

104 + if (checksum) { g_checksum_free (checksum); checksum = NULL; }

No need to break the style, C++'s destructors are always run just once.

75 + g_checksum_reset (checksum);

This should be done earlier, the .add_value() could throw an error screwing later use of the GChecksum.

review: Needs Fixing

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'extensions/fts++/indexer.cpp'
2--- extensions/fts++/indexer.cpp 2012-02-14 16:56:04 +0000
3+++ extensions/fts++/indexer.cpp 2012-03-05 20:47:20 +0000
4@@ -23,6 +23,7 @@
5 #include <xapian.h>
6 #include <queue>
7 #include <vector>
8+#include <cassert>
9
10 #include <gio/gio.h>
11 #include <gio/gdesktopappinfo.h>
12@@ -42,6 +43,7 @@
13
14 const Xapian::valueno VALUE_EVENT_ID = 0;
15 const Xapian::valueno VALUE_TIMESTAMP = 1;
16+const Xapian::valueno VALUE_URI_HASH = 2;
17
18 #define QUERY_PARSER_FLAGS \
19 Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
20@@ -100,6 +102,11 @@
21 this->query_parser->set_database (*this->db);
22
23 this->enquire = new Xapian::Enquire (*this->db);
24+
25+ assert (g_checksum_type_get_length (G_CHECKSUM_MD5) == 16);
26+ this->checksum = g_checksum_new (G_CHECKSUM_MD5);
27+ if (!this->checksum)
28+ g_critical ("GChecksum initialization failed.");
29
30 }
31 catch (const Xapian::Error &xp_error)
32@@ -727,7 +734,11 @@
33 guint maxhits;
34 if (result_type == 100 ||
35 result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS ||
36- result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS)
37+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_EVENTS ||
38+ result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
39+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
40+ result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
41+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
42 {
43 maxhits = count;
44 }
45@@ -745,6 +756,14 @@
46 enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
47 }
48
49+ if (result_type == ZEITGEIST_RESULT_TYPE_MOST_RECENT_SUBJECTS ||
50+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_RECENT_SUBJECTS ||
51+ result_type == ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS ||
52+ result_type == ZEITGEIST_RESULT_TYPE_LEAST_POPULAR_SUBJECTS)
53+ {
54+ enquire->set_collapse_key (VALUE_URI_HASH);
55+ }
56+
57 Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
58 enquire->set_query (q);
59 Xapian::MSet hits (enquire->get_mset (offset, maxhits));
60@@ -988,6 +1007,19 @@
61 return; // ignore this event completely...
62 }
63
64+ // We need the subject URI so we can use Xapian's collapse key feature
65+ // for *_SUBJECT grouping. However, to save space, we'll just save a hash.
66+ // A better option would be using URI's id, but for that we'd need a SQL
67+ // query that'd be subject to races.
68+ // FIXME(?): This doesn't work for events with multiple subjects.
69+ g_checksum_update (checksum, (guchar *) uri.c_str (), -1);
70+ guint8 uri_hash[17];
71+ gsize hash_size = 16;
72+ g_checksum_get_digest (checksum, uri_hash, &hash_size);
73+ assert (hash_size == 16);
74+ doc.add_value (VALUE_URI_HASH, std::string((char *) uri_hash, 16));
75+ g_checksum_reset (checksum);
76+
77 val = zeitgeist_subject_get_text (subject);
78 if (val && val[0] != '\0')
79 {
80
81=== modified file 'extensions/fts++/indexer.h'
82--- extensions/fts++/indexer.h 2012-02-14 16:56:04 +0000
83+++ extensions/fts++/indexer.h 2012-03-05 20:47:20 +0000
84@@ -21,6 +21,7 @@
85 #define _ZGFTS_INDEXER_H_
86
87 #include <glib-object.h>
88+#include <glib/gchecksum.h>
89 #include <gio/gio.h>
90 #include <xapian.h>
91
92@@ -42,6 +43,7 @@
93 , query_parser (NULL)
94 , enquire (NULL)
95 , tokenizer (NULL)
96+ , checksum (NULL)
97 , clear_failed_id (0)
98 {
99 const gchar *home_dir = g_get_home_dir ();
100@@ -54,6 +56,7 @@
101 if (enquire) delete enquire;
102 if (query_parser) delete query_parser;
103 if (db) delete db;
104+ if (checksum) { g_checksum_free (checksum); checksum = NULL; }
105
106 for (AppInfoMap::iterator it = app_info_cache.begin ();
107 it != app_info_cache.end (); ++it)
108@@ -120,6 +123,7 @@
109 Xapian::TermGenerator *tokenizer;
110 AppInfoMap app_info_cache;
111 ApplicationSet failed_lookups;
112+ GChecksum *checksum;
113
114 guint clear_failed_id;
115 std::string home_dir_path;

Subscribers

People subscribed via source and target branches