Merge lp:~mhr3/zeitgeist-extensions/scheme-detection into lp:zeitgeist-extensions

Proposed by Michal Hruby
Status: Merged
Approved by: Mikkel Kamstrup Erlandsen
Approved revision: 77
Merged at revision: 75
Proposed branch: lp:~mhr3/zeitgeist-extensions/scheme-detection
Merge into: lp:zeitgeist-extensions
Diff against target: 85 lines (+17/-22)
1 file modified
fts/fts.py (+17/-22)
To merge this branch: bzr merge lp:~mhr3/zeitgeist-extensions/scheme-detection
Reviewer Review Type Date Requested Status
Mikkel Kamstrup Erlandsen Approve
Review via email: mp+77481@code.launchpad.net
To post a comment you must log in.
76. By Michal Hruby

Get rid of unnecessary branches in the check_index_and_start_worker method

77. By Michal Hruby

Try to prevent some races

Revision history for this message
Mikkel Kamstrup Erlandsen (kamstrup) wrote :

Ok. The code looks good. I tested the hell out of this with log in/out cycles, deleting random parts of the Xapian index, and calling ForceReindex() over DBus while concurrently doing searches. Everything was rock solid.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'fts/fts.py'
2--- fts/fts.py 2011-09-07 08:42:40 +0000
3+++ fts/fts.py 2011-09-30 09:44:25 +0000
4@@ -62,6 +62,7 @@
5 log = logging.getLogger("zeitgeist.fts")
6
7 INDEX_FILE = os.path.join(constants.DATA_PATH, "fts.index")
8+INDEX_VERSION = "1"
9 INDEX_LOCK = threading.Lock()
10 FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity"
11 FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index"
12@@ -314,28 +315,24 @@
13
14 This method should be called from the main thread and only once.
15 It starts the worker thread as a side effect.
16+
17+ We are clearing the queue, because there may be a race when an
18+ event insertion / deletion is already queued and our index
19+ is corrupted. Creating a new queue instance should be safe,
20+ because we're running in main thread as are the index_event
21+ and delete_event methods, and the worker thread wasn't yet
22+ started.
23 """
24- if "XAPIAN_CJK_NGRAM" in os.environ and self._index.get_metadata("cjk_ngram") != "1":
25- # If the database was built prior to CJK support
26- # force of a reindex
27- log.info("Index built without CJK support. Upgrading index")
28+ if self._index.get_metadata("fts_index_version") != INDEX_VERSION:
29+ log.info("Index must be upgraded. Doing full rebuild")
30+ self._queue = Queue(0)
31 self._queue.put(Reindex(self._engine))
32 elif self._index.get_doccount() == 0:
33 # If the index is empty we trigger a rebuild
34 # We must delay reindexing until after the engine is done setting up
35 log.info("Empty index detected. Doing full rebuild")
36+ self._queue = Queue(0)
37 self._queue.put(Reindex(self._engine))
38- else:
39- # If the index doesn't use the zgsu prefix, it must be old-style,
40- # and we must rebuild it
41- query = self._query_parser.parse_query ("zgsu:file*",
42- self.QUERY_PARSER_FLAGS)
43- self._enquire.set_query (query)
44- hits = self._enquire.get_mset (0, 1)
45- hit_count = hits.get_matches_estimated()
46- if hit_count == 0:
47- log.info ("Old index format detected. Rebuilding index.")
48- self._queue.put(Reindex(self._engine))
49
50 # Now that we've checked the index from the main thread we can start the worker
51 self._worker.start()
52@@ -472,10 +469,8 @@
53 self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
54 self._query_parser.set_database (self._index)
55 self._enquire = xapian.Enquire(self._index)
56-
57- # Register that this index was built with CJK enabled
58- if "XAPIAN_CJK_NGRAM" in os.environ :
59- self._index.set_metadata("cjk_ngram", "1")
60+
61+ self._index.set_metadata("fts_index_version", INDEX_VERSION)
62
63 log.info("Preparing to rebuild index with %s events" % len(event_list))
64 for e in event_list : self._queue.put(e)
65@@ -607,7 +602,7 @@
66 # usually web URIs, are indexed in another way because there may
67 # be domain name etc. in there we want to rank differently
68 scheme, host, path = self._split_uri (url_unescape (uri))
69- if scheme == "file://" or not scheme:
70+ if scheme == "file" or not scheme:
71 path, name = os.path.split(path)
72 self._tokenizer.index_text(name, 5)
73 self._tokenizer.index_text(name, 5, "N")
74@@ -617,9 +612,9 @@
75 while path and name:
76 weight = weight / 1.5
77 path, name = os.path.split(path)
78- self._tokenizer.index_text(name, weight)
79+ self._tokenizer.index_text(name, int(weight))
80
81- elif scheme == "mailto:":
82+ elif scheme == "mailto":
83 tokens = host.split("@")
84 name = tokens[0]
85 self._tokenizer.index_text(name, 6)

Subscribers

People subscribed via source and target branches

to all changes: