Merge lp:~mhr3/zeitgeist-extensions/scheme-detection into lp:zeitgeist-extensions

Proposed by Michal Hruby
Status: Merged
Approved by: Mikkel Kamstrup Erlandsen
Approved revision: 77
Merged at revision: 75
Proposed branch: lp:~mhr3/zeitgeist-extensions/scheme-detection
Merge into: lp:zeitgeist-extensions
Diff against target: 85 lines (+17/-22)
1 file modified
fts/fts.py (+17/-22)
To merge this branch: bzr merge lp:~mhr3/zeitgeist-extensions/scheme-detection
Reviewer: Mikkel Kamstrup Erlandsen
Review status: Approve
Review via email: mp+77481@code.launchpad.net
76. By Michal Hruby

Get rid of unnecessary branches in the check_index_and_start_worker method

77. By Michal Hruby

Try to prevent some races

Revision history for this message
Mikkel Kamstrup Erlandsen (kamstrup) wrote :

Ok. The code looks good. I tested the hell out of this with log in/out cycles, deleting random parts of the Xapian index, and calling ForceReindex() over DBus while concurrently doing searches. Everything was rock solid.

review: Approve
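
For reference, a stress test along those lines can be driven from a small dbus-python script. The following is only a sketch, assuming the FTS extension's object is exported on the engine's org.gnome.zeitgeist.Engine bus name; the object path and interface name are the FTS_DBUS_* constants from fts.py below, and ForceReindex() is the method exercised in the review (called here with no arguments, which is an assumption).

    # Hypothetical stress-test helper, not part of this branch.
    # Assumes the FTS object lives on the org.gnome.zeitgeist.Engine bus name.
    import dbus

    bus = dbus.SessionBus()
    obj = bus.get_object("org.gnome.zeitgeist.Engine",
                         "/org/gnome/zeitgeist/index/activity")
    fts = dbus.Interface(obj, dbus_interface="org.gnome.zeitgeist.Index")

    # Trigger a full rebuild; searches issued against the same interface while
    # this runs should keep working if the queue handling is race-free.
    fts.ForceReindex()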

Preview Diff

=== modified file 'fts/fts.py'
--- fts/fts.py 2011-09-07 08:42:40 +0000
+++ fts/fts.py 2011-09-30 09:44:25 +0000
@@ -62,6 +62,7 @@
 log = logging.getLogger("zeitgeist.fts")
 
 INDEX_FILE = os.path.join(constants.DATA_PATH, "fts.index")
+INDEX_VERSION = "1"
 INDEX_LOCK = threading.Lock()
 FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity"
 FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index"
@@ -314,28 +315,24 @@
 
         This method should be called from the main thread and only once.
         It starts the worker thread as a side effect.
+
+        We are clearing the queue, because there may be a race when an
+        event insertion / deletion is already queued and our index
+        is corrupted. Creating a new queue instance should be safe,
+        because we're running in main thread as are the index_event
+        and delete_event methods, and the worker thread wasn't yet
+        started.
         """
-        if "XAPIAN_CJK_NGRAM" in os.environ and self._index.get_metadata("cjk_ngram") != "1":
-            # If the database was built prior to CJK support
-            # force of a reindex
-            log.info("Index built without CJK support. Upgrading index")
+        if self._index.get_metadata("fts_index_version") != INDEX_VERSION:
+            log.info("Index must be upgraded. Doing full rebuild")
+            self._queue = Queue(0)
             self._queue.put(Reindex(self._engine))
         elif self._index.get_doccount() == 0:
             # If the index is empty we trigger a rebuild
             # We must delay reindexing until after the engine is done setting up
             log.info("Empty index detected. Doing full rebuild")
+            self._queue = Queue(0)
             self._queue.put(Reindex(self._engine))
-        else:
-            # If the index doesn't use the zgsu prefix, it must be old-style,
-            # and we must rebuild it
-            query = self._query_parser.parse_query ("zgsu:file*",
-                self.QUERY_PARSER_FLAGS)
-            self._enquire.set_query (query)
-            hits = self._enquire.get_mset (0, 1)
-            hit_count = hits.get_matches_estimated()
-            if hit_count == 0:
-                log.info ("Old index format detected. Rebuilding index.")
-                self._queue.put(Reindex(self._engine))
 
         # Now that we've checked the index from the main thread we can start the worker
         self._worker.start()
@@ -472,10 +469,8 @@
         self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
         self._query_parser.set_database (self._index)
         self._enquire = xapian.Enquire(self._index)
 
-        # Register that this index was built with CJK enabled
-        if "XAPIAN_CJK_NGRAM" in os.environ :
-            self._index.set_metadata("cjk_ngram", "1")
+        self._index.set_metadata("fts_index_version", INDEX_VERSION)
 
         log.info("Preparing to rebuild index with %s events" % len(event_list))
         for e in event_list : self._queue.put(e)
@@ -607,7 +602,7 @@
         # usually web URIs, are indexed in another way because there may
         # be domain name etc. in there we want to rank differently
         scheme, host, path = self._split_uri (url_unescape (uri))
-        if scheme == "file://" or not scheme:
+        if scheme == "file" or not scheme:
             path, name = os.path.split(path)
             self._tokenizer.index_text(name, 5)
             self._tokenizer.index_text(name, 5, "N")
@@ -617,9 +612,9 @@
             while path and name:
                 weight = weight / 1.5
                 path, name = os.path.split(path)
-                self._tokenizer.index_text(name, weight)
+                self._tokenizer.index_text(name, int(weight))
 
-        elif scheme == "mailto:":
+        elif scheme == "mailto":
             tokens = host.split("@")
             name = tokens[0]
             self._tokenizer.index_text(name, 6)
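
The heart of the change is the version stamp kept in the Xapian database metadata: check_index_and_start_worker compares the stored fts_index_version against INDEX_VERSION on startup and queues a full Reindex on any mismatch, swapping in a fresh queue first so that stale insertions or deletions cannot race with the rebuild, and the rebuild path then stamps the new database. Below is a minimal standalone sketch of that pattern; the index path and the way the rebuild is triggered are illustrative, not the extension's real configuration.

    # Standalone sketch of the version-stamp pattern used in fts.py above.
    # INDEX_FILE here is an example path, not the extension's real location.
    import xapian

    INDEX_FILE = "/tmp/example-fts.index"
    INDEX_VERSION = "1"

    db = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OPEN)

    # get_metadata() returns an empty value for keys that were never set, so
    # a database created before the stamp existed also fails this comparison
    # and gets rebuilt.
    if db.get_metadata("fts_index_version") != INDEX_VERSION:
        # Recreate the database from scratch and stamp it with the current
        # version so the next startup skips the rebuild.
        db = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
        db.set_metadata("fts_index_version", INDEX_VERSION)
        # ...re-index all events here, as Reindex does in the extension...

The last two hunks are independent fixes: the change from "file://" to "file" and "mailto:" to "mailto" suggests that _split_uri returns the bare scheme (as urlparse does), so the old comparisons never matched and those URIs fell through to the generic indexing path; and the per-path-component weight, which decays by a factor of 1.5 on each iteration, is now cast with int() because TermGenerator.index_text() takes an integer wdf increment.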
