Merge lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist/0.1

Proposed by Markus Korn
Status: Merged
Merged at revision: 1635
Proposed branch: lp:~thekorn/zeitgeist/db_schema_3
Merge into: lp:zeitgeist/0.1
Diff against target: 123 lines (+32/-16)
5 files modified
_zeitgeist/engine/__init__.py (+1/-1)
_zeitgeist/engine/main.py (+9/-5)
_zeitgeist/engine/sql.py (+9/-8)
_zeitgeist/engine/upgrades/core_2_3.py (+11/-0)
test/sql-test.py (+2/-2)
To merge this branch: bzr merge lp:~thekorn/zeitgeist/db_schema_3
Reviewer Review Type Date Requested Status
Siegfried Gevatter Approve
Seif Lotfy Approve
Mikkel Kamstrup Erlandsen Pending
Review via email: mp+40877@code.launchpad.net

Description of the change

This branch introduces a new db schema (version 3) and bundles a few performance related fixes, [0] compares the performance of this branch lp:zeitgeist (each query is run ~10 times, the plot shows the average).
Changes in detail:
 * renamed event_view.subj_origin to event_view.subj_origin_uri and event_view.subj_origin_id to event_view.subj_origin, this makes "subj_origin" point to an id in the uri table, and "subj_origin_uri" the resolved value. Also we are not using the redundant "SELECT id FROM uri WHERE uri.id=event.subj_origin" query anymore. (LP: #673452)
 * removed event_view.subj_uri_id and added event.subj_id, both values are in fact the same, but we again sting on another "SELECT" query in the event_view.
 * Optimization in FindEvent queries: we are not ordering and grouping by resolved values anymore, we are doing it on integer ids, which is much faster (LP: #673394)

[0] https://dl.dropbox.com/u/174479/compare_queries.svg

To post a comment you must log in.
lp:~thekorn/zeitgeist/db_schema_3 updated
1637. By Markus Korn

fixed typo

1638. By Markus Korn

fixed another comment

Revision history for this message
Seif Lotfy (seif) wrote :

Looks good, makes sense, and works perfectly.
AWESOME JOB. +1

review: Approve
Revision history for this message
Siegfried Gevatter (rainct) wrote :

+1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file '_zeitgeist/engine/__init__.py'
--- _zeitgeist/engine/__init__.py 2010-10-19 13:54:12 +0000
+++ _zeitgeist/engine/__init__.py 2010-11-15 15:32:17 +0000
@@ -55,7 +55,7 @@
55 55
56 # Required version of DB schema56 # Required version of DB schema
57 CORE_SCHEMA="core"57 CORE_SCHEMA="core"
58 CORE_SCHEMA_VERSION = 258 CORE_SCHEMA_VERSION = 3
59 59
60 USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")60 USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")
6161
6262
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2010-11-12 17:39:28 +0000
+++ _zeitgeist/engine/main.py 2010-11-15 15:32:17 +0000
@@ -148,8 +148,9 @@
148 148
149 def _get_subject_from_row(self, row):149 def _get_subject_from_row(self, row):
150 subject = Subject()150 subject = Subject()
151 for field in ("uri", "origin", "text", "storage"):151 for field in ("uri", "text", "storage"):
152 setattr(subject, field, row["subj_" + field])152 setattr(subject, field, row["subj_" + field])
153 setattr(subject, "origin", row["subj_origin_uri"])
153 for field in ("interpretation", "manifestation", "mimetype"):154 for field in ("interpretation", "manifestation", "mimetype"):
154 setattr(subject, field,155 setattr(subject, field,
155 getattr(self, "_" + field).value(row["subj_" + field]))156 getattr(self, "_" + field).value(row["subj_" + field]))
@@ -353,10 +354,13 @@
353 354
354 sql += (" ORDER BY timestamp DESC",355 sql += (" ORDER BY timestamp DESC",
355 " ORDER BY timestamp ASC",356 " ORDER BY timestamp ASC",
356 " GROUP BY subj_uri ORDER BY timestamp DESC",357 # thekorn: please note, event.subj_id == uri.id, as in
357 " GROUP BY subj_uri ORDER BY timestamp ASC",358 # the subj_id points directly to an entry in the uri table,
358 " GROUP BY subj_uri ORDER BY COUNT(subj_uri) DESC, timestamp DESC",359 # so we are in fact grouping by subj_uris here
359 " GROUP BY subj_uri ORDER BY COUNT(subj_uri) ASC, timestamp ASC",360 " GROUP BY subj_id ORDER BY timestamp DESC",
361 " GROUP BY subj_id ORDER BY timestamp ASC",
362 " GROUP BY subj_id ORDER BY COUNT(subj_id) DESC, timestamp DESC",
363 " GROUP BY subj_id ORDER BY COUNT(subj_id) ASC, timestamp ASC",
360 " GROUP BY actor ORDER BY COUNT(actor) DESC, timestamp DESC", 364 " GROUP BY actor ORDER BY COUNT(actor) DESC, timestamp DESC",
361 " GROUP BY actor ORDER BY COUNT(actor) ASC, timestamp ASC",365 " GROUP BY actor ORDER BY COUNT(actor) ASC, timestamp ASC",
362 " GROUP BY actor ORDER BY timestamp DESC",366 " GROUP BY actor ORDER BY timestamp DESC",
363367
=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2010-10-25 20:26:03 +0000
+++ _zeitgeist/engine/sql.py 2010-11-15 15:32:17 +0000
@@ -369,14 +369,12 @@
369 AS payload,369 AS payload,
370 (SELECT value FROM uri WHERE uri.id=event.subj_id)370 (SELECT value FROM uri WHERE uri.id=event.subj_id)
371 AS subj_uri,371 AS subj_uri,
372 (SELECT id FROM uri WHERE uri.id=event.subj_id)372 event.subj_id, -- #this directly points to an id in the uri table
373 AS subj_uri_id,
374 event.subj_interpretation,373 event.subj_interpretation,
375 event.subj_manifestation,374 event.subj_manifestation,
375 event.subj_origin,
376 (SELECT value FROM uri WHERE uri.id=event.subj_origin)376 (SELECT value FROM uri WHERE uri.id=event.subj_origin)
377 AS subj_origin,377 AS subj_origin_uri,
378 (SELECT id FROM uri WHERE uri.id=event.subj_origin)
379 AS subj_origin_id,
380 event.subj_mimetype,378 event.subj_mimetype,
381 (SELECT value FROM text WHERE text.id = event.subj_text)379 (SELECT value FROM text WHERE text.id = event.subj_text)
382 AS subj_text,380 AS subj_text,
@@ -528,14 +526,17 @@
528 def add_text_condition(self, column, value, like=False, negation=False, cache=None):526 def add_text_condition(self, column, value, like=False, negation=False, cache=None):
529 if like:527 if like:
530 assert column in ("subj_uri", "subj_origin", "actor", "subj_mimetype"), \528 assert column in ("subj_uri", "subj_origin", "actor", "subj_mimetype"), \
531 "prefix search on the %r column is not supported by zeitgeist"529 "prefix search on the %r column is not supported by zeitgeist" %column
532 if column in ("subj_uri", "subj_origin"):530 if column == "subj_uri":
533 view_column = "%s_id" %column531 # subj_id directly points to the id of an uri entry
532 view_column = "subj_id"
534 else:533 else:
535 view_column = column534 view_column = column
536 optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)535 optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)
537 sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)536 sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)
538 else:537 else:
538 if column == "subj_origin":
539 column = "subj_origin_uri"
539 sql = "%s %s= ?" %(column, "!" if negation else "")540 sql = "%s %s= ?" %(column, "!" if negation else "")
540 if cache is not None:541 if cache is not None:
541 value = cache[value]542 value = cache[value]
542543
=== added file '_zeitgeist/engine/upgrades/core_2_3.py'
--- _zeitgeist/engine/upgrades/core_2_3.py 1970-01-01 00:00:00 +0000
+++ _zeitgeist/engine/upgrades/core_2_3.py 2010-11-15 15:32:17 +0000
@@ -0,0 +1,11 @@
1# upgrading from db version 2 to 3
2# this requires no update to the actual data in the database
3# it is only a schema change of event_view. This change is done
4# in sql.create_db()
5
6# the schema change is renaming 'subj_uri_id' column to 'subj_id', as
7# both values are the same. Also 'subj_origin' gets renamed to
8# 'subj_origin_uri' and 'subj_origin_id' to 'subj_origin'.
9
10def run(cursor):
11 pass
012
=== modified file 'test/sql-test.py'
--- test/sql-test.py 2010-09-22 17:15:03 +0000
+++ test/sql-test.py 2010-11-15 15:32:17 +0000
@@ -97,12 +97,12 @@
97 where = WhereClause(WhereClause.AND)97 where = WhereClause(WhereClause.AND)
98 where.add_text_condition("subj_uri", "bar", like=True)98 where.add_text_condition("subj_uri", "bar", like=True)
99 self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),99 self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
100 "(subj_uri_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")100 "(subj_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
101 101
102 where = WhereClause(WhereClause.AND)102 where = WhereClause(WhereClause.AND)
103 where.add_text_condition("subj_origin", "bar", like=True)103 where.add_text_condition("subj_origin", "bar", like=True)
104 self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),104 self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
105 "(subj_origin_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")105 "(subj_origin IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
106 106
107 where = WhereClause(WhereClause.AND)107 where = WhereClause(WhereClause.AND)
108 where.add_text_condition("actor", "bar", like=True, negation=True)108 where.add_text_condition("actor", "bar", like=True, negation=True)

Subscribers

People subscribed via source and target branches