Merge lp:~seif/zeitgeist/memory into lp:zeitgeist/0.1

Proposed by Seif Lotfy
Status: Merged
Merged at revision: 1767
Proposed branch: lp:~seif/zeitgeist/memory
Merge into: lp:zeitgeist/0.1
Diff against target: 195 lines (+31/-20)
5 files modified
_zeitgeist/engine/datamodel.py (+1/-1)
_zeitgeist/engine/main.py (+12/-11)
_zeitgeist/engine/remote.py (+1/-1)
_zeitgeist/engine/sql.py (+10/-0)
test/engine-test.py (+7/-7)
To merge this branch: bzr merge lp:~seif/zeitgeist/memory
Reviewer: Siegfried Gevatter
Status: Approve
Review via email: mp+63848@code.launchpad.net

Description of the change

Reduce memory consumption by:
1) using generators instead of materializing intermediate result tuples
2) disabling the SQLite page cache (no real performance decline)
3) using arrays for storing ids instead of lists
4) using tuples instead of lists where possible

The result is lower memory consumption. More could be saved if we started using __slots__, but this is a clean hack that doesn't touch the API/ABI. A sketch of the main saving follows below.
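For illustration, here is a minimal sketch (not from this branch; sizes are approximate for 64-bit CPython) of the saving behind point 3: a list holds a pointer per element, with each int boxed as a full object, while array("i") stores packed 4-byte C ints.

# Minimal sketch (not from this branch): footprint of a list of event
# ids versus a packed int array, as now used for the id buffers.
import sys
from array import array

ids = list(range(100000))
packed = array("i", ids)

print(sys.getsizeof(ids))     # ~800 KB of pointers, boxed ints on top
print(sys.getsizeof(packed))  # ~400 KB of raw 4-byte ints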

lp:~seif/zeitgeist/memory updated
1772. By Seif Lotfy

Make id_hash use an array as the default value instead of a list (this reduced memory consumption by 0.4 MB for reproducer.py).
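A hedged sketch of what this revision changes (the event ids below are made up): id_hash now maps each event id to a packed array of the positions where it occurs, instead of to a list.

# Sketch of the id_hash change: a defaultdict with an array("i")
# factory, so positions are stored as packed ints (ids are made up).
from array import array
from collections import defaultdict

id_hash = defaultdict(lambda: array("i"))
for position, event_id in enumerate([7, 3, 7]):
    id_hash[event_id].append(position)  # same id may occur more than once

print(dict(id_hash))  # e.g. {3: array('i', [1]), 7: array('i', [0, 2])}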

Revision history for this message
Siegfried Gevatter (rainct) wrote:

Merged, but without the PRAGMA.

review: Approve
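For context on the part that was left out: the branch also set SQLite's per-connection page cache to zero pages, on the theory that the engine's own LRUCache makes it redundant. A standalone sketch of that setting, against a throwaway in-memory database rather than Zeitgeist's:

# Sketch of the dropped PRAGMA: cache_size is a per-connection setting,
# and 0 asks SQLite to keep no pages cached for this connection.
import sqlite3

conn = sqlite3.connect(":memory:")  # throwaway DB for illustration
conn.execute("PRAGMA cache_size = 0")
print(conn.execute("PRAGMA cache_size").fetchone())  # (0,)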

Preview Diff

=== modified file '_zeitgeist/engine/datamodel.py'
--- _zeitgeist/engine/datamodel.py 2011-01-17 15:54:47 +0000
+++ _zeitgeist/engine/datamodel.py 2011-06-11 02:35:46 +0000
@@ -78,4 +78,4 @@
         }.iteritems():
             for prop in props:
                 datasource[prop] = plaintype(datasource[prop])
-        return list(datasource)
+        return tuple(datasource)

=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2011-06-04 14:49:19 +0000
+++ _zeitgeist/engine/main.py 2011-06-11 02:35:46 +0000
@@ -29,6 +29,7 @@
 import os
 import logging
 from collections import defaultdict
+from array import array

 from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
     ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD
@@ -199,8 +200,8 @@
             return []

         # Split ids into cached and uncached
-        uncached_ids = []
-        cached_ids = []
+        uncached_ids = array("i")
+        cached_ids = array("i")

         # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache
         use_cache = True
@@ -215,7 +216,7 @@
             else:
                 uncached_ids.append(id)

-        id_hash = defaultdict(list)
+        id_hash = defaultdict(lambda: array("i"))
         for n, id in enumerate(ids):
             # the same id can be at multiple places (LP: #673916)
             # cache all of them
@@ -238,19 +239,19 @@
                 sorted_events[n] = event

         # Get uncached events
-        rows = tuple(row for row in self._cursor.execute("""
-            SELECT * FROM event_view
-            WHERE id IN (%s)
-            """ % ",".join("%d" % id for id in uncached_ids)))
+        rows = self._cursor.execute(""" SELECT * FROM event_view WHERE id IN (%s)
+            """ % ",".join("%d" % id for id in uncached_ids))

-        log.debug("Got %d raw events in %fs" % (len(rows), time.time()-t))
+        time_get_uncached = time.time() - t
         t = time.time()

         t_get_event = 0
         t_get_subject = 0
         t_apply_get_hooks = 0

+        row_counter = 0
         for row in rows:
+            row_counter += 1
             # Assumption: all rows of a same event for its different
             # subjects are in consecutive order.
             t_get_event -= time.time()
@@ -286,6 +287,7 @@
         # at a decent level


+        log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
         log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
         log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event))
         log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
@@ -561,13 +563,12 @@

         if max_events > 0:
             sql += " LIMIT %d" % max_events
-
-        result = tuple(r[0] for r in self._cursor.execute(sql, where.arguments))
+        result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))

         if return_mode == 0:
             log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
         elif return_mode == 1:
-            log.debug("Found %d events IDs in %fs" % (len(result), time.time()- t))
+            log.debug("Found %d events in %fs" % (len(result), time.time()- t))
             result = self.get_events(ids=result, sender=sender)
         else:
             raise Exception("%d" % return_mode)

=== modified file '_zeitgeist/engine/remote.py'
--- _zeitgeist/engine/remote.py 2011-06-02 20:15:11 +0000
+++ _zeitgeist/engine/remote.py 2011-06-11 02:35:46 +0000
@@ -77,7 +77,7 @@
         for event in events:
             if event is not None:
                 event._make_dbus_sendable()
-        return [NULL_EVENT if event is None else event for event in events]
+        return tuple(NULL_EVENT if event is None else event for event in events)

     # Reading stuff


=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2011-05-18 20:48:13 +0000
+++ _zeitgeist/engine/sql.py 2011-06-11 02:35:46 +0000
@@ -75,6 +75,14 @@
             explain_query(super(UnicodeCursor, self), statement, parameters)
         return super(UnicodeCursor, self).execute(statement, parameters)

+    def fetch(self, index=-1):
+        if index >= 0:
+            for row in self:
+                yield row[index]
+        else:
+            for row in self:
+                yield row
+
 def _get_schema_version (cursor, schema_name):
     """
     Returns the schema version for schema_name or returns 0 in case
@@ -206,6 +214,8 @@
     # we decided to set locking_mode to EXCLUSIVE, from now on only
     # one connection to the database is allowed; to revert this setting set locking_mode to NORMAL.
     cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+    # Seif: Disable cache since we already kinda support our own cache (LRUCache)
+    cursor.execute("PRAGMA cache_size = 0")

     # thekorn: as part of the workaround for (LP: #598666) we need to
     # create the '_fix_cache' TEMP table on every start,

=== modified file 'test/engine-test.py'
--- test/engine-test.py 2011-05-07 12:00:54 +0000
+++ test/engine-test.py 2011-06-11 02:35:46 +0000
@@ -446,7 +446,7 @@
         event = Event.new_for_values(subjects=[subj1, subj2])
         orig_ids = self.engine.insert_events([event])
         result_ids = self.engine.find_eventids(TimeRange.always(), [Event()], StorageState.Any, 0, 1)
-        self.assertEquals(orig_ids, result_ids)
+        self.assertEquals(orig_ids, list(result_ids))

     def testFindEventsEventTemplate(self):
         import_events("test/data/five_events.js", self.engine)
@@ -603,7 +603,7 @@
             [tmpl], StorageState.Any, 10, ResultType.MostRecentEvents)

         self.assertEquals(1, len(ids))
-        self.assertEquals(_ids, ids)
+        self.assertEquals(_ids, list(ids))

     def testNegation(self):
         import_events("test/data/five_events.js", self.engine)
@@ -1035,7 +1035,7 @@
                 reverse=True
             )
         ]
-        self.assertEquals(ids, sorted_event_ids)
+        self.assertEquals(list(ids), sorted_event_ids)

     def testResultTypesLeastRecentEvents(self):
         import_events("test/data/five_events.js", self.engine)
@@ -1049,7 +1049,7 @@
             event.id for event in sorted(events,
                 cmp=lambda x, y: cmp(int(x.timestamp), int(y.timestamp)))
         ]
-        self.assertEquals(ids, sorted_event_ids)
+        self.assertEquals(list(ids), sorted_event_ids)

     def testResultTypesMostPopularActor(self):
         import_events("test/data/twenty_events.js", self.engine)
@@ -1185,20 +1185,20 @@
         # Get the least recent actors
         ids = self.engine.find_eventids(TimeRange.always(),
             [], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [1, 3, 4])
+        self.assertEquals(list(ids), [1, 3, 4])

         # Get the least recent actors for "home/boo"
         template = Event.new_for_values(subject_uri="home/boo")
         ids = self.engine.find_eventids(TimeRange.always(),
             [template], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [2])
+        self.assertEquals(list(ids), [2])

         # Let's also try the same with MostRecentActor... Although there
         # should be no problem here.
         template = Event.new_for_values(subject_uri="home/boo")
         ids = self.engine.find_eventids(TimeRange.always(),
             [template], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [2])
+        self.assertEquals(list(ids), [2])

     def testResultTypesOldestActor(self):
         import_events("test/data/twenty_events.js", self.engine)
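For reference, a small standalone sketch of the fetch() idiom the diff adds to sql.py (table and column names are made up, and fetch() is written as a free function rather than a cursor method): one column is yielded lazily and fed straight into a packed array, so no intermediate tuple of rows is built.

# Standalone sketch of the fetch() idiom from sql.py above.
import sqlite3
from array import array

def fetch(rows, index=-1):
    # Same logic as the new UnicodeCursor.fetch(), as a free function.
    for row in rows:
        yield row if index < 0 else row[index]

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE event (id INTEGER)")  # illustrative schema
conn.executemany("INSERT INTO event VALUES (?)", [(1,), (2,), (3,)])
ids = array("i", fetch(conn.execute("SELECT id FROM event"), 0))
print(ids)  # array('i', [1, 2, 3])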
