Merge lp:~seif/zeitgeist/memory into lp:zeitgeist/0.1

Proposed by Seif Lotfy
Status: Merged
Merged at revision: 1767
Proposed branch: lp:~seif/zeitgeist/memory
Merge into: lp:zeitgeist/0.1
Diff against target: 195 lines (+31/-20)
5 files modified
_zeitgeist/engine/datamodel.py (+1/-1)
_zeitgeist/engine/main.py (+12/-11)
_zeitgeist/engine/remote.py (+1/-1)
_zeitgeist/engine/sql.py (+10/-0)
test/engine-test.py (+7/-7)
To merge this branch: bzr merge lp:~seif/zeitgeist/memory
Reviewer: Siegfried Gevatter
Status: Approve
Review via email: mp+63848@code.launchpad.net

Description of the change

Reduce memory consumption by:
1) using generators instead of materializing intermediate result tuples
2) disabling the SQLite page cache (no real performance decline)
3) using arrays for storing ids instead of lists
4) using tuples instead of lists where possible

The result is lower memory consumption. More could be saved if we started using __slots__, but this is a clean hack that doesn't touch the API/ABI. A sketch of the main saving follows below.
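For illustration, here is a minimal sketch (not from this branch; sizes are approximate for 64-bit CPython) of the saving behind point 3: a list holds a pointer per element, with each int boxed as a full object, while array("i") stores packed 4-byte C ints.

# Minimal sketch (not from this branch): footprint of a list of event
# ids versus a packed int array, as now used for the id buffers.
import sys
from array import array

ids = list(range(100000))
packed = array("i", ids)

print(sys.getsizeof(ids))     # ~800 KB of pointers, boxed ints on top
print(sys.getsizeof(packed))  # ~400 KB of raw 4-byte ints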

lp:~seif/zeitgeist/memory updated
1772. By Seif Lotfy

Make id_hash use an array as the default value instead of a list (this reduced memory consumption by 0.4 MB for reproducer.py).
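A hedged sketch of what this revision changes (the event ids below are made up): id_hash now maps each event id to a packed array of the positions where it occurs, instead of to a list.

# Sketch of the id_hash change: a defaultdict with an array("i")
# factory, so positions are stored as packed ints (ids are made up).
from array import array
from collections import defaultdict

id_hash = defaultdict(lambda: array("i"))
for position, event_id in enumerate([7, 3, 7]):
    id_hash[event_id].append(position)  # same id may occur more than once

print(dict(id_hash))  # e.g. {3: array('i', [1]), 7: array('i', [0, 2])}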

Revision history for this message
Siegfried Gevatter (rainct) wrote:

Merged, but without the PRAGMA.

review: Approve
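For context on the part that was left out: the branch also set SQLite's per-connection page cache to zero pages, on the theory that the engine's own LRUCache makes it redundant. A standalone sketch of that setting, against a throwaway in-memory database rather than Zeitgeist's:

# Sketch of the dropped PRAGMA: cache_size is a per-connection setting,
# and 0 asks SQLite to keep no pages cached for this connection.
import sqlite3

conn = sqlite3.connect(":memory:")  # throwaway DB for illustration
conn.execute("PRAGMA cache_size = 0")
print(conn.execute("PRAGMA cache_size").fetchone())  # (0,)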

Preview Diff

=== modified file '_zeitgeist/engine/datamodel.py'
--- _zeitgeist/engine/datamodel.py 2011-01-17 15:54:47 +0000
+++ _zeitgeist/engine/datamodel.py 2011-06-11 02:35:46 +0000
@@ -78,4 +78,4 @@
         }.iteritems():
             for prop in props:
                 datasource[prop] = plaintype(datasource[prop])
-        return list(datasource)
+        return tuple(datasource)

=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2011-06-04 14:49:19 +0000
+++ _zeitgeist/engine/main.py 2011-06-11 02:35:46 +0000
@@ -29,6 +29,7 @@
 import os
 import logging
 from collections import defaultdict
+from array import array

 from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
     ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD
@@ -199,8 +200,8 @@
             return []

         # Split ids into cached and uncached
-        uncached_ids = []
-        cached_ids = []
+        uncached_ids = array("i")
+        cached_ids = array("i")

         # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache
         use_cache = True
@@ -215,7 +216,7 @@
             else:
                 uncached_ids.append(id)

-        id_hash = defaultdict(list)
+        id_hash = defaultdict(lambda: array("i"))
         for n, id in enumerate(ids):
             # the same id can be at multiple places (LP: #673916)
             # cache all of them
@@ -238,19 +239,19 @@
                 sorted_events[n] = event

         # Get uncached events
-        rows = tuple(row for row in self._cursor.execute("""
-            SELECT * FROM event_view
-            WHERE id IN (%s)
-            """ % ",".join("%d" % id for id in uncached_ids)))
+        rows = self._cursor.execute(""" SELECT * FROM event_view WHERE id IN (%s)
+            """ % ",".join("%d" % id for id in uncached_ids))

-        log.debug("Got %d raw events in %fs" % (len(rows), time.time()-t))
+        time_get_uncached = time.time() - t
         t = time.time()

         t_get_event = 0
         t_get_subject = 0
         t_apply_get_hooks = 0

+        row_counter = 0
         for row in rows:
+            row_counter += 1
             # Assumption: all rows of a same event for its different
             # subjects are in consecutive order.
             t_get_event -= time.time()
@@ -286,6 +287,7 @@
         # at a decent level


+        log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
         log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
         log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event))
         log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
@@ -561,13 +563,12 @@

         if max_events > 0:
             sql += " LIMIT %d" % max_events
-
-        result = tuple(r[0] for r in self._cursor.execute(sql, where.arguments))
+        result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))

         if return_mode == 0:
             log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
         elif return_mode == 1:
-            log.debug("Found %d events IDs in %fs" % (len(result), time.time()- t))
+            log.debug("Found %d events in %fs" % (len(result), time.time()- t))
             result = self.get_events(ids=result, sender=sender)
         else:
             raise Exception("%d" % return_mode)

=== modified file '_zeitgeist/engine/remote.py'
--- _zeitgeist/engine/remote.py 2011-06-02 20:15:11 +0000
+++ _zeitgeist/engine/remote.py 2011-06-11 02:35:46 +0000
@@ -77,7 +77,7 @@
         for event in events:
             if event is not None:
                 event._make_dbus_sendable()
-        return [NULL_EVENT if event is None else event for event in events]
+        return tuple(NULL_EVENT if event is None else event for event in events)

     # Reading stuff


=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2011-05-18 20:48:13 +0000
+++ _zeitgeist/engine/sql.py 2011-06-11 02:35:46 +0000
@@ -75,6 +75,14 @@
             explain_query(super(UnicodeCursor, self), statement, parameters)
         return super(UnicodeCursor, self).execute(statement, parameters)

+    def fetch(self, index=-1):
+        if index >= 0:
+            for row in self:
+                yield row[index]
+        else:
+            for row in self:
+                yield row
+
 def _get_schema_version (cursor, schema_name):
     """
     Returns the schema version for schema_name or returns 0 in case
@@ -206,6 +214,8 @@
     # we decided to set locking_mode to EXCLUSIVE, from now on only
     # one connection to the database is allowed; to revert this setting set locking_mode to NORMAL.
     cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+    # Seif: Disable cache since we already kinda support our own cache (LRUCache)
+    cursor.execute("PRAGMA cache_size = 0")

     # thekorn: as part of the workaround for (LP: #598666) we need to
     # create the '_fix_cache' TEMP table on every start,

=== modified file 'test/engine-test.py'
--- test/engine-test.py 2011-05-07 12:00:54 +0000
+++ test/engine-test.py 2011-06-11 02:35:46 +0000
@@ -446,7 +446,7 @@
         event = Event.new_for_values(subjects=[subj1, subj2])
         orig_ids = self.engine.insert_events([event])
         result_ids = self.engine.find_eventids(TimeRange.always(), [Event()], StorageState.Any, 0, 1)
-        self.assertEquals(orig_ids, result_ids)
+        self.assertEquals(orig_ids, list(result_ids))

     def testFindEventsEventTemplate(self):
         import_events("test/data/five_events.js", self.engine)
@@ -603,7 +603,7 @@
             [tmpl], StorageState.Any, 10, ResultType.MostRecentEvents)

         self.assertEquals(1, len(ids))
-        self.assertEquals(_ids, ids)
+        self.assertEquals(_ids, list(ids))

     def testNegation(self):
         import_events("test/data/five_events.js", self.engine)
@@ -1035,7 +1035,7 @@
                 reverse=True
             )
         ]
-        self.assertEquals(ids, sorted_event_ids)
+        self.assertEquals(list(ids), sorted_event_ids)

     def testResultTypesLeastRecentEvents(self):
         import_events("test/data/five_events.js", self.engine)
@@ -1049,7 +1049,7 @@
             event.id for event in sorted(events,
                 cmp=lambda x, y: cmp(int(x.timestamp), int(y.timestamp)))
         ]
-        self.assertEquals(ids, sorted_event_ids)
+        self.assertEquals(list(ids), sorted_event_ids)

     def testResultTypesMostPopularActor(self):
         import_events("test/data/twenty_events.js", self.engine)
@@ -1185,20 +1185,20 @@
         # Get the least recent actors
         ids = self.engine.find_eventids(TimeRange.always(),
             [], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [1, 3, 4])
+        self.assertEquals(list(ids), [1, 3, 4])

         # Get the least recent actors for "home/boo"
         template = Event.new_for_values(subject_uri="home/boo")
         ids = self.engine.find_eventids(TimeRange.always(),
             [template], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [2])
+        self.assertEquals(list(ids), [2])

         # Let's also try the same with MostRecentActor... Although there
         # should be no problem here.
         template = Event.new_for_values(subject_uri="home/boo")
         ids = self.engine.find_eventids(TimeRange.always(),
             [template], StorageState.Any, 0, ResultType.OldestActor)
-        self.assertEquals(ids, [2])
+        self.assertEquals(list(ids), [2])

     def testResultTypesOldestActor(self):
         import_events("test/data/twenty_events.js", self.engine)
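For reference, a small standalone sketch of the fetch() idiom the diff adds to sql.py (table and column names are made up, and fetch() is written as a free function rather than a cursor method): one column is yielded lazily and fed straight into a packed array, so no intermediate tuple of rows is built.

# Standalone sketch of the fetch() idiom from sql.py above.
import sqlite3
from array import array

def fetch(rows, index=-1):
    # Same logic as the new UnicodeCursor.fetch(), as a free function.
    for row in rows:
        yield row if index < 0 else row[index]

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE event (id INTEGER)")  # illustrative schema
conn.executemany("INSERT INTO event VALUES (?)", [(1,), (2,), (3,)])
ids = array("i", fetch(conn.execute("SELECT id FROM event"), 0))
print(ids)  # array('i', [1, 2, 3])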
