Merge lp:~mwhudson/loggerhead/persist-_rev_info into lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy

Proposed by Michael Hudson-Doyle
Status: Merged
Merge reported by: Michael Hudson-Doyle
Merged at revision: not available
Proposed branch: lp:~mwhudson/loggerhead/persist-_rev_info
Merge into: lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy
Diff against target: None lines
To merge this branch: bzr merge lp:~mwhudson/loggerhead/persist-_rev_info
Reviewer Review Type Date Requested Status
Paul Hummer (community) Approve
Review via email: mp+5854@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Paul Hummer (rockstar) wrote :

Thanks for documenting the whole history data structures.

review: Approve
Revision history for this message
Matt Nordhoff (mnordhoff) wrote :

It's not a big deal, but could somebody make this merge request... go
away? This branch wasn't merged into
less-stupid-whole-history-redundancy, but it *was* merged into
lp:loggerhead, so for all intents and purposes it's been dealt with. But
this request is still open, so it's still listed on the merge request
summary page.
--

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'loggerhead/apps/branch.py'
--- loggerhead/apps/branch.py 2009-04-07 18:29:10 +0000
+++ loggerhead/apps/branch.py 2009-04-24 10:23:44 +0000
@@ -38,27 +38,31 @@
38 self.branch_link = branch_link # Currently only used in Launchpad38 self.branch_link = branch_link # Currently only used in Launchpad
39 self.log = logging.getLogger('loggerhead.%s' % friendly_name)39 self.log = logging.getLogger('loggerhead.%s' % friendly_name)
40 if graph_cache is None:40 if graph_cache is None:
41 graph_cache = bzrlib.lru_cache.LRUCache()41 graph_cache = bzrlib.lru_cache.LRUCache(10)
42 self.graph_cache = graph_cache42 self.graph_cache = graph_cache
43 self.is_root = is_root43 self.is_root = is_root
44 self.served_url = served_url44 self.served_url = served_url
45 self.use_cdn = use_cdn45 self.use_cdn = use_cdn
4646
47 def get_history(self):47 def get_history(self):
48 _history = History(self.branch, self.graph_cache)48 file_cache = None
49 revinfo_disk_cache = None
49 cache_path = self._config.get('cachepath', None)50 cache_path = self._config.get('cachepath', None)
50 if cache_path is not None:51 if cache_path is not None:
51 # Only import the cache if we're going to use it.52 # Only import the cache if we're going to use it.
52 # This makes sqlite optional53 # This makes sqlite optional
53 try:54 try:
54 from loggerhead.changecache import FileChangeCache55 from loggerhead.changecache import (
56 FileChangeCache, RevInfoDiskCache)
55 except ImportError:57 except ImportError:
56 self.log.debug("Couldn't load python-sqlite,"58 self.log.debug("Couldn't load python-sqlite,"
57 " continuing without using a cache")59 " continuing without using a cache")
58 else:60 else:
59 _history.use_file_cache(61 file_cache = FileChangeCache(cache_path)
60 FileChangeCache(_history, cache_path))62 revinfo_disk_cache = RevInfoDiskCache(cache_path)
61 return _history63 return History(
64 self.branch, self.graph_cache, file_cache=file_cache,
65 revinfo_disk_cache=revinfo_disk_cache, cache_key=self.friendly_name)
6266
63 def url(self, *args, **kw):67 def url(self, *args, **kw):
64 if isinstance(args[0], list):68 if isinstance(args[0], list):
6569
=== modified file 'loggerhead/apps/filesystem.py'
--- loggerhead/apps/filesystem.py 2009-04-23 23:39:18 +0000
+++ loggerhead/apps/filesystem.py 2009-04-24 10:23:44 +0000
@@ -71,7 +71,7 @@
71class BranchesFromFileSystemRoot(object):71class BranchesFromFileSystemRoot(object):
7272
73 def __init__(self, folder, config):73 def __init__(self, folder, config):
74 self.graph_cache = lru_cache.LRUCache()74 self.graph_cache = lru_cache.LRUCache(10)
75 self.folder = folder75 self.folder = folder
76 self._config = config76 self._config = config
7777
@@ -94,7 +94,7 @@
94class UserBranchesFromFileSystemRoot(object):94class UserBranchesFromFileSystemRoot(object):
9595
96 def __init__(self, folder, config):96 def __init__(self, folder, config):
97 self.graph_cache = lru_cache.LRUCache()97 self.graph_cache = lru_cache.LRUCache(10)
98 self.folder = folder98 self.folder = folder
99 self._config = config99 self._config = config
100 self.trunk_dir = config.get_option('trunk_dir')100 self.trunk_dir = config.get_option('trunk_dir')
101101
=== modified file 'loggerhead/changecache.py'
--- loggerhead/changecache.py 2009-03-19 21:01:11 +0000
+++ loggerhead/changecache.py 2009-04-24 10:30:15 +0000
@@ -27,8 +27,10 @@
27"""27"""
2828
29import cPickle29import cPickle
30import marshal
30import os31import os
31import tempfile32import tempfile
33import zlib
3234
33try:35try:
34 from sqlite3 import dbapi236 from sqlite3 import dbapi2
@@ -38,16 +40,25 @@
38# We take an optimistic approach to concurrency here: we might do work twice40# We take an optimistic approach to concurrency here: we might do work twice
39# in the case of races, but not crash or corrupt data.41# in the case of races, but not crash or corrupt data.
4042
43def safe_init_db(filename, init_sql):
44 # To avoid races around creating the database, we create the db in
45 # a temporary file and rename it into the ultimate location.
46 fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename))
47 con = dbapi2.connect(temp_path)
48 cur = con.cursor()
49 cur.execute(init_sql)
50 con.commit()
51 con.close()
52 os.rename(temp_path, filename)
53
41class FakeShelf(object):54class FakeShelf(object):
4255
43 def __init__(self, filename):56 def __init__(self, filename):
44 create_table = not os.path.exists(filename)57 create_table = not os.path.exists(filename)
45 if create_table:58 if create_table:
46 # To avoid races around creating the database, we create the db in59 safe_init_db(
47 # a temporary file and rename it into the ultimate location.60 filename, "create table RevisionData "
48 fd, path = tempfile.mkstemp(dir=os.path.dirname(filename))61 "(revid binary primary key, data binary)")
49 self._create_table(path)
50 os.rename(path, filename)
51 self.connection = dbapi2.connect(filename)62 self.connection = dbapi2.connect(filename)
52 self.cursor = self.connection.cursor()63 self.cursor = self.connection.cursor()
5364
@@ -89,8 +100,7 @@
89100
90class FileChangeCache(object):101class FileChangeCache(object):
91102
92 def __init__(self, history, cache_path):103 def __init__(self, cache_path):
93 self.history = history
94104
95 if not os.path.exists(cache_path):105 if not os.path.exists(cache_path):
96 os.mkdir(cache_path)106 os.mkdir(cache_path)
@@ -104,3 +114,44 @@
104 changes = self.history.get_file_changes_uncached(entry)114 changes = self.history.get_file_changes_uncached(entry)
105 cache.add(entry.revid, changes)115 cache.add(entry.revid, changes)
106 return changes116 return changes
117
118
119class RevInfoDiskCache(object):
120 """Like `RevInfoMemoryCache` but backed by a sqlite DB."""
121
122 def __init__(self, cache_path):
123 if not os.path.exists(cache_path):
124 os.mkdir(cache_path)
125 filename = os.path.join(cache_path, 'revinfo.sql')
126 create_table = not os.path.exists(filename)
127 if create_table:
128 safe_init_db(
129 filename, "create table Data "
130 "(key binary primary key, revid binary, data binary)")
131 self.connection = dbapi2.connect(filename)
132 self.cursor = self.connection.cursor()
133
134 def get(self, key, revid):
135 self.cursor.execute(
136 "select revid, data from data where key = ?", (dbapi2.Binary(key),))
137 row = self.cursor.fetchone()
138 if row is None:
139 return None
140 elif str(row[0]) != revid:
141 return None
142 else:
143 return marshal.loads(zlib.decompress(row[1]))
144
145 def set(self, key, revid, data):
146 try:
147 self.cursor.execute(
148 'delete from data where key = ?', (dbapi2.Binary(key), ))
149 blob = zlib.compress(marshal.dumps(data))
150 self.cursor.execute(
151 "insert into data (key, revid, data) values (?, ?, ?)",
152 map(dbapi2.Binary, [key, revid, blob]))
153 self.connection.commit()
154 except dbapi2.IntegrityError:
155 # If another thread or process attempted to set the same key, we
156 # don't care too much -- it's only a cache after all!
157 pass
107158
=== modified file 'loggerhead/history.py'
--- loggerhead/history.py 2009-04-24 03:44:30 +0000
+++ loggerhead/history.py 2009-04-24 10:30:15 +0000
@@ -31,6 +31,7 @@
31import bisect31import bisect
32import datetime32import datetime
33import logging33import logging
34import marshal
34import re35import re
35import textwrap36import textwrap
36import threading37import threading
@@ -47,6 +48,7 @@
47import bzrlib.delta48import bzrlib.delta
48import bzrlib.diff49import bzrlib.diff
49import bzrlib.errors50import bzrlib.errors
51import bzrlib.lru_cache
50import bzrlib.progress52import bzrlib.progress
51import bzrlib.revision53import bzrlib.revision
52import bzrlib.textfile54import bzrlib.textfile
@@ -177,6 +179,43 @@
177 file_id=file_id))179 file_id=file_id))
178180
179181
182class RevInfoMemoryCache(object):
183 """A store that validates values against the revids they were stored with.
184
185 We use a unique key for each branch.
186
187 The reason for not just using the revid as the key is so that when a new
188 value is provided for a branch, we replace the old value used for the
189 branch.
190
191 There is another implementation of the same interface in
192 loggerhead.changecache.RevInfoDiskCache.
193 """
194
195 def __init__(self, cache):
196 self._cache = cache
197
198 def get(self, key, revid):
199 """Return the data associated with `key`, subject to a revid check.
200
201 If a value was stored under `key`, with the same revid, return it.
202 Otherwise return None.
203 """
204 cached = self._cache.get(key)
205 if cached is None:
206 return None
207 stored_revid, data = cached
208 if revid == stored_revid:
209 return data
210 else:
211 return None
212
213 def set(self, key, revid, data):
214 """Store `data` under `key`, to be checked against `revid` on get().
215 """
216 self._cache[key] = (revid, data)
217
218
180class History (object):219class History (object):
181 """Decorate a branch to provide information for rendering.220 """Decorate a branch to provide information for rendering.
182221
@@ -185,13 +224,76 @@
185 around it, serve the request, throw the History object away, unlock the224 around it, serve the request, throw the History object away, unlock the
186 branch and throw it away.225 branch and throw it away.
187226
188 :ivar _file_change_cache: xx227 :ivar _file_change_cache: An object that caches information about the
228 files that changed between two revisions.
229 :ivar _rev_info: A list of information about revisions. This is by far
230 the most cryptic data structure in loggerhead. At the top level, it
231 is a list of 3-tuples [(merge-info, where-merged, parents)].
232 `merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) --
233 like an entry in a merge-sorted list, but the revno is stringified.
234 `where-merged` is a tuple of revisions that have this revision as a
235 non-lefthand parent. Finally, `parents` is just the usual list of
236 parents of this revision.
237 :ivar _rev_indices: A dictionary mapping each revision id to the index of
238 the information about it in _rev_info.
239 :ivar _full_history: A list of all revision ids in the ancestry of the
240 branch, in merge-sorted order. This is a bit silly, and shouldn't
241 really be stored on the instance...
242 :ivar _revno_revid: A dictionary mapping stringified revnos to revision
243 ids.
189 """244 """
190245
191 def __init__(self, branch, whole_history_data_cache):246 def _load_whole_history_data(self, caches, cache_key):
247 """Set the attributes relating to the whole history of the branch.
248
249 :param caches: a list of caches with interfaces like
250 `RevInfoMemoryCache`, ordered from fastest to slowest.
251 :param cache_key: the key to use with the caches.
252 """
253 self._rev_indices = None
254 self._rev_info = None
255
256 missed_caches = []
257 def update_missed_caches():
258 for cache in missed_caches:
259 cache.set(cache_key, self.last_revid, self._rev_info)
260 for cache in caches:
261 data = cache.get(cache_key, self.last_revid)
262 if data is not None:
263 self._rev_info = data
264 update_missed_caches()
265 break
266 else:
267 missed_caches.append(cache)
268 else:
269 whole_history_data = compute_whole_history_data(self._branch)
270 self._rev_info, self._rev_indices = whole_history_data
271 update_missed_caches()
272
273 if self._rev_indices is not None:
274 self._full_history = []
275 self._revno_revid = {}
276 for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
277 self._revno_revid[revno_str] = revid
278 self._full_history.append(revid)
279 else:
280 self._full_history = []
281 self._revno_revid = {}
282 self._rev_indices = {}
283 for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
284 self._rev_indices[revid] = seq
285 self._revno_revid[revno_str] = revid
286 self._full_history.append(revid)
287
288 def __init__(self, branch, whole_history_data_cache, file_cache=None,
289 revinfo_disk_cache=None, cache_key=None):
192 assert branch.is_locked(), (290 assert branch.is_locked(), (
193 "Can only construct a History object with a read-locked branch.")291 "Can only construct a History object with a read-locked branch.")
194 self._file_change_cache = None292 if file_cache is not None:
293 self._file_change_cache = file_cache
294 file_cache.history = self
295 else:
296 self._file_change_cache = None
195 self._branch = branch297 self._branch = branch
196 self._inventory_cache = {}298 self._inventory_cache = {}
197 self._branch_nick = self._branch.get_config().get_nickname()299 self._branch_nick = self._branch.get_config().get_nickname()
@@ -199,28 +301,10 @@
199301
200 self.last_revid = branch.last_revision()302 self.last_revid = branch.last_revision()
201303
202 cached_whole_history_data = whole_history_data_cache.get(self.last_revid)304 caches = [RevInfoMemoryCache(whole_history_data_cache)]
203 if cached_whole_history_data is None:305 if revinfo_disk_cache:
204 whole_history_data = compute_whole_history_data(branch)306 caches.append(revinfo_disk_cache)
205 (self._rev_info, self._rev_indices) = whole_history_data307 self._load_whole_history_data(caches, cache_key)
206 whole_history_data_cache[self.last_revid] = whole_history_data[0]
207 self._full_history = []
208 self._revno_revid = {}
209 for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
210 self._revno_revid[revno_str] = revid
211 self._full_history.append(revid)
212 else:
213 self._rev_info = cached_whole_history_data
214 self._full_history = []
215 self._revno_revid = {}
216 self._rev_indices = {}
217 for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
218 self._rev_indices[revid] = seq
219 self._revno_revid[revno_str] = revid
220 self._full_history.append(revid)
221
222 def use_file_cache(self, cache):
223 self._file_change_cache = cache
224308
225 @property309 @property
226 def has_revisions(self):310 def has_revisions(self):
227311
=== modified file 'loggerhead/wholehistory.py'
--- loggerhead/wholehistory.py 2009-04-24 03:12:28 +0000
+++ loggerhead/wholehistory.py 2009-04-24 10:30:15 +0000
@@ -37,6 +37,10 @@
3737
3838
39def compute_whole_history_data(branch):39def compute_whole_history_data(branch):
40 """Compute _rev_info and _rev_indices for a branch.
41
42 See History.__doc__ for what these data structures mean.
43 """
40 z = time.time()44 z = time.time()
4145
42 last_revid = branch.last_revision()46 last_revid = branch.last_revision()

Subscribers

People subscribed via source and target branches

to all changes: