Merge lp:~mwhudson/loggerhead/persist-_rev_info into lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy
- persist-_rev_info
- Merge into less-stupid-whole-history-...
Proposed by
Michael Hudson-Doyle
Status: | Merged |
---|---|
Merge reported by: | Michael Hudson-Doyle |
Merged at revision: | not available |
Proposed branch: | lp:~mwhudson/loggerhead/persist-_rev_info |
Merge into: | lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy |
Diff against target: | not available |
To merge this branch: | bzr merge lp:~mwhudson/loggerhead/persist-_rev_info |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Paul Hummer (community) | Approve | ||
Review via email: mp+5854@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Matt Nordhoff (mnordhoff) wrote : | # |
It's not a big deal, but could somebody make this merge request... go
away? This branch wasn't merged into
less-stupid-whole-history-redundancy directly, but its changes did land in
lp:loggerhead, so for all intents and purposes it's been dealt with. But
this request is still open, so it's still listed on the merge request
summary page.
--
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'loggerhead/apps/branch.py' |
2 | --- loggerhead/apps/branch.py 2009-04-07 18:29:10 +0000 |
3 | +++ loggerhead/apps/branch.py 2009-04-24 10:23:44 +0000 |
4 | @@ -38,27 +38,31 @@ |
5 | self.branch_link = branch_link # Currently only used in Launchpad |
6 | self.log = logging.getLogger('loggerhead.%s' % friendly_name) |
7 | if graph_cache is None: |
8 | - graph_cache = bzrlib.lru_cache.LRUCache() |
9 | + graph_cache = bzrlib.lru_cache.LRUCache(10) |
10 | self.graph_cache = graph_cache |
11 | self.is_root = is_root |
12 | self.served_url = served_url |
13 | self.use_cdn = use_cdn |
14 | |
15 | def get_history(self): |
16 | - _history = History(self.branch, self.graph_cache) |
17 | + file_cache = None |
18 | + revinfo_disk_cache = None |
19 | cache_path = self._config.get('cachepath', None) |
20 | if cache_path is not None: |
21 | # Only import the cache if we're going to use it. |
22 | # This makes sqlite optional |
23 | try: |
24 | - from loggerhead.changecache import FileChangeCache |
25 | + from loggerhead.changecache import ( |
26 | + FileChangeCache, RevInfoDiskCache) |
27 | except ImportError: |
28 | self.log.debug("Couldn't load python-sqlite," |
29 | " continuing without using a cache") |
30 | else: |
31 | - _history.use_file_cache( |
32 | - FileChangeCache(_history, cache_path)) |
33 | - return _history |
34 | + file_cache = FileChangeCache(cache_path) |
35 | + revinfo_disk_cache = RevInfoDiskCache(cache_path) |
36 | + return History( |
37 | + self.branch, self.graph_cache, file_cache=file_cache, |
38 | + revinfo_disk_cache=revinfo_disk_cache, cache_key=self.friendly_name) |
39 | |
40 | def url(self, *args, **kw): |
41 | if isinstance(args[0], list): |
42 | |
43 | === modified file 'loggerhead/apps/filesystem.py' |
44 | --- loggerhead/apps/filesystem.py 2009-04-23 23:39:18 +0000 |
45 | +++ loggerhead/apps/filesystem.py 2009-04-24 10:23:44 +0000 |
46 | @@ -71,7 +71,7 @@ |
47 | class BranchesFromFileSystemRoot(object): |
48 | |
49 | def __init__(self, folder, config): |
50 | - self.graph_cache = lru_cache.LRUCache() |
51 | + self.graph_cache = lru_cache.LRUCache(10) |
52 | self.folder = folder |
53 | self._config = config |
54 | |
55 | @@ -94,7 +94,7 @@ |
56 | class UserBranchesFromFileSystemRoot(object): |
57 | |
58 | def __init__(self, folder, config): |
59 | - self.graph_cache = lru_cache.LRUCache() |
60 | + self.graph_cache = lru_cache.LRUCache(10) |
61 | self.folder = folder |
62 | self._config = config |
63 | self.trunk_dir = config.get_option('trunk_dir') |
64 | |
65 | === modified file 'loggerhead/changecache.py' |
66 | --- loggerhead/changecache.py 2009-03-19 21:01:11 +0000 |
67 | +++ loggerhead/changecache.py 2009-04-24 10:30:15 +0000 |
68 | @@ -27,8 +27,10 @@ |
69 | """ |
70 | |
71 | import cPickle |
72 | +import marshal |
73 | import os |
74 | import tempfile |
75 | +import zlib |
76 | |
77 | try: |
78 | from sqlite3 import dbapi2 |
79 | @@ -38,16 +40,25 @@ |
80 | # We take an optimistic approach to concurrency here: we might do work twice |
81 | # in the case of races, but not crash or corrupt data. |
82 | |
83 | +def safe_init_db(filename, init_sql): |
84 | + # To avoid races around creating the database, we create the db in |
85 | + # a temporary file and rename it into the ultimate location. |
86 | + fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename)) |
87 | + con = dbapi2.connect(temp_path) |
88 | + cur = con.cursor() |
89 | + cur.execute(init_sql) |
90 | + con.commit() |
91 | + con.close() |
92 | + os.rename(temp_path, filename) |
93 | + |
94 | class FakeShelf(object): |
95 | |
96 | def __init__(self, filename): |
97 | create_table = not os.path.exists(filename) |
98 | if create_table: |
99 | - # To avoid races around creating the database, we create the db in |
100 | - # a temporary file and rename it into the ultimate location. |
101 | - fd, path = tempfile.mkstemp(dir=os.path.dirname(filename)) |
102 | - self._create_table(path) |
103 | - os.rename(path, filename) |
104 | + safe_init_db( |
105 | + filename, "create table RevisionData " |
106 | + "(revid binary primary key, data binary)") |
107 | self.connection = dbapi2.connect(filename) |
108 | self.cursor = self.connection.cursor() |
109 | |
110 | @@ -89,8 +100,7 @@ |
111 | |
112 | class FileChangeCache(object): |
113 | |
114 | - def __init__(self, history, cache_path): |
115 | - self.history = history |
116 | + def __init__(self, cache_path): |
117 | |
118 | if not os.path.exists(cache_path): |
119 | os.mkdir(cache_path) |
120 | @@ -104,3 +114,44 @@ |
121 | changes = self.history.get_file_changes_uncached(entry) |
122 | cache.add(entry.revid, changes) |
123 | return changes |
124 | + |
125 | + |
126 | +class RevInfoDiskCache(object): |
127 | + """Like `RevInfoMemoryCache` but backed in a sqlite DB.""" |
128 | + |
129 | + def __init__(self, cache_path): |
130 | + if not os.path.exists(cache_path): |
131 | + os.mkdir(cache_path) |
132 | + filename = os.path.join(cache_path, 'revinfo.sql') |
133 | + create_table = not os.path.exists(filename) |
134 | + if create_table: |
135 | + safe_init_db( |
136 | + filename, "create table Data " |
137 | + "(key binary primary key, revid binary, data binary)") |
138 | + self.connection = dbapi2.connect(filename) |
139 | + self.cursor = self.connection.cursor() |
140 | + |
141 | + def get(self, key, revid): |
142 | + self.cursor.execute( |
143 | + "select revid, data from data where key = ?", (dbapi2.Binary(key),)) |
144 | + row = self.cursor.fetchone() |
145 | + if row is None: |
146 | + return None |
147 | + elif str(row[0]) != revid: |
148 | + return None |
149 | + else: |
150 | + return marshal.loads(zlib.decompress(row[1])) |
151 | + |
152 | + def set(self, key, revid, data): |
153 | + try: |
154 | + self.cursor.execute( |
155 | + 'delete from data where key = ?', (dbapi2.Binary(key), )) |
156 | + blob = zlib.compress(marshal.dumps(data)) |
157 | + self.cursor.execute( |
158 | + "insert into data (key, revid, data) values (?, ?, ?)", |
159 | + map(dbapi2.Binary, [key, revid, blob])) |
160 | + self.connection.commit() |
161 | + except dbapi2.IntegrityError: |
162 | + # If another thread or process attempted to set the same key, we |
163 | + # don't care too much -- it's only a cache after all! |
164 | + pass |
165 | |
166 | === modified file 'loggerhead/history.py' |
167 | --- loggerhead/history.py 2009-04-24 03:44:30 +0000 |
168 | +++ loggerhead/history.py 2009-04-24 10:30:15 +0000 |
169 | @@ -31,6 +31,7 @@ |
170 | import bisect |
171 | import datetime |
172 | import logging |
173 | +import marshal |
174 | import re |
175 | import textwrap |
176 | import threading |
177 | @@ -47,6 +48,7 @@ |
178 | import bzrlib.delta |
179 | import bzrlib.diff |
180 | import bzrlib.errors |
181 | +import bzrlib.lru_cache |
182 | import bzrlib.progress |
183 | import bzrlib.revision |
184 | import bzrlib.textfile |
185 | @@ -177,6 +179,43 @@ |
186 | file_id=file_id)) |
187 | |
188 | |
189 | +class RevInfoMemoryCache(object): |
190 | + """A store that validates values against the revids they were stored with. |
191 | + |
192 | + We use a unique key for each branch. |
193 | + |
194 | + The reason for not just using the revid as the key is so that when a new |
195 | + value is provided for a branch, we replace the old value used for the |
196 | + branch. |
197 | + |
198 | + There is another implementation of the same interface in |
199 | + loggerhead.changecache.RevInfoDiskCache. |
200 | + """ |
201 | + |
202 | + def __init__(self, cache): |
203 | + self._cache = cache |
204 | + |
205 | + def get(self, key, revid): |
206 | + """Return the data associated with `key`, subject to a revid check. |
207 | + |
208 | + If a value was stored under `key`, with the same revid, return it. |
209 | + Otherwise return None. |
210 | + """ |
211 | + cached = self._cache.get(key) |
212 | + if cached is None: |
213 | + return None |
214 | + stored_revid, data = cached |
215 | + if revid == stored_revid: |
216 | + return data |
217 | + else: |
218 | + return None |
219 | + |
220 | + def set(self, key, revid, data): |
221 | + """Store `data` under `key`, to be checked against `revid` on get(). |
222 | + """ |
223 | + self._cache[key] = (revid, data) |
224 | + |
225 | + |
226 | class History (object): |
227 | """Decorate a branch to provide information for rendering. |
228 | |
229 | @@ -185,13 +224,76 @@ |
230 | around it, serve the request, throw the History object away, unlock the |
231 | branch and throw it away. |
232 | |
233 | - :ivar _file_change_cache: xx |
234 | + :ivar _file_change_cache: An object that caches information about the |
235 | + files that changed between two revisions. |
236 | + :ivar _rev_info: A list of information about revisions. This is by far |
237 | + the most cryptic data structure in loggerhead. At the top level, it |
238 | + is a list of 3-tuples [(merge-info, where-merged, parents)]. |
239 | + `merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) -- |
240 | + like a merged sorted list, but the revno is stringified. |
241 | + `where-merged` is a tuple of revisions that have this revision as a |
242 | + non-lefthand parent. Finally, `parents` is just the usual list of |
243 | + parents of this revision. |
244 | + :ivar _rev_indices: A dictionary mapping each revision id to the index of |
245 | + the information about it in _rev_info. |
246 | + :ivar _full_history: A list of all revision ids in the ancestry of the |
247 | + branch, in merge-sorted order. This is a bit silly, and shouldn't |
248 | + really be stored on the instance... |
249 | + :ivar _revno_revid: A dictionary mapping stringified revnos to revision |
250 | + ids. |
251 | """ |
252 | |
253 | - def __init__(self, branch, whole_history_data_cache): |
254 | + def _load_whole_history_data(self, caches, cache_key): |
255 | + """Set the attributes relating to the whole history of the branch. |
256 | + |
257 | + :param caches: a list of caches with interfaces like |
258 | + `RevInfoMemoryCache` and be ordered from fastest to slowest. |
259 | + :param cache_key: the key to use with the caches. |
260 | + """ |
261 | + self._rev_indices = None |
262 | + self._rev_info = None |
263 | + |
264 | + missed_caches = [] |
265 | + def update_missed_caches(): |
266 | + for cache in missed_caches: |
267 | + cache.set(cache_key, self.last_revid, self._rev_info) |
268 | + for cache in caches: |
269 | + data = cache.get(cache_key, self.last_revid) |
270 | + if data is not None: |
271 | + self._rev_info = data |
272 | + update_missed_caches() |
273 | + break |
274 | + else: |
275 | + missed_caches.append(cache) |
276 | + else: |
277 | + whole_history_data = compute_whole_history_data(self._branch) |
278 | + self._rev_info, self._rev_indices = whole_history_data |
279 | + update_missed_caches() |
280 | + |
281 | + if self._rev_indices is not None: |
282 | + self._full_history = [] |
283 | + self._revno_revid = {} |
284 | + for ((_, revid, _, revno_str, _), _, _) in self._rev_info: |
285 | + self._revno_revid[revno_str] = revid |
286 | + self._full_history.append(revid) |
287 | + else: |
288 | + self._full_history = [] |
289 | + self._revno_revid = {} |
290 | + self._rev_indices = {} |
291 | + for ((seq, revid, _, revno_str, _), _, _) in self._rev_info: |
292 | + self._rev_indices[revid] = seq |
293 | + self._revno_revid[revno_str] = revid |
294 | + self._full_history.append(revid) |
295 | + |
296 | + def __init__(self, branch, whole_history_data_cache, file_cache=None, |
297 | + revinfo_disk_cache=None, cache_key=None): |
298 | assert branch.is_locked(), ( |
299 | "Can only construct a History object with a read-locked branch.") |
300 | - self._file_change_cache = None |
301 | + if file_cache is not None: |
302 | + self._file_change_cache = file_cache |
303 | + file_cache.history = self |
304 | + else: |
305 | + self._file_change_cache = None |
306 | self._branch = branch |
307 | self._inventory_cache = {} |
308 | self._branch_nick = self._branch.get_config().get_nickname() |
309 | @@ -199,28 +301,10 @@ |
310 | |
311 | self.last_revid = branch.last_revision() |
312 | |
313 | - cached_whole_history_data = whole_history_data_cache.get(self.last_revid) |
314 | - if cached_whole_history_data is None: |
315 | - whole_history_data = compute_whole_history_data(branch) |
316 | - (self._rev_info, self._rev_indices) = whole_history_data |
317 | - whole_history_data_cache[self.last_revid] = whole_history_data[0] |
318 | - self._full_history = [] |
319 | - self._revno_revid = {} |
320 | - for ((_, revid, _, revno_str, _), _, _) in self._rev_info: |
321 | - self._revno_revid[revno_str] = revid |
322 | - self._full_history.append(revid) |
323 | - else: |
324 | - self._rev_info = cached_whole_history_data |
325 | - self._full_history = [] |
326 | - self._revno_revid = {} |
327 | - self._rev_indices = {} |
328 | - for ((seq, revid, _, revno_str, _), _, _) in self._rev_info: |
329 | - self._rev_indices[revid] = seq |
330 | - self._revno_revid[revno_str] = revid |
331 | - self._full_history.append(revid) |
332 | - |
333 | - def use_file_cache(self, cache): |
334 | - self._file_change_cache = cache |
335 | + caches = [RevInfoMemoryCache(whole_history_data_cache)] |
336 | + if revinfo_disk_cache: |
337 | + caches.append(revinfo_disk_cache) |
338 | + self._load_whole_history_data(caches, cache_key) |
339 | |
340 | @property |
341 | def has_revisions(self): |
342 | |
343 | === modified file 'loggerhead/wholehistory.py' |
344 | --- loggerhead/wholehistory.py 2009-04-24 03:12:28 +0000 |
345 | +++ loggerhead/wholehistory.py 2009-04-24 10:30:15 +0000 |
346 | @@ -37,6 +37,10 @@ |
347 | |
348 | |
349 | def compute_whole_history_data(branch): |
350 | + """Compute _rev_info and _rev_indices for a branch. |
351 | + |
352 | + See History.__doc__ for what these data structures mean. |
353 | + """ |
354 | z = time.time() |
355 | |
356 | last_revid = branch.last_revision() |
Thanks for documenting the whole history data structures.