Merge lp:~mwhudson/loggerhead/persist-_rev_info into lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy
- persist-_rev_info
- Merge into less-stupid-whole-history-...
Proposed by
Michael Hudson-Doyle
Status: | Merged |
---|---|
Merge reported by: | Michael Hudson-Doyle |
Merged at revision: | not available |
Proposed branch: | lp:~mwhudson/loggerhead/persist-_rev_info |
Merge into: | lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy |
Diff against target: | None lines |
To merge this branch: | bzr merge lp:~mwhudson/loggerhead/persist-_rev_info |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Paul Hummer (community) | Approve | ||
Review via email: mp+5854@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Matt Nordhoff (mnordhoff) wrote : | # |
It's not a big deal, but could somebody make this merge request... go
away? This branch wasn't merged into
less-stupid-whole-history-redundancy, but its changes did land in
lp:loggerhead, so for all intents and purposes it's been dealt with. But
this request is still open, so it's still listed on the merge request
summary page.
--
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'loggerhead/apps/branch.py' | |||
2 | --- loggerhead/apps/branch.py 2009-04-07 18:29:10 +0000 | |||
3 | +++ loggerhead/apps/branch.py 2009-04-24 10:23:44 +0000 | |||
4 | @@ -38,27 +38,31 @@ | |||
5 | 38 | self.branch_link = branch_link # Currently only used in Launchpad | 38 | self.branch_link = branch_link # Currently only used in Launchpad |
6 | 39 | self.log = logging.getLogger('loggerhead.%s' % friendly_name) | 39 | self.log = logging.getLogger('loggerhead.%s' % friendly_name) |
7 | 40 | if graph_cache is None: | 40 | if graph_cache is None: |
9 | 41 | graph_cache = bzrlib.lru_cache.LRUCache() | 41 | graph_cache = bzrlib.lru_cache.LRUCache(10) |
10 | 42 | self.graph_cache = graph_cache | 42 | self.graph_cache = graph_cache |
11 | 43 | self.is_root = is_root | 43 | self.is_root = is_root |
12 | 44 | self.served_url = served_url | 44 | self.served_url = served_url |
13 | 45 | self.use_cdn = use_cdn | 45 | self.use_cdn = use_cdn |
14 | 46 | 46 | ||
15 | 47 | def get_history(self): | 47 | def get_history(self): |
17 | 48 | _history = History(self.branch, self.graph_cache) | 48 | file_cache = None |
18 | 49 | revinfo_disk_cache = None | ||
19 | 49 | cache_path = self._config.get('cachepath', None) | 50 | cache_path = self._config.get('cachepath', None) |
20 | 50 | if cache_path is not None: | 51 | if cache_path is not None: |
21 | 51 | # Only import the cache if we're going to use it. | 52 | # Only import the cache if we're going to use it. |
22 | 52 | # This makes sqlite optional | 53 | # This makes sqlite optional |
23 | 53 | try: | 54 | try: |
25 | 54 | from loggerhead.changecache import FileChangeCache | 55 | from loggerhead.changecache import ( |
26 | 56 | FileChangeCache, RevInfoDiskCache) | ||
27 | 55 | except ImportError: | 57 | except ImportError: |
28 | 56 | self.log.debug("Couldn't load python-sqlite," | 58 | self.log.debug("Couldn't load python-sqlite," |
29 | 57 | " continuing without using a cache") | 59 | " continuing without using a cache") |
30 | 58 | else: | 60 | else: |
34 | 59 | _history.use_file_cache( | 61 | file_cache = FileChangeCache(cache_path) |
35 | 60 | FileChangeCache(_history, cache_path)) | 62 | revinfo_disk_cache = RevInfoDiskCache(cache_path) |
36 | 61 | return _history | 63 | return History( |
37 | 64 | self.branch, self.graph_cache, file_cache=file_cache, | ||
38 | 65 | revinfo_disk_cache=revinfo_disk_cache, cache_key=self.friendly_name) | ||
39 | 62 | 66 | ||
40 | 63 | def url(self, *args, **kw): | 67 | def url(self, *args, **kw): |
41 | 64 | if isinstance(args[0], list): | 68 | if isinstance(args[0], list): |
42 | 65 | 69 | ||
43 | === modified file 'loggerhead/apps/filesystem.py' | |||
44 | --- loggerhead/apps/filesystem.py 2009-04-23 23:39:18 +0000 | |||
45 | +++ loggerhead/apps/filesystem.py 2009-04-24 10:23:44 +0000 | |||
46 | @@ -71,7 +71,7 @@ | |||
47 | 71 | class BranchesFromFileSystemRoot(object): | 71 | class BranchesFromFileSystemRoot(object): |
48 | 72 | 72 | ||
49 | 73 | def __init__(self, folder, config): | 73 | def __init__(self, folder, config): |
51 | 74 | self.graph_cache = lru_cache.LRUCache() | 74 | self.graph_cache = lru_cache.LRUCache(10) |
52 | 75 | self.folder = folder | 75 | self.folder = folder |
53 | 76 | self._config = config | 76 | self._config = config |
54 | 77 | 77 | ||
55 | @@ -94,7 +94,7 @@ | |||
56 | 94 | class UserBranchesFromFileSystemRoot(object): | 94 | class UserBranchesFromFileSystemRoot(object): |
57 | 95 | 95 | ||
58 | 96 | def __init__(self, folder, config): | 96 | def __init__(self, folder, config): |
60 | 97 | self.graph_cache = lru_cache.LRUCache() | 97 | self.graph_cache = lru_cache.LRUCache(10) |
61 | 98 | self.folder = folder | 98 | self.folder = folder |
62 | 99 | self._config = config | 99 | self._config = config |
63 | 100 | self.trunk_dir = config.get_option('trunk_dir') | 100 | self.trunk_dir = config.get_option('trunk_dir') |
64 | 101 | 101 | ||
65 | === modified file 'loggerhead/changecache.py' | |||
66 | --- loggerhead/changecache.py 2009-03-19 21:01:11 +0000 | |||
67 | +++ loggerhead/changecache.py 2009-04-24 10:30:15 +0000 | |||
68 | @@ -27,8 +27,10 @@ | |||
69 | 27 | """ | 27 | """ |
70 | 28 | 28 | ||
71 | 29 | import cPickle | 29 | import cPickle |
72 | 30 | import marshal | ||
73 | 30 | import os | 31 | import os |
74 | 31 | import tempfile | 32 | import tempfile |
75 | 33 | import zlib | ||
76 | 32 | 34 | ||
77 | 33 | try: | 35 | try: |
78 | 34 | from sqlite3 import dbapi2 | 36 | from sqlite3 import dbapi2 |
79 | @@ -38,16 +40,25 @@ | |||
80 | 38 | # We take an optimistic approach to concurrency here: we might do work twice | 40 | # We take an optimistic approach to concurrency here: we might do work twice |
81 | 39 | # in the case of races, but not crash or corrupt data. | 41 | # in the case of races, but not crash or corrupt data. |
82 | 40 | 42 | ||
83 | 43 | def safe_init_db(filename, init_sql): | ||
84 | 44 | # To avoid races around creating the database, we create the db in | ||
85 | 45 | # a temporary file and rename it into the ultimate location. | ||
86 | 46 | fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename)) | ||
87 | 47 | con = dbapi2.connect(temp_path) | ||
88 | 48 | cur = con.cursor() | ||
89 | 49 | cur.execute(init_sql) | ||
90 | 50 | con.commit() | ||
91 | 51 | con.close() | ||
92 | 52 | os.rename(temp_path, filename) | ||
93 | 53 | |||
94 | 41 | class FakeShelf(object): | 54 | class FakeShelf(object): |
95 | 42 | 55 | ||
96 | 43 | def __init__(self, filename): | 56 | def __init__(self, filename): |
97 | 44 | create_table = not os.path.exists(filename) | 57 | create_table = not os.path.exists(filename) |
98 | 45 | if create_table: | 58 | if create_table: |
104 | 46 | # To avoid races around creating the database, we create the db in | 59 | safe_init_db( |
105 | 47 | # a temporary file and rename it into the ultimate location. | 60 | filename, "create table RevisionData " |
106 | 48 | fd, path = tempfile.mkstemp(dir=os.path.dirname(filename)) | 61 | "(revid binary primary key, data binary)") |
102 | 49 | self._create_table(path) | ||
103 | 50 | os.rename(path, filename) | ||
107 | 51 | self.connection = dbapi2.connect(filename) | 62 | self.connection = dbapi2.connect(filename) |
108 | 52 | self.cursor = self.connection.cursor() | 63 | self.cursor = self.connection.cursor() |
109 | 53 | 64 | ||
110 | @@ -89,8 +100,7 @@ | |||
111 | 89 | 100 | ||
112 | 90 | class FileChangeCache(object): | 101 | class FileChangeCache(object): |
113 | 91 | 102 | ||
116 | 92 | def __init__(self, history, cache_path): | 103 | def __init__(self, cache_path): |
115 | 93 | self.history = history | ||
117 | 94 | 104 | ||
118 | 95 | if not os.path.exists(cache_path): | 105 | if not os.path.exists(cache_path): |
119 | 96 | os.mkdir(cache_path) | 106 | os.mkdir(cache_path) |
120 | @@ -104,3 +114,44 @@ | |||
121 | 104 | changes = self.history.get_file_changes_uncached(entry) | 114 | changes = self.history.get_file_changes_uncached(entry) |
122 | 105 | cache.add(entry.revid, changes) | 115 | cache.add(entry.revid, changes) |
123 | 106 | return changes | 116 | return changes |
124 | 117 | |||
125 | 118 | |||
126 | 119 | class RevInfoDiskCache(object): | ||
127 | 120 | """Like `RevInfoMemoryCache` but backed in a sqlite DB.""" | ||
128 | 121 | |||
129 | 122 | def __init__(self, cache_path): | ||
130 | 123 | if not os.path.exists(cache_path): | ||
131 | 124 | os.mkdir(cache_path) | ||
132 | 125 | filename = os.path.join(cache_path, 'revinfo.sql') | ||
133 | 126 | create_table = not os.path.exists(filename) | ||
134 | 127 | if create_table: | ||
135 | 128 | safe_init_db( | ||
136 | 129 | filename, "create table Data " | ||
137 | 130 | "(key binary primary key, revid binary, data binary)") | ||
138 | 131 | self.connection = dbapi2.connect(filename) | ||
139 | 132 | self.cursor = self.connection.cursor() | ||
140 | 133 | |||
141 | 134 | def get(self, key, revid): | ||
142 | 135 | self.cursor.execute( | ||
143 | 136 | "select revid, data from data where key = ?", (dbapi2.Binary(key),)) | ||
144 | 137 | row = self.cursor.fetchone() | ||
145 | 138 | if row is None: | ||
146 | 139 | return None | ||
147 | 140 | elif str(row[0]) != revid: | ||
148 | 141 | return None | ||
149 | 142 | else: | ||
150 | 143 | return marshal.loads(zlib.decompress(row[1])) | ||
151 | 144 | |||
152 | 145 | def set(self, key, revid, data): | ||
153 | 146 | try: | ||
154 | 147 | self.cursor.execute( | ||
155 | 148 | 'delete from data where key = ?', (dbapi2.Binary(key), )) | ||
156 | 149 | blob = zlib.compress(marshal.dumps(data)) | ||
157 | 150 | self.cursor.execute( | ||
158 | 151 | "insert into data (key, revid, data) values (?, ?, ?)", | ||
159 | 152 | map(dbapi2.Binary, [key, revid, blob])) | ||
160 | 153 | self.connection.commit() | ||
161 | 154 | except dbapi2.IntegrityError: | ||
162 | 155 | # If another thread or process attempted to set the same key, we | ||
163 | 156 | # don't care too much -- it's only a cache after all! | ||
164 | 157 | pass | ||
165 | 107 | 158 | ||
166 | === modified file 'loggerhead/history.py' | |||
167 | --- loggerhead/history.py 2009-04-24 03:44:30 +0000 | |||
168 | +++ loggerhead/history.py 2009-04-24 10:30:15 +0000 | |||
169 | @@ -31,6 +31,7 @@ | |||
170 | 31 | import bisect | 31 | import bisect |
171 | 32 | import datetime | 32 | import datetime |
172 | 33 | import logging | 33 | import logging |
173 | 34 | import marshal | ||
174 | 34 | import re | 35 | import re |
175 | 35 | import textwrap | 36 | import textwrap |
176 | 36 | import threading | 37 | import threading |
177 | @@ -47,6 +48,7 @@ | |||
178 | 47 | import bzrlib.delta | 48 | import bzrlib.delta |
179 | 48 | import bzrlib.diff | 49 | import bzrlib.diff |
180 | 49 | import bzrlib.errors | 50 | import bzrlib.errors |
181 | 51 | import bzrlib.lru_cache | ||
182 | 50 | import bzrlib.progress | 52 | import bzrlib.progress |
183 | 51 | import bzrlib.revision | 53 | import bzrlib.revision |
184 | 52 | import bzrlib.textfile | 54 | import bzrlib.textfile |
185 | @@ -177,6 +179,43 @@ | |||
186 | 177 | file_id=file_id)) | 179 | file_id=file_id)) |
187 | 178 | 180 | ||
188 | 179 | 181 | ||
189 | 182 | class RevInfoMemoryCache(object): | ||
190 | 183 | """A store that validates values against the revids they were stored with. | ||
191 | 184 | |||
192 | 185 | We use a unique key for each branch. | ||
193 | 186 | |||
194 | 187 | The reason for not just using the revid as the key is so that when a new | ||
195 | 188 | value is provided for a branch, we replace the old value used for the | ||
196 | 189 | branch. | ||
197 | 190 | |||
198 | 191 | There is another implementation of the same interface in | ||
199 | 192 | loggerhead.changecache.RevInfoDiskCache. | ||
200 | 193 | """ | ||
201 | 194 | |||
202 | 195 | def __init__(self, cache): | ||
203 | 196 | self._cache = cache | ||
204 | 197 | |||
205 | 198 | def get(self, key, revid): | ||
206 | 199 | """Return the data associated with `key`, subject to a revid check. | ||
207 | 200 | |||
208 | 201 | If a value was stored under `key`, with the same revid, return it. | ||
209 | 202 | Otherwise return None. | ||
210 | 203 | """ | ||
211 | 204 | cached = self._cache.get(key) | ||
212 | 205 | if cached is None: | ||
213 | 206 | return None | ||
214 | 207 | stored_revid, data = cached | ||
215 | 208 | if revid == stored_revid: | ||
216 | 209 | return data | ||
217 | 210 | else: | ||
218 | 211 | return None | ||
219 | 212 | |||
220 | 213 | def set(self, key, revid, data): | ||
221 | 214 | """Store `data` under `key`, to be checked against `revid` on get(). | ||
222 | 215 | """ | ||
223 | 216 | self._cache[key] = (revid, data) | ||
224 | 217 | |||
225 | 218 | |||
226 | 180 | class History (object): | 219 | class History (object): |
227 | 181 | """Decorate a branch to provide information for rendering. | 220 | """Decorate a branch to provide information for rendering. |
228 | 182 | 221 | ||
229 | @@ -185,13 +224,76 @@ | |||
230 | 185 | around it, serve the request, throw the History object away, unlock the | 224 | around it, serve the request, throw the History object away, unlock the |
231 | 186 | branch and throw it away. | 225 | branch and throw it away. |
232 | 187 | 226 | ||
234 | 188 | :ivar _file_change_cache: xx | 227 | :ivar _file_change_cache: An object that caches information about the |
235 | 228 | files that changed between two revisions. | ||
236 | 229 | :ivar _rev_info: A list of information about revisions. This is by far | ||
237 | 230 | the most cryptic data structure in loggerhead. At the top level, it | ||
238 | 231 | is a list of 3-tuples [(merge-info, where-merged, parents)]. | ||
239 | 232 | `merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) -- | ||
240 | 233 | like a merged sorted list, but the revno is stringified. | ||
241 | 234 | `where-merged` is a tuple of revisions that have this revision as a | ||
242 | 235 | non-lefthand parent. Finally, `parents` is just the usual list of | ||
243 | 236 | parents of this revision. | ||
244 | 237 | :ivar _rev_indices: A dictionary mapping each revision id to the index of | ||
245 | 238 | the information about it in _rev_info. | ||
246 | 239 | :ivar _full_history: A list of all revision ids in the ancestry of the | ||
247 | 240 | branch, in merge-sorted order. This is a bit silly, and shouldn't | ||
248 | 241 | really be stored on the instance... | ||
249 | 242 | :ivar _revno_revid: A dictionary mapping stringified revnos to revision | ||
250 | 243 | ids. | ||
251 | 189 | """ | 244 | """ |
252 | 190 | 245 | ||
254 | 191 | def __init__(self, branch, whole_history_data_cache): | 246 | def _load_whole_history_data(self, caches, cache_key): |
255 | 247 | """Set the attributes relating to the whole history of the branch. | ||
256 | 248 | |||
257 | 249 | :param caches: a list of caches with interfaces like | ||
258 | 250 | `RevInfoMemoryCache` and be ordered from fastest to slowest. | ||
259 | 251 | :param cache_key: the key to use with the caches. | ||
260 | 252 | """ | ||
261 | 253 | self._rev_indices = None | ||
262 | 254 | self._rev_info = None | ||
263 | 255 | |||
264 | 256 | missed_caches = [] | ||
265 | 257 | def update_missed_caches(): | ||
266 | 258 | for cache in missed_caches: | ||
267 | 259 | cache.set(cache_key, self.last_revid, self._rev_info) | ||
268 | 260 | for cache in caches: | ||
269 | 261 | data = cache.get(cache_key, self.last_revid) | ||
270 | 262 | if data is not None: | ||
271 | 263 | self._rev_info = data | ||
272 | 264 | update_missed_caches() | ||
273 | 265 | break | ||
274 | 266 | else: | ||
275 | 267 | missed_caches.append(cache) | ||
276 | 268 | else: | ||
277 | 269 | whole_history_data = compute_whole_history_data(self._branch) | ||
278 | 270 | self._rev_info, self._rev_indices = whole_history_data | ||
279 | 271 | update_missed_caches() | ||
280 | 272 | |||
281 | 273 | if self._rev_indices is not None: | ||
282 | 274 | self._full_history = [] | ||
283 | 275 | self._revno_revid = {} | ||
284 | 276 | for ((_, revid, _, revno_str, _), _, _) in self._rev_info: | ||
285 | 277 | self._revno_revid[revno_str] = revid | ||
286 | 278 | self._full_history.append(revid) | ||
287 | 279 | else: | ||
288 | 280 | self._full_history = [] | ||
289 | 281 | self._revno_revid = {} | ||
290 | 282 | self._rev_indices = {} | ||
291 | 283 | for ((seq, revid, _, revno_str, _), _, _) in self._rev_info: | ||
292 | 284 | self._rev_indices[revid] = seq | ||
293 | 285 | self._revno_revid[revno_str] = revid | ||
294 | 286 | self._full_history.append(revid) | ||
295 | 287 | |||
296 | 288 | def __init__(self, branch, whole_history_data_cache, file_cache=None, | ||
297 | 289 | revinfo_disk_cache=None, cache_key=None): | ||
298 | 192 | assert branch.is_locked(), ( | 290 | assert branch.is_locked(), ( |
299 | 193 | "Can only construct a History object with a read-locked branch.") | 291 | "Can only construct a History object with a read-locked branch.") |
301 | 194 | self._file_change_cache = None | 292 | if file_cache is not None: |
302 | 293 | self._file_change_cache = file_cache | ||
303 | 294 | file_cache.history = self | ||
304 | 295 | else: | ||
305 | 296 | self._file_change_cache = None | ||
306 | 195 | self._branch = branch | 297 | self._branch = branch |
307 | 196 | self._inventory_cache = {} | 298 | self._inventory_cache = {} |
308 | 197 | self._branch_nick = self._branch.get_config().get_nickname() | 299 | self._branch_nick = self._branch.get_config().get_nickname() |
309 | @@ -199,28 +301,10 @@ | |||
310 | 199 | 301 | ||
311 | 200 | self.last_revid = branch.last_revision() | 302 | self.last_revid = branch.last_revision() |
312 | 201 | 303 | ||
335 | 202 | cached_whole_history_data = whole_history_data_cache.get(self.last_revid) | 304 | caches = [RevInfoMemoryCache(whole_history_data_cache)] |
336 | 203 | if cached_whole_history_data is None: | 305 | if revinfo_disk_cache: |
337 | 204 | whole_history_data = compute_whole_history_data(branch) | 306 | caches.append(revinfo_disk_cache) |
338 | 205 | (self._rev_info, self._rev_indices) = whole_history_data | 307 | self._load_whole_history_data(caches, cache_key) |
317 | 206 | whole_history_data_cache[self.last_revid] = whole_history_data[0] | ||
318 | 207 | self._full_history = [] | ||
319 | 208 | self._revno_revid = {} | ||
320 | 209 | for ((_, revid, _, revno_str, _), _, _) in self._rev_info: | ||
321 | 210 | self._revno_revid[revno_str] = revid | ||
322 | 211 | self._full_history.append(revid) | ||
323 | 212 | else: | ||
324 | 213 | self._rev_info = cached_whole_history_data | ||
325 | 214 | self._full_history = [] | ||
326 | 215 | self._revno_revid = {} | ||
327 | 216 | self._rev_indices = {} | ||
328 | 217 | for ((seq, revid, _, revno_str, _), _, _) in self._rev_info: | ||
329 | 218 | self._rev_indices[revid] = seq | ||
330 | 219 | self._revno_revid[revno_str] = revid | ||
331 | 220 | self._full_history.append(revid) | ||
332 | 221 | |||
333 | 222 | def use_file_cache(self, cache): | ||
334 | 223 | self._file_change_cache = cache | ||
339 | 224 | 308 | ||
340 | 225 | @property | 309 | @property |
341 | 226 | def has_revisions(self): | 310 | def has_revisions(self): |
342 | 227 | 311 | ||
343 | === modified file 'loggerhead/wholehistory.py' | |||
344 | --- loggerhead/wholehistory.py 2009-04-24 03:12:28 +0000 | |||
345 | +++ loggerhead/wholehistory.py 2009-04-24 10:30:15 +0000 | |||
346 | @@ -37,6 +37,10 @@ | |||
347 | 37 | 37 | ||
348 | 38 | 38 | ||
349 | 39 | def compute_whole_history_data(branch): | 39 | def compute_whole_history_data(branch): |
350 | 40 | """Compute _rev_info and _rev_indices for a branch. | ||
351 | 41 | |||
352 | 42 | See History.__doc__ for what these data structures mean. | ||
353 | 43 | """ | ||
354 | 40 | z = time.time() | 44 | z = time.time() |
355 | 41 | 45 | ||
356 | 42 | last_revid = branch.last_revision() | 46 | last_revid = branch.last_revision() |
Thanks for documenting the whole history data structures.