Merge lp:~mnordhoff/loggerhead/jam-integration into lp:~jameinel/loggerhead/integration
Proposed by: Matt Nordhoff
Status: Rejected
Rejected by: John A Meinel
Proposed branch: lp:~mnordhoff/loggerhead/jam-integration
Merge into: lp:~jameinel/loggerhead/integration
Diff against target: 4126 lines (+2901/-709), 29 files modified
- NEWS (+12/-0)
- __init__.py (+26/-5)
- docs/index.rst (+0/-2)
- docs/start-loggerhead.rst (+0/-46)
- docs/stop-loggerhead.rst (+0/-18)
- loggerhead/apps/__init__.py (+3/-1)
- loggerhead/apps/branch.py (+23/-37)
- loggerhead/apps/config.py (+2/-2)
- loggerhead/apps/transport.py (+4/-7)
- loggerhead/changecache.py (+0/-43)
- loggerhead/config.py (+1/-1)
- loggerhead/controllers/download_ui.py (+2/-2)
- loggerhead/history.py (+61/-118)
- loggerhead/history_db.py (+1621/-0)
- loggerhead/history_db_schema.py (+247/-0)
- loggerhead/main.py (+8/-0)
- loggerhead/templates/atom.pt (+3/-3)
- loggerhead/tests/__init__.py (+27/-0)
- loggerhead/tests/test_importer.py (+847/-0)
- loggerhead/tests/test_simple.py (+9/-2)
- loggerhead/trace.py (+0/-75)
- loggerhead/wholehistory.py (+0/-85)
- loggerhead/zptsupport.py (+0/-4)
- serve-branches (+3/-0)
- setup.py (+2/-4)
- start-loggerhead (+0/-146)
- start-loggerhead.1 (+0/-38)
- stop-loggerhead (+0/-51)
- stop-loggerhead.1 (+0/-19)
To merge this branch: bzr merge lp:~mnordhoff/loggerhead/jam-integration
Related bugs: (none)
Reviewer: John A Meinel (Pending)
Commit message
Merge the latest history-db.
Description of the change
This is simply an update, merging the latest history-db, since there were some conflicts.
If this branch is still being maintained...
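A minimal sketch for trying the proposal locally, using the branch URLs from the proposal above and assuming they are still reachable; the conflict-handling steps only matter if the merge does not apply cleanly:

    # Grab the target branch and merge the proposed branch into it.
    bzr branch lp:~jameinel/loggerhead/integration integration
    cd integration
    bzr merge lp:~mnordhoff/loggerhead/jam-integration
    bzr status            # lists any files still in conflict
    # After fixing conflicted files by hand:
    bzr resolve --all
    bzr commit -m "Merge lp:~mnordhoff/loggerhead/jam-integration"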
Max Kanat-Alexander (mkanat) commented on this proposal.
Unmerged revisions
- 422. By Matt Nordhoff
  Merge the latest history-db, fixing conflicts.
Preview Diff
1 | === modified file 'NEWS' |
2 | --- NEWS 2010-04-23 00:02:29 +0000 |
3 | +++ NEWS 2010-05-15 23:47:26 +0000 |
4 | @@ -4,6 +4,9 @@ |
5 | dev [future] |
6 | ------------ |
7 | |
8 | + - Remove ``start-loggerhead`` and ``stop-loggerhead`` which were already |
9 | + deprecated. (John Arbash Meinel) |
10 | + |
11 | - Syntax highlighting is no longer applied for files greater than 512K, |
12 | reducing codebrowse.launchpad.net overloading. |
13 | (Max Kanat-Alexander, #513044) |
14 | @@ -25,6 +28,15 @@ |
15 | - Ignore readonly+ prefix when checking if Loggerhead is serving a |
16 | local location. (Reported by Tres Seaver.) (Matt Nordhoff) |
17 | |
18 | + - Set Cache-Control and Expires headers on static pages. |
19 | + (John Arbash Meinel) |
20 | + |
21 | + - Generate relative links where possible (everywhere but HTTP |
22 | + redirects and feed IDs). (Michael Hudson, Matt Nordhoff) |
23 | + |
24 | + - Fix bad redirect when visiting "/download" or "/download/". |
25 | + (Matt Nordhoff, #247992) |
26 | + |
27 | |
28 | 1.17 [20Aug2009] |
29 | ---------------- |
30 | |
31 | === modified file '__init__.py' |
32 | --- __init__.py 2010-04-26 19:04:06 +0000 |
33 | +++ __init__.py 2010-05-15 23:47:26 +0000 |
34 | @@ -71,11 +71,20 @@ |
35 | logger.addHandler(handler) |
36 | logging.getLogger('simpleTAL').addHandler(handler) |
37 | logging.getLogger('simpleTALES').addHandler(handler) |
38 | - |
39 | - def serve_http(transport, host=None, port=None, inet=None): |
40 | - from paste.httpexceptions import HTTPExceptionHandler |
41 | - from paste.httpserver import serve |
42 | - |
43 | + def _restrict_logging(logger_name): |
44 | + logger = logging.getLogger(logger_name) |
45 | + if logger.getEffectiveLevel() < logging.INFO: |
46 | + logger.setLevel(logging.INFO) |
47 | + # simpleTAL is *very* verbose in DEBUG mode, which is otherwise the |
48 | + # default. So quiet it up a bit. |
49 | + _restrict_logging('simpleTAL') |
50 | + _restrict_logging('simpleTALES') |
51 | + |
52 | + |
53 | + |
54 | + |
55 | + def _ensure_loggerhead_path(): |
56 | + """Ensure that you can 'import loggerhead' and get the root.""" |
57 | # loggerhead internal code will try to 'import loggerhead', so |
58 | # let's put it on the path if we can't find it in the existing path |
59 | try: |
60 | @@ -84,6 +93,12 @@ |
61 | import os.path, sys |
62 | sys.path.append(os.path.dirname(__file__)) |
63 | |
64 | + def serve_http(transport, host=None, port=None, inet=None): |
65 | + from paste.httpexceptions import HTTPExceptionHandler |
66 | + from paste.httpserver import serve |
67 | + |
68 | + _ensure_loggerhead_path() |
69 | + |
70 | from loggerhead.apps.transport import BranchesFromTransportRoot |
71 | from loggerhead.config import LoggerheadConfig |
72 | |
73 | @@ -135,3 +150,9 @@ |
74 | super(cmd_serve, self).run(*args, **kw) |
75 | |
76 | register_command(cmd_serve) |
77 | + |
78 | + def load_tests(standard_tests, module, loader): |
79 | + _ensure_loggerhead_path() |
80 | + standard_tests.addTests(loader.loadTestsFromModuleNames( |
81 | + ['bzrlib.plugins.loggerhead.loggerhead.tests'])) |
82 | + return standard_tests |
83 | |
84 | === modified file 'docs/index.rst' |
85 | --- docs/index.rst 2010-04-13 08:46:25 +0000 |
86 | +++ docs/index.rst 2010-05-15 23:47:26 +0000 |
87 | @@ -166,8 +166,6 @@ |
88 | :maxdepth: 2 |
89 | |
90 | serve-branches |
91 | - start-loggerhead |
92 | - stop-loggerhead |
93 | |
94 | |
95 | Support |
96 | |
97 | === removed file 'docs/start-loggerhead.rst' |
98 | --- docs/start-loggerhead.rst 2010-03-25 10:39:30 +0000 |
99 | +++ docs/start-loggerhead.rst 1970-01-01 00:00:00 +0000 |
100 | @@ -1,46 +0,0 @@ |
101 | -:command:`start-loggerhead` |
102 | -=========================== |
103 | - |
104 | -The :command:`start-loggerhead` command starts a new standalone Loggerhead |
105 | -server. By default, the server runs in the background (daemonized). |
106 | - |
107 | -.. program:: start-loggerhead |
108 | - |
109 | -Usage |
110 | ------ |
111 | - |
112 | -.. code-block:: sh |
113 | - |
114 | - start-loggerhead [OPTIONS] |
115 | - |
116 | -Options |
117 | -------- |
118 | - |
119 | -.. cmdoption:: --version |
120 | - |
121 | - Print the software version and exit. |
122 | - |
123 | -.. cmdoption:: -h, --help |
124 | - |
125 | - Show this help message and exit. |
126 | - |
127 | -.. cmdoption:: --foreground |
128 | - |
129 | - Run in the foreground; don't daemonize. |
130 | - |
131 | -.. cmdoption:: -C, --check |
132 | - |
133 | - Only start if not already running (useful for cron jobs). |
134 | - |
135 | -.. cmdoption:: -p, --pidfile=PIDFILE |
136 | - |
137 | - Override pid file location. |
138 | - |
139 | -.. cmdoption:: -c, --config-file=CONFIGFILE |
140 | - |
141 | - Override configuration file location (the default is |
142 | - :file:`/etc/loggerhead.conf`). |
143 | - |
144 | -.. cmdoption:: --log-folder=LOG_FOLDER |
145 | - |
146 | - The directory in which to place log files. |
147 | |
148 | === removed file 'docs/stop-loggerhead.rst' |
149 | --- docs/stop-loggerhead.rst 2010-03-25 10:25:13 +0000 |
150 | +++ docs/stop-loggerhead.rst 1970-01-01 00:00:00 +0000 |
151 | @@ -1,18 +0,0 @@ |
152 | -:command:`stop-loggerhead` |
153 | -====================================================================== |
154 | - |
155 | -The :command:`stop-loggerhead` command stops the currently running Loggerhead |
156 | -daemon. |
157 | - |
158 | -.. program:: stop-loggerhead |
159 | - |
160 | -Usage |
161 | ------ |
162 | - |
163 | -.. code-block:: sh |
164 | - |
165 | - stop-loggerhead [OPTIONS] |
166 | - |
167 | -.. cmdoption:: -p, --pidfile=PIDFILE |
168 | - |
169 | - override pid file location |
170 | |
171 | === modified file 'loggerhead/apps/__init__.py' |
172 | --- loggerhead/apps/__init__.py 2009-11-26 03:44:40 +0000 |
173 | +++ loggerhead/apps/__init__.py 2010-05-15 23:47:26 +0000 |
174 | @@ -9,7 +9,9 @@ |
175 | static = os.path.join( |
176 | os.path.dirname(os.path.dirname(__file__)), 'static') |
177 | |
178 | -static_app = urlparser.make_static(None, static) |
179 | +# Static things can be cached for half a day, we could probably make this |
180 | +# longer, except for just before rollout times. |
181 | +static_app = urlparser.make_static(None, static, cache_max_age=12*60*60) |
182 | |
183 | favicon_app = convert_file_errors(fileapp.FileApp( |
184 | os.path.join(static, 'images', 'favicon.ico'))) |
185 | |
186 | === modified file 'loggerhead/apps/branch.py' |
187 | --- loggerhead/apps/branch.py 2010-04-30 22:29:47 +0000 |
188 | +++ loggerhead/apps/branch.py 2010-05-15 23:47:26 +0000 |
189 | @@ -19,7 +19,6 @@ |
190 | import logging |
191 | import urllib |
192 | import sys |
193 | -import time |
194 | |
195 | import bzrlib.branch |
196 | import bzrlib.errors |
197 | @@ -48,7 +47,7 @@ |
198 | class BranchWSGIApp(object): |
199 | |
200 | def __init__(self, branch, friendly_name=None, config={}, |
201 | - graph_cache=None, branch_link=None, is_root=False, |
202 | + branch_link=None, is_root=False, |
203 | served_url=_DEFAULT, use_cdn=False): |
204 | # XXX: Why is config here a simple dictionary (which only ever has a |
205 | # single item that I can find), while every other 'self._config' |
206 | @@ -59,34 +58,26 @@ |
207 | self.friendly_name = friendly_name |
208 | self.branch_link = branch_link # Currently only used in Launchpad |
209 | self.log = logging.getLogger('loggerhead.%s' % (friendly_name,)) |
210 | - if graph_cache is None: |
211 | - graph_cache = bzrlib.lru_cache.LRUCache(10) |
212 | - self.graph_cache = graph_cache |
213 | self.is_root = is_root |
214 | self.served_url = served_url |
215 | self.use_cdn = use_cdn |
216 | |
217 | def get_history(self): |
218 | file_cache = None |
219 | - revinfo_disk_cache = None |
220 | show_merge_points = self._config.get('show_merge_points', True) |
221 | cache_path = self._config.get('cachepath', None) |
222 | if cache_path is not None: |
223 | # Only import the cache if we're going to use it. |
224 | # This makes sqlite optional |
225 | try: |
226 | - from loggerhead.changecache import ( |
227 | - FileChangeCache, RevInfoDiskCache) |
228 | + from loggerhead.changecache import FileChangeCache |
229 | except ImportError: |
230 | self.log.debug("Couldn't load python-sqlite," |
231 | " continuing without using a cache") |
232 | else: |
233 | file_cache = FileChangeCache(cache_path) |
234 | - revinfo_disk_cache = RevInfoDiskCache(cache_path) |
235 | return History( |
236 | - self.branch, self.graph_cache, file_cache=file_cache, |
237 | - revinfo_disk_cache=revinfo_disk_cache, |
238 | - cache_key=self.friendly_name, |
239 | + self.branch, file_cache=file_cache, cache_key=self.friendly_name, |
240 | show_merge_points=show_merge_points, |
241 | cache_path=cache_path, |
242 | ) |
243 | @@ -99,10 +90,15 @@ |
244 | if v is not None: |
245 | qs.append('%s=%s' % (k, urllib.quote(v))) |
246 | qs = '&'.join(qs) |
247 | - return request.construct_url( |
248 | - self._environ, script_name=self._url_base, |
249 | - path_info=unicode('/'.join(args)).encode('utf-8'), |
250 | - querystring=qs) |
251 | + path_info = urllib.quote( |
252 | + unicode('/'.join(args)).encode('utf-8'), safe='/~:') |
253 | + if qs: |
254 | + path_info += '?' + qs |
255 | + return self._url_base + path_info |
256 | + |
257 | + def absolute_url(self, *args, **kw): |
258 | + rel_url = self.url(*args, **kw) |
259 | + return request.resolve_relative_url(rel_url, self._environ) |
260 | |
261 | def context_url(self, *args, **kw): |
262 | kw = util.get_context(**kw) |
263 | @@ -161,33 +157,23 @@ |
264 | self.served_url = self.url([]) |
265 | except bzrlib.errors.InvalidURL: |
266 | self.served_url = None |
267 | - path_info = environ.get('PATH_INFO', None) |
268 | path = request.path_info_pop(environ) |
269 | if not path: |
270 | raise httpexceptions.HTTPMovedPermanently( |
271 | - self._url_base + '/changes') |
272 | + self.absolute_url('/changes')) |
273 | if path == 'static': |
274 | - # TODO: Unfortunately, we still call Branch.open() even if we are |
275 | - # serving a static path. This probably adds a fair amount of |
276 | - # overhead... |
277 | return static_app(environ, start_response) |
278 | cls = self.controllers_dict.get(path) |
279 | if cls is None: |
280 | raise httpexceptions.HTTPNotFound() |
281 | - def do_stuff(): |
282 | - self.branch.lock_read() |
283 | + self.branch.lock_read() |
284 | + try: |
285 | try: |
286 | - try: |
287 | - c = cls(self, self.get_history) |
288 | - return c(environ, start_response) |
289 | - except: |
290 | - environ['exc_info'] = sys.exc_info() |
291 | - environ['branch'] = self |
292 | - raise |
293 | - finally: |
294 | - self.branch.unlock() |
295 | - t = time.time() |
296 | - val = do_stuff() |
297 | - tdelta = time.time() - t |
298 | - self.log.info('Took %.3fs to generate: %s' % (tdelta, path_info)) |
299 | - return val |
300 | + c = cls(self, self.get_history) |
301 | + return c(environ, start_response) |
302 | + except: |
303 | + environ['exc_info'] = sys.exc_info() |
304 | + environ['branch'] = self |
305 | + raise |
306 | + finally: |
307 | + self.branch.unlock() |
308 | |
309 | === modified file 'loggerhead/apps/config.py' |
310 | --- loggerhead/apps/config.py 2009-05-13 13:14:04 +0000 |
311 | +++ loggerhead/apps/config.py 2010-05-15 23:47:26 +0000 |
312 | @@ -208,8 +208,8 @@ |
313 | environ['SCRIPT_NAME'] |
314 | segment = path_info_pop(environ) |
315 | if segment is None: |
316 | - raise httpexceptions.HTTPMovedPermanently( |
317 | - environ['SCRIPT_NAME'] + '/') |
318 | + raise httpexceptions.HTTPMovedPermanently.relative_redirect( |
319 | + environ['SCRIPT_NAME'] + '/', environ) |
320 | elif segment == '': |
321 | response = WSGIResponse() |
322 | self.browse(response) |
323 | |
324 | === modified file 'loggerhead/apps/transport.py' |
325 | --- loggerhead/apps/transport.py 2010-04-26 18:17:26 +0000 |
326 | +++ loggerhead/apps/transport.py 2010-05-15 23:47:26 +0000 |
327 | @@ -18,9 +18,8 @@ |
328 | |
329 | import threading |
330 | |
331 | -from bzrlib import branch, errors, lru_cache, ui, urlutils |
332 | +from bzrlib import branch, errors, ui, urlutils |
333 | from bzrlib.config import LocationConfig |
334 | -from bzrlib.smart import request |
335 | from bzrlib.transport import get_transport |
336 | from bzrlib.transport.http import wsgi |
337 | |
338 | @@ -33,6 +32,7 @@ |
339 | from loggerhead.apps import favicon_app, robots_app, static_app |
340 | from loggerhead.controllers.directory_ui import DirectoryUI |
341 | |
342 | +# TODO: Use bzrlib.ui.bool_from_string(), added in bzr 1.18 |
343 | _bools = { |
344 | 'yes': True, 'no': False, |
345 | 'on': True, 'off': False, |
346 | @@ -64,7 +64,6 @@ |
347 | name, |
348 | {'cachepath': self._config.SQL_DIR, |
349 | 'show_merge_points': show_merge_points}, |
350 | - self.root.graph_cache, |
351 | is_root=is_root, |
352 | use_cdn=self._config.get_option('use_cdn'), |
353 | ) |
354 | @@ -73,8 +72,8 @@ |
355 | def app_for_non_branch(self, environ): |
356 | segment = path_info_pop(environ) |
357 | if segment is None: |
358 | - raise httpexceptions.HTTPMovedPermanently( |
359 | - environ['SCRIPT_NAME'] + '/') |
360 | + raise httpexceptions.HTTPMovedPermanently.relative_redirect( |
361 | + environ['SCRIPT_NAME'] + '/', environ) |
362 | elif segment == '': |
363 | if self.name: |
364 | name = self.name |
365 | @@ -148,7 +147,6 @@ |
366 | class BranchesFromTransportRoot(object): |
367 | |
368 | def __init__(self, base, config): |
369 | - self.graph_cache = lru_cache.LRUCache(10) |
370 | self.base = base |
371 | self._config = config |
372 | |
373 | @@ -172,7 +170,6 @@ |
374 | class UserBranchesFromTransportRoot(object): |
375 | |
376 | def __init__(self, base, config): |
377 | - self.graph_cache = lru_cache.LRUCache(10) |
378 | self.base = base |
379 | self._config = config |
380 | self.trunk_dir = config.get_option('trunk_dir') |
381 | |
382 | === modified file 'loggerhead/changecache.py' |
383 | --- loggerhead/changecache.py 2009-05-02 14:01:05 +0000 |
384 | +++ loggerhead/changecache.py 2010-05-15 23:47:26 +0000 |
385 | @@ -27,10 +27,8 @@ |
386 | """ |
387 | |
388 | import cPickle |
389 | -import marshal |
390 | import os |
391 | import tempfile |
392 | -import zlib |
393 | |
394 | try: |
395 | from sqlite3 import dbapi2 |
396 | @@ -115,44 +113,3 @@ |
397 | changes = self.history.get_file_changes_uncached(entry) |
398 | cache.add(entry.revid, changes) |
399 | return changes |
400 | - |
401 | - |
402 | -class RevInfoDiskCache(object): |
403 | - """Like `RevInfoMemoryCache` but backed in a sqlite DB.""" |
404 | - |
405 | - def __init__(self, cache_path): |
406 | - if not os.path.exists(cache_path): |
407 | - os.mkdir(cache_path) |
408 | - filename = os.path.join(cache_path, 'revinfo.sql') |
409 | - create_table = not os.path.exists(filename) |
410 | - if create_table: |
411 | - safe_init_db( |
412 | - filename, "create table Data " |
413 | - "(key binary primary key, revid binary, data binary)") |
414 | - self.connection = dbapi2.connect(filename) |
415 | - self.cursor = self.connection.cursor() |
416 | - |
417 | - def get(self, key, revid): |
418 | - self.cursor.execute( |
419 | - "select revid, data from data where key = ?", (dbapi2.Binary(key),)) |
420 | - row = self.cursor.fetchone() |
421 | - if row is None: |
422 | - return None |
423 | - elif str(row[0]) != revid: |
424 | - return None |
425 | - else: |
426 | - return marshal.loads(zlib.decompress(row[1])) |
427 | - |
428 | - def set(self, key, revid, data): |
429 | - try: |
430 | - self.cursor.execute( |
431 | - 'delete from data where key = ?', (dbapi2.Binary(key), )) |
432 | - blob = zlib.compress(marshal.dumps(data)) |
433 | - self.cursor.execute( |
434 | - "insert into data (key, revid, data) values (?, ?, ?)", |
435 | - map(dbapi2.Binary, [key, revid, blob])) |
436 | - self.connection.commit() |
437 | - except dbapi2.IntegrityError: |
438 | - # If another thread or process attempted to set the same key, we |
439 | - # don't care too much -- it's only a cache after all! |
440 | - pass |
441 | |
442 | === modified file 'loggerhead/config.py' |
443 | --- loggerhead/config.py 2010-04-26 21:29:02 +0000 |
444 | +++ loggerhead/config.py 2010-05-15 23:47:26 +0000 |
445 | @@ -96,7 +96,7 @@ |
446 | } |
447 | |
448 | def _optparse_level_to_int_level(option, opt_str, value, parser): |
449 | - return _level_to_int_level(value) |
450 | + parser.values.log_level = _level_to_int_level(value) |
451 | |
452 | |
453 | def _level_to_int_level(value): |
454 | |
455 | === modified file 'loggerhead/controllers/download_ui.py' |
456 | --- loggerhead/controllers/download_ui.py 2010-04-08 01:08:33 +0000 |
457 | +++ loggerhead/controllers/download_ui.py 2010-05-15 23:47:26 +0000 |
458 | @@ -44,8 +44,8 @@ |
459 | args.append(arg) |
460 | |
461 | if len(args) < 2: |
462 | - raise httpexceptions.HTTPMovedPermanently(self._branch.url( |
463 | - '../changes')) |
464 | + raise httpexceptions.HTTPMovedPermanently( |
465 | + self._branch.absolute_url('/changes')) |
466 | |
467 | revid = h.fix_revid(args[0]) |
468 | file_id = args[1] |
469 | |
470 | === modified file 'loggerhead/history.py' |
471 | --- loggerhead/history.py 2010-05-04 21:55:04 +0000 |
472 | +++ loggerhead/history.py 2010-05-15 23:47:26 +0000 |
473 | @@ -1,5 +1,4 @@ |
474 | -# |
475 | -# Copyright (C) 2008, 2009 Canonical Ltd. |
476 | +# Copyright (C) 2006-2010 Canonical Ltd. |
477 | # (Authored by Martin Albisetti <argentina@gmail.com>) |
478 | # Copyright (C) 2006 Robey Pointer <robey@lag.net> |
479 | # Copyright (C) 2006 Goffredo Baroncelli <kreijack@inwind.it> |
480 | @@ -28,7 +27,6 @@ |
481 | # |
482 | |
483 | |
484 | -import bisect |
485 | import datetime |
486 | import logging |
487 | import os |
488 | @@ -36,21 +34,20 @@ |
489 | import textwrap |
490 | import threading |
491 | |
492 | -from bzrlib import lru_cache |
493 | +from bzrlib import ( |
494 | + graph, |
495 | + lru_cache, |
496 | + ) |
497 | import bzrlib.branch |
498 | import bzrlib.delta |
499 | import bzrlib.errors |
500 | import bzrlib.foreign |
501 | import bzrlib.revision |
502 | |
503 | -from loggerhead import search |
504 | -from loggerhead import util |
505 | -from loggerhead.wholehistory import compute_whole_history_data |
506 | - |
507 | -from bzrlib.plugins.history_db import ( |
508 | +from loggerhead import ( |
509 | history_db, |
510 | - _get_history_db_path, |
511 | - _get_querier, |
512 | + search, |
513 | + util, |
514 | ) |
515 | |
516 | |
517 | @@ -96,23 +93,6 @@ |
518 | return path |
519 | |
520 | |
521 | -class _RevListToTimestamps(object): |
522 | - """This takes a list of revisions, and allows you to bisect by date""" |
523 | - |
524 | - __slots__ = ['revid_list', 'repository'] |
525 | - |
526 | - def __init__(self, revid_list, repository): |
527 | - self.revid_list = revid_list |
528 | - self.repository = repository |
529 | - |
530 | - def __getitem__(self, index): |
531 | - """Get the date of the index'd item""" |
532 | - return datetime.datetime.fromtimestamp(self.repository.get_revision( |
533 | - self.revid_list[index]).timestamp) |
534 | - |
535 | - def __len__(self): |
536 | - return len(self.revid_list) |
537 | - |
538 | class FileChangeReporter(object): |
539 | |
540 | def __init__(self, old_inv, new_inv): |
541 | @@ -160,54 +140,9 @@ |
542 | filename=rich_filename(paths[1], kind), |
543 | file_id=file_id)) |
544 | |
545 | - |
546 | -class RevInfoMemoryCache(object): |
547 | - """A store that validates values against the revids they were stored with. |
548 | - |
549 | - We use a unique key for each branch. |
550 | - |
551 | - The reason for not just using the revid as the key is so that when a new |
552 | - value is provided for a branch, we replace the old value used for the |
553 | - branch. |
554 | - |
555 | - There is another implementation of the same interface in |
556 | - loggerhead.changecache.RevInfoDiskCache. |
557 | - """ |
558 | - |
559 | - def __init__(self, cache): |
560 | - self._cache = cache |
561 | - # lru_cache is not thread-safe, so we need to lock all accesses. |
562 | - # It is even modified when doing a get() on it. |
563 | - self._lock = threading.RLock() |
564 | - |
565 | - def get(self, key, revid): |
566 | - """Return the data associated with `key`, subject to a revid check. |
567 | - |
568 | - If a value was stored under `key`, with the same revid, return it. |
569 | - Otherwise return None. |
570 | - """ |
571 | - self._lock.acquire() |
572 | - try: |
573 | - cached = self._cache.get(key) |
574 | - finally: |
575 | - self._lock.release() |
576 | - if cached is None: |
577 | - return None |
578 | - stored_revid, data = cached |
579 | - if revid == stored_revid: |
580 | - return data |
581 | - else: |
582 | - return None |
583 | - |
584 | - def set(self, key, revid, data): |
585 | - """Store `data` under `key`, to be checked against `revid` on get(). |
586 | - """ |
587 | - self._lock.acquire() |
588 | - try: |
589 | - self._cache[key] = (revid, data) |
590 | - finally: |
591 | - self._lock.release() |
592 | - |
593 | +# The lru_cache is not thread-safe, so we need a lock around it for |
594 | +# all threads. |
595 | +rev_info_memory_cache_lock = threading.RLock() |
596 | |
597 | _raw_revno_revid_cache = lru_cache.LRUCache(10000) |
598 | _revno_revid_lock = threading.RLock() |
599 | @@ -231,8 +166,7 @@ |
600 | self._lock = lock |
601 | |
602 | def get(self, key): |
603 | - """Return the data associated with `key`. |
604 | - Otherwise return None. |
605 | + """Return the data associated with `key`. Otherwise return None. |
606 | |
607 | :param key: Can be a revno_str or a revid. |
608 | """ |
609 | @@ -244,25 +178,19 @@ |
610 | return cached |
611 | |
612 | def set(self, revid, revno_str): |
613 | - """Store `data` under `key`. |
614 | - """ |
615 | + """Record that in this branch `revid` has revno `revno_str`.""" |
616 | self._lock.acquire() |
617 | try: |
618 | - # TODO: StaticTuples ? Probably only useful if we cache more than |
619 | - # 10k of them. 100k/1M is probably useful. |
620 | + # Could use StaticTuples here, but probably only useful if we |
621 | + # cache more than 10k of them. 100k/1M is probably useful. |
622 | self._cache[(self._branch_tip, revid)] = revno_str |
623 | self._cache[(self._branch_tip, revno_str)] = revid |
624 | finally: |
625 | self._lock.release() |
626 | |
627 | -# Used to store locks that prevent multiple threads from building a |
628 | -# revision graph for the same branch at the same time, because that can |
629 | -# cause severe performance issues that are so bad that the system seems |
630 | -# to hang. |
631 | -revision_graph_locks = {} |
632 | -revision_graph_check_lock = threading.Lock() |
633 | history_db_importer_lock = threading.Lock() |
634 | |
635 | + |
636 | class History(object): |
637 | """Decorate a branch to provide information for rendering. |
638 | |
639 | @@ -279,10 +207,8 @@ |
640 | ids. |
641 | """ |
642 | |
643 | - def __init__(self, branch, whole_history_data_cache, file_cache=None, |
644 | - revinfo_disk_cache=None, cache_key=None, |
645 | - show_merge_points=True, |
646 | - cache_path=None): |
647 | + def __init__(self, branch, file_cache=None, cache_key=None, |
648 | + show_merge_points=True, cache_path=None): |
649 | assert branch.is_locked(), ( |
650 | "Can only construct a History object with a read-locked branch.") |
651 | if file_cache is not None: |
652 | @@ -293,20 +219,15 @@ |
653 | self._branch = branch |
654 | self._branch_tags = None |
655 | self._inventory_cache = {} |
656 | - # Map from (tip_revision, revision_id) => revno_str |
657 | - # and from (tip_revisino, revno_str) => revision_id |
658 | - self._querier = _get_querier(branch) |
659 | - if self._querier is None: |
660 | - assert cache_path is not None |
661 | - self._querier = history_db.Querier( |
662 | - os.path.join(cache_path, 'historydb.sql'), branch) |
663 | - # History-db is not configured for this branch, do it ourselves |
664 | - # sqlite is single-writer, so block concurrant updates. |
665 | - # Note that this was even done in the past because of perf issues, even |
666 | - # without a disk requirement. |
667 | + if cache_path is None: |
668 | + cache_path = ':memory:' |
669 | + else: |
670 | + cache_path = os.path.join(cache_path, 'historydb.sql') |
671 | + self._querier = history_db.Querier(cache_path, branch) |
672 | + # sqlite is single-writer, so block concurrent updates. |
673 | self._querier.set_importer_lock(history_db_importer_lock) |
674 | - # TODO: Is this being premature? It makes the rest of the code |
675 | - # simpler... |
676 | + # This may be premature, but for now if you need History, you almost |
677 | + # definitely need the branch imported. |
678 | self._querier.ensure_branch_tip() |
679 | self._branch_nick = self._branch.get_config().get_nickname() |
680 | self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,)) |
681 | @@ -328,9 +249,14 @@ |
682 | revno_str = self._revno_revid_cache.get(revid) |
683 | if revno_str is not None: |
684 | return revno_str |
685 | - revnos = self._querier.get_dotted_revno_range_multi([revid]) |
686 | - # TODO: Should probably handle KeyError? |
687 | - dotted_revno = revnos[revid] |
688 | + revnos = self._querier.get_dotted_revnos([revid]) |
689 | + try: |
690 | + dotted_revno = revnos[revid] |
691 | + except KeyError: |
692 | + # Asking for a revision which isn't in the ancestry can raise a |
693 | + # KeyError, it can also happen on empty branches. So we just return |
694 | + # 'unknown' |
695 | + return 'unknown' |
696 | revno_str = '.'.join(map(str, dotted_revno)) |
697 | self._revno_revid_cache.set(revid, revno_str) |
698 | return revno_str |
699 | @@ -351,8 +277,7 @@ |
700 | if not unknown: |
701 | return revno_map |
702 | # querier returns dotted revno tuples |
703 | - query_revno_map = self._querier.get_dotted_revno_range_multi( |
704 | - unknown) |
705 | + query_revno_map = self._querier.get_dotted_revnos(unknown) |
706 | ghosts = set(unknown) |
707 | for revid, dotted_revno in query_revno_map.iteritems(): |
708 | revno_str = '.'.join(map(str, dotted_revno)) |
709 | @@ -377,10 +302,6 @@ |
710 | |
711 | def _get_lh_parent(self, revid): |
712 | """Get the left-hand parent of a given revision id.""" |
713 | - # TODO: Move this into a public method on Querier |
714 | - # TODO: Possibly look into caching some of this info in memory, and |
715 | - # between HTTP requests. |
716 | - self._querier.ensure_branch_tip() |
717 | return self._querier._get_lh_parent_rev_id(revid) |
718 | |
719 | def _get_children(self, revid): |
720 | @@ -410,6 +331,7 @@ |
721 | # grabs the full history, and we now support stopping early. |
722 | history = self._branch.repository.iter_reverse_revision_history( |
723 | start_revid) |
724 | + is_null = bzrlib.revision.is_null |
725 | for revid in history: |
726 | yield revid |
727 | return |
728 | @@ -421,15 +343,37 @@ |
729 | introduced = self._querier.iter_merge_sorted_revisions( |
730 | start_revision_id=tip_revid, stop_revision_id=parent_revid) |
731 | introduced_revs = set([i[0] for i in introduced |
732 | - if i in revid_set]) |
733 | + if i[0] in revid_set]) |
734 | if introduced_revs: |
735 | revid_set.difference_update(introduced_revs) |
736 | yield tip_revid |
737 | tip_revid = parent_revid |
738 | |
739 | - def get_short_revision_history_by_fileid(self, file_id): |
740 | + def get_short_revision_history_by_fileid(self, file_id, tip_revid): |
741 | # FIXME: would be awesome if we could get, for a folder, the list of |
742 | # revisions where items within that folder changed.i |
743 | + # TODO: Walk backwards in history in case the file was deleted in tip |
744 | + inv = self.get_inventory(tip_revid) |
745 | + file_key = (file_id, inv[file_id].revision) |
746 | + # Now that we have a file_key, walk the per-file graph to find all |
747 | + # interesting parents |
748 | + get_kg = getattr(self._branch.repository.texts, |
749 | + 'get_known_graph_ancestry', None) |
750 | + if get_kg is not None: |
751 | + kg = get_kg([file_key]) |
752 | + return [k[1] for k in kg._nodes] |
753 | + # Walk the ancestry of this file_key, to find interesting revisions |
754 | + g = graph.Graph(self._branch.repository.texts) |
755 | + all_keys = set([key for key, _ in g.iter_ancestry([file_key])]) |
756 | + return [k[1] for k in all_keys] |
757 | + |
758 | + def get_inventory(self, revid): |
759 | + if revid not in self._inventory_cache: |
760 | + # TODO: This cache is unbounded, though only used for a single http |
761 | + # request. Consider what we might do to limit this. |
762 | + self._inventory_cache[revid] = ( |
763 | + self._branch.repository.get_inventory(revid)) |
764 | + return self._inventory_cache[revid] |
765 | possible_keys = [(file_id, revid) for revid in self._rev_indices] |
766 | get_parent_map = self._branch.repository.texts.get_parent_map |
767 | # We chunk the requests as this works better with GraphIndex. |
768 | @@ -459,7 +403,6 @@ |
769 | raise KeyError |
770 | revid = val |
771 | except KeyError: |
772 | - import pdb; pdb.set_trace() |
773 | raise bzrlib.errors.NoSuchRevision(self._branch_nick, revid) |
774 | return revid |
775 | |
776 | @@ -738,7 +681,7 @@ |
777 | 'tags': revtags, |
778 | } |
779 | if isinstance(revision, bzrlib.foreign.ForeignRevision): |
780 | - foreign_revid, mapping = (rev.foreign_revid, rev.mapping) |
781 | + foreign_revid, mapping = (revision.foreign_revid, revision.mapping) |
782 | elif ":" in revision.revision_id: |
783 | try: |
784 | foreign_revid, mapping = \ |
785 | |
786 | === added file 'loggerhead/history_db.py' |
787 | --- loggerhead/history_db.py 1970-01-01 00:00:00 +0000 |
788 | +++ loggerhead/history_db.py 2010-05-15 23:47:26 +0000 |
789 | @@ -0,0 +1,1621 @@ |
790 | +# Copyright (C) 2010 Canonical Ltd |
791 | +# |
792 | +# This program is free software; you can redistribute it and/or modify |
793 | +# it under the terms of the GNU General Public License as published by |
794 | +# the Free Software Foundation; either version 2 of the License, or |
795 | +# (at your option) any later version. |
796 | +# |
797 | +# This program is distributed in the hope that it will be useful, |
798 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
799 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
800 | +# GNU General Public License for more details. |
801 | +# |
802 | +# You should have received a copy of the GNU General Public License |
803 | +# along with this program; if not, write to the Free Software |
804 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
805 | + |
806 | +"""Store history information in a database.""" |
807 | + |
808 | +try: |
809 | + from sqlite3 import dbapi2 |
810 | +except ImportError: |
811 | + from pysqlite2 import dbapi2 |
812 | + |
813 | +from collections import defaultdict, deque |
814 | +import time |
815 | + |
816 | +from bzrlib import ( |
817 | + debug, |
818 | + errors, |
819 | + lru_cache, |
820 | + revision, |
821 | + static_tuple, |
822 | + trace, |
823 | + ui, |
824 | + ) |
825 | + |
826 | +from loggerhead import history_db_schema as schema |
827 | + |
828 | + |
829 | +NULL_PARENTS = (revision.NULL_REVISION,) |
830 | + |
831 | + |
832 | +def _n_params(n): |
833 | + """Create a query string representing N parameters. |
834 | + |
835 | + n=1 => ? |
836 | + n=2 => ?, ? |
837 | + etc. |
838 | + """ |
839 | + return ', '.join('?'*n) |
840 | + |
841 | + |
842 | +def _add_n_params(query, n): |
843 | + """Add n parameters to the query string. |
844 | + |
845 | + the query should have a single '%s' in it to be expanded. |
846 | + """ |
847 | + return query % (_n_params(n),) |
848 | + |
849 | + |
850 | +def _get_result_for_many(cursor, query, params): |
851 | + """Get the results for a query with a lot of parameters. |
852 | + |
853 | + SQLite has a limit on how many parameters can be passed (often for a good |
854 | + reason). However, we don't want to have to think about it right now. So |
855 | + this just loops over params based on a maximum allowed per query. Then |
856 | + return the whole result in one list. |
857 | + |
858 | + :param query: An SQL query that should contain something like: |
859 | + "WHERE foo IN (%s)" |
860 | + :param params: A list of parameters to supply to the function. |
861 | + """ |
862 | + res = [] |
863 | + MAX_COUNT = 200 |
864 | + for start in xrange(0, len(params), MAX_COUNT): |
865 | + next_params = params[start:start+MAX_COUNT] |
866 | + res.extend( |
867 | + cursor.execute(_add_n_params(query, len(next_params)), |
868 | + next_params).fetchall()) |
869 | + return res |
870 | + |
871 | + |
872 | +class Importer(object): |
873 | + """Import data from bzr into the history_db.""" |
874 | + |
875 | + _MAINLINE_PARENT_RANGE_LEN = 100 |
876 | + |
877 | + def __init__(self, db_path, a_branch, tip_revision_id=None, |
878 | + incremental=False, validate=False): |
879 | + db_conn = dbapi2.connect(db_path) |
880 | + self._incremental = incremental |
881 | + self._validate = validate |
882 | + self._db_conn = db_conn |
883 | + self._ensure_schema() |
884 | + self._cursor = self._db_conn.cursor() |
885 | + self._branch = a_branch |
886 | + if tip_revision_id is None: |
887 | + self._branch_tip_rev_id = a_branch.last_revision() |
888 | + else: |
889 | + self._branch_tip_rev_id = tip_revision_id |
890 | + self._branch_tip_key = (self._branch_tip_rev_id,) |
891 | + self._graph = None |
892 | + if not self._incremental: |
893 | + self._ensure_graph() |
894 | + self._rev_id_to_db_id = {} |
895 | + self._db_id_to_rev_id = {} |
896 | + self._stats = defaultdict(lambda: 0) |
897 | + # Map child_id => [parent_db_ids] |
898 | + self._db_parent_map = {} |
899 | + |
900 | + def set_max_cache_size(self, size): |
901 | + """Tell SQLite how many megabytes to cache internally.""" |
902 | + page_size = self._db_conn.execute('PRAGMA page_size').fetchone()[0] |
903 | + self._db_conn.execute("PRAGMA cache_size = %d" |
904 | + % (size / page_size,)); |
905 | + |
906 | + def _ensure_schema(self): |
907 | + if not schema.is_initialized(self._db_conn, dbapi2.OperationalError): |
908 | + schema.create_sqlite_db(self._db_conn) |
909 | + if 'history_db' in debug.debug_flags: |
910 | + trace.note('history_db initialized database') |
911 | + # We know we can't do this incrementally, because nothing has |
912 | + # existed before... |
913 | + #self._incremental = False |
914 | + |
915 | + def _ensure_revisions(self, revision_ids): |
916 | + schema.ensure_revisions(self._cursor, revision_ids, |
917 | + self._rev_id_to_db_id, |
918 | + self._db_id_to_rev_id, self._graph) |
919 | + |
920 | + def _ensure_graph(self): |
921 | + if self._graph is not None: |
922 | + return |
923 | + repo = self._branch.repository |
924 | + self._graph = repo.revisions.get_known_graph_ancestry( |
925 | + [self._branch_tip_key]) |
926 | + |
927 | + def _is_imported(self, tip_rev_id): |
928 | + res = self._cursor.execute( |
929 | + "SELECT tip_revision FROM dotted_revno JOIN revision" |
930 | + " ON dotted_revno.tip_revision = revision.db_id" |
931 | + " WHERE revision_id = ?" |
932 | + " AND tip_revision = merged_revision", |
933 | + (tip_rev_id,)).fetchone() |
934 | + return (res is not None) |
935 | + |
936 | + def _insert_nodes(self, tip_rev_id, nodes): |
937 | + """Insert all of the nodes mentioned into the database.""" |
938 | + self._stats['_insert_node_calls'] += 1 |
939 | + rev_to_db = self._rev_id_to_db_id |
940 | + tip_db_id = rev_to_db[tip_rev_id] |
941 | + self._stats['total_nodes_inserted'] += len(nodes) |
942 | + revno_entries = [] |
943 | + st = static_tuple.StaticTuple |
944 | + def build_revno_info(): |
945 | + for dist, node in enumerate(nodes): |
946 | + # TODO: Do we need to track the 'end_of_merge' and 'merge_depth' |
947 | + # fields? |
948 | + db_id = rev_to_db[node.key[0]] |
949 | + revno_entries.append((tip_db_id, |
950 | + db_id, |
951 | + '.'.join(map(str, node.revno)), |
952 | + node.end_of_merge, |
953 | + node.merge_depth, |
954 | + dist)) |
955 | + build_revno_info() |
956 | + try: |
957 | + schema.create_dotted_revnos(self._cursor, revno_entries) |
958 | + except dbapi2.IntegrityError: |
959 | + # Revisions already exist |
960 | + return False |
961 | + return True |
962 | + |
963 | + def _update_parents(self, nodes): |
964 | + """Update parent information for all these nodes.""" |
965 | + # Get the keys and their parents |
966 | + parent_keys = self._graph.get_parent_keys |
967 | + parent_map = dict([(n.key[0], [p[0] for p in parent_keys(n.key)]) |
968 | + for n in nodes]) |
969 | + self._insert_parent_map(parent_map) |
970 | + |
971 | + def _insert_parent_map(self, parent_map): |
972 | + """Insert all the entries in this parent map into the parent table.""" |
973 | + r_to_d = self._rev_id_to_db_id |
974 | + def _ensure_parent_map_keys(): |
975 | + rev_ids = set([r for r in parent_map if r not in r_to_d]) |
976 | + rev_ids_update = rev_ids.update |
977 | + for parent_keys in parent_map.itervalues(): |
978 | + rev_ids_update([p for p in parent_keys if p not in r_to_d]) |
979 | + self._ensure_revisions(rev_ids) |
980 | + _ensure_parent_map_keys() |
981 | + data = [] |
982 | + stuple = static_tuple.StaticTuple.from_sequence |
983 | + for rev_id, parent_ids in parent_map.iteritems(): |
984 | + db_id = r_to_d[rev_id] |
985 | + if db_id in self._db_parent_map: |
986 | + # This has already been imported, skip it |
987 | + continue |
988 | + parent_db_ids = stuple([r_to_d[p_id] for p_id in parent_ids]) |
989 | + self._db_parent_map[db_id] = parent_db_ids |
990 | + for idx, parent_db_id in enumerate(parent_db_ids): |
991 | + data.append((db_id, parent_db_id, idx)) |
992 | + # Inserting the data in db-sorted order actually improves perf a fair |
993 | + # amount. ~10%. My guess is that it keeps locality for uniqueness |
994 | + # checks, etc. |
995 | + data.sort() |
996 | + self._cursor.executemany("INSERT OR IGNORE INTO parent" |
997 | + " (child, parent, parent_idx)" |
998 | + "VALUES (?, ?, ?)", data) |
999 | + |
1000 | + def do_import(self): |
1001 | + if revision.is_null(self._branch_tip_rev_id): |
1002 | + return |
1003 | + merge_sorted = self._import_tip(self._branch_tip_rev_id) |
1004 | + self._db_conn.commit() |
1005 | + |
1006 | + def _get_merge_sorted_tip(self, tip_revision_id): |
1007 | + if self._incremental: |
1008 | + self._update_ancestry(tip_revision_id) |
1009 | + self._ensure_revisions([tip_revision_id]) |
1010 | + tip_db_id = self._rev_id_to_db_id[tip_revision_id] |
1011 | + inc_merger = _IncrementalMergeSort(self, tip_db_id) |
1012 | + merge_sorted = inc_merger.topo_order() |
1013 | + # Map db_ids back to the keys that self._graph would generate |
1014 | + # Assert that the result is valid |
1015 | + if self._validate: |
1016 | + self._ensure_graph() |
1017 | + actual_ms = self._graph.merge_sort((tip_revision_id,)) |
1018 | + actual_ms_iter = iter(actual_ms) |
1019 | + else: |
1020 | + actual_ms_iter = None |
1021 | + |
1022 | + def assert_is_equal(x, y): |
1023 | + if x != y: |
1024 | + import pdb; pdb.set_trace() |
1025 | + db_to_rev = self._db_id_to_rev_id |
1026 | + for node in merge_sorted: |
1027 | + try: |
1028 | + node.key = (db_to_rev[node.key],) |
1029 | + except KeyError: # Look this one up in the db |
1030 | + rev_res = self._cursor.execute( |
1031 | + "SELECT revision_id FROM revision WHERE db_id = ?", |
1032 | + (node.key,)).fetchone() |
1033 | + rev_id = rev_res[0] |
1034 | + self._db_id_to_rev_id[node.key] = rev_id |
1035 | + self._rev_id_to_db_id[rev_id] = node.key |
1036 | + node.key = (rev_id,) |
1037 | + if actual_ms_iter is None: |
1038 | + continue |
1039 | + actual_node = actual_ms_iter.next() |
1040 | + assert_is_equal(node.key, actual_node.key) |
1041 | + assert_is_equal(node.revno, actual_node.revno) |
1042 | + assert_is_equal(node.merge_depth, actual_node.merge_depth) |
1043 | + assert_is_equal(node.end_of_merge, actual_node.end_of_merge) |
1044 | + if actual_ms_iter is not None: |
1045 | + try: |
1046 | + actual_node = actual_ms_iter.next() |
1047 | + except StopIteration: |
1048 | + # no problem they both say they've finished |
1049 | + pass |
1050 | + else: |
1051 | + # The next revision must have already been imported |
1052 | + assert self._is_imported(actual_node.key[0]) |
1053 | + else: |
1054 | + merge_sorted = self._graph.merge_sort((tip_revision_id,)) |
1055 | + return merge_sorted |
1056 | + |
1057 | + def _import_tip(self, tip_revision_id, suppress_progress_and_commit=False): |
1058 | + if suppress_progress_and_commit: |
1059 | + pb = None |
1060 | + else: |
1061 | + pb = ui.ui_factory.nested_progress_bar() |
1062 | + if pb is not None: |
1063 | + pb.update('getting merge_sorted') |
1064 | + merge_sorted = self._get_merge_sorted_tip(tip_revision_id) |
1065 | + if not self._incremental: |
1066 | + # If _incremental all the revisions will have already been ensured |
1067 | + # by the _update_ancestry code. |
1068 | + if pb is not None: |
1069 | + pb.update('allocating revisions', 0, |
1070 | + len(merge_sorted)) |
1071 | + self._ensure_revisions([n.key[0] for n in merge_sorted]) |
1072 | + if pb is not None: |
1073 | + pb.update('updating parents', 0, |
1074 | + len(merge_sorted)) |
1075 | + self._update_parents(merge_sorted) |
1076 | + try: |
1077 | + last_mainline_rev_id = None |
1078 | + new_nodes = [] |
1079 | + for idx, node in enumerate(merge_sorted): |
1080 | + if pb is not None and idx & 0xFF == 0: |
1081 | + pb.update('importing', idx, len(merge_sorted)) |
1082 | + if last_mainline_rev_id is None: |
1083 | + assert not new_nodes |
1084 | + assert node.merge_depth == 0, \ |
1085 | + "We did not start at a mainline?" |
1086 | + last_mainline_rev_id = node.key[0] |
1087 | + new_nodes.append(node) |
1088 | + continue |
1089 | + if node.merge_depth == 0: |
1090 | + # We're at a new mainline. Insert the nodes for the |
1091 | + # previous mainline. If this has already been inserted, we |
1092 | + # assume all previous ones are also. Safe as long as we |
1093 | + # wait to commit() until we insert all parents. |
1094 | + if not self._insert_nodes(last_mainline_rev_id, new_nodes): |
1095 | + # This data has already been imported. |
1096 | + new_nodes = [] |
1097 | + break |
1098 | + last_mainline_rev_id = node.key[0] |
1099 | + new_nodes = [] |
1100 | + new_nodes.append(node) |
1101 | + if new_nodes: |
1102 | + assert last_mainline_rev_id is not None |
1103 | + self._insert_nodes(last_mainline_rev_id, new_nodes) |
1104 | + new_nodes = [] |
1105 | + self._build_one_mainline(tip_revision_id) |
1106 | + finally: |
1107 | + if pb is not None: |
1108 | + pb.finished() |
1109 | + return merge_sorted |
1110 | + |
1111 | + def _update_ancestry(self, new_tip_rev_id): |
1112 | + """Walk the parents of this tip, updating 'revision' and 'parent' |
1113 | + |
1114 | + self._rev_id_to_db_id will be updated. |
1115 | + """ |
1116 | + (known, parent_map, |
1117 | + children) = self._find_known_ancestors(new_tip_rev_id) |
1118 | + self._compute_gdfo_and_insert(known, children, parent_map) |
1119 | + self._insert_parent_map(parent_map) |
1120 | + # This seems to slow things down a fair amount. On bzrtools, we end up |
1121 | + # calling it 75 times, and it ends up taking 800ms. vs a total rutime |
1122 | + # of 1200ms otherwise. |
1123 | + # self._db_conn.commit() |
1124 | + |
1125 | + def _find_known_ancestors(self, new_tip_rev_id): |
1126 | + """Starting at tip, find ancestors we already have""" |
1127 | + needed = [new_tip_rev_id] |
1128 | + all_needed = set(new_tip_rev_id) |
1129 | + children = {} |
1130 | + parent_map = {} |
1131 | + known = {} |
1132 | + pb = ui.ui_factory.nested_progress_bar() |
1133 | + try: |
1134 | + while needed: |
1135 | + pb.update('Finding ancestry', len(all_needed), len(all_needed)) |
1136 | + rev_id = needed.pop() |
1137 | + if rev_id in known: |
1138 | + # We may add particular parents multiple times, just ignore |
1139 | + # them once they've been found |
1140 | + continue |
1141 | + res = self._cursor.execute( |
1142 | + "SELECT gdfo FROM revision WHERE revision_id = ?", |
1143 | + [rev_id]).fetchone() |
1144 | + if res is not None: |
1145 | + known[rev_id] = res[0] |
1146 | + continue |
1147 | + # We don't have this entry recorded yet, add the parents to the |
1148 | + # search |
1149 | + pmap = self._branch.repository.get_parent_map([rev_id]) |
1150 | + parent_map.update(pmap) |
1151 | + parent_ids = pmap.get(rev_id, None) |
1152 | + if parent_ids is None or parent_ids == NULL_PARENTS: |
1153 | + # We can insert this rev directly, because we know its |
1154 | + # gdfo, as it has no parents. |
1155 | + parent_map[rev_id] = () |
1156 | + self._cursor.execute("INSERT INTO revision (revision_id, gdfo)" |
1157 | + " VALUES (?, ?)", (rev_id, 1)) |
1158 | + # Wrap around to populate known quickly |
1159 | + needed.append(rev_id) |
1160 | + if parent_ids is None: |
1161 | + # This is a ghost, add it to the table |
1162 | + self._cursor.execute("INSERT INTO ghost (db_id)" |
1163 | + " SELECT db_id FROM revision" |
1164 | + " WHERE revision_id = ?", |
1165 | + (rev_id,)) |
1166 | + continue |
1167 | + for parent_id in pmap[rev_id]: |
1168 | + if parent_id not in known: |
1169 | + if parent_id not in all_needed: |
1170 | + needed.append(parent_id) |
1171 | + all_needed.add(parent_id) |
1172 | + children.setdefault(parent_id, []).append(rev_id) |
1173 | + finally: |
1174 | + pb.finished() |
1175 | + return known, parent_map, children |
1176 | + |
1177 | + def _compute_gdfo_and_insert(self, known, children, parent_map): |
1178 | + # At this point, we should have walked to all known parents, and should |
1179 | + # be able to build up the gdfo and parent info for all keys. |
1180 | + pending = [(gdfo, rev_id) for rev_id, gdfo in known.iteritems()] |
1181 | + while pending: |
1182 | + gdfo, rev_id = pending.pop() |
1183 | + for child_id in children.get(rev_id, []): |
1184 | + if child_id in known: |
1185 | + # XXX: Already numbered? |
1186 | + assert known[child_id] > gdfo |
1187 | + continue |
1188 | + parent_ids = parent_map[child_id] |
1189 | + max_gdfo = -1 |
1190 | + for parent_id in parent_ids: |
1191 | + try: |
1192 | + this_gdfo = known[parent_id] |
1193 | + except KeyError: |
1194 | + # One parent hasn't been computed yet |
1195 | + break |
1196 | + if this_gdfo > max_gdfo: |
1197 | + max_gdfo = this_gdfo |
1198 | + else: |
1199 | + # All parents have their gdfo known |
1200 | + # assert gdfo == max_gdfo |
1201 | + child_gdfo = max_gdfo + 1 |
1202 | + known[child_id] = child_gdfo |
1203 | + self._cursor.execute( |
1204 | + "INSERT INTO revision (revision_id, gdfo)" |
1205 | + " VALUES (?, ?)", |
1206 | + (child_id, child_gdfo)) |
1207 | + # Put this into the pending queue so that *its* children |
1208 | + # also get updated |
1209 | + pending.append((child_gdfo, child_id)) |
1210 | + if self._graph is not None: |
1211 | + for rev_id, gdfo in known.iteritems(): |
1212 | + assert gdfo == self._graph._nodes[(rev_id,)].gdfo |
1213 | + |
1214 | + def _get_db_id(self, revision_id): |
1215 | + db_res = self._cursor.execute('SELECT db_id FROM revision' |
1216 | + ' WHERE revision_id = ?', |
1217 | + [revision_id]).fetchone() |
1218 | + if db_res is None: |
1219 | + return None |
1220 | + return db_res[0] |
1221 | + |
1222 | + def _update_dotted(self, new_tip_rev_id): |
1223 | + """We have a new 'tip' revision, Update the dotted_revno table.""" |
1224 | + # Just make sure the db has valid info for all the existing entries |
1225 | + self._update_ancestry(new_tip_rev_id) |
1226 | + |
1227 | + def _get_mainline_range_count(self, head_db_id): |
1228 | + """Does the given head_db_id already have a range defined using it.""" |
1229 | + res = self._cursor.execute("SELECT pkey, count, tail" |
1230 | + " FROM mainline_parent_range" |
1231 | + " WHERE head = ?" |
1232 | + " ORDER BY count DESC LIMIT 1", |
1233 | + [head_db_id]).fetchone() |
1234 | + if res is None: |
1235 | + return None, None, None |
1236 | + return res |
1237 | + |
1238 | + def _get_mainline_range(self, range_key): |
1239 | + """Get the revisions in the mainline range specified.""" |
1240 | + res = self._cursor.execute("SELECT revision FROM mainline_parent" |
1241 | + " WHERE range = ?" |
1242 | + " ORDER BY dist DESC", [range_key]) |
1243 | + return [r[0] for r in res.fetchall()] |
1244 | + |
1245 | + def _get_lh_parent_db_id(self, revision_db_id): |
1246 | + parent_res = self._cursor.execute(""" |
1247 | + SELECT parent.parent |
1248 | + FROM parent |
1249 | + WHERE parent.child = ? |
1250 | + AND parent_idx = 0 |
1251 | + LIMIT 1 -- hint to the db, should always be only 1 |
1252 | + """, (revision_db_id,)).fetchone() |
1253 | + # self._stats['lh_parent_step'] += 1 |
1254 | + if parent_res is None: |
1255 | + return None |
1256 | + return parent_res[0] |
1257 | + |
1258 | + def _insert_range(self, range_db_ids, tail_db_id): |
1259 | + head_db_id = range_db_ids[0] |
1260 | + self._cursor.execute("INSERT INTO mainline_parent_range" |
1261 | + " (head, tail, count) VALUES (?, ?, ?)", |
1262 | + (head_db_id, tail_db_id, len(range_db_ids))) |
1263 | + # Note: This works for sqlite, does it work for pgsql? |
1264 | + range_key = self._cursor.lastrowid |
1265 | + self._stats['ranges_inserted'] += 1 |
1266 | + # Note that 'tail' is explicitly not included in the range |
1267 | + self._stats['revs_in_ranges'] += len(range_db_ids) |
1268 | + self._cursor.executemany( |
1269 | + "INSERT INTO mainline_parent (range, revision, dist)" |
1270 | + " VALUES (?, ?, ?)", |
1271 | + [(range_key, d, idx) for idx, d in enumerate(range_db_ids)]) |
1272 | + |
1273 | + def _build_one_mainline(self, head_rev_id): |
1274 | + # 1) Walk backward until you find an existing entry in the |
1275 | + # mainline_parent_range table (or you reach the end) |
1276 | + # 2) If the range has less than X revisions, include it in the |
1277 | + # revisions to be added |
1278 | + # 3) chop the list into X revision sections, and insert them |
1279 | + # |
1280 | + # This should ensure that any given ancestry has at most 1 section |
1281 | + # which has less than X revisions, and it should preserve convergence. |
1282 | + self._ensure_revisions([head_rev_id]) |
1283 | + cur_db_id = self._rev_id_to_db_id[head_rev_id] |
1284 | + range_db_ids = [] |
1285 | + while cur_db_id is not None: |
1286 | + (range_key, next_count, |
1287 | + tail) = self._get_mainline_range_count(cur_db_id) |
1288 | + if range_key is not None: |
1289 | + # This tip is already present in mainline_parent_range |
1290 | + # table. |
1291 | + if (range_db_ids |
1292 | + and next_count < self._MAINLINE_PARENT_RANGE_LEN): |
1293 | + range_db_ids.extend(self._get_mainline_range(range_key)) |
1294 | + cur_db_id = tail |
1295 | + break |
1296 | + else: |
1297 | + range_db_ids.append(cur_db_id) |
1298 | + cur_db_id = self._get_lh_parent_db_id(cur_db_id) |
1299 | + # We now have a list of db ids that need to be split up into |
1300 | + # ranges. |
1301 | + while range_db_ids: |
1302 | + tail_db_ids = range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:] |
1303 | + del range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:] |
1304 | + self._insert_range(tail_db_ids, cur_db_id) |
1305 | + cur_db_id = tail_db_ids[0] |
1306 | + |
1307 | + |
1308 | +class _MergeSortNode(object): |
1309 | + """A simple object that represents one entry in the merge sorted graph.""" |
1310 | + |
1311 | + __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge', |
1312 | + '_left_parent', '_left_pending_parent', |
1313 | + '_pending_parents', '_is_first', |
1314 | + ) |
1315 | + |
1316 | + def __init__(self, key, merge_depth, left_parent, pending_parents, |
1317 | + is_first): |
1318 | + self.key = key |
1319 | + self.merge_depth = merge_depth |
1320 | + self.revno = None |
1321 | + self.end_of_merge = None |
1322 | + self._left_parent = left_parent |
1323 | + self._left_pending_parent = left_parent |
1324 | + self._pending_parents = pending_parents |
1325 | + self._is_first = is_first |
1326 | + |
1327 | + def __repr__(self): |
1328 | + return '%s(%s, %s, %s, %s [%s %s %s %s])' % ( |
1329 | + self.__class__.__name__, |
1330 | + self.key, self.revno, self.end_of_merge, self.merge_depth, |
1331 | + self._left_parent, self._left_pending_parent, |
1332 | + self._pending_parents, self._is_first) |
1333 | + |
1334 | + |
1335 | +class _IncrementalMergeSort(object): |
1336 | + """Context for importing partial history.""" |
1337 | + # Note: all of the ids in this object are database ids. the revision_ids |
1338 | + # should have already been imported before we get to this step. |
1339 | + |
1340 | + def __init__(self, importer, tip_db_id): |
1341 | + self._importer = importer |
1342 | + self._tip_db_id = tip_db_id |
1343 | + self._mainline_db_ids = None |
1344 | + self._imported_mainline_id = None |
1345 | + self._cursor = importer._cursor |
1346 | + self._stats = importer._stats |
1347 | + |
1348 | + # db_id => gdfo |
1349 | + self._known_gdfo = {} |
1350 | + # db_ids that we know are ancestors of mainline_db_ids that are not |
1351 | + # ancestors of pre_mainline_id |
1352 | + self._interesting_ancestor_ids = set() |
1353 | + |
1354 | + # Information from the dotted_revno table for revisions that are in the |
1355 | + # already-imported mainline. |
1356 | + self._imported_dotted_revno = {} |
1357 | + # What dotted revnos have been loaded |
1358 | + self._known_dotted = set() |
1359 | + # This is the gdfo of the current mainline revision search tip. This is |
1360 | + # the threshold such that |
1361 | + self._imported_gdfo = None |
1362 | + |
1363 | + # Revisions that we are walking, to see if they are interesting, or |
1364 | + # already imported. |
1365 | + self._search_tips = None |
1366 | + # mainline revno => number of child branches |
1367 | + self._revno_to_branch_count = {} |
1368 | + # (revno, branch_num) => oldest seen child |
1369 | + self._branch_to_child_count = {} |
1370 | + |
1371 | + self._depth_first_stack = None |
1372 | + self._scheduled_stack = None |
1373 | + self._seen_parents = None |
1374 | + # Map from db_id => parent_ids |
1375 | + self._parent_map = self._importer._db_parent_map |
1376 | + |
1377 | + # We just populate all known ghosts here. |
1378 | + # TODO: Ghosts are expected to be rare. If we find a case where probing |
1379 | + # for them at runtime is better than grabbing them all at once, |
1380 | + # re-evaluate this decision. |
1381 | + self._ghosts = None |
1382 | + |
1383 | + def _find_needed_mainline(self): |
1384 | + """Find mainline revisions that need to be filled out. |
1385 | + |
1386 | + :return: ([mainline_not_imported], most_recent_imported) |
1387 | + """ |
1388 | + db_id = self._tip_db_id |
1389 | + needed = [] |
1390 | + while db_id is not None and not self._is_imported_db_id(db_id): |
1391 | + needed.append(db_id) |
1392 | + db_id = self._importer._get_lh_parent_db_id(db_id) |
1393 | + self._mainline_db_ids = needed |
1394 | + self._interesting_ancestor_ids.update(self._mainline_db_ids) |
1395 | + self._imported_mainline_id = db_id |
1396 | + |
1397 | + def _get_initial_search_tips(self): |
1398 | +        """Grab the right-hand parents of all the interesting mainline revisions.
1399 | + |
1400 | + We know we already searched all of the left-hand parents, so just grab |
1401 | + the right-hand parents. |
1402 | + """ |
1403 | + # TODO: Split this into a loop, since sqlite has a maximum number of |
1404 | + # parameters. |
1405 | + res = _get_result_for_many(self._cursor, |
1406 | + "SELECT parent, gdfo FROM parent, revision" |
1407 | + " WHERE parent.parent = revision.db_id" |
1408 | + " AND parent_idx != 0" |
1409 | + " AND child IN (%s)", |
1410 | + self._mainline_db_ids) |
1411 | + self._search_tips = set([r[0] for r in res]) |
1412 | + self._stats['num_search_tips'] += len(self._search_tips) |
1413 | + self._known_gdfo.update(res) |
1414 | + # We know that we will eventually need at least 1 step of the mainline |
1415 | + # (it gives us the basis for numbering everything). We do it now, |
1416 | + # because it increases the 'cheap' filtering we can do right away. |
1417 | + self._stats['step mainline initial'] += 1 |
1418 | + self._step_mainline() |
1419 | + ghost_res = self._cursor.execute("SELECT db_id FROM ghost").fetchall() |
1420 | + self._ghosts = set([g[0] for g in ghost_res]) |
1421 | + |
1422 | + def _is_imported_db_id(self, tip_db_id): |
1423 | + res = self._cursor.execute( |
1424 | + "SELECT count(*) FROM dotted_revno" |
1425 | + " WHERE tip_revision = ?" |
1426 | + " AND tip_revision = merged_revision", |
1427 | + (tip_db_id,)).fetchone() |
1428 | + return res[0] > 0 |
1429 | + |
1430 | + def _split_search_tips_by_gdfo(self, unknown): |
1431 | + """For these search tips, mark ones 'interesting' based on gdfo. |
1432 | + |
1433 | + All search tips are ancestors of _mainline_db_ids. So if their gdfo |
1434 | + indicates that they could not be merged into already imported |
1435 | + revisions, then we know automatically that they are |
1436 | + new-and-interesting. Further, if they are present in |
1437 | + _imported_dotted_revno, then we know they are not interesting, and |
1438 | + we will stop searching them. |
1439 | + |
1440 | + Otherwise, we don't know for sure which category they fall into, so |
1441 | + we return them for further processing. |
1442 | + |
1443 | + :return: still_unknown - search tips that aren't known to be |
1444 | + interesting, and aren't known to be in the ancestry of already |
1445 | + imported revisions. |
1446 | + """ |
1447 | + still_unknown = [] |
1448 | + min_gdfo = None |
1449 | + for db_id in unknown: |
1450 | + if (db_id in self._imported_dotted_revno |
1451 | + or db_id == self._imported_mainline_id): |
1452 | + # This should be removed as a search tip, we know it isn't |
1453 | + # interesting, it is an ancestor of an imported revision |
1454 | + self._stats['split already imported'] += 1 |
1455 | + self._search_tips.remove(db_id) |
1456 | + continue |
1457 | + gdfo = self._known_gdfo[db_id] |
1458 | + if gdfo >= self._imported_gdfo: |
1459 | + self._stats['split gdfo'] += 1 |
1460 | + self._interesting_ancestor_ids.add(db_id) |
1461 | + else: |
1462 | + still_unknown.append(db_id) |
1463 | + return still_unknown |
1464 | + |
1465 | + def _split_interesting_using_children(self, unknown_search_tips): |
1466 | + """Find children of these search tips. |
1467 | + |
1468 | + For each search tip, we find all of its known children. We then filter |
1469 | + the children by: |
1470 | + a) child is ignored if child in _interesting_ancestor_ids |
1471 | + b) child is ignored if gdfo(child) > _imported_gdfo |
1472 | + or (gdfo(child) == _imported_gdfo and child != |
1473 | + _imported_mainline_id) |
1474 | + The reason for the extra check is because for the ancestry |
1475 | + left-to-be-searched, with tip at _imported_mainline_id, *only* |
1476 | + _imported_mainline_id is allowed to have gdfo = _imported_gdfo. |
1477 | +        For each search tip, if there are no interesting children, then this
1478 | +        search tip is definitely interesting (there is no path for it to be
1479 | +        merged into a previous mainline entry).
1480 | + |
1481 | + :return: still_unknown |
1482 | +            still_unknown are the search tips that still have children which
1483 | +            could possibly be merged.
1484 | + """ |
1485 | + interesting = self._interesting_ancestor_ids |
1486 | + parent_child_res = self._cursor.execute(_add_n_params( |
1487 | + "SELECT parent, child FROM parent" |
1488 | + " WHERE parent IN (%s)", |
1489 | + len(unknown_search_tips)), unknown_search_tips).fetchall() |
1490 | + parent_to_children = {} |
1491 | + already_imported = set() |
1492 | + for parent, child in parent_child_res: |
1493 | + if (child in self._imported_dotted_revno |
1494 | + or child == self._imported_mainline_id): |
1495 | + # This child is already imported, so obviously the parent is, |
1496 | + # too. |
1497 | + self._stats['split child imported'] += 1 |
1498 | + already_imported.add(parent) |
1499 | + already_imported.add(child) |
1500 | + parent_to_children.setdefault(parent, []).append(child) |
1501 | + self._search_tips.difference_update(already_imported) |
1502 | + possibly_merged_children = set( |
1503 | + [c for p, c in parent_child_res |
1504 | + if c not in interesting and p not in already_imported]) |
1505 | + known_gdfo = self._known_gdfo |
1506 | + unknown_gdfos = [c for c in possibly_merged_children |
1507 | + if c not in known_gdfo] |
1508 | + # TODO: Is it more wasteful to join this table early, or is it better |
1509 | + # because we avoid having to pass N parameters back in? |
1510 | + # TODO: Would sorting the db ids help? They are the primary key for the |
1511 | + # table, so we could potentially fetch in a better order. |
1512 | + if unknown_gdfos: |
1513 | + res = self._cursor.execute(_add_n_params( |
1514 | + "SELECT db_id, gdfo FROM revision WHERE db_id IN (%s)", |
1515 | + len(unknown_gdfos)), unknown_gdfos) |
1516 | + known_gdfo.update(res) |
1517 | + min_gdfo = self._imported_gdfo |
1518 | + # Remove all of the children who have gdfo >= min. We already handled |
1519 | + # the == min case in the first loop. |
1520 | + possibly_merged_children.difference_update( |
1521 | + [c for c in possibly_merged_children if known_gdfo[c] >= min_gdfo]) |
1522 | + still_unknown = [] |
1523 | + for parent in unknown_search_tips: |
1524 | + if parent in already_imported: |
1525 | + self._stats['split parent imported'] += 1 |
1526 | + continue |
1527 | + for c in parent_to_children[parent]: |
1528 | + if c in possibly_merged_children: |
1529 | + still_unknown.append(parent) |
1530 | + break |
1531 | + else: # All children could not be possibly merged |
1532 | + self._stats['split children interesting'] += 1 |
1533 | + interesting.add(parent) |
1534 | + return still_unknown |
1535 | + |
1536 | + def _step_mainline(self): |
1537 | + """Move the mainline pointer by one, updating the data.""" |
1538 | + self._stats['step mainline'] += 1 |
1539 | + res = self._cursor.execute( |
1540 | + "SELECT merged_revision, revno, end_of_merge, merge_depth" |
1541 | + " FROM dotted_revno WHERE tip_revision = ? ORDER BY dist", |
1542 | + [self._imported_mainline_id]).fetchall() |
1543 | + stuple = static_tuple.StaticTuple.from_sequence |
1544 | + st = static_tuple.StaticTuple |
1545 | + dotted_info = [st(r[0], st(stuple(map(int, r[1].split('.'))), |
1546 | + r[2], r[3])) |
1547 | + for r in res] |
1548 | + self._stats['step mainline cache missed'] += 1 |
1549 | + self._stats['step mainline added'] += len(dotted_info) |
1550 | + self._update_info_from_dotted_revno(dotted_info) |
1551 | + # TODO: We could remove search tips that show up as newly merged |
1552 | + # though that can wait until the next |
1553 | + # _split_search_tips_by_gdfo |
1554 | + # new_merged_ids = [r[0] for r in res] |
1555 | + res = self._cursor.execute("SELECT parent, gdfo" |
1556 | + " FROM parent, revision" |
1557 | + " WHERE parent = db_id" |
1558 | + " AND parent_idx = 0" |
1559 | + " AND child = ?", |
1560 | + [self._imported_mainline_id]).fetchone() |
1561 | + if res is None: |
1562 | + # Walked off the mainline... |
1563 | + # TODO: Make sure this stuff is tested |
1564 | + self._imported_mainline_id = None |
1565 | + self._imported_gdfo = 0 |
1566 | + else: |
1567 | + self._imported_mainline_id, self._imported_gdfo = res |
1568 | + self._known_gdfo[self._imported_mainline_id] = self._imported_gdfo |
1569 | + |
1570 | + def _step_search_tips(self): |
1571 | + """Move the search tips to their parents.""" |
1572 | + self._stats['step search tips'] += 1 |
1573 | + res = _get_result_for_many(self._cursor, |
1574 | + "SELECT parent, gdfo FROM parent, revision" |
1575 | + " WHERE parent=db_id AND child IN (%s)", |
1576 | + list(self._search_tips)) |
1577 | + # TODO: We could use this time to fill out _parent_map, rather than |
1578 | + # waiting until _push_node and duplicating a request to the |
1579 | + # parent table. It may be reasonable to wait on gdfo also... |
1580 | + |
1581 | + # Filter out search tips that we've already searched via a different |
1582 | + # path. By construction, if we are stepping the search tips, we know |
1583 | + # that all previous search tips are either in |
1584 | + # self._imported_dotted_revno or in self._interesting_ancestor_ids. |
1585 | + # _imported_dotted_revno will be filtered in the first |
1586 | + # _split_search_tips_by_gdfo call, so we just filter out already |
1587 | + # interesting ones. |
1588 | + interesting = self._interesting_ancestor_ids |
1589 | + self._search_tips = set([r[0] for r in res if r[0] not in interesting]) |
1590 | + # TODO: For search tips we will be removing, we don't need to join |
1591 | + # against revision since we should already have them. There may |
1592 | + # be other ways that we already know gdfo. It may be cheaper to |
1593 | + # check first. |
1594 | + self._stats['num_search_tips'] += len(self._search_tips) |
1595 | + self._known_gdfo.update(res) |
1596 | + |
1597 | + def _ensure_lh_parent_info(self): |
1598 | +        """LH parents of interesting_ancestor_ids are either present or pending.
1599 | + |
1600 | + Either the data should be in _imported_dotted_revno, or the lh parent |
1601 | + should be in interesting_ancestor_ids (meaning we will number it). |
1602 | + """ |
1603 | + #XXX REMOVE: pmap = self._parent_map |
1604 | + missing_parent_ids = set() |
1605 | + for db_id in self._interesting_ancestor_ids: |
1606 | + parent_ids = self._get_parents(db_id) |
1607 | + if not parent_ids: # no parents, nothing to add |
1608 | + continue |
1609 | + lh_parent = parent_ids[0] |
1610 | + if lh_parent in self._interesting_ancestor_ids: |
1611 | + continue |
1612 | + if lh_parent in self._imported_dotted_revno: |
1613 | + continue |
1614 | + missing_parent_ids.add(lh_parent) |
1615 | + missing_parent_ids.difference_update(self._ghosts) |
1616 | + while missing_parent_ids: |
1617 | + self._stats['step mainline ensure LH'] += 1 |
1618 | + self._step_mainline() |
1619 | + missing_parent_ids = missing_parent_ids.difference( |
1620 | + self._imported_dotted_revno) |
1621 | + |
1622 | + def _find_interesting_ancestry(self): |
1623 | + self._find_needed_mainline() |
1624 | + self._get_initial_search_tips() |
1625 | + while self._search_tips: |
1626 | + # We don't know whether these search tips are known interesting, or |
1627 | + # known uninteresting |
1628 | + unknown = list(self._search_tips) |
1629 | + while unknown: |
1630 | + unknown = self._split_search_tips_by_gdfo(unknown) |
1631 | + if not unknown: |
1632 | + break |
1633 | + unknown = self._split_interesting_using_children(unknown) |
1634 | + if not unknown: |
1635 | + break |
1636 | + # The current search tips are the 'newest' possible tips right |
1637 | + # now. If we can't classify them as definitely being |
1638 | + # interesting, then we need to step the mainline until we can. |
1639 | + # This means that the current search tips have children that |
1640 | + # could be merged into an earlier mainline, walk the mainline |
1641 | + # to see if we can resolve that. |
1642 | + # Note that relying strictly on gdfo is a bit of a waste here, |
1643 | + # because you may have a rev with 10 children before it lands |
1644 | + # in mainline, but all 11 revs will be in the dotted_revno |
1645 | + # cache for that mainline. |
1646 | + self._stats['step mainline unknown'] += 1 |
1647 | + self._step_mainline() |
1648 | + # All search_tips are known to either be interesting or |
1649 | + # uninteresting. Walk any search tips that remain. |
1650 | + self._step_search_tips() |
1651 | + # We're now sure we have all of the now-interesting revisions. To |
1652 | + # number them, we need their left-hand parents to be in |
1653 | + # _imported_dotted_revno |
1654 | + self._ensure_lh_parent_info() |
1655 | + |
1656 | + def _update_info_from_dotted_revno(self, dotted_info): |
1657 | + """Update info like 'child_seen' from the dotted_revno info.""" |
1658 | + # TODO: We can move this iterator into a parameter, and have it |
1659 | + # continuously updated from _step_mainline() |
1660 | + self._imported_dotted_revno.update(dotted_info) |
1661 | + self._known_dotted.update([i[1][0] for i in dotted_info]) |
1662 | + for db_id, (revno, eom, depth) in dotted_info: |
1663 | + if len(revno) > 1: # dotted revno, make sure branch count is right |
1664 | + base_revno = revno[0] |
1665 | + if (base_revno not in self._revno_to_branch_count |
1666 | + or revno[1] > self._revno_to_branch_count[base_revno]): |
1667 | + self._revno_to_branch_count[base_revno] = revno[1] |
1668 | + branch_key = revno[:2] |
1669 | + mini_revno = revno[2] |
1670 | + else: |
1671 | + # *mainline* branch |
1672 | + branch_key = 0 |
1673 | + mini_revno = revno[0] |
1674 | + # We found a mainline branch, make sure it is marked as such |
1675 | + self._revno_to_branch_count.setdefault(0, 0) |
1676 | + if (branch_key not in self._branch_to_child_count |
1677 | + or mini_revno > self._branch_to_child_count[branch_key]): |
1678 | + self._branch_to_child_count[branch_key] = mini_revno |
1679 | + |
1680 | + def _is_first_child(self, parent_id): |
1681 | + """Is this the first child seen for the given parent?""" |
1682 | + if parent_id in self._seen_parents: |
1683 | + return False |
1684 | + # We haven't seen this while walking, but perhaps the already merged |
1685 | + # stuff has. |
1686 | + self._seen_parents.add(parent_id) |
1687 | + if parent_id not in self._imported_dotted_revno: |
1688 | + # Haven't seen this parent merged before, so we can't have seen |
1689 | + # children of it |
1690 | + return True |
1691 | + revno = self._imported_dotted_revno[parent_id][0] |
1692 | + if len(revno) > 1: |
1693 | + branch_key = revno[:2] |
1694 | + mini_revno = revno[2] |
1695 | + else: |
1696 | + branch_key = 0 |
1697 | + mini_revno = revno[0] |
1698 | + if self._branch_to_child_count.get(branch_key, 0) > mini_revno: |
1699 | + # This revision shows up in the graph, but another revision in this |
1700 | + # branch shows up later, so this revision must have already been |
1701 | + # seen |
1702 | + return False |
1703 | + # If we got this far, it doesn't appear to have been seen. |
1704 | + return True |
1705 | + |
1706 | + def _get_parents(self, db_id): |
1707 | + if db_id in self._parent_map: |
1708 | + parent_ids = self._parent_map[db_id] |
1709 | + else: |
1710 | + parent_res = self._cursor.execute( |
1711 | + "SELECT parent FROM parent WHERE child = ?" |
1712 | + " ORDER BY parent_idx", (db_id,)).fetchall() |
1713 | + parent_ids = static_tuple.StaticTuple.from_sequence( |
1714 | + [r[0] for r in parent_res]) |
1715 | + self._parent_map[db_id] = parent_ids |
1716 | + return parent_ids |
1717 | + |
1718 | + def _push_node(self, db_id, merge_depth): |
1719 | + # TODO: Check if db_id is a ghost (not allowed on the stack) |
1720 | + self._stats['pushed'] += 1 |
1721 | + if db_id not in self._interesting_ancestor_ids: |
1722 | + # This is a parent that we really don't need to number |
1723 | + self._stats['pushed uninteresting'] += 1 |
1724 | + return |
1725 | + parent_ids = self._get_parents(db_id) |
1726 | + if len(parent_ids) <= 0: |
1727 | + left_parent = None |
1728 | + # We are dealing with a 'new root' possibly because of a ghost, |
1729 | + # possibly because of merging a new ancestry. |
1730 | + # KnownGraph.merge_sort just always says True here, so stick with |
1731 | + # that |
1732 | + is_first = True |
1733 | + else: |
1734 | + left_parent = parent_ids[0] |
1735 | + if left_parent in self._ghosts: |
1736 | + left_parent = None |
1737 | + is_first = True |
1738 | + else: |
1739 | + is_first = self._is_first_child(left_parent) |
1740 | + # Note: we don't have to filter out self._ghosts here, as long as we do |
1741 | + # it in _push_node |
1742 | + pending_parents = static_tuple.StaticTuple.from_sequence( |
1743 | + [p for p in parent_ids[1:] if p not in self._ghosts]) |
1744 | +        # Note: the node is modified in place while it is on the stack (its
1745 | +        # pending parents and revno are filled in as we go).
1746 | + self._depth_first_stack.append( |
1747 | + _MergeSortNode(db_id, merge_depth, left_parent, pending_parents, |
1748 | + is_first)) |
1749 | + |
1750 | + def _step_to_latest_branch(self, base_revno): |
1751 | + """Step the mainline until we've loaded the latest sub-branch. |
1752 | + |
1753 | + This is used when we need to create a new child branch. We need to |
1754 | + ensure that we've loaded the most-recently-merged branch, so that we |
1755 | + can generate the correct branch counter. |
1756 | + |
1757 | + For example, if you have a revision whose left-hand parent is 1.2.3, |
1758 | + you need to load mainline revisions until you find some revision like |
1759 | + (1.?.1). This will ensure that you have the most recent branch merged |
1760 | + to mainline that was branched from the revno=1 revision in mainline. |
1761 | + |
1762 | + Note that if we find (1,3,1) before finding (1,2,1) that is ok. As long |
1763 | + as we have found the first revision of any sub-branch, we know that |
1764 | + we've found the most recent (since we walk backwards). |
1765 | + |
1766 | + :param base_revno: The revision that this branch is based on. 0 means |
1767 | + that this is a new-root branch. |
1768 | + :return: None |
1769 | + """ |
1770 | + self._stats['step to latest'] += 1 |
1771 | + step_count = 0 |
1772 | + start_point = self._imported_mainline_id |
1773 | + found = None |
1774 | + while self._imported_mainline_id is not None: |
1775 | + if (base_revno,) in self._known_dotted: |
1776 | + # We have walked far enough to load the original revision, |
1777 | + # which means we've loaded all children. |
1778 | + self._stats['step to latest found base'] += 1 |
1779 | + found = (base_revno,) |
1780 | + break |
1781 | + # Estimate what is the most recent branch, and see if we have read |
1782 | + # its first revision |
1783 | + branch_count = self._revno_to_branch_count.get(base_revno, 0) |
1784 | + root_of_branch_revno = (base_revno, branch_count, 1) |
1785 | + # Note: if branch_count == 0, that means we haven't seen any |
1786 | + # other branches for this revision. |
1787 | + if root_of_branch_revno in self._known_dotted: |
1788 | + found = root_of_branch_revno |
1789 | + break |
1790 | + self._stats['step mainline to-latest'] += 1 |
1791 | + if base_revno == 0: |
1792 | + self._stats['step mainline to-latest NULL'] += 1 |
1793 | + self._step_mainline() |
1794 | + step_count += 1 |
1795 | + |
1796 | + def _pop_node(self): |
1797 | + """Move the last node from the _depth_first_stack to _scheduled_stack. |
1798 | + |
1799 | + This is the most left-hand node that we are able to find. |
1800 | + """ |
1801 | + node = self._depth_first_stack.pop() |
1802 | + if node._left_parent is not None: |
1803 | + parent_revno = self._imported_dotted_revno[node._left_parent][0] |
1804 | + if node._is_first: # We simply number as parent + 1 |
1805 | + if len(parent_revno) == 1: |
1806 | + mini_revno = parent_revno[0] + 1 |
1807 | + revno = (mini_revno,) |
1808 | + # Not sure if we *have* to maintain it, but it does keep |
1809 | + # our data-structures consistent |
1810 | + if mini_revno > self._branch_to_child_count[0]: |
1811 | + self._branch_to_child_count[0] = mini_revno |
1812 | + else: |
1813 | + revno = parent_revno[:2] + (parent_revno[2] + 1,) |
1814 | + else: |
1815 | + # we need a new branch number. To get this correct, we have to |
1816 | + # make sure that the beginning of this branch has been loaded |
1817 | + if len(parent_revno) > 1: |
1818 | + # if parent_revno is a mainline, then |
1819 | + # _ensure_lh_parent_info should have already loaded enough |
1820 | + # data. So we only do this when the parent is a merged |
1821 | + # revision. |
1822 | + self._step_to_latest_branch(parent_revno[0]) |
1823 | + base_revno = parent_revno[0] |
1824 | + branch_count = ( |
1825 | + self._revno_to_branch_count.get(base_revno, 0) + 1) |
1826 | + self._revno_to_branch_count[base_revno] = branch_count |
1827 | + revno = (base_revno, branch_count, 1) |
1828 | + else: |
1829 | + # We found a new root. There are 2 cases: |
1830 | + # a) This is the very first revision in the branch. In which case |
1831 | + # self._revno_to_branch_count won't have any counts for |
1832 | + # 'revno' 0. |
1833 | + # b) This is a ghost / the first revision in a branch that was |
1834 | + # merged. We need to allocate a new branch number. |
1835 | + # This distinction is pretty much the same as the 'is_first' |
1836 | + # check for revs with a parent if you consider the NULL revision |
1837 | + # to be revno 0. |
1838 | + # We have to walk back far enough to be sure that we have the |
1839 | + # most-recent merged new-root. This can be detected by finding |
1840 | + # any new root's first revision. And, of course, we should find |
1841 | + # the last one first while walking backwards. |
1842 | + # Theory: |
1843 | + # When you see (0,X,1) you've reached the point where the X |
1844 | + # number was chosen. A hypothetical (0,X+1,1) revision would |
1845 | + # only be numbered X+1 if it was merged after (0,X,1). Thus |
1846 | + # the *first* (0,?,1) revision you find merged must be the |
1847 | + # last. |
1848 | + |
1849 | + self._step_to_latest_branch(0) |
1850 | + branch_count = self._revno_to_branch_count.get(0, -1) + 1 |
1851 | + self._revno_to_branch_count[0] = branch_count |
1852 | + if branch_count == 0: # This is the mainline |
1853 | + revno = (1,) |
1854 | + self._branch_to_child_count[0] = 1 |
1855 | + else: |
1856 | + revno = (0, branch_count, 1) |
1857 | + if not self._scheduled_stack: |
1858 | + # For all but mainline revisions, we break on the end-of-merge. So |
1859 | + # when we start new numbering, end_of_merge is True. For mainline |
1860 | + # revisions, this is only true when we don't have a parent. |
1861 | + end_of_merge = True |
1862 | + if node._left_parent is not None and node.merge_depth == 0: |
1863 | + end_of_merge = False |
1864 | + else: |
1865 | + prev_node = self._scheduled_stack[-1] |
1866 | + if prev_node.merge_depth < node.merge_depth: |
1867 | + end_of_merge = True |
1868 | + elif (prev_node.merge_depth == node.merge_depth |
1869 | + and prev_node.key not in self._parent_map[node.key]): |
1870 | + # Next node is not a direct parent |
1871 | + end_of_merge = True |
1872 | + else: |
1873 | + end_of_merge = False |
1874 | + revno = static_tuple.StaticTuple.from_sequence(revno) |
1875 | + node.revno = revno |
1876 | + node.end_of_merge = end_of_merge |
1877 | + self._imported_dotted_revno[node.key] = static_tuple.StaticTuple( |
1878 | + revno, end_of_merge, node.merge_depth) |
1879 | + self._known_dotted.add(revno) |
1880 | + node._pending_parents = None |
1881 | + self._scheduled_stack.append(node) |
1882 | + |
1883 | + def _compute_merge_sort(self): |
1884 | + self._depth_first_stack = [] |
1885 | + self._scheduled_stack = [] |
1886 | + self._seen_parents = set() |
1887 | + if not self._mainline_db_ids: |
1888 | + # Nothing to number |
1889 | + return |
1890 | + self._push_node(self._mainline_db_ids[0], 0) |
1891 | + |
1892 | + while self._depth_first_stack: |
1893 | + last = self._depth_first_stack[-1] |
1894 | + if last._left_pending_parent is None and not last._pending_parents: |
1895 | + # The parents have been processed, pop the node |
1896 | + self._pop_node() |
1897 | + continue |
1898 | + while (last._left_pending_parent is not None |
1899 | + or last._pending_parents): |
1900 | + if last._left_pending_parent is not None: |
1901 | + # Push on the left-hand-parent |
1902 | + next_db_id = last._left_pending_parent |
1903 | + last._left_pending_parent = None |
1904 | + else: |
1905 | + pending_parents = last._pending_parents |
1906 | + next_db_id = pending_parents[-1] |
1907 | + last._pending_parents = pending_parents[:-1] |
1908 | + if next_db_id in self._imported_dotted_revno: |
1909 | + continue |
1910 | +                if next_db_id == last._left_parent: # Is this the left-hand parent?
1911 | + next_merge_depth = last.merge_depth |
1912 | + else: |
1913 | + next_merge_depth = last.merge_depth + 1 |
1914 | + self._push_node(next_db_id, next_merge_depth) |
1915 | + # And switch to the outer loop |
1916 | + break |
1917 | + |
1918 | + def topo_order(self): |
1919 | + self._find_interesting_ancestry() |
1920 | + self._compute_merge_sort() |
1921 | + return list(reversed(self._scheduled_stack)) |
1922 | + |
1923 | + |
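To make the numbering concrete, here is a small worked example of what the incremental merge sort above produces for a hypothetical four-revision ancestry (the revision names are invented; the result matches bzr's merge_sort semantics):

    # Ancestry (tip first)        revno        merge_depth   end_of_merge
    # D  parents: (B, C)          (3,)         0             False
    # C  parents: (A,)            (1, 1, 1)    1             True
    # B  parents: (A,)            (2,)         0             False
    # A  no parents               (1,)         0             True

topo_order() returns the scheduled nodes newest-first (D, C, B, A here). C gets the first branch number allocated off mainline revno 1, and is end_of_merge because the following entry (B) returns to merge depth 0.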
1924 | +class Querier(object): |
1925 | + """Perform queries on an existing history db.""" |
1926 | + |
1927 | + def __init__(self, db_path, a_branch): |
1928 | + self._db_path = db_path |
1929 | + self._db_conn = None |
1930 | + self._cursor = None |
1931 | + self._importer_lock = None |
1932 | + self._branch = a_branch |
1933 | + self._branch_tip_rev_id = a_branch.last_revision() |
1934 | + self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id) |
1935 | + self._tip_is_imported = False |
1936 | + self._stats = defaultdict(lambda: 0) |
1937 | + |
1938 | + def set_importer_lock(self, lock): |
1939 | + """Add a thread-lock for building and running an Importer. |
1940 | + |
1941 | + The DB back-end is generally single-writer, so add a thread lock to |
1942 | + avoid having two writers trying to access it at the same time. |
1943 | + |
1944 | + This will be used as part of _import_tip. Note that it doesn't (yet?) |
1945 | + support anything like timeout. |
1946 | + """ |
1947 | + self._importer_lock = lock |
1948 | + |
1949 | + def _get_cursor(self): |
1950 | + if self._cursor is not None: |
1951 | + return self._cursor |
1952 | + db_conn = dbapi2.connect(self._db_path) |
1953 | + self._db_conn = db_conn |
1954 | + self._cursor = self._db_conn.cursor() |
1955 | + return self._cursor |
1956 | + |
1957 | + def ensure_branch_tip(self): |
1958 | + """Ensure that the branch tip has been imported. |
1959 | + |
1960 | +        This will run the Importer if the tip has not been imported yet.
1961 | + """ |
1962 | + if self._branch_tip_db_id is not None and self._tip_is_imported: |
1963 | + return |
1964 | + if self._branch_tip_db_id is None: |
1965 | + # This revision has not been seen by the DB, so we know it isn't |
1966 | + # imported |
1967 | + self._import_tip() |
1968 | + return |
1969 | + if self._is_imported_db_id(self._branch_tip_db_id): |
1970 | + # This revision was seen, and imported |
1971 | + self._tip_is_imported = True |
1972 | + return |
1973 | + self._import_tip() |
1974 | + |
1975 | + def _import_tip(self): |
1976 | + if self._cursor is not None: |
1977 | + self.close() |
1978 | + if self._importer_lock is not None: |
1979 | + self._importer_lock.acquire() |
1980 | + try: |
1981 | + t = time.time() |
1982 | + importer = Importer(self._db_path, self._branch, |
1983 | + tip_revision_id=self._branch_tip_rev_id, |
1984 | + incremental=True) |
1985 | + importer.do_import() |
1986 | + tdelta = time.time() - t |
1987 | + if 'history_db' in debug.debug_flags: |
1988 | + trace.note('imported %d nodes on-the-fly in %.3fs' |
1989 | + % (importer._stats.get('total_nodes_inserted', 0), |
1990 | + tdelta)) |
1991 | + self._db_conn = importer._db_conn |
1992 | + self._cursor = importer._cursor |
1993 | + self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id) |
1994 | + self._tip_is_imported = True |
1995 | + finally: |
1996 | + if self._importer_lock is not None: |
1997 | + self._importer_lock.release() |
1998 | + |
1999 | + def _is_imported_db_id(self, tip_db_id): |
2000 | + res = self._get_cursor().execute( |
2001 | + "SELECT count(*) FROM dotted_revno" |
2002 | + " WHERE tip_revision = ?" |
2003 | + " AND tip_revision = merged_revision", |
2004 | + (tip_db_id,)).fetchone() |
2005 | + return res[0] > 0 |
2006 | + |
2007 | + def close(self): |
2008 | + if self._db_conn is not None: |
2009 | + self._db_conn.close() |
2010 | + self._db_conn = None |
2011 | + self._cursor = None |
2012 | + |
2013 | + def _get_db_id(self, revision_id): |
2014 | + try: |
2015 | + db_res = self._get_cursor().execute( |
2016 | + 'SELECT db_id FROM revision' |
2017 | + ' WHERE revision_id = ?', |
2018 | + [revision_id]).fetchone() |
2019 | + except dbapi2.OperationalError: |
2020 | + return None |
2021 | + if db_res is None: |
2022 | + return None |
2023 | + return db_res[0] |
2024 | + |
2025 | + def _get_lh_parent_rev_id(self, revision_id): |
2026 | + parent_res = self._get_cursor().execute(""" |
2027 | + SELECT p.revision_id |
2028 | + FROM parent, revision as c, revision as p |
2029 | + WHERE parent.child = c.db_id |
2030 | + AND parent.parent = p.db_id |
2031 | + AND c.revision_id = ? |
2032 | + AND parent_idx = 0 |
2033 | + """, (revision_id,)).fetchone() |
2034 | + self._stats['lh_parent_step'] += 1 |
2035 | + if parent_res is None: |
2036 | + return None |
2037 | + return parent_res[0] |
2038 | + |
2039 | + def _get_lh_parent_db_id(self, revision_db_id): |
2040 | + parent_res = self._get_cursor().execute(""" |
2041 | + SELECT parent.parent |
2042 | + FROM parent |
2043 | + WHERE parent.child = ? |
2044 | + AND parent_idx = 0 |
2045 | + """, (revision_db_id,)).fetchone() |
2046 | + self._stats['lh_parent_step'] += 1 |
2047 | + if parent_res is None: |
2048 | + return None |
2049 | + return parent_res[0] |
2050 | + |
2051 | + def _get_possible_dotted_revno(self, tip_revision_id, merged_revision_id): |
2052 | + """Given a possible tip revision, try to determine the dotted revno.""" |
2053 | + revno = self._get_cursor().execute(""" |
2054 | + SELECT revno FROM dotted_revno, revision t, revision m |
2055 | + WHERE t.revision_id = ? |
2056 | + AND t.db_id = dotted_revno.tip_revision |
2057 | + AND m.revision_id = ? |
2058 | + AND m.db_id = dotted_revno.merged_revision |
2059 | + LIMIT 1 -- should always <= 1, but hint to the db? |
2060 | + """, (tip_revision_id, merged_revision_id)).fetchone() |
2061 | + self._stats['dotted_revno_query'] += 1 |
2062 | + if revno is None: |
2063 | + return None |
2064 | + return revno[0] |
2065 | + |
2066 | + def _get_possible_dotted_revno_db_id(self, tip_db_id, merged_db_id): |
2067 | + """Get a dotted revno if we have it.""" |
2068 | + revno = self._get_cursor().execute(""" |
2069 | + SELECT revno FROM dotted_revno |
2070 | + WHERE tip_revision = ? |
2071 | + AND merged_revision = ? |
2072 | + LIMIT 1 -- should always <= 1, but hint to the db? |
2073 | + """, (tip_db_id, merged_db_id)).fetchone() |
2074 | + self._stats['dotted_revno_query'] += 1 |
2075 | + if revno is None: |
2076 | + return None |
2077 | + return revno[0] |
2078 | + |
2079 | + def _get_range_key_and_tail(self, tip_db_id): |
2080 | + """Return the best range w/ head = tip_db_id or None.""" |
2081 | + range_res = self._get_cursor().execute( |
2082 | + "SELECT pkey, tail" |
2083 | + " FROM mainline_parent_range" |
2084 | + " WHERE head = ?" |
2085 | + " ORDER BY count DESC LIMIT 1", |
2086 | + (tip_db_id,)).fetchone() |
2087 | + if range_res is None: |
2088 | + tail = self._get_lh_parent_db_id(tip_db_id) |
2089 | + return None, tail |
2090 | + return range_res |
2091 | + |
2092 | + def get_dotted_revnos(self, revision_ids): |
2093 | + """Determine the dotted revno, using the range info, etc.""" |
2094 | + self.ensure_branch_tip() |
2095 | + t = time.time() |
2096 | + cursor = self._get_cursor() |
2097 | + tip_db_id = self._branch_tip_db_id |
2098 | + if tip_db_id is None: |
2099 | + return {} |
2100 | + db_ids = set() |
2101 | + db_id_to_rev_id = {} |
2102 | + for rev_id in revision_ids: |
2103 | + db_id = self._get_db_id(rev_id) |
2104 | + if db_id is None: |
2105 | +                continue  # This revision isn't in the cache; skip it
2106 | + db_ids.add(db_id) |
2107 | + db_id_to_rev_id[db_id] = rev_id |
2108 | + revnos = {} |
2109 | + while tip_db_id is not None and db_ids: |
2110 | + self._stats['num_steps'] += 1 |
2111 | + range_key, next_db_id = self._get_range_key_and_tail(tip_db_id) |
2112 | + if range_key is None: |
2113 | + revno_res = cursor.execute(_add_n_params( |
2114 | + "SELECT merged_revision, revno FROM dotted_revno" |
2115 | + " WHERE tip_revision = ?" |
2116 | + " AND merged_revision IN (%s)", |
2117 | + len(db_ids)), |
2118 | + [tip_db_id] + list(db_ids)).fetchall() |
2119 | + else: |
2120 | + revno_res = cursor.execute(_add_n_params( |
2121 | + "SELECT merged_revision, revno" |
2122 | + " FROM dotted_revno, mainline_parent" |
2123 | + " WHERE tip_revision = mainline_parent.revision" |
2124 | + " AND mainline_parent.range = ?" |
2125 | + " AND merged_revision IN (%s)", |
2126 | + len(db_ids)), |
2127 | + [range_key] + list(db_ids)).fetchall() |
2128 | + tip_db_id = next_db_id |
2129 | + for db_id, revno in revno_res: |
2130 | + db_ids.discard(db_id) |
2131 | + revnos[db_id_to_rev_id[db_id]] = tuple(map(int, |
2132 | + revno.split('.'))) |
2133 | + self._stats['query_time'] += (time.time() - t) |
2134 | + return revnos |
2135 | + |
2136 | + def get_revision_ids(self, revnos): |
2137 | + """Map from a dotted-revno back into a revision_id.""" |
2138 | + self.ensure_branch_tip() |
2139 | + t = time.time() |
2140 | + tip_db_id = self._branch_tip_db_id |
2141 | + # TODO: If tip_db_id is None, maybe we want to raise an exception here? |
2142 | + # To indicate that the branch has not been imported yet |
2143 | + revno_strs = set(['.'.join(map(str, revno)) for revno in revnos]) |
2144 | + revno_map = {} |
2145 | + cursor = self._get_cursor() |
2146 | + while tip_db_id is not None and revno_strs: |
2147 | + self._stats['num_steps'] += 1 |
2148 | + range_key, next_db_id = self._get_range_key_and_tail(tip_db_id) |
2149 | + if range_key is None: |
2150 | + revision_res = cursor.execute(_add_n_params( |
2151 | + "SELECT revision_id, revno" |
2152 | + " FROM dotted_revno, revision" |
2153 | + " WHERE merged_revision = revision.db_id" |
2154 | + " AND tip_revision = ?" |
2155 | + " AND revno IN (%s)", len(revno_strs)), |
2156 | + [tip_db_id] + list(revno_strs)).fetchall() |
2157 | + else: |
2158 | + revision_res = cursor.execute(_add_n_params( |
2159 | + "SELECT revision_id, revno" |
2160 | + " FROM dotted_revno, mainline_parent, revision" |
2161 | + " WHERE tip_revision = mainline_parent.revision" |
2162 | + " AND merged_revision = revision.db_id" |
2163 | + " AND mainline_parent.range = ?" |
2164 | + " AND revno IN (%s)", len(revno_strs)), |
2165 | + [range_key] + list(revno_strs)).fetchall() |
2166 | + tip_db_id = next_db_id |
2167 | + for revision_id, revno_str in revision_res: |
2168 | + dotted = tuple(map(int, revno_str.split('.'))) |
2169 | + revno_strs.discard(revno_str) |
2170 | + revno_map[dotted] = revision_id |
2171 | + self._stats['query_time'] += (time.time() - t) |
2172 | + return revno_map |
2173 | + |
2174 | + def get_mainline_where_merged(self, revision_ids): |
2175 | + """Determine what mainline revisions merged the given revisions.""" |
2176 | + self.ensure_branch_tip() |
2177 | + t = time.time() |
2178 | + tip_db_id = self._branch_tip_db_id |
2179 | + if tip_db_id is None: |
2180 | + return {} |
2181 | + cursor = self._get_cursor() |
2182 | + db_ids = set() |
2183 | + db_id_to_rev_id = {} |
2184 | + for rev_id in revision_ids: |
2185 | + db_id = self._get_db_id(rev_id) |
2186 | + if db_id is None: |
2187 | +                continue  # This revision isn't in the cache; skip it
2188 | + db_ids.add(db_id) |
2189 | + db_id_to_rev_id[db_id] = rev_id |
2190 | + revision_to_mainline_map = {} |
2191 | + while tip_db_id is not None and db_ids: |
2192 | + self._stats['num_steps'] += 1 |
2193 | + range_key, next_db_id = self._get_range_key_and_tail(tip_db_id) |
2194 | + if range_key is None: |
2195 | + mainline_res = cursor.execute(_add_n_params( |
2196 | + "SELECT revision_id, merged_revision" |
2197 | + " FROM dotted_revno, revision" |
2198 | + " WHERE tip_revision = ?" |
2199 | + " AND tip_revision = revision.db_id" |
2200 | + " AND merged_revision IN (%s)", |
2201 | + len(db_ids)), |
2202 | + [tip_db_id] + list(db_ids)).fetchall() |
2203 | + else: |
2204 | + mainline_res = cursor.execute(_add_n_params( |
2205 | + "SELECT revision_id, merged_revision" |
2206 | + " FROM dotted_revno, mainline_parent, revision" |
2207 | + " WHERE tip_revision = mainline_parent.revision" |
2208 | + " AND tip_revision = db_id" |
2209 | + " AND mainline_parent.range = ?" |
2210 | + " AND merged_revision IN (%s)", |
2211 | + len(db_ids)), |
2212 | + [range_key] + list(db_ids)).fetchall() |
2213 | + tip_db_id = next_db_id |
2214 | + for mainline_revision_id, merged_db_id in mainline_res: |
2215 | + db_ids.discard(merged_db_id) |
2216 | + revision_to_mainline_map[db_id_to_rev_id[merged_db_id]] = \ |
2217 | + mainline_revision_id |
2218 | + self._stats['query_time'] += (time.time() - t) |
2219 | + return revision_to_mainline_map |
2220 | + |
2221 | + def _get_mainline_range_starting_at(self, head_db_id): |
2222 | + """Try to find a range at this tip. |
2223 | + |
2224 | + If a range cannot be found, just find the next parent. |
2225 | + :return: (range_or_None, next_db_id) |
2226 | + """ |
2227 | + cursor = self._get_cursor() |
2228 | +        range_key, next_db_id = self._get_range_key_and_tail(head_db_id)
2229 | + if range_key is None: |
2230 | + return None, next_db_id |
2231 | + # TODO: Is ORDER BY dist ASC expensive? We know a priori that the list |
2232 | + # is probably already in sorted order, but does sqlite know that? |
2233 | + range_db_ids = cursor.execute( |
2234 | + "SELECT revision FROM mainline_parent" |
2235 | + " WHERE range = ? ORDER BY dist ASC", |
2236 | + (range_key,)).fetchall() |
2237 | + db_ids = [r[0] for r in range_db_ids] |
2238 | + return db_ids, next_db_id |
2239 | + |
2240 | + def walk_mainline(self): |
2241 | + t = time.time() |
2242 | + db_id = self._get_db_id(self._branch_tip_rev_id) |
2243 | + all_ids = [] |
2244 | + while db_id is not None: |
2245 | + self._stats['num_steps'] += 1 |
2246 | + next_range, next_db_id = self._get_mainline_range_starting_at(db_id) |
2247 | + if next_range is None: |
2248 | + # No range, so switch to using by-parent search |
2249 | + all_ids.append(db_id) |
2250 | + else: |
2251 | + assert next_range[0] == db_id |
2252 | + all_ids.extend(next_range) |
2253 | + db_id = next_db_id |
2254 | + self._stats['query_time'] += (time.time() - t) |
2255 | + return all_ids |
2256 | + |
2257 | + def walk_ancestry(self): |
2258 | + """Walk the whole ancestry. |
2259 | + |
2260 | + Use the information from the dotted_revno table and the mainline_parent |
2261 | + table to speed things up. |
2262 | + """ |
2263 | + db_id = self._get_db_id(self._branch_tip_rev_id) |
2264 | + all_ancestors = set() |
2265 | + t = time.time() |
2266 | + cursor = self._get_cursor() |
2267 | + while db_id is not None: |
2268 | + self._stats['num_steps'] += 1 |
2269 | + range_key, next_db_id = self._get_range_key_and_tail(db_id) |
2270 | + if range_key is None: |
2271 | + merged_revs = cursor.execute( |
2272 | + "SELECT merged_revision FROM dotted_revno" |
2273 | + " WHERE tip_revision = ?", |
2274 | + (db_id,)).fetchall() |
2275 | + all_ancestors.update([r[0] for r in merged_revs]) |
2276 | + else: |
2277 | + merged_revs = cursor.execute( |
2278 | + "SELECT merged_revision FROM dotted_revno, mainline_parent" |
2279 | + " WHERE tip_revision = mainline_parent.revision" |
2280 | + " AND mainline_parent.range = ?", |
2281 | + [range_key]).fetchall() |
2282 | + all_ancestors.update([r[0] for r in merged_revs]) |
2283 | + db_id = next_db_id |
2284 | + self._stats['query_time'] += (time.time() - t) |
2285 | + return all_ancestors |
2286 | + |
2287 | + def _find_tip_containing(self, tip_db_id, merged_db_id): |
2288 | + """Walk backwards until you find the tip that contains the given id.""" |
2289 | + cursor = self._get_cursor() |
2290 | + while tip_db_id is not None: |
2291 | + if tip_db_id == merged_db_id: |
2292 | + # A tip obviously contains itself |
2293 | + self._stats['step_find_tip_as_merged'] += 1 |
2294 | + return tip_db_id |
2295 | + self._stats['num_steps'] += 1 |
2296 | + self._stats['step_find_tip_containing'] += 1 |
2297 | + range_key, next_db_id = self._get_range_key_and_tail(tip_db_id) |
2298 | + if range_key is None: |
2299 | + present_res = cursor.execute( |
2300 | + "SELECT 1 FROM dotted_revno" |
2301 | + " WHERE tip_revision = ?" |
2302 | + " AND merged_revision = ?", |
2303 | + [tip_db_id, merged_db_id]).fetchone() |
2304 | + else: |
2305 | + present_res = cursor.execute( |
2306 | + "SELECT 1" |
2307 | + " FROM dotted_revno, mainline_parent" |
2308 | + " WHERE tip_revision = mainline_parent.revision" |
2309 | + " AND mainline_parent.range = ?" |
2310 | + " AND merged_revision = ?", |
2311 | + [range_key, merged_db_id]).fetchone() |
2312 | + if present_res is not None: |
2313 | + # We found a tip that contains merged_db_id |
2314 | + return tip_db_id |
2315 | + tip_db_id = next_db_id |
2316 | + return None |
2317 | + |
2318 | + def _get_merge_sorted_range(self, tip_db_id, start_db_id, stop_db_id): |
2319 | + """Starting at the given tip, read all merge_sorted data until stop.""" |
2320 | + if start_db_id is None or start_db_id == tip_db_id: |
2321 | + found_start = True |
2322 | + else: |
2323 | + found_start = False |
2324 | + cursor = self._get_cursor() |
2325 | + while tip_db_id is not None: |
2326 | + self._stats['num_steps'] += 1 |
2327 | + self._stats['step_get_merge_sorted'] += 1 |
2328 | + range_key, next_db_id = self._get_range_key_and_tail(tip_db_id) |
2329 | + if range_key is None: |
2330 | + merged_res = cursor.execute( |
2331 | + "SELECT db_id, revision_id, merge_depth, revno," |
2332 | + " end_of_merge" |
2333 | + " FROM dotted_revno, revision" |
2334 | + " WHERE tip_revision = ?" |
2335 | + " AND db_id = merged_revision" |
2336 | + " ORDER BY dist", |
2337 | + (tip_db_id,)).fetchall() |
2338 | + else: |
2339 | + # NOTE: Adding the ORDER BY costs us 981ms - 844ms = 137ms when |
2340 | + # doing 'bzr log -n0 -r -10..-1' on bzr.dev. |
2341 | + # That seems like a lot. Extracting them without sorting |
2342 | + # on them costs about the same amount. So the guess is |
2343 | + # that adding the extra columns requires more I/O. |
2344 | + # At the moment, SELECT order == INSERT order, so we don't |
2345 | + # strictly need it. I don't know that we can trust that, |
2346 | + # though. |
2347 | + merged_res = cursor.execute( |
2348 | + "SELECT db_id, revision_id, merge_depth, revno," |
2349 | + " end_of_merge" |
2350 | + # " , mainline_parent.dist as mp_dist" |
2351 | + # " , dotted_revno.dist as dr_dist" |
2352 | + " FROM dotted_revno, revision, mainline_parent" |
2353 | + " WHERE tip_revision = mainline_parent.revision" |
2354 | + " AND mainline_parent.range = ?" |
2355 | + " AND db_id = merged_revision", |
2356 | + # " ORDER BY mainline_parent.dist, dotted_revno.dist", |
2357 | + [range_key]).fetchall() |
2358 | + if found_start: |
2359 | + for db_id, r_id, depth, revno_str, eom in merged_res: |
2360 | + if stop_db_id is not None and db_id == stop_db_id: |
2361 | + return |
2362 | + revno = tuple(map(int, revno_str.split('.'))) |
2363 | + yield r_id, depth, revno, eom |
2364 | + else: |
2365 | + for info in merged_res: |
2366 | + if not found_start and info[0] == start_db_id: |
2367 | + found_start = True |
2368 | + if found_start: |
2369 | + if stop_db_id is not None and info[0] == stop_db_id: |
2370 | + return |
2371 | + db_id, r_id, depth, revno_str, eom = info |
2372 | + revno = tuple(map(int, revno_str.split('.'))) |
2373 | + yield r_id, depth, revno, eom |
2374 | + tip_db_id = next_db_id |
2375 | + |
2376 | + def iter_merge_sorted_revisions(self, start_revision_id=None, |
2377 | + stop_revision_id=None): |
2378 | + """See Branch.iter_merge_sorted_revisions() |
2379 | + |
2380 | + Note that start and stop differ from the Branch implementation, because |
2381 | + stop is *always* exclusive. You can simulate the rest by careful |
2382 | + selection of stop. |
2383 | + """ |
2384 | + self.ensure_branch_tip() |
2385 | + t = time.time() |
2386 | + tip_db_id = self._branch_tip_db_id |
2387 | + if tip_db_id is None: |
2388 | + return [] |
2389 | + if start_revision_id is not None: |
2390 | + start_db_id = self._get_db_id(start_revision_id) |
2391 | + else: |
2392 | + start_db_id = tip_db_id |
2393 | + stop_db_id = None |
2394 | + if stop_revision_id is not None: |
2395 | + stop_db_id = self._get_db_id(stop_revision_id) |
2396 | + # Seek fast until we find start_db_id |
2397 | + merge_sorted = [] |
2398 | + revnos = {} |
2399 | + tip_db_id = self._find_tip_containing(tip_db_id, start_db_id) |
2400 | + # Now that you have found the first tip containing the given start |
2401 | + # revision, pull in data until you walk off the history, or you find |
2402 | + # the stop revision |
2403 | + merge_sorted = list( |
2404 | + self._get_merge_sorted_range(tip_db_id, start_db_id, stop_db_id)) |
2405 | + self._stats['query_time'] += (time.time() - t) |
2406 | + return merge_sorted |
2407 | + |
2408 | + def heads(self, revision_ids): |
2409 | + """Compute Graph.heads() on the given data.""" |
2410 | + raise NotImplementedError(self.heads) |
2411 | |
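For reference, a minimal sketch of how a caller might drive the Querier defined above. The branch location and cache path are illustrative, and this assumes the sqlite cache file either already carries the schema or that the Importer sets it up on first use:

    from bzrlib import branch as _mod_branch
    from loggerhead import history_db

    b = _mod_branch.Branch.open('/path/to/branch')          # illustrative path
    q = history_db.Querier('/path/to/historydb.sql', b)     # illustrative path
    b.lock_read()
    try:
        q.ensure_branch_tip()   # runs the Importer on the fly if the tip is new
        revnos = q.get_dotted_revnos([b.last_revision()])
        # maps revision_id -> dotted revno tuple, e.g. (1234,) for a mainline tip
    finally:
        b.unlock()
        q.close()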
2412 | === added file 'loggerhead/history_db_schema.py' |
2413 | --- loggerhead/history_db_schema.py 1970-01-01 00:00:00 +0000 |
2414 | +++ loggerhead/history_db_schema.py 2010-05-15 23:47:26 +0000 |
2415 | @@ -0,0 +1,247 @@ |
2416 | +# Copyright (C) 2010 Canonical Ltd |
2417 | +# |
2418 | +# This program is free software; you can redistribute it and/or modify |
2419 | +# it under the terms of the GNU General Public License as published by |
2420 | +# the Free Software Foundation; either version 2 of the License, or |
2421 | +# (at your option) any later version. |
2422 | +# |
2423 | +# This program is distributed in the hope that it will be useful, |
2424 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
2425 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
2426 | +# GNU General Public License for more details. |
2427 | +# |
2428 | +# You should have received a copy of the GNU General Public License |
2429 | +# along with this program; if not, write to the Free Software |
2430 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2431 | + |
2432 | +"""The current SQL schema definition.""" |
2433 | + |
2434 | +_create_statements = [] |
2435 | + |
2436 | +revision_t = """ |
2437 | +CREATE TABLE revision ( |
2438 | + db_id INTEGER PRIMARY KEY AUTOINCREMENT, |
2439 | + revision_id TEXT NOT NULL, |
2440 | + gdfo INTEGER NOT NULL |
2441 | +); |
2442 | +""" |
2443 | +_create_statements.append(revision_t) |
2444 | + |
2445 | +revision_index = """ |
2446 | +CREATE INDEX revision_revision_id_index ON revision (revision_id); |
2447 | +""" |
2448 | +_create_statements.append(revision_index) |
2449 | + |
2450 | +parent_t = """ |
2451 | +CREATE TABLE parent ( |
2452 | + child INTEGER REFERENCES revision NOT NULL, |
2453 | + parent INTEGER REFERENCES revision NOT NULL, |
2454 | + parent_idx INTEGER NOT NULL, -- 0 == left-hand parent |
2455 | + CONSTRAINT parent_is_unique UNIQUE (child, parent, parent_idx) |
2456 | +); |
2457 | +""" |
2458 | +_create_statements.append(parent_t) |
2459 | + |
2460 | +parent_child_index = """ |
2461 | +CREATE INDEX parent_child_index ON parent (child); |
2462 | +""" |
2463 | +_create_statements.append(parent_child_index) |
2464 | +# So we can find a parent for a given node |
2465 | + |
2466 | +parent_parent_index = """ |
2467 | +CREATE INDEX parent_parent_index ON parent (parent); |
2468 | +""" |
2469 | +_create_statements.append(parent_parent_index) |
2470 | +# So we can find the *children* of a given node |
2471 | + |
2472 | + |
2473 | +ghost_t = """ |
2474 | +CREATE TABLE ghost ( |
2475 | + db_id INTEGER PRIMARY KEY REFERENCES revision |
2476 | +); |
2477 | +""" |
2478 | +_create_statements.append(ghost_t) |
2479 | + |
2480 | + |
2481 | +# TODO: Consider storing the data as 3-digit integer revnos, rather than a |
2482 | +# revno_str |
2483 | +# TODO: I think we need to add an 'order' column, so that we guarantee the |
2484 | +# order we get back from the database is the same order that we put them |
2485 | +# into the database. |
2486 | +dotted_revno_t = """ |
2487 | +CREATE TABLE dotted_revno ( |
2488 | + tip_revision INTEGER REFERENCES revision NOT NULL, |
2489 | + merged_revision INTEGER REFERENCES revision NOT NULL, |
2490 | + revno TEXT NOT NULL, |
2491 | + end_of_merge BOOL NOT NULL, |
2492 | + merge_depth INTEGER NOT NULL, |
2493 | + dist INTEGER NOT NULL, -- Offset from tip, so we preserve the order |
2494 | + CONSTRAINT dotted_revno_key UNIQUE (tip_revision, merged_revision) |
2495 | +); |
2496 | +""" |
2497 | +_create_statements.append(dotted_revno_t) |
2498 | + |
2499 | +dotted_revno_index = """ |
2500 | +CREATE INDEX dotted_revno_index ON dotted_revno(tip_revision); |
2501 | +""" |
2502 | +_create_statements.append(dotted_revno_index) |
2503 | + |
2504 | +mainline_parent_range_t = """ |
2505 | +CREATE TABLE mainline_parent_range ( |
2506 | + pkey INTEGER PRIMARY KEY AUTOINCREMENT, |
2507 | + head INTEGER REFERENCES revision NOT NULL, |
2508 | + tail INTEGER REFERENCES revision, -- NULL indicates start-of-history |
2509 | + -- tail is *not* included in the mainline_parent table |
2510 | + count INTEGER NOT NULL -- num in range, inclusive |
2511 | +); |
2512 | +""" |
2513 | +_create_statements.append(mainline_parent_range_t) |
2514 | + |
2515 | +mainline_parent_range_head_index = """ |
2516 | +CREATE INDEX mainline_parent_range_head_index |
2517 | + ON mainline_parent_range (head); |
2518 | +""" |
2519 | +_create_statements.append(mainline_parent_range_head_index) |
2520 | + |
2521 | +mainline_parent_t = """ |
2522 | +CREATE TABLE mainline_parent ( |
2523 | + range INTEGER REFERENCES mainline_parent_range NOT NULL, |
2524 | + revision INTEGER REFERENCES revision NOT NULL, |
2525 | + dist INTEGER NOT NULL -- Offset from head, so we preserve the order |
2526 | + -- Not adding the constraint at this time, but it is logically there |
2527 | + -- CONSTRAINT mainline_parent_rev_unique UNIQUE (range, revision) |
2528 | +); |
2529 | +""" |
2530 | +_create_statements.append(mainline_parent_t) |
2531 | + |
2532 | +mainline_parent_range_index = """ |
2533 | +CREATE INDEX mainline_parent_range_index |
2534 | + ON mainline_parent (range); |
2535 | +""" |
2536 | +_create_statements.append(mainline_parent_range_index) |
2537 | + |
2538 | + |
2539 | +def create_sqlite_db(conn): |
2540 | + """Given a connection to an sqlite database, create the fields.""" |
2541 | + cursor = conn.cursor() |
2542 | + for statement in _create_statements: |
2543 | + cursor.execute(statement) |
2544 | + conn.commit() |
2545 | + |
2546 | + |
2547 | +def create_pgsql_db(conn): |
2548 | + """Populate a Postgres database. |
2549 | + |
2550 | + Slightly different syntax from sqlite. |
2551 | + """ |
2552 | + cursor = conn.cursor() |
2553 | + cursor.execute("BEGIN") |
2554 | + for statement in _create_statements: |
2555 | + statement = statement.replace('INTEGER PRIMARY KEY AUTOINCREMENT', |
2556 | + 'SERIAL PRIMARY KEY') |
2557 | + cursor.execute(statement) |
2558 | + cursor.execute("COMMIT") |
2559 | + |
2560 | + |
2561 | +def is_initialized(conn, err_type): |
2562 | + # Both pgsql and sqlite have ways to tell if a table exists, but they |
2563 | + # aren't the *same*, so just punt and ask for content of a table. |
2564 | + cursor = conn.cursor() |
2565 | + try: |
2566 | + cursor.execute('SELECT count(*) FROM revision').fetchall() |
2567 | + except err_type: # ??? |
2568 | + return False |
2569 | + return True |
2570 | + |
2571 | + |
2572 | +def ensure_revision(cursor, revision_id): |
2573 | + """Ensure a revision exists, and return its database id.""" |
2574 | + x = cursor.execute('SELECT db_id FROM revision WHERE revision_id = ?', |
2575 | + (revision_id,)) |
2576 | + val = x.fetchone() |
2577 | + if val is None: |
2578 | + cursor.execute('INSERT INTO revision (revision_id) VALUES (?)', |
2579 | + (revision_id,)) |
2580 | + return ensure_revision(cursor, revision_id) |
2581 | + return val[0] |
2582 | + |
2583 | + |
2584 | +_BATCH_SIZE = 100 |
2585 | +def ensure_revisions(cursor, revision_ids, rev_id_to_db_id, db_id_to_rev_id, |
2586 | + graph): |
2587 | + """Do a bulk check to make sure we have db ids for all revisions. |
2588 | + |
2589 | + Update the revision_id => db_id mapping |
2590 | + """ |
2591 | + # TODO: I wish I knew a more efficient way to do this |
2592 | + # a) You could select all revisions that are in the db. But potentially |
2593 | + # you have lots of unrelated projects, and this would give unwanted |
2594 | + # overlap. |
2595 | + # b) SQLITE defaults to limiting you to 999 parameters |
2596 | + # c) IIRC the postgres code uses a tradeoff of 10%. If it thinks it needs |
2597 | + # more than 10% of the data in the table, it is faster to do an I/O |
2598 | + # friendly sequential scan, than to do a random order scan. |
2599 | + remaining = [r for r in revision_ids if r not in rev_id_to_db_id] |
2600 | + # res = cursor.execute('SELECT revision_id, db_id FROM revision') |
2601 | + # for rev_id, db_id in res.fetchall(): |
2602 | + # if rev_id in missing: |
2603 | + # result[rev_id] = db_id |
2604 | + # missing.discard(rev_id) |
2605 | + if not remaining: |
2606 | + return |
2607 | + def find_existing(): |
2608 | + cur = 0 |
2609 | + missing = [] |
2610 | + for cur in xrange(0, len(remaining), _BATCH_SIZE): |
2611 | + next = remaining[cur:cur+_BATCH_SIZE] |
2612 | + res = cursor.execute('SELECT revision_id, db_id FROM revision' |
2613 | + ' WHERE revision_id in (%s)' |
2614 | + % (', '.join('?'*len(next))), |
2615 | + tuple(next)) |
2616 | + for rev_id, db_id in res.fetchall(): |
2617 | + rev_id_to_db_id[rev_id] = db_id |
2618 | + db_id_to_rev_id[db_id] = rev_id |
2619 | + missing.extend([r for r in next if r not in rev_id_to_db_id]) |
2620 | + return missing |
2621 | + missing = find_existing() |
2622 | + if missing: |
2623 | + ghosts = set() |
2624 | + def insert_new(): |
2625 | + for rev_id in missing: |
2626 | + node = graph._nodes[(rev_id,)] |
2627 | + if node.gdfo == 1 and node.parent_keys is None: |
2628 | + # First rev, see if this is actually a ghost |
2629 | + ghosts.add(rev_id) |
2630 | + cursor.execute('INSERT INTO revision (revision_id, gdfo)' |
2631 | + ' VALUES (?, ?)', (rev_id, node.gdfo)) |
2632 | + db_id = cursor.lastrowid |
2633 | + rev_id_to_db_id[rev_id] = db_id |
2634 | + db_id_to_rev_id[db_id] = rev_id |
2635 | + insert_new() |
2636 | + if ghosts: |
2637 | + # TODO: We could turn this into a "revision_id IN ()", instead... |
2638 | + cursor.executemany("INSERT INTO ghost (db_id)" |
2639 | + " SELECT db_id FROM revision" |
2640 | + " WHERE revision_id = ?", |
2641 | + [(g,) for g in ghosts]) |
2642 | + |
2643 | + |
2644 | +def create_dotted_revnos(cursor, revno_entries): |
2645 | + """Create a dotted revno entry for this info.""" |
2646 | + # existing = cursor.execute('SELECT revno, end_of_merge, merge_depth' |
2647 | + # ' FROM dotted_revno' |
2648 | + # ' WHERE tip_revision = ?' |
2649 | + # ' AND merged_revision = ?', |
2650 | + # (tip_revision, merged_revision)).fetchone() |
2651 | + # if existing is not None: |
2652 | + # new_value = (revno, end_of_merge, merge_depth) |
2653 | + # if existing != new_value: |
2654 | + # raise ValueError('Disagreement in the graph. Wanted to add' |
2655 | + # ' node %s, but %s already exists' |
2656 | + # % (new_value, existing)) |
2657 | + # return |
2658 | + cursor.executemany( |
2659 | + 'INSERT INTO dotted_revno (tip_revision, merged_revision,' |
2660 | + ' revno, end_of_merge, merge_depth, dist)' |
2661 | + ' VALUES (?, ?, ?, ?, ?, ?)', |
2662 | + revno_entries) |
2663 | |
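As a rough illustration of how this schema module is used to initialise a cache file (the path is invented, and the stdlib sqlite3 module stands in for whichever dbapi2 implementation history_db is configured with):

    import sqlite3
    from loggerhead import history_db_schema

    conn = sqlite3.connect('/path/to/historydb.sql')   # illustrative path
    if not history_db_schema.is_initialized(conn, sqlite3.OperationalError):
        history_db_schema.create_sqlite_db(conn)        # creates tables and indexes

For Postgres, create_pgsql_db() reuses the same statements, swapping the sqlite AUTOINCREMENT syntax for SERIAL.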
2664 | === modified file 'loggerhead/main.py' |
2665 | --- loggerhead/main.py 2010-04-26 19:04:06 +0000 |
2666 | +++ loggerhead/main.py 2010-05-15 23:47:26 +0000 |
2667 | @@ -73,6 +73,14 @@ |
2668 | logfile.setFormatter(formatter) |
2669 | logfile.setLevel(logging.DEBUG) |
2670 | logger.addHandler(logfile) |
2671 | + def _restrict_logging(logger_name): |
2672 | + logger = logging.getLogger(logger_name) |
2673 | + if logger.getEffectiveLevel() < logging.INFO: |
2674 | + logger.setLevel(logging.INFO) |
2675 | + # simpleTAL is *very* verbose in DEBUG mode, which is otherwise the |
2676 | + # default. So quiet it up a bit. |
2677 | + _restrict_logging('simpleTAL') |
2678 | + _restrict_logging('simpleTALES') |
2679 | return logger |
2680 | |
2681 | |
2682 | |
2683 | === modified file 'loggerhead/templates/atom.pt' |
2684 | --- loggerhead/templates/atom.pt 2009-06-06 12:23:11 +0000 |
2685 | +++ loggerhead/templates/atom.pt 2010-05-15 23:47:26 +0000 |
2686 | @@ -4,8 +4,8 @@ |
2687 | bazaar changes for <tal:branch-name content="branch/friendly_name">branch name</tal:branch-name> |
2688 | </title> |
2689 | <updated tal:content="string:${updated}Z">${updated}</updated> |
2690 | - <id tal:content="python:branch.url(['/atom'])">url</id> |
2691 | - <link rel="self" type="application/atom+xml" tal:attributes="href python:branch.url(['/atom'])" /> |
2692 | + <id tal:content="python:branch.absolute_url(['/atom'])">url</id> |
2693 | + <link rel="self" type="application/atom+xml" tal:attributes="href python:branch.absolute_url(['/atom'])" /> |
2694 | <link rel="alternate" type="text/html" tal:attributes="href python:branch.url(['/changes'])" /> |
2695 | |
2696 | <entry tal:repeat="entry changes"> |
2697 | @@ -16,7 +16,7 @@ |
2698 | updated |
2699 | </updated> |
2700 | <!-- TODO: The feed validator will generate warnings because this is URL-encoded --> |
2701 | - <id tal:content="python:branch.url(['/revision', entry.revno])"> |
2702 | + <id tal:content="python:branch.absolute_url(['/revision', entry.revno])"> |
2703 | ID |
2704 | </id> |
2705 | <author tal:repeat="author entry/authors"> |
2706 | |
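
The _restrict_logging helper added to main.py raises a library logger's level only when it is currently more verbose than INFO, so application DEBUG output stays intact while simpleTAL/simpleTALES quiet down. A minimal sketch of the same idea, runnable on its own (the 'other.module' logger name is just an example):

import logging

def restrict_logging(logger_name, floor=logging.INFO):
    """Raise a logger to `floor` if its effective level is more verbose."""
    logger = logging.getLogger(logger_name)
    if logger.getEffectiveLevel() < floor:
        logger.setLevel(floor)

logging.basicConfig(level=logging.DEBUG)
restrict_logging('simpleTAL')
restrict_logging('simpleTALES')
logging.getLogger('simpleTAL').debug('suppressed - below the INFO floor')
logging.getLogger('other.module').debug('still emitted at DEBUG')
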
2707 | === modified file 'loggerhead/tests/__init__.py' |
2708 | --- loggerhead/tests/__init__.py 2006-12-11 06:44:19 +0000 |
2709 | +++ loggerhead/tests/__init__.py 2010-05-15 23:47:26 +0000 |
2710 | @@ -0,0 +1,27 @@ |
2711 | +# Copyright 2006, 2010 Canonical Ltd |
2712 | +# |
2713 | +# This program is free software; you can redistribute it and/or modify |
2714 | +# it under the terms of the GNU General Public License as published by |
2715 | +# the Free Software Foundation; either version 2 of the License, or |
2716 | +# (at your option) any later version. |
2717 | +# |
2718 | +# This program is distributed in the hope that it will be useful, |
2719 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
2720 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
2721 | +# GNU General Public License for more details. |
2722 | +# |
2723 | +# You should have received a copy of the GNU General Public License |
2724 | +# along with this program; if not, write to the Free Software |
2725 | +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
2726 | + |
2727 | + |
2728 | +def load_tests(standard_tests, module, loader): |
2729 | + standard_tests.addTests(loader.loadTestsFromModuleNames([ |
2730 | + (__name__ + '.' + x) for x in [ |
2731 | + 'test_controllers', |
2732 | + 'test_corners', |
2733 | + 'test_importer', |
2734 | + 'test_simple', |
2735 | + 'test_templating', |
2736 | + ]])) |
2737 | + return standard_tests |
2738 | |
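
The new loggerhead/tests/__init__.py relies on bzrlib's load_tests hook and its loadTestsFromModuleNames loader method. For readers more familiar with the standard library, a rough stdlib-only equivalent would look like the sketch below; the module names are placeholders, and the stdlib hook takes the loader as the first argument.

import unittest

def load_tests(loader, standard_tests, pattern):
    # Collect extra test modules by dotted name; these names are
    # placeholders, not modules from the patch.
    names = ['mypackage.tests.test_importer', 'mypackage.tests.test_simple']
    standard_tests.addTests(loader.loadTestsFromNames(names))
    return standard_tests
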
2739 | === added file 'loggerhead/tests/test_importer.py' |
2740 | --- loggerhead/tests/test_importer.py 1970-01-01 00:00:00 +0000 |
2741 | +++ loggerhead/tests/test_importer.py 2010-05-15 23:47:26 +0000 |
2742 | @@ -0,0 +1,847 @@ |
2743 | +# Copyright (C) 2010 Canonical Ltd |
2744 | +# |
2745 | +# This program is free software; you can redistribute it and/or modify |
2746 | +# it under the terms of the GNU General Public License as published by |
2747 | +# the Free Software Foundation; either version 2 of the License, or |
2748 | +# (at your option) any later version. |
2749 | +# |
2750 | +# This program is distributed in the hope that it will be useful, |
2751 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
2752 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
2753 | +# GNU General Public License for more details. |
2754 | +# |
2755 | +# You should have received a copy of the GNU General Public License |
2756 | +# along with this program; if not, write to the Free Software |
2757 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2758 | + |
2759 | +"""Test aspects of the importer code.""" |
2760 | + |
2761 | +import os |
2762 | +import tempfile |
2763 | +import threading |
2764 | +import time |
2765 | + |
2766 | +from bzrlib import ( |
2767 | + graph, |
2768 | + tests, |
2769 | + branch, |
2770 | + ) |
2771 | +from loggerhead import history_db |
2772 | + |
2773 | + |
2774 | +class MockVF(object): |
2775 | + |
2776 | + def __init__(self, mock_branch): |
2777 | + self._mock_branch = mock_branch |
2778 | + |
2779 | + def get_known_graph_ancestry(self, key): |
2780 | + # Note that this ignores the 'key' parameter, but that should be ok for |
2781 | + # our use case |
2782 | + return self._mock_branch._get_keys_known_graph() |
2783 | + |
2784 | + |
2785 | +class MockRepository(object): |
2786 | + |
2787 | + def __init__(self, mock_branch): |
2788 | + self._mock_branch = mock_branch |
2789 | + |
2790 | + @property |
2791 | + def revisions(self): |
2792 | + return MockVF(self._mock_branch) |
2793 | + |
2794 | + def get_parent_map(self, revision_ids): |
2795 | + pmap = {} |
2796 | + anc = self._mock_branch._ancestry |
2797 | + for rev_id in revision_ids: |
2798 | + if rev_id in anc: |
2799 | + pmap[rev_id] = anc[rev_id] |
2800 | + return pmap |
2801 | + |
2802 | + |
2803 | +class MockBranch(object): |
2804 | + """Fake a Branch object, to provide the apis we need to have.""" |
2805 | + |
2806 | + def __init__(self, ancestry, tip_revision): |
2807 | + self._tip_revision = tip_revision |
2808 | + self._ancestry = ancestry |
2809 | + |
2810 | + def _get_keys_known_graph(self): |
2811 | + key_ancestry = dict(((r,), tuple([(p,) for p in ps])) |
2812 | + for r, ps in self._ancestry.iteritems()) |
2813 | + return graph.KnownGraph(key_ancestry) |
2814 | + |
2815 | + def last_revision(self): |
2816 | + return self._tip_revision |
2817 | + |
2818 | + @property |
2819 | + def repository(self): |
2820 | + return MockRepository(self) |
2821 | + |
2822 | + def get_revision_id_to_revno_map(self): |
2823 | + kg = self._get_keys_known_graph() |
2824 | + merge_sorted = kg.merge_sort((self._tip_revision,)) |
2825 | + return dict((node.key[0], node.revno) for node in merge_sorted) |
2826 | + |
2827 | + |
2828 | +class TestCaseWithGraphs(tests.TestCase): |
2829 | + |
2830 | + def make_branch_with_simple_history(self): |
2831 | + # Graph: |
2832 | + # A |
2833 | + # | |
2834 | + # B |
2835 | + # |\ |
2836 | + # C D |
2837 | + # |/ |
2838 | + # E |
2839 | + ancestry = {'A': (), 'B': ('A',), 'C': ('B',), 'D': ('B',), |
2840 | + 'E': ('C', 'D')} |
2841 | + return MockBranch(ancestry, 'E') |
2842 | + |
2843 | + def make_interesting_branch(self): |
2844 | + # Graph looks like: |
2845 | + # A 1 |
2846 | + # |\ |\ |
2847 | + # | \ | \ |
2848 | + # | \ | \ |
2849 | + # | B | 1.1.1 ------. |
2850 | + # | /|\ | | \ \ |
2851 | + # | C E | | 1.1.2 1.2.1 | |
2852 | + # |/ / / | / / / |
2853 | + # D F H 2 1.2.2 1.3.1 |
2854 | + # |/ X | / \ / |
2855 | + # G / J 3 .------' 1.2.3 |
2856 | + # |/ /|\ | / / \ |
2857 | + # I / K L 4 1.2.4 1.4.1 |
2858 | + # |/ |/ | | / |
2859 | + # N M 5 1.2.5 |
2860 | + # | / |
2861 | + # | / |
2862 | + # |/ |
2863 | + # O |
2864 | + ancestry = {'A': (), |
2865 | + 'B': ('A',), |
2866 | + 'C': ('B',), |
2867 | + 'D': ('A', 'C'), |
2868 | + 'E': ('B',), |
2869 | + 'F': ('E',), |
2870 | + 'G': ('D', 'F'), |
2871 | + 'H': ('B',), |
2872 | + 'I': ('G', 'H'), |
2873 | + 'J': ('F',), |
2874 | + 'K': ('J',), |
2875 | + 'L': ('J',), |
2876 | + 'M': ('K', 'L'), |
2877 | + 'N': ('I', 'J'), |
2878 | + 'O': ('N', 'M'), |
2879 | + } |
2880 | + return MockBranch(ancestry, 'O') |
2881 | + |
2882 | + def make_branch_with_ghosts(self): |
2883 | + # Simple ancestry: |
2884 | + # A |
2885 | + # | |
2886 | + # B G? |
2887 | + # |/| |
2888 | + # C D |
2889 | + # |/ |
2890 | + # E |
2891 | + # |
2892 | + # Both C and D refer to 'G' but that revision isn't actually present |
2893 | + ancestry = {'A': (), |
2894 | + 'B': ('A',), |
2895 | + 'C': ('B', 'G'), |
2896 | + 'D': ('G',), |
2897 | + 'E': ('C', 'D'), |
2898 | + } |
2899 | + return MockBranch(ancestry, 'E') |
2900 | + |
2901 | + def grab_interesting_ids(self, rev_id_to_db_id): |
2902 | + for rev_id in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ': |
2903 | + setattr(self, '%s_id' % (rev_id,), rev_id_to_db_id.get(rev_id)) |
2904 | + |
2905 | + |
2906 | +class TestImporter(TestCaseWithGraphs): |
2907 | + |
2908 | + def test_build(self): |
2909 | + b = self.make_interesting_branch() |
2910 | + revno_map = b.get_revision_id_to_revno_map() |
2911 | + self.assertEqual({'A': (1,), 'B': (1,1,1), 'C': (1,1,2), |
2912 | + 'D': (2,), 'E': (1,2,1), 'F': (1,2,2), |
2913 | + 'G': (3,), 'H': (1,3,1), 'I': (4,), |
2914 | + 'J': (1,2,3,), 'K': (1,2,4), 'L': (1,4,1), |
2915 | + 'M': (1,2,5,), 'N': (5,), 'O': (6,)}, |
2916 | + revno_map) |
2917 | + |
2918 | + def test_import_non_incremental(self): |
2919 | + b = self.make_interesting_branch() |
2920 | + importer = history_db.Importer(':memory:', b, incremental=False) |
2921 | + importer.do_import() |
2922 | + cur = importer._db_conn.cursor() |
2923 | + revs = cur.execute("SELECT revision_id, db_id, gdfo" |
2924 | + " FROM revision").fetchall() |
2925 | + # Track the db_ids that are assigned |
2926 | + rev_to_db_id = dict((r[0], r[1]) for r in revs) |
2927 | + db_to_rev_id = dict((r[1], r[0]) for r in revs) |
2928 | + rev_gdfo = dict((r[0], r[2]) for r in revs) |
2929 | + self.assertEqual({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 3, 'F': 4, |
2930 | + 'G': 5, 'H': 3, 'I': 6, 'J': 5, 'K': 6, 'L': 6, |
2931 | + 'M': 7, 'N': 7, 'O': 8}, rev_gdfo) |
2932 | + dotted_info = cur.execute( |
2933 | + "SELECT tip_revision, merged_revision, revno" |
2934 | + " FROM dotted_revno").fetchall() |
2935 | + self.grab_interesting_ids(rev_to_db_id) |
2936 | + expected = { |
2937 | + self.A_id: sorted([(self.A_id, '1')]), |
2938 | + self.D_id: sorted([(self.B_id, '1.1.1'), (self.C_id, '1.1.2'), |
2939 | + (self.D_id, '2')]), |
2940 | + self.G_id: sorted([(self.E_id, '1.2.1'), (self.F_id, '1.2.2'), |
2941 | + (self.G_id, '3')]), |
2942 | + self.I_id: sorted([(self.H_id, '1.3.1'), (self.I_id, '4')]), |
2943 | + self.N_id: sorted([(self.J_id, '1.2.3'), (self.N_id, '5')]), |
2944 | + self.O_id: sorted([(self.K_id, '1.2.4'), (self.L_id, '1.4.1'), |
2945 | + (self.M_id, '1.2.5'), (self.O_id, '6')]), |
2946 | + } |
2947 | + actual = {} |
2948 | + for tip_rev, merge_rev, revno in dotted_info: |
2949 | + val = actual.setdefault(tip_rev, []) |
2950 | + val.append((merge_rev, revno)) |
2951 | + val.sort() |
2952 | + self.assertEqual(expected, actual) |
2953 | + |
2954 | + def test_import_records_ghosts(self): |
2955 | + b = self.make_branch_with_ghosts() |
2956 | + importer = history_db.Importer(':memory:', b, incremental=False) |
2957 | + importer.do_import() |
2958 | + cur = importer._db_conn.cursor() |
2959 | + res = cur.execute("SELECT revision_id" |
2960 | + " FROM ghost NATURAL JOIN revision") |
2961 | + self.assertEqual(['G'], [r[0] for r in res]) |
2962 | + |
2963 | + def test__update_ancestry_records_ghosts(self): |
2964 | + b = self.make_branch_with_ghosts() |
2965 | + importer = history_db.Importer(':memory:', b, incremental=False) |
2966 | + importer._update_ancestry('G') |
2967 | + cur = importer._db_conn.cursor() |
2968 | + res = cur.execute("SELECT revision_id" |
2969 | + " FROM ghost NATURAL JOIN revision") |
2970 | + self.assertEqual(['G'], [r[0] for r in res]) |
2971 | + |
2972 | + def test__update_ancestry_from_scratch(self): |
2973 | + b = self.make_interesting_branch() |
2974 | + importer = history_db.Importer(':memory:', b, incremental=False) |
2975 | + importer._update_ancestry('O') |
2976 | + cur = importer._db_conn.cursor() |
2977 | + # revision and parent should be fully populated |
2978 | + rev_gdfo = dict(cur.execute("SELECT revision_id, gdfo" |
2979 | + " FROM revision").fetchall()) |
2980 | + # Track the db_ids that are assigned |
2981 | + self.assertEqual({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 3, 'F': 4, |
2982 | + 'G': 5, 'H': 3, 'I': 6, 'J': 5, 'K': 6, 'L': 6, |
2983 | + 'M': 7, 'N': 7, 'O': 8}, rev_gdfo) |
2984 | + parent_map = dict(((c_id, p_idx), p_id) for c_id, p_id, p_idx in |
2985 | + cur.execute("SELECT c.revision_id, p.revision_id, parent_idx" |
2986 | + " FROM parent, revision c, revision p" |
2987 | + " WHERE parent.parent = p.db_id" |
2988 | + " AND parent.child = c.db_id").fetchall()) |
2989 | + self.assertEqual({('B', 0): 'A', ('C', 0): 'B', ('D', 0): 'A', |
2990 | + ('D', 1): 'C', ('E', 0): 'B', ('F', 0): 'E', |
2991 | + ('G', 0): 'D', ('G', 1): 'F', ('H', 0): 'B', |
2992 | + ('I', 0): 'G', ('I', 1): 'H', ('J', 0): 'F', |
2993 | + ('K', 0): 'J', ('L', 0): 'J', ('M', 0): 'K', |
2994 | + ('M', 1): 'L', ('N', 0): 'I', ('N', 1): 'J', |
2995 | + ('O', 0): 'N', ('O', 1): 'M', |
2996 | + }, parent_map) |
2997 | + |
2998 | + def test__update_ancestry_partial(self): |
2999 | + b = self.make_interesting_branch() |
3000 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3001 | + # We intentionally seed the data with some wrong values, to see if the |
3002 | + # result uses them. |
3003 | + cur = importer._db_conn.cursor() |
3004 | + cur.executemany("INSERT INTO revision (revision_id, gdfo)" |
3005 | + " VALUES (?, ?)", [('A', 11)]) |
3006 | + importer._graph = None |
3007 | + importer._update_ancestry('O') |
3008 | + cur = importer._db_conn.cursor() |
3009 | + # revision and parent should be fully populated |
3010 | + rev_gdfo = dict(cur.execute("SELECT revision_id, gdfo" |
3011 | + " FROM revision").fetchall()) |
3012 | + # Track the db_ids that are assigned |
3013 | + self.assertEqual({'A': 11, 'B': 12, 'C': 13, 'D': 14, 'E': 13, |
3014 | + 'F': 14, 'G': 15, 'H': 13, 'I': 16, 'J': 15, |
3015 | + 'K': 16, 'L': 16, 'M': 17, 'N': 17, 'O': 18}, |
3016 | + rev_gdfo) |
3017 | + parent_map = dict(((c_id, p_idx), p_id) for c_id, p_id, p_idx in |
3018 | + cur.execute("SELECT c.revision_id, p.revision_id, parent_idx" |
3019 | + " FROM parent, revision c, revision p" |
3020 | + " WHERE parent.parent = p.db_id" |
3021 | + " AND parent.child = c.db_id").fetchall()) |
3022 | + self.assertEqual({('B', 0): 'A', ('C', 0): 'B', ('D', 0): 'A', |
3023 | + ('D', 1): 'C', ('E', 0): 'B', ('F', 0): 'E', |
3024 | + ('G', 0): 'D', ('G', 1): 'F', ('H', 0): 'B', |
3025 | + ('I', 0): 'G', ('I', 1): 'H', ('J', 0): 'F', |
3026 | + ('K', 0): 'J', ('L', 0): 'J', ('M', 0): 'K', |
3027 | + ('M', 1): 'L', ('N', 0): 'I', ('N', 1): 'J', |
3028 | + ('O', 0): 'N', ('O', 1): 'M', |
3029 | + }, parent_map) |
3030 | + |
3031 | + |
3032 | +class Test_IncrementalMergeSort(TestCaseWithGraphs): |
3033 | + |
3034 | + def make_inc_merger(self, branch, imported_tip, new_tip): |
3035 | + branch._tip_revision = imported_tip |
3036 | + importer = history_db.Importer(':memory:', branch, incremental=False) |
3037 | + importer.do_import() |
3038 | + importer._update_ancestry(new_tip) |
3039 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3040 | + inc_merger = history_db._IncrementalMergeSort(importer, |
3041 | + importer._rev_id_to_db_id[new_tip]) |
3042 | + return inc_merger |
3043 | + |
3044 | + def assertScheduledStack(self, inc_merger, expected): |
3045 | + """Check that the merge_sort result is as expected.""" |
3046 | + actual = [(node.key, node.revno, node.end_of_merge, node.merge_depth) |
3047 | + for node in inc_merger._scheduled_stack] |
3048 | + self.assertEqual(expected, actual) |
3049 | + |
3050 | + def test_step_by_step(self): |
3051 | + b = self.make_interesting_branch() |
3052 | + inc_merger = self.make_inc_merger(b, 'G', 'N') |
3053 | + inc_merger._find_needed_mainline() |
3054 | + self.assertEqual([self.N_id, self.I_id], inc_merger._mainline_db_ids) |
3055 | + self.assertEqual(self.G_id, inc_merger._imported_mainline_id) |
3056 | + self.assertEqual(set([self.N_id, self.I_id]), |
3057 | + inc_merger._interesting_ancestor_ids) |
3058 | + # This should populate ._search_tips, as well as gdfo information |
3059 | + inc_merger._get_initial_search_tips() |
3060 | + self.assertEqual(set([self.J_id, self.H_id]), inc_merger._search_tips) |
3061 | + # We should have taken a single step on the mainline |
3062 | + self.assertEqual(self.D_id, inc_merger._imported_mainline_id) |
3063 | + self.assertEqual(4, inc_merger._imported_gdfo) |
3064 | + self.assertEqual({self.D_id: 4, self.J_id: 5, self.H_id: 3}, |
3065 | + inc_merger._known_gdfo) |
3066 | +        # J has gdfo > D, so it is quickly removed. H does not, so it is |
3067 | +        # left behind. |
3068 | + self.assertEqual([self.H_id], |
3069 | + inc_merger._split_search_tips_by_gdfo([self.J_id, self.H_id])) |
3070 | + self.assertEqual(set([self.N_id, self.I_id, self.J_id]), |
3071 | + inc_merger._interesting_ancestor_ids) |
3072 | + # Checking children should notice that all known children are fine, so |
3073 | + # H now gets marked interesting |
3074 | + self.assertEqual([], |
3075 | + inc_merger._split_interesting_using_children([self.H_id])) |
3076 | + self.assertEqual(set([self.N_id, self.I_id, self.J_id, self.H_id]), |
3077 | + inc_merger._interesting_ancestor_ids) |
3078 | + # Since we know all search tips are interesting, step again. |
3079 | + inc_merger._step_search_tips() |
3080 | + self.assertEqual(set([self.B_id, self.F_id]), inc_merger._search_tips) |
3081 | + self.assertEqual(set([self.N_id, self.I_id, self.J_id, self.H_id]), |
3082 | + inc_merger._interesting_ancestor_ids) |
3083 | + # F is known-merged, so the first step should filter it from unknowns, |
3084 | + # and remove it from the search tips |
3085 | +        # However, B is not known yet, and has GDFO < D (since it was merged |
3086 | +        # in). |
3087 | +        # But E is a child of B, and is already known to be merged. So |
3088 | +        # B gets filtered out in the child step, and removed as a search tip |
3089 | + self.assertEqual([self.B_id], |
3090 | + inc_merger._split_search_tips_by_gdfo([self.B_id, self.F_id])) |
3091 | + self.assertEqual(set([self.B_id]), inc_merger._search_tips) |
3092 | + self.assertEqual([], |
3093 | + inc_merger._split_interesting_using_children([self.B_id])) |
3094 | + self.assertEqual(set([]), inc_merger._search_tips) |
3095 | + # At this point, we will stop searching. XXX: B's info has not been |
3096 | + # loaded yet... |
3097 | + self.assertEqual(self.D_id, inc_merger._imported_mainline_id) |
3098 | + self.assertEqual(4, inc_merger._imported_gdfo) |
3099 | + self.assertEqual({self.E_id: ((1,2,1), 1, 1), |
3100 | + self.F_id: ((1,2,2), 0, 1), |
3101 | + self.G_id: ((3,), 0, 0), |
3102 | + }, inc_merger._imported_dotted_revno) |
3103 | + self.assertEqual(set([(1,2,1), (1,2,2), (3,)]), |
3104 | + inc_merger._known_dotted) |
3105 | + # At this point, B_id isn't in _imported_dotted_revno, so we loop to |
3106 | + # ensure we have enough dotted_revno data |
3107 | + inc_merger._ensure_lh_parent_info() |
3108 | + self.assertEqual(self.A_id, inc_merger._imported_mainline_id) |
3109 | + self.assertEqual(1, inc_merger._imported_gdfo) |
3110 | + self.assertEqual({self.B_id: ((1,1,1), 1, 1), |
3111 | + self.C_id: ((1,1,2), 0, 1), |
3112 | + self.D_id: ((2,), 0, 0), |
3113 | + self.E_id: ((1,2,1), 1, 1), |
3114 | + self.F_id: ((1,2,2), 0, 1), |
3115 | + self.G_id: ((3,), 0, 0), |
3116 | + }, inc_merger._imported_dotted_revno) |
3117 | + self.assertEqual({0: 0, 1: 2}, inc_merger._revno_to_branch_count) |
3118 | + self.assertEqual({0: 3, (1, 1): 2, (1, 2): 2}, |
3119 | + inc_merger._branch_to_child_count) |
3120 | + inc_merger._compute_merge_sort() |
3121 | + self.assertScheduledStack(inc_merger, |
3122 | + [(self.H_id, (1, 3, 1), True, 1), |
3123 | + (self.I_id, (4,), False, 0), |
3124 | + (self.J_id, (1, 2, 3), True, 1), |
3125 | + (self.N_id, (5,), False, 0), |
3126 | + ]) |
3127 | + |
3128 | + def test__find_interesting_ancestry(self): |
3129 | + b = self.make_interesting_branch() |
3130 | + b._tip_revision = 'G' # Something older |
3131 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3132 | + importer.do_import() |
3133 | + importer._update_ancestry('O') |
3134 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3135 | + inc_merger = history_db._IncrementalMergeSort(importer, self.O_id) |
3136 | + # This should step through the ancestry, and load in the necessary |
3137 | + # data. Check the final state |
3138 | + inc_merger._find_interesting_ancestry() |
3139 | + self.assertEqual([self.O_id, self.N_id, self.I_id], |
3140 | + inc_merger._mainline_db_ids) |
3141 | +        # We should stop loading A; I need to figure out why it gets loaded |
3142 | + self.assertEqual(self.A_id, inc_merger._imported_mainline_id) |
3143 | + self.assertEqual(1, inc_merger._imported_gdfo) |
3144 | + self.assertEqual(set([self.H_id, self.I_id, self.J_id, self.K_id, |
3145 | + self.L_id, self.M_id, self.N_id, self.O_id]), |
3146 | + inc_merger._interesting_ancestor_ids) |
3147 | + self.assertEqual(set([]), inc_merger._search_tips) |
3148 | + self.assertEqual({self.B_id: ((1,1,1), 1, 1), |
3149 | + self.C_id: ((1,1,2), 0, 1), |
3150 | + self.D_id: ((2,), 0, 0), |
3151 | + self.E_id: ((1,2,1), 1, 1), |
3152 | + self.F_id: ((1,2,2), 0, 1), |
3153 | + self.G_id: ((3,), 0, 0), |
3154 | + }, inc_merger._imported_dotted_revno) |
3155 | + self.assertEqual(set([(1,1,1), (1,1,2), (2,), (1,2,1), (1,2,2), (3,)]), |
3156 | + inc_merger._known_dotted) |
3157 | + |
3158 | + def test__split_gdfo_handles_mainline_tip(self): |
3159 | + b = self.make_branch_with_simple_history() |
3160 | + inc_merger = self.make_inc_merger(b, 'C', 'E') |
3161 | + inc_merger._find_needed_mainline() |
3162 | + inc_merger._get_initial_search_tips() |
3163 | + self.assertEqual(set([self.D_id]), inc_merger._search_tips) |
3164 | + self.assertEqual(self.B_id, inc_merger._imported_mainline_id) |
3165 | + # First step knows right away that D is too new |
3166 | + self.assertEqual([], |
3167 | + inc_merger._split_search_tips_by_gdfo([self.D_id])) |
3168 | + inc_merger._step_search_tips() |
3169 | + self.assertEqual(set([self.B_id]), inc_merger._search_tips) |
3170 | + # B_id can't be merged into B_id, but *because* it is B_id :) |
3171 | + self.assertEqual([], |
3172 | + inc_merger._split_search_tips_by_gdfo([self.B_id])) |
3173 | + self.assertEqual(set([]), inc_merger._search_tips) |
3174 | + self.assertEqual(set([self.D_id, self.E_id]), |
3175 | + inc_merger._interesting_ancestor_ids) |
3176 | + |
3177 | + def test__step_search_tips_skips_already_seen(self): |
3178 | + # Simpler graph: |
3179 | + # A |
3180 | + # | |
3181 | + # B |
3182 | + # |\ |
3183 | + # | C |
3184 | + # | |\ |
3185 | + # | D E |
3186 | + # |/|/ |
3187 | + # F G |
3188 | + # |/ |
3189 | + # H |
3190 | +        # In this case, the first step should go to G & D. When stepping from |
3191 | +        # there, G => D should not continue on D, since it has already been |
3192 | +        # seen, but we should include E. |
3193 | + ancestry = {'A': (), |
3194 | + 'B': ('A',), |
3195 | + 'C': ('B',), |
3196 | + 'D': ('C',), |
3197 | + 'E': ('C',), |
3198 | + 'F': ('B', 'D'), |
3199 | + 'G': ('D', 'E'), |
3200 | + 'H': ('F', 'G'), |
3201 | + } |
3202 | + b = MockBranch(ancestry, 'B') |
3203 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3204 | + importer.do_import() |
3205 | + importer._update_ancestry('H') |
3206 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3207 | + inc_merger = history_db._IncrementalMergeSort(importer, self.H_id) |
3208 | + inc_merger._find_needed_mainline() |
3209 | + self.assertEqual([self.H_id, self.F_id], inc_merger._mainline_db_ids) |
3210 | + self.assertEqual(self.B_id, inc_merger._imported_mainline_id) |
3211 | + inc_merger._get_initial_search_tips() |
3212 | + self.assertEqual(set([self.D_id, self.G_id]), inc_merger._search_tips) |
3213 | + # Both have higher-than-mainline gdfos |
3214 | + self.assertEqual([], |
3215 | + inc_merger._split_search_tips_by_gdfo([self.D_id, self.G_id])) |
3216 | + inc_merger._step_search_tips() |
3217 | + # It should want to include D_id, but it should know that we've already |
3218 | + # been there |
3219 | + self.assertEqual(set([self.C_id, self.E_id]), inc_merger._search_tips) |
3220 | + |
3221 | + def test_maintain_branch_counter_correct(self): |
3222 | + # The trick is that if we have already imported to N, then we will be |
3223 | + # hiding the correct branch counter for revno 1. We will see it as 2 |
3224 | + # from the revisions we've loaded, but really it is 3 because of the H |
3225 | + # branch. |
3226 | + b = self.make_interesting_branch() |
3227 | + b._tip_revision = 'N' |
3228 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3229 | + importer.do_import() |
3230 | + importer._update_ancestry('O') |
3231 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3232 | + inc_merger = history_db._IncrementalMergeSort(importer, self.O_id) |
3233 | + inc_merger._find_interesting_ancestry() |
3234 | + inc_merger._compute_merge_sort() |
3235 | + self.assertScheduledStack(inc_merger, |
3236 | + [(self.K_id, (1, 2, 4), True, 1), |
3237 | + (self.L_id, (1, 4, 1), True, 2), |
3238 | + (self.M_id, (1, 2, 5), False, 1), |
3239 | + (self.O_id, (6,), False, 0), |
3240 | + ]) |
3241 | + # We have to load I to get H, but at that point we've seen a (1,X,1) |
3242 | + # revision, so we know we've seen the newest merged branch. |
3243 | + self.assertEqual(self.G_id, inc_merger._imported_mainline_id) |
3244 | + |
3245 | + def test_handles_simple_child(self): |
3246 | + ancestry = {'A': (), 'B': ('A',), 'C': ('B',), 'D': ('C',)} |
3247 | + b = MockBranch(ancestry, 'C') |
3248 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3249 | + importer.do_import() |
3250 | + importer._update_ancestry('D') |
3251 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3252 | + inc_merger = history_db._IncrementalMergeSort(importer, self.D_id) |
3253 | + inc_merger._find_interesting_ancestry() |
3254 | + inc_merger._compute_merge_sort() |
3255 | + self.assertScheduledStack(inc_merger, [(self.D_id, (4,), False, 0)]) |
3256 | + |
3257 | + def test_assigns_first_branch_without_loading_history(self): |
3258 | + # Importing E + D should not have to load anything before B |
3259 | + b = self.make_branch_with_simple_history() |
3260 | + inc_merger = self.make_inc_merger(b, 'C', 'E') |
3261 | + inc_merger._find_interesting_ancestry() |
3262 | + self.assertEqual(self.A_id, inc_merger._imported_mainline_id) |
3263 | + inc_merger._compute_merge_sort() |
3264 | + self.assertScheduledStack(inc_merger, [ |
3265 | + (self.D_id, (2,1,1), True, 1), |
3266 | + (self.E_id, (4,), False, 0), |
3267 | + ]) |
3268 | + self.assertEqual(self.A_id, inc_merger._imported_mainline_id) |
3269 | + |
3270 | + def test_handles_empty_branch(self): |
3271 | + ancestry = {} |
3272 | + b = MockBranch(ancestry, 'null:') |
3273 | + importer = history_db.Importer(':memory:', b, incremental=True) |
3274 | + importer.do_import() |
3275 | + |
3276 | + def test_handles_multi_roots(self): |
3277 | + # Graph: |
3278 | + # A B |
3279 | + # |/ |
3280 | + # C |
3281 | + # | |
3282 | + # D E |
3283 | + # |/ |
3284 | + # F |
3285 | + # Since D is already imported, the incremental importer will only see |
3286 | + # E. However, we have to load everything, so that we can get the |
3287 | + # numbering for a 0-based rev correct. |
3288 | + ancestry = {'A': (), 'B': (), 'C': ('A', 'B'), 'D': ('C',), 'E': (), |
3289 | + 'F': ('D', 'E')} |
3290 | + b = MockBranch(ancestry, 'D') |
3291 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3292 | + importer.do_import() |
3293 | + importer._update_ancestry('F') |
3294 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3295 | + inc_merger = history_db._IncrementalMergeSort(importer, self.F_id) |
3296 | + inc_merger._find_interesting_ancestry() |
3297 | + self.assertEqual(self.C_id, inc_merger._imported_mainline_id) |
3298 | + self.assertEqual(set([self.E_id, self.F_id]), |
3299 | + inc_merger._interesting_ancestor_ids) |
3300 | + inc_merger._compute_merge_sort() |
3301 | + self.assertScheduledStack(inc_merger, |
3302 | + [(self.E_id, (0, 2, 1), True, 1), |
3303 | + (self.F_id, (4,), False, 0), |
3304 | + ]) |
3305 | + |
3306 | + def test_handles_partial_complex_multi_roots(self): |
3307 | + # Graph: |
3308 | + # A B |
3309 | + # |/ \ |
3310 | + # C E | |
3311 | + # | | | |
3312 | + # D F | |
3313 | + # |/ / |
3314 | + # G H |
3315 | + # |/ |
3316 | + # I J |
3317 | + # |/ |
3318 | + # K |
3319 | + |
3320 | + # Ideas: |
3321 | +        # 1) (0,1,2) gets merged after (0,2,2), which means we certainly have |
3322 | +        #    to find (0,2,2) to get (0, 3, 1) correct, even though we've found |
3323 | +        #    a possible extra root. |
3324 | + # 2) We don't have to go all the way back to find (0,1,1) as soon as we |
3325 | + # find (0,2,1). |
3326 | + ancestry = {'A': (), 'B': (), 'C': ('A', 'B'), 'D': ('C',), 'E': (), |
3327 | + 'F': ('E',), 'G': ('D', 'F'), 'H': ('B',), 'I': ('G', 'H'), |
3328 | + 'J': (), 'K': ('I', 'J')} |
3329 | + b = MockBranch(ancestry, 'I') |
3330 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3331 | + importer.do_import() |
3332 | + importer._update_ancestry('K') |
3333 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3334 | + inc_merger = history_db._IncrementalMergeSort(importer, self.K_id) |
3335 | + inc_merger._find_interesting_ancestry() |
3336 | + self.assertEqual(self.G_id, inc_merger._imported_mainline_id) |
3337 | + self.assertEqual(set([self.K_id, self.J_id]), |
3338 | + inc_merger._interesting_ancestor_ids) |
3339 | + inc_merger._compute_merge_sort() |
3340 | + self.assertScheduledStack(inc_merger, |
3341 | + [(self.J_id, (0, 3, 1), True, 1), |
3342 | + (self.K_id, (6,), False, 0), |
3343 | + ]) |
3344 | + # We only have to walk back and stop at D because we have found (0,2,1) |
3345 | + # which must be the latest branch. |
3346 | + self.assertEqual(self.D_id, inc_merger._imported_mainline_id) |
3347 | + |
3348 | + def test_one_rev(self): |
3349 | + # Trivial ancestry: |
3350 | + # A |
3351 | + ancestry = {'A': ()} |
3352 | + b = MockBranch(ancestry, 'A') |
3353 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3354 | + importer._update_ancestry('A') |
3355 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3356 | + inc_merger = history_db._IncrementalMergeSort(importer, self.A_id) |
3357 | + inc_merger._find_interesting_ancestry() |
3358 | + inc_merger._compute_merge_sort() |
3359 | + self.assertScheduledStack(inc_merger, |
3360 | + [(self.A_id, (1,), True, 0), |
3361 | + ]) |
3362 | + |
3363 | + def test_skips_ghosts(self): |
3364 | + b = self.make_branch_with_ghosts() |
3365 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3366 | + importer._update_ancestry('E') |
3367 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3368 | + inc_merger = history_db._IncrementalMergeSort(importer, self.E_id) |
3369 | + inc_merger._find_interesting_ancestry() |
3370 | + inc_merger._compute_merge_sort() |
3371 | + # G is not mentioned in merge_sorted, neither as a left-hand parent, |
3372 | + # nor as a right-hand parent |
3373 | + self.assertScheduledStack(inc_merger, |
3374 | + [(self.A_id, (1,), True, 0), |
3375 | + (self.B_id, (2,), False, 0), |
3376 | + (self.C_id, (3,), False, 0), |
3377 | + (self.D_id, (0, 1, 1), True, 1), |
3378 | + (self.E_id, (4,), False, 0), |
3379 | + ]) |
3380 | + |
3381 | + def test_handle_partial_ghosts(self): |
3382 | + b = self.make_branch_with_ghosts() |
3383 | + b._tip_revision = 'C' |
3384 | + importer = history_db.Importer(':memory:', b, incremental=False) |
3385 | + importer.do_import() |
3386 | + importer._update_ancestry('E') |
3387 | + self.grab_interesting_ids(importer._rev_id_to_db_id) |
3388 | + inc_merger = history_db._IncrementalMergeSort(importer, self.E_id) |
3389 | + inc_merger.topo_order() |
3390 | + # G is not mentioned in merge_sorted, neither as a left-hand parent, |
3391 | + # nor as a right-hand parent |
3392 | + self.assertScheduledStack(inc_merger, |
3393 | + [(self.D_id, (0, 1, 1), True, 1), |
3394 | + (self.E_id, (4,), False, 0), |
3395 | + ]) |
3396 | + |
3397 | + def test_ignore_uninteresting_ancestors(self): |
3398 | + # Graph: |
3399 | + # A |
3400 | + # |\ |
3401 | + # B C |
3402 | + # |X| |
3403 | + # D E |
3404 | + # |\| |
3405 | + # | F |
3406 | + # |/ |
3407 | + # G |
3408 | + # |
3409 | +        # Someone did work on C while trunk evolved to B. C was landed, while |
3410 | +        # concurrently someone tried to update C for the trunk changes. |
3411 | +        # After trying to clean up, they had to do it again. |
3412 | + # If D is imported, we should only number E and F, we shouldn't try to |
3413 | + # include B or C |
3414 | + # Note: This ancestry was taken from bzr.dev@5114.1.1, which |
3415 | + # demonstrated the race condition. |
3416 | + ancestry = {'A': (), 'B': ('A',), 'C': ('A',), 'D': ('B', 'C'), |
3417 | + 'E': ('C', 'B'), 'F': ('E', 'D'), 'G': ('D', 'F')} |
3418 | + b = MockBranch(ancestry, 'G') |
3419 | + inc_merger = self.make_inc_merger(b, 'D', 'G') |
3420 | + inc_merger.topo_order() |
3421 | + self.assertScheduledStack(inc_merger, |
3422 | + [(self.E_id, (1, 1, 2), True, 1), |
3423 | + (self.F_id, (1, 1, 3), False, 1), |
3424 | + (self.G_id, (4,), False, 0), |
3425 | + ]) |
3426 | + self.assertTrue(self.E_id in inc_merger._imported_dotted_revno) |
3427 | + self.assertTrue((1,1,2) in inc_merger._known_dotted) |
3428 | + |
3429 | + def test_handles_already_imported(self): |
3430 | + b = self.make_interesting_branch() |
3431 | + inc_merger = self.make_inc_merger(b, 'O', 'O') |
3432 | + inc_merger.topo_order() |
3433 | + self.assertScheduledStack(inc_merger, []) |
3434 | + |
3435 | + |
3436 | +class _InterLocker(object): |
3437 | + """Wrapper around thread locks to help with testing. |
3438 | + |
3439 | +    The idea is that one thread will want to acquire a lock. When it does |
3440 | +    so, we block it and signal the testing thread that the acquisition was |
3441 | +    attempted, so now is a good time to check that things are proceeding |
3442 | +    properly. |
3443 | + """ |
3444 | + |
3445 | + def __init__(self): |
3446 | + self._monitored_lock = threading.Lock() |
3447 | + # We start the lock in blocked mode, so that we have to call |
3448 | + # self.wait_for_acquire before we will let the other thread through. |
3449 | + self._monitored_lock.acquire() |
3450 | + self._acquire_called = False |
3451 | + self._release_called = False |
3452 | + self._acquireCondition = threading.Condition() |
3453 | + self._releaseCondition = threading.Condition() |
3454 | + self._max_wait_time = 1.0 |
3455 | + |
3456 | + def acquire(self): |
3457 | + """Same as threading.Lock.acquire. |
3458 | + |
3459 | + This is meant to be called by the thread you are testing / monitoring. |
3460 | + """ |
3461 | + self._acquireCondition.acquire() |
3462 | + self._acquire_called = True |
3463 | + self._acquireCondition.notify() |
3464 | + self._acquireCondition.release() |
3465 | + # Acquire the actual lock that this is substituting for |
3466 | + t_wait_start = time.time() |
3467 | + while not self._monitored_lock.acquire(False): |
3468 | + t_waited = time.time() - t_wait_start |
3469 | + if t_waited > self._max_wait_time: |
3470 | + raise RuntimeError('Acquire timed out after %.1fs' |
3471 | + % (t_waited,)) |
3472 | + time.sleep(0.1) |
3473 | + |
3474 | + def release(self): |
3475 | + """See threading.Lock.release.""" |
3476 | + self._monitored_lock.release() |
3477 | + self._releaseCondition.acquire() |
3478 | + self._release_called = True |
3479 | + self._releaseCondition.notify() |
3480 | + self._releaseCondition.release() |
3481 | + |
3482 | + def _wait_for_condition(self, condition, evaluator, name, timeout): |
3483 | + """Wait for the given condition to trigger. |
3484 | + |
3485 | + :param condition: A Condition variable |
3486 | + :param evaluator: A callback to indicate if the condition has actually |
3487 | + been fulfilled. Should return True if the condition is ready to go. |
3488 | + :param name: An associated name for the condition (used for error |
3489 | + messages) |
3490 | + :param timeout: If the condition hasn't triggered after timeout |
3491 | + seconds, raise an error. |
3492 | + :return: When this function returns, the condition lock will be held. |
3493 | + Callers are responsible for calling .release() |
3494 | + """ |
3495 | + t_wait_start = time.time() |
3496 | + condition.acquire() |
3497 | + while not evaluator(): |
3498 | + t_waited = time.time() - t_wait_start |
3499 | + if t_waited > timeout: |
3500 | + raise RuntimeError('%s not triggered after %.1f seconds' |
3501 | + % (name, t_waited)) |
3502 | + condition.wait(0.1) |
3503 | + |
3504 | + def wait_for_acquire(self): |
3505 | + """Called by the test thread. |
3506 | + |
3507 | + This will wait on a Condition until another thread calls 'acquire'. |
3508 | + Once that happens, that thread will be blocked, and this call will |
3509 | + return. Follow this up with 'wait_for_release' to let the other |
3510 | + thread continue, and block until release is called. |
3511 | + """ |
3512 | + self._wait_for_condition(self._acquireCondition, |
3513 | + lambda: self._acquire_called, 'acquire', self._max_wait_time) |
3514 | + # Let the other thread start processing from the acquire. |
3515 | + self._monitored_lock.release() |
3516 | + self._acquireCondition.release() |
3517 | + |
3518 | + def wait_for_release(self): |
3519 | + """Block this thread until self.release() is called.""" |
3520 | + self._wait_for_condition(self._releaseCondition, |
3521 | + lambda: self._release_called, 'release', self._max_wait_time) |
3522 | + self._releaseCondition.release() |
3523 | + |
3524 | + |
3525 | +class TestQuerier(TestCaseWithGraphs): |
3526 | + |
3527 | + def get_db_path(self): |
3528 | + fn, temp = tempfile.mkstemp(prefix='test-bzr-history-db-', suffix='.db') |
3529 | + os.close(fn) |
3530 | + self.addCleanup(os.remove, temp) |
3531 | + return temp |
3532 | + |
3533 | + def test_importer_lock(self): |
3534 | + temp = self.get_db_path() |
3535 | + b = self.make_interesting_branch() |
3536 | + b._tip_revision = 'I' |
3537 | + importer = history_db.Importer(temp, b, incremental=False) |
3538 | + importer.do_import() |
3539 | + del importer |
3540 | + b._tip_revision = 'O' |
3541 | + lock = _InterLocker() |
3542 | + query = history_db.Querier(temp, b) |
3543 | + query.set_importer_lock(lock) |
3544 | + query.close() # We will be doing the rest in another thread, and |
3545 | + # sqlite connections can't be shared between threads |
3546 | + t = threading.Thread(target=query.ensure_branch_tip) |
3547 | + t.start() |
3548 | + # TODO: The only bit we don't handle is that we'd really like the test |
3549 | + # to fail if the other thread didn't exit cleanly. However, this |
3550 | + # is good enough for now. |
3551 | + self.addCleanup(t.join) |
3552 | + lock.wait_for_acquire() |
3553 | + # At this point, the database should not have been updated yet. |
3554 | + conn = history_db.dbapi2.connect(temp) |
3555 | + res = conn.cursor().execute("SELECT db_id FROM revision" |
3556 | + " WHERE revision_id = ?", |
3557 | + ('O',)).fetchone() |
3558 | + self.assertIs(None, res) |
3559 | + lock.wait_for_release() |
3560 | + # Now that it has gotten the lock and finished, we should have the tip |
3561 | + # imported properly. |
3562 | + res = conn.cursor().execute("SELECT tip_revision" |
3563 | + " FROM revision, dotted_revno" |
3564 | + " WHERE tip_revision = merged_revision" |
3565 | + " AND revision_id = ?", |
3566 | + ('O',)).fetchone() |
3567 | + self.assertIsNot(None, res) |
3568 | + |
3569 | + def test_get_merged_into(self): |
3570 | + db_path = self.get_db_path() |
3571 | + b = self.make_interesting_branch() |
3572 | + importer = history_db.Importer(db_path, b, incremental=False) |
3573 | + importer.do_import() |
3574 | + del importer |
3575 | + query = history_db.Querier(db_path, b) |
3576 | + rev_to_mainline_map = query.get_mainline_where_merged( |
3577 | + ['E', 'F', 'H', 'L']) |
3578 | + self.assertEqual({'E': 'G', 'F': 'G', 'H': 'I', 'L': 'O'}, |
3579 | + rev_to_mainline_map) |
3580 | + |
3581 | +    def test_empty_branch(self): |
3582 | + db_path = self.get_db_path() |
3583 | + b = MockBranch({}, 'null:') |
3584 | + query = history_db.Querier(db_path, b) |
3585 | + query.ensure_branch_tip() |
3586 | + query.close() |
3587 | + self.assertEqual({}, query.get_dotted_revnos(['a', 'b', 'c'])) |
3588 | + self.assertEqual({}, query.get_mainline_where_merged(['a', 'b', 'c'])) |
3589 | + self.assertEqual([], query.iter_merge_sorted_revisions()) |
3590 | |
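
The TestQuerier cases above show the intended import-then-query flow. Pulled out of the tests, basic usage looks roughly like this; it is a sketch only, with the importer lock (set_importer_lock) and error handling omitted, and the helper name is not from the patch.

from loggerhead import history_db

def revnos_for(db_path, branch, revision_ids):
    """Build (or refresh) the cache for `branch`, then map ids to revnos.

    Assembled from the calls the tests above make against Importer and
    Querier.
    """
    importer = history_db.Importer(db_path, branch, incremental=False)
    importer.do_import()
    query = history_db.Querier(db_path, branch)
    try:
        revnos = query.get_dotted_revnos(revision_ids)
        merged_at = query.get_mainline_where_merged(revision_ids)
    finally:
        query.close()
    return revnos, merged_at
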
3591 | === modified file 'loggerhead/tests/test_simple.py' |
3592 | --- loggerhead/tests/test_simple.py 2009-06-08 23:02:49 +0000 |
3593 | +++ loggerhead/tests/test_simple.py 2010-05-15 23:47:26 +0000 |
3594 | @@ -1,4 +1,4 @@ |
3595 | -# Copyright (C) 2008, 2009 Canonical Ltd. |
3596 | +# Copyright (C) 2007-2010 Canonical Ltd. |
3597 | # |
3598 | # This program is free software; you can redistribute it and/or modify |
3599 | # it under the terms of the GNU General Public License as published by |
3600 | @@ -17,6 +17,8 @@ |
3601 | |
3602 | import cgi |
3603 | import logging |
3604 | +import os |
3605 | +import shutil |
3606 | |
3607 | from bzrlib.tests import TestCaseWithTransport |
3608 | from bzrlib.util.configobj.configobj import ConfigObj |
3609 | @@ -61,7 +63,7 @@ |
3610 | 'with<htmlspecialchars\n') |
3611 | self.build_tree_contents( |
3612 | [('myfilename', self.filecontents)]) |
3613 | - self.tree.add('myfilename') |
3614 | + self.tree.add('myfilename', 'myfile-id') |
3615 | self.fileid = self.tree.path2id('myfilename') |
3616 | self.msg = 'a very exciting commit message <' |
3617 | self.revid = self.tree.commit(message=self.msg) |
3618 | @@ -71,6 +73,11 @@ |
3619 | res = app.get('/changes') |
3620 | res.mustcontain(cgi.escape(self.msg)) |
3621 | |
3622 | + def test_changes_for_file(self): |
3623 | + app = self.setUpLoggerhead() |
3624 | + res = app.get('/changes?filter_file_id=myfile-id') |
3625 | + res.mustcontain(cgi.escape(self.msg)) |
3626 | + |
3627 | def test_changes_branch_from(self): |
3628 | app = self.setUpLoggerhead(served_url="lp:loggerhead") |
3629 | res = app.get('/changes') |
3630 | |
3631 | === removed file 'loggerhead/trace.py' |
3632 | --- loggerhead/trace.py 2008-10-24 02:26:05 +0000 |
3633 | +++ loggerhead/trace.py 1970-01-01 00:00:00 +0000 |
3634 | @@ -1,75 +0,0 @@ |
3635 | -# |
3636 | -# Copyright (C) 2008 Canonical Ltd. |
3637 | -# (Authored by Martin Albisetti <argentina@gmail.com) |
3638 | -# Copyright (C) 2008 Guillermo Gonzalez <guillo.gonzo@gmail.com> |
3639 | -# Copyright (C) 2006 Robey Pointer <robey@lag.net> |
3640 | -# Copyright (C) 2006 Goffredo Baroncelli <kreijack@inwind.it> |
3641 | -# |
3642 | -# This program is free software; you can redistribute it and/or modify |
3643 | -# it under the terms of the GNU General Public License as published by |
3644 | -# the Free Software Foundation; either version 2 of the License, or |
3645 | -# (at your option) any later version. |
3646 | -# |
3647 | -# This program is distributed in the hope that it will be useful, |
3648 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
3649 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3650 | -# GNU General Public License for more details. |
3651 | -# |
3652 | -# You should have received a copy of the GNU General Public License |
3653 | -# along with this program; if not, write to the Free Software |
3654 | -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
3655 | -# |
3656 | - |
3657 | -import os |
3658 | -import logging |
3659 | -import sys |
3660 | - |
3661 | - |
3662 | -def make_handler(config, filename): |
3663 | - roll = config.get('log.roll', 'never') |
3664 | - if roll == 'daily': |
3665 | - h = logging.handlers.TimedRotatingFileHandler(filename, |
3666 | - 'midnight', |
3667 | - 1, |
3668 | - 100) |
3669 | - elif roll == 'weekly': |
3670 | - h = logging.handlers.TimedRotatingFileHandler(filename, 'W0', 1, 100) |
3671 | - else: |
3672 | - h = logging.FileHandler(filename) |
3673 | - return h |
3674 | - |
3675 | - |
3676 | -def setup_logging(log_folder, config, foreground): |
3677 | - # i hate that stupid logging config format, so just set up logging here. |
3678 | - |
3679 | - if not os.path.exists(log_folder): |
3680 | - os.mkdir(log_folder) |
3681 | - |
3682 | - f = logging.Formatter('%(levelname)-.3s [%(asctime)s.%(msecs)03d]' |
3683 | - ' %(name)s: %(message)s', |
3684 | - '%Y%m%d-%H:%M:%S') |
3685 | - debug_log = make_handler(config, os.path.join(log_folder, 'debug.log')) |
3686 | - debug_log.setLevel(logging.DEBUG) |
3687 | - debug_log.setFormatter(f) |
3688 | - if foreground: |
3689 | - stdout_log = logging.StreamHandler(sys.stdout) |
3690 | - stdout_log.setLevel(logging.DEBUG) |
3691 | - stdout_log.setFormatter(f) |
3692 | - f = logging.Formatter('[%(asctime)s.%(msecs)03d] %(message)s', |
3693 | - '%Y%m%d-%H:%M:%S') |
3694 | - access_log = make_handler(config, os.path.join(log_folder, 'access.log')) |
3695 | - access_log.setLevel(logging.INFO) |
3696 | - access_log.setFormatter(f) |
3697 | - |
3698 | - error_log = make_handler(config, os.path.join(log_folder, 'error.log')) |
3699 | - error_log.setLevel(logging.ERROR) |
3700 | - error_log.setFormatter(f) |
3701 | - |
3702 | - logging.getLogger('').setLevel(logging.DEBUG) |
3703 | - logging.getLogger('').addHandler(debug_log) |
3704 | - logging.getLogger('loggerhead.access').addHandler(access_log) |
3705 | - logging.getLogger('loggerhead').setLevel(logging.ERROR) |
3706 | - logging.getLogger('loggerhead').addHandler(error_log) |
3707 | - |
3708 | - if foreground: |
3709 | - logging.getLogger('').addHandler(stdout_log) |
3710 | |
3711 | === removed file 'loggerhead/wholehistory.py' |
3712 | --- loggerhead/wholehistory.py 2010-04-26 19:37:26 +0000 |
3713 | +++ loggerhead/wholehistory.py 1970-01-01 00:00:00 +0000 |
3714 | @@ -1,85 +0,0 @@ |
3715 | -# |
3716 | -# Copyright (C) 2008, 2009 Canonical Ltd. |
3717 | -# |
3718 | -# This program is free software; you can redistribute it and/or modify |
3719 | -# it under the terms of the GNU General Public License as published by |
3720 | -# the Free Software Foundation; either version 2 of the License, or |
3721 | -# (at your option) any later version. |
3722 | -# |
3723 | -# This program is distributed in the hope that it will be useful, |
3724 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
3725 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3726 | -# GNU General Public License for more details. |
3727 | -# |
3728 | -# You should have received a copy of the GNU General Public License |
3729 | -# along with this program; if not, write to the Free Software |
3730 | -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
3731 | -# |
3732 | -"""Cache the whole history data needed by loggerhead about a branch.""" |
3733 | - |
3734 | -import logging |
3735 | -import time |
3736 | - |
3737 | -from bzrlib.revision import is_null, NULL_REVISION |
3738 | -from bzrlib.tsort import merge_sort |
3739 | - |
3740 | - |
3741 | -def _strip_NULL_ghosts(revision_graph): |
3742 | - """ |
3743 | - Copied over from bzrlib meant as a temporary workaround for |
3744 | - deprecated methods. |
3745 | - """ |
3746 | - # Filter ghosts, and null: |
3747 | - if NULL_REVISION in revision_graph: |
3748 | - del revision_graph[NULL_REVISION] |
3749 | - for key, parents in revision_graph.iteritems(): |
3750 | - revision_graph[key] = tuple(parent for parent in parents if parent |
3751 | - in revision_graph) |
3752 | - return revision_graph |
3753 | - |
3754 | - |
3755 | -def compute_whole_history_data(branch): |
3756 | - """Compute _rev_info and _rev_indices for a branch. |
3757 | - |
3758 | - See History.__doc__ for what these data structures mean. |
3759 | - """ |
3760 | - z = time.time() |
3761 | - |
3762 | - last_revid = branch.last_revision() |
3763 | - |
3764 | - log = logging.getLogger('loggerhead.%s' % |
3765 | - (branch.get_config().get_nickname(),)) |
3766 | - |
3767 | - graph = branch.repository.get_graph() |
3768 | - parent_map = dict((key, value) for key, value in |
3769 | - graph.iter_ancestry([last_revid]) if value is not None) |
3770 | - |
3771 | - _revision_graph = _strip_NULL_ghosts(parent_map) |
3772 | - |
3773 | - _rev_info = [] |
3774 | - _rev_indices = {} |
3775 | - |
3776 | - if is_null(last_revid): |
3777 | - _merge_sort = [] |
3778 | - else: |
3779 | - _merge_sort = merge_sort( |
3780 | - _revision_graph, last_revid, generate_revno=True) |
3781 | - |
3782 | - for info in _merge_sort: |
3783 | - seq, revid, merge_depth, revno, end_of_merge = info |
3784 | - revno_str = '.'.join(str(n) for n in revno) |
3785 | - parents = _revision_graph[revid] |
3786 | - _rev_indices[revid] = len(_rev_info) |
3787 | - _rev_info.append([(seq, revid, merge_depth, revno_str, end_of_merge), (), parents]) |
3788 | - |
3789 | - for revid in _revision_graph.iterkeys(): |
3790 | - if _rev_info[_rev_indices[revid]][0][2] == 0: |
3791 | - continue |
3792 | - for parent in _revision_graph[revid]: |
3793 | - c = _rev_info[_rev_indices[parent]] |
3794 | - if revid not in c[1]: |
3795 | - c[1] = c[1] + (revid,) |
3796 | - |
3797 | - log.info('built revision graph cache: %.3f secs' % (time.time() - z,)) |
3798 | - |
3799 | - return (_rev_info, _rev_indices) |
3800 | |
3801 | === modified file 'loggerhead/zptsupport.py' |
3802 | --- loggerhead/zptsupport.py 2009-03-02 05:44:20 +0000 |
3803 | +++ loggerhead/zptsupport.py 2010-05-15 23:47:26 +0000 |
3804 | @@ -23,10 +23,6 @@ |
3805 | |
3806 | from simpletal import simpleTAL, simpleTALES |
3807 | |
3808 | -logging.getLogger("simpleTAL").setLevel(logging.INFO) |
3809 | -logging.getLogger("simpleTALES").setLevel(logging.INFO) |
3810 | - |
3811 | - |
3812 | _zpt_cache = {} |
3813 | |
3814 | |
3815 | |
3816 | === modified file 'serve-branches' |
3817 | --- serve-branches 2009-06-26 09:42:43 +0000 |
3818 | +++ serve-branches 2010-05-15 23:47:26 +0000 |
3819 | @@ -20,6 +20,9 @@ |
3820 | |
3821 | import sys |
3822 | |
3823 | +from bzrlib.plugin import load_plugins |
3824 | +load_plugins() |
3825 | + |
3826 | from loggerhead.main import main |
3827 | |
3828 | |
3829 | |
3830 | === modified file 'setup.py' |
3831 | --- setup.py 2009-10-17 06:19:40 +0000 |
3832 | +++ setup.py 2010-05-15 23:47:26 +0000 |
3833 | @@ -30,7 +30,7 @@ |
3834 | license = "GNU GPL v2 or later", |
3835 | maintainer = "Michael Hudson", |
3836 | maintainer_email = "michael.hudson@canonical.com", |
3837 | - scripts = ["start-loggerhead", "stop-loggerhead", "serve-branches"], |
3838 | + scripts = ["serve-branches"], |
3839 | packages = ["loggerhead", |
3840 | "loggerhead/apps", |
3841 | "loggerhead/controllers", |
3842 | @@ -64,9 +64,7 @@ |
3843 | "static/javascript/yui/build/yui-base/*", |
3844 | "static/images/*"]}, |
3845 | data_files = [ |
3846 | - ('share/man/man1', ['start-loggerhead.1', |
3847 | - 'stop-loggerhead.1', |
3848 | - 'serve-branches.1']), |
3849 | + ('share/man/man1', ['serve-branches.1']), |
3850 | ('share/doc/loggerhead', ['loggerhead.conf.example']), |
3851 | ], |
3852 | ) |
3853 | |
3854 | === removed file 'start-loggerhead' |
3855 | --- start-loggerhead 2009-10-17 06:35:33 +0000 |
3856 | +++ start-loggerhead 1970-01-01 00:00:00 +0000 |
3857 | @@ -1,146 +0,0 @@ |
3858 | -#!/usr/bin/env python |
3859 | -# This program is free software; you can redistribute it and/or modify |
3860 | -# it under the terms of the GNU General Public License as published by |
3861 | -# the Free Software Foundation; either version 2 of the License, or |
3862 | -# (at your option) any later version. |
3863 | -# |
3864 | -# This program is distributed in the hope that it will be useful, |
3865 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
3866 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3867 | -# GNU General Public License for more details. |
3868 | -# |
3869 | -# You should have received a copy of the GNU General Public License |
3870 | -# along with this program; if not, write to the Free Software |
3871 | -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
3872 | - |
3873 | -""" |
3874 | -WARNING! This script has been deprecated and will go away soon! |
3875 | -A script for starting the loggerhead process. |
3876 | -""" |
3877 | - |
3878 | - |
3879 | -import logging |
3880 | -from optparse import OptionParser |
3881 | -import os |
3882 | -import sys |
3883 | -import urlparse |
3884 | - |
3885 | -from bzrlib.plugin import load_plugins |
3886 | -from bzrlib.util.configobj.configobj import ConfigObj |
3887 | - |
3888 | -from paste import httpserver |
3889 | -from paste.httpexceptions import make_middleware |
3890 | -from paste.translogger import make_filter |
3891 | - |
3892 | -from loggerhead import daemon |
3893 | -from loggerhead.apps.config import Root |
3894 | -from loggerhead.trace import setup_logging |
3895 | -from loggerhead.apps.error import ErrorHandlerApp |
3896 | - |
3897 | - |
3898 | -def main(): |
3899 | - sys.stderr.write('\n\n') |
3900 | - sys.stderr.write('WARNING!!! This script has been deprecated by ' |
3901 | - 'serve-branches, and will be removed in the next ' |
3902 | - 'release. Please migrate to serve-branches and report ' |
3903 | - 'any missing features.\n') |
3904 | - sys.stderr.write('\n\n') |
3905 | - |
3906 | - home = os.path.realpath(os.path.dirname(__file__)) |
3907 | - default_pidfile = os.path.join(home, 'loggerhead.pid') |
3908 | - default_configfile = os.path.join(home, 'loggerhead.conf') |
3909 | - default_log_folder = os.path.join(home, 'logs') |
3910 | - parser = OptionParser(usage='usage: %prog [options]', version='%prog') |
3911 | - parser.add_option('-f', '--foreground', |
3912 | - action='store_true', dest='foreground', default=False, |
3913 | - help="run in the foreground; don't daemonize") |
3914 | - parser.add_option('-C', '--check', action='store_true', |
3915 | - dest='check', default=False, |
3916 | - help=("only start if not already running (useful for " |
3917 | - "cron jobs)")) |
3918 | - parser.add_option('-p', '--pidfile', dest="pidfile", |
3919 | - default=default_pidfile, |
3920 | - type=str, help="override pidfile location") |
3921 | - parser.add_option('-c', '--config-file', dest="configfile", |
3922 | - default=default_configfile, |
3923 | - type=str, help="override configuration file location") |
3924 | - parser.add_option('-L', '--log-folder', dest="log_folder", |
3925 | - default=default_log_folder, |
3926 | - type=str, help="override log file directory") |
3927 | - options, args = parser.parse_args() |
3928 | - if len(args) > 0: |
3929 | - parser.error('No filename arguments are used, only options.') |
3930 | - |
3931 | - if options.check: |
3932 | - if daemon.is_running(options.pidfile): |
3933 | - sys.exit(0) |
3934 | - sys.stderr.write('Did not find loggerhead running in %r;' % ( |
3935 | - options.pidfile)) |
3936 | - sys.stderr.write(' restarting...\n') |
3937 | - |
3938 | - # read loggerhead config |
3939 | - |
3940 | - config = ConfigObj(options.configfile, encoding='utf-8') |
3941 | - extra_path = config.get('bzrpath', None) |
3942 | - if extra_path: |
3943 | - sys.path.insert(0, extra_path) |
3944 | - |
3945 | - potential_overrides = [('server.socket_port', int), |
3946 | - ('server.webpath', str), |
3947 | - ('server.thread_pool', int), |
3948 | - ('server.socket_host', str)] |
3949 | - server_port = int(config.get('server.socket_port', 8080)) |
3950 | - nworkers = int(config.get('server.thread_pool', 10)) |
3951 | - server_host = config.get('server.socket_host', '0.0.0.0') |
3952 | - webpath = config.get('server.webpath', None) |
3953 | - |
3954 | - for key, keytype in potential_overrides: |
3955 | - value = config.get(key, None) |
3956 | - if value is not None: |
3957 | - value = keytype(value) |
3958 | - |
3959 | - if not options.foreground: |
3960 | - sys.stderr.write('\n') |
3961 | - sys.stderr.write('Launching loggerhead into the background.\n') |
3962 | - sys.stderr.write('PID file: %s\n' % (options.pidfile,)) |
3963 | - sys.stderr.write('\n') |
3964 | - |
3965 | - daemon.daemonize(options.pidfile, home) |
3966 | - |
3967 | - setup_logging(options.log_folder, config, foreground=options.foreground) |
3968 | - |
3969 | - log = logging.getLogger('loggerhead') |
3970 | - log.info('Starting up...') |
3971 | - |
3972 | - app = Root(config) |
3973 | - |
3974 | - app = app |
3975 | - app = make_middleware(app) |
3976 | - app = make_filter(app, None, logger_name=log.name+'.access') |
3977 | - app = ErrorHandlerApp(app) |
3978 | - |
3979 | - if webpath: |
3980 | - scheme, netloc, path, blah, blah, blah = urlparse.urlparse(webpath) |
3981 | - |
3982 | - def app(environ, start_response, orig=app): |
3983 | - environ['SCRIPT_NAME'] = path |
3984 | - environ['HTTP_HOST'] = netloc |
3985 | - environ['wsgi.url_scheme'] = scheme |
3986 | - return orig(environ, start_response) |
3987 | - |
3988 | - load_plugins() |
3989 | - |
3990 | - try: |
3991 | - httpserver.serve( |
3992 | - app, host=server_host, port=server_port, |
3993 | - threadpool_workers=nworkers) |
3994 | - finally: |
3995 | - log.info('Shutdown.') |
3996 | - try: |
3997 | - os.remove(options.pidfile) |
3998 | - except OSError: |
3999 | - pass |
4000 | - |
4001 | - |
4002 | -if __name__ == '__main__': |
4003 | - main() |
4004 | |
4005 | === removed file 'start-loggerhead.1' |
4006 | --- start-loggerhead.1 2008-08-06 18:33:20 +0000 |
4007 | +++ start-loggerhead.1 1970-01-01 00:00:00 +0000 |
4008 | @@ -1,38 +0,0 @@ |
4009 | -.TH START-LOGGERHEAD "1" "July 2008" "start-loggerhead 1.2.1" "User Commands" |
4010 | -.SH NAME |
4011 | -start-loggerhead \- start a loggerhead server |
4012 | -.SH SYNOPSIS |
4013 | -.B start-loggerhead |
4014 | -[\fIoptions\fR] |
4015 | -.SH DESCRIPTION |
4016 | -Loggerhead is a web viewer for projects in bazaar. It can be used to navigate |
4017 | -a branch history, annotate files, view patches, perform searches, etc. It's |
4018 | -heavily based on bazaar-webserve, which is itself based on hgweb for Mercurial. |
4019 | -.PP |
4020 | -The start-loggerhead command starts a new standalone loggerhead server. |
4021 | -.SH OPTIONS |
4022 | -.TP |
4023 | -\fB\-\-version\fR |
4024 | -show program's version number and exit |
4025 | -.TP |
4026 | -\fB\-h\fR, \fB\-\-help\fR |
4027 | -show this help message and exit |
4028 | -.TP |
4029 | -\fB\-f\fR, \fB\-\-foreground\fR |
4030 | -run in the foreground; don't daemonize |
4031 | -.TP |
4032 | -\fB\-C\fR, \fB\-\-check\fR |
4033 | -only start if not already running (useful for cron jobs) |
4034 | -.TP |
4035 | -\fB\-p\fR, \fB\-\-pidfile=PIDFILE\fR |
4036 | -override pid file location |
4037 | -.TP |
4038 | -\fB\-c\fR, \fB\-\-config-file=CONFIGFILE\fR |
4039 | -override configuration file location |
4040 | -.TP |
4041 | -\fB\-L\fR, \fB\-\-log-folder=LOG-FOLDER\fR |
4042 | -override log directory |
4043 | -.SH "LICENSE" |
4044 | -GNU GPLv2 or later. |
4045 | -.SH "SEE ALSO" |
4046 | -https://launchpad.net/loggerhead |
4047 | |
4048 | === removed file 'stop-loggerhead' |
4049 | --- stop-loggerhead 2009-05-13 14:03:48 +0000 |
4050 | +++ stop-loggerhead 1970-01-01 00:00:00 +0000 |
4051 | @@ -1,51 +0,0 @@ |
4052 | -#!/usr/bin/env python |
4053 | -# This program is free software; you can redistribute it and/or modify |
4054 | -# it under the terms of the GNU General Public License as published by |
4055 | -# the Free Software Foundation; either version 2 of the License, or |
4056 | -# (at your option) any later version. |
4057 | -# |
4058 | -# This program is distributed in the hope that it will be useful, |
4059 | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
4060 | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
4061 | -# GNU General Public License for more details. |
4062 | -# |
4063 | -# You should have received a copy of the GNU General Public License |
4064 | -# along with this program; if not, write to the Free Software |
4065 | -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
4066 | - |
4067 | -"""A script for stopping the loggerhead process.""" |
4068 | - |
4069 | -import os |
4070 | -import sys |
4071 | -from optparse import OptionParser |
4072 | - |
4073 | -home = os.path.realpath(os.path.dirname(__file__)) |
4074 | -default_pidfile = os.path.join(home, 'loggerhead.pid') |
4075 | -parser = OptionParser(usage='usage: %prog [options]', version='%prog') |
4076 | -parser.add_option('-p', '--pidfile', dest="pidfile", default=default_pidfile, |
4077 | - help="override pidfile location") |
4078 | - |
4079 | -options, args = parser.parse_args() |
4080 | -if len(args) > 0: |
4081 | - parser.error('No filename arguments are used, only options.') |
4082 | - |
4083 | -try: |
4084 | - f = open(options.pidfile, 'r') |
4085 | -except IOError, e: |
4086 | - print 'Pid file %s not found.' % (options.pidfile,) |
4087 | - sys.exit(1) |
4088 | - |
4089 | -pid = int(f.readline()) |
4090 | - |
4091 | -try: |
4092 | - os.kill(pid, 0) |
4093 | -except OSError, e: |
4094 | - print 'Stale pid file; server is not running.' |
4095 | - sys.exit(1) |
4096 | - |
4097 | -
4098 | -print 'Shutting down previous server @ pid %d.' % (pid,) |
4099 | -
4100 | - |
4101 | -import signal |
4102 | -os.kill(pid, signal.SIGINT) |
4103 | |
4104 | === removed file 'stop-loggerhead.1' |
4105 | --- stop-loggerhead.1 2008-08-06 18:39:15 +0000 |
4106 | +++ stop-loggerhead.1 1970-01-01 00:00:00 +0000 |
4107 | @@ -1,19 +0,0 @@ |
4108 | -.TH STOP-LOGGERHEAD "1" "July 2008" "stop-loggerhead 1.2.1" "User Commands" |
4109 | -.SH NAME |
4110 | -stop-loggerhead \- stop a loggerhead server |
4111 | -.SH SYNOPSIS |
4112 | -.B stop-loggerhead |
4113 | -.SH DESCRIPTION |
4114 | -Loggerhead is a web viewer for projects in bazaar. It can be used to navigate |
4115 | -a branch history, annotate files, view patches, perform searches, etc. It's |
4116 | -heavily based on bazaar-webserve, which is itself based on hgweb for Mercurial. |
4117 | -.SH OPTIONS |
4118 | -.TP |
4119 | -\fB\-p\fR, \fB\-\-pidfile=PIDFILE\fR |
4120 | -override pid file location |
4121 | -.PP |
4122 | -The stop-loggerhead command stops the currently running loggerhead server. |
4123 | -.SH "LICENSE" |
4124 | -GNU GPLv2 or later. |
4125 | -.SH "SEE ALSO" |
4126 | -https://launchpad.net/loggerhead |
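With start-loggerhead and stop-loggerhead removed, the pidfile bookkeeping and the -C/--check "only start if not already running" convenience shown in the diff above disappear with them; the deprecation notice simply points users at serve-branches. Purely as an illustration (this is not part of loggerhead; the pidfile path, branch location, and any extra serve-branches arguments are placeholders), a cron-friendly wrapper that reproduces that check around serve-branches might look roughly like:

    #!/usr/bin/env python
    # Hypothetical wrapper, NOT part of loggerhead: launch serve-branches only
    # if the process recorded in a pidfile is not already running, mirroring
    # the old start-loggerhead -C/--check behaviour removed in this diff.
    import os
    import subprocess
    import sys

    # Assumed locations; the removed script defaulted to a loggerhead.pid
    # file next to itself.
    PIDFILE = os.path.expanduser('~/loggerhead.pid')
    COMMAND = ['serve-branches', '/path/to/branches']  # see serve-branches --help
                                                       # for host/port options

    def is_running(pidfile):
        """Probe the pid in pidfile with signal 0, as stop-loggerhead did."""
        try:
            with open(pidfile) as f:
                pid = int(f.readline())
            os.kill(pid, 0)
        except (IOError, OSError, ValueError):
            return False
        return True

    if is_running(PIDFILE):
        sys.exit(0)
    proc = subprocess.Popen(COMMAND)
    with open(PIDFILE, 'w') as f:
        f.write('%d\n' % proc.pid)

The liveness test is the same signal-0 probe the removed stop-loggerhead used; a matching "stop" step would read the pidfile and send SIGINT, exactly as in the removed script.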
I suspect that this is no longer relevant, as history-db has already been merged into trunk. If that's the case, could somebody please close this MP? I don't have the authority to.