Merge lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy into lp:loggerhead

Proposed by Michael Hudson-Doyle
Status: Merged
Approved by: Paul Hummer
Approved revision: 338
Merged at revision: not available
Proposed branch: lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy
Merge into: lp:loggerhead
Diff against target: None lines
To merge this branch: bzr merge lp:~mwhudson/loggerhead/less-stupid-whole-history-redundancy
Reviewer Review Type Date Requested Status
Paul Hummer (community) Approve
Loggerhead Team Pending
Review via email: mp+5843@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Hi Paul,

This branches cuts down on the amount of things that the wholehistorycache hangs on to -- it turns out that all of the things loggerhead needs from the wholehistorycache can be quickly (about 0.1s for a launchpad branch) be derived from the merge sorted revision graph + some other bits.

There should be no observable change in behavior, apart from lower memory consumption when you browse many different branches.

Some of the code that uses the whole history data has become even more inscrutable, not sure how bad that is...

Cheers,
mwh

Revision history for this message
Paul Hummer (rockstar) wrote :

Michael-

  So, these changes are all an improvement, and I only have a few small
comments for the sake of readability in the future. Otherwise, let's land
this, just in time for a new Launchpad rollout.

 vote approve
 status approved

> === modified file 'loggerhead/history.py'
> --- loggerhead/history.py 2009-04-24 00:00:03 +0000
> +++ loggerhead/history.py 2009-04-24 03:44:30 +0000
> @@ -199,14 +199,25 @@
>
> self.last_revid = branch.last_revision()
>
> - whole_history_data = whole_history_data_cache.get(self.last_revid)
> - if whole_history_data is None:
> + cached_whole_history_data = whole_history_data_cache.get(self.last_revid)
> + if cached_whole_history_data is None:
> whole_history_data = compute_whole_history_data(branch)
> - whole_history_data_cache[self.last_revid] = whole_history_data
> -
> - (self._revision_graph, self._full_history, self._revision_info,
> - self._revno_revid, self._merge_sort, self._where_merged,
> - ) = whole_history_data
> + (self._rev_info, self._rev_indices) = whole_history_data
> + whole_history_data_cache[self.last_revid] = whole_history_data[0]
> + self._full_history = []
> + self._revno_revid = {}
> + for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
> + self._revno_revid[revno_str] = revid
> + self._full_history.append(revid)
> + else:
> + self._rev_info = cached_whole_history_data
> + self._full_history = []
> + self._revno_revid = {}
> + self._rev_indices = {}
> + for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
> + self._rev_indices[revid] = seq
> + self._revno_revid[revno_str] = revid
> + self._full_history.append(revid)
>
> def use_file_cache(self, cache):
> self._file_change_cache = cache

So, the biggest issues I've had with the wholehistory is that the data
structures we're using here aren't obvious. Maybe the a comment or a docstring
would be more helpful. When I was working on wholehistory, I had to attach pdb
and examine the data structures myself. Since we're creating new
datastructures, could we add some in-code documentation to it?

> @@ -219,12 +230,12 @@
> return self._branch.get_config()
>
> def get_revno(self, revid):
> - if revid not in self._revision_info:
> + if revid not in self._rev_indices:
> # ghost parent?
> return 'unknown'
> - (seq, revid, merge_depth,
> - revno_str, end_of_merge) = self._revision_info[revid]
> - return revno_str
> + seq = self._rev_indices[revid]
> + revno = self._rev_info[seq][0][3]
> + return revno
>
> def get_revids_from(self, revid_list, start_revid):
> """

self._rev_info[seq][0][3] is a great example of mysterious data structures.
This seems magic to me.

--
Paul Hummer
http://theironlion.net
1024/862FF08F C921 E962 58F8 5547 6723 0E8C 1C4D 8AC5 862F F08F

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'loggerhead/history.py'
2--- loggerhead/history.py 2009-04-24 00:00:03 +0000
3+++ loggerhead/history.py 2009-04-24 03:44:30 +0000
4@@ -199,14 +199,25 @@
5
6 self.last_revid = branch.last_revision()
7
8- whole_history_data = whole_history_data_cache.get(self.last_revid)
9- if whole_history_data is None:
10+ cached_whole_history_data = whole_history_data_cache.get(self.last_revid)
11+ if cached_whole_history_data is None:
12 whole_history_data = compute_whole_history_data(branch)
13- whole_history_data_cache[self.last_revid] = whole_history_data
14-
15- (self._revision_graph, self._full_history, self._revision_info,
16- self._revno_revid, self._merge_sort, self._where_merged,
17- ) = whole_history_data
18+ (self._rev_info, self._rev_indices) = whole_history_data
19+ whole_history_data_cache[self.last_revid] = whole_history_data[0]
20+ self._full_history = []
21+ self._revno_revid = {}
22+ for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
23+ self._revno_revid[revno_str] = revid
24+ self._full_history.append(revid)
25+ else:
26+ self._rev_info = cached_whole_history_data
27+ self._full_history = []
28+ self._revno_revid = {}
29+ self._rev_indices = {}
30+ for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
31+ self._rev_indices[revid] = seq
32+ self._revno_revid[revno_str] = revid
33+ self._full_history.append(revid)
34
35 def use_file_cache(self, cache):
36 self._file_change_cache = cache
37@@ -219,12 +230,12 @@
38 return self._branch.get_config()
39
40 def get_revno(self, revid):
41- if revid not in self._revision_info:
42+ if revid not in self._rev_indices:
43 # ghost parent?
44 return 'unknown'
45- (seq, revid, merge_depth,
46- revno_str, end_of_merge) = self._revision_info[revid]
47- return revno_str
48+ seq = self._rev_indices[revid]
49+ revno = self._rev_info[seq][0][3]
50+ return revno
51
52 def get_revids_from(self, revid_list, start_revid):
53 """
54@@ -238,10 +249,11 @@
55
56 def introduced_revisions(revid):
57 r = set([revid])
58- seq, revid, md, revno, end_of_merge = self._revision_info[revid]
59+ seq = self._rev_indices[revid]
60+ md = self._rev_info[seq][0][2]
61 i = seq + 1
62- while i < len(self._merge_sort) and self._merge_sort[i][2] > md:
63- r.add(self._merge_sort[i][1])
64+ while i < len(self._rev_info) and self._rev_info[i][0][2] > md:
65+ r.add(self._rev_info[i][0][1])
66 i += 1
67 return r
68 while 1:
69@@ -249,7 +261,7 @@
70 return
71 if introduced_revisions(revid) & revid_set:
72 yield revid
73- parents = self._revision_graph[revid]
74+ parents = self._rev_info[self._rev_indices[revid]][2]
75 if len(parents) == 0:
76 return
77 revid = parents[0]
78@@ -466,10 +478,10 @@
79
80 merge_point = []
81 while True:
82- children = self._where_merged.get(revid, ())
83+ children = self._rev_info[self._rev_indices[revid]][1]
84 nexts = []
85 for child in children:
86- child_parents = self._revision_graph[child]
87+ child_parents = self._rev_info[self._rev_indices[child]][2]
88 if child_parents[0] == revid:
89 nexts.append(child)
90 else:
91
92=== modified file 'loggerhead/wholehistory.py'
93--- loggerhead/wholehistory.py 2009-04-24 00:00:03 +0000
94+++ loggerhead/wholehistory.py 2009-04-24 03:12:28 +0000
95@@ -50,32 +50,32 @@
96
97 _revision_graph = _strip_NULL_ghosts(parent_map)
98 _full_history = []
99- _revision_info = {}
100- _revno_revid = {}
101+
102+ _rev_info = []
103+ _rev_indices = {}
104+
105 if is_null(last_revid):
106 _merge_sort = []
107 else:
108 _merge_sort = merge_sort(
109 _revision_graph, last_revid, generate_revno=True)
110
111- for (seq, revid, merge_depth, revno, end_of_merge) in _merge_sort:
112+ for info in _merge_sort:
113+ seq, revid, merge_depth, revno, end_of_merge = info
114 _full_history.append(revid)
115 revno_str = '.'.join(str(n) for n in revno)
116- _revno_revid[revno_str] = revid
117- _revision_info[revid] = (
118- seq, revid, merge_depth, revno_str, end_of_merge)
119-
120- _where_merged = {}
121+ parents = _revision_graph[revid]
122+ _rev_indices[revid] = len(_rev_info)
123+ _rev_info.append([(seq, revid, merge_depth, revno_str, end_of_merge), (), parents])
124
125 for revid in _revision_graph.keys():
126- if _revision_info[revid][2] == 0:
127+ if _rev_info[_rev_indices[revid]][0][2] == 0:
128 continue
129 for parent in _revision_graph[revid]:
130- c = _where_merged.setdefault(parent, ())
131- if revid not in c:
132- _where_merged[parent] = c + (revid,)
133+ c = _rev_info[_rev_indices[parent]]
134+ if revid not in c[1]:
135+ c[1] = c[1] + (revid,)
136
137 log.info('built revision graph cache: %r secs' % (time.time() - z))
138
139- return (_revision_graph, _full_history, _revision_info,
140- _revno_revid, _merge_sort, _where_merged)
141+ return (_rev_info, _rev_indices)

Subscribers

People subscribed via source and target branches