Merge lp:~jameinel/bzr/1.16-chk-direct into lp:~bzr/bzr/trunk-old
Proposed by: John A Meinel
Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/bzr/1.16-chk-direct
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 200 lines
To merge this branch: bzr merge lp:~jameinel/bzr/1.16-chk-direct
Related bugs: (none)
Reviewer: Martin Pool (status: Approve)
Review via email: mp+7203@code.launchpad.net
Commit message
Description of the change
John A Meinel (jameinel) wrote:
Martin Pool (mbp) wrote:
I'm not very familiar with the chkmap code you're changing here, but I don't see any problems with this patch. Were you wanting to talk about any particular issue here, or is there a list thread about it?
review: Approve
Preview Diff
=== modified file 'bzrlib/chk_map.py'
--- bzrlib/chk_map.py	2009-06-15 14:49:27 +0000
+++ bzrlib/chk_map.py	2009-06-16 02:36:08 +0000
@@ -203,13 +203,57 @@
             multiple pages.
         :return: The root chk of the resulting CHKMap.
         """
-        result = CHKMap(store, None, search_key_func=search_key_func)
+        # root_key = klass._create_via_map(store, initial_value,
+        #    maximum_size=maximum_size, key_width=key_width,
+        #    search_key_func=search_key_func)
+        root_key = klass._create_directly(store, initial_value,
+            maximum_size=maximum_size, key_width=key_width,
+            search_key_func=search_key_func)
+        # if root_key != alt_root_key:
+        #     result1 = klass(store, root_key, search_key_func=search_key_func)
+        #     result2 = klass(store, alt_root_key,
+        #                     search_key_func=search_key_func)
+        #     import pdb; pdb.set_trace()
+        #     raise ValueError('Failed to serialize via leaf splitting.')
+        return root_key
+
+    @classmethod
+    def _create_via_map(klass, store, initial_value, maximum_size=0,
+                        key_width=1, search_key_func=None):
+        result = klass(store, None, search_key_func=search_key_func)
         result._root_node.set_maximum_size(maximum_size)
         result._root_node._key_width = key_width
         delta = []
         for key, value in initial_value.items():
             delta.append((None, key, value))
-        return result.apply_delta(delta)
+        root_key = result.apply_delta(delta)
+        return root_key
+
+    @classmethod
+    def _create_directly(klass, store, initial_value, maximum_size=0,
+                         key_width=1, search_key_func=None):
+        node = LeafNode(search_key_func=search_key_func)
+        node.set_maximum_size(maximum_size)
+        node._key_width = key_width
+        node._items = dict(initial_value)
+        node._raw_size = sum([node._key_value_len(key, value)
+                              for key,value in initial_value.iteritems()])
+        node._len = len(node._items)
+        node._compute_search_prefix()
+        node._compute_serialised_prefix()
+        if (node._len > 1
+            and maximum_size
+            and node._current_size() > maximum_size):
+            prefix, node_details = node._split(store)
+            if len(node_details) == 1:
+                raise AssertionError('Failed to split using node._split')
+            node = InternalNode(prefix, search_key_func=search_key_func)
+            node.set_maximum_size(maximum_size)
+            node._key_width = key_width
+            for split, subnode in node_details:
+                node.add_node(split, subnode)
+        keys = list(node.serialise(store))
+        return keys[-1]
 
     def iter_changes(self, basis):
         """Iterate over the changes between basis and self.
@@ -764,7 +808,19 @@
                 result[prefix] = node
             else:
                 node = result[prefix]
-            node.map(store, key, value)
+            sub_prefix, node_details = node.map(store, key, value)
+            if len(node_details) > 1:
+                if prefix != sub_prefix:
+                    # This node has been split and is now found via a different
+                    # path
+                    result.pop(prefix)
+                new_node = InternalNode(sub_prefix,
+                    search_key_func=self._search_key_func)
+                new_node.set_maximum_size(self._maximum_size)
+                new_node._key_width = self._key_width
+                for split, node in node_details:
+                    new_node.add_node(split, node)
+                result[prefix] = new_node
         return common_prefix, result.items()
 
     def map(self, store, key, value):

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- bzrlib/repofmt/groupcompress_repo.py	2009-06-12 01:11:00 +0000
+++ bzrlib/repofmt/groupcompress_repo.py	2009-06-16 02:36:08 +0000
@@ -674,6 +674,73 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
 
+    def _get_null_inventory(self):
+        serializer = self._format._serializer
+        null_inv = inventory.CHKInventory(serializer.search_key_name)
+        search_key_func = chk_map.search_key_registry.get(
+            serializer.search_key_name)
+        null_inv.id_to_entry = chk_map.CHKMap(self.chk_bytes,
+            None, search_key_func)
+        null_inv.id_to_entry._root_node.set_maximum_size(
+            serializer.maximum_size)
+        null_inv.parent_id_basename_to_file_id = chk_map.CHKMap(
+            self.chk_bytes, None, search_key_func)
+        null_inv.parent_id_basename_to_file_id._root_node.set_maximum_size(
+            serializer.maximum_size)
+        null_inv.parent_id_basename_to_file_id._root_node._key_width = 2
+        null_inv.root_id = None
+        return null_inv
+
+    def _create_inv_from_null(self, delta, new_revision_id):
+        """This will mutate new_inv directly.
+
+        This is a simplified form of create_by_apply_delta which knows that all
+        the old values must be None, so everything is a create.
+        """
+        serializer = self._format._serializer
+        new_inv = inventory.CHKInventory(serializer.search_key_name)
+        new_inv.revision_id = new_revision_id
+
+        entry_to_bytes = new_inv._entry_to_bytes
+        id_to_entry_dict = {}
+        parent_id_basename_dict = {}
+        for old_path, new_path, file_id, entry in delta:
+            if old_path is not None:
+                raise ValueError('Invalid delta, somebody tried to delete %r'
+                    ' from the NULL_REVISION'
+                    % ((old_path, file_id),))
+            if new_path is None:
+                raise ValueError('Invalid delta, delta from NULL_REVISION has'
+                    ' no new_path %r' % (file_id,))
+            # file id changes
+            if new_path == '':
+                new_inv.root_id = file_id
+                parent_id_basename_key = '', ''
+            else:
+                utf8_entry_name = entry.name.encode('utf-8')
+                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
+            new_value = entry_to_bytes(entry)
+            # Create Caches?
+            ## new_inv._path_to_fileid_cache[new_path] = file_id
+            id_to_entry_dict[(file_id,)] = new_value
+            parent_id_basename_dict[parent_id_basename_key] = file_id
+
+        search_key_func = chk_map.search_key_registry.get(
+            serializer.search_key_name)
+        maximum_size = serializer.maximum_size
+        root_key = chk_map.CHKMap.from_dict(self.chk_bytes, id_to_entry_dict,
+            maximum_size=maximum_size, key_width=1,
+            search_key_func=search_key_func)
+        new_inv.id_to_entry = chk_map.CHKMap(self.chk_bytes, root_key,
+            search_key_func)
+        root_key = chk_map.CHKMap.from_dict(self.chk_bytes,
+            parent_id_basename_dict,
+            maximum_size=maximum_size, key_width=1,
+            search_key_func=search_key_func)
+        new_inv.parent_id_basename_to_file_id = chk_map.CHKMap(self.chk_bytes,
+            root_key, search_key_func)
+        return new_inv
+
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
         """Add a new inventory expressed as a delta against another revision.
@@ -699,24 +766,29 @@
         repository format specific) of the serialized inventory, and the
         resulting inventory.
         """
-        if basis_revision_id == _mod_revision.NULL_REVISION:
-            return KnitPackRepository.add_inventory_by_delta(self,
-                basis_revision_id, delta, new_revision_id, parents)
         if not self.is_in_write_group():
             raise AssertionError("%r not in write group" % (self,))
         _mod_revision.check_not_reserved_id(new_revision_id)
-        basis_tree = self.revision_tree(basis_revision_id)
-        basis_tree.lock_read()
-        try:
-            if basis_inv is None:
+        basis_tree = None
+        if basis_inv is None:
+            if basis_revision_id == _mod_revision.NULL_REVISION:
+                new_inv = self._create_inv_from_null(delta, new_revision_id)
+                inv_lines = new_inv.to_lines()
+                return self._inventory_add_lines(new_revision_id, parents,
+                    inv_lines, check_content=False), new_inv
+            else:
+                basis_tree = self.revision_tree(basis_revision_id)
+                basis_tree.lock_read()
                 basis_inv = basis_tree.inventory
+        try:
             result = basis_inv.create_by_apply_delta(delta, new_revision_id,
                 propagate_caches=propagate_caches)
             inv_lines = result.to_lines()
            return self._inventory_add_lines(new_revision_id, parents,
                 inv_lines, check_content=False), result
         finally:
-            basis_tree.unlock()
+            if basis_tree is not None:
+                basis_tree.unlock()
 
     def _iter_inventories(self, revision_ids):
         """Iterate over many inventory objects."""
This is a bit more of an [RFC] than a strict [MERGE], but I figured it would be good to get some feedback on the route I'm taking.
With the attached patch (and most of my other proposed updates for commit with dev6 format repositories) I've managed to get the time for initial commit of a MySQL repository down to roughly equivalent to the XML time.
For comparison:

    dev6        15.5s
    1.9         11.6s
    dev6+       13.5s
    dev6-all    11.6s
So this patch by itself saves approximately 2s, or 10-20% of the time for
initial commit.
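As a rough check on that percentage range (the arithmetic below assumes the ~2s saving is measured against both the slowest and fastest times quoted above):

```python
# Back-of-envelope check of the "10-20%" figure.
saving = 15.5 - 13.5                          # dev6 minus dev6+this patch
print('%.0f%%' % (100 * saving / 15.5))       # ~13% of the dev6 time
print('%.0f%%' % (100 * saving / 11.6))       # ~17% of the dev6-all time
```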
The main change is to get away from calling CHKMap.map() for every object in
the commit, and instead to create a LeafNode and then _split() it.
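In outline: rather than routing every key through the tree and paying a size check plus possible page split per insertion, the new path fills one oversized leaf and splits it once at the end. A minimal, self-contained sketch of that shape, with toy tuples standing in for the real bzrlib LeafNode/InternalNode and entry counts standing in for serialised page size:

```python
# Toy model of _create_directly: not bzrlib's API, just the shape of the
# optimisation.  Pages here overflow at MAX_ITEMS entries; the real code
# compares the serialised size of the node against maximum_size.
MAX_ITEMS = 4

def split_leaf(items, depth=0):
    # Mirror of LeafNode._split: bucket the keys by one more prefix
    # character and recurse until every page fits.
    if len(items) <= MAX_ITEMS:
        return ('leaf', items)
    children = {}
    for key, value in items.items():
        children.setdefault(key[:depth + 1], {})[key] = value
    return ('internal',
            dict((prefix, split_leaf(sub, depth + 1))
                 for prefix, sub in children.items()))

def create_directly(initial_value):
    # Mirror of CHKMap._create_directly: fill a single leaf in one pass,
    # then do one split pass, instead of calling map() once per key.
    return split_leaf(dict(initial_value))

tree = create_directly({'apple': 1, 'apric': 2, 'beech': 3,
                        'birch': 4, 'cedar': 5})
assert tree[0] == 'internal'   # five entries forced a single split
```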
This also includes my other change to improve InternalNode.map() performance (the _iter_nodes change: https://code.edge.launchpad.net/~jameinel/bzr/1.16-chkmap-updates/+merge/7113).
However, InternalNode.map() still has a lot of "did the LeafNode shrink"
overhead, that we really don't care about for initial commit.
Hopefully someone will approve 7113 for me, and then I'll resubmit this, so we can see that it isn't a very big change.