Merge lp:~spiv/bzr/all-referenced-texts-check into lp:~bzr/bzr/trunk-old
- all-referenced-texts-check
- Merge into trunk-old
Proposed by
Andrew Bennetts
Status: Merged
Merged at revision: not available
Proposed branch: lp:~spiv/bzr/all-referenced-texts-check
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 368 lines
To merge this branch: bzr merge lp:~spiv/bzr/all-referenced-texts-check
Related bugs: (none listed)
Reviewer: Robert Collins (community) — Approve
Review via email:
|
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Andrew Bennetts (spiv) wrote : | # |
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Robert Collins (lifeless) wrote : | # |
review approve
Looks like a TODO:
11:28 < lifeless> + # Also, perf tests:
11:28 < lifeless> + #  - if all invs present, then no texts are checked
Please either do it, or remove the comment.
review:
Approve
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Andrew Bennetts (spiv) wrote : | # |
> Please either do it, or remove the comment.
Oops, removed. Thanks!
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'bzrlib/knit.py' |
2 | --- bzrlib/knit.py 2009-04-27 23:14:00 +0000 |
3 | +++ bzrlib/knit.py 2009-05-12 02:35:10 +0000 |
4 | @@ -2688,6 +2688,44 @@ |
5 | return key[:-1], key[-1] |
6 | |
7 | |
8 | +class _KeyRefs(object): |
9 | + |
10 | + def __init__(self): |
11 | + # dict mapping 'key' to 'set of keys referring to that key' |
12 | + self.refs = {} |
13 | + |
14 | + def add_references(self, key, refs): |
15 | + # Record the new references |
16 | + for referenced in refs: |
17 | + try: |
18 | + needed_by = self.refs[referenced] |
19 | + except KeyError: |
20 | + needed_by = self.refs[referenced] = set() |
21 | + needed_by.add(key) |
22 | + # Discard references satisfied by the new key |
23 | + self.add_key(key) |
24 | + |
25 | + def get_unsatisfied_refs(self): |
26 | + return self.refs.iterkeys() |
27 | + |
28 | + def add_key(self, key): |
29 | + try: |
30 | + del self.refs[key] |
31 | + except KeyError: |
32 | + # No keys depended on this key. That's ok. |
33 | + pass |
34 | + |
35 | + def add_keys(self, keys): |
36 | + for key in keys: |
37 | + self.add_key(key) |
38 | + |
39 | + def get_referrers(self): |
40 | + result = set() |
41 | + for referrers in self.refs.itervalues(): |
42 | + result.update(referrers) |
43 | + return result |
44 | + |
45 | + |
46 | class _KnitGraphIndex(object): |
47 | """A KnitVersionedFiles index layered on GraphIndex.""" |
48 | |
49 | @@ -2723,9 +2761,9 @@ |
50 | self._is_locked = is_locked |
51 | self._missing_compression_parents = set() |
52 | if track_external_parent_refs: |
53 | - self._external_parent_refs = set() |
54 | + self._key_dependencies = _KeyRefs() |
55 | else: |
56 | - self._external_parent_refs = None |
57 | + self._key_dependencies = None |
58 | |
59 | def __repr__(self): |
60 | return "%s(%r)" % (self.__class__.__name__, self._graph_index) |
61 | @@ -2755,13 +2793,12 @@ |
62 | |
63 | keys = {} |
64 | compression_parents = set() |
65 | - parent_refs = self._external_parent_refs |
66 | + key_dependencies = self._key_dependencies |
67 | for (key, options, access_memo, parents) in records: |
68 | if self._parents: |
69 | parents = tuple(parents) |
70 | - if parent_refs is not None: |
71 | - parent_refs.update(parents) |
72 | - parent_refs.discard(key) |
73 | + if key_dependencies is not None: |
74 | + key_dependencies.add_references(key, parents) |
75 | index, pos, size = access_memo |
76 | if 'no-eol' in options: |
77 | value = 'N' |
78 | @@ -2829,12 +2866,11 @@ |
79 | new_missing = graph_index.external_references(ref_list_num=1) |
80 | new_missing.difference_update(self.get_parent_map(new_missing)) |
81 | self._missing_compression_parents.update(new_missing) |
82 | - if self._external_parent_refs is not None: |
83 | + if self._key_dependencies is not None: |
84 | # Add parent refs from graph_index (and discard parent refs that |
85 | # the graph_index has). |
86 | for node in graph_index.iter_all_entries(): |
87 | - self._external_parent_refs.update(node[3][0]) |
88 | - self._external_parent_refs.discard(node[1]) |
89 | + self._key_dependencies.add_references(node[1], node[3][0]) |
90 | |
91 | def get_missing_compression_parents(self): |
92 | """Return the keys of missing compression parents. |
93 | @@ -2847,9 +2883,9 @@ |
94 | def get_missing_parents(self): |
95 | """Return the keys of missing parents.""" |
96 | # We may have false positives, so filter those out. |
97 | - self._external_parent_refs.difference_update( |
98 | - self.get_parent_map(self._external_parent_refs)) |
99 | - return frozenset(self._external_parent_refs) |
100 | + self._key_dependencies.add_keys( |
101 | + self.get_parent_map(self._key_dependencies.get_unsatisfied_refs())) |
102 | + return frozenset(self._key_dependencies.get_unsatisfied_refs()) |
103 | |
104 | def _check_read(self): |
105 | """raise if reads are not permitted.""" |
106 | |
107 | === modified file 'bzrlib/repository.py' |
108 | --- bzrlib/repository.py 2009-05-07 05:08:46 +0000 |
109 | +++ bzrlib/repository.py 2009-05-12 02:35:10 +0000 |
110 | @@ -1448,7 +1448,31 @@ |
111 | unstacked_inventories = self.inventories._index |
112 | present_inventories = unstacked_inventories.get_parent_map( |
113 | key[-1:] for key in parents) |
114 | - parents.difference_update(present_inventories) |
115 | + if len(parents.difference(present_inventories)) == 0: |
116 | + # No missing parent inventories. |
117 | + return set() |
118 | + # Ok, now we have a list of missing inventories. But these only matter |
119 | + # if the inventories that reference them are missing some texts they |
120 | + # appear to introduce. |
121 | + # XXX: Texts referenced by all added inventories need to be present, |
122 | + # but at the moment we're only checking for texts referenced by |
123 | + # inventories at the graph's edge. |
124 | + key_deps = self.revisions._index._key_dependencies |
125 | + key_deps.add_keys(present_inventories) |
126 | + referrers = frozenset(r[0] for r in key_deps.get_referrers()) |
127 | + file_ids = self.fileids_altered_by_revision_ids(referrers) |
128 | + missing_texts = set() |
129 | + for file_id, version_ids in file_ids.iteritems(): |
130 | + missing_texts.update( |
131 | + (file_id, version_id) for version_id in version_ids) |
132 | + present_texts = self.texts.get_parent_map(missing_texts) |
133 | + missing_texts.difference_update(present_texts) |
134 | + if not missing_texts: |
135 | + # No texts are missing, so all revisions and their deltas are |
136 | + # reconstructable. |
137 | + return set() |
138 | + # Alternatively the text versions could be returned as the missing |
139 | + # keys, but this is likely to be less data. |
140 | missing_keys = set(('inventories', rev_id) for (rev_id,) in parents) |
141 | return missing_keys |
142 | |
143 | @@ -3993,6 +4017,7 @@ |
144 | def _locked_insert_stream(self, stream, src_format): |
145 | to_serializer = self.target_repo._format._serializer |
146 | src_serializer = src_format._serializer |
147 | + new_pack = None |
148 | if to_serializer == src_serializer: |
149 | # If serializers match and the target is a pack repository, set the |
150 | # write cache size on the new pack. This avoids poor performance |
151 | @@ -4039,6 +4064,11 @@ |
152 | self.target_repo.signatures.insert_record_stream(substream) |
153 | else: |
154 | raise AssertionError('kaboom! %s' % (substream_type,)) |
155 | + # Done inserting data, and the missing_keys calculations will try to |
156 | + # read back from the inserted data, so flush the writes to the new pack |
157 | + # (if this is pack format). |
158 | + if new_pack is not None: |
159 | + new_pack._write_data('', flush=True) |
160 | # Find all the new revisions (including ones from resume_tokens) |
161 | missing_keys = self.target_repo.get_missing_parent_inventories() |
162 | try: |
163 | |
164 | === modified file 'bzrlib/tests/per_repository/test_write_group.py' |
165 | --- bzrlib/tests/per_repository/test_write_group.py 2009-04-21 07:22:04 +0000 |
166 | +++ bzrlib/tests/per_repository/test_write_group.py 2009-05-12 02:35:10 +0000 |
167 | @@ -120,6 +120,8 @@ |
168 | if token is not None: |
169 | repo.leave_lock_in_place() |
170 | |
171 | +class TestGetMissingParentInventories(TestCaseWithRepository): |
172 | + |
173 | def test_empty_get_missing_parent_inventories(self): |
174 | """A new write group has no missing parent inventories.""" |
175 | repo = self.make_repository('.') |
176 | @@ -131,63 +133,50 @@ |
177 | repo.commit_write_group() |
178 | repo.unlock() |
179 | |
180 | - def test_get_missing_parent_inventories(self): |
181 | - # Make a trunk with one commit. |
182 | - if isinstance(self.repository_format, remote.RemoteRepositoryFormat): |
183 | - # RemoteRepository by default builds a default format real |
184 | - # repository, but the default format is unstackble. So explicitly |
185 | - # make a stackable real repository and use that. |
186 | - repo = self.make_repository('trunk', format='1.9') |
187 | - repo = bzrdir.BzrDir.open(self.get_url('trunk')).open_repository() |
188 | - else: |
189 | - repo = self.make_repository('trunk') |
190 | - if not repo._format.supports_external_lookups: |
191 | - raise TestNotApplicable('format not stackable') |
192 | - repo.bzrdir._format.set_branch_format(BzrBranchFormat7()) |
193 | + def branch_trunk_and_make_tree(self, trunk_repo, relpath): |
194 | + tree = self.make_branch_and_memory_tree('branch') |
195 | + trunk_repo.lock_read() |
196 | + self.addCleanup(trunk_repo.unlock) |
197 | + tree.branch.repository.fetch(trunk_repo, revision_id='rev-1') |
198 | + tree.set_parent_ids(['rev-1']) |
199 | + return tree |
200 | + |
201 | + def make_first_commit(self, repo): |
202 | trunk = repo.bzrdir.create_branch() |
203 | - trunk_repo = repo |
204 | tree = memorytree.MemoryTree.create_on_branch(trunk) |
205 | tree.lock_write() |
206 | - if repo._format.rich_root_data: |
207 | - # The tree needs a root |
208 | - tree._inventory.add(InventoryDirectory('the-root-id', '', None)) |
209 | + tree.add([''], ['TREE_ROOT'], ['directory']) |
210 | + tree.add(['dir'], ['dir-id'], ['directory']) |
211 | + tree.add(['filename'], ['file-id'], ['file']) |
212 | + tree.put_file_bytes_non_atomic('file-id', 'content\n') |
213 | + tree.commit('Trunk commit', rev_id='rev-0') |
214 | tree.commit('Trunk commit', rev_id='rev-1') |
215 | tree.unlock() |
216 | - # Branch the trunk, add a new commit. |
217 | - tree = self.make_branch_and_tree('branch') |
218 | - trunk_repo.lock_read() |
219 | - self.addCleanup(trunk_repo.unlock) |
220 | - tree.branch.repository.fetch(trunk_repo, revision_id='rev-1') |
221 | - tree.set_parent_ids(['rev-1']) |
222 | + |
223 | + def make_new_commit_in_new_repo(self, trunk_repo, parents=None): |
224 | + tree = self.branch_trunk_and_make_tree(trunk_repo, 'branch') |
225 | + tree.set_parent_ids(parents) |
226 | tree.commit('Branch commit', rev_id='rev-2') |
227 | branch_repo = tree.branch.repository |
228 | - # Make a new repo stacked on trunk, and copy the new commit's revision |
229 | - # and inventory records to it. |
230 | - if isinstance(self.repository_format, remote.RemoteRepositoryFormat): |
231 | - # RemoteRepository by default builds a default format real |
232 | - # repository, but the default format is unstackble. So explicitly |
233 | - # make a stackable real repository and use that. |
234 | - repo = self.make_repository('stacked', format='1.9') |
235 | - repo = bzrdir.BzrDir.open(self.get_url('stacked')).open_repository() |
236 | - else: |
237 | - repo = self.make_repository('stacked') |
238 | branch_repo.lock_read() |
239 | self.addCleanup(branch_repo.unlock) |
240 | - repo.add_fallback_repository(trunk.repository) |
241 | - repo.lock_write() |
242 | - repo.start_write_group() |
243 | - trunk_repo.lock_read() |
244 | - repo.inventories.insert_record_stream( |
245 | - branch_repo.inventories.get_record_stream( |
246 | - [('rev-2',)], 'unordered', False)) |
247 | - repo.revisions.insert_record_stream( |
248 | - branch_repo.revisions.get_record_stream( |
249 | - [('rev-2',)], 'unordered', False)) |
250 | - self.assertEqual( |
251 | - set([('inventories', 'rev-1')]), |
252 | - repo.get_missing_parent_inventories()) |
253 | - # Revisions from resumed write groups can also cause missing parent |
254 | - # inventories. |
255 | + return branch_repo |
256 | + |
257 | + def make_stackable_repo(self, relpath='trunk'): |
258 | + if isinstance(self.repository_format, remote.RemoteRepositoryFormat): |
259 | + # RemoteRepository by default builds a default format real |
260 | + # repository, but the default format is unstackble. So explicitly |
261 | + # make a stackable real repository and use that. |
262 | + repo = self.make_repository(relpath, format='1.9') |
263 | + repo = bzrdir.BzrDir.open(self.get_url(relpath)).open_repository() |
264 | + else: |
265 | + repo = self.make_repository(relpath) |
266 | + if not repo._format.supports_external_lookups: |
267 | + raise TestNotApplicable('format not stackable') |
268 | + repo.bzrdir._format.set_branch_format(BzrBranchFormat7()) |
269 | + return repo |
270 | + |
271 | + def reopen_repo_and_resume_write_group(self, repo): |
272 | try: |
273 | resume_tokens = repo.suspend_write_group() |
274 | except errors.UnsuspendableWriteGroup: |
275 | @@ -201,9 +190,93 @@ |
276 | reopened_repo.lock_write() |
277 | self.addCleanup(reopened_repo.unlock) |
278 | reopened_repo.resume_write_group(resume_tokens) |
279 | + return reopened_repo |
280 | + |
281 | + def test_ghost_revision(self): |
282 | + """A parent inventory may be absent if all the needed texts are present. |
283 | + i.e., a ghost revision isn't (necessarily) considered to be a missing |
284 | + parent inventory. |
285 | + """ |
286 | + # Make a trunk with one commit. |
287 | + trunk_repo = self.make_stackable_repo() |
288 | + self.make_first_commit(trunk_repo) |
289 | + trunk_repo.lock_read() |
290 | + self.addCleanup(trunk_repo.unlock) |
291 | + # Branch the trunk, add a new commit. |
292 | + branch_repo = self.make_new_commit_in_new_repo( |
293 | + trunk_repo, parents=['rev-1', 'ghost-rev']) |
294 | + inv = branch_repo.get_inventory('rev-2') |
295 | + # Make a new repo stacked on trunk, and then copy into it: |
296 | + # - all texts in rev-2 |
297 | + # - the new inventory (rev-2) |
298 | + # - the new revision (rev-2) |
299 | + repo = self.make_stackable_repo('stacked') |
300 | + repo.lock_write() |
301 | + repo.start_write_group() |
302 | + # Add all texts from in rev-2 inventory. Note that this has to exclude |
303 | + # the root if the repo format does not support rich roots. |
304 | + rich_root = branch_repo._format.rich_root_data |
305 | + all_texts = [ |
306 | + (ie.file_id, ie.revision) for ie in inv.iter_just_entries() |
307 | + if rich_root or inv.id2path(ie.file_id) != ''] |
308 | + repo.texts.insert_record_stream( |
309 | + branch_repo.texts.get_record_stream(all_texts, 'unordered', False)) |
310 | + # Add inventory and revision for rev-2. |
311 | + repo.add_inventory('rev-2', inv, ['rev-1', 'ghost-rev']) |
312 | + repo.revisions.insert_record_stream( |
313 | + branch_repo.revisions.get_record_stream( |
314 | + [('rev-2',)], 'unordered', False)) |
315 | + # Now, no inventories are reported as missing, even though there is a |
316 | + # ghost. |
317 | + self.assertEqual(set(), repo.get_missing_parent_inventories()) |
318 | + # Resuming the write group does not affect |
319 | + # get_missing_parent_inventories. |
320 | + reopened_repo = self.reopen_repo_and_resume_write_group(repo) |
321 | + self.assertEqual(set(), reopened_repo.get_missing_parent_inventories()) |
322 | + reopened_repo.abort_write_group() |
323 | + |
324 | + def test_get_missing_parent_inventories(self): |
325 | + """A stacked repo with a single revision and inventory (no parent |
326 | + inventory) in it must have all the texts in its inventory (even if not |
327 | + changed w.r.t. to the absent parent), otherwise it will report missing |
328 | + texts/parent inventory. |
329 | + |
330 | + The core of this test is that a file was changed in rev-1, but in a |
331 | + stacked repo that only has rev-2 |
332 | + """ |
333 | + # Make a trunk with one commit. |
334 | + trunk_repo = self.make_stackable_repo() |
335 | + self.make_first_commit(trunk_repo) |
336 | + trunk_repo.lock_read() |
337 | + self.addCleanup(trunk_repo.unlock) |
338 | + # Branch the trunk, add a new commit. |
339 | + branch_repo = self.make_new_commit_in_new_repo( |
340 | + trunk_repo, parents=['rev-1']) |
341 | + inv = branch_repo.get_inventory('rev-2') |
342 | + # Make a new repo stacked on trunk, and copy the new commit's revision |
343 | + # and inventory records to it. |
344 | + repo = self.make_stackable_repo('stacked') |
345 | + repo.lock_write() |
346 | + repo.start_write_group() |
347 | + # Insert a single fulltext inv (using add_inventory because it's |
348 | + # simpler than insert_record_stream) |
349 | + repo.add_inventory('rev-2', inv, ['rev-1']) |
350 | + repo.revisions.insert_record_stream( |
351 | + branch_repo.revisions.get_record_stream( |
352 | + [('rev-2',)], 'unordered', False)) |
353 | + # There should be no missing compression parents |
354 | + self.assertEqual(set(), |
355 | + repo.inventories.get_missing_compression_parent_keys()) |
356 | + self.assertEqual( |
357 | + set([('inventories', 'rev-1')]), |
358 | + repo.get_missing_parent_inventories()) |
359 | + # Resuming the write group does not affect |
360 | + # get_missing_parent_inventories. |
361 | + reopened_repo = self.reopen_repo_and_resume_write_group(repo) |
362 | self.assertEqual( |
363 | set([('inventories', 'rev-1')]), |
364 | reopened_repo.get_missing_parent_inventories()) |
365 | + # Adding the parent inventory satisfies get_missing_parent_inventories. |
366 | reopened_repo.inventories.insert_record_stream( |
367 | branch_repo.inventories.get_record_stream( |
368 | [('rev-1',)], 'unordered', False)) |
This branch fixes bug 368418.
It improves the check for missing parent inventories so that it passes as long as either:
- all parent inventories are present, or
- all texts referenced by the new inventories are present.
Either way, a usable revision delta for those revisions can be calculated, which
is what matters.