Merge lp:~spiv/bzr/all-referenced-texts-check into lp:~bzr/bzr/trunk-old

Proposed by Andrew Bennetts
Status: Merged
Merged at revision: not available
Proposed branch: lp:~spiv/bzr/all-referenced-texts-check
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 368 lines
To merge this branch: bzr merge lp:~spiv/bzr/all-referenced-texts-check
Reviewer: Robert Collins (community), status: Approve
Review via email: mp+6445@code.launchpad.net
Andrew Bennetts (spiv) wrote:

This branch fixes bug 368418.

It improves the check for missing parent inventories so that it passes as long as either:

 - all parent inventories are present, or
 - all texts referenced by the new inventories are present.

Either way, a usable revision delta for those revisions can be calculated, which
is what matters.
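
Concretely, the new check amounts to something like the minimal sketch below (plain Python 2 in bzrlib's style; this is an illustration of the logic only, and the function name and parameters are invented for the example rather than taken from the patch):

    # Simplified sketch, not the exact bzrlib code.  'referenced_texts' stands
    # in for the (file_id, revision_id) keys that
    # Repository.fileids_altered_by_revision_ids() reports for the newly added
    # inventories, and 'present_texts' for what repo.texts.get_parent_map()
    # found of them.
    def missing_parent_inventories(parents, present_inventories,
                                   referenced_texts, present_texts):
        if not parents.difference(present_inventories):
            # All parent inventories are present: nothing to report.
            return set()
        # Some parent inventories are absent; that is still acceptable as long
        # as every text referenced by the new inventories is present.
        if not referenced_texts.difference(present_texts):
            # A usable revision delta can still be calculated.
            return set()
        return set(('inventories', rev_id) for (rev_id,) in parents)

    # For example, a missing parent inventory whose referenced texts are all
    # present is not reported:
    print missing_parent_inventories(
        parents=set([('rev-1',)]), present_inventories={},
        referenced_texts=set([('file-id', 'rev-1')]),
        present_texts=set([('file-id', 'rev-1')]))
    # -> set([])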

Robert Collins (lifeless) wrote:

 review approve

Looks like a TODO:
11:28 < lifeless> + # Also, perf tests:
11:28 < lifeless> + # - if all invs present, then no texts are checked

Please either do it, or remove the comment.

review: Approve
Andrew Bennetts (spiv) wrote:

> Please either do it, or remove the comment.

Oops, removed. Thanks!

Preview Diff

=== modified file 'bzrlib/knit.py'
--- bzrlib/knit.py 2009-04-27 23:14:00 +0000
+++ bzrlib/knit.py 2009-05-12 02:35:10 +0000
@@ -2688,6 +2688,44 @@
         return key[:-1], key[-1]
 
 
+class _KeyRefs(object):
+
+    def __init__(self):
+        # dict mapping 'key' to 'set of keys referring to that key'
+        self.refs = {}
+
+    def add_references(self, key, refs):
+        # Record the new references
+        for referenced in refs:
+            try:
+                needed_by = self.refs[referenced]
+            except KeyError:
+                needed_by = self.refs[referenced] = set()
+            needed_by.add(key)
+        # Discard references satisfied by the new key
+        self.add_key(key)
+
+    def get_unsatisfied_refs(self):
+        return self.refs.iterkeys()
+
+    def add_key(self, key):
+        try:
+            del self.refs[key]
+        except KeyError:
+            # No keys depended on this key.  That's ok.
+            pass
+
+    def add_keys(self, keys):
+        for key in keys:
+            self.add_key(key)
+
+    def get_referrers(self):
+        result = set()
+        for referrers in self.refs.itervalues():
+            result.update(referrers)
+        return result
+
+
 class _KnitGraphIndex(object):
     """A KnitVersionedFiles index layered on GraphIndex."""
 
@@ -2723,9 +2761,9 @@
         self._is_locked = is_locked
         self._missing_compression_parents = set()
         if track_external_parent_refs:
-            self._external_parent_refs = set()
+            self._key_dependencies = _KeyRefs()
         else:
-            self._external_parent_refs = None
+            self._key_dependencies = None
 
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self._graph_index)
@@ -2755,13 +2793,12 @@
 
         keys = {}
         compression_parents = set()
-        parent_refs = self._external_parent_refs
+        key_dependencies = self._key_dependencies
         for (key, options, access_memo, parents) in records:
             if self._parents:
                 parents = tuple(parents)
-                if parent_refs is not None:
-                    parent_refs.update(parents)
-                    parent_refs.discard(key)
+                if key_dependencies is not None:
+                    key_dependencies.add_references(key, parents)
             index, pos, size = access_memo
             if 'no-eol' in options:
                 value = 'N'
@@ -2829,12 +2866,11 @@
         new_missing = graph_index.external_references(ref_list_num=1)
         new_missing.difference_update(self.get_parent_map(new_missing))
         self._missing_compression_parents.update(new_missing)
-        if self._external_parent_refs is not None:
+        if self._key_dependencies is not None:
             # Add parent refs from graph_index (and discard parent refs that
             # the graph_index has).
             for node in graph_index.iter_all_entries():
-                self._external_parent_refs.update(node[3][0])
-                self._external_parent_refs.discard(node[1])
+                self._key_dependencies.add_references(node[1], node[3][0])
 
     def get_missing_compression_parents(self):
         """Return the keys of missing compression parents.
@@ -2847,9 +2883,9 @@
     def get_missing_parents(self):
         """Return the keys of missing parents."""
         # We may have false positives, so filter those out.
-        self._external_parent_refs.difference_update(
-            self.get_parent_map(self._external_parent_refs))
-        return frozenset(self._external_parent_refs)
+        self._key_dependencies.add_keys(
+            self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))
+        return frozenset(self._key_dependencies.get_unsatisfied_refs())
 
     def _check_read(self):
         """raise if reads are not permitted."""
 
=== modified file 'bzrlib/repository.py'
--- bzrlib/repository.py 2009-05-07 05:08:46 +0000
+++ bzrlib/repository.py 2009-05-12 02:35:10 +0000
@@ -1448,7 +1448,31 @@
         unstacked_inventories = self.inventories._index
         present_inventories = unstacked_inventories.get_parent_map(
             key[-1:] for key in parents)
-        parents.difference_update(present_inventories)
+        if len(parents.difference(present_inventories)) == 0:
+            # No missing parent inventories.
+            return set()
+        # Ok, now we have a list of missing inventories.  But these only matter
+        # if the inventories that reference them are missing some texts they
+        # appear to introduce.
+        # XXX: Texts referenced by all added inventories need to be present,
+        # but at the moment we're only checking for texts referenced by
+        # inventories at the graph's edge.
+        key_deps = self.revisions._index._key_dependencies
+        key_deps.add_keys(present_inventories)
+        referrers = frozenset(r[0] for r in key_deps.get_referrers())
+        file_ids = self.fileids_altered_by_revision_ids(referrers)
+        missing_texts = set()
+        for file_id, version_ids in file_ids.iteritems():
+            missing_texts.update(
+                (file_id, version_id) for version_id in version_ids)
+        present_texts = self.texts.get_parent_map(missing_texts)
+        missing_texts.difference_update(present_texts)
+        if not missing_texts:
+            # No texts are missing, so all revisions and their deltas are
+            # reconstructable.
+            return set()
+        # Alternatively the text versions could be returned as the missing
+        # keys, but this is likely to be less data.
         missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
         return missing_keys
 
@@ -3993,6 +4017,7 @@
     def _locked_insert_stream(self, stream, src_format):
         to_serializer = self.target_repo._format._serializer
         src_serializer = src_format._serializer
+        new_pack = None
         if to_serializer == src_serializer:
             # If serializers match and the target is a pack repository, set the
             # write cache size on the new pack. This avoids poor performance
@@ -4039,6 +4064,11 @@
                 self.target_repo.signatures.insert_record_stream(substream)
             else:
                 raise AssertionError('kaboom! %s' % (substream_type,))
+        # Done inserting data, and the missing_keys calculations will try to
+        # read back from the inserted data, so flush the writes to the new pack
+        # (if this is pack format).
+        if new_pack is not None:
+            new_pack._write_data('', flush=True)
         # Find all the new revisions (including ones from resume_tokens)
         missing_keys = self.target_repo.get_missing_parent_inventories()
         try:
 
=== modified file 'bzrlib/tests/per_repository/test_write_group.py'
--- bzrlib/tests/per_repository/test_write_group.py 2009-04-21 07:22:04 +0000
+++ bzrlib/tests/per_repository/test_write_group.py 2009-05-12 02:35:10 +0000
@@ -120,6 +120,8 @@
         if token is not None:
             repo.leave_lock_in_place()
 
+class TestGetMissingParentInventories(TestCaseWithRepository):
+
     def test_empty_get_missing_parent_inventories(self):
         """A new write group has no missing parent inventories."""
         repo = self.make_repository('.')
@@ -131,63 +133,50 @@
         repo.commit_write_group()
         repo.unlock()
 
-    def test_get_missing_parent_inventories(self):
-        # Make a trunk with one commit.
-        if isinstance(self.repository_format, remote.RemoteRepositoryFormat):
-            # RemoteRepository by default builds a default format real
-            # repository, but the default format is unstackble. So explicitly
-            # make a stackable real repository and use that.
-            repo = self.make_repository('trunk', format='1.9')
-            repo = bzrdir.BzrDir.open(self.get_url('trunk')).open_repository()
-        else:
-            repo = self.make_repository('trunk')
-        if not repo._format.supports_external_lookups:
-            raise TestNotApplicable('format not stackable')
-        repo.bzrdir._format.set_branch_format(BzrBranchFormat7())
+    def branch_trunk_and_make_tree(self, trunk_repo, relpath):
+        tree = self.make_branch_and_memory_tree('branch')
+        trunk_repo.lock_read()
+        self.addCleanup(trunk_repo.unlock)
+        tree.branch.repository.fetch(trunk_repo, revision_id='rev-1')
+        tree.set_parent_ids(['rev-1'])
+        return tree
+
+    def make_first_commit(self, repo):
         trunk = repo.bzrdir.create_branch()
-        trunk_repo = repo
         tree = memorytree.MemoryTree.create_on_branch(trunk)
         tree.lock_write()
-        if repo._format.rich_root_data:
-            # The tree needs a root
-            tree._inventory.add(InventoryDirectory('the-root-id', '', None))
+        tree.add([''], ['TREE_ROOT'], ['directory'])
+        tree.add(['dir'], ['dir-id'], ['directory'])
+        tree.add(['filename'], ['file-id'], ['file'])
+        tree.put_file_bytes_non_atomic('file-id', 'content\n')
+        tree.commit('Trunk commit', rev_id='rev-0')
         tree.commit('Trunk commit', rev_id='rev-1')
         tree.unlock()
-        # Branch the trunk, add a new commit.
-        tree = self.make_branch_and_tree('branch')
-        trunk_repo.lock_read()
-        self.addCleanup(trunk_repo.unlock)
-        tree.branch.repository.fetch(trunk_repo, revision_id='rev-1')
-        tree.set_parent_ids(['rev-1'])
+
+    def make_new_commit_in_new_repo(self, trunk_repo, parents=None):
+        tree = self.branch_trunk_and_make_tree(trunk_repo, 'branch')
+        tree.set_parent_ids(parents)
         tree.commit('Branch commit', rev_id='rev-2')
         branch_repo = tree.branch.repository
-        # Make a new repo stacked on trunk, and copy the new commit's revision
-        # and inventory records to it.
-        if isinstance(self.repository_format, remote.RemoteRepositoryFormat):
-            # RemoteRepository by default builds a default format real
-            # repository, but the default format is unstackble. So explicitly
-            # make a stackable real repository and use that.
-            repo = self.make_repository('stacked', format='1.9')
-            repo = bzrdir.BzrDir.open(self.get_url('stacked')).open_repository()
-        else:
-            repo = self.make_repository('stacked')
         branch_repo.lock_read()
         self.addCleanup(branch_repo.unlock)
-        repo.add_fallback_repository(trunk.repository)
-        repo.lock_write()
-        repo.start_write_group()
-        trunk_repo.lock_read()
-        repo.inventories.insert_record_stream(
-            branch_repo.inventories.get_record_stream(
-                [('rev-2',)], 'unordered', False))
-        repo.revisions.insert_record_stream(
-            branch_repo.revisions.get_record_stream(
-                [('rev-2',)], 'unordered', False))
-        self.assertEqual(
-            set([('inventories', 'rev-1')]),
-            repo.get_missing_parent_inventories())
-        # Revisions from resumed write groups can also cause missing parent
-        # inventories.
+        return branch_repo
+
+    def make_stackable_repo(self, relpath='trunk'):
+        if isinstance(self.repository_format, remote.RemoteRepositoryFormat):
+            # RemoteRepository by default builds a default format real
+            # repository, but the default format is unstackble. So explicitly
+            # make a stackable real repository and use that.
+            repo = self.make_repository(relpath, format='1.9')
+            repo = bzrdir.BzrDir.open(self.get_url(relpath)).open_repository()
+        else:
+            repo = self.make_repository(relpath)
+        if not repo._format.supports_external_lookups:
+            raise TestNotApplicable('format not stackable')
+        repo.bzrdir._format.set_branch_format(BzrBranchFormat7())
+        return repo
+
+    def reopen_repo_and_resume_write_group(self, repo):
         try:
             resume_tokens = repo.suspend_write_group()
         except errors.UnsuspendableWriteGroup:
@@ -201,9 +190,93 @@
         reopened_repo.lock_write()
         self.addCleanup(reopened_repo.unlock)
         reopened_repo.resume_write_group(resume_tokens)
+        return reopened_repo
+
+    def test_ghost_revision(self):
+        """A parent inventory may be absent if all the needed texts are present.
+        i.e., a ghost revision isn't (necessarily) considered to be a missing
+        parent inventory.
+        """
+        # Make a trunk with one commit.
+        trunk_repo = self.make_stackable_repo()
+        self.make_first_commit(trunk_repo)
+        trunk_repo.lock_read()
+        self.addCleanup(trunk_repo.unlock)
+        # Branch the trunk, add a new commit.
+        branch_repo = self.make_new_commit_in_new_repo(
+            trunk_repo, parents=['rev-1', 'ghost-rev'])
+        inv = branch_repo.get_inventory('rev-2')
+        # Make a new repo stacked on trunk, and then copy into it:
+        #  - all texts in rev-2
+        #  - the new inventory (rev-2)
+        #  - the new revision (rev-2)
+        repo = self.make_stackable_repo('stacked')
+        repo.lock_write()
+        repo.start_write_group()
+        # Add all texts from in rev-2 inventory.  Note that this has to exclude
+        # the root if the repo format does not support rich roots.
+        rich_root = branch_repo._format.rich_root_data
+        all_texts = [
+            (ie.file_id, ie.revision) for ie in inv.iter_just_entries()
+            if rich_root or inv.id2path(ie.file_id) != '']
+        repo.texts.insert_record_stream(
+            branch_repo.texts.get_record_stream(all_texts, 'unordered', False))
+        # Add inventory and revision for rev-2.
+        repo.add_inventory('rev-2', inv, ['rev-1', 'ghost-rev'])
+        repo.revisions.insert_record_stream(
+            branch_repo.revisions.get_record_stream(
+                [('rev-2',)], 'unordered', False))
+        # Now, no inventories are reported as missing, even though there is a
+        # ghost.
+        self.assertEqual(set(), repo.get_missing_parent_inventories())
+        # Resuming the write group does not affect
+        # get_missing_parent_inventories.
+        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
+        self.assertEqual(set(), reopened_repo.get_missing_parent_inventories())
+        reopened_repo.abort_write_group()
+
+    def test_get_missing_parent_inventories(self):
+        """A stacked repo with a single revision and inventory (no parent
+        inventory) in it must have all the texts in its inventory (even if not
+        changed w.r.t. to the absent parent), otherwise it will report missing
+        texts/parent inventory.
+
+        The core of this test is that a file was changed in rev-1, but in a
+        stacked repo that only has rev-2
+        """
+        # Make a trunk with one commit.
+        trunk_repo = self.make_stackable_repo()
+        self.make_first_commit(trunk_repo)
+        trunk_repo.lock_read()
+        self.addCleanup(trunk_repo.unlock)
+        # Branch the trunk, add a new commit.
+        branch_repo = self.make_new_commit_in_new_repo(
+            trunk_repo, parents=['rev-1'])
+        inv = branch_repo.get_inventory('rev-2')
+        # Make a new repo stacked on trunk, and copy the new commit's revision
+        # and inventory records to it.
+        repo = self.make_stackable_repo('stacked')
+        repo.lock_write()
+        repo.start_write_group()
+        # Insert a single fulltext inv (using add_inventory because it's
+        # simpler than insert_record_stream)
+        repo.add_inventory('rev-2', inv, ['rev-1'])
+        repo.revisions.insert_record_stream(
+            branch_repo.revisions.get_record_stream(
+                [('rev-2',)], 'unordered', False))
+        # There should be no missing compression parents
+        self.assertEqual(set(),
+            repo.inventories.get_missing_compression_parent_keys())
+        self.assertEqual(
+            set([('inventories', 'rev-1')]),
+            repo.get_missing_parent_inventories())
+        # Resuming the write group does not affect
+        # get_missing_parent_inventories.
+        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
         self.assertEqual(
             set([('inventories', 'rev-1')]),
             reopened_repo.get_missing_parent_inventories())
+        # Adding the parent inventory satisfies get_missing_parent_inventories.
         reopened_repo.inventories.insert_record_stream(
             branch_repo.inventories.get_record_stream(
                 [('rev-1',)], 'unordered', False))