1
=== modified file 'NEWS'
2
--- NEWS	2009-08-31 00:25:33 +0000
3
+++ NEWS	2009-09-02 07:35:10 +0000
4
@@ -112,6 +112,7 @@
5
112
* ``bzr push`` locally on windows will no longer give a locking error with
112
* ``bzr push`` locally on windows will no longer give a locking error with
6
113
  dirstate based formats. (Robert Collins)
113
  dirstate based formats. (Robert Collins)
7
114
114
8
115
<<<<<<< TREE
9
115
* ``bzr shelve`` and ``bzr unshelve`` now work on windows.
116
* ``bzr shelve`` and ``bzr unshelve`` now work on windows.
10
116
  (Robert Collins, #305006)
117
  (Robert Collins, #305006)
11
117
118
12
@@ -126,6 +127,12 @@
13
126
  fixes a number of causes for ``InconsistentDelta`` errors, and permits
127
  fixes a number of causes for ``InconsistentDelta`` errors, and permits
14
127
  faster commit of specific paths. (Robert Collins, #347649)
128
  faster commit of specific paths. (Robert Collins, #347649)
15
128
129
16
130
=======
17
131
* ``groupcompress`` sort order is now more stable, rather than relying on
18
132
  ``topo_sort`` ordering. The implementation is now
19
133
  ``KnownGraph.gc_sort``. (John Arbash Meinel)
20
134
21
135
>>>>>>> MERGE-SOURCE
22
129
Documentation
136
Documentation
23
130
*************
137
*************
24
131
138
25
132
139
26
=== modified file 'bzrlib/_known_graph_py.py'
27
--- bzrlib/_known_graph_py.py	2009-08-17 20:41:26 +0000
28
+++ bzrlib/_known_graph_py.py	2009-09-02 07:35:10 +0000
29
@@ -97,6 +97,10 @@
30
97
        return [node for node in self._nodes.itervalues()
97
        return [node for node in self._nodes.itervalues()
31
98
                if not node.parent_keys]
98
                if not node.parent_keys]
32
99
99
33
100
    def _find_tips(self):
34
101
        return [node for node in self._nodes.itervalues()
35
102
                      if not node.child_keys]
36
103
37
100
    def _find_gdfo(self):
104
    def _find_gdfo(self):
38
101
        nodes = self._nodes
105
        nodes = self._nodes
39
102
        known_parent_gdfos = {}
106
        known_parent_gdfos = {}
40
@@ -218,6 +222,51 @@
41
218
        # We started from the parents, so we don't need to do anymore work
222
        # We started from the parents, so we don't need to do anymore work
42
219
        return topo_order
223
        return topo_order
43
220
224
44
225
    def gc_sort(self):
45
226
        """Return a reverse topological ordering which is 'stable'.
46
227
47
228
        There are a few constraints:
48
229
          1) Reverse topological (all children before all parents)
49
230
          2) Grouped by prefix
50
231
          3) 'stable' sorting, so that we get the same result, independent of
51
232
             machine, or extra data.
52
233
        To do this, we use the same basic algorithm as topo_sort, but when we
53
234
        aren't sure what node to access next, we sort them lexicographically.
54
235
        """
55
236
        tips = self._find_tips()
56
237
        # Split the tips based on prefix
57
238
        prefix_tips = {}
58
239
        for node in tips:
59
240
            if node.key.__class__ is str or len(node.key) == 1:
60
241
                prefix = ''
61
242
            else:
62
243
                prefix = node.key[0]
63
244
            prefix_tips.setdefault(prefix, []).append(node)
64
245
65
246
        num_seen_children = dict.fromkeys(self._nodes, 0)
66
247
67
248
        result = []
68
249
        for prefix in sorted(prefix_tips):
69
250
            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
70
251
                             reverse=True)
71
252
            while pending:
72
253
                node = pending.pop()
73
254
                if node.parent_keys is None:
74
255
                    # Ghost node, skip it
75
256
                    continue
76
257
                result.append(node.key)
77
258
                for parent_key in sorted(node.parent_keys, reverse=True):
78
259
                    parent_node = self._nodes[parent_key]
79
260
                    seen_children = num_seen_children[parent_key] + 1
80
261
                    if seen_children == len(parent_node.child_keys):
81
262
                        # All children have been processed, enqueue this parent
82
263
                        pending.append(parent_node)
83
264
                        # This has been queued up, stop tracking it
84
265
                        del num_seen_children[parent_key]
85
266
                    else:
86
267
                        num_seen_children[parent_key] = seen_children
87
268
        return result
88
269
89
221
    def merge_sort(self, tip_key):
270
    def merge_sort(self, tip_key):
90
222
        """Compute the merge sorted graph output."""
271
        """Compute the merge sorted graph output."""
91
223
        from bzrlib import tsort
272
        from bzrlib import tsort
92
224
273
93
=== modified file 'bzrlib/_known_graph_pyx.pyx'
94
--- bzrlib/_known_graph_pyx.pyx	2009-08-26 16:03:59 +0000
95
+++ bzrlib/_known_graph_pyx.pyx	2009-09-02 07:35:10 +0000
96
@@ -25,11 +25,18 @@
97
25
    ctypedef struct PyObject:
25
    ctypedef struct PyObject:
98
26
        pass
26
        pass
99
27
27
100
28
    int PyString_CheckExact(object)
101
29
102
30
    int PyObject_RichCompareBool(object, object, int)
103
31
    int Py_LT
104
32
105
33
    int PyTuple_CheckExact(object)
106
28
    object PyTuple_New(Py_ssize_t n)
34
    object PyTuple_New(Py_ssize_t n)
107
29
    Py_ssize_t PyTuple_GET_SIZE(object t)
35
    Py_ssize_t PyTuple_GET_SIZE(object t)
108
30
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
36
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
109
31
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
37
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
110
32
38
111
39
    int PyList_CheckExact(object)
112
33
    Py_ssize_t PyList_GET_SIZE(object l)
40
    Py_ssize_t PyList_GET_SIZE(object l)
113
34
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
41
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
114
35
    int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
42
    int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
115
@@ -108,14 +115,65 @@
116
108
    return <_KnownGraphNode>temp_node
115
    return <_KnownGraphNode>temp_node
117
109
116
118
110
117
120
111
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
118
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
121
112
    cdef PyObject *temp_node
119
    cdef PyObject *temp_node
122
113
    cdef _KnownGraphNode node
123
114
120
125
115
    temp_node = PyTuple_GET_ITEM(parents, pos)
121
    temp_node = PyTuple_GET_ITEM(tpl, pos)
126
116
    return <_KnownGraphNode>temp_node
122
    return <_KnownGraphNode>temp_node
127
117
123
128
118
124
129
125
def get_key(node):
130
126
    cdef _KnownGraphNode real_node
131
127
    real_node = node
132
128
    return real_node.key
133
129
134
130
135
131
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
136
132
    """Sort a list of _KnownGraphNode objects.
137
133
138
134
    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
139
135
    just return the input list if everything is already sorted.
140
136
    """
141
137
    cdef _KnownGraphNode node1, node2
142
138
    cdef int do_swap, is_tuple
143
139
    cdef Py_ssize_t length
144
140
145
141
    is_tuple = PyTuple_CheckExact(lst_or_tpl)
146
142
    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
147
143
        raise TypeError('lst_or_tpl must be a list or tuple.')
148
144
    length = len(lst_or_tpl)
149
145
    if length == 0 or length == 1:
150
146
        return lst_or_tpl
151
147
    if length == 2:
152
148
        if is_tuple:
153
149
            node1 = _get_tuple_node(lst_or_tpl, 0)
154
150
            node2 = _get_tuple_node(lst_or_tpl, 1)
155
151
        else:
156
152
            node1 = _get_list_node(lst_or_tpl, 0)
157
153
            node2 = _get_list_node(lst_or_tpl, 1)
158
154
        if reverse:
159
155
            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
160
156
        else:
161
157
            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
162
158
        if not do_swap:
163
159
            return lst_or_tpl
164
160
        if is_tuple:
165
161
            return (node2, node1)
166
162
        else:
167
163
            # Swap 'in-place', since lists are mutable
168
164
            Py_INCREF(node1)
169
165
            PyList_SetItem(lst_or_tpl, 1, node1)
170
166
            Py_INCREF(node2)
171
167
            PyList_SetItem(lst_or_tpl, 0, node2)
172
168
            return lst_or_tpl
173
169
    # For all other sizes, we just use 'sorted()'
174
170
    if is_tuple:
175
171
        # Note that sorted() is just list(iterable).sort()
176
172
        lst_or_tpl = list(lst_or_tpl)
177
173
    lst_or_tpl.sort(key=get_key, reverse=reverse)
178
174
    return lst_or_tpl
179
175
180
176
181
119
cdef class _MergeSorter
177
cdef class _MergeSorter
182
120
178
183
121
cdef class KnownGraph:
179
cdef class KnownGraph:
184
@@ -216,6 +274,19 @@
185
216
                PyList_Append(tails, node)
274
                PyList_Append(tails, node)
186
217
        return tails
275
        return tails
187
218
276
188
277
    def _find_tips(self):
189
278
        cdef PyObject *temp_node
190
279
        cdef _KnownGraphNode node
191
280
        cdef Py_ssize_t pos
192
281
193
282
        tips = []
194
283
        pos = 0
195
284
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
196
285
            node = <_KnownGraphNode>temp_node
197
286
            if PyList_GET_SIZE(node.children) == 0:
198
287
                PyList_Append(tips, node)
199
288
        return tips
200
289
201
219
    def _find_gdfo(self):
290
    def _find_gdfo(self):
202
220
        cdef _KnownGraphNode node
291
        cdef _KnownGraphNode node
203
221
        cdef _KnownGraphNode child
292
        cdef _KnownGraphNode child
204
@@ -322,7 +393,7 @@
205
322
                continue
393
                continue
206
323
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
394
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
207
324
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
395
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
209
325
                    parent_node = _get_parent(node.parents, pos)
396
                    parent_node = _get_tuple_node(node.parents, pos)
210
326
                    last_item = last_item + 1
397
                    last_item = last_item + 1
211
327
                    if last_item < PyList_GET_SIZE(pending):
398
                    if last_item < PyList_GET_SIZE(pending):
212
328
                        Py_INCREF(parent_node) # SetItem steals a ref
399
                        Py_INCREF(parent_node) # SetItem steals a ref
213
@@ -397,6 +468,77 @@
214
397
        # We started from the parents, so we don't need to do anymore work
468
        # We started from the parents, so we don't need to do anymore work
215
398
        return topo_order
469
        return topo_order
216
399
470
217
471
    def gc_sort(self):
218
472
        """Return a reverse topological ordering which is 'stable'.
219
473
220
474
        There are a few constraints:
221
475
          1) Reverse topological (all children before all parents)
222
476
          2) Grouped by prefix
223
477
          3) 'stable' sorting, so that we get the same result, independent of
224
478
             machine, or extra data.
225
479
        To do this, we use the same basic algorithm as topo_sort, but when we
226
480
        aren't sure what node to access next, we sort them lexicographically.
227
481
        """
228
482
        cdef PyObject *temp
229
483
        cdef Py_ssize_t pos, last_item
230
484
        cdef _KnownGraphNode node, node2, parent_node
231
485
232
486
        tips = self._find_tips()
233
487
        # Split the tips based on prefix
234
488
        prefix_tips = {}
235
489
        for pos from 0 <= pos < PyList_GET_SIZE(tips):
236
490
            node = _get_list_node(tips, pos)
237
491
            if PyString_CheckExact(node.key) or len(node.key) == 1:
238
492
                prefix = ''
239
493
            else:
240
494
                prefix = node.key[0]
241
495
            temp = PyDict_GetItem(prefix_tips, prefix)
242
496
            if temp == NULL:
243
497
                prefix_tips[prefix] = [node]
244
498
            else:
245
499
                tip_nodes = <object>temp
246
500
                PyList_Append(tip_nodes, node)
247
501
248
502
        result = []
249
503
        for prefix in sorted(prefix_tips):
250
504
            temp = PyDict_GetItem(prefix_tips, prefix)
251
505
            assert temp != NULL
252
506
            tip_nodes = <object>temp
253
507
            pending = _sort_list_nodes(tip_nodes, 1)
254
508
            last_item = PyList_GET_SIZE(pending) - 1
255
509
            while last_item >= 0:
256
510
                node = _get_list_node(pending, last_item)
257
511
                last_item = last_item - 1
258
512
                if node.parents is None:
259
513
                    # Ghost
260
514
                    continue
261
515
                PyList_Append(result, node.key)
262
516
                # Sorting the parent keys isn't strictly necessary for stable
263
517
                # sorting of a given graph. But it does help minimize the
264
518
                # differences between graphs
265
519
                # For bzr.dev ancestry:
266
520
                #   4.73ms  no sort
267
521
                #   7.73ms  RichCompareBool sort
268
522
                parents = _sort_list_nodes(node.parents, 1)
269
523
                for pos from 0 <= pos < len(parents):
270
524
                    if PyTuple_CheckExact(parents):
271
525
                        parent_node = _get_tuple_node(parents, pos)
272
526
                    else:
273
527
                        parent_node = _get_list_node(parents, pos)
274
528
                    # TODO: GraphCycle detection
275
529
                    parent_node.seen = parent_node.seen + 1
276
530
                    if (parent_node.seen
277
531
                        == PyList_GET_SIZE(parent_node.children)):
278
532
                        # All children have been processed, queue up this
279
533
                        # parent
280
534
                        last_item = last_item + 1
281
535
                        if last_item < PyList_GET_SIZE(pending):
282
536
                            Py_INCREF(parent_node) # SetItem steals a ref
283
537
                            PyList_SetItem(pending, last_item, parent_node)
284
538
                        else:
285
539
                            PyList_Append(pending, parent_node)
286
540
                        parent_node.seen = 0
287
541
        return result
288
400
542
289
401
    def merge_sort(self, tip_key):
543
    def merge_sort(self, tip_key):
290
402
        """Compute the merge sorted graph output."""
544
        """Compute the merge sorted graph output."""
291
@@ -522,7 +664,7 @@
292
522
            raise RuntimeError('ghost nodes should not be pushed'
664
            raise RuntimeError('ghost nodes should not be pushed'
293
523
                               ' onto the stack: %s' % (node,))
665
                               ' onto the stack: %s' % (node,))
294
524
        if PyTuple_GET_SIZE(node.parents) > 0:
666
        if PyTuple_GET_SIZE(node.parents) > 0:
296
525
            parent_node = _get_parent(node.parents, 0)
667
            parent_node = _get_tuple_node(node.parents, 0)
297
526
            ms_node.left_parent = parent_node
668
            ms_node.left_parent = parent_node
298
527
            if parent_node.parents is None: # left-hand ghost
669
            if parent_node.parents is None: # left-hand ghost
299
528
                ms_node.left_pending_parent = None
670
                ms_node.left_pending_parent = None
300
@@ -532,7 +674,7 @@
301
532
        if PyTuple_GET_SIZE(node.parents) > 1:
674
        if PyTuple_GET_SIZE(node.parents) > 1:
302
533
            ms_node.pending_parents = []
675
            ms_node.pending_parents = []
303
534
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
676
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
305
535
                parent_node = _get_parent(node.parents, pos)
677
                parent_node = _get_tuple_node(node.parents, pos)
306
536
                if parent_node.parents is None: # ghost
678
                if parent_node.parents is None: # ghost
307
537
                    continue
679
                    continue
308
538
                PyList_Append(ms_node.pending_parents, parent_node)
680
                PyList_Append(ms_node.pending_parents, parent_node)
309
539
681
310
=== modified file 'bzrlib/tests/test__known_graph.py'
311
--- bzrlib/tests/test__known_graph.py	2009-08-26 16:03:59 +0000
312
+++ bzrlib/tests/test__known_graph.py	2009-09-02 07:35:10 +0000
313
@@ -768,3 +768,70 @@
314
768
                },
768
                },
315
769
                'E',
769
                'E',
316
770
                [])
770
                [])
317
771
318
772
319
773
class TestKnownGraphStableReverseTopoSort(TestCaseWithKnownGraph):
320
774
    """Test the sort order returned by gc_sort."""
321
775
322
776
    def assertSorted(self, expected, parent_map):
323
777
        graph = self.make_known_graph(parent_map)
324
778
        value = graph.gc_sort()
325
779
        if expected != value:
326
780
            self.assertEqualDiff(pprint.pformat(expected),
327
781
                                 pprint.pformat(value))
328
782
329
783
    def test_empty(self):
330
784
        self.assertSorted([], {})
331
785
332
786
    def test_single(self):
333
787
        self.assertSorted(['a'], {'a':()})
334
788
        self.assertSorted([('a',)], {('a',):()})
335
789
        self.assertSorted([('F', 'a')], {('F', 'a'):()})
336
790
337
791
    def test_linear(self):
338
792
        self.assertSorted(['c', 'b', 'a'], {'a':(), 'b':('a',), 'c':('b',)})
339
793
        self.assertSorted([('c',), ('b',), ('a',)],
340
794
                          {('a',):(), ('b',): (('a',),), ('c',): (('b',),)})
341
795
        self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a')],
342
796
                          {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
343
797
                           ('F', 'c'): (('F', 'b'),)})
344
798
345
799
    def test_mixed_ancestries(self):
346
800
        # Each prefix should be sorted separately
347
801
        self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a'),
348
802
                           ('G', 'c'), ('G', 'b'), ('G', 'a'),
349
803
                           ('Q', 'c'), ('Q', 'b'), ('Q', 'a'),
350
804
                          ],
351
805
                          {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
352
806
                           ('F', 'c'): (('F', 'b'),),
353
807
                           ('G', 'a'):(), ('G', 'b'): (('G', 'a'),),
354
808
                           ('G', 'c'): (('G', 'b'),),
355
809
                           ('Q', 'a'):(), ('Q', 'b'): (('Q', 'a'),),
356
810
                           ('Q', 'c'): (('Q', 'b'),),
357
811
                          })
358
812
359
813
    def test_stable_sorting(self):
360
814
        # the sort order should be stable even when extra nodes are added
361
815
        self.assertSorted(['b', 'c', 'a'],
362
816
                          {'a':(), 'b':('a',), 'c':('a',)})
363
817
        self.assertSorted(['b', 'c', 'd', 'a'],
364
818
                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
365
819
        self.assertSorted(['b', 'c', 'd', 'a'],
366
820
                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
367
821
        self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
368
822
                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
369
823
                           'Z':('a',)})
370
824
        self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
371
825
                          {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
372
826
                           'Z':('a',),
373
827
                           'e':('b', 'c', 'd'),
374
828
                           'f':('d', 'Z'),
375
829
                           })
376
830
377
831
    def test_skip_ghost(self):
378
832
        self.assertSorted(['b', 'c', 'a'],
379
833
                          {'a':(), 'b':('a', 'ghost'), 'c':('a',)})
380
834
381
835
    def test_skip_mainline_ghost(self):
382
836
        self.assertSorted(['b', 'c', 'a'],
383
837
                          {'a':(), 'b':('ghost', 'a'), 'c':('a',)})
Status:	Merged
Merged at revision:	not available
Proposed branch:	lp:~jameinel/bzr/2.0b1-stable-groupcompress-order
Merge into:	lp:bzr/2.0
Diff against target:	383 lines (has conflicts) Text conflict in NEWS
To merge this branch:	bzr merge lp:~jameinel/bzr/2.0b1-stable-groupcompress-order
Related bugs:	Link a bug report
Reviewer	Review Type	Date Requested	Status
bzr-core		2009-09-02	Pending
Review via email: mp+11039@code.launchpad.net