Merge lp:~jameinel/bzr/2.0b1-stable-groupcompress-order into lp:bzr/2.0

Proposed by Vincent Ladeuil
Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/bzr/2.0b1-stable-groupcompress-order
Merge into: lp:bzr/2.0
Diff against target: 383 lines (has conflicts)
Text conflict in NEWS
To merge this branch: bzr merge lp:~jameinel/bzr/2.0b1-stable-groupcompress-order
Reviewer Review Type Date Requested Status
bzr-core Pending
Review via email: mp+11039@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Vincent Ladeuil (vila) wrote :

I think it's important to have that patch merged in the 2.0 branch.

It allows tweaking the gc order.

Revision history for this message
Vincent Ladeuil (vila) wrote :

Since I made the merge proposal on behalf of John, I'm unclear about whether I should vote myself or if the proposal is enough to indicate that.

Anyway, if nobody objects, John, please merge.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'NEWS'
--- NEWS 2009-08-31 00:25:33 +0000
+++ NEWS 2009-09-02 07:35:10 +0000
@@ -112,6 +112,7 @@
112* ``bzr push`` locally on windows will no longer give a locking error with112* ``bzr push`` locally on windows will no longer give a locking error with
113 dirstate based formats. (Robert Collins)113 dirstate based formats. (Robert Collins)
114114
115<<<<<<< TREE
115* ``bzr shelve`` and ``bzr unshelve`` now work on windows.116* ``bzr shelve`` and ``bzr unshelve`` now work on windows.
116 (Robert Collins, #305006)117 (Robert Collins, #305006)
117118
@@ -126,6 +127,12 @@
126 fixes a number of causes for ``InconsistentDelta`` errors, and permits127 fixes a number of causes for ``InconsistentDelta`` errors, and permits
127 faster commit of specific paths. (Robert Collins, #347649)128 faster commit of specific paths. (Robert Collins, #347649)
128129
130=======
131* ``groupcompress`` sort order is now more stable, rather than relying on
132 ``topo_sort`` ordering. The implementation is now
133 ``KnownGraph.gc_sort``. (John Arbash Meinel)
134
135>>>>>>> MERGE-SOURCE
129Documentation136Documentation
130*************137*************
131138
132139
=== modified file 'bzrlib/_known_graph_py.py'
--- bzrlib/_known_graph_py.py 2009-08-17 20:41:26 +0000
+++ bzrlib/_known_graph_py.py 2009-09-02 07:35:10 +0000
@@ -97,6 +97,10 @@
97 return [node for node in self._nodes.itervalues()97 return [node for node in self._nodes.itervalues()
98 if not node.parent_keys]98 if not node.parent_keys]
9999
100 def _find_tips(self):
101 return [node for node in self._nodes.itervalues()
102 if not node.child_keys]
103
100 def _find_gdfo(self):104 def _find_gdfo(self):
101 nodes = self._nodes105 nodes = self._nodes
102 known_parent_gdfos = {}106 known_parent_gdfos = {}
@@ -218,6 +222,51 @@
218 # We started from the parents, so we don't need to do anymore work222 # We started from the parents, so we don't need to do anymore work
219 return topo_order223 return topo_order
220224
225 def gc_sort(self):
226 """Return a reverse topological ordering which is 'stable'.
227
228 There are a few constraints:
229 1) Reverse topological (all children before all parents)
230 2) Grouped by prefix
231 3) 'stable' sorting, so that we get the same result, independent of
232 machine, or extra data.
233 To do this, we use the same basic algorithm as topo_sort, but when we
234 aren't sure what node to access next, we sort them lexicographically.
235 """
236 tips = self._find_tips()
237 # Split the tips based on prefix
238 prefix_tips = {}
239 for node in tips:
240 if node.key.__class__ is str or len(node.key) == 1:
241 prefix = ''
242 else:
243 prefix = node.key[0]
244 prefix_tips.setdefault(prefix, []).append(node)
245
246 num_seen_children = dict.fromkeys(self._nodes, 0)
247
248 result = []
249 for prefix in sorted(prefix_tips):
250 pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
251 reverse=True)
252 while pending:
253 node = pending.pop()
254 if node.parent_keys is None:
255 # Ghost node, skip it
256 continue
257 result.append(node.key)
258 for parent_key in sorted(node.parent_keys, reverse=True):
259 parent_node = self._nodes[parent_key]
260 seen_children = num_seen_children[parent_key] + 1
261 if seen_children == len(parent_node.child_keys):
262 # All children have been processed, enqueue this parent
263 pending.append(parent_node)
264 # This has been queued up, stop tracking it
265 del num_seen_children[parent_key]
266 else:
267 num_seen_children[parent_key] = seen_children
268 return result
269
221 def merge_sort(self, tip_key):270 def merge_sort(self, tip_key):
222 """Compute the merge sorted graph output."""271 """Compute the merge sorted graph output."""
223 from bzrlib import tsort272 from bzrlib import tsort
224273
=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- bzrlib/_known_graph_pyx.pyx 2009-08-26 16:03:59 +0000
+++ bzrlib/_known_graph_pyx.pyx 2009-09-02 07:35:10 +0000
@@ -25,11 +25,18 @@
25 ctypedef struct PyObject:25 ctypedef struct PyObject:
26 pass26 pass
2727
28 int PyString_CheckExact(object)
29
30 int PyObject_RichCompareBool(object, object, int)
31 int Py_LT
32
33 int PyTuple_CheckExact(object)
28 object PyTuple_New(Py_ssize_t n)34 object PyTuple_New(Py_ssize_t n)
29 Py_ssize_t PyTuple_GET_SIZE(object t)35 Py_ssize_t PyTuple_GET_SIZE(object t)
30 PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)36 PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
31 void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)37 void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
3238
39 int PyList_CheckExact(object)
33 Py_ssize_t PyList_GET_SIZE(object l)40 Py_ssize_t PyList_GET_SIZE(object l)
34 PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)41 PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
35 int PyList_SetItem(object l, Py_ssize_t o, object l) except -142 int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
@@ -108,14 +115,65 @@
108 return <_KnownGraphNode>temp_node115 return <_KnownGraphNode>temp_node
109116
110117
111cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):118cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
112 cdef PyObject *temp_node119 cdef PyObject *temp_node
113 cdef _KnownGraphNode node
114120
115 temp_node = PyTuple_GET_ITEM(parents, pos)121 temp_node = PyTuple_GET_ITEM(tpl, pos)
116 return <_KnownGraphNode>temp_node122 return <_KnownGraphNode>temp_node
117123
118124
125def get_key(node):
126 cdef _KnownGraphNode real_node
127 real_node = node
128 return real_node.key
129
130
131cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
132 """Sort a list of _KnownGraphNode objects.
133
134 If lst_or_tpl is a list, it is allowed to mutate in place. It may also
135 just return the input list if everything is already sorted.
136 """
137 cdef _KnownGraphNode node1, node2
138 cdef int do_swap, is_tuple
139 cdef Py_ssize_t length
140
141 is_tuple = PyTuple_CheckExact(lst_or_tpl)
142 if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
143 raise TypeError('lst_or_tpl must be a list or tuple.')
144 length = len(lst_or_tpl)
145 if length == 0 or length == 1:
146 return lst_or_tpl
147 if length == 2:
148 if is_tuple:
149 node1 = _get_tuple_node(lst_or_tpl, 0)
150 node2 = _get_tuple_node(lst_or_tpl, 1)
151 else:
152 node1 = _get_list_node(lst_or_tpl, 0)
153 node2 = _get_list_node(lst_or_tpl, 1)
154 if reverse:
155 do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
156 else:
157 do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
158 if not do_swap:
159 return lst_or_tpl
160 if is_tuple:
161 return (node2, node1)
162 else:
163 # Swap 'in-place', since lists are mutable
164 Py_INCREF(node1)
165 PyList_SetItem(lst_or_tpl, 1, node1)
166 Py_INCREF(node2)
167 PyList_SetItem(lst_or_tpl, 0, node2)
168 return lst_or_tpl
169 # For all other sizes, we just use 'sorted()'
170 if is_tuple:
171 # Note that sorted() is just list(iterable).sort()
172 lst_or_tpl = list(lst_or_tpl)
173 lst_or_tpl.sort(key=get_key, reverse=reverse)
174 return lst_or_tpl
175
176
119cdef class _MergeSorter177cdef class _MergeSorter
120178
121cdef class KnownGraph:179cdef class KnownGraph:
@@ -216,6 +274,19 @@
216 PyList_Append(tails, node)274 PyList_Append(tails, node)
217 return tails275 return tails
218276
277 def _find_tips(self):
278 cdef PyObject *temp_node
279 cdef _KnownGraphNode node
280 cdef Py_ssize_t pos
281
282 tips = []
283 pos = 0
284 while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
285 node = <_KnownGraphNode>temp_node
286 if PyList_GET_SIZE(node.children) == 0:
287 PyList_Append(tips, node)
288 return tips
289
219 def _find_gdfo(self):290 def _find_gdfo(self):
220 cdef _KnownGraphNode node291 cdef _KnownGraphNode node
221 cdef _KnownGraphNode child292 cdef _KnownGraphNode child
@@ -322,7 +393,7 @@
322 continue393 continue
323 if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:394 if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
324 for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):395 for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
325 parent_node = _get_parent(node.parents, pos)396 parent_node = _get_tuple_node(node.parents, pos)
326 last_item = last_item + 1397 last_item = last_item + 1
327 if last_item < PyList_GET_SIZE(pending):398 if last_item < PyList_GET_SIZE(pending):
328 Py_INCREF(parent_node) # SetItem steals a ref399 Py_INCREF(parent_node) # SetItem steals a ref
@@ -397,6 +468,77 @@
397 # We started from the parents, so we don't need to do anymore work468 # We started from the parents, so we don't need to do anymore work
398 return topo_order469 return topo_order
399470
471 def gc_sort(self):
472 """Return a reverse topological ordering which is 'stable'.
473
474 There are a few constraints:
475 1) Reverse topological (all children before all parents)
476 2) Grouped by prefix
477 3) 'stable' sorting, so that we get the same result, independent of
478 machine, or extra data.
479 To do this, we use the same basic algorithm as topo_sort, but when we
480 aren't sure what node to access next, we sort them lexicographically.
481 """
482 cdef PyObject *temp
483 cdef Py_ssize_t pos, last_item
484 cdef _KnownGraphNode node, node2, parent_node
485
486 tips = self._find_tips()
487 # Split the tips based on prefix
488 prefix_tips = {}
489 for pos from 0 <= pos < PyList_GET_SIZE(tips):
490 node = _get_list_node(tips, pos)
491 if PyString_CheckExact(node.key) or len(node.key) == 1:
492 prefix = ''
493 else:
494 prefix = node.key[0]
495 temp = PyDict_GetItem(prefix_tips, prefix)
496 if temp == NULL:
497 prefix_tips[prefix] = [node]
498 else:
499 tip_nodes = <object>temp
500 PyList_Append(tip_nodes, node)
501
502 result = []
503 for prefix in sorted(prefix_tips):
504 temp = PyDict_GetItem(prefix_tips, prefix)
505 assert temp != NULL
506 tip_nodes = <object>temp
507 pending = _sort_list_nodes(tip_nodes, 1)
508 last_item = PyList_GET_SIZE(pending) - 1
509 while last_item >= 0:
510 node = _get_list_node(pending, last_item)
511 last_item = last_item - 1
512 if node.parents is None:
513 # Ghost
514 continue
515 PyList_Append(result, node.key)
516 # Sorting the parent keys isn't strictly necessary for stable
517 # sorting of a given graph. But it does help minimize the
518 # differences between graphs
519 # For bzr.dev ancestry:
520 # 4.73ms no sort
521 # 7.73ms RichCompareBool sort
522 parents = _sort_list_nodes(node.parents, 1)
523 for pos from 0 <= pos < len(parents):
524 if PyTuple_CheckExact(parents):
525 parent_node = _get_tuple_node(parents, pos)
526 else:
527 parent_node = _get_list_node(parents, pos)
528 # TODO: GraphCycle detection
529 parent_node.seen = parent_node.seen + 1
530 if (parent_node.seen
531 == PyList_GET_SIZE(parent_node.children)):
532 # All children have been processed, queue up this
533 # parent
534 last_item = last_item + 1
535 if last_item < PyList_GET_SIZE(pending):
536 Py_INCREF(parent_node) # SetItem steals a ref
537 PyList_SetItem(pending, last_item, parent_node)
538 else:
539 PyList_Append(pending, parent_node)
540 parent_node.seen = 0
541 return result
400542
401 def merge_sort(self, tip_key):543 def merge_sort(self, tip_key):
402 """Compute the merge sorted graph output."""544 """Compute the merge sorted graph output."""
@@ -522,7 +664,7 @@
522 raise RuntimeError('ghost nodes should not be pushed'664 raise RuntimeError('ghost nodes should not be pushed'
523 ' onto the stack: %s' % (node,))665 ' onto the stack: %s' % (node,))
524 if PyTuple_GET_SIZE(node.parents) > 0:666 if PyTuple_GET_SIZE(node.parents) > 0:
525 parent_node = _get_parent(node.parents, 0)667 parent_node = _get_tuple_node(node.parents, 0)
526 ms_node.left_parent = parent_node668 ms_node.left_parent = parent_node
527 if parent_node.parents is None: # left-hand ghost669 if parent_node.parents is None: # left-hand ghost
528 ms_node.left_pending_parent = None670 ms_node.left_pending_parent = None
@@ -532,7 +674,7 @@
532 if PyTuple_GET_SIZE(node.parents) > 1:674 if PyTuple_GET_SIZE(node.parents) > 1:
533 ms_node.pending_parents = []675 ms_node.pending_parents = []
534 for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):676 for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
535 parent_node = _get_parent(node.parents, pos)677 parent_node = _get_tuple_node(node.parents, pos)
536 if parent_node.parents is None: # ghost678 if parent_node.parents is None: # ghost
537 continue679 continue
538 PyList_Append(ms_node.pending_parents, parent_node)680 PyList_Append(ms_node.pending_parents, parent_node)
539681
=== modified file 'bzrlib/tests/test__known_graph.py'
--- bzrlib/tests/test__known_graph.py 2009-08-26 16:03:59 +0000
+++ bzrlib/tests/test__known_graph.py 2009-09-02 07:35:10 +0000
@@ -768,3 +768,70 @@
768 },768 },
769 'E',769 'E',
770 [])770 [])
771
772
773class TestKnownGraphStableReverseTopoSort(TestCaseWithKnownGraph):
774 """Test the sort order returned by gc_sort."""
775
776 def assertSorted(self, expected, parent_map):
777 graph = self.make_known_graph(parent_map)
778 value = graph.gc_sort()
779 if expected != value:
780 self.assertEqualDiff(pprint.pformat(expected),
781 pprint.pformat(value))
782
783 def test_empty(self):
784 self.assertSorted([], {})
785
786 def test_single(self):
787 self.assertSorted(['a'], {'a':()})
788 self.assertSorted([('a',)], {('a',):()})
789 self.assertSorted([('F', 'a')], {('F', 'a'):()})
790
791 def test_linear(self):
792 self.assertSorted(['c', 'b', 'a'], {'a':(), 'b':('a',), 'c':('b',)})
793 self.assertSorted([('c',), ('b',), ('a',)],
794 {('a',):(), ('b',): (('a',),), ('c',): (('b',),)})
795 self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a')],
796 {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
797 ('F', 'c'): (('F', 'b'),)})
798
799 def test_mixed_ancestries(self):
800 # Each prefix should be sorted separately
801 self.assertSorted([('F', 'c'), ('F', 'b'), ('F', 'a'),
802 ('G', 'c'), ('G', 'b'), ('G', 'a'),
803 ('Q', 'c'), ('Q', 'b'), ('Q', 'a'),
804 ],
805 {('F', 'a'):(), ('F', 'b'): (('F', 'a'),),
806 ('F', 'c'): (('F', 'b'),),
807 ('G', 'a'):(), ('G', 'b'): (('G', 'a'),),
808 ('G', 'c'): (('G', 'b'),),
809 ('Q', 'a'):(), ('Q', 'b'): (('Q', 'a'),),
810 ('Q', 'c'): (('Q', 'b'),),
811 })
812
813 def test_stable_sorting(self):
814 # the sort order should be stable even when extra nodes are added
815 self.assertSorted(['b', 'c', 'a'],
816 {'a':(), 'b':('a',), 'c':('a',)})
817 self.assertSorted(['b', 'c', 'd', 'a'],
818 {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
819 self.assertSorted(['b', 'c', 'd', 'a'],
820 {'a':(), 'b':('a',), 'c':('a',), 'd':('a',)})
821 self.assertSorted(['Z', 'b', 'c', 'd', 'a'],
822 {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
823 'Z':('a',)})
824 self.assertSorted(['e', 'b', 'c', 'f', 'Z', 'd', 'a'],
825 {'a':(), 'b':('a',), 'c':('a',), 'd':('a',),
826 'Z':('a',),
827 'e':('b', 'c', 'd'),
828 'f':('d', 'Z'),
829 })
830
831 def test_skip_ghost(self):
832 self.assertSorted(['b', 'c', 'a'],
833 {'a':(), 'b':('a', 'ghost'), 'c':('a',)})
834
835 def test_skip_mainline_ghost(self):
836 self.assertSorted(['b', 'c', 'a'],
837 {'a':(), 'b':('ghost', 'a'), 'c':('a',)})

Subscribers

People subscribed via source and target branches