Merge lp:~ian-clatworthy/bzr/faster-diff into lp:~bzr/bzr/trunk-old

Proposed by Ian Clatworthy
Status: Merged
Approved by: Martin Pool
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~ian-clatworthy/bzr/faster-diff
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 103 lines
To merge this branch: bzr merge lp:~ian-clatworthy/bzr/faster-diff
Reviewer Review Type Date Requested Status
Ian Clatworthy Approve
Martin Pool Approve
Review via email: mp+6739@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Ian Clatworthy (ian-clatworthy) wrote :

This small patch makes ``bzr diff`` faster on large trees. On the OpenOffice.org (OOo) tree with a single added file, the time improves from 2.0 seconds to 1.2 seconds.

Revision history for this message
Martin Pool (mbp) :
review: Approve
Revision history for this message
Ian Clatworthy (ian-clatworthy) :
review: Needs Resubmitting
Revision history for this message
Ian Clatworthy (ian-clatworthy) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'NEWS'
--- NEWS 2009-05-28 17:14:38 +0000
+++ NEWS 2009-05-28 18:36:14 +0000
@@ -23,6 +23,8 @@
 
 * ``bzr branch --notree`` is now faster. (Ian Clatworthy)
 
+* ``bzr diff`` is now faster on large trees. (Ian Clatworthy)
+
 Bug Fixes
 *********
 

=== modified file 'bzrlib/diff.py'
--- bzrlib/diff.py 2009-03-28 02:10:54 +0000
+++ bzrlib/diff.py 2009-05-28 18:36:15 +0000
@@ -620,9 +620,11 @@
             return self.CANNOT_DIFF
         from_label = '%s%s\t%s' % (self.old_label, old_path, old_date)
         to_label = '%s%s\t%s' % (self.new_label, new_path, new_date)
-        return self.diff_text(from_file_id, to_file_id, from_label, to_label)
+        return self.diff_text(from_file_id, to_file_id, from_label, to_label,
+            old_path, new_path)
 
-    def diff_text(self, from_file_id, to_file_id, from_label, to_label):
+    def diff_text(self, from_file_id, to_file_id, from_label, to_label,
+        from_path=None, to_path=None):
         """Diff the content of given files in two trees
 
         :param from_file_id: The id of the file in the from tree. If None,
@@ -630,15 +632,17 @@
         :param to_file_id: The id of the file in the to tree. This may refer
             to a different file from from_file_id. If None,
             the file is not present in the to tree.
+        :param from_path: The path in the from tree or None if unknown.
+        :param to_path: The path in the to tree or None if unknown.
         """
-        def _get_text(tree, file_id):
+        def _get_text(tree, file_id, path):
             if file_id is not None:
-                return tree.get_file(file_id).readlines()
+                return tree.get_file(file_id, path).readlines()
             else:
                 return []
         try:
-            from_text = _get_text(self.old_tree, from_file_id)
-            to_text = _get_text(self.new_tree, to_file_id)
+            from_text = _get_text(self.old_tree, from_file_id, from_path)
+            to_text = _get_text(self.new_tree, to_file_id, to_path)
             self.text_differ(from_label, from_text, to_label, to_text,
                              self.to_file)
         except errors.BinaryFile:
@@ -882,7 +886,7 @@
                 self.to_file.write("=== modified %s '%s'%s\n" % (kind[0],
                                    newpath_encoded, prop_str))
                 if changed_content:
-                    self.diff(file_id, oldpath, newpath)
+                    self._diff(file_id, oldpath, newpath, kind[0], kind[1])
                     has_changes = 1
             if renamed:
                 has_changes = 1
@@ -903,7 +907,10 @@
             new_kind = self.new_tree.kind(file_id)
         except (errors.NoSuchId, errors.NoSuchFile):
             new_kind = None
-
+        self._diff(file_id, old_path, new_path, old_kind, new_kind)
+
+
+    def _diff(self, file_id, old_path, new_path, old_kind, new_kind):
         result = DiffPath._diff_many(self.differs, file_id, old_path,
                                      new_path, old_kind, new_kind)
         if result is DiffPath.CANNOT_DIFF:

=== modified file 'bzrlib/workingtree_4.py'
--- bzrlib/workingtree_4.py 2009-05-23 04:55:52 +0000
+++ bzrlib/workingtree_4.py 2009-05-28 18:36:15 +0000
@@ -1572,6 +1572,9 @@
         self._repo_supports_tree_reference = getattr(
             repository._format, "supports_tree_reference",
             False)
+        # Simple cache of last dirstate lookup
+        self._last_entry_lookup = None
+        self._last_entry = None
 
     def __repr__(self):
         return "<%s of %s in %s>" % \
@@ -1644,8 +1647,13 @@
             raise errors.BzrError('must supply file_id or path')
         if path is not None:
             path = path.encode('utf8')
-        parent_index = self._get_parent_index()
-        return self._dirstate._get_entry(parent_index, fileid_utf8=file_id, path_utf8=path)
+        cache_key = file_id or ("path:%s" % path)
+        if self._last_entry_lookup != cache_key:
+            parent_index = self._get_parent_index()
+            self._last_entry = self._dirstate._get_entry(parent_index,
+                fileid_utf8=file_id, path_utf8=path)
+            self._last_entry_lookup = cache_key
+        return self._last_entry
 
     def _generate_inventory(self):
         """Create and set self.inventory from the dirstate object.