Merge lp:~ian-clatworthy/bzr/faster-diff into lp:~bzr/bzr/trunk-old

Proposed by Ian Clatworthy on 2009-05-22
Status: Merged
Approved by: Martin Pool on 2009-05-26
Approved revision: 4378
Merged at revision: not available
Proposed branch: lp:~ian-clatworthy/bzr/faster-diff
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 103 lines
To merge this branch: bzr merge lp:~ian-clatworthy/bzr/faster-diff
| Reviewer       | Review Type | Date Requested | Status                |
| Ian Clatworthy | -           | -              | Approve on 2009-05-27 |
| Martin Pool    | -           | 2009-05-22     | Approve on 2009-05-26 |
Review via email: mp+6739@code.launchpad.net
To post a comment you must log in.
Ian Clatworthy (ian-clatworthy) wrote :

This small patch makes ``bzr diff`` faster on large trees. On the OpenOffice.org (OOo) tree with a single added file, the time improves from 2.0 seconds to 1.2 seconds.

Martin Pool (mbp) :
review: Approve
lp:~ian-clatworthy/bzr/faster-diff updated on 2009-05-27
4379. By Ian Clatworthy on 2009-05-27

cache last dirstate entry found in DirStateRevisionTree

4380. By Ian Clatworthy on 2009-05-27

avoid unnecessary id2path calculation when diffing

review: Resubmit
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'NEWS'
2--- NEWS 2009-05-28 17:14:38 +0000
3+++ NEWS 2009-05-28 18:36:14 +0000
4@@ -23,6 +23,8 @@
5
6 * ``bzr branch --notree`` is now faster. (Ian Clatworthy)
7
8+* ``bzr diff`` is now faster on large trees. (Ian Clatworthy)
9+
10 Bug Fixes
11 *********
12
13
14=== modified file 'bzrlib/diff.py'
15--- bzrlib/diff.py 2009-03-28 02:10:54 +0000
16+++ bzrlib/diff.py 2009-05-28 18:36:15 +0000
17@@ -620,9 +620,11 @@
18 return self.CANNOT_DIFF
19 from_label = '%s%s\t%s' % (self.old_label, old_path, old_date)
20 to_label = '%s%s\t%s' % (self.new_label, new_path, new_date)
21- return self.diff_text(from_file_id, to_file_id, from_label, to_label)
22+ return self.diff_text(from_file_id, to_file_id, from_label, to_label,
23+ old_path, new_path)
24
25- def diff_text(self, from_file_id, to_file_id, from_label, to_label):
26+ def diff_text(self, from_file_id, to_file_id, from_label, to_label,
27+ from_path=None, to_path=None):
28 """Diff the content of given files in two trees
29
30 :param from_file_id: The id of the file in the from tree. If None,
31@@ -630,15 +632,17 @@
32 :param to_file_id: The id of the file in the to tree. This may refer
33 to a different file from from_file_id. If None,
34 the file is not present in the to tree.
35+ :param from_path: The path in the from tree or None if unknown.
36+ :param to_path: The path in the to tree or None if unknown.
37 """
38- def _get_text(tree, file_id):
39+ def _get_text(tree, file_id, path):
40 if file_id is not None:
41- return tree.get_file(file_id).readlines()
42+ return tree.get_file(file_id, path).readlines()
43 else:
44 return []
45 try:
46- from_text = _get_text(self.old_tree, from_file_id)
47- to_text = _get_text(self.new_tree, to_file_id)
48+ from_text = _get_text(self.old_tree, from_file_id, from_path)
49+ to_text = _get_text(self.new_tree, to_file_id, to_path)
50 self.text_differ(from_label, from_text, to_label, to_text,
51 self.to_file)
52 except errors.BinaryFile:
53@@ -882,7 +886,7 @@
54 self.to_file.write("=== modified %s '%s'%s\n" % (kind[0],
55 newpath_encoded, prop_str))
56 if changed_content:
57- self.diff(file_id, oldpath, newpath)
58+ self._diff(file_id, oldpath, newpath, kind[0], kind[1])
59 has_changes = 1
60 if renamed:
61 has_changes = 1
62@@ -903,7 +907,10 @@
63 new_kind = self.new_tree.kind(file_id)
64 except (errors.NoSuchId, errors.NoSuchFile):
65 new_kind = None
66-
67+ self._diff(file_id, old_path, new_path, old_kind, new_kind)
68+
69+
70+ def _diff(self, file_id, old_path, new_path, old_kind, new_kind):
71 result = DiffPath._diff_many(self.differs, file_id, old_path,
72 new_path, old_kind, new_kind)
73 if result is DiffPath.CANNOT_DIFF:
74
75=== modified file 'bzrlib/workingtree_4.py'
76--- bzrlib/workingtree_4.py 2009-05-23 04:55:52 +0000
77+++ bzrlib/workingtree_4.py 2009-05-28 18:36:15 +0000
78@@ -1572,6 +1572,9 @@
79 self._repo_supports_tree_reference = getattr(
80 repository._format, "supports_tree_reference",
81 False)
82+ # Simple cache of last dirstate lookup
83+ self._last_entry_lookup = None
84+ self._last_entry = None
85
86 def __repr__(self):
87 return "<%s of %s in %s>" % \
88@@ -1644,8 +1647,13 @@
89 raise errors.BzrError('must supply file_id or path')
90 if path is not None:
91 path = path.encode('utf8')
92- parent_index = self._get_parent_index()
93- return self._dirstate._get_entry(parent_index, fileid_utf8=file_id, path_utf8=path)
94+ cache_key = file_id or ("path:%s" % path)
95+ if self._last_entry_lookup != cache_key:
96+ parent_index = self._get_parent_index()
97+ self._last_entry = self._dirstate._get_entry(parent_index,
98+ fileid_utf8=file_id, path_utf8=path)
99+ self._last_entry_lookup = cache_key
100+ return self._last_entry
101
102 def _generate_inventory(self):
103 """Create and set self.inventory from the dirstate object.