Merge lp:~bzr/bzr/list-files-norecurse into lp:~bzr/bzr/trunk-old

Proposed by Ian Clatworthy on 2009-05-18
Status: Merged
Approved by: Ian Clatworthy on 2009-06-16
Approved revision: 4373
Merged at revision: not available
Proposed branch: lp:~bzr/bzr/list-files-norecurse
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 272 lines
To merge this branch: bzr merge lp:~bzr/bzr/list-files-norecurse
Reviewer Review Type Date Requested Status
Martin Pool Approve on 2009-06-16
John A Meinel 2009-05-18 Needs Information on 2009-06-01
Review via email: mp+6669@code.launchpad.net
To post a comment you must log in.
Ian Clatworthy (ian-clatworthy) wrote :

This patch is the core of the work to make the ls command faster. 2 new parameters are added to the tree.list_files() API: from_dir and recursive. Numerous tests are added to cover these new parameters.

Implementation-wise, this builds on the iter-entries-norecurse branch.

lp:~bzr/bzr/list-files-norecurse updated on 2009-05-18
4373. By Ian Clatworthy on 2009-05-18

handle unversioned directories

John A Meinel (jameinel) wrote :

I believe Tree.list_files() is actually meant to be a 'deprecated' function that we just never got around to actually deprecating.

I'd rather see the code changed to use Tree.iter_entries_by_dir directly.

Also, as near as I can tell, this doesn't actually update any callers to *use* the new functionality, is that true?

review: Needs Information
Ian Clatworthy (ian-clatworthy) wrote :

John A Meinel wrote:
> Also, as near as I can tell, this doesn't actually update any callers to *use* the new functionality, is that true?
>
That's right. Another patch builds on this one, namely
https://code.edge.launchpad.net/~bzr/bzr/faster-ls/+merge/6670.

Ian C.

Martin Pool (mbp) wrote :

It's ok with me to merge this if you're finding it useful. I think John is right that the interfaces here could be cleaned up though.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bzrlib/revisiontree.py'
--- bzrlib/revisiontree.py 2009-06-10 03:56:49 +0000
+++ bzrlib/revisiontree.py 2009-06-16 02:38:01 +0000
@@ -114,11 +114,18 @@
114 def has_filename(self, filename):114 def has_filename(self, filename):
115 return bool(self.inventory.path2id(filename))115 return bool(self.inventory.path2id(filename))
116116
117 def list_files(self, include_root=False):117 def list_files(self, include_root=False, from_dir=None, recursive=True):
118 # The only files returned by this are those from the version118 # The only files returned by this are those from the version
119 entries = self.inventory.iter_entries()119 inv = self.inventory
120 # skip the root for compatability with the current apis.120 if from_dir is None:
121 if self.inventory.root is not None and not include_root:121 from_dir_id = None
122 else:
123 from_dir_id = inv.path2id(from_dir)
124 if from_dir_id is None:
125 # Directory not versioned
126 return
127 entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
128 if inv.root is not None and not include_root and from_dir is None:
122 # skip the root for compatability with the current apis.129 # skip the root for compatability with the current apis.
123 entries.next()130 entries.next()
124 for path, entry in entries:131 for path, entry in entries:
125132
=== modified file 'bzrlib/tests/tree_implementations/test_list_files.py'
--- bzrlib/tests/tree_implementations/test_list_files.py 2009-03-23 14:59:43 +0000
+++ bzrlib/tests/tree_implementations/test_list_files.py 2009-06-16 02:38:01 +0000
@@ -49,7 +49,68 @@
49 try:49 try:
50 actual = [(path, status, kind, file_id)50 actual = [(path, status, kind, file_id)
51 for path, status, kind, file_id, ie in51 for path, status, kind, file_id, ie in
52 tree.list_files(include_root=False)]52 tree.list_files()]
53 finally:
54 tree.unlock()
55 self.assertEqual(expected, actual)
56
57 def test_list_files_with_root_no_recurse(self):
58 work_tree = self.make_branch_and_tree('wt')
59 tree = self.get_tree_no_parents_abc_content(work_tree)
60 expected = [('', 'V', 'directory', 'root-id'),
61 ('a', 'V', 'file', 'a-id'),
62 ('b', 'V', 'directory', 'b-id'),
63 ]
64 tree.lock_read()
65 try:
66 actual = [(path, status, kind, file_id)
67 for path, status, kind, file_id, ie in
68 tree.list_files(include_root=True, recursive=False)]
69 finally:
70 tree.unlock()
71 self.assertEqual(expected, actual)
72
73 def test_list_files_no_root_no_recurse(self):
74 work_tree = self.make_branch_and_tree('wt')
75 tree = self.get_tree_no_parents_abc_content(work_tree)
76 expected = [('a', 'V', 'file', 'a-id'),
77 ('b', 'V', 'directory', 'b-id'),
78 ]
79 tree.lock_read()
80 try:
81 actual = [(path, status, kind, file_id)
82 for path, status, kind, file_id, ie in
83 tree.list_files(recursive=False)]
84 finally:
85 tree.unlock()
86 self.assertEqual(expected, actual)
87
88 def test_list_files_from_dir(self):
89 work_tree = self.make_branch_and_tree('wt')
90 tree = self.get_tree_no_parents_abc_content(work_tree)
91 expected = [('c', 'V', 'file', 'c-id'),
92 ]
93 tree.lock_read()
94 try:
95 actual = [(path, status, kind, file_id)
96 for path, status, kind, file_id, ie in
97 tree.list_files(from_dir='b')]
98 finally:
99 tree.unlock()
100 self.assertEqual(expected, actual)
101
102 def test_list_files_from_dir_no_recurse(self):
103 # The test trees don't have much nesting so test with an explicit root
104 work_tree = self.make_branch_and_tree('wt')
105 tree = self.get_tree_no_parents_abc_content(work_tree)
106 expected = [('a', 'V', 'file', 'a-id'),
107 ('b', 'V', 'directory', 'b-id'),
108 ]
109 tree.lock_read()
110 try:
111 actual = [(path, status, kind, file_id)
112 for path, status, kind, file_id, ie in
113 tree.list_files(from_dir='', recursive=False)]
53 finally:114 finally:
54 tree.unlock()115 tree.unlock()
55 self.assertEqual(expected, actual)116 self.assertEqual(expected, actual)
56117
=== modified file 'bzrlib/transform.py'
--- bzrlib/transform.py 2009-06-10 03:56:49 +0000
+++ bzrlib/transform.py 2009-06-16 02:38:01 +0000
@@ -1748,7 +1748,7 @@
1748 if self._transform.final_file_id(trans_id) is None:1748 if self._transform.final_file_id(trans_id) is None:
1749 yield self._final_paths._determine_path(trans_id)1749 yield self._final_paths._determine_path(trans_id)
17501750
1751 def _make_inv_entries(self, ordered_entries, specific_file_ids):1751 def _make_inv_entries(self, ordered_entries, specific_file_ids=None):
1752 for trans_id, parent_file_id in ordered_entries:1752 for trans_id, parent_file_id in ordered_entries:
1753 file_id = self._transform.final_file_id(trans_id)1753 file_id = self._transform.final_file_id(trans_id)
1754 if file_id is None:1754 if file_id is None:
@@ -1791,14 +1791,41 @@
1791 specific_file_ids):1791 specific_file_ids):
1792 yield unicode(self._final_paths.get_path(trans_id)), entry1792 yield unicode(self._final_paths.get_path(trans_id)), entry
17931793
1794 def list_files(self, include_root=False):1794 def _iter_entries_for_dir(self, dir_path):
1795 """See Tree.list_files."""1795 """Return path, entry for items in a directory without recursing down."""
1796 dir_file_id = self.path2id(dir_path)
1797 ordered_ids = []
1798 for file_id in self.iter_children(dir_file_id):
1799 trans_id = self._transform.trans_id_file_id(file_id)
1800 ordered_ids.append((trans_id, file_id))
1801 for entry, trans_id in self._make_inv_entries(ordered_ids):
1802 yield unicode(self._final_paths.get_path(trans_id)), entry
1803
1804 def list_files(self, include_root=False, from_dir=None, recursive=True):
1805 """See WorkingTree.list_files."""
1796 # XXX This should behave like WorkingTree.list_files, but is really1806 # XXX This should behave like WorkingTree.list_files, but is really
1797 # more like RevisionTree.list_files.1807 # more like RevisionTree.list_files.
1798 for path, entry in self.iter_entries_by_dir():1808 if recursive:
1799 if entry.name == '' and not include_root:1809 prefix = None
1800 continue1810 if from_dir:
1801 yield path, 'V', entry.kind, entry.file_id, entry1811 prefix = from_dir + '/'
1812 entries = self.iter_entries_by_dir()
1813 for path, entry in entries:
1814 if entry.name == '' and not include_root:
1815 continue
1816 if prefix:
1817 if not path.startswith(prefix):
1818 continue
1819 path = path[len(prefix):]
1820 yield path, 'V', entry.kind, entry.file_id, entry
1821 else:
1822 if from_dir is None and include_root is True:
1823 root_entry = inventory.make_entry('directory', '',
1824 ROOT_PARENT, self.get_root_id())
1825 yield '', 'V', 'directory', root_entry.file_id, root_entry
1826 entries = self._iter_entries_for_dir(from_dir or '')
1827 for path, entry in entries:
1828 yield path, 'V', entry.kind, entry.file_id, entry
18021829
1803 def kind(self, file_id):1830 def kind(self, file_id):
1804 trans_id = self._transform.trans_id_file_id(file_id)1831 trans_id = self._transform.trans_id_file_id(file_id)
18051832
=== modified file 'bzrlib/workingtree.py'
--- bzrlib/workingtree.py 2009-06-15 15:47:45 +0000
+++ bzrlib/workingtree.py 2009-06-16 02:38:01 +0000
@@ -1115,15 +1115,16 @@
1115 def _kind(self, relpath):1115 def _kind(self, relpath):
1116 return osutils.file_kind(self.abspath(relpath))1116 return osutils.file_kind(self.abspath(relpath))
11171117
1118 def list_files(self, include_root=False):1118 def list_files(self, include_root=False, from_dir=None, recursive=True):
1119 """Recursively list all files as (path, class, kind, id, entry).1119 """List all files as (path, class, kind, id, entry).
11201120
1121 Lists, but does not descend into unversioned directories.1121 Lists, but does not descend into unversioned directories.
1122
1123 This does not include files that have been deleted in this1122 This does not include files that have been deleted in this
1124 tree.1123 tree. Skips the control directory.
11251124
1126 Skips the control directory.1125 :param include_root: if True, do not return an entry for the root
1126 :param from_dir: start from this directory or None for the root
1127 :param recursive: whether to recurse into subdirectories or not
1127 """1128 """
1128 # list_files is an iterator, so @needs_read_lock doesn't work properly1129 # list_files is an iterator, so @needs_read_lock doesn't work properly
1129 # with it. So callers should be careful to always read_lock the tree.1130 # with it. So callers should be careful to always read_lock the tree.
@@ -1131,7 +1132,7 @@
1131 raise errors.ObjectNotLocked(self)1132 raise errors.ObjectNotLocked(self)
11321133
1133 inv = self.inventory1134 inv = self.inventory
1134 if include_root is True:1135 if from_dir is None and include_root is True:
1135 yield ('', 'V', 'directory', inv.root.file_id, inv.root)1136 yield ('', 'V', 'directory', inv.root.file_id, inv.root)
1136 # Convert these into local objects to save lookup times1137 # Convert these into local objects to save lookup times
1137 pathjoin = osutils.pathjoin1138 pathjoin = osutils.pathjoin
@@ -1144,13 +1145,22 @@
1144 fk_entries = {'directory':TreeDirectory, 'file':TreeFile, 'symlink':TreeLink}1145 fk_entries = {'directory':TreeDirectory, 'file':TreeFile, 'symlink':TreeLink}
11451146
1146 # directory file_id, relative path, absolute path, reverse sorted children1147 # directory file_id, relative path, absolute path, reverse sorted children
1147 children = os.listdir(self.basedir)1148 if from_dir is not None:
1149 from_dir_id = inv.path2id(from_dir)
1150 if from_dir_id is None:
1151 # Directory not versioned
1152 return
1153 from_dir_abspath = pathjoin(self.basedir, from_dir)
1154 else:
1155 from_dir_id = inv.root.file_id
1156 from_dir_abspath = self.basedir
1157 children = os.listdir(from_dir_abspath)
1148 children.sort()1158 children.sort()
1149 # jam 20060527 The kernel sized tree seems equivalent whether we1159 # jam 20060527 The kernel sized tree seems equivalent whether we
1150 # use a deque and popleft to keep them sorted, or if we use a plain1160 # use a deque and popleft to keep them sorted, or if we use a plain
1151 # list and just reverse() them.1161 # list and just reverse() them.
1152 children = collections.deque(children)1162 children = collections.deque(children)
1153 stack = [(inv.root.file_id, u'', self.basedir, children)]1163 stack = [(from_dir_id, u'', from_dir_abspath, children)]
1154 while stack:1164 while stack:
1155 from_dir_id, from_dir_relpath, from_dir_abspath, children = stack[-1]1165 from_dir_id, from_dir_relpath, from_dir_abspath, children = stack[-1]
11561166
@@ -1214,14 +1224,15 @@
1214 if fk != 'directory':1224 if fk != 'directory':
1215 continue1225 continue
12161226
1217 # But do this child first1227 # But do this child first if recursing down
1218 new_children = os.listdir(fap)1228 if recursive:
1219 new_children.sort()1229 new_children = os.listdir(fap)
1220 new_children = collections.deque(new_children)1230 new_children.sort()
1221 stack.append((f_ie.file_id, fp, fap, new_children))1231 new_children = collections.deque(new_children)
1222 # Break out of inner loop,1232 stack.append((f_ie.file_id, fp, fap, new_children))
1223 # so that we start outer loop with child1233 # Break out of inner loop,
1224 break1234 # so that we start outer loop with child
1235 break
1225 else:1236 else:
1226 # if we finished all children, pop it off the stack1237 # if we finished all children, pop it off the stack
1227 stack.pop()1238 stack.pop()
12281239
=== modified file 'bzrlib/workingtree_4.py'
--- bzrlib/workingtree_4.py 2009-06-11 04:23:53 +0000
+++ bzrlib/workingtree_4.py 2009-06-16 02:38:01 +0000
@@ -1844,12 +1844,19 @@
1844 return None1844 return None
1845 return ie.executable1845 return ie.executable
18461846
1847 def list_files(self, include_root=False):1847 def list_files(self, include_root=False, from_dir=None, recursive=True):
1848 # We use a standard implementation, because DirStateRevisionTree is1848 # We use a standard implementation, because DirStateRevisionTree is
1849 # dealing with one of the parents of the current state1849 # dealing with one of the parents of the current state
1850 inv = self._get_inventory()1850 inv = self._get_inventory()
1851 entries = inv.iter_entries()1851 if from_dir is None:
1852 if self.inventory.root is not None and not include_root:1852 from_dir_id = None
1853 else:
1854 from_dir_id = inv.path2id(from_dir)
1855 if from_dir_id is None:
1856 # Directory not versioned
1857 return
1858 entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
1859 if inv.root is not None and not include_root and from_dir is None:
1853 entries.next()1860 entries.next()
1854 for path, entry in entries:1861 for path, entry in entries:
1855 yield path, 'V', entry.kind, entry.file_id, entry1862 yield path, 'V', entry.kind, entry.file_id, entry