Merge lp:~bzr/bzr/list-files-norecurse into lp:~bzr/bzr/trunk-old

Proposed by Ian Clatworthy on 2009-05-18
Status: Merged
Approved by: Ian Clatworthy on 2009-06-16
Approved revision: 4373
Merged at revision: not available
Proposed branch: lp:~bzr/bzr/list-files-norecurse
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 272 lines
To merge this branch: bzr merge lp:~bzr/bzr/list-files-norecurse
Reviewer Review Type Date Requested Status
Martin Pool Approve on 2009-06-16
John A Meinel 2009-05-18 Needs Information on 2009-06-01
Review via email: mp+6669@code.launchpad.net
To post a comment you must log in.
Ian Clatworthy (ian-clatworthy) wrote :

This patch is the core of the work to make the ls command faster. 2 new parameters are added to the tree.list_files() API: from_dir and recursive. Numerous tests are added to cover these new parameters.

Implementation-wise, this builds on the iter-entries-norecurse branch.

lp:~bzr/bzr/list-files-norecurse updated on 2009-05-18
4373. By Ian Clatworthy on 2009-05-18

handle unversioned directories

John A Meinel (jameinel) wrote :

I believe Tree.list_files() is actually meant to be a 'deprecated' function that we just never got around to actually deprecating.

I'd rather see the code changed to use Tree.iter_entries_by_dir directly.

Also, as near as I can tell, this doesn't actually update any callers to *use* the new functionality, is that true?

review: Needs Information
Ian Clatworthy (ian-clatworthy) wrote :

John A Meinel wrote:
> Also, as near as I can tell, this doesn't actually update any callers to *use* the new functionality, is that true?
>
That's right. Another patch builds on this one, namely
https://code.edge.launchpad.net/~bzr/bzr/faster-ls/+merge/6670.

Ian C.

Martin Pool (mbp) wrote :

It's ok with me to merge this if you're finding it useful. I think John is right that the interfaces here could be cleaned up though.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bzrlib/revisiontree.py'
2--- bzrlib/revisiontree.py 2009-06-10 03:56:49 +0000
3+++ bzrlib/revisiontree.py 2009-06-16 02:38:01 +0000
4@@ -114,11 +114,18 @@
5 def has_filename(self, filename):
6 return bool(self.inventory.path2id(filename))
7
8- def list_files(self, include_root=False):
9+ def list_files(self, include_root=False, from_dir=None, recursive=True):
10 # The only files returned by this are those from the version
11- entries = self.inventory.iter_entries()
12- # skip the root for compatability with the current apis.
13- if self.inventory.root is not None and not include_root:
14+ inv = self.inventory
15+ if from_dir is None:
16+ from_dir_id = None
17+ else:
18+ from_dir_id = inv.path2id(from_dir)
19+ if from_dir_id is None:
20+ # Directory not versioned
21+ return
22+ entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
23+ if inv.root is not None and not include_root and from_dir is None:
24 # skip the root for compatability with the current apis.
25 entries.next()
26 for path, entry in entries:
27
28=== modified file 'bzrlib/tests/tree_implementations/test_list_files.py'
29--- bzrlib/tests/tree_implementations/test_list_files.py 2009-03-23 14:59:43 +0000
30+++ bzrlib/tests/tree_implementations/test_list_files.py 2009-06-16 02:38:01 +0000
31@@ -49,7 +49,68 @@
32 try:
33 actual = [(path, status, kind, file_id)
34 for path, status, kind, file_id, ie in
35- tree.list_files(include_root=False)]
36+ tree.list_files()]
37+ finally:
38+ tree.unlock()
39+ self.assertEqual(expected, actual)
40+
41+ def test_list_files_with_root_no_recurse(self):
42+ work_tree = self.make_branch_and_tree('wt')
43+ tree = self.get_tree_no_parents_abc_content(work_tree)
44+ expected = [('', 'V', 'directory', 'root-id'),
45+ ('a', 'V', 'file', 'a-id'),
46+ ('b', 'V', 'directory', 'b-id'),
47+ ]
48+ tree.lock_read()
49+ try:
50+ actual = [(path, status, kind, file_id)
51+ for path, status, kind, file_id, ie in
52+ tree.list_files(include_root=True, recursive=False)]
53+ finally:
54+ tree.unlock()
55+ self.assertEqual(expected, actual)
56+
57+ def test_list_files_no_root_no_recurse(self):
58+ work_tree = self.make_branch_and_tree('wt')
59+ tree = self.get_tree_no_parents_abc_content(work_tree)
60+ expected = [('a', 'V', 'file', 'a-id'),
61+ ('b', 'V', 'directory', 'b-id'),
62+ ]
63+ tree.lock_read()
64+ try:
65+ actual = [(path, status, kind, file_id)
66+ for path, status, kind, file_id, ie in
67+ tree.list_files(recursive=False)]
68+ finally:
69+ tree.unlock()
70+ self.assertEqual(expected, actual)
71+
72+ def test_list_files_from_dir(self):
73+ work_tree = self.make_branch_and_tree('wt')
74+ tree = self.get_tree_no_parents_abc_content(work_tree)
75+ expected = [('c', 'V', 'file', 'c-id'),
76+ ]
77+ tree.lock_read()
78+ try:
79+ actual = [(path, status, kind, file_id)
80+ for path, status, kind, file_id, ie in
81+ tree.list_files(from_dir='b')]
82+ finally:
83+ tree.unlock()
84+ self.assertEqual(expected, actual)
85+
86+ def test_list_files_from_dir_no_recurse(self):
87+ # The test trees don't have much nesting so test with an explicit root
88+ work_tree = self.make_branch_and_tree('wt')
89+ tree = self.get_tree_no_parents_abc_content(work_tree)
90+ expected = [('a', 'V', 'file', 'a-id'),
91+ ('b', 'V', 'directory', 'b-id'),
92+ ]
93+ tree.lock_read()
94+ try:
95+ actual = [(path, status, kind, file_id)
96+ for path, status, kind, file_id, ie in
97+ tree.list_files(from_dir='', recursive=False)]
98 finally:
99 tree.unlock()
100 self.assertEqual(expected, actual)
101
102=== modified file 'bzrlib/transform.py'
103--- bzrlib/transform.py 2009-06-10 03:56:49 +0000
104+++ bzrlib/transform.py 2009-06-16 02:38:01 +0000
105@@ -1748,7 +1748,7 @@
106 if self._transform.final_file_id(trans_id) is None:
107 yield self._final_paths._determine_path(trans_id)
108
109- def _make_inv_entries(self, ordered_entries, specific_file_ids):
110+ def _make_inv_entries(self, ordered_entries, specific_file_ids=None):
111 for trans_id, parent_file_id in ordered_entries:
112 file_id = self._transform.final_file_id(trans_id)
113 if file_id is None:
114@@ -1791,14 +1791,41 @@
115 specific_file_ids):
116 yield unicode(self._final_paths.get_path(trans_id)), entry
117
118- def list_files(self, include_root=False):
119- """See Tree.list_files."""
120+ def _iter_entries_for_dir(self, dir_path):
121+ """Return path, entry for items in a directory without recursing down."""
122+ dir_file_id = self.path2id(dir_path)
123+ ordered_ids = []
124+ for file_id in self.iter_children(dir_file_id):
125+ trans_id = self._transform.trans_id_file_id(file_id)
126+ ordered_ids.append((trans_id, file_id))
127+ for entry, trans_id in self._make_inv_entries(ordered_ids):
128+ yield unicode(self._final_paths.get_path(trans_id)), entry
129+
130+ def list_files(self, include_root=False, from_dir=None, recursive=True):
131+ """See WorkingTree.list_files."""
132 # XXX This should behave like WorkingTree.list_files, but is really
133 # more like RevisionTree.list_files.
134- for path, entry in self.iter_entries_by_dir():
135- if entry.name == '' and not include_root:
136- continue
137- yield path, 'V', entry.kind, entry.file_id, entry
138+ if recursive:
139+ prefix = None
140+ if from_dir:
141+ prefix = from_dir + '/'
142+ entries = self.iter_entries_by_dir()
143+ for path, entry in entries:
144+ if entry.name == '' and not include_root:
145+ continue
146+ if prefix:
147+ if not path.startswith(prefix):
148+ continue
149+ path = path[len(prefix):]
150+ yield path, 'V', entry.kind, entry.file_id, entry
151+ else:
152+ if from_dir is None and include_root is True:
153+ root_entry = inventory.make_entry('directory', '',
154+ ROOT_PARENT, self.get_root_id())
155+ yield '', 'V', 'directory', root_entry.file_id, root_entry
156+ entries = self._iter_entries_for_dir(from_dir or '')
157+ for path, entry in entries:
158+ yield path, 'V', entry.kind, entry.file_id, entry
159
160 def kind(self, file_id):
161 trans_id = self._transform.trans_id_file_id(file_id)
162
163=== modified file 'bzrlib/workingtree.py'
164--- bzrlib/workingtree.py 2009-06-15 15:47:45 +0000
165+++ bzrlib/workingtree.py 2009-06-16 02:38:01 +0000
166@@ -1115,15 +1115,16 @@
167 def _kind(self, relpath):
168 return osutils.file_kind(self.abspath(relpath))
169
170- def list_files(self, include_root=False):
171- """Recursively list all files as (path, class, kind, id, entry).
172+ def list_files(self, include_root=False, from_dir=None, recursive=True):
173+ """List all files as (path, class, kind, id, entry).
174
175 Lists, but does not descend into unversioned directories.
176-
177 This does not include files that have been deleted in this
178- tree.
179+ tree. Skips the control directory.
180
181- Skips the control directory.
182+ :param include_root: if True, do not return an entry for the root
183+ :param from_dir: start from this directory or None for the root
184+ :param recursive: whether to recurse into subdirectories or not
185 """
186 # list_files is an iterator, so @needs_read_lock doesn't work properly
187 # with it. So callers should be careful to always read_lock the tree.
188@@ -1131,7 +1132,7 @@
189 raise errors.ObjectNotLocked(self)
190
191 inv = self.inventory
192- if include_root is True:
193+ if from_dir is None and include_root is True:
194 yield ('', 'V', 'directory', inv.root.file_id, inv.root)
195 # Convert these into local objects to save lookup times
196 pathjoin = osutils.pathjoin
197@@ -1144,13 +1145,22 @@
198 fk_entries = {'directory':TreeDirectory, 'file':TreeFile, 'symlink':TreeLink}
199
200 # directory file_id, relative path, absolute path, reverse sorted children
201- children = os.listdir(self.basedir)
202+ if from_dir is not None:
203+ from_dir_id = inv.path2id(from_dir)
204+ if from_dir_id is None:
205+ # Directory not versioned
206+ return
207+ from_dir_abspath = pathjoin(self.basedir, from_dir)
208+ else:
209+ from_dir_id = inv.root.file_id
210+ from_dir_abspath = self.basedir
211+ children = os.listdir(from_dir_abspath)
212 children.sort()
213 # jam 20060527 The kernel sized tree seems equivalent whether we
214 # use a deque and popleft to keep them sorted, or if we use a plain
215 # list and just reverse() them.
216 children = collections.deque(children)
217- stack = [(inv.root.file_id, u'', self.basedir, children)]
218+ stack = [(from_dir_id, u'', from_dir_abspath, children)]
219 while stack:
220 from_dir_id, from_dir_relpath, from_dir_abspath, children = stack[-1]
221
222@@ -1214,14 +1224,15 @@
223 if fk != 'directory':
224 continue
225
226- # But do this child first
227- new_children = os.listdir(fap)
228- new_children.sort()
229- new_children = collections.deque(new_children)
230- stack.append((f_ie.file_id, fp, fap, new_children))
231- # Break out of inner loop,
232- # so that we start outer loop with child
233- break
234+ # But do this child first if recursing down
235+ if recursive:
236+ new_children = os.listdir(fap)
237+ new_children.sort()
238+ new_children = collections.deque(new_children)
239+ stack.append((f_ie.file_id, fp, fap, new_children))
240+ # Break out of inner loop,
241+ # so that we start outer loop with child
242+ break
243 else:
244 # if we finished all children, pop it off the stack
245 stack.pop()
246
247=== modified file 'bzrlib/workingtree_4.py'
248--- bzrlib/workingtree_4.py 2009-06-11 04:23:53 +0000
249+++ bzrlib/workingtree_4.py 2009-06-16 02:38:01 +0000
250@@ -1844,12 +1844,19 @@
251 return None
252 return ie.executable
253
254- def list_files(self, include_root=False):
255+ def list_files(self, include_root=False, from_dir=None, recursive=True):
256 # We use a standard implementation, because DirStateRevisionTree is
257 # dealing with one of the parents of the current state
258 inv = self._get_inventory()
259- entries = inv.iter_entries()
260- if self.inventory.root is not None and not include_root:
261+ if from_dir is None:
262+ from_dir_id = None
263+ else:
264+ from_dir_id = inv.path2id(from_dir)
265+ if from_dir_id is None:
266+ # Directory not versioned
267+ return
268+ entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
269+ if inv.root is not None and not include_root and from_dir is None:
270 entries.next()
271 for path, entry in entries:
272 yield path, 'V', entry.kind, entry.file_id, entry