I've taken a diff against bzr.dev - review follows. Look for **** introducing my comments === modified file 'bzrlib/commit.py' --- bzrlib/commit.py 2009-04-03 00:07:49 +0000 +++ bzrlib/commit.py 2009-05-15 02:31:26 +0000 @@ -289,12 +289,14 @@ # We can use record_iter_changes IFF iter_changes is compatible with # the command line parameters, and the repository has fast delta # generation. See bug 347649. - self.use_record_iter_changes = ( - not self.specific_files and - not self.exclude and - not self.branch.repository._format.supports_tree_reference and - (self.branch.repository._format.fast_deltas or - len(self.parents) < 2)) + if self.branch.repository._format.supports_tree_reference: + # TODO: fix this + self.use_record_iter_changes = False + else: + self.use_record_iter_changes = ( + self.branch.repository._format.fast_deltas + or (len(self.parents) < 2 and not self.specific_files + and not self.exclude)) **** You've altered this if block substantially, and I think unintentionally. Please just use the old phrase and remove the two lines that you're address. *OR* Update the comment to match what your new if:else: actually does. @@ -632,13 +634,16 @@ def _update_builder_with_changes(self): """Update the commit builder with the data about what has changed. """ - exclude = self.exclude - specific_files = self.specific_files or [] - mutter("Selecting files for commit with filter %s", specific_files) + mutter("Selecting files for commit with filter %s excluding % s", + self.specific_files, self.exclude) self._check_strict() if self.use_record_iter_changes: iter_changes = self.work_tree.iter_changes(self.basis_tree) + if self.specific_files or self.exclude: + iter_changes = tree.filter_iter_changes_by_paths(iter_changes, + self.specific_files, self.exclude, + yield_changed_parents=True) **** I don't think a flag for 'yield_changed_parents' is needed; it smells of YAGNI to me: its a new interface, and our only user wants yield-changed-parents always on. @@ -955,17 +959,16 @@ def _set_specific_file_ids(self): """populate self.specific_file_ids if we will use it.""" - if not self.use_record_iter_changes: - # If provided, ensure the specified files are versioned - if self.specific_files is not None: - # Note: This routine is being called because it raises - # PathNotVersionedError as a side effect of finding the IDs. We - # later use the ids we found as input to the working tree - # inventory iterator, so we only consider those ids rather than - # examining the whole tree again. - # XXX: Dont we have filter_unversioned to do this more - # cheaply? - self.specific_file_ids = tree.find_ids_across_trees( - self.specific_files, [self.basis_tree, self.work_tree]) - else: - self.specific_file_ids = None + # If provided, ensure the specified files are versioned + if self.specific_files is not None: + # Note: This routine is being called because it raises + # PathNotVersionedError as a side effect of finding the IDs. We + # later use the ids we found as input to the working tree + # inventory iterator, so we only consider those ids rather than + # examining the whole tree again. + # XXX: Dont we have filter_unversioned to do this more + # cheaply? + self.specific_file_ids = tree.find_ids_across_trees( + self.specific_files, [self.basis_tree, self.work_tree]) + else: + self.specific_file_ids = None ***** This looks like a regression to me - its duplicating work iter_changes does. Why are you changing this? === modified file 'bzrlib/osutils.py' --- bzrlib/osutils.py 2009-05-07 04:58:58 +0000 +++ bzrlib/osutils.py 2009-05-15 02:31:26 +0000 @@ -853,6 +853,16 @@ return pathjoin(*p) +def parent_directories(filename): + """Return the list of parent directories, deepest first.""" + parents = [] + parts = splitpath(dirname(filename)) + while parts: + parents.append(joinpath(parts)) + parts.pop() + return parents ***** This misses the root directory. === modified file 'bzrlib/tests/test_osutils.py' --- bzrlib/tests/test_osutils.py 2009-05-07 05:08:46 +0000 +++ bzrlib/tests/test_osutils.py 2009-05-15 02:31:27 +0000 @@ -860,6 +860,18 @@ self.assertRaises(errors.BzrError, osutils.splitpath, 'a/../b') +class TestParentDirectories(tests.TestCaseInTempDir): + """Test osutils.parent_directories()""" + + def test_parent_directories(self): + def check(expected, path): + self.assertEqual(expected, osutils.parent_directories(path)) + + check([], 'a') + check(['a'], 'a/b') + check(['a/b', 'a'], 'a/b/c') + + ***** Please don't do this; the helper in this case isn't worth the complexity of having to figure out what -check(...) +check(...) will mean in a review in 6 months. Either: - use a well named helper - just inline the calls - use a for loop - use test parameterisation === modified file 'bzrlib/tests/test_tree.py' --- bzrlib/tests/test_tree.py 2009-03-23 14:59:43 +0000 +++ bzrlib/tests/test_tree.py 2009-05-15 02:31:27 +0000 @@ -23,7 +23,7 @@ tree as _mod_tree, ) from bzrlib.tests import TestCaseWithTransport -from bzrlib.tree import InterTree +from bzrlib.tree import InterTree, filter_iter_changes_by_paths class TestInterTree(TestCaseWithTransport): @@ -417,3 +417,84 @@ self.assertPathToKey(([u''], u'a'), u'a') self.assertPathToKey(([u'a'], u'b'), u'a/b') self.assertPathToKey(([u'a', u'b'], u'c'), u'a/b/c') + + +class TestFilterIterChangesByPaths(TestCaseWithTransport): + """Tests for tree.filter_iter_changes_by_paths().""" ***** Broadly, all the tests below would be much more debuggable as unit tests rather than black box tests. The bit that is hard to know what it means is 'original=self.make_changes_iter()'. I can't tell without running pdb, or writing tests to test the test helper, whether it actually is missing added roots, for instance. I think the selection of test cases is too small. Please be sure to: * test a selected file case where the root is new * test an exclude case where the root is excluded * test a selected + exclude case where the exclude is outside the selected * test a selected + exclude case where the selected file has been moved into the excluded tree (e.g. old has [README, exclude/], new has [exclude/README], selected_files=['README'], excludes=['exclude'] There are a number of other permutations that need testing that aren't tested in your patch. If you need help determining them I'll write them down, but in the interest of getting you unblocked I'm skipping that for now. The code that this is layering on is pretty exhaustively tested, and as we're replacing the use of well tested apis with this new function we need to test it well too. +def filter_iter_changes_by_paths(changes, include_files=None, + exclude_files=None, yield_changed_parents=False): + """Filter the results from iter_changes using paths. + + This decorator is useful for post-processing the output from + Tree.iter_changes(). It may also be used for post-procesing the output + from result-compatible methods such as Inventory.iter_changes() and + PreviewTree.iter_changes(). Note that specific-path filtering should not + have already been applied. + + :param changes: the iterator of changes to filter + :param include_files: paths of files and directories to include or None + for no masking. **** Please use 'selected_files' as the parameter, for consistency, or otherwise describe how people should go from a 'selected_files' set to a 'include_files' set. Are they different, the same, what? Rather than 'no masking', perhaps 'to disable '. + :param exclude_files: paths of files and directories to exclude or None + for no masking. Excludes take precedence over includes. + :param yield_changed_parents: if True, include changed parent directories + of included paths. **** As mentioned above, I don't think we have a use case for 'yield-changed-parents=False'. + """ + if not (include_files or exclude_files): + for change in changes: + yield change **** This can be optimised out - sketch: def foo(iterator, *args): if can_skip_me: return iterator else: return _foo(iterator, *args) + # Find the sets of files to include, additional parents and excludes, + # always including the root in the parents + includes_parents = set(['']) + if include_files: + include_set = osutils.minimum_path_selection(include_files) + if yield_changed_parents: + for include in include_set: + for parent in osutils.parent_directories(include): + if parent not in include_set: + includes_parents.add(parent) + else: + include_set = set(['']) + if exclude_files: + exclude_set = osutils.minimum_path_selection(exclude_files) + else: + exclude_set = set([]) + + for change in changes: + # Decide which path to use + old_path, new_path = change[1] + if new_path is None: + path = old_path + else: + path = new_path + + # Do the filtering + if exclude_set and is_inside_any(exclude_set, path): + continue + elif is_inside_any(include_set, path): + yield change + elif yield_changed_parents and path in includes_parents: + yield change **** Reading this, I'm not sure how you are getting the deltas for the parents if they were not selected by iter changes. Perhaps I"m missing something Anyhow, this is shaping up well I think. (I wish I could say 'review resubmit') review disapprove