Merge lp:~a-s-usov/python-fastimport/partial-streams-filtering into lp:~registry/python-fastimport/old

Proposed by Alex Usov
Status: Superseded
Proposed branch: lp:~a-s-usov/python-fastimport/partial-streams-filtering
Merge into: lp:~registry/python-fastimport/old
Diff against target: 301 lines (+240/-5)
2 files modified
fastimport/processors/filter_processor.py (+11/-3)
fastimport/tests/test_filter_processor.py (+229/-2)
To merge this branch: bzr merge lp:~a-s-usov/python-fastimport/partial-streams-filtering
Reviewer Review Type Date Requested Status
Jelmer Vernooij (community) Needs Fixing
Review via email: mp+79988@code.launchpad.net

This proposal has been superseded by a proposal from 2011-10-21.

Description of the change

Adds preserve_all_history flag to filter_processor.
 - prevents it from squashing empty commits
 - prevents it from rewriting/removing 'from' & 'merge' references

Primary use case: being able to filter an incremental stream.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Hi,

Thanks for working on this.

I think "preserve_all_history" is a confusing name for this new behaviour. You're still changing history, just not removing commits.

Perhaps a better name would be "squash_empty_commits", which could default to true?

Can you please also add a test that uses excludes/includes and filters? I suspect that if there are blobs that are removed, this will change the allocation of new ids, and might break the rewriting of from lines.

+ * preserve_all_history - if True filter processeor will be much more conservative
s/processeor/processor/

Revision history for this message
Jelmer Vernooij (jelmer) :
review: Needs Fixing
Revision history for this message
Alex Usov (a-s-usov) wrote :

Will resubmit patch soon.

Regarding allocation of new ids, my understanding is that filter_processor doesn't generate mark ids but uses whatever is present in the stream.

304. By Oleksandr Usov <email address hidden>

Rename flag to squash_empty_commits and add a few more tests with includes/excludes

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'fastimport/processors/filter_processor.py'
2--- fastimport/processors/filter_processor.py 2011-10-06 00:24:50 +0000
3+++ fastimport/processors/filter_processor.py 2011-10-21 09:38:25 +0000
4@@ -36,16 +36,23 @@
5
6 * exclude_paths - a list of paths that should not appear in the output
7 stream
8+
9+ * squash_empty_commits - if set to False, filter processor will be much more
10+ conservative w.r.t. history handling -- it will preserve all commits and
11+ links between them, including those to unknown revisions. This is primarily
12+ useful for filtering incremental streams
13 """
14
15 known_params = [
16 'include_paths',
17 'exclude_paths',
18+ 'squash_empty_commits'
19 ]
20
21 def pre_process(self):
22 self.includes = self.params.get('include_paths')
23 self.excludes = self.params.get('exclude_paths')
24+ self.squash_empty_commits = bool(self.params.get('squash_empty_commits', True))
25 # What's the new root, if any
26 self.new_root = helpers.common_directory(self.includes)
27 # Buffer of blobs until we know we need them: mark -> cmd
28@@ -91,7 +98,7 @@
29 """Process a CommitCommand."""
30 # These pass through if they meet the filtering conditions
31 interesting_filecmds = self._filter_filecommands(cmd.iter_files)
32- if interesting_filecmds:
33+ if interesting_filecmds or not self.squash_empty_commits:
34 # If all we have is a single deleteall, skip this commit
35 if len(interesting_filecmds) == 1 and isinstance(
36 interesting_filecmds[0], commands.FileDeleteAllCommand):
37@@ -109,8 +116,9 @@
38 self.referenced_blobs.append(fc.dataref)
39
40 # Update from and merges to refer to commits in the output
41- cmd.from_ = self._find_interesting_from(cmd.from_)
42- cmd.merges = self._find_interesting_merges(cmd.merges)
43+ if self.squash_empty_commits:
44+ cmd.from_ = self._find_interesting_from(cmd.from_)
45+ cmd.merges = self._find_interesting_merges(cmd.merges)
46 self.interesting_commits.add(cmd.id)
47
48 # Keep track of the parents
49
50=== modified file 'fastimport/tests/test_filter_processor.py'
51--- fastimport/tests/test_filter_processor.py 2011-10-06 00:24:50 +0000
52+++ fastimport/tests/test_filter_processor.py 2011-10-21 09:38:25 +0000
53@@ -102,7 +102,6 @@
54 M 644 :4 doc/index.txt
55 """
56
57-
58 class TestCaseWithFiltering(TestCase):
59
60 def assertFiltering(self, input, params, expected):
61@@ -116,7 +115,6 @@
62 out = outf.getvalue()
63 self.assertEquals(expected, out)
64
65-
66 class TestNoFiltering(TestCaseWithFiltering):
67
68 def test_params_not_given(self):
69@@ -876,3 +874,232 @@
70 reset refs/heads/bar
71 from :101
72 """)
73+
74+
75+# A sample input stream containing an empty commit
76+_SAMPLE_EMPTY_COMMIT = \
77+"""blob
78+mark :1
79+data 4
80+foo
81+commit refs/heads/master
82+mark :2
83+committer Joe <joe@example.com> 1234567890 +1000
84+data 14
85+Initial import
86+M 644 :1 COPYING
87+commit refs/heads/master
88+mark :3
89+committer Joe <joe@example.com> 1234567890 +1000
90+data 12
91+empty commit
92+"""
93+
94+# A sample input stream containing unresolved from and merge references
95+_SAMPLE_FROM_MERGE_COMMIT = \
96+"""blob
97+mark :1
98+data 4
99+foo
100+commit refs/heads/master
101+mark :3
102+committer Joe <joe@example.com> 1234567890 +1000
103+data 6
104+import
105+M 644 :1 COPYING
106+blob
107+mark :2
108+data 4
109+bar
110+commit refs/heads/master
111+mark :4
112+committer Joe <joe@example.com> 1234567890 +1000
113+data 19
114+unknown from commit
115+from :999
116+M 644 :2 data/DATA
117+blob
118+mark :99
119+data 4
120+bar
121+commit refs/heads/master
122+mark :5
123+committer Joe <joe@example.com> 1234567890 +1000
124+data 12
125+merge commit
126+from :3
127+merge :4
128+merge :1001
129+M 644 :99 data/DATA2
130+"""
131+
132+class TestSquashEmptyCommitsFlag(TestCaseWithFiltering):
133+
134+ def test_squash_empty_commit(self):
135+ params = {'include_paths': None, 'exclude_paths': None}
136+ self.assertFiltering(_SAMPLE_EMPTY_COMMIT, params, \
137+"""blob
138+mark :1
139+data 4
140+foo
141+commit refs/heads/master
142+mark :2
143+committer Joe <joe@example.com> 1234567890 +1000
144+data 14
145+Initial import
146+M 644 :1 COPYING
147+""")
148+
149+ def test_keep_empty_commit(self):
150+ params = {'include_paths': None, 'exclude_paths': None, 'squash_empty_commits': False}
151+ self.assertFiltering(_SAMPLE_EMPTY_COMMIT, params, _SAMPLE_EMPTY_COMMIT)
152+
153+ def test_squash_unresolved_references(self):
154+ params = {'include_paths': None, 'exclude_paths': None}
155+ self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \
156+"""blob
157+mark :1
158+data 4
159+foo
160+commit refs/heads/master
161+mark :3
162+committer Joe <joe@example.com> 1234567890 +1000
163+data 6
164+import
165+M 644 :1 COPYING
166+blob
167+mark :2
168+data 4
169+bar
170+commit refs/heads/master
171+mark :4
172+committer Joe <joe@example.com> 1234567890 +1000
173+data 19
174+unknown from commit
175+M 644 :2 data/DATA
176+blob
177+mark :99
178+data 4
179+bar
180+commit refs/heads/master
181+mark :5
182+committer Joe <joe@example.com> 1234567890 +1000
183+data 12
184+merge commit
185+from :3
186+merge :4
187+M 644 :99 data/DATA2
188+""")
189+
190+ def test_keep_unresolved_from_and_merge(self):
191+ params = {'include_paths': None, 'exclude_paths': None, 'squash_empty_commits': False}
192+ self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, _SAMPLE_FROM_MERGE_COMMIT)
193+
194+ def test_with_excludes(self):
195+ params = {'include_paths': None,
196+ 'exclude_paths': ['data/DATA'],
197+ 'squash_empty_commits': False}
198+ self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \
199+"""blob
200+mark :1
201+data 4
202+foo
203+commit refs/heads/master
204+mark :3
205+committer Joe <joe@example.com> 1234567890 +1000
206+data 6
207+import
208+M 644 :1 COPYING
209+commit refs/heads/master
210+mark :4
211+committer Joe <joe@example.com> 1234567890 +1000
212+data 19
213+unknown from commit
214+from :999
215+blob
216+mark :99
217+data 4
218+bar
219+commit refs/heads/master
220+mark :5
221+committer Joe <joe@example.com> 1234567890 +1000
222+data 12
223+merge commit
224+from :3
225+merge :4
226+merge :1001
227+M 644 :99 data/DATA2
228+""")
229+
230+ def test_with_file_includes(self):
231+ params = {'include_paths': ['COPYING', 'data/DATA2'],
232+ 'exclude_paths': None,
233+ 'squash_empty_commits': False}
234+ self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \
235+"""blob
236+mark :1
237+data 4
238+foo
239+commit refs/heads/master
240+mark :3
241+committer Joe <joe@example.com> 1234567890 +1000
242+data 6
243+import
244+M 644 :1 COPYING
245+commit refs/heads/master
246+mark :4
247+committer Joe <joe@example.com> 1234567890 +1000
248+data 19
249+unknown from commit
250+from :999
251+blob
252+mark :99
253+data 4
254+bar
255+commit refs/heads/master
256+mark :5
257+committer Joe <joe@example.com> 1234567890 +1000
258+data 12
259+merge commit
260+from :3
261+merge :4
262+merge :1001
263+M 644 :99 data/DATA2
264+"""
265+)
266+
267+ def test_with_directory_includes(self):
268+ params = {'include_paths': ['data/'],
269+ 'exclude_paths': None,
270+ 'squash_empty_commits': False}
271+ self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \
272+"""commit refs/heads/master
273+mark :3
274+committer Joe <joe@example.com> 1234567890 +1000
275+data 6
276+import
277+blob
278+mark :2
279+data 4
280+bar
281+commit refs/heads/master
282+mark :4
283+committer Joe <joe@example.com> 1234567890 +1000
284+data 19
285+unknown from commit
286+from :999
287+M 644 :2 DATA
288+blob
289+mark :99
290+data 4
291+bar
292+commit refs/heads/master
293+mark :5
294+committer Joe <joe@example.com> 1234567890 +1000
295+data 12
296+merge commit
297+from :3
298+merge :4
299+merge :1001
300+M 644 :99 DATA2
301+""")

Subscribers

People subscribed via source and target branches