Merge lp:~a-s-usov/bzr-fastimport/renaming-tags into lp:bzr-fastimport

Proposed by Alex Usov
Status: Superseded
Proposed branch: lp:~a-s-usov/bzr-fastimport/renaming-tags
Merge into: lp:bzr-fastimport
Diff against target: 223 lines (+110/-7)
4 files modified
cmds.py (+10/-2)
exporter.py (+46/-5)
tests/test_commands.py (+19/-0)
tests/test_exporter.py (+35/-0)
To merge this branch: bzr merge lp:~a-s-usov/bzr-fastimport/renaming-tags
Reviewer Review Type Date Requested Status
Jelmer Vernooij (community) Needs Fixing
Review via email: mp+79106@code.launchpad.net

This proposal has been superseded by a proposal from 2011-10-17.

Description of the change

As we discussed in the ticket -- implements new command-line flag for fast-export:
--rewrite-tag-names

The flag is is switched off by default.

If enabled tags which have incorrect name will be rewritten by replacing all invalid symbols (or substrings like .lock suffix) with underscore.
Some extra care is taken to ensure rewritten names are unique.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Thanks for the patch. This mostly looks ok; some minor issues:

* two empty lines are necessary before "+class CheckRefnameRewriting(tests.TestCase):" and "+def sanitize_ref_name_for_git(name_dict, refname):", consistent with PEP8.
* The --rewrite-git-tags option needs a test
* Please use underscores rather than camelcasing
* rewrite_dict could be a local variable rather than a class variable

I'm not sure if it's all that useful to try to prevent duplicate tag names, as this is pretty much impossible. Even with your changes there are risks of duplicate tags (e.g. with incremental imports). Likewise, your change may cause tag names to change if another tag is removed.

Revision history for this message
Jelmer Vernooij (jelmer) :
review: Needs Fixing
337. By Jelmer Vernooij

Merge support for --rewrite-tag-names.

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'cmds.py'
2--- cmds.py 2011-10-06 00:11:52 +0000
3+++ cmds.py 2011-10-17 10:30:29 +0000
4@@ -578,6 +578,10 @@
5 future once the feature names and definitions are formally agreed
6 to by the broader fast-import developer community.
7
8+ Git has much stricter naming rules for tags and fast-export --plain
9+ will skip tags which can't be imported into git. If you want to rename
10+ these tags use --rewrite-tag-names.
11+
12 :Examples:
13
14 To produce data destined for import into Bazaar::
15@@ -624,12 +628,15 @@
16 Option('plain',
17 help="Exclude metadata to maximise interoperability."
18 ),
19+ Option('rewrite-tag-names',
20+ help="Rewrite invalid tag names in plain mode."
21+ ),
22 ]
23 encoding_type = 'exact'
24 def run(self, source, destination=None, verbose=False,
25 git_branch="master", checkpoint=10000, marks=None,
26 import_marks=None, export_marks=None, revision=None,
27- plain=True):
28+ plain=True, rewrite_tag_names=False):
29 load_fastimport()
30 from bzrlib.plugins.fastimport import exporter
31
32@@ -639,7 +646,8 @@
33 destination=destination,
34 git_branch=git_branch, checkpoint=checkpoint,
35 import_marks_file=import_marks, export_marks_file=export_marks,
36- revision=revision, verbose=verbose, plain_format=plain)
37+ revision=revision, verbose=verbose, plain_format=plain,
38+ rewrite_tags=rewrite_tag_names)
39 return exporter.run()
40
41
42
43=== modified file 'exporter.py'
44--- exporter.py 2011-10-06 00:11:52 +0000
45+++ exporter.py 2011-10-17 10:30:29 +0000
46@@ -46,7 +46,7 @@
47 # set new_git_branch to the previously used name)
48
49 from email.Utils import parseaddr
50-import sys, time
51+import sys, time, re
52
53 import bzrlib.branch
54 import bzrlib.revision
55@@ -112,18 +112,52 @@
56 return True
57
58
59+def sanitize_ref_name_for_git(refname):
60+ """Rewrite refname so that it will be accepted by git-fast-import.
61+ For the detailed rules see check_ref_format.
62+
63+ By rewriting the refname we are breaking uniqueness guarantees provided by bzr
64+ so we have to manually
65+ verify that resulting ref names are unique.
66+
67+ :param refname: refname to rewrite
68+ :return: new refname
69+ """
70+ new_refname = re.sub(
71+ # '/.' in refname or startswith '.'
72+ r"/\.|^\."
73+ # '..' in refname
74+ r"|\.\."
75+ # ord(c) < 040
76+ r"|[" + "".join([chr(x) for x in range(040)]) + r"]"
77+ # c in '\177 ~^:?*['
78+ r"|[\177 ~^:?*[]"
79+ # last char in "/."
80+ r"|[/.]$"
81+ # endswith '.lock'
82+ r"|.lock$"
83+ # "@{" in refname
84+ r"|@{"
85+ # "\\" in refname
86+ r"|\\",
87+ "_", refname)
88+ return new_refname
89
90 class BzrFastExporter(object):
91
92 def __init__(self, source, destination, git_branch=None, checkpoint=-1,
93 import_marks_file=None, export_marks_file=None, revision=None,
94- verbose=False, plain_format=False):
95+ verbose=False, plain_format=False, rewrite_tags=False):
96 """Export branch data in fast import format.
97
98 :param plain_format: if True, 'classic' fast-import format is
99 used without any extended features; if False, the generated
100 data is richer and includes information like multiple
101 authors, revision properties, etc.
102+
103+ :param rewrite_tags: if True tag names will be rewritten to be
104+ git-compatible. Otherwise tags which aren't valid for git will
105+ be skiped.
106 """
107 self.source = source
108 self.outf = _get_output_stream(destination)
109@@ -134,6 +168,7 @@
110 self.revision = revision
111 self.excluded_revisions = set()
112 self.plain_format = plain_format
113+ self.rewrite_tags = rewrite_tags
114 self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
115 'get_apparent_authors')
116 self.properties_to_exclude = ['authors', 'author']
117@@ -559,9 +594,15 @@
118 else:
119 git_ref = 'refs/tags/%s' % tag.encode("utf-8")
120 if self.plain_format and not check_ref_format(git_ref):
121- self.warning('not creating tag %r as its name would not be '
122- 'valid in git.', git_ref)
123- continue
124+ if self.rewrite_tags:
125+ new_ref = sanitize_ref_name_for_git(git_ref)
126+ self.warning('tag %r is exported as %r to be valid in git.',
127+ git_ref, new_ref)
128+ git_ref = new_ref
129+ else:
130+ self.warning('not creating tag %r as its name would not be '
131+ 'valid in git.', git_ref)
132+ continue
133 self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
134
135 def _next_tmp_branch_name(self):
136
137=== modified file 'tests/test_commands.py'
138--- tests/test_commands.py 2011-10-06 00:11:52 +0000
139+++ tests/test_commands.py 2011-10-17 10:30:29 +0000
140@@ -80,6 +80,25 @@
141 except AttributeError: # bzr < 2.4
142 self.failUnlessExists("br.fi")
143
144+ def test_tag_rewriting(self):
145+ tree = self.make_branch_and_tree("br")
146+ tree.commit("pointless")
147+ self.assertTrue(tree.branch.supports_tags())
148+ rev_id = tree.branch.dotted_revno_to_revision_id((1,))
149+ tree.branch.tags.set_tag("goodTag", rev_id)
150+ tree.branch.tags.set_tag("bad Tag", rev_id)
151+
152+ # first check --no-rewrite-tag-names
153+ data = self.run_bzr("fast-export --plain --no-rewrite-tag-names br")[0]
154+ self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
155+ self.assertEqual(data.find("reset refs/tags/"), data.rfind("reset refs/tags/"))
156+
157+ # and now with --rewrite-tag-names
158+ data = self.run_bzr("fast-export --plain --rewrite-tag-names br")[0]
159+ self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
160+ # "bad Tag" should be exported as bad_Tag
161+ self.assertNotEqual(-1, data.find("reset refs/tags/bad_Tag"))
162+
163
164 simple_fast_import_stream = """commit refs/heads/master
165 mark :1
166
167=== modified file 'tests/test_exporter.py'
168--- tests/test_exporter.py 2011-10-06 00:11:52 +0000
169+++ tests/test_exporter.py 2011-10-17 10:30:29 +0000
170@@ -24,6 +24,7 @@
171 from bzrlib.plugins.fastimport.exporter import (
172 _get_output_stream,
173 check_ref_format,
174+ sanitize_ref_name_for_git
175 )
176
177 from bzrlib.plugins.fastimport.tests import (
178@@ -79,11 +80,45 @@
179
180 def test_invalid(self):
181 self.assertFalse(check_ref_format('foo'))
182+ self.assertFalse(check_ref_format('foo/.bar'))
183 self.assertFalse(check_ref_format('heads/foo/'))
184+ self.assertFalse(check_ref_format('heads/foo.'))
185 self.assertFalse(check_ref_format('./foo'))
186 self.assertFalse(check_ref_format('.refs/foo'))
187 self.assertFalse(check_ref_format('heads/foo..bar'))
188 self.assertFalse(check_ref_format('heads/foo?bar'))
189 self.assertFalse(check_ref_format('heads/foo.lock'))
190 self.assertFalse(check_ref_format('heads/v@{ation'))
191+ self.assertFalse(check_ref_format('heads/foo\\bar'))
192 self.assertFalse(check_ref_format('heads/foo\bar'))
193+ self.assertFalse(check_ref_format('heads/foo bar'))
194+ self.assertFalse(check_ref_format('heads/foo\020bar'))
195+ self.assertFalse(check_ref_format('heads/foo\177bar'))
196+
197+
198+class CheckRefnameRewriting(tests.TestCase):
199+ """Tests for sanitize_ref_name_for_git function"""
200+
201+ def test_passthrough_valid(self):
202+ self.assertEqual(sanitize_ref_name_for_git('heads/foo'), 'heads/foo')
203+ self.assertEqual(sanitize_ref_name_for_git('foo/bar/baz'), 'foo/bar/baz')
204+ self.assertEqual(sanitize_ref_name_for_git('refs///heads/foo'), 'refs///heads/foo')
205+ self.assertEqual(sanitize_ref_name_for_git('foo./bar'), 'foo./bar')
206+ self.assertEqual(sanitize_ref_name_for_git('heads/foo@bar'), 'heads/foo@bar')
207+ self.assertEqual(sanitize_ref_name_for_git('heads/fix.lock.error'), 'heads/fix.lock.error')
208+
209+ def test_rewrite_invalid(self):
210+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('foo./bar')))
211+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo/')))
212+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.')))
213+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('./foo')))
214+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('.refs/foo')))
215+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo..bar')))
216+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo?bar')))
217+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.lock')))
218+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/v@{ation')))
219+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\bar')))
220+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\\bar')))
221+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo bar')))
222+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\020bar')))
223+ self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\177bar')))

Subscribers

People subscribed via source and target branches