Merge lp:~a-s-usov/bzr-fastimport/renaming-tags into lp:bzr-fastimport

Proposed by Alex Usov
Status: Superseded
Proposed branch: lp:~a-s-usov/bzr-fastimport/renaming-tags
Merge into: lp:bzr-fastimport
Diff against target: 223 lines (+110/-7)
4 files modified
cmds.py (+10/-2)
exporter.py (+46/-5)
tests/test_commands.py (+19/-0)
tests/test_exporter.py (+35/-0)
To merge this branch: bzr merge lp:~a-s-usov/bzr-fastimport/renaming-tags
Reviewer Review Type Date Requested Status
Jelmer Vernooij (community) Needs Fixing
Review via email: mp+79106@code.launchpad.net

This proposal has been superseded by a proposal from 2011-10-17.

Description of the change

As we discussed in the ticket -- implements new command-line flag for fast-export:
--rewrite-tag-names

The flag is is switched off by default.

If enabled tags which have incorrect name will be rewritten by replacing all invalid symbols (or substrings like .lock suffix) with underscore.
Some extra care is taken to ensure rewritten names are unique.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Thanks for the patch. This mostly looks ok; some minor issues:

* two empty lines are necessary before "+class CheckRefnameRewriting(tests.TestCase):" and "+def sanitize_ref_name_for_git(name_dict, refname):", consistent with PEP8.
* The --rewrite-git-tags option needs a test
* Please use underscores rather than camelcasing
* rewrite_dict could be a local variable rather than a class variable

I'm not sure if it's all that useful to try to prevent duplicate tag names, as this is pretty much impossible. Even with your changes there are risks of duplicate tags (e.g. with incremental imports). Likewise, your change may cause tag names to change if another tag is removed.

Revision history for this message
Jelmer Vernooij (jelmer) :
review: Needs Fixing
337. By Jelmer Vernooij

Merge support for --rewrite-tag-names.

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'cmds.py'
--- cmds.py 2011-10-06 00:11:52 +0000
+++ cmds.py 2011-10-17 10:30:29 +0000
@@ -578,6 +578,10 @@
578 future once the feature names and definitions are formally agreed578 future once the feature names and definitions are formally agreed
579 to by the broader fast-import developer community.579 to by the broader fast-import developer community.
580580
581 Git has much stricter naming rules for tags and fast-export --plain
582 will skip tags which can't be imported into git. If you want to rename
583 these tags use --rewrite-tag-names.
584
581 :Examples:585 :Examples:
582586
583 To produce data destined for import into Bazaar::587 To produce data destined for import into Bazaar::
@@ -624,12 +628,15 @@
624 Option('plain',628 Option('plain',
625 help="Exclude metadata to maximise interoperability."629 help="Exclude metadata to maximise interoperability."
626 ),630 ),
631 Option('rewrite-tag-names',
632 help="Rewrite invalid tag names in plain mode."
633 ),
627 ]634 ]
628 encoding_type = 'exact'635 encoding_type = 'exact'
629 def run(self, source, destination=None, verbose=False,636 def run(self, source, destination=None, verbose=False,
630 git_branch="master", checkpoint=10000, marks=None,637 git_branch="master", checkpoint=10000, marks=None,
631 import_marks=None, export_marks=None, revision=None,638 import_marks=None, export_marks=None, revision=None,
632 plain=True):639 plain=True, rewrite_tag_names=False):
633 load_fastimport()640 load_fastimport()
634 from bzrlib.plugins.fastimport import exporter641 from bzrlib.plugins.fastimport import exporter
635642
@@ -639,7 +646,8 @@
639 destination=destination,646 destination=destination,
640 git_branch=git_branch, checkpoint=checkpoint,647 git_branch=git_branch, checkpoint=checkpoint,
641 import_marks_file=import_marks, export_marks_file=export_marks,648 import_marks_file=import_marks, export_marks_file=export_marks,
642 revision=revision, verbose=verbose, plain_format=plain)649 revision=revision, verbose=verbose, plain_format=plain,
650 rewrite_tags=rewrite_tag_names)
643 return exporter.run()651 return exporter.run()
644652
645653
646654
=== modified file 'exporter.py'
--- exporter.py 2011-10-06 00:11:52 +0000
+++ exporter.py 2011-10-17 10:30:29 +0000
@@ -46,7 +46,7 @@
46# set new_git_branch to the previously used name)46# set new_git_branch to the previously used name)
4747
48from email.Utils import parseaddr48from email.Utils import parseaddr
49import sys, time49import sys, time, re
5050
51import bzrlib.branch51import bzrlib.branch
52import bzrlib.revision52import bzrlib.revision
@@ -112,18 +112,52 @@
112 return True112 return True
113113
114114
115def sanitize_ref_name_for_git(refname):
116 """Rewrite refname so that it will be accepted by git-fast-import.
117 For the detailed rules see check_ref_format.
118
119 By rewriting the refname we are breaking uniqueness guarantees provided by bzr
120 so we have to manually
121 verify that resulting ref names are unique.
122
123 :param refname: refname to rewrite
124 :return: new refname
125 """
126 new_refname = re.sub(
127 # '/.' in refname or startswith '.'
128 r"/\.|^\."
129 # '..' in refname
130 r"|\.\."
131 # ord(c) < 040
132 r"|[" + "".join([chr(x) for x in range(040)]) + r"]"
133 # c in '\177 ~^:?*['
134 r"|[\177 ~^:?*[]"
135 # last char in "/."
136 r"|[/.]$"
137 # endswith '.lock'
138 r"|.lock$"
139 # "@{" in refname
140 r"|@{"
141 # "\\" in refname
142 r"|\\",
143 "_", refname)
144 return new_refname
115145
116class BzrFastExporter(object):146class BzrFastExporter(object):
117147
118 def __init__(self, source, destination, git_branch=None, checkpoint=-1,148 def __init__(self, source, destination, git_branch=None, checkpoint=-1,
119 import_marks_file=None, export_marks_file=None, revision=None,149 import_marks_file=None, export_marks_file=None, revision=None,
120 verbose=False, plain_format=False):150 verbose=False, plain_format=False, rewrite_tags=False):
121 """Export branch data in fast import format.151 """Export branch data in fast import format.
122152
123 :param plain_format: if True, 'classic' fast-import format is153 :param plain_format: if True, 'classic' fast-import format is
124 used without any extended features; if False, the generated154 used without any extended features; if False, the generated
125 data is richer and includes information like multiple155 data is richer and includes information like multiple
126 authors, revision properties, etc.156 authors, revision properties, etc.
157
158 :param rewrite_tags: if True tag names will be rewritten to be
159 git-compatible. Otherwise tags which aren't valid for git will
160 be skiped.
127 """161 """
128 self.source = source162 self.source = source
129 self.outf = _get_output_stream(destination)163 self.outf = _get_output_stream(destination)
@@ -134,6 +168,7 @@
134 self.revision = revision168 self.revision = revision
135 self.excluded_revisions = set()169 self.excluded_revisions = set()
136 self.plain_format = plain_format170 self.plain_format = plain_format
171 self.rewrite_tags = rewrite_tags
137 self._multi_author_api_available = hasattr(bzrlib.revision.Revision,172 self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
138 'get_apparent_authors')173 'get_apparent_authors')
139 self.properties_to_exclude = ['authors', 'author']174 self.properties_to_exclude = ['authors', 'author']
@@ -559,9 +594,15 @@
559 else:594 else:
560 git_ref = 'refs/tags/%s' % tag.encode("utf-8")595 git_ref = 'refs/tags/%s' % tag.encode("utf-8")
561 if self.plain_format and not check_ref_format(git_ref):596 if self.plain_format and not check_ref_format(git_ref):
562 self.warning('not creating tag %r as its name would not be '597 if self.rewrite_tags:
563 'valid in git.', git_ref)598 new_ref = sanitize_ref_name_for_git(git_ref)
564 continue599 self.warning('tag %r is exported as %r to be valid in git.',
600 git_ref, new_ref)
601 git_ref = new_ref
602 else:
603 self.warning('not creating tag %r as its name would not be '
604 'valid in git.', git_ref)
605 continue
565 self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))606 self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
566607
567 def _next_tmp_branch_name(self):608 def _next_tmp_branch_name(self):
568609
=== modified file 'tests/test_commands.py'
--- tests/test_commands.py 2011-10-06 00:11:52 +0000
+++ tests/test_commands.py 2011-10-17 10:30:29 +0000
@@ -80,6 +80,25 @@
80 except AttributeError: # bzr < 2.480 except AttributeError: # bzr < 2.4
81 self.failUnlessExists("br.fi")81 self.failUnlessExists("br.fi")
8282
83 def test_tag_rewriting(self):
84 tree = self.make_branch_and_tree("br")
85 tree.commit("pointless")
86 self.assertTrue(tree.branch.supports_tags())
87 rev_id = tree.branch.dotted_revno_to_revision_id((1,))
88 tree.branch.tags.set_tag("goodTag", rev_id)
89 tree.branch.tags.set_tag("bad Tag", rev_id)
90
91 # first check --no-rewrite-tag-names
92 data = self.run_bzr("fast-export --plain --no-rewrite-tag-names br")[0]
93 self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
94 self.assertEqual(data.find("reset refs/tags/"), data.rfind("reset refs/tags/"))
95
96 # and now with --rewrite-tag-names
97 data = self.run_bzr("fast-export --plain --rewrite-tag-names br")[0]
98 self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
99 # "bad Tag" should be exported as bad_Tag
100 self.assertNotEqual(-1, data.find("reset refs/tags/bad_Tag"))
101
83102
84simple_fast_import_stream = """commit refs/heads/master103simple_fast_import_stream = """commit refs/heads/master
85mark :1104mark :1
86105
=== modified file 'tests/test_exporter.py'
--- tests/test_exporter.py 2011-10-06 00:11:52 +0000
+++ tests/test_exporter.py 2011-10-17 10:30:29 +0000
@@ -24,6 +24,7 @@
24from bzrlib.plugins.fastimport.exporter import (24from bzrlib.plugins.fastimport.exporter import (
25 _get_output_stream,25 _get_output_stream,
26 check_ref_format,26 check_ref_format,
27 sanitize_ref_name_for_git
27 )28 )
2829
29from bzrlib.plugins.fastimport.tests import (30from bzrlib.plugins.fastimport.tests import (
@@ -79,11 +80,45 @@
7980
80 def test_invalid(self):81 def test_invalid(self):
81 self.assertFalse(check_ref_format('foo'))82 self.assertFalse(check_ref_format('foo'))
83 self.assertFalse(check_ref_format('foo/.bar'))
82 self.assertFalse(check_ref_format('heads/foo/'))84 self.assertFalse(check_ref_format('heads/foo/'))
85 self.assertFalse(check_ref_format('heads/foo.'))
83 self.assertFalse(check_ref_format('./foo'))86 self.assertFalse(check_ref_format('./foo'))
84 self.assertFalse(check_ref_format('.refs/foo'))87 self.assertFalse(check_ref_format('.refs/foo'))
85 self.assertFalse(check_ref_format('heads/foo..bar'))88 self.assertFalse(check_ref_format('heads/foo..bar'))
86 self.assertFalse(check_ref_format('heads/foo?bar'))89 self.assertFalse(check_ref_format('heads/foo?bar'))
87 self.assertFalse(check_ref_format('heads/foo.lock'))90 self.assertFalse(check_ref_format('heads/foo.lock'))
88 self.assertFalse(check_ref_format('heads/v@{ation'))91 self.assertFalse(check_ref_format('heads/v@{ation'))
92 self.assertFalse(check_ref_format('heads/foo\\bar'))
89 self.assertFalse(check_ref_format('heads/foo\bar'))93 self.assertFalse(check_ref_format('heads/foo\bar'))
94 self.assertFalse(check_ref_format('heads/foo bar'))
95 self.assertFalse(check_ref_format('heads/foo\020bar'))
96 self.assertFalse(check_ref_format('heads/foo\177bar'))
97
98
99class CheckRefnameRewriting(tests.TestCase):
100 """Tests for sanitize_ref_name_for_git function"""
101
102 def test_passthrough_valid(self):
103 self.assertEqual(sanitize_ref_name_for_git('heads/foo'), 'heads/foo')
104 self.assertEqual(sanitize_ref_name_for_git('foo/bar/baz'), 'foo/bar/baz')
105 self.assertEqual(sanitize_ref_name_for_git('refs///heads/foo'), 'refs///heads/foo')
106 self.assertEqual(sanitize_ref_name_for_git('foo./bar'), 'foo./bar')
107 self.assertEqual(sanitize_ref_name_for_git('heads/foo@bar'), 'heads/foo@bar')
108 self.assertEqual(sanitize_ref_name_for_git('heads/fix.lock.error'), 'heads/fix.lock.error')
109
110 def test_rewrite_invalid(self):
111 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('foo./bar')))
112 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo/')))
113 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.')))
114 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('./foo')))
115 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('.refs/foo')))
116 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo..bar')))
117 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo?bar')))
118 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.lock')))
119 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/v@{ation')))
120 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\bar')))
121 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\\bar')))
122 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo bar')))
123 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\020bar')))
124 self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\177bar')))

Subscribers

People subscribed via source and target branches