Merge lp:~vila/bzr/106898-put-bytes-raises-TypeError into lp:bzr

Proposed by Vincent Ladeuil
Status: Merged
Approved by: Vincent Ladeuil
Approved revision: no longer in the source branch.
Merged at revision: 6613
Proposed branch: lp:~vila/bzr/106898-put-bytes-raises-TypeError
Merge into: lp:bzr
Diff against target: 289 lines (+51/-73)
9 files modified
bzrlib/tests/per_transport.py (+2/-23)
bzrlib/tests/test_smart_transport.py (+1/-1)
bzrlib/transport/__init__.py (+14/-14)
bzrlib/transport/local.py (+7/-5)
bzrlib/transport/memory.py (+4/-10)
bzrlib/transport/readonly.py (+1/-1)
bzrlib/transport/remote.py (+11/-16)
bzrlib/transport/sftp.py (+7/-3)
doc/en/release-notes/bzr-2.7.txt (+4/-0)
To merge this branch: bzr merge lp:~vila/bzr/106898-put-bytes-raises-TypeError
Reviewer Review Type Date Requested Status
Richard Wilbur Needs Fixing
Review via email: mp+283522@code.launchpad.net

Commit message

Make all transport put_bytes() raises TypeError when given unicode strings rather than bytes

Description of the change

While looking at https://bugs.launchpad.net/bzr/+bug/1524066 (which is a paramiko issue starting with version 1.16), some other failures were mentioned for per_transport.TransportTests.test_put_file_unicode.

These failures also occur on ubuntu wily and xenial.

From there I came across https://bugs.launchpad.net/bzr/+bug/106898 which was indeed proposing the right fix for test_put_bytes_unicode() and implementing it, I realize that test_put_file_unicode() was just bogus: it's the only place in the code base that try to force a file content to be unicode when we take care everywhere else to treat a file content as bytes.

To post a comment you must log in.
Revision history for this message
Richard Wilbur (richard-wilbur) wrote :

Looks like a valuable realignment of the code to be more consistent. This should also serve us well as we prepare to take on python 3 bytes.

I've suggested a few places where it looks like the parameter documentation could be updated to reflect the changes.

Other than that, +2.

review: Needs Fixing
Revision history for this message
Vincent Ladeuil (vila) wrote :

Good catches ! Thanks ! Fixed.

Revision history for this message
Vincent Ladeuil (vila) wrote :

sent to pqm by email

Revision history for this message
Vincent Ladeuil (vila) wrote :

sent to pqm by email

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bzrlib/tests/per_transport.py'
2--- bzrlib/tests/per_transport.py 2015-10-23 09:05:09 +0000
3+++ bzrlib/tests/per_transport.py 2016-01-22 08:02:39 +0000
4@@ -1,4 +1,4 @@
5-# Copyright (C) 2005-2011, 2015 Canonical Ltd
6+# Copyright (C) 2005-2011, 2016 Canonical Ltd
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10@@ -520,32 +520,11 @@
11 self.assertTransportMode(t, 'dir777', 0777)
12
13 def test_put_bytes_unicode(self):
14- # Expect put_bytes to raise AssertionError or UnicodeEncodeError if
15- # given unicode "bytes". UnicodeEncodeError doesn't really make sense
16- # (we don't want to encode unicode here at all, callers should be
17- # strictly passing bytes to put_bytes), but we allow it for backwards
18- # compatibility. At some point we should use a specific exception.
19- # See https://bugs.launchpad.net/bzr/+bug/106898.
20 t = self.get_transport()
21 if t.is_readonly():
22 return
23 unicode_string = u'\u1234'
24- self.assertRaises(
25- (AssertionError, UnicodeEncodeError),
26- t.put_bytes, 'foo', unicode_string)
27-
28- def test_put_file_unicode(self):
29- # Like put_bytes, except with a StringIO.StringIO of a unicode string.
30- # This situation can happen (and has) if code is careless about the type
31- # of "string" they initialise/write to a StringIO with. We cannot use
32- # cStringIO, because it never returns unicode from read.
33- # Like put_bytes, UnicodeEncodeError isn't quite the right exception to
34- # raise, but we raise it for hysterical raisins.
35- t = self.get_transport()
36- if t.is_readonly():
37- return
38- unicode_file = pyStringIO(u'\u1234')
39- self.assertRaises(UnicodeEncodeError, t.put_file, 'foo', unicode_file)
40+ self.assertRaises(TypeError, t.put_bytes, 'foo', unicode_string)
41
42 def test_mkdir(self):
43 t = self.get_transport()
44
45=== modified file 'bzrlib/tests/test_smart_transport.py'
46--- bzrlib/tests/test_smart_transport.py 2015-10-23 09:05:09 +0000
47+++ bzrlib/tests/test_smart_transport.py 2016-01-22 08:02:39 +0000
48@@ -1,4 +1,4 @@
49-# Copyright (C) 2006-2015 Canonical Ltd
50+# Copyright (C) 2006-2014, 2016 Canonical Ltd
51 #
52 # This program is free software; you can redistribute it and/or modify
53 # it under the terms of the GNU General Public License as published by
54
55=== modified file 'bzrlib/transport/__init__.py'
56--- bzrlib/transport/__init__.py 2012-03-13 17:25:29 +0000
57+++ bzrlib/transport/__init__.py 2016-01-22 08:02:39 +0000
58@@ -1,4 +1,4 @@
59-# Copyright (C) 2005-2011 Canonical Ltd
60+# Copyright (C) 2005-2012, 2016 Canonical Ltd
61 #
62 # This program is free software; you can redistribute it and/or modify
63 # it under the terms of the GNU General Public License as published by
64@@ -873,21 +873,21 @@
65 yield self.get(relpath)
66 count += 1
67
68- def put_bytes(self, relpath, bytes, mode=None):
69+ def put_bytes(self, relpath, raw_bytes, mode=None):
70 """Atomically put the supplied bytes into the given location.
71
72 :param relpath: The location to put the contents, relative to the
73 transport base.
74- :param bytes: A bytestring of data.
75+ :param raw_bytes: A bytestring of data.
76 :param mode: Create the file with the given mode.
77 :return: None
78 """
79- if not isinstance(bytes, str):
80- raise AssertionError(
81- 'bytes must be a plain string, not %s' % type(bytes))
82- return self.put_file(relpath, StringIO(bytes), mode=mode)
83+ if not isinstance(raw_bytes, str):
84+ raise TypeError(
85+ 'raw_bytes must be a plain string, not %s' % type(raw_bytes))
86+ return self.put_file(relpath, StringIO(raw_bytes), mode=mode)
87
88- def put_bytes_non_atomic(self, relpath, bytes, mode=None,
89+ def put_bytes_non_atomic(self, relpath, raw_bytes, mode=None,
90 create_parent_dir=False,
91 dir_mode=None):
92 """Copy the string into the target location.
93@@ -896,8 +896,8 @@
94 Transport.put_bytes_non_atomic for more information.
95
96 :param relpath: The remote location to put the contents.
97- :param bytes: A string object containing the raw bytes to write into
98- the target file.
99+ :param raw_bytes: A string object containing the raw bytes to write
100+ into the target file.
101 :param mode: Possible access permissions for new file.
102 None means do not set remote permissions.
103 :param create_parent_dir: If we cannot create the target file because
104@@ -905,10 +905,10 @@
105 create it, and then try again.
106 :param dir_mode: Possible access permissions for new directories.
107 """
108- if not isinstance(bytes, str):
109- raise AssertionError(
110- 'bytes must be a plain string, not %s' % type(bytes))
111- self.put_file_non_atomic(relpath, StringIO(bytes), mode=mode,
112+ if not isinstance(raw_bytes, str):
113+ raise TypeError(
114+ 'raw_bytes must be a plain string, not %s' % type(raw_bytes))
115+ self.put_file_non_atomic(relpath, StringIO(raw_bytes), mode=mode,
116 create_parent_dir=create_parent_dir,
117 dir_mode=dir_mode)
118
119
120=== modified file 'bzrlib/transport/local.py'
121--- bzrlib/transport/local.py 2012-03-13 17:25:29 +0000
122+++ bzrlib/transport/local.py 2016-01-22 08:02:39 +0000
123@@ -1,4 +1,4 @@
124-# Copyright (C) 2005-2011 Canonical Ltd
125+# Copyright (C) 2005-2012, 2016 Canonical Ltd
126 #
127 # This program is free software; you can redistribute it and/or modify
128 # it under the terms of the GNU General Public License as published by
129@@ -184,13 +184,15 @@
130 fp.close()
131 return length
132
133- def put_bytes(self, relpath, bytes, mode=None):
134+ def put_bytes(self, relpath, raw_bytes, mode=None):
135 """Copy the string into the location.
136
137 :param relpath: Location to put the contents, relative to base.
138- :param bytes: String
139+ :param raw_bytes: String
140 """
141-
142+ if not isinstance(raw_bytes, str):
143+ raise TypeError(
144+ 'raw_bytes must be a plain string, not %s' % type(raw_bytes))
145 path = relpath
146 try:
147 path = self._abspath(relpath)
148@@ -200,7 +202,7 @@
149 self._translate_error(e, path)
150 try:
151 if bytes:
152- fp.write(bytes)
153+ fp.write(raw_bytes)
154 fp.commit()
155 finally:
156 fp.close()
157
158=== modified file 'bzrlib/transport/memory.py'
159--- bzrlib/transport/memory.py 2011-12-19 13:23:58 +0000
160+++ bzrlib/transport/memory.py 2016-01-22 08:02:39 +0000
161@@ -1,4 +1,4 @@
162-# Copyright (C) 2005-2010 Canonical Ltd
163+# Copyright (C) 2005-2011, 2016 Canonical Ltd
164 #
165 # This program is free software; you can redistribute it and/or modify
166 # it under the terms of the GNU General Public License as published by
167@@ -148,15 +148,9 @@
168 """See Transport.put_file()."""
169 _abspath = self._abspath(relpath)
170 self._check_parent(_abspath)
171- bytes = f.read()
172- if type(bytes) is not str:
173- # Although not strictly correct, we raise UnicodeEncodeError to be
174- # compatible with other transports.
175- raise UnicodeEncodeError(
176- 'undefined', bytes, 0, 1,
177- 'put_file must be given a file of bytes, not unicode.')
178- self._files[_abspath] = (bytes, mode)
179- return len(bytes)
180+ raw_bytes = f.read()
181+ self._files[_abspath] = (raw_bytes, mode)
182+ return len(raw_bytes)
183
184 def mkdir(self, relpath, mode=None):
185 """See Transport.mkdir()."""
186
187=== modified file 'bzrlib/transport/readonly.py'
188--- bzrlib/transport/readonly.py 2015-10-23 09:05:09 +0000
189+++ bzrlib/transport/readonly.py 2016-01-22 08:02:39 +0000
190@@ -1,4 +1,4 @@
191-# Copyright (C) 2006, 2007, 2009, 2010, 2011, 2015 Canonical Ltd
192+# Copyright (C) 2006, 2007, 2009, 2010, 2011, 2016 Canonical Ltd
193 #
194 # This program is free software; you can redistribute it and/or modify
195 # it under the terms of the GNU General Public License as published by
196
197=== modified file 'bzrlib/transport/remote.py'
198--- bzrlib/transport/remote.py 2012-03-13 17:25:29 +0000
199+++ bzrlib/transport/remote.py 2016-01-22 08:02:39 +0000
200@@ -1,4 +1,4 @@
201-# Copyright (C) 2006-2010 Canonical Ltd
202+# Copyright (C) 2006-2012, 2016 Canonical Ltd
203 #
204 # This program is free software; you can redistribute it and/or modify
205 # it under the terms of the GNU General Public License as published by
206@@ -247,23 +247,18 @@
207 transport._file_streams[self.abspath(relpath)] = result
208 return result
209
210- def put_bytes(self, relpath, upload_contents, mode=None):
211- # FIXME: upload_file is probably not safe for non-ascii characters -
212- # should probably just pass all parameters as length-delimited
213- # strings?
214- if type(upload_contents) is unicode:
215- # Although not strictly correct, we raise UnicodeEncodeError to be
216- # compatible with other transports.
217- raise UnicodeEncodeError(
218- 'undefined', upload_contents, 0, 1,
219- 'put_bytes must be given bytes, not unicode.')
220- resp = self._call_with_body_bytes('put',
221+ def put_bytes(self, relpath, raw_bytes, mode=None):
222+ if not isinstance(raw_bytes, str):
223+ raise TypeError(
224+ 'raw_bytes must be a plain string, not %s' % type(raw_bytes))
225+ resp = self._call_with_body_bytes(
226+ 'put',
227 (self._remote_path(relpath), self._serialise_optional_mode(mode)),
228- upload_contents)
229+ raw_bytes)
230 self._ensure_ok(resp)
231- return len(upload_contents)
232+ return len(raw_bytes)
233
234- def put_bytes_non_atomic(self, relpath, bytes, mode=None,
235+ def put_bytes_non_atomic(self, relpath, raw_bytes, mode=None,
236 create_parent_dir=False,
237 dir_mode=None):
238 """See Transport.put_bytes_non_atomic."""
239@@ -276,7 +271,7 @@
240 'put_non_atomic',
241 (self._remote_path(relpath), self._serialise_optional_mode(mode),
242 create_parent_str, self._serialise_optional_mode(dir_mode)),
243- bytes)
244+ raw_bytes)
245 self._ensure_ok(resp)
246
247 def put_file(self, relpath, upload_file, mode=None):
248
249=== modified file 'bzrlib/transport/sftp.py'
250--- bzrlib/transport/sftp.py 2011-12-19 13:23:58 +0000
251+++ bzrlib/transport/sftp.py 2016-01-22 08:02:39 +0000
252@@ -1,4 +1,4 @@
253-# Copyright (C) 2005-2010 Canonical Ltd
254+# Copyright (C) 2005-2011, 2016 Canonical Ltd
255 #
256 # This program is free software; you can redistribute it and/or modify
257 # it under the terms of the GNU General Public License as published by
258@@ -593,11 +593,15 @@
259 create_parent_dir=create_parent_dir,
260 dir_mode=dir_mode)
261
262- def put_bytes_non_atomic(self, relpath, bytes, mode=None,
263+ def put_bytes_non_atomic(self, relpath, raw_bytes, mode=None,
264 create_parent_dir=False,
265 dir_mode=None):
266+ if not isinstance(raw_bytes, str):
267+ raise TypeError(
268+ 'raw_bytes must be a plain string, not %s' % type(raw_bytes))
269+
270 def writer(fout):
271- fout.write(bytes)
272+ fout.write(raw_bytes)
273 self._put_non_atomic_helper(relpath, writer, mode=mode,
274 create_parent_dir=create_parent_dir,
275 dir_mode=dir_mode)
276
277=== modified file 'doc/en/release-notes/bzr-2.7.txt'
278--- doc/en/release-notes/bzr-2.7.txt 2016-01-21 21:44:58 +0000
279+++ doc/en/release-notes/bzr-2.7.txt 2016-01-22 08:02:39 +0000
280@@ -66,6 +66,10 @@
281 .. Major internal changes, unlikely to be visible to users or plugin
282 developers, but interesting for bzr developers.
283
284+* Make all transport put_bytes() raises TypeError instead of AssertionError
285+ or UnicodeEncodeError when given unicode strings rather than bytes.
286+ (Vincent Ladeuil, #106898)
287+
288 Changed Behaviour
289 *****************
290