1
=== modified file 'bzrlib/tests/__init__.py'
2
--- bzrlib/tests/__init__.py	2011-05-13 12:51:05 +0000
3
+++ bzrlib/tests/__init__.py	2011-05-14 15:01:51 +0000
4
@@ -3899,6 +3899,7 @@
5
3899
        'bzrlib.tests.test_upgrade',
3899
        'bzrlib.tests.test_upgrade',
6
3900
        'bzrlib.tests.test_upgrade_stacked',
3900
        'bzrlib.tests.test_upgrade_stacked',
7
3901
        'bzrlib.tests.test_urlutils',
3901
        'bzrlib.tests.test_urlutils',
8
3902
        'bzrlib.tests.test_utextwrap',
9
3902
        'bzrlib.tests.test_version',
3903
        'bzrlib.tests.test_version',
10
3903
        'bzrlib.tests.test_version_info',
3904
        'bzrlib.tests.test_version_info',
11
3904
        'bzrlib.tests.test_versionedfile',
3905
        'bzrlib.tests.test_versionedfile',
12
3905
3906
13
=== added file 'bzrlib/tests/test_utextwrap.py'
14
--- bzrlib/tests/test_utextwrap.py	1970-01-01 00:00:00 +0000
15
+++ bzrlib/tests/test_utextwrap.py	2011-05-14 15:01:51 +0000
16
@@ -0,0 +1,207 @@
17
1
# Copyright (C) 2011 Canonical Ltd
18
2
#
19
3
# This program is free software; you can redistribute it and/or modify
20
4
# it under the terms of the GNU General Public License as published by
21
5
# the Free Software Foundation; either version 2 of the License, or
22
6
# (at your option) any later version.
23
7
#
24
8
# This program is distributed in the hope that it will be useful,
25
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
26
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
11
# GNU General Public License for more details.
28
12
#
29
13
# You should have received a copy of the GNU General Public License
30
14
# along with this program; if not, write to the Free Software
31
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
32
16
#
33
17
34
18
"""Tests of the bzrlib.utextwrap."""
35
19
36
20
from bzrlib import tests, utextwrap
37
21
from bzrlib.tests import TestSkipped
38
22
39
23
# Japanese "Good morning".
40
24
# Each character have double width. So total 8 width on console.
41
25
_str_D = u'\u304a\u306f\u3088\u3046'
42
26
43
27
_str_S = u"hello"
44
28
45
29
# Combine single width characters and double width characters.
46
30
_str_SD = _str_S + _str_D
47
31
_str_DS = _str_D + _str_S
48
32
49
33
class TestUTextWrap(tests.TestCase):
50
34
51
35
    def check_width(self, text, expected_width):
52
36
        w = utextwrap.UTextWrapper()
53
37
        self.assertEqual(
54
38
                w._width(text),
55
39
                expected_width,
56
40
                "Width of %r should be %d" % (text, expected_width))
57
41
58
42
    def test_width(self):
59
43
        self.check_width(_str_D, 8)
60
44
        self.check_width(_str_SD, 13)
61
45
62
46
    def check_cut(self, text, width, pos):
63
47
        w = utextwrap.UTextWrapper()
64
48
        self.assertEqual((text[:pos], text[pos:]), w._cut(text, width))
65
49
66
50
    def test_cut(self):
67
51
        s = _str_SD
68
52
        self.check_cut(s, 0, 0)
69
53
        self.check_cut(s, 1, 1)
70
54
        self.check_cut(s, 5, 5)
71
55
        self.check_cut(s, 6, 5)
72
56
        self.check_cut(s, 7, 6)
73
57
        self.check_cut(s, 12, 8)
74
58
        self.check_cut(s, 13, 9)
75
59
        self.check_cut(s, 14, 9)
76
60
        self.check_cut(u'A'*5, 3, 3)
77
61
78
62
    def test_split(self):
79
63
        w = utextwrap.UTextWrapper()
80
64
        self.assertEqual(list(_str_D), w._split(_str_D))
81
65
        self.assertEqual([_str_S]+list(_str_D), w._split(_str_SD))
82
66
        self.assertEqual(list(_str_D)+[_str_S], w._split(_str_DS))
83
67
84
68
    def test_wrap(self):
85
69
        self.assertEqual(list(_str_D), utextwrap.wrap(_str_D, 1))
86
70
        self.assertEqual(list(_str_D), utextwrap.wrap(_str_D, 2))
87
71
        self.assertEqual(list(_str_D), utextwrap.wrap(_str_D, 3))
88
72
        self.assertEqual(list(_str_D),
89
73
                         utextwrap.wrap(_str_D, 3, break_long_words=False))
90
74
91
75
class TestUTextFill(tests.TestCase):
92
76
93
77
    def test_fill_simple(self):
94
78
        # Test only can call fill() because it's just '\n'.join(wrap(text)).
95
79
        self.assertEqual("%s\n%s" % (_str_D[:2], _str_D[2:]),
96
80
                         utextwrap.fill(_str_D, 4))
97
81
98
82
    def test_fill_with_breaks(self):
99
83
        # Demonstrate complicated case.
100
84
        text = u"spam ham egg spamhamegg" + _str_D + u" spam" + _str_D*2
101
85
        self.assertEqual(u'\n'.join(["spam ham",
102
86
                                     "egg spam",
103
87
                                     "hamegg" + _str_D[0],
104
88
                                     _str_D[1:],
105
89
                                     "spam" + _str_D[:2],
106
90
                                     _str_D[2:]+_str_D[:2],
107
91
                                     _str_D[2:]]),
108
92
                         utextwrap.fill(text, 8))
109
93
110
94
    def test_fill_without_breaks(self):
111
95
        text = u"spam ham egg spamhamegg" + _str_D + u" spam" + _str_D*2
112
96
        self.assertEqual(u'\n'.join(["spam ham",
113
97
                                     "egg",
114
98
                                     "spamhamegg", 
115
99
                                     # border between single width and double
116
100
                                     # width.
117
101
                                     _str_D,
118
102
                                     "spam" + _str_D[:2],
119
103
                                     _str_D[2:]+_str_D[:2],
120
104
                                     _str_D[2:]]),
121
105
                         utextwrap.fill(text, 8, break_long_words=False))
122
106
123
107
    def test_fill_indent_with_breaks(self):
124
108
        w = utextwrap.UTextWrapper(8, initial_indent=' '*4,
125
109
                                   subsequent_indent=' '*4)
126
110
        self.assertEqual(u'\n'.join(["    hell",
127
111
                                     "    o" + _str_D[0],
128
112
                                     "    " + _str_D[1:3],
129
113
                                     "    " + _str_D[3]
130
114
                                     ]),
131
115
                         w.fill(_str_SD))
132
116
133
117
    def test_fill_indent_without_breaks(self):
134
118
        w = utextwrap.UTextWrapper(8, initial_indent=' '*4,
135
119
                                   subsequent_indent=' '*4)
136
120
        w.break_long_words = False
137
121
        self.assertEqual(u'\n'.join(["    hello",
138
122
                                     "    " + _str_D[:2],
139
123
                                     "    " + _str_D[2:],
140
124
                                     ]),
141
125
                         w.fill(_str_SD))
142
126
143
127
    def test_fill_indent_without_breaks_with_fixed_width(self):
144
128
        w = utextwrap.UTextWrapper(8, initial_indent=' '*4,
145
129
                                   subsequent_indent=' '*4)
146
130
        w.break_long_words = False
147
131
        w.width = 3
148
132
        self.assertEqual(u'\n'.join(["    hello",
149
133
                                     "    " + _str_D[0],
150
134
                                     "    " + _str_D[1],
151
135
                                     "    " + _str_D[2],
152
136
                                     "    " + _str_D[3],
153
137
                                     ]),
154
138
                         w.fill(_str_SD))
155
139
156
140
class TestUTextWrapAmbiWidth(tests.TestCase):
157
141
    _cyrill_char = u"\u0410" # east_asian_width() == 'A'
158
142
159
143
    def test_ambiwidth1(self):
160
144
        w = utextwrap.UTextWrapper(4, ambiguous_width=1)
161
145
        s = self._cyrill_char*8
162
146
        self.assertEqual([self._cyrill_char*4]*2, w.wrap(s))
163
147
164
148
    def test_ambiwidth2(self):
165
149
        w = utextwrap.UTextWrapper(4, ambiguous_width=2)
166
150
        s = self._cyrill_char*8
167
151
        self.assertEqual([self._cyrill_char*2]*4, w.wrap(s))
168
152
169
153
170
154
# Regression test with Python's test_textwrap
171
155
# Note that some distribution including Ubuntu doesn't install
172
156
# Python's test suite.
173
157
try:
174
158
    from test import test_textwrap
175
159
176
160
    def override_textwrap_symbols(testcase):
177
161
        # Override the symbols imported by test_textwrap so it uses our own
178
162
        # replacements.
179
163
        testcase.overrideAttr(test_textwrap, 'TextWrapper',
180
164
                              utextwrap.UTextWrapper)
181
165
        testcase.overrideAttr(test_textwrap, 'wrap', utextwrap.wrap)
182
166
        testcase.overrideAttr(test_textwrap, 'fill', utextwrap.fill)
183
167
184
168
185
169
    def setup_both(testcase, base_class, reused_class):
186
170
        super(base_class, testcase).setUp()
187
171
        override_textwrap_symbols(testcase)
188
172
        reused_class.setUp(testcase)
189
173
190
174
191
175
    class TestWrap(tests.TestCase, test_textwrap.WrapTestCase):
192
176
193
177
        def setUp(self):
194
178
            setup_both(self, TestWrap, test_textwrap.WrapTestCase)
195
179
196
180
197
181
    class TestLongWord(tests.TestCase, test_textwrap.LongWordTestCase):
198
182
199
183
        def setUp(self):
200
184
            setup_both(self, TestLongWord, test_textwrap.LongWordTestCase)
201
185
202
186
203
187
    class TestIndent(tests.TestCase, test_textwrap.IndentTestCases):
204
188
205
189
        def setUp(self):
206
190
            setup_both(self, TestIndent, test_textwrap.IndentTestCases)
207
191
208
192
except ImportError:
209
193
210
194
    class TestWrap(tests.TestCase):
211
195
212
196
        def test_wrap(self):
213
197
            raise TestSkipped("test.test_textwrap is not avialable.")
214
198
215
199
    class TestLongWord(tests.TestCase):
216
200
217
201
        def test_longword(self):
218
202
            raise TestSkipped("test.test_textwrap is not avialable.")
219
203
220
204
    class TestIndent(tests.TestCase):
221
205
222
206
        def test_indent(self):
223
207
            raise TestSkipped("test.test_textwrap is not avialable.")
224
0
208
225
=== added file 'bzrlib/utextwrap.py'
226
--- bzrlib/utextwrap.py	1970-01-01 00:00:00 +0000
227
+++ bzrlib/utextwrap.py	2011-05-14 15:01:51 +0000
228
@@ -0,0 +1,264 @@
229
1
# Copyright (C) 2011 Canonical Ltd
230
2
#
231
3
# UTextWrapper._handle_long_word, UTextWrapper._wrap_chunks,
232
4
# UTextWrapper._fix_sentence_endings, wrap and fill is copied from Python's
233
5
# textwrap module (under PSF license) and modified for support CJK.
234
6
# Original Copyright for these functions:
235
7
#
236
8
# Copyright (C) 1999-2001 Gregory P. Ward.
237
9
# Copyright (C) 2002, 2003 Python Software Foundation.
238
10
#
239
11
# Written by Greg Ward <gward@python.net>
240
12
# This program is free software; you can redistribute it and/or modify
241
13
# it under the terms of the GNU General Public License as published by
242
14
# the Free Software Foundation; either version 2 of the License, or
243
15
# (at your option) any later version.
244
16
#
245
17
# This program is distributed in the hope that it will be useful,
246
18
# but WITHOUT ANY WARRANTY; without even the implied warranty of
247
19
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
248
20
# GNU General Public License for more details.
249
21
#
250
22
# You should have received a copy of the GNU General Public License
251
23
# along with this program; if not, write to the Free Software
252
24
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
253
25
254
26
import sys
255
27
import textwrap
256
28
from unicodedata import east_asian_width as _eawidth
257
29
258
30
from bzrlib import osutils
259
31
260
32
__all__ = ["UTextWrapper", "fill", "wrap"]
261
33
262
34
class UTextWrapper(textwrap.TextWrapper):
263
35
    """
264
36
    Extend TextWrapper for Unicode.
265
37
266
38
    This textwrapper handles east asian double width and split word
267
39
    even if !break_long_words when word contains double width
268
40
    characters.
269
41
270
42
    :param ambiguous_width: (keyword argument) width for character when
271
43
                            unicodedata.east_asian_width(c) == 'A'
272
44
                            (default: 1)
273
45
274
46
    Limitations:
275
47
    * expand_tabs doesn't fixed. It uses len() for calculating width
276
48
      of string on left of TAB.
277
49
    * Handles one codeunit as a single character having 1 or 2 width.
278
50
      This is not correct when there are surrogate pairs, combined
279
51
      characters or zero-width characters.
280
52
    * Treats all asian character are line breakable. But it is not
281
53
      true because line breaking is prohibited around some characters.
282
54
      (For example, breaking before punctation mark is prohibited.)
283
55
      See UAX # 14 "UNICODE LINE BREAKING ALGORITHM"
284
56
    """
285
57
286
58
    def __init__(self, width=None, **kwargs):
287
59
        if width is None:
288
60
            width = (osutils.terminal_width() or
289
61
                        osutils.default_terminal_width) - 1
290
62
291
63
        ambi_width = kwargs.pop('ambiguous_width', 1)
292
64
        if ambi_width == 1:
293
65
            self._east_asian_doublewidth = 'FW'
294
66
        elif ambi_width == 2:
295
67
            self._east_asian_doublewidth = 'FWA'
296
68
        else:
297
69
            raise ValueError("ambiguous_width should be 1 or 2")
298
70
299
71
        # No drop_whitespace param before Python 2.6 it was always dropped
300
72
        if sys.version_info < (2, 6):
301
73
            self.drop_whitespace = kwargs.pop("drop_whitespace", True)
302
74
            if not self.drop_whitespace:
303
75
                raise ValueError("TextWrapper version must drop whitespace")
304
76
        textwrap.TextWrapper.__init__(self, width, **kwargs)
305
77
306
78
    def _unicode_char_width(self, uc):
307
79
        """Return width of character `uc`.
308
80
309
81
        :param:     uc      Single unicode character.
310
82
        """
311
83
        # 'A' means width of the character is not be able to determine.
312
84
        # We assume that it's width is 2 because longer wrap may over
313
85
        # terminal width but shorter wrap may be acceptable.
314
86
        return (_eawidth(uc) in self._east_asian_doublewidth and 2) or 1
315
87
316
88
    def _width(self, s):
317
89
        """Returns width for s.
318
90
319
91
        When s is unicode, take care of east asian width.
320
92
        When s is bytes, treat all byte is single width character.
321
93
        """
322
94
        charwidth = self._unicode_char_width
323
95
        return sum(charwidth(c) for c in s)
324
96
325
97
    def _cut(self, s, width):
326
98
        """Returns head and rest of s. (head+rest == s)
327
99
328
100
        Head is large as long as _width(head) <= width.
329
101
        """
330
102
        w = 0
331
103
        charwidth = self._unicode_char_width
332
104
        for pos, c in enumerate(s):
333
105
            w += charwidth(c)
334
106
            if w > width:
335
107
                return s[:pos], s[pos:]
336
108
        return s, u''
337
109
338
110
    def _fix_sentence_endings(self, chunks):
339
111
        """_fix_sentence_endings(chunks : [string])
340
112
341
113
        Correct for sentence endings buried in 'chunks'.  Eg. when the
342
114
        original text contains "... foo.\nBar ...", munge_whitespace()
343
115
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
344
116
        which has one too few spaces; this method simply changes the one
345
117
        space to two.
346
118
347
119
        Note: This function is copied from textwrap.TextWrap and modified
348
120
        to use unicode always.
349
121
        """
350
122
        i = 0
351
123
        L = len(chunks)-1
352
124
        patsearch = self.sentence_end_re.search
353
125
        while i < L:
354
126
            if chunks[i+1] == u" " and patsearch(chunks[i]):
355
127
                chunks[i+1] = u"  "
356
128
                i += 2
357
129
            else:
358
130
                i += 1
359
131
360
132
    def _handle_long_word(self, chunks, cur_line, cur_len, width):
361
133
        # Figure out when indent is larger than the specified width, and make
362
134
        # sure at least one character is stripped off on every pass
363
135
        if width < 2:
364
136
            space_left = chunks[-1] and self._width(chunks[-1][0]) or 1
365
137
        else:
366
138
            space_left = width - cur_len
367
139
368
140
        # If we're allowed to break long words, then do so: put as much
369
141
        # of the next chunk onto the current line as will fit.
370
142
        if self.break_long_words:
371
143
            head, rest = self._cut(chunks[-1], space_left)
372
144
            cur_line.append(head)
373
145
            if rest:
374
146
                chunks[-1] = rest
375
147
            else:
376
148
                del chunks[-1]
377
149
378
150
        # Otherwise, we have to preserve the long word intact.  Only add
379
151
        # it to the current line if there's nothing already there --
380
152
        # that minimizes how much we violate the width constraint.
381
153
        elif not cur_line:
382
154
            cur_line.append(chunks.pop())
383
155
384
156
        # If we're not allowed to break long words, and there's already
385
157
        # text on the current line, do nothing.  Next time through the
386
158
        # main loop of _wrap_chunks(), we'll wind up here again, but
387
159
        # cur_len will be zero, so the next line will be entirely
388
160
        # devoted to the long word that we can't handle right now.
389
161
390
162
    def _wrap_chunks(self, chunks):
391
163
        lines = []
392
164
        if self.width <= 0:
393
165
            raise ValueError("invalid width %r (must be > 0)" % self.width)
394
166
395
167
        # Arrange in reverse order so items can be efficiently popped
396
168
        # from a stack of chucks.
397
169
        chunks.reverse()
398
170
399
171
        while chunks:
400
172
401
173
            # Start the list of chunks that will make up the current line.
402
174
            # cur_len is just the length of all the chunks in cur_line.
403
175
            cur_line = []
404
176
            cur_len = 0
405
177
406
178
            # Figure out which static string will prefix this line.
407
179
            if lines:
408
180
                indent = self.subsequent_indent
409
181
            else:
410
182
                indent = self.initial_indent
411
183
412
184
            # Maximum width for this line.
413
185
            width = self.width - len(indent)
414
186
415
187
            # First chunk on line is whitespace -- drop it, unless this
416
188
            # is the very beginning of the text (ie. no lines started yet).
417
189
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
418
190
                del chunks[-1]
419
191
420
192
            while chunks:
421
193
                # Use _width instead of len for east asian width
422
194
                l = self._width(chunks[-1])
423
195
424
196
                # Can at least squeeze this chunk onto the current line.
425
197
                if cur_len + l <= width:
426
198
                    cur_line.append(chunks.pop())
427
199
                    cur_len += l
428
200
429
201
                # Nope, this line is full.
430
202
                else:
431
203
                    break
432
204
433
205
            # The current line is full, and the next chunk is too big to
434
206
            # fit on *any* line (not just this one).
435
207
            if chunks and self._width(chunks[-1]) > width:
436
208
                self._handle_long_word(chunks, cur_line, cur_len, width)
437
209
438
210
            # If the last chunk on this line is all whitespace, drop it.
439
211
            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
440
212
                del cur_line[-1]
441
213
442
214
            # Convert current line back to a string and store it in list
443
215
            # of all lines (return value).
444
216
            if cur_line:
445
217
                lines.append(indent + u''.join(cur_line))
446
218
447
219
        return lines
448
220
449
221
    def _split(self, text):
450
222
        chunks = textwrap.TextWrapper._split(self, unicode(text))
451
223
        cjk_split_chunks = []
452
224
        for chunk in chunks:
453
225
            prev_pos = 0
454
226
            for pos, char in enumerate(chunk):
455
227
                if self._unicode_char_width(char) == 2:
456
228
                    if prev_pos < pos:
457
229
                        cjk_split_chunks.append(chunk[prev_pos:pos])
458
230
                    cjk_split_chunks.append(char)
459
231
                    prev_pos = pos+1
460
232
            if prev_pos < len(chunk):
461
233
                cjk_split_chunks.append(chunk[prev_pos:])
462
234
        return cjk_split_chunks
463
235
464
236
    def wrap(self, text):
465
237
        # ensure text is unicode
466
238
        return textwrap.TextWrapper.wrap(self, unicode(text))
467
239
468
240
# -- Convenience interface ---------------------------------------------
469
241
470
242
def wrap(text, width=None, **kwargs):
471
243
    """Wrap a single paragraph of text, returning a list of wrapped lines.
472
244
473
245
    Reformat the single paragraph in 'text' so it fits in lines of no
474
246
    more than 'width' columns, and return a list of wrapped lines.  By
475
247
    default, tabs in 'text' are expanded with string.expandtabs(), and
476
248
    all other whitespace characters (including newline) are converted to
477
249
    space.  See TextWrapper class for available keyword args to customize
478
250
    wrapping behaviour.
479
251
    """
480
252
    return UTextWrapper(width=width, **kwargs).wrap(text)
481
253
482
254
def fill(text, width=None, **kwargs):
483
255
    """Fill a single paragraph of text, returning a new string.
484
256
485
257
    Reformat the single paragraph in 'text' to fit in lines of no more
486
258
    than 'width' columns, and return a new string containing the entire
487
259
    wrapped paragraph.  As with wrap(), tabs are expanded and other
488
260
    whitespace characters converted to space.  See TextWrapper class for
489
261
    available keyword args to customize wrapping behaviour.
490
262
    """
491
263
    return UTextWrapper(width=width, **kwargs).fill(text)
492
264
Reviewer	Date Requested	Status
Martin Packman (community)		Approve on 2011-05-14
Vincent Ladeuil	2011-05-04	Approve on 2011-05-05
Review via email: mp+59950@code.launchpad.net