Merge lp:~mbp/bzr/bigstring into lp:bzr

Proposed by Martin Pool
Status: Rejected
Rejected by: Martin Packman
Proposed branch: lp:~mbp/bzr/bigstring
Merge into: lp:bzr
Diff against target: 236 lines (+162/-4)
5 files modified
bzrlib/bigstring.py (+90/-0)
bzrlib/groupcompress.py (+18/-4)
bzrlib/help_topics/en/debug-flags.txt (+1/-0)
bzrlib/tests/__init__.py (+2/-0)
bzrlib/tests/test_bigstring.py (+51/-0)
To merge this branch: bzr merge lp:~mbp/bzr/bigstring
Reviewer: Martin Packman (community)
Review status: Disapprove
Review via email: mp+83732@code.launchpad.net

Description of the change

This adds a BigString class: essentially a list of byte strings that is not bounded by available memory, because appended data spills to a temporary file.

It is in aid of bug 890085 (memory usage), but it does not yet clearly help with memory usage, so I'm not sure I actually want to merge it. I'm proposing it here anyway because it's pretty self-contained.
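
For reference, a minimal usage sketch based on the BigString docstring and tests in this branch (writing to stdout is just for illustration):

    import sys
    from bzrlib.bigstring import BigString

    bs = BigString()                      # appended chunks spill to a temp file
    bs.append_chunks('chunk-%d ' % i for i in range(4))
    print len(bs)                         # total number of bytes appended
    for chunk in bs.get_chunks():         # read back in bounded-size pieces
        sys.stdout.write(chunk)           # pass chunks along; never ''.join() them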

Revision history for this message
Martin Packman (gz) wrote :

One thing I'm not crazy about here is that in almost all cases we're just going to be writing the data to disk anyway: we carefully use a compressor that supports incremental encoding, but then need all the output in one list anyway, all to support a needlessly abstract API that wants to deal in streams but provide a length header. So we hurt the common case by writing to disk twice with this change, to avoid fixing the higher levels.

review: Abstain
Revision history for this message
Martin Pool (mbp) wrote :

On 2 December 2011 22:17, Martin Packman <email address hidden> wrote:
> Review: Abstain
>
> One thing I'm not crazy about here is that in almost all cases we're just going to be writing the data to disk anyway: we carefully use a compressor that supports incremental encoding, but then need all the output in one list anyway, all to support a needlessly abstract API that wants to deal in streams but provide a length header. So we hurt the common case by writing to disk twice with this change, to avoid fixing the higher levels.

Yes, obviously there is some friction in the APIs, and this is not a
magic bullet for fixing that.

Beyond the API, we are stuck with a length-prefixed format unless we
can think of some clever way to write multiple chunks compatibly, or
we decide to handle large files under a different format flag, or
there is some other way around it (is there?). To write that format
we need to know the length of the compressed data before we write the
header, and therefore we need to buffer up all the compressed data.
At the moment we buffer it in memory; we could instead buffer it to
disk, at least for large blocks. When physical memory is free, writing
to a temporary file and then erasing it shouldn't be much slower than
just holding it in memory; when physical memory is low it will
probably cope much better.
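
To make that concrete, a rough sketch of "buffer to disk instead" (the
write_length_prefixed helper and the plain '%d\n' header are invented
for illustration; the real pack/groupcompress header is different):

    import zlib

    from bzrlib.bigstring import BigString

    def write_length_prefixed(out_file, chunks):
        # Buffer the compressed output in a BigString (i.e. in a temp file)
        # so the total length is known before the header is written, without
        # holding the whole compressed block in memory.
        compressor = zlib.compressobj()
        buffered = BigString()
        buffered.append_chunks(compressor.compress(c) for c in chunks)
        buffered.append_chunks([compressor.flush()])
        out_file.write('%d\n' % len(buffered))   # header needs the final length
        for chunk in buffered.get_chunks():
            out_file.write(chunk)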

--
Martin

Revision history for this message
Martin Pool (mbp) wrote :

...I'm not going to merge this unless it clearly does help; I've just
put it up here because it is fairly self-contained.

Revision history for this message
Martin Packman (gz) wrote :

Yes, having the length prefix in the format does complicate things. The best idea I could come up with was to always write the pack to disk first, even when streaming to a remote location. Then it would be possible to leave log10(uncompressed_len + overhead) bytes of space at the start and, when finalizing the compressobj, seek back and write in the compressed length with leading zeros. That may still be a format change.
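
Roughly, the back-patching idea looks like this (sketch only: the fixed-width field stands in for the log10(uncompressed_len + overhead) sizing above, and the layout is invented):

    import zlib

    def write_backpatched(path, chunks, width=20):
        # Reserve a fixed-width length field, stream the compressed data
        # after it, then seek back and fill the field in with leading zeros.
        # This only works on a seekable local file, hence writing the pack
        # to disk first.
        compressor = zlib.compressobj()
        out = open(path, 'wb')
        out.write('0' * width)                 # placeholder for the length field
        length = 0
        for c in chunks:
            z = compressor.compress(c)
            out.write(z)
            length += len(z)
        z = compressor.flush()
        out.write(z)
        length += len(z)
        out.seek(0)
        out.write('%0*d' % (width, length))    # back-patch with leading zeros
        out.close()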

Revision history for this message
Martin Pool (mbp) wrote :

On 8 December 2011 01:46, Martin Packman <email address hidden> wrote:
> Yes, having the length prefix in the format does complicate things. The best idea I could come up with was to always write the pack to disk first, even when streaming to a remote location. Then it would be possible to leave log10(uncompressed_len + overhead) bytes at the start, then when finalizing the compressobj seek back and write in the compressed length with leading zeros. That may still be a format change.

seeking won't work well on a network stream :)

--
Martin

Revision history for this message
Martin Packman (gz) wrote :

> On 8 December 2011 01:46, Martin Packman <email address hidden> wrote:
> > Yes, having the length prefix in the format does complicate things. The best
> idea I could come up with was to always write the pack to disk first, even
> when streaming to a remote location. Then it would be possible to leave
> log10(uncompressed_len + overhead) bytes at the start, then when finalizing
> the compressobj seek back and write in the compressed length with leading
> zeros. That may still be a format change.
>
> seeking won't work well on a network stream :)

Hence the need to always write to disk first. It means delaying sending any data till the whole compressed block is completed, but that's already the case now as the whole lot is compressed in memory in order to get the size for the header.

Revision history for this message
Vincent Ladeuil (vila) wrote :

An alternative to back-patching is to send chunks with a marker indicating that there are more to come.
The receiving side can then buffer them (in memory or on disk) until the last chunk is received.
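
For illustration, a rough sketch of that framing (the marker bytes and fixed-width lengths below are invented, not an existing bzr wire format):

    def frame_chunks(chunks):
        # Sender side: each chunk carries its own length and a flag saying
        # whether more chunks follow, so no total length header is needed.
        for chunk in chunks:
            yield 'c%010d' % len(chunk) + chunk   # 'c': another chunk follows
        yield 'e%010d' % 0                        # 'e': end of stream

    def read_framed(read):
        # Receiver side: collect the chunks (in memory, or in a BigString /
        # temp file) until the end marker arrives.
        while True:
            kind = read(1)
            length = int(read(10))
            if kind == 'e':
                return
            yield read(length)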

Revision history for this message
Martin Packman (gz) wrote :

Without follow-up work I don't think it's useful to land this; the code can be referred to later even if it's not merged.

review: Disapprove
Revision history for this message
Martin Pool (mbp) wrote :

I agree (regretfully).

Unmerged revisions

6262. By Martin Pool

Remove stray pdb call

6261. By Martin Pool

Show the bigstring temp file name

6260. By Martin Pool

Use BigString from groupcompress and add some debug help

6259. By Martin Pool

Add basic BigString implementation

Preview Diff

=== added file 'bzrlib/bigstring.py'
--- bzrlib/bigstring.py 1970-01-01 00:00:00 +0000
+++ bzrlib/bigstring.py 2011-11-29 07:33:40 +0000
@@ -0,0 +1,90 @@
+# Copyright (C) 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+"""Large bytestring-like object that spills to disk."""
+
+
+import tempfile
+
+
+class BigString(object):
+    """Large bytestring-like object that spills to disk.
+
+    BigStrings can have content appended to them, can report their length,
+    and can be read back as a series of chunks.
+
+    The API is such as to encourage client code to be written in a way
+    that will never hold the whole string in memory.
+
+    Typical usage:
+
+    >>> bs = BigString()
+    >>> bs.append_chunks(' %d' % i for i in range(4))
+    >>> len(bs)
+    8
+    >>> ''.join(bs.get_chunks())
+    ' 0 1 2 3'
+
+    The last of these, joining all the chunks up in to one big in-memory
+    string, defeats the purpose of the whole thing.  Don't do that.  Write them
+    to a file one at a time.
+    """
+
+    # TODO: Keep it in memory when it's small, and just create a file if it
+    # actually gets big.
+
+    def __init__(self):
+        # TODO: Maybe pass a 0 bufsiz, if we're sure we'll always just do large
+        # writes...
+        #
+        # TODO: Maybe fadvise when possible.
+        self._file = tempfile.NamedTemporaryFile(
+            mode='ab+',
+            prefix='bzrbigstr-')
+        self._length = 0
+        self._read_chunk_size = 16384
+
+    def __repr__(self):
+        return "%s(%r)" % (
+            self.__class__.__name__,
+            self._file.name)
+
+    def append_chunks(self, chunks):
+        for b in chunks:
+            if not isinstance(b, str):
+                raise TypeError(type(b))
+            # File is in append mode, so even if we've seeked back this is safe.
+            self._file.write(b)
+            self._length += len(b)
+
+    def __len__(self):
+        return self._length
+
+    def get_chunks(self):
+        self._file.flush()  # To the OS, not necessarily to disk.
+        self._file.seek(0)
+        yielded_bytes = 0
+        while True:
+            c = self._file.read(self._read_chunk_size)
+            if not c:
+                break
+            yield c
+            yielded_bytes += len(c)
+        if yielded_bytes != self._length:
+            raise AssertionError(
+                "%r expected to get back %d bytes, actually returned %d" %
+                (self, self._length, yielded_bytes))
=== modified file 'bzrlib/groupcompress.py'
--- bzrlib/groupcompress.py 2011-11-18 05:13:19 +0000
+++ bzrlib/groupcompress.py 2011-11-29 07:33:40 +0000
@@ -17,12 +17,14 @@
 """Core compression logic for compressing streams of related files."""
 
 import time
+import warnings
 import zlib
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
 from bzrlib import (
     annotate,
+    bigstring,
     config,
     debug,
     errors,
@@ -298,11 +300,17 @@
         compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
         # Peak in this point is 1 fulltext, 1 compressed text, + zlib overhead
         # (measured peak is maybe 30MB over the above...)
-        compressed_chunks = map(compressor.compress, chunks)
-        compressed_chunks.append(compressor.flush())
+        if 'groupcompress' in debug.debug_flags:
+            trace.mutter(
+                '_create_z_content_from_chunks: %d chunks total %d bytes' % (
+                len(chunks), sum(map(len, chunks))))
+        bigs = bigstring.BigString()
+        bigs.append_chunks(compressor.compress(c) for c in chunks)
+        bigs.append_chunks([compressor.flush()])
+        compressor = None # Can't delete because of Python quirk?
         # Ignore empty chunks
-        self._z_content_chunks = [c for c in compressed_chunks if c]
-        self._z_content_length = sum(map(len, self._z_content_chunks))
+        self._z_content_chunks = list(bigs.get_chunks())
+        self._z_content_length = len(bigs)
 
     def _create_z_content(self):
         if self._z_content_chunks is not None:
@@ -826,6 +834,12 @@
             if nostore_sha == _null_sha1:
                 raise errors.ExistingContent()
             return _null_sha1, 0, 0, 'fulltext'
+        if 'groupcompress' in debug.debug_flags:
+            trace.mutter("compress %d bytes" % len(bytes))
+            if len(bytes) > 10<<20:
+                warnings.warn("groupcompress given over-large %d byte block"
+                    % len(bytes))
+                import pdb;pdb.set_trace()
         # we assume someone knew what they were doing when they passed it in
         if expected_sha is not None:
             sha1 = expected_sha
=== modified file 'bzrlib/help_topics/en/debug-flags.txt'
--- bzrlib/help_topics/en/debug-flags.txt 2011-10-10 13:51:29 +0000
+++ bzrlib/help_topics/en/debug-flags.txt 2011-11-29 07:33:40 +0000
@@ -15,6 +15,7 @@
 -Dfilters          Emit information for debugging content filtering.
 -Dforceinvdeltas   Force use of inventory deltas during generic streaming fetch.
 -Dgraph            Trace graph traversal.
+-Dgroupcompress    Groupcompress internals.
 -Dhashcache        Log every time a working file is read to determine its hash.
 -Dhooks            Trace hook execution.
 -Dhpss             Trace smart protocol requests and responses.
=== modified file 'bzrlib/tests/__init__.py'
--- bzrlib/tests/__init__.py 2011-11-29 06:50:54 +0000
+++ bzrlib/tests/__init__.py 2011-11-29 07:33:40 +0000
@@ -3928,6 +3928,7 @@
         'bzrlib.tests.test_api',
         'bzrlib.tests.test_atomicfile',
         'bzrlib.tests.test_bad_files',
+        'bzrlib.tests.test_bigstring',
         'bzrlib.tests.test_bisect_multi',
         'bzrlib.tests.test_branch',
         'bzrlib.tests.test_branchbuilder',
@@ -4104,6 +4105,7 @@
         return []
     return [
         'bzrlib',
+        'bzrlib.bigstring',
         'bzrlib.branchbuilder',
         'bzrlib.decorators',
         'bzrlib.inventory',
=== added file 'bzrlib/tests/test_bigstring.py'
--- bzrlib/tests/test_bigstring.py 1970-01-01 00:00:00 +0000
+++ bzrlib/tests/test_bigstring.py 2011-11-29 07:33:40 +0000
@@ -0,0 +1,51 @@
+# Copyright (C) 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for BigString"""
+
+from testtools.matchers import (
+    Matcher,
+    StartsWith,
+    )
+
+from bzrlib.bigstring import BigString
+from bzrlib.tests import TestCase
+
+
+class TestBigString(TestCase):
+
+    def test_repr(self):
+        bs = BigString()
+        self.assertThat(repr(bs),
+            StartsWith('BigString('))
+
+    def test_simple_get_chunks(self):
+        bs = BigString()
+        bs.append_chunks(['hello', 'world'])
+        self.assertEqual(''.join(bs.get_chunks()),
+            'helloworld')
+        self.assertEqual(len(bs), 10)
+
+    def test_get_chunks_limited_size(self):
+        bs = BigString()
+        bs.append_chunks(['hello', 'world', '!'])
+        bs._read_chunk_size = 3
+        self.assertEqual(
+            list(bs.get_chunks()),
+            ['hel', 'low', 'orl', 'd!'])
+        self.assertEqual(
+            len(bs),
+            11)