Merge lp:~vila/bzr/1116079-gzip-compat into lp:bzr

Proposed by Vincent Ladeuil on 2013-07-09
Status: Merged
Approved by: Robert Collins on 2013-07-09
Approved revision: 6582
Merged at revision: 6580
Proposed branch: lp:~vila/bzr/1116079-gzip-compat
Merge into: lp:bzr
Diff against target: 268 lines (+122/-100)
3 files modified
bzrlib/tests/test_tuned_gzip.py (+6/-3)
bzrlib/tuned_gzip.py (+111/-97)
doc/en/release-notes/bzr-2.6.txt (+5/-0)
To merge this branch: bzr merge lp:~vila/bzr/1116079-gzip-compat
Reviewer          Review type  Date requested  Status
John A Meinel                                  Approve on 2013-07-09
Robert Collins    community    2013-07-09      Approve on 2013-07-09
Review via email: mp+173666@code.launchpad.net

Commit message

Fix test failure for tuned_gzip.

Description of the change

gzip.py has changed in 2.7; AFAIU we don't really need tuned_gzip.py
anymore, but the deprecation was never completed.

This fix mainly does two things:

- fix assertToGzip so the failing test_enormous_chunks doesn't flood the
  output with 256*1024 'a large string\n' twice, i.e. 7,864,320 bytes! I
  suspect the test writer never had this test fail...

- catch up with gzip.py internal design evolution.

That's the minimal effort to get the test suite passing.
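For context, one concrete piece of that internal evolution is how gzip.py keeps its running CRC: newer versions mask the value to an unsigned 32-bit integer. A minimal sketch of why (Python 3 syntax here, though the patch itself targets Python 2):

```python
import zlib

# zlib.crc32 historically returned a signed int on some Python 2
# builds; masking with 0xffffffff normalizes it to the unsigned
# 32-bit value that the gzip trailer actually stores.
crc = zlib.crc32(b"a large string\n")
masked = crc & 0xffffffff
assert 0 <= masked <= 0xffffffff
```

Without the mask, comparing a computed CRC against the trailer's stored value can fail spuriously across interpreter versions.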

Robert Collins (lifeless) wrote :

Looks good to me.

review: Approve
John A Meinel (jameinel) wrote :


On 2013-07-09 12:04, Vincent Ladeuil wrote:
> Vincent Ladeuil has proposed merging
> lp:~vila/bzr/1116079-gzip-compat into lp:bzr.
>
> Requested reviews: bzr-core (bzr-core) Related bugs: Bug #1116079
> in Bazaar: "Test
> bzrlib.tests.test_tuned_gzip.TestToGzip.test_enormous_chunk fails -
> potential regression in python2.7 2.7.3-15ubuntu1"
> https://bugs.launchpad.net/bzr/+bug/1116079
>
> For more details, see:
> https://code.launchpad.net/~vila/bzr/1116079-gzip-compat/+merge/173666
>
> [...]

+ lraw, ldecoded = len(raw_bytes), len(decoded)
+ self.assertEqual(lraw, ldecoded,
+                  'Expecting data length %d, got %d' % (lraw, ldecoded))
+ self.assertEqual(raw_bytes, decoded)

Why not turn that into:

if raw_bytes != decoded:
  self.fail("Raw bytes did not match (not outputting due to size)")

Someone who wants to investigate can do a debug print, but we won't
dump 7MB that nobody can actively use if the length happens to match.
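John's suggestion can be sketched as a plain helper (hypothetical name, standing in for the real TestCase method):

```python
def assert_bytes_match(raw_bytes, decoded):
    # Fail with lengths only: a 7MB payload mismatch should not be
    # dumped wholesale into the test output, where nobody can use it.
    if raw_bytes != decoded:
        raise AssertionError(
            'Raw bytes did not match (%d vs %d bytes; not outputting '
            'due to size)' % (len(raw_bytes), len(decoded)))

# Matching payloads pass silently.
assert_bytes_match(b'a' * 1024, b'a' * 1024)
```

The trade-off is deliberate: a short failure message in exchange for losing the automatic diff, which is worthless at this payload size anyway.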

I would personally be fine just dropping tuned_gzip altogether in
favor of just using upstream's gzip (since it is only deprecated
formats anyway).

But this change is ok, too.

 review: approve

John
=:->


review: Approve
Vincent Ladeuil (vila) wrote :

sent to pqm by email

lp:~vila/bzr/1116079-gzip-compat updated on 2013-07-13
6583. By Vincent Ladeuil on 2013-07-13

Urgh. pqm still runs python 2.6 so we have to maintain compatibility to land the fix 8-(

Vincent Ladeuil (vila) wrote :

sent to pqm by email

Preview Diff

=== modified file 'bzrlib/tests/test_tuned_gzip.py'
--- bzrlib/tests/test_tuned_gzip.py	2011-05-13 12:51:05 +0000
+++ bzrlib/tests/test_tuned_gzip.py	2013-07-13 19:08:24 +0000
@@ -106,14 +106,17 @@
 class TestToGzip(tests.TestCase):
 
     def assertToGzip(self, chunks):
-        bytes = ''.join(chunks)
+        raw_bytes = ''.join(chunks)
         gzfromchunks = tuned_gzip.chunks_to_gzip(chunks)
-        gzfrombytes = tuned_gzip.bytes_to_gzip(bytes)
+        gzfrombytes = tuned_gzip.bytes_to_gzip(raw_bytes)
         self.assertEqual(gzfrombytes, gzfromchunks)
         decoded = self.applyDeprecated(
             symbol_versioning.deprecated_in((2, 3, 0)),
             tuned_gzip.GzipFile, fileobj=StringIO(gzfromchunks)).read()
-        self.assertEqual(bytes, decoded)
+        lraw, ldecoded = len(raw_bytes), len(decoded)
+        self.assertEqual(lraw, ldecoded,
+            'Expecting data length %d, got %d' % (lraw, ldecoded))
+        self.assertEqual(raw_bytes, decoded)
 
     def test_single_chunk(self):
         self.assertToGzip(['a modest chunk\nwith some various\nbits\n'])
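The round trip assertToGzip performs can be approximated with the stdlib gzip module alone (a sketch only; the real test uses bzrlib's chunks_to_gzip/bytes_to_gzip helpers):

```python
import gzip
import io

# Compress some chunks, decompress, then compare lengths before the
# full payloads, mirroring the cheap-check-first order of the patch.
chunks = ['a modest chunk\nwith some various\nbits\n'] * 4
raw_bytes = ''.join(chunks).encode('ascii')

buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as out:
    out.write(raw_bytes)

decoded = gzip.GzipFile(fileobj=io.BytesIO(buf.getvalue())).read()
assert len(decoded) == len(raw_bytes)  # cheap length check first
assert decoded == raw_bytes
```

Checking lengths first means a failure reports two integers rather than two multi-megabyte payloads.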
=== modified file 'bzrlib/tuned_gzip.py'
--- bzrlib/tuned_gzip.py	2011-12-19 13:23:58 +0000
+++ bzrlib/tuned_gzip.py	2013-07-13 19:08:24 +0000
@@ -127,15 +127,28 @@
         DeprecationWarning, stacklevel=2)
         gzip.GzipFile.__init__(self, *args, **kwargs)
 
-    def _add_read_data(self, data):
-        # 4169 calls in 183
-        # temp var for len(data) and switch to +='s.
-        # 4169 in 139
-        len_data = len(data)
-        self.crc = zlib.crc32(data, self.crc)
-        self.extrabuf += data
-        self.extrasize += len_data
-        self.size += len_data
+    if sys.version_info >= (2, 7, 4):
+        def _add_read_data(self, data):
+            # 4169 calls in 183
+            # temp var for len(data) and switch to +='s.
+            # 4169 in 139
+            len_data = len(data)
+            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+            offset = self.offset - self.extrastart
+            self.extrabuf = self.extrabuf[offset:] + data
+            self.extrasize = self.extrasize + len_data
+            self.extrastart = self.offset
+            self.size = self.size + len_data
+    else:
+        def _add_read_data(self, data):
+            # 4169 calls in 183
+            # temp var for len(data) and switch to +='s.
+            # 4169 in 139
+            len_data = len(data)
+            self.crc = zlib.crc32(data, self.crc)
+            self.extrabuf += data
+            self.extrasize += len_data
+            self.size += len_data
 
     def _write_gzip_header(self):
         """A tuned version of gzip._write_gzip_header
@@ -161,97 +174,98 @@
             '' # self.fileobj.write(fname + '\000')
             )
 
-    def _read(self, size=1024):
-        # various optimisations:
-        # reduces lsprof count from 2500 to
-        # 8337 calls in 1272, 365 internal
-        if self.fileobj is None:
-            raise EOFError, "Reached EOF"
-
-        if self._new_member:
-            # If the _new_member flag is set, we have to
-            # jump to the next member, if there is one.
-            #
-            # First, check if we're at the end of the file;
-            # if so, it's time to stop; no more members to read.
-            next_header_bytes = self.fileobj.read(10)
-            if next_header_bytes == '':
-                raise EOFError, "Reached EOF"
-
-            self._init_read()
-            self._read_gzip_header(next_header_bytes)
-            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
-            self._new_member = False
-
-        # Read a chunk of data from the file
-        buf = self.fileobj.read(size)
-
-        # If the EOF has been reached, flush the decompression object
-        # and mark this object as finished.
-
-        if buf == "":
-            self._add_read_data(self.decompress.flush())
-            if len(self.decompress.unused_data) < 8:
-                raise AssertionError("what does flush do?")
-            self._gzip_tail = self.decompress.unused_data[0:8]
-            self._read_eof()
-            # tell the driving read() call we have stuffed all the data
-            # in self.extrabuf
-            raise EOFError, 'Reached EOF'
-
-        self._add_read_data(self.decompress.decompress(buf))
-
-        if self.decompress.unused_data != "":
-            # Ending case: we've come to the end of a member in the file,
-            # so seek back to the start of the data for the next member which
-            # is the length of the decompress objects unused data - the first
-            # 8 bytes for the end crc and size records.
-            #
-            # so seek back to the start of the unused data, finish up
-            # this member, and read a new gzip header.
-            # (The number of bytes to seek back is the length of the unused
-            # data, minus 8 because those 8 bytes are part of this member.
-            seek_length = len (self.decompress.unused_data) - 8
-            if seek_length > 0:
-                # we read too much data
-                self.fileobj.seek(-seek_length, 1)
-                self._gzip_tail = self.decompress.unused_data[0:8]
-            elif seek_length < 0:
-                # we haven't read enough to check the checksum.
-                if not (-8 < seek_length):
-                    raise AssertionError("too great a seek")
-                buf = self.fileobj.read(-seek_length)
-                self._gzip_tail = self.decompress.unused_data + buf
-            else:
-                self._gzip_tail = self.decompress.unused_data
-
-            # Check the CRC and file size, and set the flag so we read
-            # a new member on the next call
-            self._read_eof()
-            self._new_member = True
-
-    def _read_eof(self):
-        """tuned to reduce function calls and eliminate file seeking:
-        pass 1:
-        reduces lsprof count from 800 to 288
-        4168 in 296
-        avoid U32 call by using struct format L
-        4168 in 200
-        """
-        # We've read to the end of the file, so we should have 8 bytes of
-        # unused data in the decompressor. If we don't, there is a corrupt file.
-        # We use these 8 bytes to calculate the CRC and the recorded file size.
-        # We then check the that the computed CRC and size of the
-        # uncompressed data matches the stored values. Note that the size
-        # stored is the true file size mod 2**32.
-        if not (len(self._gzip_tail) == 8):
-            raise AssertionError("gzip trailer is incorrect length.")
-        crc32, isize = struct.unpack("<LL", self._gzip_tail)
-        # note that isize is unsigned - it can exceed 2GB
-        if crc32 != U32(self.crc):
-            raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
-        elif isize != LOWU32(self.size):
-            raise IOError, "Incorrect length of data produced"
+    if sys.version_info < (2, 7, 4):
+        def _read(self, size=1024):
+            # various optimisations:
+            # reduces lsprof count from 2500 to
+            # 8337 calls in 1272, 365 internal
+            if self.fileobj is None:
+                raise EOFError, "Reached EOF"
+
+            if self._new_member:
+                # If the _new_member flag is set, we have to
+                # jump to the next member, if there is one.
+                #
+                # First, check if we're at the end of the file;
+                # if so, it's time to stop; no more members to read.
+                next_header_bytes = self.fileobj.read(10)
+                if next_header_bytes == '':
+                    raise EOFError, "Reached EOF"
+
+                self._init_read()
+                self._read_gzip_header(next_header_bytes)
+                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+                self._new_member = False
+
+            # Read a chunk of data from the file
+            buf = self.fileobj.read(size)
+
+            # If the EOF has been reached, flush the decompression object
+            # and mark this object as finished.
+
+            if buf == "":
+                self._add_read_data(self.decompress.flush())
+                if len(self.decompress.unused_data) < 8:
+                    raise AssertionError("what does flush do?")
+                self._gzip_tail = self.decompress.unused_data[0:8]
+                self._read_eof()
+                # tell the driving read() call we have stuffed all the data
+                # in self.extrabuf
+                raise EOFError, 'Reached EOF'
+
+            self._add_read_data(self.decompress.decompress(buf))
+
+            if self.decompress.unused_data != "":
+                # Ending case: we've come to the end of a member in the file,
+                # so seek back to the start of the data for the next member
+                # which is the length of the decompress objects unused data -
+                # the first 8 bytes for the end crc and size records.
+                #
+                # so seek back to the start of the unused data, finish up
+                # this member, and read a new gzip header.
+                # (The number of bytes to seek back is the length of the unused
+                # data, minus 8 because those 8 bytes are part of this member.
+                seek_length = len (self.decompress.unused_data) - 8
+                if seek_length > 0:
+                    # we read too much data
+                    self.fileobj.seek(-seek_length, 1)
+                    self._gzip_tail = self.decompress.unused_data[0:8]
+                elif seek_length < 0:
+                    # we haven't read enough to check the checksum.
+                    if not (-8 < seek_length):
+                        raise AssertionError("too great a seek")
+                    buf = self.fileobj.read(-seek_length)
+                    self._gzip_tail = self.decompress.unused_data + buf
+                else:
+                    self._gzip_tail = self.decompress.unused_data
+
+                # Check the CRC and file size, and set the flag so we read
+                # a new member on the next call
+                self._read_eof()
+                self._new_member = True
+
+        def _read_eof(self):
+            """tuned to reduce function calls and eliminate file seeking:
+            pass 1:
+            reduces lsprof count from 800 to 288
+            4168 in 296
+            avoid U32 call by using struct format L
+            4168 in 200
+            """
+            # We've read to the end of the file, so we should have 8 bytes of
+            # unused data in the decompressor. If we don't, there is a corrupt
+            # file. We use these 8 bytes to calculate the CRC and the recorded
+            # file size. We then check the that the computed CRC and size of
+            # the uncompressed data matches the stored values. Note that the
+            # size stored is the true file size mod 2**32.
+            if not (len(self._gzip_tail) == 8):
+                raise AssertionError("gzip trailer is incorrect length.")
+            crc32, isize = struct.unpack("<LL", self._gzip_tail)
+            # note that isize is unsigned - it can exceed 2GB
+            if crc32 != U32(self.crc):
+                raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
+            elif isize != LOWU32(self.size):
+                raise IOError, "Incorrect length of data produced"
 
     def _read_gzip_header(self, bytes=None):
         """Supply bytes if the minimum header size is already read.
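The patch's central trick, selecting a method implementation at class-definition time from sys.version_info, can be sketched generically (hypothetical class, Python 3 syntax):

```python
import sys

class VersionedReader(object):
    # An `if` in a class body runs once, when the class is defined,
    # so only the branch matching the running interpreter ever binds
    # its method; there is no per-call version check.
    if sys.version_info >= (2, 7, 4):
        def internals(self):
            return 'post-2.7.4 layout'
    else:
        def internals(self):
            return 'pre-2.7.4 layout'
```

This keeps each interpreter paired with the gzip.py internals it actually ships, at zero runtime cost.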
=== modified file 'doc/en/release-notes/bzr-2.6.txt'
--- doc/en/release-notes/bzr-2.6.txt	2013-05-27 09:13:55 +0000
+++ doc/en/release-notes/bzr-2.6.txt	2013-07-13 19:08:24 +0000
@@ -103,6 +103,11 @@
 * The launchpad plugin now requires API 1.6.0 or later. This version shipped
   with Ubuntu 9.10. (Aaron Bentley)
 
+* Better align with upstream gzip.py in tuned_gzip.py. We may lose a bit of
+  performance but that's for knit and weave formats and already partly
+  deprecated, better keep compatibility than failing fast ;)
+  (Vincent Ladeuil, #1116079)
+
 Testing
 *******
 