Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/extract
Merge into: lp:brz
Diff against target: 345 lines (+50/-49), 8 files modified:
  breezy/bzr/bundle/serializer/v4.py (+2/-2)
  breezy/bzr/groupcompress.py (+18/-18)
  breezy/bzr/knitpack_repo.py (+2/-2)
  breezy/bzr/pack.py (+6/-5)
  breezy/bzr/pack_repo.py (+1/-1)
  breezy/tests/test_bundle.py (+1/-1)
  breezy/tests/test_groupcompress.py (+4/-4)
  breezy/tests/test_pack.py (+16/-16)
To merge this branch: bzr merge lp:~jelmer/brz/extract
Related bugs: none
Reviewer: Jelmer Vernooij (status: Approve)
Review via email: mp+378773@code.launchpad.net
Commit message
More chunkification of groupcompress APIs.
Description of the change
More chunkification of groupcompress APIs.
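
The practical effect is that ContainerWriter.add_bytes_record now takes a list of chunks plus an explicit total length instead of a single bytes string, and GroupCompressor.extract returns a list of chunks rather than one joined bytes object. A minimal sketch of the new writer calling convention (illustrative only, not part of this proposal; it writes to an in-memory buffer instead of a real pack file):

    from io import BytesIO

    from breezy.bzr import pack

    buf = BytesIO()
    writer = pack.ContainerWriter(buf.write)
    writer.begin()
    # Pass the chunks and their total length explicitly; the writer no
    # longer computes len() itself, so large payloads never need to be
    # joined into one contiguous bytes object by the caller.
    chunks = [b'strange\n', b'common long line\n']
    writer.add_bytes_record(chunks, sum(len(c) for c in chunks), names=[])
    writer.end()

Records below the writer's existing _JOIN_WRITES_THRESHOLD are still coalesced into a single write; larger records are now written chunk by chunk (see the breezy/bzr/pack.py hunk in the diff below).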
Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve
Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote:
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote:
Running landing tests failed
https://ci.breezy-vcs.org/job/brz/job/brz-land/696/
Preview Diff
=== modified file 'breezy/bzr/bundle/serializer/v4.py'
--- breezy/bzr/bundle/serializer/v4.py 2020-01-30 16:13:31 +0000
+++ breezy/bzr/bundle/serializer/v4.py 2020-02-09 13:41:06 +0000
@@ -181,9 +181,9 @@
"""
name = self.encode_name(repo_kind, revision_id, file_id)
encoded_metadata = bencode.bencode(metadata)
- self._container.add_bytes_record(encoded_metadata, [(name, )])
+ self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
if metadata[b'storage_kind'] != b'header':
- self._container.add_bytes_record(bytes, [])
+ self._container.add_bytes_record([bytes], len(bytes), [])


class BundleReader(object):

=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py 2020-02-09 01:27:02 +0000
+++ breezy/bzr/groupcompress.py 2020-02-09 13:41:06 +0000
@@ -276,7 +276,7 @@
:return: The bytes for the content
"""
if start == end == 0:
- return b''
+ return []
self._ensure_content(end)
# The bytes are 'f' or 'd' for the type, then a variable-length
# base128 integer for the content size, then the actual content
@@ -297,9 +297,9 @@
raise ValueError('end != len according to field header'
' %s != %s' % (end, content_start + content_len))
if c == b'f':
- return self._content[content_start:end]
+ return [self._content[content_start:end]]
# Must be type delta as checked above
- return apply_delta_to_source(self._content, content_start, end)
+ return [apply_delta_to_source(self._content, content_start, end)]

def set_chunked_content(self, content_chunks, length):
"""Set the content of this block to the given chunks."""
@@ -448,7 +448,7 @@
# get_bytes_as call? After Manager.get_record_stream() returns
# the object?
self._manager = manager
- self._bytes = None
+ self._chunks = None
self.storage_kind = 'groupcompress-block'
if not first:
self.storage_kind = 'groupcompress-block-ref'
@@ -469,7 +469,7 @@
except zlib.error as value:
raise DecompressCorruption("zlib: " + str(value))
block = self._manager._block
- self._bytes = block.extract(self.key, self._start, self._end)
+ self._chunks = block.extract(self.key, self._start, self._end)
# There are code paths that first extract as fulltext, and then
# extract as storage_kind (smart fetch). So we don't break the
# refcycle here, but instead in manager.get_record_stream()
@@ -482,24 +482,24 @@
else:
return b''
if storage_kind in ('fulltext', 'chunked', 'lines'):
- if self._bytes is None:
+ if self._chunks is None:
self._extract_bytes()
if storage_kind == 'fulltext':
- return self._bytes
+ return b''.join(self._chunks)
elif storage_kind == 'chunked':
- return [self._bytes]
+ return self._chunks
else:
- return osutils.split_lines(self._bytes)
+ return osutils.chunks_to_lines(self._chunks)
raise errors.UnavailableRepresentation(self.key, storage_kind,
self.storage_kind)

def iter_bytes_as(self, storage_kind):
- if self._bytes is None:
+ if self._chunks is None:
self._extract_bytes()
if storage_kind == 'chunked':
- return iter([self._bytes])
+ return iter(self._chunks)
elif storage_kind == 'lines':
- return iter(osutils.split_lines(self._bytes))
+ return iter(osutils.chunks_to_lines(self._chunks))
raise errors.UnavailableRepresentation(self.key, storage_kind,
self.storage_kind)

@@ -912,7 +912,7 @@
"""Extract a key previously added to the compressor.

:param key: The key to extract.
- :return: An iterable over bytes and the sha1.
+ :return: An iterable over chunks and the sha1.
"""
(start_byte, start_chunk, end_byte,
end_chunk) = self.labels_deltas[key]
@@ -926,7 +926,7 @@
raise ValueError('Index claimed fulltext len, but stored bytes'
' claim %s != %s'
% (len(stored_bytes), data_len))
- data = stored_bytes[offset + 1:]
+ data = [stored_bytes[offset + 1:]]
else:
if kind != b'd':
raise ValueError('Unknown content kind, bytes claim %s' % kind)
@@ -938,8 +938,8 @@
raise ValueError('Index claimed delta len, but stored bytes'
' claim %s != %s'
% (len(stored_bytes), data_len))
- data = apply_delta(source, stored_bytes[offset + 1:])
- data_sha1 = osutils.sha_string(data)
+ data = [apply_delta(source, stored_bytes[offset + 1:])]
+ data_sha1 = osutils.sha_strings(data)
return data, data_sha1

def flush(self):
@@ -1662,9 +1662,9 @@
# self._compressor.
for factory in batcher.yield_factories(full_flush=True):
yield factory
- bytes, sha1 = self._compressor.extract(key)
+ chunks, sha1 = self._compressor.extract(key)
parents = self._unadded_refs[key]
- yield FulltextContentFactory(key, parents, sha1, bytes)
+ yield ChunkedContentFactory(key, parents, sha1, chunks)
continue
if batcher.add_key(key) > BATCH_SIZE:
# Ok, this batch is big enough. Yield some results.

=== modified file 'breezy/bzr/knitpack_repo.py'
--- breezy/bzr/knitpack_repo.py 2020-01-25 14:07:41 +0000
+++ breezy/bzr/knitpack_repo.py 2020-02-09 13:41:06 +0000
@@ -687,7 +687,7 @@
else:
df, _ = knit._parse_record_header(key, raw_data)
df.close()
- pos, size = writer.add_bytes_record(raw_data, names)
+ pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
pb.update("Copied record", record_index)
record_index += 1
@@ -739,7 +739,7 @@
# check the header only
df, _ = knit._parse_record_header(key, raw_data)
df.close()
- pos, size = writer.add_bytes_record(raw_data, names)
+ pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
write_index.add_node(key, eol_flag + b"%d %d" %
(pos, size), references)
pb.update("Copied record", record_index)

=== modified file 'breezy/bzr/pack.py'
--- breezy/bzr/pack.py 2018-11-11 04:08:32 +0000
+++ breezy/bzr/pack.py 2020-02-09 13:41:06 +0000
@@ -141,10 +141,11 @@
"""Finish writing a container."""
self.write_func(self._serialiser.end())

- def add_bytes_record(self, bytes, names):
+ def add_bytes_record(self, chunks, length, names):
"""Add a Bytes record with the given names.

- :param bytes: The bytes to insert.
+ :param bytes: The chunks to insert.
+ :param length: Total length of bytes in chunks
:param names: The names to give the inserted bytes. Each name is
a tuple of bytestrings. The bytestrings may not contain
whitespace.
@@ -156,13 +157,13 @@
and thus are only suitable for use by a ContainerReader.
"""
current_offset = self.current_offset
- length = len(bytes)
if length < self._JOIN_WRITES_THRESHOLD:
self.write_func(self._serialiser.bytes_header(length, names)
- + bytes)
+ + b''.join(chunks))
else:
self.write_func(self._serialiser.bytes_header(length, names))
- self.write_func(bytes)
+ for chunk in chunks:
+ self.write_func(chunk)
self.records_written += 1
# return a memo of where we wrote data to allow random access.
return current_offset, self.current_offset - current_offset

=== modified file 'breezy/bzr/pack_repo.py'
--- breezy/bzr/pack_repo.py 2020-01-26 13:58:01 +0000
+++ breezy/bzr/pack_repo.py 2020-02-09 13:41:06 +0000
@@ -1978,7 +1978,7 @@
object supplied to the PackAccess object.
"""
p_offset, p_length = self._container_writer.add_bytes_record(
- b''.join(raw_data), [])
+ raw_data, size, [])
return (self._write_index, p_offset, p_length)

def add_raw_records(self, key_sizes, raw_data):

=== modified file 'breezy/tests/test_bundle.py'
--- breezy/tests/test_bundle.py 2020-01-30 16:13:31 +0000
+++ breezy/tests/test_bundle.py 2020-02-09 13:41:06 +0000
@@ -1796,7 +1796,7 @@
writer = v4.BundleWriter(fileobj)
writer.begin()
writer.add_info_record({b'foo': b'bar'})
- writer._container.add_bytes_record(b'blah', [(b'two', ), (b'names', )])
+ writer._container.add_bytes_record([b'blah'], len(b'blah'), [(b'two', ), (b'names', )])
writer.end()
fileobj.seek(0)
record_iter = v4.BundleReader(fileobj).iter_records()

=== modified file 'breezy/tests/test_groupcompress.py'
--- breezy/tests/test_groupcompress.py 2020-01-26 04:30:31 +0000
+++ breezy/tests/test_groupcompress.py 2020-02-09 13:41:06 +0000
@@ -124,12 +124,12 @@
sha1_2, _, end_point, _ = compressor.compress(
('newlabel',), [text], len(text), None)
# get the first out
- self.assertEqual((b'strange\ncommon long line\n'
- b'that needs a 16 byte match\n', sha1_1),
+ self.assertEqual(([b'strange\ncommon long line\n'
+ b'that needs a 16 byte match\n'], sha1_1),
compressor.extract(('label',)))
# and the second
- self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
- b'different\n', sha1_2),
+ self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
+ b'different\n'], sha1_2),
compressor.extract(('newlabel',)))

def test_pop_last(self):

=== modified file 'breezy/tests/test_pack.py'
--- breezy/tests/test_pack.py 2018-11-12 01:41:38 +0000
+++ breezy/tests/test_pack.py 2020-02-09 13:41:06 +0000
@@ -119,14 +119,14 @@
def test_non_empty_end_does_not_add_a_record_to_records_written(self):
"""The end() method does not count towards the records written."""
self.writer.begin()
- self.writer.add_bytes_record(b'foo', names=[])
+ self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
self.writer.end()
self.assertEqual(1, self.writer.records_written)

def test_add_bytes_record_no_name(self):
"""Add a bytes record with no name."""
self.writer.begin()
- offset, length = self.writer.add_bytes_record(b'abc', names=[])
+ offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
self.assertEqual((42, 7), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\nB3\n\nabc')
@@ -136,7 +136,7 @@
self.writer.begin()

offset, length = self.writer.add_bytes_record(
- b'abc', names=[(b'name1', )])
+ [b'abc'], len(b'abc'), names=[(b'name1', )])
self.assertEqual((42, 13), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -157,7 +157,7 @@

self.writer.begin()
offset, length = self.writer.add_bytes_record(
- b'abcabc', names=[(b'name1', )])
+ [b'abcabc'], len(b'abcabc'), names=[(b'name1', )])
self.assertEqual((42, 16), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -173,7 +173,7 @@
"""Add a bytes record with two names."""
self.writer.begin()
offset, length = self.writer.add_bytes_record(
- b'abc', names=[(b'name1', ), (b'name2', )])
+ [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
self.assertEqual((42, 19), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -183,7 +183,7 @@
"""Add a bytes record with two names."""
self.writer.begin()
offset, length = self.writer.add_bytes_record(
- b'abc', names=[(b'name1', ), (b'name2', )])
+ [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
self.assertEqual((42, 19), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -193,7 +193,7 @@
"""Add a bytes record with a two-element name."""
self.writer.begin()
offset, length = self.writer.add_bytes_record(
- b'abc', names=[(b'name1', b'name2')])
+ [b'abc'], len(b'abc'), names=[(b'name1', b'name2')])
self.assertEqual((42, 19), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -201,8 +201,8 @@

def test_add_second_bytes_record_gets_higher_offset(self):
self.writer.begin()
- self.writer.add_bytes_record(b'abc', names=[])
- offset, length = self.writer.add_bytes_record(b'abc', names=[])
+ self.writer.add_bytes_record([b'a', b'bc'], len(b'abc'), names=[])
+ offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
self.assertEqual((49, 7), (offset, length))
self.assertOutput(
b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -216,14 +216,14 @@
self.writer.begin()
self.assertRaises(
errors.InvalidRecordError,
- self.writer.add_bytes_record, b'abc', names=[(b'bad name', )])
+ self.writer.add_bytes_record, [b'abc'], len(b'abc'), names=[(b'bad name', )])

def test_add_bytes_records_add_to_records_written(self):
"""Adding a Bytes record increments the records_written counter."""
self.writer.begin()
- self.writer.add_bytes_record(b'foo', names=[])
+ self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
self.assertEqual(1, self.writer.records_written)
- self.writer.add_bytes_record(b'foo', names=[])
+ self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
self.assertEqual(2, self.writer.records_written)


@@ -539,10 +539,10 @@
writer = pack.ContainerWriter(pack_data.write)
writer.begin()
memos = []
- memos.append(writer.add_bytes_record(b'abc', names=[]))
- memos.append(writer.add_bytes_record(b'def', names=[(b'name1', )]))
- memos.append(writer.add_bytes_record(b'ghi', names=[(b'name2', )]))
- memos.append(writer.add_bytes_record(b'jkl', names=[]))
+ memos.append(writer.add_bytes_record([b'abc'], 3, names=[]))
+ memos.append(writer.add_bytes_record([b'def'], 3, names=[(b'name1', )]))
+ memos.append(writer.add_bytes_record([b'ghi'], 3, names=[(b'name2', )]))
+ memos.append(writer.add_bytes_record([b'jkl'], 3, names=[]))
writer.end()
transport = self.get_transport()
transport.put_bytes('mypack', pack_data.getvalue())
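
On the read side, as the test_groupcompress hunk above shows, extract() now returns a list of chunks together with the sha1, instead of a single bytes string. A minimal sketch of the caller-visible change (assuming a GroupCompressor constructed as in the test suite; the label key is arbitrary):

    from breezy.bzr import groupcompress

    compressor = groupcompress.GroupCompressor()
    text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
    sha1, _, _, _ = compressor.compress(('label',), [text], len(text), None)

    # extract() now returns (chunks, sha1) rather than (bytes, sha1);
    # join the chunks only when a contiguous fulltext is actually needed.
    chunks, extracted_sha1 = compressor.extract(('label',))
    assert b''.join(chunks) == text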