Merge lp:~jelmer/brz/extract into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/extract
Merge into: lp:brz
Diff against target: 345 lines (+50/-49)
8 files modified
breezy/bzr/bundle/serializer/v4.py (+2/-2)
breezy/bzr/groupcompress.py (+18/-18)
breezy/bzr/knitpack_repo.py (+2/-2)
breezy/bzr/pack.py (+6/-5)
breezy/bzr/pack_repo.py (+1/-1)
breezy/tests/test_bundle.py (+1/-1)
breezy/tests/test_groupcompress.py (+4/-4)
breezy/tests/test_pack.py (+16/-16)
To merge this branch: bzr merge lp:~jelmer/brz/extract
Reviewer: Jelmer Vernooij (status: Approve)
Review via email: mp+378773@code.launchpad.net

Commit message

More chunkification of groupcompress APIs.

Description of the change

More chunkification of groupcompress APIs: ContainerWriter.add_bytes_record() now takes a list of chunks plus their total length instead of a single byte string, and the groupcompress extract() paths return lists of chunks rather than joined fulltexts.
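
A minimal sketch of the new writer calling convention, based on the calls exercised in the updated test_pack.py below (the BytesIO sink and record body are illustrative, not part of this change):

    from io import BytesIO

    from breezy.bzr import pack

    buf = BytesIO()
    writer = pack.ContainerWriter(buf.write)
    writer.begin()

    data = b'some record body'
    # Previously: writer.add_bytes_record(data, names=[])
    # The caller now supplies a list of chunks plus their total length,
    # so multi-chunk records can be written without joining them first.
    offset, length = writer.add_bytes_record([data], len(data), names=[])

    writer.end()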

Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve

Preview Diff

=== modified file 'breezy/bzr/bundle/serializer/v4.py'
--- breezy/bzr/bundle/serializer/v4.py	2020-01-30 16:13:31 +0000
+++ breezy/bzr/bundle/serializer/v4.py	2020-02-09 13:41:06 +0000
@@ -181,9 +181,9 @@
         """
         name = self.encode_name(repo_kind, revision_id, file_id)
         encoded_metadata = bencode.bencode(metadata)
-        self._container.add_bytes_record(encoded_metadata, [(name, )])
+        self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
         if metadata[b'storage_kind'] != b'header':
-            self._container.add_bytes_record(bytes, [])
+            self._container.add_bytes_record([bytes], len(bytes), [])
 
 
 class BundleReader(object):

=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py	2020-02-09 01:27:02 +0000
+++ breezy/bzr/groupcompress.py	2020-02-09 13:41:06 +0000
@@ -276,7 +276,7 @@
         :return: The bytes for the content
         """
         if start == end == 0:
-            return b''
+            return []
         self._ensure_content(end)
         # The bytes are 'f' or 'd' for the type, then a variable-length
         # base128 integer for the content size, then the actual content
@@ -297,9 +297,9 @@
             raise ValueError('end != len according to field header'
                              ' %s != %s' % (end, content_start + content_len))
         if c == b'f':
-            return self._content[content_start:end]
+            return [self._content[content_start:end]]
         # Must be type delta as checked above
-        return apply_delta_to_source(self._content, content_start, end)
+        return [apply_delta_to_source(self._content, content_start, end)]
 
     def set_chunked_content(self, content_chunks, length):
         """Set the content of this block to the given chunks."""
@@ -448,7 +448,7 @@
         #       get_bytes_as call? After Manager.get_record_stream() returns
         #       the object?
         self._manager = manager
-        self._bytes = None
+        self._chunks = None
         self.storage_kind = 'groupcompress-block'
         if not first:
             self.storage_kind = 'groupcompress-block-ref'
@@ -469,7 +469,7 @@
             except zlib.error as value:
                 raise DecompressCorruption("zlib: " + str(value))
             block = self._manager._block
-            self._bytes = block.extract(self.key, self._start, self._end)
+            self._chunks = block.extract(self.key, self._start, self._end)
             # There are code paths that first extract as fulltext, and then
             # extract as storage_kind (smart fetch). So we don't break the
             # refcycle here, but instead in manager.get_record_stream()
@@ -482,24 +482,24 @@
             else:
                 return b''
         if storage_kind in ('fulltext', 'chunked', 'lines'):
-            if self._bytes is None:
+            if self._chunks is None:
                 self._extract_bytes()
             if storage_kind == 'fulltext':
-                return self._bytes
+                return b''.join(self._chunks)
             elif storage_kind == 'chunked':
-                return [self._bytes]
+                return self._chunks
             else:
-                return osutils.split_lines(self._bytes)
+                return osutils.chunks_to_lines(self._chunks)
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
     def iter_bytes_as(self, storage_kind):
-        if self._bytes is None:
+        if self._chunks is None:
             self._extract_bytes()
         if storage_kind == 'chunked':
-            return iter([self._bytes])
+            return iter(self._chunks)
         elif storage_kind == 'lines':
-            return iter(osutils.split_lines(self._bytes))
+            return iter(osutils.chunks_to_lines(self._chunks))
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
@@ -912,7 +912,7 @@
         """Extract a key previously added to the compressor.
 
         :param key: The key to extract.
-        :return: An iterable over bytes and the sha1.
+        :return: An iterable over chunks and the sha1.
         """
         (start_byte, start_chunk, end_byte,
          end_chunk) = self.labels_deltas[key]
@@ -926,7 +926,7 @@
                 raise ValueError('Index claimed fulltext len, but stored bytes'
                                  ' claim %s != %s'
                                  % (len(stored_bytes), data_len))
-            data = stored_bytes[offset + 1:]
+            data = [stored_bytes[offset + 1:]]
         else:
             if kind != b'd':
                 raise ValueError('Unknown content kind, bytes claim %s' % kind)
@@ -938,8 +938,8 @@
                 raise ValueError('Index claimed delta len, but stored bytes'
                                  ' claim %s != %s'
                                  % (len(stored_bytes), data_len))
-            data = apply_delta(source, stored_bytes[offset + 1:])
-        data_sha1 = osutils.sha_string(data)
+            data = [apply_delta(source, stored_bytes[offset + 1:])]
+        data_sha1 = osutils.sha_strings(data)
         return data, data_sha1
 
     def flush(self):
@@ -1662,9 +1662,9 @@
                     # self._compressor.
                     for factory in batcher.yield_factories(full_flush=True):
                         yield factory
-                    bytes, sha1 = self._compressor.extract(key)
+                    chunks, sha1 = self._compressor.extract(key)
                     parents = self._unadded_refs[key]
-                    yield FulltextContentFactory(key, parents, sha1, bytes)
+                    yield ChunkedContentFactory(key, parents, sha1, chunks)
                     continue
                 if batcher.add_key(key) > BATCH_SIZE:
                     # Ok, this batch is big enough. Yield some results.

=== modified file 'breezy/bzr/knitpack_repo.py'
--- breezy/bzr/knitpack_repo.py	2020-01-25 14:07:41 +0000
+++ breezy/bzr/knitpack_repo.py	2020-02-09 13:41:06 +0000
@@ -687,7 +687,7 @@
             else:
                 df, _ = knit._parse_record_header(key, raw_data)
                 df.close()
-            pos, size = writer.add_bytes_record(raw_data, names)
+            pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
             write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
             pb.update("Copied record", record_index)
             record_index += 1
@@ -739,7 +739,7 @@
                 # check the header only
                 df, _ = knit._parse_record_header(key, raw_data)
                 df.close()
-            pos, size = writer.add_bytes_record(raw_data, names)
+            pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
             write_index.add_node(key, eol_flag + b"%d %d" %
                                  (pos, size), references)
             pb.update("Copied record", record_index)

=== modified file 'breezy/bzr/pack.py'
--- breezy/bzr/pack.py	2018-11-11 04:08:32 +0000
+++ breezy/bzr/pack.py	2020-02-09 13:41:06 +0000
@@ -141,10 +141,11 @@
         """Finish writing a container."""
         self.write_func(self._serialiser.end())
 
-    def add_bytes_record(self, bytes, names):
+    def add_bytes_record(self, chunks, length, names):
         """Add a Bytes record with the given names.
 
-        :param bytes: The bytes to insert.
+        :param chunks: The chunks to insert.
+        :param length: Total length of the bytes in chunks.
         :param names: The names to give the inserted bytes. Each name is
             a tuple of bytestrings. The bytestrings may not contain
             whitespace.
@@ -156,13 +157,13 @@
             and thus are only suitable for use by a ContainerReader.
         """
         current_offset = self.current_offset
-        length = len(bytes)
         if length < self._JOIN_WRITES_THRESHOLD:
             self.write_func(self._serialiser.bytes_header(length, names)
-                            + bytes)
+                            + b''.join(chunks))
         else:
             self.write_func(self._serialiser.bytes_header(length, names))
-            self.write_func(bytes)
+            for chunk in chunks:
+                self.write_func(chunk)
         self.records_written += 1
         # return a memo of where we wrote data to allow random access.
         return current_offset, self.current_offset - current_offset

=== modified file 'breezy/bzr/pack_repo.py'
--- breezy/bzr/pack_repo.py	2020-01-26 13:58:01 +0000
+++ breezy/bzr/pack_repo.py	2020-02-09 13:41:06 +0000
@@ -1978,7 +1978,7 @@
             object supplied to the PackAccess object.
         """
         p_offset, p_length = self._container_writer.add_bytes_record(
-            b''.join(raw_data), [])
+            raw_data, size, [])
         return (self._write_index, p_offset, p_length)
 
     def add_raw_records(self, key_sizes, raw_data):

=== modified file 'breezy/tests/test_bundle.py'
--- breezy/tests/test_bundle.py	2020-01-30 16:13:31 +0000
+++ breezy/tests/test_bundle.py	2020-02-09 13:41:06 +0000
@@ -1796,7 +1796,7 @@
         writer = v4.BundleWriter(fileobj)
         writer.begin()
         writer.add_info_record({b'foo': b'bar'})
-        writer._container.add_bytes_record(b'blah', [(b'two', ), (b'names', )])
+        writer._container.add_bytes_record([b'blah'], len(b'blah'), [(b'two', ), (b'names', )])
         writer.end()
         fileobj.seek(0)
         record_iter = v4.BundleReader(fileobj).iter_records()

=== modified file 'breezy/tests/test_groupcompress.py'
--- breezy/tests/test_groupcompress.py	2020-01-26 04:30:31 +0000
+++ breezy/tests/test_groupcompress.py	2020-02-09 13:41:06 +0000
@@ -124,12 +124,12 @@
         sha1_2, _, end_point, _ = compressor.compress(
             ('newlabel',), [text], len(text), None)
         # get the first out
-        self.assertEqual((b'strange\ncommon long line\n'
-                          b'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual(([b'strange\ncommon long line\n'
+                           b'that needs a 16 byte match\n'], sha1_1),
                          compressor.extract(('label',)))
         # and the second
-        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
-                          b'different\n', sha1_2),
+        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
+                           b'different\n'], sha1_2),
                          compressor.extract(('newlabel',)))
 
     def test_pop_last(self):

=== modified file 'breezy/tests/test_pack.py'
--- breezy/tests/test_pack.py	2018-11-12 01:41:38 +0000
+++ breezy/tests/test_pack.py	2020-02-09 13:41:06 +0000
@@ -119,14 +119,14 @@
     def test_non_empty_end_does_not_add_a_record_to_records_written(self):
         """The end() method does not count towards the records written."""
         self.writer.begin()
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.writer.end()
         self.assertEqual(1, self.writer.records_written)
 
     def test_add_bytes_record_no_name(self):
         """Add a bytes record with no name."""
         self.writer.begin()
-        offset, length = self.writer.add_bytes_record(b'abc', names=[])
+        offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
         self.assertEqual((42, 7), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\nB3\n\nabc')
@@ -136,7 +136,7 @@
         self.writer.begin()
 
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', )])
         self.assertEqual((42, 13), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -157,7 +157,7 @@
 
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abcabc', names=[(b'name1', )])
+            [b'abcabc'], len(b'abcabc'), names=[(b'name1', )])
         self.assertEqual((42, 16), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -173,7 +173,7 @@
         """Add a bytes record with two names."""
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', ), (b'name2', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -183,7 +183,7 @@
         """Add a bytes record with two names."""
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', ), (b'name2', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -193,7 +193,7 @@
         """Add a bytes record with a two-element name."""
        self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', b'name2')])
+            [b'abc'], len(b'abc'), names=[(b'name1', b'name2')])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -201,8 +201,8 @@
 
     def test_add_second_bytes_record_gets_higher_offset(self):
         self.writer.begin()
-        self.writer.add_bytes_record(b'abc', names=[])
-        offset, length = self.writer.add_bytes_record(b'abc', names=[])
+        self.writer.add_bytes_record([b'a', b'bc'], len(b'abc'), names=[])
+        offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
         self.assertEqual((49, 7), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -216,14 +216,14 @@
         self.writer.begin()
         self.assertRaises(
             errors.InvalidRecordError,
-            self.writer.add_bytes_record, b'abc', names=[(b'bad name', )])
+            self.writer.add_bytes_record, [b'abc'], len(b'abc'), names=[(b'bad name', )])
 
     def test_add_bytes_records_add_to_records_written(self):
         """Adding a Bytes record increments the records_written counter."""
         self.writer.begin()
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.assertEqual(1, self.writer.records_written)
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.assertEqual(2, self.writer.records_written)
 
 
@@ -539,10 +539,10 @@
         writer = pack.ContainerWriter(pack_data.write)
         writer.begin()
         memos = []
-        memos.append(writer.add_bytes_record(b'abc', names=[]))
-        memos.append(writer.add_bytes_record(b'def', names=[(b'name1', )]))
-        memos.append(writer.add_bytes_record(b'ghi', names=[(b'name2', )]))
-        memos.append(writer.add_bytes_record(b'jkl', names=[]))
+        memos.append(writer.add_bytes_record([b'abc'], 3, names=[]))
+        memos.append(writer.add_bytes_record([b'def'], 3, names=[(b'name1', )]))
+        memos.append(writer.add_bytes_record([b'ghi'], 3, names=[(b'name2', )]))
+        memos.append(writer.add_bytes_record([b'jkl'], 3, names=[]))
         writer.end()
         transport = self.get_transport()
         transport.put_bytes('mypack', pack_data.getvalue())
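
The compressor-side change follows the same pattern. A minimal sketch mirroring the updated assertions in test_groupcompress.py above (the key and text are illustrative; GroupCompressor resolves to whichever implementation breezy.bzr.groupcompress selects at import time):

    from breezy.bzr.groupcompress import GroupCompressor

    compressor = GroupCompressor()
    text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
    sha1, _, _, _ = compressor.compress(('label',), [text], len(text), None)

    # extract() now returns a list of chunks plus the sha1, rather than
    # a single joined byte string.
    chunks, extracted_sha1 = compressor.extract(('label',))
    assert b''.join(chunks) == text
    assert extracted_sha1 == sha1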
