Merge lp:~jelmer/brz/extract into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/extract
Merge into: lp:brz
Diff against target: 345 lines (+50/-49)
8 files modified
breezy/bzr/bundle/serializer/v4.py (+2/-2)
breezy/bzr/groupcompress.py (+18/-18)
breezy/bzr/knitpack_repo.py (+2/-2)
breezy/bzr/pack.py (+6/-5)
breezy/bzr/pack_repo.py (+1/-1)
breezy/tests/test_bundle.py (+1/-1)
breezy/tests/test_groupcompress.py (+4/-4)
breezy/tests/test_pack.py (+16/-16)
To merge this branch: bzr merge lp:~jelmer/brz/extract
Reviewer: Jelmer Vernooij (status: Approve)
Review via email: mp+378773@code.launchpad.net

Commit message

More chunkification of groupcompress APIs.

Description of the change

More chunkification of groupcompress APIs: ContainerWriter.add_bytes_record() now takes a list of chunks plus their total length instead of a single byte string, and the groupcompress extract() paths return lists of chunks rather than joined fulltexts.
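
A minimal sketch of the new writer calling convention, based on the calls exercised in the updated test_pack.py below (the BytesIO sink and record body are illustrative, not part of this change):

    from io import BytesIO

    from breezy.bzr import pack

    buf = BytesIO()
    writer = pack.ContainerWriter(buf.write)
    writer.begin()

    data = b'some record body'
    # Previously: writer.add_bytes_record(data, names=[])
    # The caller now supplies a list of chunks plus their total length,
    # so multi-chunk records can be written without joining them first.
    offset, length = writer.add_bytes_record([data], len(data), names=[])

    writer.end()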

Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve

Preview Diff

=== modified file 'breezy/bzr/bundle/serializer/v4.py'
--- breezy/bzr/bundle/serializer/v4.py	2020-01-30 16:13:31 +0000
+++ breezy/bzr/bundle/serializer/v4.py	2020-02-09 13:41:06 +0000
@@ -181,9 +181,9 @@
         """
         name = self.encode_name(repo_kind, revision_id, file_id)
         encoded_metadata = bencode.bencode(metadata)
-        self._container.add_bytes_record(encoded_metadata, [(name, )])
+        self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
         if metadata[b'storage_kind'] != b'header':
-            self._container.add_bytes_record(bytes, [])
+            self._container.add_bytes_record([bytes], len(bytes), [])
 
 
 class BundleReader(object):

=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py	2020-02-09 01:27:02 +0000
+++ breezy/bzr/groupcompress.py	2020-02-09 13:41:06 +0000
@@ -276,7 +276,7 @@
         :return: The bytes for the content
         """
         if start == end == 0:
-            return b''
+            return []
         self._ensure_content(end)
         # The bytes are 'f' or 'd' for the type, then a variable-length
         # base128 integer for the content size, then the actual content
@@ -297,9 +297,9 @@
             raise ValueError('end != len according to field header'
                              ' %s != %s' % (end, content_start + content_len))
         if c == b'f':
-            return self._content[content_start:end]
+            return [self._content[content_start:end]]
         # Must be type delta as checked above
-        return apply_delta_to_source(self._content, content_start, end)
+        return [apply_delta_to_source(self._content, content_start, end)]
 
     def set_chunked_content(self, content_chunks, length):
         """Set the content of this block to the given chunks."""
@@ -448,7 +448,7 @@
         #       get_bytes_as call? After Manager.get_record_stream() returns
         #       the object?
         self._manager = manager
-        self._bytes = None
+        self._chunks = None
         self.storage_kind = 'groupcompress-block'
         if not first:
             self.storage_kind = 'groupcompress-block-ref'
@@ -469,7 +469,7 @@
             except zlib.error as value:
                 raise DecompressCorruption("zlib: " + str(value))
             block = self._manager._block
-            self._bytes = block.extract(self.key, self._start, self._end)
+            self._chunks = block.extract(self.key, self._start, self._end)
             # There are code paths that first extract as fulltext, and then
             # extract as storage_kind (smart fetch). So we don't break the
             # refcycle here, but instead in manager.get_record_stream()
@@ -482,24 +482,24 @@
             else:
                 return b''
         if storage_kind in ('fulltext', 'chunked', 'lines'):
-            if self._bytes is None:
+            if self._chunks is None:
                 self._extract_bytes()
             if storage_kind == 'fulltext':
-                return self._bytes
+                return b''.join(self._chunks)
             elif storage_kind == 'chunked':
-                return [self._bytes]
+                return self._chunks
             else:
-                return osutils.split_lines(self._bytes)
+                return osutils.chunks_to_lines(self._chunks)
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
     def iter_bytes_as(self, storage_kind):
-        if self._bytes is None:
+        if self._chunks is None:
             self._extract_bytes()
         if storage_kind == 'chunked':
-            return iter([self._bytes])
+            return iter(self._chunks)
         elif storage_kind == 'lines':
-            return iter(osutils.split_lines(self._bytes))
+            return iter(osutils.chunks_to_lines(self._chunks))
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
@@ -912,7 +912,7 @@
         """Extract a key previously added to the compressor.
 
         :param key: The key to extract.
-        :return: An iterable over bytes and the sha1.
+        :return: An iterable over chunks and the sha1.
         """
         (start_byte, start_chunk, end_byte,
          end_chunk) = self.labels_deltas[key]
@@ -926,7 +926,7 @@
                 raise ValueError('Index claimed fulltext len, but stored bytes'
                                  ' claim %s != %s'
                                  % (len(stored_bytes), data_len))
-            data = stored_bytes[offset + 1:]
+            data = [stored_bytes[offset + 1:]]
         else:
             if kind != b'd':
                 raise ValueError('Unknown content kind, bytes claim %s' % kind)
@@ -938,8 +938,8 @@
                 raise ValueError('Index claimed delta len, but stored bytes'
                                  ' claim %s != %s'
                                  % (len(stored_bytes), data_len))
-            data = apply_delta(source, stored_bytes[offset + 1:])
-        data_sha1 = osutils.sha_string(data)
+            data = [apply_delta(source, stored_bytes[offset + 1:])]
+        data_sha1 = osutils.sha_strings(data)
         return data, data_sha1
 
     def flush(self):
@@ -1662,9 +1662,9 @@
                     # self._compressor.
                     for factory in batcher.yield_factories(full_flush=True):
                         yield factory
-                    bytes, sha1 = self._compressor.extract(key)
+                    chunks, sha1 = self._compressor.extract(key)
                     parents = self._unadded_refs[key]
-                    yield FulltextContentFactory(key, parents, sha1, bytes)
+                    yield ChunkedContentFactory(key, parents, sha1, chunks)
                     continue
                 if batcher.add_key(key) > BATCH_SIZE:
                     # Ok, this batch is big enough. Yield some results.

=== modified file 'breezy/bzr/knitpack_repo.py'
--- breezy/bzr/knitpack_repo.py	2020-01-25 14:07:41 +0000
+++ breezy/bzr/knitpack_repo.py	2020-02-09 13:41:06 +0000
@@ -687,7 +687,7 @@
             else:
                 df, _ = knit._parse_record_header(key, raw_data)
                 df.close()
-            pos, size = writer.add_bytes_record(raw_data, names)
+            pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
             write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
             pb.update("Copied record", record_index)
             record_index += 1
@@ -739,7 +739,7 @@
                 # check the header only
                 df, _ = knit._parse_record_header(key, raw_data)
                 df.close()
-            pos, size = writer.add_bytes_record(raw_data, names)
+            pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
             write_index.add_node(key, eol_flag + b"%d %d" %
                                  (pos, size), references)
             pb.update("Copied record", record_index)

=== modified file 'breezy/bzr/pack.py'
--- breezy/bzr/pack.py	2018-11-11 04:08:32 +0000
+++ breezy/bzr/pack.py	2020-02-09 13:41:06 +0000
@@ -141,10 +141,11 @@
         """Finish writing a container."""
         self.write_func(self._serialiser.end())
 
-    def add_bytes_record(self, bytes, names):
+    def add_bytes_record(self, chunks, length, names):
         """Add a Bytes record with the given names.
 
-        :param bytes: The bytes to insert.
+        :param chunks: The chunks to insert.
+        :param length: Total length of the bytes in chunks.
         :param names: The names to give the inserted bytes. Each name is
             a tuple of bytestrings. The bytestrings may not contain
             whitespace.
@@ -156,13 +157,13 @@
             and thus are only suitable for use by a ContainerReader.
         """
         current_offset = self.current_offset
-        length = len(bytes)
         if length < self._JOIN_WRITES_THRESHOLD:
             self.write_func(self._serialiser.bytes_header(length, names)
-                            + bytes)
+                            + b''.join(chunks))
         else:
             self.write_func(self._serialiser.bytes_header(length, names))
-            self.write_func(bytes)
+            for chunk in chunks:
+                self.write_func(chunk)
         self.records_written += 1
         # return a memo of where we wrote data to allow random access.
         return current_offset, self.current_offset - current_offset

=== modified file 'breezy/bzr/pack_repo.py'
--- breezy/bzr/pack_repo.py	2020-01-26 13:58:01 +0000
+++ breezy/bzr/pack_repo.py	2020-02-09 13:41:06 +0000
@@ -1978,7 +1978,7 @@
             object supplied to the PackAccess object.
         """
         p_offset, p_length = self._container_writer.add_bytes_record(
-            b''.join(raw_data), [])
+            raw_data, size, [])
         return (self._write_index, p_offset, p_length)
 
     def add_raw_records(self, key_sizes, raw_data):

=== modified file 'breezy/tests/test_bundle.py'
--- breezy/tests/test_bundle.py	2020-01-30 16:13:31 +0000
+++ breezy/tests/test_bundle.py	2020-02-09 13:41:06 +0000
@@ -1796,7 +1796,7 @@
         writer = v4.BundleWriter(fileobj)
         writer.begin()
         writer.add_info_record({b'foo': b'bar'})
-        writer._container.add_bytes_record(b'blah', [(b'two', ), (b'names', )])
+        writer._container.add_bytes_record([b'blah'], len(b'blah'), [(b'two', ), (b'names', )])
         writer.end()
         fileobj.seek(0)
         record_iter = v4.BundleReader(fileobj).iter_records()

=== modified file 'breezy/tests/test_groupcompress.py'
--- breezy/tests/test_groupcompress.py	2020-01-26 04:30:31 +0000
+++ breezy/tests/test_groupcompress.py	2020-02-09 13:41:06 +0000
@@ -124,12 +124,12 @@
         sha1_2, _, end_point, _ = compressor.compress(
             ('newlabel',), [text], len(text), None)
         # get the first out
-        self.assertEqual((b'strange\ncommon long line\n'
-                          b'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual(([b'strange\ncommon long line\n'
+                           b'that needs a 16 byte match\n'], sha1_1),
                          compressor.extract(('label',)))
         # and the second
-        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
-                          b'different\n', sha1_2),
+        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
+                           b'different\n'], sha1_2),
                          compressor.extract(('newlabel',)))
 
     def test_pop_last(self):

=== modified file 'breezy/tests/test_pack.py'
--- breezy/tests/test_pack.py	2018-11-12 01:41:38 +0000
+++ breezy/tests/test_pack.py	2020-02-09 13:41:06 +0000
@@ -119,14 +119,14 @@
     def test_non_empty_end_does_not_add_a_record_to_records_written(self):
         """The end() method does not count towards the records written."""
         self.writer.begin()
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.writer.end()
         self.assertEqual(1, self.writer.records_written)
 
     def test_add_bytes_record_no_name(self):
         """Add a bytes record with no name."""
         self.writer.begin()
-        offset, length = self.writer.add_bytes_record(b'abc', names=[])
+        offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
         self.assertEqual((42, 7), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\nB3\n\nabc')
@@ -136,7 +136,7 @@
         self.writer.begin()
 
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', )])
         self.assertEqual((42, 13), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -157,7 +157,7 @@
 
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abcabc', names=[(b'name1', )])
+            [b'abcabc'], len(b'abcabc'), names=[(b'name1', )])
         self.assertEqual((42, 16), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -173,7 +173,7 @@
         """Add a bytes record with two names."""
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', ), (b'name2', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -183,7 +183,7 @@
         """Add a bytes record with two names."""
         self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', ), (b'name2', )])
+            [b'abc'], len(b'abc'), names=[(b'name1', ), (b'name2', )])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -193,7 +193,7 @@
         """Add a bytes record with a two-element name."""
        self.writer.begin()
         offset, length = self.writer.add_bytes_record(
-            b'abc', names=[(b'name1', b'name2')])
+            [b'abc'], len(b'abc'), names=[(b'name1', b'name2')])
         self.assertEqual((42, 19), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -201,8 +201,8 @@
 
     def test_add_second_bytes_record_gets_higher_offset(self):
         self.writer.begin()
-        self.writer.add_bytes_record(b'abc', names=[])
-        offset, length = self.writer.add_bytes_record(b'abc', names=[])
+        self.writer.add_bytes_record([b'a', b'bc'], len(b'abc'), names=[])
+        offset, length = self.writer.add_bytes_record([b'abc'], len(b'abc'), names=[])
         self.assertEqual((49, 7), (offset, length))
         self.assertOutput(
             b'Bazaar pack format 1 (introduced in 0.18)\n'
@@ -216,14 +216,14 @@
         self.writer.begin()
         self.assertRaises(
             errors.InvalidRecordError,
-            self.writer.add_bytes_record, b'abc', names=[(b'bad name', )])
+            self.writer.add_bytes_record, [b'abc'], len(b'abc'), names=[(b'bad name', )])
 
     def test_add_bytes_records_add_to_records_written(self):
         """Adding a Bytes record increments the records_written counter."""
         self.writer.begin()
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.assertEqual(1, self.writer.records_written)
-        self.writer.add_bytes_record(b'foo', names=[])
+        self.writer.add_bytes_record([b'foo'], len(b'foo'), names=[])
         self.assertEqual(2, self.writer.records_written)
 
 
@@ -539,10 +539,10 @@
         writer = pack.ContainerWriter(pack_data.write)
         writer.begin()
         memos = []
-        memos.append(writer.add_bytes_record(b'abc', names=[]))
-        memos.append(writer.add_bytes_record(b'def', names=[(b'name1', )]))
-        memos.append(writer.add_bytes_record(b'ghi', names=[(b'name2', )]))
-        memos.append(writer.add_bytes_record(b'jkl', names=[]))
+        memos.append(writer.add_bytes_record([b'abc'], 3, names=[]))
+        memos.append(writer.add_bytes_record([b'def'], 3, names=[(b'name1', )]))
+        memos.append(writer.add_bytes_record([b'ghi'], 3, names=[(b'name2', )]))
+        memos.append(writer.add_bytes_record([b'jkl'], 3, names=[]))
         writer.end()
         transport = self.get_transport()
         transport.put_bytes('mypack', pack_data.getvalue())
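
The compressor-side change follows the same pattern. A minimal sketch mirroring the updated assertions in test_groupcompress.py above (the key and text are illustrative; GroupCompressor resolves to whichever implementation breezy.bzr.groupcompress selects at import time):

    from breezy.bzr.groupcompress import GroupCompressor

    compressor = GroupCompressor()
    text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
    sha1, _, _, _ = compressor.compress(('label',), [text], len(text), None)

    # extract() now returns a list of chunks plus the sha1, rather than
    # a single joined byte string.
    chunks, extracted_sha1 = compressor.extract(('label',))
    assert b''.join(chunks) == text
    assert extracted_sha1 == sha1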
