Merge lp:~jelmer/brz/compress into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/compress
Merge into: lp:brz
Diff against target: 374 lines (+97/-90)
2 files modified
breezy/bzr/groupcompress.py (+22/-19)
breezy/tests/test_groupcompress.py (+75/-71)
To merge this branch: bzr merge lp:~jelmer/brz/compress
Reviewer: Jelmer Vernooij
Status: Approve
Review via email: mp+378071@code.launchpad.net

Commit message

Pass chunks rather than full texts to compress().

Description of the change

Pass chunks rather than full texts to compress().
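
In rough terms, this changes GroupCompressor.compress() (and the _compress() implementations behind it) to take a list of byte chunks instead of one joined bytes string; the sha1 and input length are then computed chunk by chunk (osutils.sha_strings(chunks) and sum(map(len, chunks))). As a minimal sketch of the new calling convention, using the GroupCompressor class and test-style keys that appear in the diff below:

    from breezy.bzr import groupcompress

    compressor = groupcompress.GroupCompressor()

    # Old convention: one joined bytes string.
    #   compressor.compress(('key',), b'some\nbytes\n', None)
    # New convention: a list of byte chunks. The return value is
    # unchanged: (sha1, start_point, end_point, kind).
    sha1, start, end, kind = compressor.compress(
        ('key',), [b'some\n', b'bytes\n'], None)

Existing callers that still hold a single string simply wrap it in a one-element list, as the diff does with [bytes].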

Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve
Preview Diff

=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py 2020-01-25 04:20:44 +0000
+++ breezy/bzr/groupcompress.py 2020-01-25 14:24:06 +0000
@@ -568,7 +568,7 @@
         for factory in self._factories:
             bytes = factory.get_bytes_as('fulltext')
             (found_sha1, start_point, end_point,
-             type) = compressor.compress(factory.key, bytes, factory.sha1)
+             type) = compressor.compress(factory.key, [bytes], factory.sha1)
             # Now update this factory with the new offsets, etc
             factory.sha1 = found_sha1
             factory._start = start_point
@@ -826,14 +826,14 @@
         else:
             self._settings = settings

-    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
+    def compress(self, key, chunks, expected_sha, nostore_sha=None, soft=False):
         """Compress lines with label key.

         :param key: A key tuple. It is stored in the output
             for identification of the text during decompression. If the last
             element is b'None' it is replaced with the sha1 of the text -
             e.g. sha1:xxxxxxx.
-        :param bytes: The bytes to be compressed
+        :param chunks: Chunks of bytes to be compressed
         :param expected_sha: If non-None, the sha the lines are believed to
             have. During compression the sha is calculated; a mismatch will
             cause an error.
@@ -847,7 +847,7 @@

         :seealso VersionedFiles.add_lines:
         """
-        if not bytes: # empty, like a dir entry, etc
+        if not chunks: # empty, like a dir entry, etc
             if nostore_sha == _null_sha1:
                 raise errors.ExistingContent()
             return _null_sha1, 0, 0, 'fulltext'
@@ -855,23 +855,25 @@
         if expected_sha is not None:
             sha1 = expected_sha
         else:
-            sha1 = osutils.sha_string(bytes)
+            sha1 = osutils.sha_strings(chunks)
         if nostore_sha is not None:
             if sha1 == nostore_sha:
                 raise errors.ExistingContent()
         if key[-1] is None:
             key = key[:-1] + (b'sha1:' + sha1,)

-        start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)
+        length = sum(map(len, chunks))
+
+        start, end, type = self._compress(key, chunks, length / 2, soft)
         return sha1, start, end, type

-    def _compress(self, key, bytes, max_delta_size, soft=False):
+    def _compress(self, key, chunks, max_delta_size, soft=False):
         """Compress lines with label key.

         :param key: A key tuple. It is stored in the output for identification
             of the text during decompression.

-        :param bytes: The bytes to be compressed
+        :param chunks: The chunks of bytes to be compressed

         :param max_delta_size: The size above which we issue a fulltext instead
             of a delta.
@@ -956,10 +958,10 @@
         # The actual content is managed by LinesDeltaIndex
         self.chunks = self._delta_index.lines

-    def _compress(self, key, bytes, max_delta_size, soft=False):
+    def _compress(self, key, chunks, max_delta_size, soft=False):
         """see _CommonGroupCompressor._compress"""
-        input_len = len(bytes)
-        new_lines = osutils.split_lines(bytes)
+        input_len = sum(map(len, chunks))
+        new_lines = osutils.chunks_to_lines(chunks)
         out_lines, index_lines = self._delta_index.make_delta(
             new_lines, bytes_length=input_len, soft=soft)
         delta_length = sum(map(len, out_lines))
@@ -1011,9 +1013,9 @@
         max_bytes_to_index = self._settings.get('max_bytes_to_index', 0)
         self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index)

-    def _compress(self, key, bytes, max_delta_size, soft=False):
+    def _compress(self, key, chunks, max_delta_size, soft=False):
         """see _CommonGroupCompressor._compress"""
-        input_len = len(bytes)
+        input_len = sum(map(len, chunks))
         # By having action/label/sha1/len, we can parse the group if the index
         # was ever destroyed, we have the key in 'label', we know the final
         # bytes are valid from sha1, and we know where to find the end of this
@@ -1027,13 +1029,14 @@
             raise AssertionError('_source_offset != endpoint'
                                  ' somehow the DeltaIndex got out of sync with'
                                  ' the output lines')
+        bytes = b''.join(chunks)
         delta = self._delta_index.make_delta(bytes, max_delta_size)
-        if (delta is None):
+        if delta is None:
             type = 'fulltext'
-            enc_length = encode_base128_int(len(bytes))
+            enc_length = encode_base128_int(input_len)
             len_mini_header = 1 + len(enc_length)
             self._delta_index.add_source(bytes, len_mini_header)
-            new_chunks = [b'f', enc_length, bytes]
+            new_chunks = [b'f', enc_length] + chunks
         else:
             type = 'delta'
             enc_length = encode_base128_int(len(delta))
@@ -1836,7 +1839,7 @@
                     max_fulltext_prefix = prefix
                 (found_sha1, start_point, end_point,
                  type) = self._compressor.compress(record.key,
-                                                   bytes, record.sha1, soft=soft,
+                                                   [bytes], record.sha1, soft=soft,
                                                    nostore_sha=nostore_sha)
                 # delta_ratio = float(len(bytes)) / (end_point - start_point)
                 # Check if we want to continue to include that text
@@ -1858,8 +1861,8 @@
                     flush()
                     max_fulltext_len = len(bytes)
                 (found_sha1, start_point, end_point,
-                 type) = self._compressor.compress(record.key, bytes,
-                                                   record.sha1)
+                 type) = self._compressor.compress(
+                     record.key, [bytes], record.sha1)
                 if record.key[-1] is None:
                     key = record.key[:-1] + (b'sha1:' + found_sha1,)
                 else:

=== modified file 'breezy/tests/test_groupcompress.py'
--- breezy/tests/test_groupcompress.py 2018-11-11 04:08:32 +0000
+++ breezy/tests/test_groupcompress.py 2020-01-25 14:24:06 +0000
@@ -81,8 +81,8 @@
     def test_one_nosha_delta(self):
         # diff against NUKK
         compressor = self.compressor()
-        sha1, start_point, end_point, _ = compressor.compress(('label',),
-            b'strange\ncommon\n', None)
+        sha1, start_point, end_point, _ = compressor.compress(
+            ('label',), [b'strange\ncommon\n'], None)
         self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
         expected_lines = b'f\x0fstrange\ncommon\n'
         self.assertEqual(expected_lines, b''.join(compressor.chunks))
@@ -92,8 +92,8 @@
     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
-        sha1, start_point, end_point, kind = compressor.compress(('empty',),
-            b'', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty',), [], None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -101,10 +101,11 @@
         self.assertEqual(0, compressor.endpoint)
         self.assertEqual([], compressor.chunks)
         # Even after adding some content
-        compressor.compress(('content',), b'some\nbytes\n', None)
+        compressor.compress(
+            ('content',), [b'some\nbytes\n'], None)
         self.assertTrue(compressor.endpoint > 0)
-        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-            b'', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty2',), [], None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -114,11 +115,11 @@
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None)
         expected_lines = list(compressor.chunks)
-        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        sha1_2, _, end_point, _ = compressor.compress(
+            ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None)
         # get the first out
         self.assertEqual((b'strange\ncommon long line\n'
                           b'that needs a 16 byte match\n', sha1_1),
@@ -130,11 +131,11 @@

     def test_pop_last(self):
         compressor = self.compressor()
-        _, _, _, _ = compressor.compress(('key1',),
-            b'some text\nfor the first entry\n', None)
+        _, _, _, _ = compressor.compress(
+            ('key1',), [b'some text\nfor the first entry\n'], None)
         expected_lines = list(compressor.chunks)
-        _, _, _, _ = compressor.compress(('key2',),
-            b'some text\nfor the second entry\n', None)
+        _, _, _, _ = compressor.compress(
+            ('key2',), [b'some text\nfor the second entry\n'], None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)

@@ -146,30 +147,33 @@

     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-            b'strange\n'
-            b'common very very long line\n'
-            b'plus more text\n', None)
-        compressor.compress(('newlabel',),
-            b'common very very long line\n'
-            b'plus more text\n'
-            b'different\n'
-            b'moredifferent\n', None)
-        compressor.compress(('label3',),
-            b'new\n'
-            b'common very very long line\n'
-            b'plus more text\n'
-            b'different\n'
-            b'moredifferent\n', None)
+        compressor.compress(
+            ('label',), [b'strange\n',
+                         b'common very very long line\n',
+                         b'plus more text\n'], None)
+        compressor.compress(
+            ('newlabel',),
+            [b'common very very long line\n',
+             b'plus more text\n',
+             b'different\n',
+             b'moredifferent\n'], None)
+        compressor.compress(
+            ('label3',),
+            [b'new\n',
+             b'common very very long line\n',
+             b'plus more text\n',
+             b'different\n',
+             b'moredifferent\n'], None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',),
+            [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None)
         self.assertEqual(sha_string(b'common long line\n'
                                     b'that needs a 16 byte match\n'
                                     b'different\n'), sha1_2)
@@ -190,15 +194,14 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            b'different\nmoredifferent\nand then some more\n', None)
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [b'strange\ncommon very very long line\nwith some extra text\n'], None)
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [b'different\nmoredifferent\nand then some more\n'], None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            b'new\ncommon very very long line\nwith some extra text\n'
-            b'different\nmoredifferent\nand then some more\n',
-            None)
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [b'new\ncommon very very long line\nwith some extra text\n',
+                          b'different\nmoredifferent\nand then some more\n'], None)
         self.assertEqual(
             sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                        b'different\nmoredifferent\nand then some more\n'),
@@ -225,30 +228,32 @@

     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-            b'strange\n'
-            b'common very very long line\n'
-            b'plus more text\n', None)
-        compressor.compress(('newlabel',),
-            b'common very very long line\n'
-            b'plus more text\n'
-            b'different\n'
-            b'moredifferent\n', None)
-        compressor.compress(('label3',),
-            b'new\n'
-            b'common very very long line\n'
-            b'plus more text\n'
-            b'different\n'
-            b'moredifferent\n', None)
+        compressor.compress(
+            ('label',), [b'strange\n',
+                         b'common very very long line\n',
+                         b'plus more text\n'], None)
+        compressor.compress(
+            ('newlabel',), [
+                b'common very very long line\n',
+                b'plus more text\n',
+                b'different\n',
+                b'moredifferent\n'], None)
+        compressor.compress(
+            ('label3',),
+            [b'new\n',
+             b'common very very long line\n',
+             b'plus more text\n',
+             b'different\n',
+             b'moredifferent\n'], None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None)
         self.assertEqual(sha_string(b'common long line\n'
                                     b'that needs a 16 byte match\n'
                                     b'different\n'), sha1_2)
@@ -269,15 +274,14 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            b'different\nmoredifferent\nand then some more\n', None)
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [b'strange\ncommon very very long line\nwith some extra text\n'], None)
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [b'different\nmoredifferent\nand then some more\n'], None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            b'new\ncommon very very long line\nwith some extra text\n'
-            b'different\nmoredifferent\nand then some more\n',
-            None)
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [b'new\ncommon very very long line\nwith some extra text\n',
+                          b'different\nmoredifferent\nand then some more\n'], None)
         self.assertEqual(
             sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                        b'different\nmoredifferent\nand then some more\n'),
@@ -305,7 +309,7 @@
         compressor = groupcompress.GroupCompressor()
         start = 0
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(key, [key_to_text[key]], None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
                     in compressor.labels_deltas.items())
         block = compressor.flush()
@@ -946,7 +950,7 @@
         compressor = groupcompress.GroupCompressor()
         start = 0
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(key, [key_to_text[key]], None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
                     in compressor.labels_deltas.items())
         block = compressor.flush()
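
For reference, a standalone sketch of the chunk-based accounting the diff switches to; this is an illustrative equivalent of osutils.sha_strings(chunks) plus sum(map(len, chunks)), written against hashlib only, not the library code itself:

    import hashlib

    def sha_and_length(chunks):
        # Hash and measure the chunks incrementally, without ever
        # materialising the joined text; the streamed digest equals
        # hashing the concatenation in one go.
        s = hashlib.sha1()
        length = 0
        for chunk in chunks:
            s.update(chunk)
            length += len(chunk)
        return s.hexdigest().encode('ascii'), length

    sha1, input_len = sha_and_length([b'strange\n', b'common\n'])
    assert input_len == 15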
