| Status: | Merged |
|---|---|
| Approved by: | Jelmer Vernooij |
| Approved revision: | no longer in the source branch. |
| Merge reported by: | The Breezy Bot |
| Merged at revision: | not available |
| Proposed branch: | lp:~jelmer/brz/compress |
| Merge into: | lp:brz |
| Diff against target: | 374 lines (+97/-90), 2 files modified: breezy/bzr/groupcompress.py (+22/-19), breezy/tests/test_groupcompress.py (+75/-71) |
| To merge this branch: | bzr merge lp:~jelmer/brz/compress |
| Related bugs: | |
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| Jelmer Vernooij | Approve | | |

Review via email: mp+378071@code.launchpad.net
Commit message
Pass chunks rather than full texts to compress().
Description of the change
Pass chunks rather than full texts to compress().
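For callers, the practical effect is that compress() now takes a list of byte chunks rather than one bytes object, and the stored text is the concatenation of those chunks. A minimal sketch of the new calling convention, assuming a Breezy tree with this branch merged (the key and chunk values below are illustrative):

```python
from breezy.bzr import groupcompress

compressor = groupcompress.GroupCompressor()

# Previously the full text was passed as a single bytes object:
#   compressor.compress(('key1',), b'some text\nfor the first entry\n', None)
# With this change the same text is supplied as a list of chunks:
sha1, start_point, end_point, kind = compressor.compress(
    ('key1',), [b'some text\n', b'for the first entry\n'], None)

# The returned sha1 is computed over the concatenated chunks, and an
# empty chunk list (e.g. a directory entry) still yields the null
# 'fulltext' record, as exercised by the updated tests below.
print(sha1, start_point, end_point, kind)
```

The proposal itself does not spell out the motivation, but the interface change presumably lets callers that already hold texts as chunk lists avoid joining them into one large bytes object before compression.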
Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote:
Preview Diff
1 | === modified file 'breezy/bzr/groupcompress.py' |
2 | --- breezy/bzr/groupcompress.py 2020-01-25 04:20:44 +0000 |
3 | +++ breezy/bzr/groupcompress.py 2020-01-25 14:24:06 +0000 |
4 | @@ -568,7 +568,7 @@ |
5 | for factory in self._factories: |
6 | bytes = factory.get_bytes_as('fulltext') |
7 | (found_sha1, start_point, end_point, |
8 | - type) = compressor.compress(factory.key, bytes, factory.sha1) |
9 | + type) = compressor.compress(factory.key, [bytes], factory.sha1) |
10 | # Now update this factory with the new offsets, etc |
11 | factory.sha1 = found_sha1 |
12 | factory._start = start_point |
13 | @@ -826,14 +826,14 @@ |
14 | else: |
15 | self._settings = settings |
16 | |
17 | - def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False): |
18 | + def compress(self, key, chunks, expected_sha, nostore_sha=None, soft=False): |
19 | """Compress lines with label key. |
20 | |
21 | :param key: A key tuple. It is stored in the output |
22 | for identification of the text during decompression. If the last |
23 | element is b'None' it is replaced with the sha1 of the text - |
24 | e.g. sha1:xxxxxxx. |
25 | - :param bytes: The bytes to be compressed |
26 | + :param chunks: Chunks of bytes to be compressed |
27 | :param expected_sha: If non-None, the sha the lines are believed to |
28 | have. During compression the sha is calculated; a mismatch will |
29 | cause an error. |
30 | @@ -847,7 +847,7 @@ |
31 | |
32 | :seealso VersionedFiles.add_lines: |
33 | """ |
34 | - if not bytes: # empty, like a dir entry, etc |
35 | + if not chunks: # empty, like a dir entry, etc |
36 | if nostore_sha == _null_sha1: |
37 | raise errors.ExistingContent() |
38 | return _null_sha1, 0, 0, 'fulltext' |
39 | @@ -855,23 +855,25 @@ |
40 | if expected_sha is not None: |
41 | sha1 = expected_sha |
42 | else: |
43 | - sha1 = osutils.sha_string(bytes) |
44 | + sha1 = osutils.sha_strings(chunks) |
45 | if nostore_sha is not None: |
46 | if sha1 == nostore_sha: |
47 | raise errors.ExistingContent() |
48 | if key[-1] is None: |
49 | key = key[:-1] + (b'sha1:' + sha1,) |
50 | |
51 | - start, end, type = self._compress(key, bytes, len(bytes) / 2, soft) |
52 | + length = sum(map(len, chunks)) |
53 | + |
54 | + start, end, type = self._compress(key, chunks, length / 2, soft) |
55 | return sha1, start, end, type |
56 | |
57 | - def _compress(self, key, bytes, max_delta_size, soft=False): |
58 | + def _compress(self, key, chunks, max_delta_size, soft=False): |
59 | """Compress lines with label key. |
60 | |
61 | :param key: A key tuple. It is stored in the output for identification |
62 | of the text during decompression. |
63 | |
64 | - :param bytes: The bytes to be compressed |
65 | + :param chunks: The chunks of bytes to be compressed |
66 | |
67 | :param max_delta_size: The size above which we issue a fulltext instead |
68 | of a delta. |
69 | @@ -956,10 +958,10 @@ |
70 | # The actual content is managed by LinesDeltaIndex |
71 | self.chunks = self._delta_index.lines |
72 | |
73 | - def _compress(self, key, bytes, max_delta_size, soft=False): |
74 | + def _compress(self, key, chunks, max_delta_size, soft=False): |
75 | """see _CommonGroupCompressor._compress""" |
76 | - input_len = len(bytes) |
77 | - new_lines = osutils.split_lines(bytes) |
78 | + input_len = sum(map(len, chunks)) |
79 | + new_lines = osutils.chunks_to_lines(chunks) |
80 | out_lines, index_lines = self._delta_index.make_delta( |
81 | new_lines, bytes_length=input_len, soft=soft) |
82 | delta_length = sum(map(len, out_lines)) |
83 | @@ -1011,9 +1013,9 @@ |
84 | max_bytes_to_index = self._settings.get('max_bytes_to_index', 0) |
85 | self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index) |
86 | |
87 | - def _compress(self, key, bytes, max_delta_size, soft=False): |
88 | + def _compress(self, key, chunks, max_delta_size, soft=False): |
89 | """see _CommonGroupCompressor._compress""" |
90 | - input_len = len(bytes) |
91 | + input_len = sum(map(len, chunks)) |
92 | # By having action/label/sha1/len, we can parse the group if the index |
93 | # was ever destroyed, we have the key in 'label', we know the final |
94 | # bytes are valid from sha1, and we know where to find the end of this |
95 | @@ -1027,13 +1029,14 @@ |
96 | raise AssertionError('_source_offset != endpoint' |
97 | ' somehow the DeltaIndex got out of sync with' |
98 | ' the output lines') |
99 | + bytes = b''.join(chunks) |
100 | delta = self._delta_index.make_delta(bytes, max_delta_size) |
101 | - if (delta is None): |
102 | + if delta is None: |
103 | type = 'fulltext' |
104 | - enc_length = encode_base128_int(len(bytes)) |
105 | + enc_length = encode_base128_int(input_len) |
106 | len_mini_header = 1 + len(enc_length) |
107 | self._delta_index.add_source(bytes, len_mini_header) |
108 | - new_chunks = [b'f', enc_length, bytes] |
109 | + new_chunks = [b'f', enc_length] + chunks |
110 | else: |
111 | type = 'delta' |
112 | enc_length = encode_base128_int(len(delta)) |
113 | @@ -1836,7 +1839,7 @@ |
114 | max_fulltext_prefix = prefix |
115 | (found_sha1, start_point, end_point, |
116 | type) = self._compressor.compress(record.key, |
117 | - bytes, record.sha1, soft=soft, |
118 | + [bytes], record.sha1, soft=soft, |
119 | nostore_sha=nostore_sha) |
120 | # delta_ratio = float(len(bytes)) / (end_point - start_point) |
121 | # Check if we want to continue to include that text |
122 | @@ -1858,8 +1861,8 @@ |
123 | flush() |
124 | max_fulltext_len = len(bytes) |
125 | (found_sha1, start_point, end_point, |
126 | - type) = self._compressor.compress(record.key, bytes, |
127 | - record.sha1) |
128 | + type) = self._compressor.compress( |
129 | + record.key, [bytes], record.sha1) |
130 | if record.key[-1] is None: |
131 | key = record.key[:-1] + (b'sha1:' + found_sha1,) |
132 | else: |
133 | |
134 | === modified file 'breezy/tests/test_groupcompress.py' |
135 | --- breezy/tests/test_groupcompress.py 2018-11-11 04:08:32 +0000 |
136 | +++ breezy/tests/test_groupcompress.py 2020-01-25 14:24:06 +0000 |
137 | @@ -81,8 +81,8 @@ |
138 | def test_one_nosha_delta(self): |
139 | # diff against NUKK |
140 | compressor = self.compressor() |
141 | - sha1, start_point, end_point, _ = compressor.compress(('label',), |
142 | - b'strange\ncommon\n', None) |
143 | + sha1, start_point, end_point, _ = compressor.compress( |
144 | + ('label',), [b'strange\ncommon\n'], None) |
145 | self.assertEqual(sha_string(b'strange\ncommon\n'), sha1) |
146 | expected_lines = b'f\x0fstrange\ncommon\n' |
147 | self.assertEqual(expected_lines, b''.join(compressor.chunks)) |
148 | @@ -92,8 +92,8 @@ |
149 | def test_empty_content(self): |
150 | compressor = self.compressor() |
151 | # Adding empty bytes should return the 'null' record |
152 | - sha1, start_point, end_point, kind = compressor.compress(('empty',), |
153 | - b'', None) |
154 | + sha1, start_point, end_point, kind = compressor.compress( |
155 | + ('empty',), [], None) |
156 | self.assertEqual(0, start_point) |
157 | self.assertEqual(0, end_point) |
158 | self.assertEqual('fulltext', kind) |
159 | @@ -101,10 +101,11 @@ |
160 | self.assertEqual(0, compressor.endpoint) |
161 | self.assertEqual([], compressor.chunks) |
162 | # Even after adding some content |
163 | - compressor.compress(('content',), b'some\nbytes\n', None) |
164 | + compressor.compress( |
165 | + ('content',), [b'some\nbytes\n'], None) |
166 | self.assertTrue(compressor.endpoint > 0) |
167 | - sha1, start_point, end_point, kind = compressor.compress(('empty2',), |
168 | - b'', None) |
169 | + sha1, start_point, end_point, kind = compressor.compress( |
170 | + ('empty2',), [], None) |
171 | self.assertEqual(0, start_point) |
172 | self.assertEqual(0, end_point) |
173 | self.assertEqual('fulltext', kind) |
174 | @@ -114,11 +115,11 @@ |
175 | # Knit fetching will try to reconstruct texts locally which results in |
176 | # reading something that is in the compressor stream already. |
177 | compressor = self.compressor() |
178 | - sha1_1, _, _, _ = compressor.compress(('label',), |
179 | - b'strange\ncommon long line\nthat needs a 16 byte match\n', None) |
180 | + sha1_1, _, _, _ = compressor.compress( |
181 | + ('label',), [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None) |
182 | expected_lines = list(compressor.chunks) |
183 | - sha1_2, _, end_point, _ = compressor.compress(('newlabel',), |
184 | - b'common long line\nthat needs a 16 byte match\ndifferent\n', None) |
185 | + sha1_2, _, end_point, _ = compressor.compress( |
186 | + ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None) |
187 | # get the first out |
188 | self.assertEqual((b'strange\ncommon long line\n' |
189 | b'that needs a 16 byte match\n', sha1_1), |
190 | @@ -130,11 +131,11 @@ |
191 | |
192 | def test_pop_last(self): |
193 | compressor = self.compressor() |
194 | - _, _, _, _ = compressor.compress(('key1',), |
195 | - b'some text\nfor the first entry\n', None) |
196 | + _, _, _, _ = compressor.compress( |
197 | + ('key1',), [b'some text\nfor the first entry\n'], None) |
198 | expected_lines = list(compressor.chunks) |
199 | - _, _, _, _ = compressor.compress(('key2',), |
200 | - b'some text\nfor the second entry\n', None) |
201 | + _, _, _, _ = compressor.compress( |
202 | + ('key2',), [b'some text\nfor the second entry\n'], None) |
203 | compressor.pop_last() |
204 | self.assertEqual(expected_lines, compressor.chunks) |
205 | |
206 | @@ -146,30 +147,33 @@ |
207 | |
208 | def test_stats(self): |
209 | compressor = self.compressor() |
210 | - compressor.compress(('label',), |
211 | - b'strange\n' |
212 | - b'common very very long line\n' |
213 | - b'plus more text\n', None) |
214 | - compressor.compress(('newlabel',), |
215 | - b'common very very long line\n' |
216 | - b'plus more text\n' |
217 | - b'different\n' |
218 | - b'moredifferent\n', None) |
219 | - compressor.compress(('label3',), |
220 | - b'new\n' |
221 | - b'common very very long line\n' |
222 | - b'plus more text\n' |
223 | - b'different\n' |
224 | - b'moredifferent\n', None) |
225 | + compressor.compress( |
226 | + ('label',), [b'strange\n', |
227 | + b'common very very long line\n', |
228 | + b'plus more text\n'], None) |
229 | + compressor.compress( |
230 | + ('newlabel',), |
231 | + [b'common very very long line\n', |
232 | + b'plus more text\n', |
233 | + b'different\n', |
234 | + b'moredifferent\n'], None) |
235 | + compressor.compress( |
236 | + ('label3',), |
237 | + [b'new\n', |
238 | + b'common very very long line\n', |
239 | + b'plus more text\n', |
240 | + b'different\n', |
241 | + b'moredifferent\n'], None) |
242 | self.assertAlmostEqual(1.9, compressor.ratio(), 1) |
243 | |
244 | def test_two_nosha_delta(self): |
245 | compressor = self.compressor() |
246 | - sha1_1, _, _, _ = compressor.compress(('label',), |
247 | - b'strange\ncommon long line\nthat needs a 16 byte match\n', None) |
248 | + sha1_1, _, _, _ = compressor.compress( |
249 | + ('label',), |
250 | + [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None) |
251 | expected_lines = list(compressor.chunks) |
252 | - sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',), |
253 | - b'common long line\nthat needs a 16 byte match\ndifferent\n', None) |
254 | + sha1_2, start_point, end_point, _ = compressor.compress( |
255 | + ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None) |
256 | self.assertEqual(sha_string(b'common long line\n' |
257 | b'that needs a 16 byte match\n' |
258 | b'different\n'), sha1_2) |
259 | @@ -190,15 +194,14 @@ |
260 | # The first interesting test: make a change that should use lines from |
261 | # both parents. |
262 | compressor = self.compressor() |
263 | - sha1_1, _, _, _ = compressor.compress(('label',), |
264 | - b'strange\ncommon very very long line\nwith some extra text\n', None) |
265 | - sha1_2, _, _, _ = compressor.compress(('newlabel',), |
266 | - b'different\nmoredifferent\nand then some more\n', None) |
267 | + sha1_1, _, _, _ = compressor.compress( |
268 | + ('label',), [b'strange\ncommon very very long line\nwith some extra text\n'], None) |
269 | + sha1_2, _, _, _ = compressor.compress( |
270 | + ('newlabel',), [b'different\nmoredifferent\nand then some more\n'], None) |
271 | expected_lines = list(compressor.chunks) |
272 | - sha1_3, start_point, end_point, _ = compressor.compress(('label3',), |
273 | - b'new\ncommon very very long line\nwith some extra text\n' |
274 | - b'different\nmoredifferent\nand then some more\n', |
275 | - None) |
276 | + sha1_3, start_point, end_point, _ = compressor.compress( |
277 | + ('label3',), [b'new\ncommon very very long line\nwith some extra text\n', |
278 | + b'different\nmoredifferent\nand then some more\n'], None) |
279 | self.assertEqual( |
280 | sha_string(b'new\ncommon very very long line\nwith some extra text\n' |
281 | b'different\nmoredifferent\nand then some more\n'), |
282 | @@ -225,30 +228,32 @@ |
283 | |
284 | def test_stats(self): |
285 | compressor = self.compressor() |
286 | - compressor.compress(('label',), |
287 | - b'strange\n' |
288 | - b'common very very long line\n' |
289 | - b'plus more text\n', None) |
290 | - compressor.compress(('newlabel',), |
291 | - b'common very very long line\n' |
292 | - b'plus more text\n' |
293 | - b'different\n' |
294 | - b'moredifferent\n', None) |
295 | - compressor.compress(('label3',), |
296 | - b'new\n' |
297 | - b'common very very long line\n' |
298 | - b'plus more text\n' |
299 | - b'different\n' |
300 | - b'moredifferent\n', None) |
301 | + compressor.compress( |
302 | + ('label',), [b'strange\n', |
303 | + b'common very very long line\n', |
304 | + b'plus more text\n'], None) |
305 | + compressor.compress( |
306 | + ('newlabel',), [ |
307 | + b'common very very long line\n', |
308 | + b'plus more text\n', |
309 | + b'different\n', |
310 | + b'moredifferent\n'], None) |
311 | + compressor.compress( |
312 | + ('label3',), |
313 | + [b'new\n', |
314 | + b'common very very long line\n', |
315 | + b'plus more text\n', |
316 | + b'different\n', |
317 | + b'moredifferent\n'], None) |
318 | self.assertAlmostEqual(1.9, compressor.ratio(), 1) |
319 | |
320 | def test_two_nosha_delta(self): |
321 | compressor = self.compressor() |
322 | - sha1_1, _, _, _ = compressor.compress(('label',), |
323 | - b'strange\ncommon long line\nthat needs a 16 byte match\n', None) |
324 | + sha1_1, _, _, _ = compressor.compress( |
325 | + ('label',), [b'strange\ncommon long line\nthat needs a 16 byte match\n'], None) |
326 | expected_lines = list(compressor.chunks) |
327 | - sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',), |
328 | - b'common long line\nthat needs a 16 byte match\ndifferent\n', None) |
329 | + sha1_2, start_point, end_point, _ = compressor.compress( |
330 | + ('newlabel',), [b'common long line\nthat needs a 16 byte match\ndifferent\n'], None) |
331 | self.assertEqual(sha_string(b'common long line\n' |
332 | b'that needs a 16 byte match\n' |
333 | b'different\n'), sha1_2) |
334 | @@ -269,15 +274,14 @@ |
335 | # The first interesting test: make a change that should use lines from |
336 | # both parents. |
337 | compressor = self.compressor() |
338 | - sha1_1, _, _, _ = compressor.compress(('label',), |
339 | - b'strange\ncommon very very long line\nwith some extra text\n', None) |
340 | - sha1_2, _, _, _ = compressor.compress(('newlabel',), |
341 | - b'different\nmoredifferent\nand then some more\n', None) |
342 | + sha1_1, _, _, _ = compressor.compress( |
343 | + ('label',), [b'strange\ncommon very very long line\nwith some extra text\n'], None) |
344 | + sha1_2, _, _, _ = compressor.compress( |
345 | + ('newlabel',), [b'different\nmoredifferent\nand then some more\n'], None) |
346 | expected_lines = list(compressor.chunks) |
347 | - sha1_3, start_point, end_point, _ = compressor.compress(('label3',), |
348 | - b'new\ncommon very very long line\nwith some extra text\n' |
349 | - b'different\nmoredifferent\nand then some more\n', |
350 | - None) |
351 | + sha1_3, start_point, end_point, _ = compressor.compress( |
352 | + ('label3',), [b'new\ncommon very very long line\nwith some extra text\n', |
353 | + b'different\nmoredifferent\nand then some more\n'], None) |
354 | self.assertEqual( |
355 | sha_string(b'new\ncommon very very long line\nwith some extra text\n' |
356 | b'different\nmoredifferent\nand then some more\n'), |
357 | @@ -305,7 +309,7 @@ |
358 | compressor = groupcompress.GroupCompressor() |
359 | start = 0 |
360 | for key in sorted(key_to_text): |
361 | - compressor.compress(key, key_to_text[key], None) |
362 | + compressor.compress(key, [key_to_text[key]], None) |
363 | locs = dict((key, (start, end)) for key, (start, _, end, _) |
364 | in compressor.labels_deltas.items()) |
365 | block = compressor.flush() |
366 | @@ -946,7 +950,7 @@ |
367 | compressor = groupcompress.GroupCompressor() |
368 | start = 0 |
369 | for key in sorted(key_to_text): |
370 | - compressor.compress(key, key_to_text[key], None) |
371 | + compressor.compress(key, [key_to_text[key]], None) |
372 | locs = dict((key, (start, end)) for key, (start, _, end, _) |
373 | in compressor.labels_deltas.items()) |
374 | block = compressor.flush() |
Running landing tests failed
https://ci.breezy-vcs.org/job/brz/job/brz-land/648/