Merge lp:~jelmer/brz/file-content-factory into lp:brz
- file-content-factory
- Merge into trunk
Proposed by
Jelmer Vernooij
| Status | Merged |
|---|---|
| Approved by | Jelmer Vernooij |
| Approved revision | no longer in the source branch. |
| Merge reported by | The Breezy Bot |
| Merged at revision | not available |
| Proposed branch | lp:~jelmer/brz/file-content-factory |
| Merge into | lp:brz |
Diff against target: |
878 lines (+197/-139) 14 files modified
breezy/_annotator_py.py (+1/-1) breezy/bzr/groupcompress.py (+20/-17) breezy/bzr/groupcompress_repo.py (+3/-3) breezy/bzr/knit.py (+55/-36) breezy/bzr/knitpack_repo.py (+2/-2) breezy/bzr/reconcile.py (+5/-5) breezy/bzr/smart/repository.py (+3/-1) breezy/bzr/versionedfile.py (+55/-18) breezy/bzr/vf_repository.py (+6/-12) breezy/bzr/weave.py (+5/-6) breezy/git/annotate.py (+7/-4) breezy/merge.py (+1/-2) breezy/plugins/weave_fmt/repository.py (+5/-5) breezy/tests/per_versionedfile.py (+29/-27) |
To merge this branch: | bzr merge lp:~jelmer/brz/file-content-factory |
Related bugs: |
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| Jelmer Vernooij | | | Approve |

Review via email: mp+378072@code.launchpad.net
Commit message
Add a 'lines' storage kind.
Description of the change
Add a 'lines' storage kind.
Avoid converting back and forth between fulltext and lines in various places.
To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) : | # |
review:
Approve
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote : | # |
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote : | # |
Running landing tests failed
https:/ (link truncated in page extraction — original URL not recoverable)
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote : | # |
Running landing tests failed
https:/ (link truncated in page extraction — original URL not recoverable)
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'breezy/_annotator_py.py' |
2 | --- breezy/_annotator_py.py 2019-03-02 21:46:18 +0000 |
3 | +++ breezy/_annotator_py.py 2020-01-25 16:26:49 +0000 |
4 | @@ -127,7 +127,7 @@ |
5 | if record.storage_kind == 'absent': |
6 | raise errors.RevisionNotPresent(record.key, self._vf) |
7 | this_key = record.key |
8 | - lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) |
9 | + lines = record.get_bytes_as('lines') |
10 | num_lines = len(lines) |
11 | self._text_cache[this_key] = lines |
12 | yield this_key, lines, num_lines |
13 | |
14 | === modified file 'breezy/bzr/groupcompress.py' |
15 | --- breezy/bzr/groupcompress.py 2020-01-25 14:46:10 +0000 |
16 | +++ breezy/bzr/groupcompress.py 2020-01-25 16:26:49 +0000 |
17 | @@ -466,7 +466,7 @@ |
18 | return self._manager._wire_bytes() |
19 | else: |
20 | return b'' |
21 | - if storage_kind in ('fulltext', 'chunked'): |
22 | + if storage_kind in ('fulltext', 'chunked', 'lines'): |
23 | if self._bytes is None: |
24 | # Grab and cache the raw bytes for this entry |
25 | # and break the ref-cycle with _manager since we don't need it |
26 | @@ -482,8 +482,10 @@ |
27 | # refcycle here, but instead in manager.get_record_stream() |
28 | if storage_kind == 'fulltext': |
29 | return self._bytes |
30 | - else: |
31 | + elif storage_kind == 'chunked': |
32 | return [self._bytes] |
33 | + else: |
34 | + return osutils.split_lines(self._bytes) |
35 | raise errors.UnavailableRepresentation(self.key, storage_kind, |
36 | self.storage_kind) |
37 | |
38 | @@ -566,9 +568,9 @@ |
39 | old_length = self._block._content_length |
40 | end_point = 0 |
41 | for factory in self._factories: |
42 | - bytes = factory.get_bytes_as('fulltext') |
43 | + chunks = factory.get_bytes_as('chunked') |
44 | (found_sha1, start_point, end_point, |
45 | - type) = compressor.compress(factory.key, [bytes], factory.sha1) |
46 | + type) = compressor.compress(factory.key, chunks, factory.sha1) |
47 | # Now update this factory with the new offsets, etc |
48 | factory.sha1 = found_sha1 |
49 | factory._start = start_point |
50 | @@ -1369,7 +1371,7 @@ |
51 | if keys is None: |
52 | keys = self.keys() |
53 | for record in self.get_record_stream(keys, 'unordered', True): |
54 | - record.get_bytes_as('fulltext') |
55 | + record.get_bytes_as('chunked') |
56 | else: |
57 | return self.get_record_stream(keys, 'unordered', True) |
58 | |
59 | @@ -1670,8 +1672,8 @@ |
60 | result[record.key] = record.sha1 |
61 | else: |
62 | if record.storage_kind != 'absent': |
63 | - result[record.key] = osutils.sha_string( |
64 | - record.get_bytes_as('fulltext')) |
65 | + result[record.key] = osutils.sha_strings( |
66 | + record.get_bytes_as('chunked')) |
67 | return result |
68 | |
69 | def insert_record_stream(self, stream): |
70 | @@ -1823,25 +1825,26 @@ |
71 | self._index.add_records(nodes, random_id=random_id) |
72 | continue |
73 | try: |
74 | - bytes = record.get_bytes_as('fulltext') |
75 | + chunks = record.get_bytes_as('chunked') |
76 | except errors.UnavailableRepresentation: |
77 | - adapter_key = record.storage_kind, 'fulltext' |
78 | + adapter_key = record.storage_kind, 'chunked' |
79 | adapter = get_adapter(adapter_key) |
80 | - bytes = adapter.get_bytes(record) |
81 | + chunks = adapter.get_bytes(record, 'chunked') |
82 | + chunks_len = sum(map(len, chunks)) |
83 | if len(record.key) > 1: |
84 | prefix = record.key[0] |
85 | soft = (prefix == last_prefix) |
86 | else: |
87 | prefix = None |
88 | soft = False |
89 | - if max_fulltext_len < len(bytes): |
90 | - max_fulltext_len = len(bytes) |
91 | + if max_fulltext_len < chunks_len: |
92 | + max_fulltext_len = chunks_len |
93 | max_fulltext_prefix = prefix |
94 | (found_sha1, start_point, end_point, |
95 | type) = self._compressor.compress(record.key, |
96 | - [bytes], record.sha1, soft=soft, |
97 | + chunks, record.sha1, soft=soft, |
98 | nostore_sha=nostore_sha) |
99 | - # delta_ratio = float(len(bytes)) / (end_point - start_point) |
100 | + # delta_ratio = float(chunks_len) / (end_point - start_point) |
101 | # Check if we want to continue to include that text |
102 | if (prefix == max_fulltext_prefix |
103 | and end_point < 2 * max_fulltext_len): |
104 | @@ -1859,10 +1862,10 @@ |
105 | if start_new_block: |
106 | self._compressor.pop_last() |
107 | flush() |
108 | - max_fulltext_len = len(bytes) |
109 | + max_fulltext_len = chunks_len |
110 | (found_sha1, start_point, end_point, |
111 | type) = self._compressor.compress( |
112 | - record.key, [bytes], record.sha1) |
113 | + record.key, chunks, record.sha1) |
114 | if record.key[-1] is None: |
115 | key = record.key[:-1] + (b'sha1:' + found_sha1,) |
116 | else: |
117 | @@ -1915,7 +1918,7 @@ |
118 | pb.update('Walking content', key_idx, total) |
119 | if record.storage_kind == 'absent': |
120 | raise errors.RevisionNotPresent(key, self) |
121 | - lines = osutils.split_lines(record.get_bytes_as('fulltext')) |
122 | + lines = record.get_bytes_as('lines') |
123 | for line in lines: |
124 | yield line, key |
125 | if pb is not None: |
126 | |
127 | === modified file 'breezy/bzr/groupcompress_repo.py' |
128 | --- breezy/bzr/groupcompress_repo.py 2019-10-19 22:53:54 +0000 |
129 | +++ breezy/bzr/groupcompress_repo.py 2020-01-25 16:26:49 +0000 |
130 | @@ -698,9 +698,9 @@ |
131 | % (chk_inv.revision_id, p_id_map.key()[0], |
132 | canon_p_id_map.key()[0])) |
133 | self._data_changed = True |
134 | - yield versionedfile.ChunkedContentFactory(record.key, |
135 | - record.parents, record.sha1, |
136 | - canonical_inv.to_lines()) |
137 | + yield versionedfile.ChunkedContentFactory( |
138 | + record.key, record.parents, record.sha1, canonical_inv.to_lines(), |
139 | + chunks_are_lines=True) |
140 | # We have finished processing all of the inventory records, we |
141 | # don't need these sets anymore |
142 | return _filtered_inv_stream() |
143 | |
144 | === modified file 'breezy/bzr/knit.py' |
145 | --- breezy/bzr/knit.py 2020-01-25 04:20:44 +0000 |
146 | +++ breezy/bzr/knit.py 2020-01-25 16:26:49 +0000 |
147 | @@ -223,7 +223,10 @@ |
148 | class FTAnnotatedToUnannotated(KnitAdapter): |
149 | """An adapter from FT annotated knits to unannotated ones.""" |
150 | |
151 | - def get_bytes(self, factory): |
152 | + def get_bytes(self, factory, target_storage_kind): |
153 | + if target_storage_kind != 'knit-ft-gz': |
154 | + raise errors.UnavailableRepresentation( |
155 | + factory.key, target_storage_kind, factory.storage_kind) |
156 | annotated_compressed_bytes = factory._raw_record |
157 | rec, contents = \ |
158 | self._data._parse_record_unchecked(annotated_compressed_bytes) |
159 | @@ -236,7 +239,10 @@ |
160 | class DeltaAnnotatedToUnannotated(KnitAdapter): |
161 | """An adapter for deltas from annotated to unannotated.""" |
162 | |
163 | - def get_bytes(self, factory): |
164 | + def get_bytes(self, factory, target_storage_kind): |
165 | + if target_storage_kind != 'knit-delta-gz': |
166 | + raise errors.UnavailableRepresentation( |
167 | + factory.key, target_storage_kind, factory.storage_kind) |
168 | annotated_compressed_bytes = factory._raw_record |
169 | rec, contents = \ |
170 | self._data._parse_record_unchecked(annotated_compressed_bytes) |
171 | @@ -250,19 +256,24 @@ |
172 | class FTAnnotatedToFullText(KnitAdapter): |
173 | """An adapter from FT annotated knits to unannotated ones.""" |
174 | |
175 | - def get_bytes(self, factory): |
176 | + def get_bytes(self, factory, target_storage_kind): |
177 | annotated_compressed_bytes = factory._raw_record |
178 | rec, contents = \ |
179 | self._data._parse_record_unchecked(annotated_compressed_bytes) |
180 | content, delta = self._annotate_factory.parse_record(factory.key[-1], |
181 | contents, factory._build_details, None) |
182 | - return b''.join(content.text()) |
183 | + if target_storage_kind == 'fulltext': |
184 | + return b''.join(content.text()) |
185 | + elif target_storage_kind in ('chunked', 'lines'): |
186 | + return content.text() |
187 | + raise errors.UnavailableRepresentation( |
188 | + factory.key, target_storage_kind, factory.storage_kind) |
189 | |
190 | |
191 | class DeltaAnnotatedToFullText(KnitAdapter): |
192 | """An adapter for deltas from annotated to unannotated.""" |
193 | |
194 | - def get_bytes(self, factory): |
195 | + def get_bytes(self, factory, target_storage_kind): |
196 | annotated_compressed_bytes = factory._raw_record |
197 | rec, contents = \ |
198 | self._data._parse_record_unchecked(annotated_compressed_bytes) |
199 | @@ -273,32 +284,42 @@ |
200 | [compression_parent], 'unordered', True)) |
201 | if basis_entry.storage_kind == 'absent': |
202 | raise errors.RevisionNotPresent(compression_parent, self._basis_vf) |
203 | - basis_chunks = basis_entry.get_bytes_as('chunked') |
204 | - basis_lines = osutils.chunks_to_lines(basis_chunks) |
205 | + basis_lines = basis_entry.get_bytes_as('lines') |
206 | # Manually apply the delta because we have one annotated content and |
207 | # one plain. |
208 | basis_content = PlainKnitContent(basis_lines, compression_parent) |
209 | basis_content.apply_delta(delta, rec[1]) |
210 | basis_content._should_strip_eol = factory._build_details[1] |
211 | - return b''.join(basis_content.text()) |
212 | + |
213 | + if target_storage_kind == 'fulltext': |
214 | + return b''.join(basis_content.text()) |
215 | + elif target_storage_kind in ('chunked', 'lines'): |
216 | + return basis_content.text() |
217 | + raise errors.UnavailableRepresentation( |
218 | + factory.key, target_storage_kind, factory.storage_kind) |
219 | |
220 | |
221 | class FTPlainToFullText(KnitAdapter): |
222 | """An adapter from FT plain knits to unannotated ones.""" |
223 | |
224 | - def get_bytes(self, factory): |
225 | + def get_bytes(self, factory, target_storage_kind): |
226 | compressed_bytes = factory._raw_record |
227 | rec, contents = \ |
228 | self._data._parse_record_unchecked(compressed_bytes) |
229 | content, delta = self._plain_factory.parse_record(factory.key[-1], |
230 | contents, factory._build_details, None) |
231 | - return b''.join(content.text()) |
232 | + if target_storage_kind == 'fulltext': |
233 | + return b''.join(content.text()) |
234 | + elif target_storage_kind in ('chunked', 'lines'): |
235 | + return content.text() |
236 | + raise errors.UnavailableRepresentation( |
237 | + factory.key, target_storage_kind, factory.storage_kind) |
238 | |
239 | |
240 | class DeltaPlainToFullText(KnitAdapter): |
241 | """An adapter for deltas from annotated to unannotated.""" |
242 | |
243 | - def get_bytes(self, factory): |
244 | + def get_bytes(self, factory, target_storage_kind): |
245 | compressed_bytes = factory._raw_record |
246 | rec, contents = \ |
247 | self._data._parse_record_unchecked(compressed_bytes) |
248 | @@ -309,14 +330,18 @@ |
249 | [compression_parent], 'unordered', True)) |
250 | if basis_entry.storage_kind == 'absent': |
251 | raise errors.RevisionNotPresent(compression_parent, self._basis_vf) |
252 | - basis_chunks = basis_entry.get_bytes_as('chunked') |
253 | - basis_lines = osutils.chunks_to_lines(basis_chunks) |
254 | + basis_lines = basis_entry.get_bytes_as('lines') |
255 | basis_content = PlainKnitContent(basis_lines, compression_parent) |
256 | # Manually apply the delta because we have one annotated content and |
257 | # one plain. |
258 | content, _ = self._plain_factory.parse_record(rec[1], contents, |
259 | factory._build_details, basis_content) |
260 | - return b''.join(content.text()) |
261 | + if target_storage_kind == 'fulltext': |
262 | + return b''.join(content.text()) |
263 | + elif target_storage_kind in ('chunked', 'lines'): |
264 | + return content.text() |
265 | + raise errors.UnavailableRepresentation( |
266 | + factory.key, target_storage_kind, factory.storage_kind) |
267 | |
268 | |
269 | class KnitContentFactory(ContentFactory): |
270 | @@ -381,19 +406,15 @@ |
271 | self._create_network_bytes() |
272 | return self._network_bytes |
273 | if ('-ft-' in self.storage_kind |
274 | - and storage_kind in ('chunked', 'fulltext')): |
275 | - adapter_key = (self.storage_kind, 'fulltext') |
276 | + and storage_kind in ('chunked', 'fulltext', 'lines')): |
277 | + adapter_key = (self.storage_kind, storage_kind) |
278 | adapter_factory = adapter_registry.get(adapter_key) |
279 | adapter = adapter_factory(None) |
280 | - bytes = adapter.get_bytes(self) |
281 | - if storage_kind == 'chunked': |
282 | - return [bytes] |
283 | - else: |
284 | - return bytes |
285 | + return adapter.get_bytes(self, storage_kind) |
286 | if self._knit is not None: |
287 | # Not redundant with direct conversion above - that only handles |
288 | # fulltext cases. |
289 | - if storage_kind == 'chunked': |
290 | + if storage_kind in ('chunked', 'lines'): |
291 | return self._knit.get_lines(self.key[0]) |
292 | elif storage_kind == 'fulltext': |
293 | return self._knit.get_text(self.key[0]) |
294 | @@ -435,9 +456,9 @@ |
295 | # all the keys etc are contained in the bytes returned in the |
296 | # first record. |
297 | return b'' |
298 | - if storage_kind in ('chunked', 'fulltext'): |
299 | + if storage_kind in ('chunked', 'fulltext', 'lines'): |
300 | chunks = self._generator._get_one_work(self.key).text() |
301 | - if storage_kind == 'chunked': |
302 | + if storage_kind in ('chunked', 'lines'): |
303 | return chunks |
304 | else: |
305 | return b''.join(chunks) |
306 | @@ -1708,7 +1729,7 @@ |
307 | except KeyError: |
308 | adapter_key = (record.storage_kind, "knit-ft-gz") |
309 | adapter = get_adapter(adapter_key) |
310 | - bytes = adapter.get_bytes(record) |
311 | + bytes = adapter.get_bytes(record, adapter_key[1]) |
312 | else: |
313 | # It's a knit record, it has a _raw_record field (even if |
314 | # it was reconstituted from a network stream). |
315 | @@ -1745,9 +1766,8 @@ |
316 | buffered = True |
317 | if not buffered: |
318 | self._index.add_records([index_entry]) |
319 | - elif record.storage_kind == 'chunked': |
320 | - self.add_lines(record.key, parents, |
321 | - osutils.chunks_to_lines(record.get_bytes_as('chunked'))) |
322 | + elif record.storage_kind in ('chunked', 'file'): |
323 | + self.add_lines(record.key, parents, record.get_bytes_as('lines')) |
324 | else: |
325 | # Not suitable for direct insertion as a |
326 | # delta, either because it's not the right format, or this |
327 | @@ -1757,12 +1777,11 @@ |
328 | self._access.flush() |
329 | try: |
330 | # Try getting a fulltext directly from the record. |
331 | - bytes = record.get_bytes_as('fulltext') |
332 | + lines = record.get_bytes_as('lines') |
333 | except errors.UnavailableRepresentation: |
334 | - adapter_key = record.storage_kind, 'fulltext' |
335 | + adapter_key = record.storage_kind, 'lines' |
336 | adapter = get_adapter(adapter_key) |
337 | - bytes = adapter.get_bytes(record) |
338 | - lines = split_lines(bytes) |
339 | + lines = adapter.get_bytes(record, 'lines') |
340 | try: |
341 | self.add_lines(record.key, parents, lines) |
342 | except errors.RevisionAlreadyPresent: |
343 | @@ -2107,7 +2126,7 @@ |
344 | if key in self.nonlocal_keys: |
345 | record = next(self.get_record_stream()) |
346 | # Create a content object on the fly |
347 | - lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) |
348 | + lines = record.get_bytes_as('lines') |
349 | return PlainKnitContent(lines, record.key) |
350 | else: |
351 | # local keys we can ask for directly |
352 | @@ -2198,9 +2217,9 @@ |
353 | if component_id in self._contents_map: |
354 | content = self._contents_map[component_id] |
355 | else: |
356 | - content, delta = self._factory.parse_record(key[-1], |
357 | - record, record_details, content, |
358 | - copy_base_content=multiple_versions) |
359 | + content, delta = self._factory.parse_record( |
360 | + key[-1], record, record_details, content, |
361 | + copy_base_content=multiple_versions) |
362 | if multiple_versions: |
363 | self._contents_map[component_id] = content |
364 | |
365 | |
366 | === modified file 'breezy/bzr/knitpack_repo.py' |
367 | --- breezy/bzr/knitpack_repo.py 2020-01-11 17:50:28 +0000 |
368 | +++ breezy/bzr/knitpack_repo.py 2020-01-25 16:26:49 +0000 |
369 | @@ -1092,8 +1092,8 @@ |
370 | raise errors.BzrError('Mismatched key parent %r:%r' % |
371 | (key, parent_keys)) |
372 | parents.append(parent_key[1]) |
373 | - text_lines = osutils.split_lines(next(repo.texts.get_record_stream( |
374 | - [key], 'unordered', True)).get_bytes_as('fulltext')) |
375 | + text_lines = next(repo.texts.get_record_stream( |
376 | + [key], 'unordered', True)).get_bytes_as('lines') |
377 | output_texts.add_lines(key, parent_keys, text_lines, |
378 | random_id=True, check_content=False) |
379 | # 5) check that nothing inserted has a reference outside the keyspace. |
380 | |
381 | === modified file 'breezy/bzr/reconcile.py' |
382 | --- breezy/bzr/reconcile.py 2018-11-30 12:39:04 +0000 |
383 | +++ breezy/bzr/reconcile.py 2020-01-25 16:26:49 +0000 |
384 | @@ -35,7 +35,7 @@ |
385 | from ..i18n import gettext |
386 | from ..trace import mutter |
387 | from ..tsort import topo_sort |
388 | -from .versionedfile import AdapterFactory, FulltextContentFactory |
389 | +from .versionedfile import AdapterFactory, ChunkedContentFactory |
390 | |
391 | |
392 | class VersionedFileRepoReconciler(object): |
393 | @@ -152,8 +152,8 @@ |
394 | # The check for the left most parent only handles knit |
395 | # compressors, but this code only applies to knit and weave |
396 | # repositories anyway. |
397 | - bytes = record.get_bytes_as('fulltext') |
398 | - yield FulltextContentFactory(record.key, wanted_parents, record.sha1, bytes) |
399 | + chunks = record.get_bytes_as('chunked') |
400 | + yield ChunkedContentFactory(record.key, wanted_parents, record.sha1, chunks) |
401 | else: |
402 | adapted_record = AdapterFactory( |
403 | record.key, wanted_parents, record) |
404 | @@ -344,10 +344,10 @@ |
405 | |
406 | def fix_parents(stream): |
407 | for record in stream: |
408 | - bytes = record.get_bytes_as('fulltext') |
409 | + chunks = record.get_bytes_as('chunked') |
410 | new_key = (new_file_id, record.key[-1]) |
411 | parents = new_parents[new_key] |
412 | - yield FulltextContentFactory(new_key, parents, record.sha1, bytes) |
413 | + yield ChunkedContentFactory(new_key, parents, record.sha1, chunks) |
414 | stream = self.repo.texts.get_record_stream( |
415 | needed_keys, 'topological', True) |
416 | self.repo._remove_file_id(new_file_id) |
417 | |
418 | === modified file 'breezy/bzr/smart/repository.py' |
419 | --- breezy/bzr/smart/repository.py 2019-02-15 18:57:38 +0000 |
420 | +++ breezy/bzr/smart/repository.py 2020-01-25 16:26:49 +0000 |
421 | @@ -1270,7 +1270,9 @@ |
422 | inv_delta = inv._make_delta(prev_inv) |
423 | lines = serializer.delta_to_lines( |
424 | prev_inv.revision_id, inv.revision_id, inv_delta) |
425 | - yield ChunkedContentFactory(inv.revision_id, None, None, lines) |
426 | + yield ChunkedContentFactory( |
427 | + inv.revision_id, None, None, lines, |
428 | + chunks_are_lines=True) |
429 | prev_inv = inv |
430 | |
431 | def body_stream(self, repository, ordering, revids): |
432 | |
433 | === modified file 'breezy/bzr/versionedfile.py' |
434 | --- breezy/bzr/versionedfile.py 2020-01-11 17:50:28 +0000 |
435 | +++ breezy/bzr/versionedfile.py 2020-01-25 16:26:49 +0000 |
436 | @@ -56,20 +56,19 @@ |
437 | |
438 | |
439 | adapter_registry = Registry() |
440 | -adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.bzr.knit', |
441 | - 'DeltaPlainToFullText') |
442 | -adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.bzr.knit', |
443 | - 'FTPlainToFullText') |
444 | adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'), |
445 | 'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated') |
446 | -adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'), |
447 | - 'breezy.bzr.knit', 'DeltaAnnotatedToFullText') |
448 | adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'), |
449 | 'breezy.bzr.knit', 'FTAnnotatedToUnannotated') |
450 | -adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'), |
451 | - 'breezy.bzr.knit', 'FTAnnotatedToFullText') |
452 | -# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'), |
453 | -# 'breezy.bzr.knit', 'FTAnnotatedToChunked') |
454 | +for target_storage_kind in ('fulltext', 'chunked', 'lines'): |
455 | + adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind), 'breezy.bzr.knit', |
456 | + 'DeltaPlainToFullText') |
457 | + adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind), 'breezy.bzr.knit', |
458 | + 'FTPlainToFullText') |
459 | + adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind), |
460 | + 'breezy.bzr.knit', 'FTAnnotatedToFullText') |
461 | + adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind), |
462 | + 'breezy.bzr.knit', 'DeltaAnnotatedToFullText') |
463 | |
464 | |
465 | class ContentFactory(object): |
466 | @@ -110,21 +109,27 @@ |
467 | :ivar parents: A tuple of parent keys for self.key. If the object has |
468 | no parent information, None (as opposed to () for an empty list of |
469 | parents). |
470 | + :ivar chunks_are_lines: Whether chunks are lines. |
471 | """ |
472 | |
473 | - def __init__(self, key, parents, sha1, chunks): |
474 | + def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None): |
475 | """Create a ContentFactory.""" |
476 | self.sha1 = sha1 |
477 | self.storage_kind = 'chunked' |
478 | self.key = key |
479 | self.parents = parents |
480 | self._chunks = chunks |
481 | + self._chunks_are_lines = chunks_are_lines |
482 | |
483 | def get_bytes_as(self, storage_kind): |
484 | if storage_kind == 'chunked': |
485 | return self._chunks |
486 | elif storage_kind == 'fulltext': |
487 | return b''.join(self._chunks) |
488 | + elif storage_kind == 'lines': |
489 | + if self._chunks_are_lines: |
490 | + return self._chunks |
491 | + return list(osutils.chunks_to_lines(self._chunks)) |
492 | raise errors.UnavailableRepresentation(self.key, storage_kind, |
493 | self.storage_kind) |
494 | |
495 | @@ -160,6 +165,38 @@ |
496 | return self._text |
497 | elif storage_kind == 'chunked': |
498 | return [self._text] |
499 | + elif storage_kind == 'lines': |
500 | + return osutils.split_lines(self._text) |
501 | + raise errors.UnavailableRepresentation(self.key, storage_kind, |
502 | + self.storage_kind) |
503 | + |
504 | + |
505 | +class FileContentFactory(ContentFactory): |
506 | + """File-based content factory. |
507 | + """ |
508 | + |
509 | + def __init__(self, key, parents, fileobj): |
510 | + self.key = key |
511 | + self.parents = parents |
512 | + self.file = fileobj |
513 | + self.storage_kind = 'file' |
514 | + self._sha1 = None |
515 | + |
516 | + @property |
517 | + def sha1(self): |
518 | + if self._sha1 is None: |
519 | + self.file.seek(0) |
520 | + self._size, self._sha1 = osutils.size_sha_file(self.file) |
521 | + return self._sha1 |
522 | + |
523 | + def get_bytes_as(self, storage_kind): |
524 | + self.file.seek(0) |
525 | + if storage_kind == 'fulltext': |
526 | + return self.file.read() |
527 | + elif storage_kind == 'chunked': |
528 | + return list(osutils.file_iterator(self.file)) |
529 | + elif storage_kind == 'lines': |
530 | + return self.file.readlines() |
531 | raise errors.UnavailableRepresentation(self.key, storage_kind, |
532 | self.storage_kind) |
533 | |
534 | @@ -1030,13 +1067,11 @@ |
535 | if not mpvf.has_version(p)) |
536 | # It seems likely that adding all the present parents as fulltexts can |
537 | # easily exhaust memory. |
538 | - chunks_to_lines = osutils.chunks_to_lines |
539 | for record in self.get_record_stream(needed_parents, 'unordered', |
540 | True): |
541 | if record.storage_kind == 'absent': |
542 | continue |
543 | - mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')), |
544 | - record.key, []) |
545 | + mpvf.add_version(record.get_bytes_as('lines'), record.key, []) |
546 | for (key, parent_keys, expected_sha1, mpdiff), lines in zip( |
547 | records, mpvf.get_line_list(versions)): |
548 | if len(parent_keys) == 1: |
549 | @@ -1546,7 +1581,9 @@ |
550 | lines = self._lines[key] |
551 | parents = self._parents[key] |
552 | pending.remove(key) |
553 | - yield ChunkedContentFactory(key, parents, None, lines) |
554 | + yield ChunkedContentFactory( |
555 | + key, parents, None, lines, |
556 | + chunks_are_lines=True) |
557 | for versionedfile in self.fallback_versionedfiles: |
558 | for record in versionedfile.get_record_stream( |
559 | pending, 'unordered', True): |
560 | @@ -1775,9 +1812,9 @@ |
561 | if lines is not None: |
562 | if not isinstance(lines, list): |
563 | raise AssertionError |
564 | - yield ChunkedContentFactory((k,), None, |
565 | - sha1=osutils.sha_strings(lines), |
566 | - chunks=lines) |
567 | + yield ChunkedContentFactory( |
568 | + (k,), None, sha1=osutils.sha_strings(lines), |
569 | + chunks=lines, chunks_are_lines=True) |
570 | else: |
571 | yield AbsentContentFactory((k,)) |
572 | |
573 | |
574 | === modified file 'breezy/bzr/vf_repository.py' |
575 | --- breezy/bzr/vf_repository.py 2020-01-20 00:03:54 +0000 |
576 | +++ breezy/bzr/vf_repository.py 2020-01-25 16:26:49 +0000 |
577 | @@ -853,8 +853,8 @@ |
578 | """Check a single text from this repository.""" |
579 | if kind == 'inventories': |
580 | rev_id = record.key[0] |
581 | - inv = self._deserialise_inventory(rev_id, |
582 | - record.get_bytes_as('fulltext')) |
583 | + inv = self._deserialise_inventory( |
584 | + rev_id, record.get_bytes_as('fulltext')) |
585 | if last_object is not None: |
586 | delta = inv._make_delta(last_object) |
587 | for old_path, path, file_id, ie in delta: |
588 | @@ -880,14 +880,9 @@ |
589 | """Check a single text.""" |
590 | # Check it is extractable. |
591 | # TODO: check length. |
592 | - if record.storage_kind == 'chunked': |
593 | - chunks = record.get_bytes_as(record.storage_kind) |
594 | - sha1 = osutils.sha_strings(chunks) |
595 | - length = sum(map(len, chunks)) |
596 | - else: |
597 | - content = record.get_bytes_as('fulltext') |
598 | - sha1 = osutils.sha_string(content) |
599 | - length = len(content) |
600 | + chunks = record.get_bytes_as('chunked') |
601 | + sha1 = osutils.sha_strings(chunks) |
602 | + length = sum(map(len, chunks)) |
603 | if item_data and sha1 != item_data[1]: |
604 | checker._report_items.append( |
605 | 'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' % |
606 | @@ -1197,9 +1192,8 @@ |
607 | stream = self.inventories.get_record_stream(keys, 'unordered', True) |
608 | for record in stream: |
609 | if record.storage_kind != 'absent': |
610 | - chunks = record.get_bytes_as('chunked') |
611 | + lines = record.get_bytes_as('lines') |
612 | revid = record.key[-1] |
613 | - lines = osutils.chunks_to_lines(chunks) |
614 | for line in lines: |
615 | yield line, revid |
616 | |
617 | |
618 | === modified file 'breezy/bzr/weave.py' |
619 | --- breezy/bzr/weave.py 2019-03-02 21:46:18 +0000 |
620 | +++ breezy/bzr/weave.py 2020-01-25 16:26:49 +0000 |
621 | @@ -179,7 +179,7 @@ |
622 | def get_bytes_as(self, storage_kind): |
623 | if storage_kind == 'fulltext': |
624 | return self._weave.get_text(self.key[-1]) |
625 | - elif storage_kind == 'chunked': |
626 | + elif storage_kind in ('chunked', 'lines'): |
627 | return self._weave.get_lines(self.key[-1]) |
628 | else: |
629 | raise UnavailableRepresentation(self.key, storage_kind, 'fulltext') |
630 | @@ -422,20 +422,19 @@ |
631 | raise RevisionNotPresent([record.key[0]], self) |
632 | # adapt to non-tuple interface |
633 | parents = [parent[0] for parent in record.parents] |
634 | - if (record.storage_kind == 'fulltext' or |
635 | - record.storage_kind == 'chunked'): |
636 | + if record.storage_kind in ('fulltext', 'chunked', 'lines'): |
637 | self.add_lines( |
638 | record.key[0], parents, |
639 | - osutils.chunks_to_lines(record.get_bytes_as('chunked'))) |
640 | + record.get_bytes_as('lines')) |
641 | else: |
642 | - adapter_key = record.storage_kind, 'fulltext' |
643 | + adapter_key = record.storage_kind, 'lines' |
644 | try: |
645 | adapter = adapters[adapter_key] |
646 | except KeyError: |
647 | adapter_factory = adapter_registry.get(adapter_key) |
648 | adapter = adapter_factory(self) |
649 | adapters[adapter_key] = adapter |
650 | - lines = split_lines(adapter.get_bytes(record)) |
651 | + lines = adapter.get_bytes(record, 'lines') |
652 | try: |
653 | self.add_lines(record.key[0], parents, lines) |
654 | except RevisionAlreadyPresent: |
655 | |
656 | === modified file 'breezy/git/annotate.py' |
657 | --- breezy/git/annotate.py 2019-10-20 23:46:25 +0000 |
658 | +++ breezy/git/annotate.py 2020-01-25 16:26:49 +0000 |
659 | @@ -22,6 +22,7 @@ |
660 | tree_lookup_path, |
661 | ) |
662 | |
663 | +from .. import osutils |
664 | from ..errors import ( |
665 | NoSuchRevision, |
666 | UnavailableRepresentation, |
667 | @@ -32,7 +33,7 @@ |
668 | ) |
669 | |
670 | |
671 | -class GitFulltextContentFactory(object): |
672 | +class GitBlobContentFactory(object): |
673 | """Static data content factory. |
674 | |
675 | This takes a fulltext when created and just returns that during |
676 | @@ -52,17 +53,19 @@ |
677 | """Create a ContentFactory.""" |
678 | self.store = store |
679 | self.key = (path, revision) |
680 | - self.storage_kind = 'fulltext' |
681 | + self.storage_kind = 'git-blob' |
682 | self.parents = None |
683 | self.blob_id = blob_id |
684 | |
685 | def get_bytes_as(self, storage_kind): |
686 | if storage_kind == 'fulltext': |
687 | return self.store[self.blob_id].as_raw_string() |
688 | + elif storage_kind == 'lines': |
689 | + return osutils.chunks_to_lines(self.store[self.blob_id].as_raw_chunks()) |
690 | elif storage_kind == 'chunked': |
691 | return self.store[self.blob_id].as_raw_chunks() |
692 | raise UnavailableRepresentation(self.key, storage_kind, |
693 | - 'fulltext') |
694 | + self.storage_kind) |
695 | |
696 | |
697 | class GitAbsentContentFactory(object): |
698 | @@ -153,5 +156,5 @@ |
699 | except KeyError: |
700 | yield GitAbsentContentFactory(store, path, text_revision) |
701 | else: |
702 | - yield GitFulltextContentFactory( |
703 | + yield GitBlobContentFactory( |
704 | store, path, text_revision, blob_sha) |
705 | |
706 | === modified file 'breezy/merge.py' |
707 | --- breezy/merge.py 2020-01-18 16:14:28 +0000 |
708 | +++ breezy/merge.py 2020-01-25 16:26:49 +0000 |
709 | @@ -2007,8 +2007,7 @@ |
710 | for record in self.vf.get_record_stream(keys, 'unordered', True): |
711 | if record.storage_kind == 'absent': |
712 | raise errors.RevisionNotPresent(record.key, self.vf) |
713 | - result[record.key[-1]] = osutils.chunks_to_lines( |
714 | - record.get_bytes_as('chunked')) |
715 | + result[record.key[-1]] = record.get_bytes_as('lines') |
716 | return result |
717 | |
718 | def plan_merge(self): |
719 | |
720 | === modified file 'breezy/plugins/weave_fmt/repository.py' |
721 | --- breezy/plugins/weave_fmt/repository.py 2018-11-12 01:41:38 +0000 |
722 | +++ breezy/plugins/weave_fmt/repository.py 2020-01-25 16:26:49 +0000 |
723 | @@ -628,19 +628,19 @@ |
724 | if record.storage_kind == 'absent': |
725 | raise errors.RevisionNotPresent([record.key[0]], self) |
726 | # adapt to non-tuple interface |
727 | - if record.storage_kind == 'fulltext': |
728 | + if record.storage_kind in ('fulltext', 'chunks', 'lines'): |
729 | self.add_lines(record.key, None, |
730 | - osutils.split_lines(record.get_bytes_as('fulltext'))) |
731 | + record.get_bytes_as('lines')) |
732 | else: |
733 | - adapter_key = record.storage_kind, 'fulltext' |
734 | + adapter_key = record.storage_kind, 'lines' |
735 | try: |
736 | adapter = adapters[adapter_key] |
737 | except KeyError: |
738 | adapter_factory = adapter_registry.get(adapter_key) |
739 | adapter = adapter_factory(self) |
740 | adapters[adapter_key] = adapter |
741 | - lines = osutils.split_lines(adapter.get_bytes( |
742 | - record, record.get_bytes_as(record.storage_kind))) |
743 | + lines = adapter.get_bytes( |
744 | + record, record.get_bytes_as(record.storage_kind)) |
745 | try: |
746 | self.add_lines(record.key, None, lines) |
747 | except errors.RevisionAlreadyPresent: |
748 | |
749 | === modified file 'breezy/tests/per_versionedfile.py' |
750 | --- breezy/tests/per_versionedfile.py 2019-06-16 15:54:33 +0000 |
751 | +++ breezy/tests/per_versionedfile.py 2020-01-25 16:26:49 +0000 |
752 | @@ -1214,7 +1214,11 @@ |
753 | # Each is source_kind, requested_kind, adapter class |
754 | scenarios = [ |
755 | ('knit-delta-gz', 'fulltext', _mod_knit.DeltaPlainToFullText), |
756 | + ('knit-delta-gz', 'lines', _mod_knit.DeltaPlainToFullText), |
757 | + ('knit-delta-gz', 'chunked', _mod_knit.DeltaPlainToFullText), |
758 | ('knit-ft-gz', 'fulltext', _mod_knit.FTPlainToFullText), |
759 | + ('knit-ft-gz', 'lines', _mod_knit.FTPlainToFullText), |
760 | + ('knit-ft-gz', 'chunked', _mod_knit.FTPlainToFullText), |
761 | ('knit-annotated-delta-gz', 'knit-delta-gz', |
762 | _mod_knit.DeltaAnnotatedToUnannotated), |
763 | ('knit-annotated-delta-gz', 'fulltext', |
764 | @@ -1223,6 +1227,10 @@ |
765 | _mod_knit.FTAnnotatedToUnannotated), |
766 | ('knit-annotated-ft-gz', 'fulltext', |
767 | _mod_knit.FTAnnotatedToFullText), |
768 | + ('knit-annotated-ft-gz', 'lines', |
769 | + _mod_knit.FTAnnotatedToFullText), |
770 | + ('knit-annotated-ft-gz', 'chunked', |
771 | + _mod_knit.FTAnnotatedToFullText), |
772 | ] |
773 | for source, requested, klass in scenarios: |
774 | adapter_factory = versionedfile.adapter_registry.get( |
775 | @@ -1235,16 +1243,16 @@ |
776 | transport = self.get_transport() |
777 | return make_file_factory(annotated, mapper)(transport) |
778 | |
779 | - def helpGetBytes(self, f, ft_adapter, delta_adapter): |
780 | + def helpGetBytes(self, f, ft_name, ft_adapter, delta_name, delta_adapter): |
781 | """Grab the interested adapted texts for tests.""" |
782 | # origin is a fulltext |
783 | entries = f.get_record_stream([(b'origin',)], 'unordered', False) |
784 | base = next(entries) |
785 | - ft_data = ft_adapter.get_bytes(base) |
786 | + ft_data = ft_adapter.get_bytes(base, ft_name) |
787 | # merged is both a delta and multiple parents. |
788 | entries = f.get_record_stream([(b'merged',)], 'unordered', False) |
789 | merged = next(entries) |
790 | - delta_data = delta_adapter.get_bytes(merged) |
791 | + delta_data = delta_adapter.get_bytes(merged, delta_name) |
792 | return ft_data, delta_data |
793 | |
794 | def test_deannotation_noeol(self): |
795 | @@ -1252,10 +1260,9 @@ |
796 | # we need a full text, and a delta |
797 | f = self.get_knit() |
798 | get_diamond_files(f, 1, trailing_eol=False) |
799 | - ft_data, delta_data = self.helpGetBytes(f, |
800 | - _mod_knit.FTAnnotatedToUnannotated( |
801 | - None), |
802 | - _mod_knit.DeltaAnnotatedToUnannotated(None)) |
803 | + ft_data, delta_data = self.helpGetBytes( |
804 | + f, 'knit-ft-gz', _mod_knit.FTAnnotatedToUnannotated(None), |
805 | + 'knit-delta-gz', _mod_knit.DeltaAnnotatedToUnannotated(None)) |
806 | self.assertEqual( |
807 | b'version origin 1 b284f94827db1fa2970d9e2014f080413b547a7e\n' |
808 | b'origin\n' |
809 | @@ -1271,10 +1278,9 @@ |
810 | # we need a full text, and a delta |
811 | f = self.get_knit() |
812 | get_diamond_files(f, 1) |
813 | - ft_data, delta_data = self.helpGetBytes(f, |
814 | - _mod_knit.FTAnnotatedToUnannotated( |
815 | - None), |
816 | - _mod_knit.DeltaAnnotatedToUnannotated(None)) |
817 | + ft_data, delta_data = self.helpGetBytes( |
818 | + f, 'knit-ft-gz', _mod_knit.FTAnnotatedToUnannotated(None), |
819 | + 'knit-delta-gz', _mod_knit.DeltaAnnotatedToUnannotated(None)) |
820 | self.assertEqual( |
821 | b'version origin 1 00e364d235126be43292ab09cb4686cf703ddc17\n' |
822 | b'origin\n' |
823 | @@ -1293,10 +1299,9 @@ |
824 | # Reconstructing a full text requires a backing versioned file, and it |
825 | # must have the base lines requested from it. |
826 | logged_vf = versionedfile.RecordingVersionedFilesDecorator(f) |
827 | - ft_data, delta_data = self.helpGetBytes(f, |
828 | - _mod_knit.FTAnnotatedToFullText( |
829 | - None), |
830 | - _mod_knit.DeltaAnnotatedToFullText(logged_vf)) |
831 | + ft_data, delta_data = self.helpGetBytes( |
832 | + f, 'fulltext', _mod_knit.FTAnnotatedToFullText(None), |
833 | + 'fulltext', _mod_knit.DeltaAnnotatedToFullText(logged_vf)) |
834 | self.assertEqual(b'origin', ft_data) |
835 | self.assertEqual(b'base\nleft\nright\nmerged', delta_data) |
836 | self.assertEqual([('get_record_stream', [(b'left',)], 'unordered', |
837 | @@ -1310,10 +1315,9 @@ |
838 | # Reconstructing a full text requires a backing versioned file, and it |
839 | # must have the base lines requested from it. |
840 | logged_vf = versionedfile.RecordingVersionedFilesDecorator(f) |
841 | - ft_data, delta_data = self.helpGetBytes(f, |
842 | - _mod_knit.FTAnnotatedToFullText( |
843 | - None), |
844 | - _mod_knit.DeltaAnnotatedToFullText(logged_vf)) |
845 | + ft_data, delta_data = self.helpGetBytes( |
846 | + f, 'fulltext', _mod_knit.FTAnnotatedToFullText(None), |
847 | + 'fulltext', _mod_knit.DeltaAnnotatedToFullText(logged_vf)) |
848 | self.assertEqual(b'origin\n', ft_data) |
849 | self.assertEqual(b'base\nleft\nright\nmerged\n', delta_data) |
850 | self.assertEqual([('get_record_stream', [(b'left',)], 'unordered', |
851 | @@ -1330,10 +1334,9 @@ |
852 | # Reconstructing a full text requires a backing versioned file, and it |
853 | # must have the base lines requested from it. |
854 | logged_vf = versionedfile.RecordingVersionedFilesDecorator(f) |
855 | - ft_data, delta_data = self.helpGetBytes(f, |
856 | - _mod_knit.FTPlainToFullText( |
857 | - None), |
858 | - _mod_knit.DeltaPlainToFullText(logged_vf)) |
859 | + ft_data, delta_data = self.helpGetBytes( |
860 | + f, 'fulltext', _mod_knit.FTPlainToFullText(None), |
861 | + 'fulltext', _mod_knit.DeltaPlainToFullText(logged_vf)) |
862 | self.assertEqual(b'origin\n', ft_data) |
863 | self.assertEqual(b'base\nleft\nright\nmerged\n', delta_data) |
864 | self.assertEqual([('get_record_stream', [(b'left',)], 'unordered', |
865 | @@ -1350,10 +1353,9 @@ |
866 | # Reconstructing a full text requires a backing versioned file, and it |
867 | # must have the base lines requested from it. |
868 | logged_vf = versionedfile.RecordingVersionedFilesDecorator(f) |
869 | - ft_data, delta_data = self.helpGetBytes(f, |
870 | - _mod_knit.FTPlainToFullText( |
871 | - None), |
872 | - _mod_knit.DeltaPlainToFullText(logged_vf)) |
873 | + ft_data, delta_data = self.helpGetBytes( |
874 | + f, 'fulltext', _mod_knit.FTPlainToFullText(None), |
875 | + 'fulltext', _mod_knit.DeltaPlainToFullText(logged_vf)) |
876 | self.assertEqual(b'origin', ft_data) |
877 | self.assertEqual(b'base\nleft\nright\nmerged', delta_data) |
878 | self.assertEqual([('get_record_stream', [(b'left',)], 'unordered', |
Running landing tests failed
https://ci.breezy-vcs.org/job/brz/job/brz-land/650/