Merge lp:~jelmer/brz/file-content-factory into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/file-content-factory
Merge into: lp:brz
Diff against target: 878 lines (+197/-139)
14 files modified
breezy/_annotator_py.py (+1/-1)
breezy/bzr/groupcompress.py (+20/-17)
breezy/bzr/groupcompress_repo.py (+3/-3)
breezy/bzr/knit.py (+55/-36)
breezy/bzr/knitpack_repo.py (+2/-2)
breezy/bzr/reconcile.py (+5/-5)
breezy/bzr/smart/repository.py (+3/-1)
breezy/bzr/versionedfile.py (+55/-18)
breezy/bzr/vf_repository.py (+6/-12)
breezy/bzr/weave.py (+5/-6)
breezy/git/annotate.py (+7/-4)
breezy/merge.py (+1/-2)
breezy/plugins/weave_fmt/repository.py (+5/-5)
breezy/tests/per_versionedfile.py (+29/-27)
To merge this branch: bzr merge lp:~jelmer/brz/file-content-factory
Reviewer          Review Type    Date Requested    Status
Jelmer Vernooij                                    Approve
Review via email: mp+378072@code.launchpad.net

Commit message

Add a 'lines' storage kind.

Description of the change

Add a 'lines' storage kind.

Avoid converting back and forth between fulltext and lines in various places.
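
For illustration, here is a minimal sketch (not part of the merge itself) of what the new storage kind looks like through the ContentFactory API, based on the ChunkedContentFactory changes in the diff below; the key, sha1 and chunk values are placeholders:

    from breezy.bzr.versionedfile import ChunkedContentFactory

    # Chunks that already happen to be lines; chunks_are_lines=True lets
    # get_bytes_as('lines') return them as-is instead of re-splitting them
    # with osutils.chunks_to_lines().
    record = ChunkedContentFactory(
        (b'file-id', b'rev-id'), (), None,
        [b'first line\n', b'second line\n'],
        chunks_are_lines=True)

    record.get_bytes_as('lines')     # [b'first line\n', b'second line\n']
    record.get_bytes_as('chunked')   # the same chunks; arbitrary splitting allowed
    record.get_bytes_as('fulltext')  # b'first line\nsecond line\n'

Consumers such as the annotator and merge code can then ask for 'lines' directly instead of round-tripping through osutils.chunks_to_lines(record.get_bytes_as('chunked')).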

Revision history for this message
Jelmer Vernooij (jelmer):
review: Approve

Preview Diff

=== modified file 'breezy/_annotator_py.py'
--- breezy/_annotator_py.py	2019-03-02 21:46:18 +0000
+++ breezy/_annotator_py.py	2020-01-25 16:26:49 +0000
@@ -127,7 +127,7 @@
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(record.key, self._vf)
             this_key = record.key
-            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+            lines = record.get_bytes_as('lines')
             num_lines = len(lines)
             self._text_cache[this_key] = lines
             yield this_key, lines, num_lines

=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py	2020-01-25 14:46:10 +0000
+++ breezy/bzr/groupcompress.py	2020-01-25 16:26:49 +0000
@@ -466,7 +466,7 @@
                 return self._manager._wire_bytes()
             else:
                 return b''
-        if storage_kind in ('fulltext', 'chunked'):
+        if storage_kind in ('fulltext', 'chunked', 'lines'):
             if self._bytes is None:
                 # Grab and cache the raw bytes for this entry
                 # and break the ref-cycle with _manager since we don't need it
@@ -482,8 +482,10 @@
             # refcycle here, but instead in manager.get_record_stream()
             if storage_kind == 'fulltext':
                 return self._bytes
-            else:
+            elif storage_kind == 'chunked':
                 return [self._bytes]
+            else:
+                return osutils.split_lines(self._bytes)
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
@@ -566,9 +568,9 @@
         old_length = self._block._content_length
         end_point = 0
         for factory in self._factories:
-            bytes = factory.get_bytes_as('fulltext')
+            chunks = factory.get_bytes_as('chunked')
             (found_sha1, start_point, end_point,
-             type) = compressor.compress(factory.key, [bytes], factory.sha1)
+             type) = compressor.compress(factory.key, chunks, factory.sha1)
             # Now update this factory with the new offsets, etc
             factory.sha1 = found_sha1
             factory._start = start_point
@@ -1369,7 +1371,7 @@
             if keys is None:
                 keys = self.keys()
             for record in self.get_record_stream(keys, 'unordered', True):
-                record.get_bytes_as('fulltext')
+                record.get_bytes_as('chunked')
         else:
             return self.get_record_stream(keys, 'unordered', True)
 
@@ -1670,8 +1672,8 @@
                 result[record.key] = record.sha1
             else:
                 if record.storage_kind != 'absent':
-                    result[record.key] = osutils.sha_string(
-                        record.get_bytes_as('fulltext'))
+                    result[record.key] = osutils.sha_strings(
+                        record.get_bytes_as('chunked'))
         return result
 
     def insert_record_stream(self, stream):
@@ -1823,25 +1825,26 @@
                     self._index.add_records(nodes, random_id=random_id)
                     continue
             try:
-                bytes = record.get_bytes_as('fulltext')
+                chunks = record.get_bytes_as('chunked')
             except errors.UnavailableRepresentation:
-                adapter_key = record.storage_kind, 'fulltext'
+                adapter_key = record.storage_kind, 'chunked'
                 adapter = get_adapter(adapter_key)
-                bytes = adapter.get_bytes(record)
+                chunks = adapter.get_bytes(record, 'chunked')
+            chunks_len = sum(map(len, chunks))
             if len(record.key) > 1:
                 prefix = record.key[0]
                 soft = (prefix == last_prefix)
             else:
                 prefix = None
                 soft = False
-            if max_fulltext_len < len(bytes):
-                max_fulltext_len = len(bytes)
+            if max_fulltext_len < chunks_len:
+                max_fulltext_len = chunks_len
                 max_fulltext_prefix = prefix
             (found_sha1, start_point, end_point,
              type) = self._compressor.compress(record.key,
-                                               [bytes], record.sha1, soft=soft,
+                                               chunks, record.sha1, soft=soft,
                                                nostore_sha=nostore_sha)
-            # delta_ratio = float(len(bytes)) / (end_point - start_point)
+            # delta_ratio = float(chunks_len) / (end_point - start_point)
             # Check if we want to continue to include that text
             if (prefix == max_fulltext_prefix
                     and end_point < 2 * max_fulltext_len):
@@ -1859,10 +1862,10 @@
                 if start_new_block:
                     self._compressor.pop_last()
                     flush()
-                    max_fulltext_len = len(bytes)
+                    max_fulltext_len = chunks_len
                     (found_sha1, start_point, end_point,
                      type) = self._compressor.compress(
-                         record.key, [bytes], record.sha1)
+                         record.key, chunks, record.sha1)
             if record.key[-1] is None:
                 key = record.key[:-1] + (b'sha1:' + found_sha1,)
             else:
@@ -1915,7 +1918,7 @@
                     pb.update('Walking content', key_idx, total)
                 if record.storage_kind == 'absent':
                     raise errors.RevisionNotPresent(key, self)
-                lines = osutils.split_lines(record.get_bytes_as('fulltext'))
+                lines = record.get_bytes_as('lines')
                 for line in lines:
                     yield line, key
             if pb is not None:

=== modified file 'breezy/bzr/groupcompress_repo.py'
--- breezy/bzr/groupcompress_repo.py	2019-10-19 22:53:54 +0000
+++ breezy/bzr/groupcompress_repo.py	2020-01-25 16:26:49 +0000
@@ -698,9 +698,9 @@
                             % (chk_inv.revision_id, p_id_map.key()[0],
                                canon_p_id_map.key()[0]))
                         self._data_changed = True
-                    yield versionedfile.ChunkedContentFactory(record.key,
-                                                              record.parents, record.sha1,
-                                                              canonical_inv.to_lines())
+                    yield versionedfile.ChunkedContentFactory(
+                        record.key, record.parents, record.sha1, canonical_inv.to_lines(),
+                        chunks_are_lines=True)
             # We have finished processing all of the inventory records, we
             # don't need these sets anymore
         return _filtered_inv_stream()

=== modified file 'breezy/bzr/knit.py'
--- breezy/bzr/knit.py	2020-01-25 04:20:44 +0000
+++ breezy/bzr/knit.py	2020-01-25 16:26:49 +0000
@@ -223,7 +223,10 @@
 class FTAnnotatedToUnannotated(KnitAdapter):
     """An adapter from FT annotated knits to unannotated ones."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
+        if target_storage_kind != 'knit-ft-gz':
+            raise errors.UnavailableRepresentation(
+                factory.key, target_storage_kind, factory.storage_kind)
         annotated_compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(annotated_compressed_bytes)
@@ -236,7 +239,10 @@
 class DeltaAnnotatedToUnannotated(KnitAdapter):
     """An adapter for deltas from annotated to unannotated."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
+        if target_storage_kind != 'knit-delta-gz':
+            raise errors.UnavailableRepresentation(
+                factory.key, target_storage_kind, factory.storage_kind)
         annotated_compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(annotated_compressed_bytes)
@@ -250,19 +256,24 @@
 class FTAnnotatedToFullText(KnitAdapter):
     """An adapter from FT annotated knits to unannotated ones."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
         annotated_compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(annotated_compressed_bytes)
         content, delta = self._annotate_factory.parse_record(factory.key[-1],
             contents, factory._build_details, None)
-        return b''.join(content.text())
+        if target_storage_kind == 'fulltext':
+            return b''.join(content.text())
+        elif target_storage_kind in ('chunked', 'lines'):
+            return content.text()
+        raise errors.UnavailableRepresentation(
+            factory.key, target_storage_kind, factory.storage_kind)
 
 
 class DeltaAnnotatedToFullText(KnitAdapter):
     """An adapter for deltas from annotated to unannotated."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
         annotated_compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(annotated_compressed_bytes)
@@ -273,32 +284,42 @@
             [compression_parent], 'unordered', True))
         if basis_entry.storage_kind == 'absent':
             raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
-        basis_chunks = basis_entry.get_bytes_as('chunked')
-        basis_lines = osutils.chunks_to_lines(basis_chunks)
+        basis_lines = basis_entry.get_bytes_as('lines')
         # Manually apply the delta because we have one annotated content and
         # one plain.
         basis_content = PlainKnitContent(basis_lines, compression_parent)
         basis_content.apply_delta(delta, rec[1])
         basis_content._should_strip_eol = factory._build_details[1]
-        return b''.join(basis_content.text())
+
+        if target_storage_kind == 'fulltext':
+            return b''.join(basis_content.text())
+        elif target_storage_kind in ('chunked', 'lines'):
+            return basis_content.text()
+        raise errors.UnavailableRepresentation(
+            factory.key, target_storage_kind, factory.storage_kind)
 
 
 class FTPlainToFullText(KnitAdapter):
     """An adapter from FT plain knits to unannotated ones."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
         compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(compressed_bytes)
         content, delta = self._plain_factory.parse_record(factory.key[-1],
             contents, factory._build_details, None)
-        return b''.join(content.text())
+        if target_storage_kind == 'fulltext':
+            return b''.join(content.text())
+        elif target_storage_kind in ('chunked', 'lines'):
+            return content.text()
+        raise errors.UnavailableRepresentation(
+            factory.key, target_storage_kind, factory.storage_kind)
 
 
 class DeltaPlainToFullText(KnitAdapter):
     """An adapter for deltas from annotated to unannotated."""
 
-    def get_bytes(self, factory):
+    def get_bytes(self, factory, target_storage_kind):
         compressed_bytes = factory._raw_record
         rec, contents = \
             self._data._parse_record_unchecked(compressed_bytes)
@@ -309,14 +330,18 @@
             [compression_parent], 'unordered', True))
         if basis_entry.storage_kind == 'absent':
             raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
-        basis_chunks = basis_entry.get_bytes_as('chunked')
-        basis_lines = osutils.chunks_to_lines(basis_chunks)
+        basis_lines = basis_entry.get_bytes_as('lines')
         basis_content = PlainKnitContent(basis_lines, compression_parent)
         # Manually apply the delta because we have one annotated content and
         # one plain.
         content, _ = self._plain_factory.parse_record(rec[1], contents,
             factory._build_details, basis_content)
-        return b''.join(content.text())
+        if target_storage_kind == 'fulltext':
+            return b''.join(content.text())
+        elif target_storage_kind in ('chunked', 'lines'):
+            return content.text()
+        raise errors.UnavailableRepresentation(
+            factory.key, target_storage_kind, factory.storage_kind)
 
 
 class KnitContentFactory(ContentFactory):
@@ -381,19 +406,15 @@
             self._create_network_bytes()
             return self._network_bytes
         if ('-ft-' in self.storage_kind
-                and storage_kind in ('chunked', 'fulltext')):
-            adapter_key = (self.storage_kind, 'fulltext')
+                and storage_kind in ('chunked', 'fulltext', 'lines')):
+            adapter_key = (self.storage_kind, storage_kind)
             adapter_factory = adapter_registry.get(adapter_key)
             adapter = adapter_factory(None)
-            bytes = adapter.get_bytes(self)
-            if storage_kind == 'chunked':
-                return [bytes]
-            else:
-                return bytes
+            return adapter.get_bytes(self, storage_kind)
         if self._knit is not None:
             # Not redundant with direct conversion above - that only handles
             # fulltext cases.
-            if storage_kind == 'chunked':
+            if storage_kind in ('chunked', 'lines'):
                 return self._knit.get_lines(self.key[0])
             elif storage_kind == 'fulltext':
                 return self._knit.get_text(self.key[0])
@@ -435,9 +456,9 @@
             # all the keys etc are contained in the bytes returned in the
             # first record.
             return b''
-        if storage_kind in ('chunked', 'fulltext'):
+        if storage_kind in ('chunked', 'fulltext', 'lines'):
             chunks = self._generator._get_one_work(self.key).text()
-            if storage_kind == 'chunked':
+            if storage_kind in ('chunked', 'lines'):
                 return chunks
             else:
                 return b''.join(chunks)
@@ -1708,7 +1729,7 @@
                 except KeyError:
                     adapter_key = (record.storage_kind, "knit-ft-gz")
                     adapter = get_adapter(adapter_key)
-                    bytes = adapter.get_bytes(record)
+                    bytes = adapter.get_bytes(record, adapter_key[1])
                 else:
                     # It's a knit record, it has a _raw_record field (even if
                     # it was reconstituted from a network stream).
@@ -1745,9 +1766,8 @@
                         buffered = True
                 if not buffered:
                     self._index.add_records([index_entry])
-            elif record.storage_kind == 'chunked':
-                self.add_lines(record.key, parents,
-                               osutils.chunks_to_lines(record.get_bytes_as('chunked')))
+            elif record.storage_kind in ('chunked', 'file'):
+                self.add_lines(record.key, parents, record.get_bytes_as('lines'))
             else:
                 # Not suitable for direct insertion as a
                 # delta, either because it's not the right format, or this
@@ -1757,12 +1777,11 @@
                     self._access.flush()
                 try:
                     # Try getting a fulltext directly from the record.
-                    bytes = record.get_bytes_as('fulltext')
+                    lines = record.get_bytes_as('lines')
                 except errors.UnavailableRepresentation:
-                    adapter_key = record.storage_kind, 'fulltext'
+                    adapter_key = record.storage_kind, 'lines'
                     adapter = get_adapter(adapter_key)
-                    bytes = adapter.get_bytes(record)
-                    lines = split_lines(bytes)
+                    lines = adapter.get_bytes(record, 'lines')
                 try:
                     self.add_lines(record.key, parents, lines)
                 except errors.RevisionAlreadyPresent:
@@ -2107,7 +2126,7 @@
         if key in self.nonlocal_keys:
             record = next(self.get_record_stream())
             # Create a content object on the fly
-            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+            lines = record.get_bytes_as('lines')
             return PlainKnitContent(lines, record.key)
         else:
             # local keys we can ask for directly
@@ -2198,9 +2217,9 @@
             if component_id in self._contents_map:
                 content = self._contents_map[component_id]
             else:
-                content, delta = self._factory.parse_record(key[-1],
-                    record, record_details, content,
-                    copy_base_content=multiple_versions)
+                content, delta = self._factory.parse_record(
+                    key[-1], record, record_details, content,
+                    copy_base_content=multiple_versions)
             if multiple_versions:
                 self._contents_map[component_id] = content
 

=== modified file 'breezy/bzr/knitpack_repo.py'
--- breezy/bzr/knitpack_repo.py	2020-01-11 17:50:28 +0000
+++ breezy/bzr/knitpack_repo.py	2020-01-25 16:26:49 +0000
@@ -1092,8 +1092,8 @@
                     raise errors.BzrError('Mismatched key parent %r:%r' %
                                           (key, parent_keys))
                 parents.append(parent_key[1])
-            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
-                [key], 'unordered', True)).get_bytes_as('fulltext'))
+            text_lines = next(repo.texts.get_record_stream(
+                [key], 'unordered', True)).get_bytes_as('lines')
             output_texts.add_lines(key, parent_keys, text_lines,
                                    random_id=True, check_content=False)
         # 5) check that nothing inserted has a reference outside the keyspace.

=== modified file 'breezy/bzr/reconcile.py'
--- breezy/bzr/reconcile.py	2018-11-30 12:39:04 +0000
+++ breezy/bzr/reconcile.py	2020-01-25 16:26:49 +0000
@@ -35,7 +35,7 @@
 from ..i18n import gettext
 from ..trace import mutter
 from ..tsort import topo_sort
-from .versionedfile import AdapterFactory, FulltextContentFactory
+from .versionedfile import AdapterFactory, ChunkedContentFactory
 
 
 class VersionedFileRepoReconciler(object):
@@ -152,8 +152,8 @@
                     # The check for the left most parent only handles knit
                     # compressors, but this code only applies to knit and weave
                     # repositories anyway.
-                    bytes = record.get_bytes_as('fulltext')
-                    yield FulltextContentFactory(record.key, wanted_parents, record.sha1, bytes)
+                    chunks = record.get_bytes_as('chunked')
+                    yield ChunkedContentFactory(record.key, wanted_parents, record.sha1, chunks)
                 else:
                     adapted_record = AdapterFactory(
                         record.key, wanted_parents, record)
@@ -344,10 +344,10 @@
 
         def fix_parents(stream):
             for record in stream:
-                bytes = record.get_bytes_as('fulltext')
+                chunks = record.get_bytes_as('chunked')
                 new_key = (new_file_id, record.key[-1])
                 parents = new_parents[new_key]
-                yield FulltextContentFactory(new_key, parents, record.sha1, bytes)
+                yield ChunkedContentFactory(new_key, parents, record.sha1, chunks)
         stream = self.repo.texts.get_record_stream(
             needed_keys, 'topological', True)
         self.repo._remove_file_id(new_file_id)

=== modified file 'breezy/bzr/smart/repository.py'
--- breezy/bzr/smart/repository.py	2019-02-15 18:57:38 +0000
+++ breezy/bzr/smart/repository.py	2020-01-25 16:26:49 +0000
@@ -1270,7 +1270,9 @@
                 inv_delta = inv._make_delta(prev_inv)
                 lines = serializer.delta_to_lines(
                     prev_inv.revision_id, inv.revision_id, inv_delta)
-                yield ChunkedContentFactory(inv.revision_id, None, None, lines)
+                yield ChunkedContentFactory(
+                    inv.revision_id, None, None, lines,
+                    chunks_are_lines=True)
             prev_inv = inv
 
     def body_stream(self, repository, ordering, revids):

=== modified file 'breezy/bzr/versionedfile.py'
--- breezy/bzr/versionedfile.py	2020-01-11 17:50:28 +0000
+++ breezy/bzr/versionedfile.py	2020-01-25 16:26:49 +0000
@@ -56,20 +56,19 @@
 
 
 adapter_registry = Registry()
-adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.bzr.knit',
-                               'DeltaPlainToFullText')
-adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.bzr.knit',
-                               'FTPlainToFullText')
 adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
                                'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
-adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
-                               'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
 adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
                                'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
-adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
-                               'breezy.bzr.knit', 'FTAnnotatedToFullText')
-# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
-#     'breezy.bzr.knit', 'FTAnnotatedToChunked')
+for target_storage_kind in ('fulltext', 'chunked', 'lines'):
+    adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind), 'breezy.bzr.knit',
+                                   'DeltaPlainToFullText')
+    adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind), 'breezy.bzr.knit',
+                                   'FTPlainToFullText')
+    adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind),
+                                   'breezy.bzr.knit', 'FTAnnotatedToFullText')
+    adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind),
+                                   'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
 
 
 class ContentFactory(object):
@@ -110,21 +109,27 @@
     :ivar parents: A tuple of parent keys for self.key. If the object has
         no parent information, None (as opposed to () for an empty list of
        parents).
+    :ivar chunks_are_lines: Whether chunks are lines.
     """
 
-    def __init__(self, key, parents, sha1, chunks):
+    def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None):
         """Create a ContentFactory."""
         self.sha1 = sha1
         self.storage_kind = 'chunked'
         self.key = key
         self.parents = parents
         self._chunks = chunks
+        self._chunks_are_lines = chunks_are_lines
 
     def get_bytes_as(self, storage_kind):
         if storage_kind == 'chunked':
             return self._chunks
         elif storage_kind == 'fulltext':
             return b''.join(self._chunks)
+        elif storage_kind == 'lines':
+            if self._chunks_are_lines:
+                return self._chunks
+            return list(osutils.chunks_to_lines(self._chunks))
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
@@ -160,6 +165,38 @@
             return self._text
         elif storage_kind == 'chunked':
             return [self._text]
+        elif storage_kind == 'lines':
+            return osutils.split_lines(self._text)
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+                                               self.storage_kind)
+
+
+class FileContentFactory(ContentFactory):
+    """File-based content factory.
+    """
+
+    def __init__(self, key, parents, fileobj):
+        self.key = key
+        self.parents = parents
+        self.file = fileobj
+        self.storage_kind = 'file'
+        self._sha1 = None
+
+    @property
+    def sha1(self):
+        if self._sha1 is None:
+            self.file.seek(0)
+            self._size, self._sha1 = osutils.size_sha_file(self.file)
+        return self._sha1
+
+    def get_bytes_as(self, storage_kind):
+        self.file.seek(0)
+        if storage_kind == 'fulltext':
+            return self.file.read()
+        elif storage_kind == 'chunked':
+            return list(osutils.file_iterator(self.file))
+        elif storage_kind == 'lines':
+            return self.file.readlines()
         raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                self.storage_kind)
 
@@ -1030,13 +1067,11 @@
                                   if not mpvf.has_version(p))
         # It seems likely that adding all the present parents as fulltexts can
         # easily exhaust memory.
-        chunks_to_lines = osutils.chunks_to_lines
         for record in self.get_record_stream(needed_parents, 'unordered',
                                              True):
             if record.storage_kind == 'absent':
                 continue
-            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
-                             record.key, [])
+            mpvf.add_version(record.get_bytes_as('lines'), record.key, [])
         for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
                 records, mpvf.get_line_list(versions)):
             if len(parent_keys) == 1:
@@ -1546,7 +1581,9 @@
                 lines = self._lines[key]
                 parents = self._parents[key]
                 pending.remove(key)
-                yield ChunkedContentFactory(key, parents, None, lines)
+                yield ChunkedContentFactory(
+                    key, parents, None, lines,
+                    chunks_are_lines=True)
         for versionedfile in self.fallback_versionedfiles:
             for record in versionedfile.get_record_stream(
                     pending, 'unordered', True):
@@ -1775,9 +1812,9 @@
             if lines is not None:
                 if not isinstance(lines, list):
                     raise AssertionError
-                yield ChunkedContentFactory((k,), None,
-                                            sha1=osutils.sha_strings(lines),
-                                            chunks=lines)
+                yield ChunkedContentFactory(
+                    (k,), None, sha1=osutils.sha_strings(lines),
+                    chunks=lines, chunks_are_lines=True)
             else:
                 yield AbsentContentFactory((k,))
 

=== modified file 'breezy/bzr/vf_repository.py'
--- breezy/bzr/vf_repository.py	2020-01-20 00:03:54 +0000
+++ breezy/bzr/vf_repository.py	2020-01-25 16:26:49 +0000
@@ -853,8 +853,8 @@
         """Check a single text from this repository."""
         if kind == 'inventories':
             rev_id = record.key[0]
-            inv = self._deserialise_inventory(rev_id,
-                                              record.get_bytes_as('fulltext'))
+            inv = self._deserialise_inventory(
+                rev_id, record.get_bytes_as('fulltext'))
             if last_object is not None:
                 delta = inv._make_delta(last_object)
                 for old_path, path, file_id, ie in delta:
@@ -880,14 +880,9 @@
         """Check a single text."""
         # Check it is extractable.
         # TODO: check length.
-        if record.storage_kind == 'chunked':
-            chunks = record.get_bytes_as(record.storage_kind)
-            sha1 = osutils.sha_strings(chunks)
-            length = sum(map(len, chunks))
-        else:
-            content = record.get_bytes_as('fulltext')
-            sha1 = osutils.sha_string(content)
-            length = len(content)
+        chunks = record.get_bytes_as('chunked')
+        sha1 = osutils.sha_strings(chunks)
+        length = sum(map(len, chunks))
         if item_data and sha1 != item_data[1]:
             checker._report_items.append(
                 'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
@@ -1197,9 +1192,8 @@
         stream = self.inventories.get_record_stream(keys, 'unordered', True)
         for record in stream:
             if record.storage_kind != 'absent':
-                chunks = record.get_bytes_as('chunked')
+                lines = record.get_bytes_as('lines')
                 revid = record.key[-1]
-                lines = osutils.chunks_to_lines(chunks)
                 for line in lines:
                     yield line, revid
 

=== modified file 'breezy/bzr/weave.py'
--- breezy/bzr/weave.py	2019-03-02 21:46:18 +0000
+++ breezy/bzr/weave.py	2020-01-25 16:26:49 +0000
@@ -179,7 +179,7 @@
     def get_bytes_as(self, storage_kind):
         if storage_kind == 'fulltext':
             return self._weave.get_text(self.key[-1])
-        elif storage_kind == 'chunked':
+        elif storage_kind in ('chunked', 'lines'):
             return self._weave.get_lines(self.key[-1])
         else:
             raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
@@ -422,20 +422,19 @@
                 raise RevisionNotPresent([record.key[0]], self)
             # adapt to non-tuple interface
             parents = [parent[0] for parent in record.parents]
-            if (record.storage_kind == 'fulltext' or
-                    record.storage_kind == 'chunked'):
+            if record.storage_kind in ('fulltext', 'chunked', 'lines'):
                 self.add_lines(
                     record.key[0], parents,
-                    osutils.chunks_to_lines(record.get_bytes_as('chunked')))
+                    record.get_bytes_as('lines'))
             else:
-                adapter_key = record.storage_kind, 'fulltext'
+                adapter_key = record.storage_kind, 'lines'
                 try:
                     adapter = adapters[adapter_key]
                 except KeyError:
                     adapter_factory = adapter_registry.get(adapter_key)
                     adapter = adapter_factory(self)
                     adapters[adapter_key] = adapter
-                lines = split_lines(adapter.get_bytes(record))
+                lines = adapter.get_bytes(record, 'lines')
                 try:
                     self.add_lines(record.key[0], parents, lines)
                 except RevisionAlreadyPresent:

=== modified file 'breezy/git/annotate.py'
--- breezy/git/annotate.py	2019-10-20 23:46:25 +0000
+++ breezy/git/annotate.py	2020-01-25 16:26:49 +0000
@@ -22,6 +22,7 @@
     tree_lookup_path,
     )
 
+from .. import osutils
 from ..errors import (
     NoSuchRevision,
     UnavailableRepresentation,
@@ -32,7 +33,7 @@
     )
 
 
-class GitFulltextContentFactory(object):
+class GitBlobContentFactory(object):
     """Static data content factory.
 
     This takes a fulltext when created and just returns that during
@@ -52,17 +53,19 @@
         """Create a ContentFactory."""
         self.store = store
         self.key = (path, revision)
-        self.storage_kind = 'fulltext'
+        self.storage_kind = 'git-blob'
         self.parents = None
         self.blob_id = blob_id
 
     def get_bytes_as(self, storage_kind):
         if storage_kind == 'fulltext':
             return self.store[self.blob_id].as_raw_string()
+        elif storage_kind == 'lines':
+            return osutils.chunks_to_lines(self.store[self.blob_id].as_raw_chunks())
         elif storage_kind == 'chunked':
             return self.store[self.blob_id].as_raw_chunks()
         raise UnavailableRepresentation(self.key, storage_kind,
-                                        'fulltext')
+                                        self.storage_kind)
 
 
 class GitAbsentContentFactory(object):
@@ -153,5 +156,5 @@
         except KeyError:
             yield GitAbsentContentFactory(store, path, text_revision)
         else:
-            yield GitFulltextContentFactory(
+            yield GitBlobContentFactory(
                 store, path, text_revision, blob_sha)

=== modified file 'breezy/merge.py'
--- breezy/merge.py	2020-01-18 16:14:28 +0000
+++ breezy/merge.py	2020-01-25 16:26:49 +0000
@@ -2007,8 +2007,7 @@
         for record in self.vf.get_record_stream(keys, 'unordered', True):
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(record.key, self.vf)
-            result[record.key[-1]] = osutils.chunks_to_lines(
-                record.get_bytes_as('chunked'))
+            result[record.key[-1]] = record.get_bytes_as('lines')
         return result
 
     def plan_merge(self):

=== modified file 'breezy/plugins/weave_fmt/repository.py'
--- breezy/plugins/weave_fmt/repository.py	2018-11-12 01:41:38 +0000
+++ breezy/plugins/weave_fmt/repository.py	2020-01-25 16:26:49 +0000
@@ -628,19 +628,19 @@
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent([record.key[0]], self)
             # adapt to non-tuple interface
-            if record.storage_kind == 'fulltext':
+            if record.storage_kind in ('fulltext', 'chunks', 'lines'):
                 self.add_lines(record.key, None,
-                               osutils.split_lines(record.get_bytes_as('fulltext')))
+                               record.get_bytes_as('lines'))
             else:
-                adapter_key = record.storage_kind, 'fulltext'
+                adapter_key = record.storage_kind, 'lines'
                 try:
                     adapter = adapters[adapter_key]
                 except KeyError:
                     adapter_factory = adapter_registry.get(adapter_key)
                     adapter = adapter_factory(self)
                     adapters[adapter_key] = adapter
-                lines = osutils.split_lines(adapter.get_bytes(
-                    record, record.get_bytes_as(record.storage_kind)))
+                lines = adapter.get_bytes(
+                    record, record.get_bytes_as(record.storage_kind))
                 try:
                     self.add_lines(record.key, None, lines)
                 except errors.RevisionAlreadyPresent:

=== modified file 'breezy/tests/per_versionedfile.py'
--- breezy/tests/per_versionedfile.py	2019-06-16 15:54:33 +0000
+++ breezy/tests/per_versionedfile.py	2020-01-25 16:26:49 +0000
@@ -1214,7 +1214,11 @@
         # Each is source_kind, requested_kind, adapter class
         scenarios = [
             ('knit-delta-gz', 'fulltext', _mod_knit.DeltaPlainToFullText),
+            ('knit-delta-gz', 'lines', _mod_knit.DeltaPlainToFullText),
+            ('knit-delta-gz', 'chunked', _mod_knit.DeltaPlainToFullText),
             ('knit-ft-gz', 'fulltext', _mod_knit.FTPlainToFullText),
+            ('knit-ft-gz', 'lines', _mod_knit.FTPlainToFullText),
+            ('knit-ft-gz', 'chunked', _mod_knit.FTPlainToFullText),
             ('knit-annotated-delta-gz', 'knit-delta-gz',
              _mod_knit.DeltaAnnotatedToUnannotated),
             ('knit-annotated-delta-gz', 'fulltext',
@@ -1223,6 +1227,10 @@
              _mod_knit.FTAnnotatedToUnannotated),
             ('knit-annotated-ft-gz', 'fulltext',
              _mod_knit.FTAnnotatedToFullText),
+            ('knit-annotated-ft-gz', 'lines',
+             _mod_knit.FTAnnotatedToFullText),
+            ('knit-annotated-ft-gz', 'chunked',
+             _mod_knit.FTAnnotatedToFullText),
             ]
         for source, requested, klass in scenarios:
             adapter_factory = versionedfile.adapter_registry.get(
@@ -1235,16 +1243,16 @@
         transport = self.get_transport()
         return make_file_factory(annotated, mapper)(transport)
 
-    def helpGetBytes(self, f, ft_adapter, delta_adapter):
+    def helpGetBytes(self, f, ft_name, ft_adapter, delta_name, delta_adapter):
         """Grab the interested adapted texts for tests."""
         # origin is a fulltext
         entries = f.get_record_stream([(b'origin',)], 'unordered', False)
         base = next(entries)
-        ft_data = ft_adapter.get_bytes(base)
+        ft_data = ft_adapter.get_bytes(base, ft_name)
         # merged is both a delta and multiple parents.
         entries = f.get_record_stream([(b'merged',)], 'unordered', False)
         merged = next(entries)
-        delta_data = delta_adapter.get_bytes(merged)
+        delta_data = delta_adapter.get_bytes(merged, delta_name)
         return ft_data, delta_data
 
     def test_deannotation_noeol(self):
@@ -1252,10 +1260,9 @@
         # we need a full text, and a delta
         f = self.get_knit()
         get_diamond_files(f, 1, trailing_eol=False)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTAnnotatedToUnannotated(
-                                                    None),
-                                                _mod_knit.DeltaAnnotatedToUnannotated(None))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'knit-ft-gz', _mod_knit.FTAnnotatedToUnannotated(None),
+            'knit-delta-gz', _mod_knit.DeltaAnnotatedToUnannotated(None))
         self.assertEqual(
             b'version origin 1 b284f94827db1fa2970d9e2014f080413b547a7e\n'
             b'origin\n'
@@ -1271,10 +1278,9 @@
         # we need a full text, and a delta
         f = self.get_knit()
         get_diamond_files(f, 1)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTAnnotatedToUnannotated(
-                                                    None),
-                                                _mod_knit.DeltaAnnotatedToUnannotated(None))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'knit-ft-gz', _mod_knit.FTAnnotatedToUnannotated(None),
+            'knit-delta-gz', _mod_knit.DeltaAnnotatedToUnannotated(None))
         self.assertEqual(
             b'version origin 1 00e364d235126be43292ab09cb4686cf703ddc17\n'
             b'origin\n'
@@ -1293,10 +1299,9 @@
         # Reconstructing a full text requires a backing versioned file, and it
         # must have the base lines requested from it.
         logged_vf = versionedfile.RecordingVersionedFilesDecorator(f)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTAnnotatedToFullText(
-                                                    None),
-                                                _mod_knit.DeltaAnnotatedToFullText(logged_vf))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'fulltext', _mod_knit.FTAnnotatedToFullText(None),
+            'fulltext', _mod_knit.DeltaAnnotatedToFullText(logged_vf))
         self.assertEqual(b'origin', ft_data)
         self.assertEqual(b'base\nleft\nright\nmerged', delta_data)
         self.assertEqual([('get_record_stream', [(b'left',)], 'unordered',
@@ -1310,10 +1315,9 @@
         # Reconstructing a full text requires a backing versioned file, and it
         # must have the base lines requested from it.
         logged_vf = versionedfile.RecordingVersionedFilesDecorator(f)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTAnnotatedToFullText(
-                                                    None),
-                                                _mod_knit.DeltaAnnotatedToFullText(logged_vf))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'fulltext', _mod_knit.FTAnnotatedToFullText(None),
+            'fulltext', _mod_knit.DeltaAnnotatedToFullText(logged_vf))
         self.assertEqual(b'origin\n', ft_data)
         self.assertEqual(b'base\nleft\nright\nmerged\n', delta_data)
         self.assertEqual([('get_record_stream', [(b'left',)], 'unordered',
@@ -1330,10 +1334,9 @@
         # Reconstructing a full text requires a backing versioned file, and it
         # must have the base lines requested from it.
         logged_vf = versionedfile.RecordingVersionedFilesDecorator(f)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTPlainToFullText(
-                                                    None),
-                                                _mod_knit.DeltaPlainToFullText(logged_vf))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'fulltext', _mod_knit.FTPlainToFullText(None),
+            'fulltext', _mod_knit.DeltaPlainToFullText(logged_vf))
         self.assertEqual(b'origin\n', ft_data)
         self.assertEqual(b'base\nleft\nright\nmerged\n', delta_data)
         self.assertEqual([('get_record_stream', [(b'left',)], 'unordered',
@@ -1350,10 +1353,9 @@
         # Reconstructing a full text requires a backing versioned file, and it
         # must have the base lines requested from it.
        logged_vf = versionedfile.RecordingVersionedFilesDecorator(f)
-        ft_data, delta_data = self.helpGetBytes(f,
-                                                _mod_knit.FTPlainToFullText(
-                                                    None),
-                                                _mod_knit.DeltaPlainToFullText(logged_vf))
+        ft_data, delta_data = self.helpGetBytes(
+            f, 'fulltext', _mod_knit.FTPlainToFullText(None),
+            'fulltext', _mod_knit.DeltaPlainToFullText(logged_vf))
         self.assertEqual(b'origin', ft_data)
         self.assertEqual(b'base\nleft\nright\nmerged', delta_data)
         self.assertEqual([('get_record_stream', [(b'left',)], 'unordered',
