Merge lp:~jelmer/brz/get-full into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Merged at revision: 6839
Proposed branch: lp:~jelmer/brz/get-full
Merge into: lp:brz
Diff against target: 236 lines (+18/-90)
7 files modified
breezy/bzr/groupcompress.py (+0/-18)
breezy/bzr/knit.py (+0/-15)
breezy/bzr/versionedfile.py (+0/-30)
breezy/bzr/vf_repository.py (+11/-12)
breezy/plugins/weave_fmt/repository.py (+5/-3)
breezy/tests/test_tuned_gzip.py (+1/-3)
breezy/tuned_gzip.py (+1/-9)
To merge this branch: bzr merge lp:~jelmer/brz/get-full
Reviewer | Review Type | Date Requested | Status
Martin Packman | | | Approve
Review via email: mp+334719@code.launchpad.net

Description of the change

Work with chunks rather than full texts in tuned_gzip and the commit builder.

To post a comment you must log in.
Revision history for this message
Martin Packman (gz) wrote :

Changes here are all reasonable, but won't make much difference yet I think?

review: Approve
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

On Tue, Dec 12, 2017 at 01:09:56AM -0000, Martin Packman wrote:
> Review: Approve
>
> Changes here are all reasonable, but won't make much difference yet I think?
Yep, this is just a first step towards not dealing with full texts.

--
Jelmer Vernooij <email address hidden>
PGP Key: https://www.jelmer.uk/D729A457.asc

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/bzr/groupcompress.py'
2--- breezy/bzr/groupcompress.py 2017-11-11 15:06:09 +0000
3+++ breezy/bzr/groupcompress.py 2017-12-05 00:06:55 +0000
4@@ -1305,24 +1305,6 @@
5 nostore_sha=nostore_sha))[0]
6 return sha1, length, None
7
8- def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
9- """See VersionedFiles._add_text()."""
10- self._index._check_write_ok()
11- self._check_add(key, None, random_id, check_content=False)
12- if not isinstance(text, bytes):
13- raise errors.BzrBadParameterUnicode("text")
14- if parents is None:
15- # The caller might pass None if there is no graph data, but kndx
16- # indexes can't directly store that, so we give them
17- # an empty tuple instead.
18- parents = ()
19- # double handling for now. Make it work until then.
20- length = len(text)
21- record = FulltextContentFactory(key, parents, None, text)
22- sha1 = list(self._insert_record_stream([record], random_id=random_id,
23- nostore_sha=nostore_sha))[0]
24- return sha1, length, None
25-
26 def add_fallback_versioned_files(self, a_versioned_files):
27 """Add a source of texts for texts not present in this knit.
28
29
30=== modified file 'breezy/bzr/knit.py'
31--- breezy/bzr/knit.py 2017-11-11 15:06:09 +0000
32+++ breezy/bzr/knit.py 2017-12-05 00:06:55 +0000
33@@ -995,21 +995,6 @@
34 parent_texts, left_matching_blocks, nostore_sha, random_id,
35 line_bytes=line_bytes)
36
37- def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
38- """See VersionedFiles._add_text()."""
39- self._index._check_write_ok()
40- self._check_add(key, None, random_id, check_content=False)
41- if not isinstance(text, bytes):
42- raise errors.BzrBadParameterUnicode("text")
43- if parents is None:
44- # The caller might pass None if there is no graph data, but kndx
45- # indexes can't directly store that, so we give them
46- # an empty tuple instead.
47- parents = ()
48- return self._add(key, None, parents,
49- None, None, nostore_sha, random_id,
50- line_bytes=text)
51-
52 def _add(self, key, lines, parents, parent_texts,
53 left_matching_blocks, nostore_sha, random_id,
54 line_bytes):
55
56=== modified file 'breezy/bzr/versionedfile.py'
57--- breezy/bzr/versionedfile.py 2017-11-12 13:53:51 +0000
58+++ breezy/bzr/versionedfile.py 2017-12-05 00:06:55 +0000
59@@ -976,36 +976,6 @@
60 """
61 raise NotImplementedError(self.add_lines)
62
63- def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
64- """Add a text to the store.
65-
66- This is a private function for use by VersionedFileCommitBuilder.
67-
68- :param key: The key tuple of the text to add. If the last element is
69- None, a CHK string will be generated during the addition.
70- :param parents: The parents key tuples of the text to add.
71- :param text: A string containing the text to be committed.
72- :param nostore_sha: Raise ExistingContent and do not add the lines to
73- the versioned file if the digest of the lines matches this.
74- :param random_id: If True a random id has been selected rather than
75- an id determined by some deterministic process such as a converter
76- from a foreign VCS. When True the backend may choose not to check
77- for uniqueness of the resulting key within the versioned file, so
78- this should only be done when the result is expected to be unique
79- anyway.
80- :param check_content: If True, the lines supplied are verified to be
81- bytestrings that are correctly formed lines.
82- :return: The text sha1, the number of bytes in the text, and an opaque
83- representation of the inserted version which can be provided
84- back to future _add_text calls in the parent_texts dictionary.
85- """
86- # The default implementation just thunks over to .add_lines(),
87- # inefficient, but it works.
88- return self.add_lines(key, parents, osutils.split_lines(text),
89- nostore_sha=nostore_sha,
90- random_id=random_id,
91- check_content=True)
92-
93 def add_mpdiffs(self, records):
94 """Add mpdiffs to this VersionedFile.
95
96
97=== modified file 'breezy/bzr/vf_repository.py'
98--- breezy/bzr/vf_repository.py 2017-11-19 16:30:53 +0000
99+++ breezy/bzr/vf_repository.py 2017-12-05 00:06:55 +0000
100@@ -76,6 +76,7 @@
101 )
102
103 from ..sixish import (
104+ BytesIO,
105 range,
106 viewitems,
107 viewvalues,
108@@ -486,12 +487,8 @@
109 nostore_sha = None
110 file_obj, stat_value = tree.get_file_with_stat(change[1][1], file_id)
111 try:
112- text = file_obj.read()
113- finally:
114- file_obj.close()
115- try:
116- entry.text_sha1, entry.text_size = self._add_text_to_weave(
117- file_id, text, heads, nostore_sha)
118+ entry.text_sha1, entry.text_size = self._add_lines_to_weave(
119+ file_id, file_obj.readlines(), heads, nostore_sha)
120 yield file_id, change[1][1], (entry.text_sha1, stat_value)
121 except errors.ExistingContent:
122 # No content change against a carry_over parent
123@@ -499,6 +496,8 @@
124 carried_over = True
125 entry.text_size = parent_entry.text_size
126 entry.text_sha1 = parent_entry.text_sha1
127+ finally:
128+ file_obj.close()
129 elif kind == 'symlink':
130 # Wants a path hint?
131 entry.symlink_target = tree.get_symlink_target(change[1][1], file_id)
132@@ -506,7 +505,7 @@
133 parent_entry.symlink_target == entry.symlink_target):
134 carried_over = True
135 else:
136- self._add_text_to_weave(change[0], '', heads, None)
137+ self._add_lines_to_weave(change[0], [], heads, None)
138 elif kind == 'directory':
139 if carry_over_possible:
140 carried_over = True
141@@ -514,7 +513,7 @@
142 # Nothing to set on the entry.
143 # XXX: split into the Root and nonRoot versions.
144 if change[1][1] != '' or self.repository.supports_rich_root():
145- self._add_text_to_weave(change[0], '', heads, None)
146+ self._add_lines_to_weave(change[0], [], heads, None)
147 elif kind == 'tree-reference':
148 if not self.repository._format.supports_tree_reference:
149 # This isn't quite sane as an error, but we shouldn't
150@@ -529,7 +528,7 @@
151 parent_entry.reference_revision == reference_revision):
152 carried_over = True
153 else:
154- self._add_text_to_weave(change[0], '', heads, None)
155+ self._add_lines_to_weave(change[0], [], heads, None)
156 else:
157 raise AssertionError('unknown kind %r' % kind)
158 if not carried_over:
159@@ -555,10 +554,10 @@
160 self._require_root_change(tree)
161 self.basis_delta_revision = basis_revision_id
162
163- def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
164+ def _add_lines_to_weave(self, file_id, lines, parents, nostore_sha):
165 parent_keys = tuple([(file_id, parent) for parent in parents])
166- return self.repository.texts._add_text(
167- (file_id, self._new_revision_id), parent_keys, new_text,
168+ return self.repository.texts.add_lines(
169+ (file_id, self._new_revision_id), parent_keys, lines,
170 nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
171
172
173
174=== modified file 'breezy/plugins/weave_fmt/repository.py'
175--- breezy/plugins/weave_fmt/repository.py 2017-11-20 22:51:10 +0000
176+++ breezy/plugins/weave_fmt/repository.py 2017-12-05 00:06:55 +0000
177@@ -608,11 +608,13 @@
178 raise errors.ReadOnlyError(self)
179 if '/' in key[-1]:
180 raise ValueError('bad idea to put / in %r' % (key,))
181- text = ''.join(lines)
182+ chunks = lines
183 if self._compressed:
184- text = tuned_gzip.bytes_to_gzip(text)
185+ chunks = tuned_gzip.chunks_to_gzip(chunks)
186 path = self._map(key)
187- self._transport.put_bytes_non_atomic(path, text, create_parent_dir=True)
188+ self._transport.put_file_non_atomic(
189+ path, BytesIO(b''.join(chunks)),
190+ create_parent_dir=True)
191
192 def insert_record_stream(self, stream):
193 adapters = {}
194
195=== modified file 'breezy/tests/test_tuned_gzip.py'
196--- breezy/tests/test_tuned_gzip.py 2017-05-22 00:56:52 +0000
197+++ breezy/tests/test_tuned_gzip.py 2017-12-05 00:06:55 +0000
198@@ -32,9 +32,7 @@
199 def assertToGzip(self, chunks):
200 raw_bytes = b''.join(chunks)
201 gzfromchunks = tuned_gzip.chunks_to_gzip(chunks)
202- gzfrombytes = tuned_gzip.bytes_to_gzip(raw_bytes)
203- self.assertEqual(gzfrombytes, gzfromchunks)
204- decoded = gzip.GzipFile(fileobj=BytesIO(gzfromchunks)).read()
205+ decoded = gzip.GzipFile(fileobj=BytesIO(b''.join(gzfromchunks))).read()
206 lraw, ldecoded = len(raw_bytes), len(decoded)
207 self.assertEqual(lraw, ldecoded,
208 'Expecting data length %d, got %d' % (lraw, ldecoded))
209
210=== modified file 'breezy/tuned_gzip.py'
211--- breezy/tuned_gzip.py 2017-05-22 00:56:52 +0000
212+++ breezy/tuned_gzip.py 2017-12-05 00:06:55 +0000
213@@ -22,7 +22,7 @@
214 import struct
215 import zlib
216
217-__all__ = ["bytes_to_gzip", "chunks_to_gzip"]
218+__all__ = ["chunks_to_gzip"]
219
220
221 def U32(i):
222@@ -40,14 +40,6 @@
223 return i & 0xFFFFFFFF
224
225
226-def bytes_to_gzip(bytes, factory=zlib.compressobj,
227- level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
228- width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
229- crc32=zlib.crc32):
230- """Create a gzip file containing bytes and return its content."""
231- return chunks_to_gzip([bytes])
232-
233-
234 def chunks_to_gzip(chunks, factory=zlib.compressobj,
235 level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
236 width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,

Subscribers

People subscribed via source and target branches