Merge lp:~jelmer/brz/big-file-vf into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/big-file-vf
Merge into: lp:brz
Diff against target: 255 lines (+133/-15)
6 files modified
breezy/bzr/groupcompress.py (+45/-10)
breezy/bzr/knit.py (+15/-0)
breezy/bzr/versionedfile.py (+42/-0)
breezy/bzr/vf_repository.py (+3/-2)
breezy/tests/blackbox/test_big_file.py (+8/-3)
breezy/tests/per_versionedfile.py (+20/-0)
To merge this branch: bzr merge lp:~jelmer/brz/big-file-vf
Reviewer Review Type Date Requested Status
Martin Packman Approve
Review via email: mp+368882@code.launchpad.net

Commit message

Add a VersionedFiles.add_chunks method that takes a chunk_iter.

Description of the change

Add a VersionedFiles.add_chunks method that takes a chunk_iter.

Improve big file tests:
* Use RLIMIT_AS rather than RLIMIT_DATA
* Skip the tests if there is not enough disk space.

This is in preparation for improved support for large files.

To post a comment you must log in.
Revision history for this message
Martin Packman (gz) wrote :

Looks good, one teeny test nit inline.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'breezy/bzr/groupcompress.py'
--- breezy/bzr/groupcompress.py 2018-11-17 16:53:10 +0000
+++ breezy/bzr/groupcompress.py 2019-06-16 15:55:45 +0000
@@ -1295,18 +1295,56 @@
1295 back to future add_lines calls in the parent_texts dictionary.1295 back to future add_lines calls in the parent_texts dictionary.
1296 """1296 """
1297 self._index._check_write_ok()1297 self._index._check_write_ok()
1298 self._check_add(key, lines, random_id, check_content)1298 if check_content:
1299 self._check_lines_not_unicode(lines)
1300 self._check_lines_are_lines(lines)
1301 return self.add_chunks(
1302 key, parents, iter(lines), parent_texts, left_matching_blocks,
1303 nostore_sha, random_id)
1304
1305 def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
1306 left_matching_blocks=None, nostore_sha=None, random_id=False):
1307 """Add a text to the store.
1308
1309 :param key: The key tuple of the text to add.
1310 :param parents: The parents key tuples of the text to add.
1311 :param chunk_iter: An iterator over chunks. Chunks
1312 don't need to be file lines; the only requirement is that they
1313 are bytes.
1314 :param parent_texts: An optional dictionary containing the opaque
1315 representations of some or all of the parents of version_id to
1316 allow delta optimisations. VERY IMPORTANT: the texts must be those
1317 returned by add_lines or data corruption can be caused.
1318 :param left_matching_blocks: a hint about which areas are common
1319 between the text and its left-hand-parent. The format is
1320 the SequenceMatcher.get_matching_blocks format.
1321 :param nostore_sha: Raise ExistingContent and do not add the lines to
1322 the versioned file if the digest of the lines matches this.
1323 :param random_id: If True a random id has been selected rather than
1324 an id determined by some deterministic process such as a converter
1325 from a foreign VCS. When True the backend may choose not to check
1326 for uniqueness of the resulting key within the versioned file, so
1327 this should only be done when the result is expected to be unique
1328 anyway.
1329 :return: The text sha1, the number of bytes in the text, and an opaque
1330 representation of the inserted version which can be provided
1331 back to future add_lines calls in the parent_texts dictionary.
1332 """
1333 self._index._check_write_ok()
1334 self._check_add(key, random_id)
1299 if parents is None:1335 if parents is None:
1300 # The caller might pass None if there is no graph data, but kndx1336 # The caller might pass None if there is no graph data, but kndx
1301 # indexes can't directly store that, so we give them1337 # indexes can't directly store that, so we give them
1302 # an empty tuple instead.1338 # an empty tuple instead.
1303 parents = ()1339 parents = ()
1304 # double handling for now. Make it work until then.1340 # double handling for now. Make it work until then.
1305 length = sum(map(len, lines))1341 # TODO(jelmer): problematic for big files: let's not keep the list of
1306 record = ChunkedContentFactory(key, parents, None, lines)1342 # chunks in memory.
1307 sha1 = list(self._insert_record_stream([record], random_id=random_id,1343 chunks = list(chunk_iter)
1308 nostore_sha=nostore_sha))[0]1344 record = ChunkedContentFactory(key, parents, None, chunks)
1309 return sha1, length, None1345 sha1 = list(self._insert_record_stream(
1346 [record], random_id=random_id, nostore_sha=nostore_sha))[0]
1347 return sha1, sum(map(len, chunks)), None
13101348
1311 def add_fallback_versioned_files(self, a_versioned_files):1349 def add_fallback_versioned_files(self, a_versioned_files):
1312 """Add a source of texts for texts not present in this knit.1350 """Add a source of texts for texts not present in this knit.
@@ -1338,7 +1376,7 @@
1338 self._index._graph_index.clear_cache()1376 self._index._graph_index.clear_cache()
1339 self._index._int_cache.clear()1377 self._index._int_cache.clear()
13401378
1341 def _check_add(self, key, lines, random_id, check_content):1379 def _check_add(self, key, random_id):
1342 """check that version_id and lines are safe to add."""1380 """check that version_id and lines are safe to add."""
1343 version_id = key[-1]1381 version_id = key[-1]
1344 if version_id is not None:1382 if version_id is not None:
@@ -1349,9 +1387,6 @@
1349 # probably check that the existing content is identical to what is1387 # probably check that the existing content is identical to what is
1350 # being inserted, and otherwise raise an exception. This would make1388 # being inserted, and otherwise raise an exception. This would make
1351 # the bundle code simpler.1389 # the bundle code simpler.
1352 if check_content:
1353 self._check_lines_not_unicode(lines)
1354 self._check_lines_are_lines(lines)
13551390
1356 def get_parent_map(self, keys):1391 def get_parent_map(self, keys):
1357 """Get a map of the graph parents of keys.1392 """Get a map of the graph parents of keys.
13581393
=== modified file 'breezy/bzr/knit.py'
--- breezy/bzr/knit.py 2019-05-28 21:46:09 +0000
+++ breezy/bzr/knit.py 2019-06-16 15:55:45 +0000
@@ -996,6 +996,21 @@
996 parent_texts, left_matching_blocks, nostore_sha, random_id,996 parent_texts, left_matching_blocks, nostore_sha, random_id,
997 line_bytes=line_bytes)997 line_bytes=line_bytes)
998998
999 def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
1000 left_matching_blocks=None, nostore_sha=None, random_id=False):
1001 """See VersionedFiles.add_chunks()."""
1002 self._index._check_write_ok()
1003 self._check_add(key, None, random_id, check_content=False)
1004 if parents is None:
1005 # The caller might pass None if there is no graph data, but kndx
1006 # indexes can't directly store that, so we give them
1007 # an empty tuple instead.
1008 parents = ()
1009 line_bytes = b''.join(chunk_iter)
1010 return self._add(key, None, parents,
1011 parent_texts, left_matching_blocks, nostore_sha, random_id,
1012 line_bytes=line_bytes)
1013
999 def _add(self, key, lines, parents, parent_texts,1014 def _add(self, key, lines, parents, parent_texts,
1000 left_matching_blocks, nostore_sha, random_id,1015 left_matching_blocks, nostore_sha, random_id,
1001 line_bytes):1016 line_bytes):
10021017
=== modified file 'breezy/bzr/versionedfile.py'
--- breezy/bzr/versionedfile.py 2018-11-11 04:08:32 +0000
+++ breezy/bzr/versionedfile.py 2019-06-16 15:55:45 +0000
@@ -978,6 +978,38 @@
978 """978 """
979 raise NotImplementedError(self.add_lines)979 raise NotImplementedError(self.add_lines)
980980
981 def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
982 left_matching_blocks=None, nostore_sha=None, random_id=False,
983 check_content=True):
984 """Add a text to the store from a chunk iterable.
985
986 :param key: The key tuple of the text to add. If the last element is
987 None, a CHK string will be generated during the addition.
988 :param parents: The parents key tuples of the text to add.
989 :param chunk_iter: An iterable over bytestrings.
990 :param parent_texts: An optional dictionary containing the opaque
991 representations of some or all of the parents of version_id to
992 allow delta optimisations. VERY IMPORTANT: the texts must be those
993 returned by add_lines or data corruption can be caused.
994 :param left_matching_blocks: a hint about which areas are common
995 between the text and its left-hand-parent. The format is
996 the SequenceMatcher.get_matching_blocks format.
997 :param nostore_sha: Raise ExistingContent and do not add the lines to
998 the versioned file if the digest of the lines matches this.
999 :param random_id: If True a random id has been selected rather than
1000 an id determined by some deterministic process such as a converter
1001 from a foreign VCS. When True the backend may choose not to check
1002 for uniqueness of the resulting key within the versioned file, so
1003 this should only be done when the result is expected to be unique
1004 anyway.
1005 :param check_content: If True, the lines supplied are verified to be
1006 bytestrings that are correctly formed lines.
1007 :return: The text sha1, the number of bytes in the text, and an opaque
1008 representation of the inserted version which can be provided
1009 back to future add_lines calls in the parent_texts dictionary.
1010 """
1011 raise NotImplementedError(self.add_chunks)
1012
981 def add_mpdiffs(self, records):1013 def add_mpdiffs(self, records):
982 """Add mpdiffs to this VersionedFile.1014 """Add mpdiffs to this VersionedFile.
9831015
@@ -1201,6 +1233,16 @@
1201 self._mapper = mapper1233 self._mapper = mapper
1202 self._is_locked = is_locked1234 self._is_locked = is_locked
12031235
1236 def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
1237 left_matching_blocks=None, nostore_sha=None,
1238 random_id=False):
1239 # For now, just fallback to add_lines.
1240 lines = osutils.chunks_to_lines(list(chunk_iter))
1241 return self.add_lines(
1242 key, parents, lines, parent_texts,
1243 left_matching_blocks, nostore_sha, random_id,
1244 check_content=True)
1245
1204 def add_lines(self, key, parents, lines, parent_texts=None,1246 def add_lines(self, key, parents, lines, parent_texts=None,
1205 left_matching_blocks=None, nostore_sha=None, random_id=False,1247 left_matching_blocks=None, nostore_sha=None, random_id=False,
1206 check_content=True):1248 check_content=True):
12071249
=== modified file 'breezy/bzr/vf_repository.py'
--- breezy/bzr/vf_repository.py 2019-06-15 12:04:34 +0000
+++ breezy/bzr/vf_repository.py 2019-06-16 15:55:45 +0000
@@ -569,8 +569,9 @@
569569
570 def _add_file_to_weave(self, file_id, fileobj, parents, nostore_sha):570 def _add_file_to_weave(self, file_id, fileobj, parents, nostore_sha):
571 parent_keys = tuple([(file_id, parent) for parent in parents])571 parent_keys = tuple([(file_id, parent) for parent in parents])
572 return self.repository.texts.add_lines(572 return self.repository.texts.add_chunks(
573 (file_id, self._new_revision_id), parent_keys, fileobj.readlines(),573 (file_id, self._new_revision_id), parent_keys,
574 osutils.file_iterator(fileobj),
574 nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]575 nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
575576
576577
577578
=== modified file 'breezy/tests/blackbox/test_big_file.py'
--- breezy/tests/blackbox/test_big_file.py 2019-06-15 21:45:04 +0000
+++ breezy/tests/blackbox/test_big_file.py 2019-06-16 15:55:45 +0000
@@ -21,6 +21,7 @@
21memory.21memory.
22"""22"""
2323
24import errno
24import os25import os
25import resource26import resource
2627
@@ -36,8 +37,8 @@
36BIG_FILE_SIZE = 1024 * 1024 * 50037BIG_FILE_SIZE = 1024 * 1024 * 500
37BIG_FILE_CHUNK_SIZE = 1024 * 102438BIG_FILE_CHUNK_SIZE = 1024 * 1024
3839
39RESOURCE = resource.RLIMIT_DATA40RESOURCE = resource.RLIMIT_AS
40LIMIT = 1024 * 1024 * 20041LIMIT = 1024 * 1024 * 100
4142
4243
43def make_big_file(path):44def make_big_file(path):
@@ -50,8 +51,12 @@
50class TestAdd(tests.TestCaseWithTransport):51class TestAdd(tests.TestCaseWithTransport):
5152
52 def writeBigFile(self, path):53 def writeBigFile(self, path):
53 make_big_file(path)
54 self.addCleanup(os.unlink, path)54 self.addCleanup(os.unlink, path)
55 try:
56 make_big_file(path)
57 except EnvironmentError as e:
58 if e.errno == errno.ENOSPC:
59 self.skipTest('not enough disk space for big file')
5560
56 def setUp(self):61 def setUp(self):
57 super(TestAdd, self).setUp()62 super(TestAdd, self).setUp()
5863
=== modified file 'breezy/tests/per_versionedfile.py'
--- breezy/tests/per_versionedfile.py 2018-11-11 04:08:32 +0000
+++ breezy/tests/per_versionedfile.py 2019-06-16 15:55:45 +0000
@@ -1538,6 +1538,26 @@
1538 records.sort()1538 records.sort()
1539 self.assertEqual([(key0, b'a\nb\n'), (key1, b'b\nc\n')], records)1539 self.assertEqual([(key0, b'a\nb\n'), (key1, b'b\nc\n')], records)
15401540
1541 def test_add_chunks(self):
1542 f = self.get_versionedfiles()
1543 key0 = self.get_simple_key(b'r0')
1544 key1 = self.get_simple_key(b'r1')
1545 key2 = self.get_simple_key(b'r2')
1546 keyf = self.get_simple_key(b'foo')
1547 f.add_chunks(key0, [], [b'a', b'\nb\n'])
1548 if self.graph:
1549 f.add_chunks(key1, [key0], [b'b', b'\n', b'c\n'])
1550 else:
1551 f.add_chunks(key1, [], [b'b\n', b'c\n'])
1552 keys = f.keys()
1553 self.assertIn(key0, keys)
1554 self.assertIn(key1, keys)
1555 records = []
1556 for record in f.get_record_stream([key0, key1], 'unordered', True):
1557 records.append((record.key, record.get_bytes_as('fulltext')))
1558 records.sort()
1559 self.assertEqual([(key0, b'a\nb\n'), (key1, b'b\nc\n')], records)
1560
1541 def test_annotate(self):1561 def test_annotate(self):
1542 files = self.get_versionedfiles()1562 files = self.get_versionedfiles()
1543 self.get_diamond_files(files)1563 self.get_diamond_files(files)

Subscribers

People subscribed via source and target branches