1
=== modified file 'NEWS'
2
--- NEWS	2009-06-04 21:05:44 +0000
3
+++ NEWS	2009-06-05 02:35:36 +0000
4
@@ -31,6 +31,12 @@
5
31
  bugs with stacking and non default formats.)
31
  bugs with stacking and non default formats.)
6
32
  (John Arbash Meinel, #373455)
32
  (John Arbash Meinel, #373455)
7
33
33
8
34
* ``--development6-rich-root`` delays generating a delta index for the
9
35
  first object inserted into a group. This has a beneficial impact on
10
36
  ``bzr commit`` since each committed text goes to its own group. For
11
37
  committing a 90MB file, it drops peak memory by about 200MB, and speeds
12
38
  up commit from 7s to 4s. (John Arbash Meinel)
13
39
14
34
* Numerous operations are now faster for huge projects, i.e. those
40
* Numerous operations are now faster for huge projects, i.e. those
15
35
  with a large number of files and/or a large number of revisions,
41
  with a large number of files and/or a large number of revisions,
16
36
  particularly when the latest development format is used. These
42
  particularly when the latest development format is used. These
17
37
43
18
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
19
--- bzrlib/_groupcompress_pyx.pyx	2009-04-09 20:23:07 +0000
20
+++ bzrlib/_groupcompress_pyx.pyx	2009-06-05 02:35:36 +0000
21
@@ -118,6 +118,9 @@
22
118
            self._index = NULL
118
            self._index = NULL
23
119
        safe_free(<void **>&self._source_infos)
119
        safe_free(<void **>&self._source_infos)
24
120
120
25
121
    def _has_index(self):
26
122
        return (self._index != NULL)
27
123
28
121
    def add_delta_source(self, delta, unadded_bytes):
124
    def add_delta_source(self, delta, unadded_bytes):
29
122
        """Add a new delta to the source texts.
125
        """Add a new delta to the source texts.
30
123
126
31
@@ -171,6 +174,9 @@
32
171
        source_location = len(self._sources)
174
        source_location = len(self._sources)
33
172
        if source_location >= self._max_num_sources:
175
        if source_location >= self._max_num_sources:
34
173
            self._expand_sources()
176
            self._expand_sources()
35
177
        if source_location != 0 and self._index == NULL:
36
178
            # We were lazy about populating the index, create it now
37
179
            self._populate_first_index()
38
174
        self._sources.append(source)
180
        self._sources.append(source)
39
175
        c_source = PyString_AS_STRING(source)
181
        c_source = PyString_AS_STRING(source)
40
176
        c_source_size = PyString_GET_SIZE(source)
182
        c_source_size = PyString_GET_SIZE(source)
41
@@ -179,11 +185,24 @@
42
179
        src.size = c_source_size
185
        src.size = c_source_size
43
180
186
44
181
        src.agg_offset = self._source_offset + unadded_bytes
187
        src.agg_offset = self._source_offset + unadded_bytes
45
182
        index = create_delta_index(src, self._index)
46
183
        self._source_offset = src.agg_offset + src.size
188
        self._source_offset = src.agg_offset + src.size
50
184
        if index != NULL:
189
        # We delay creating the index on the first insert
51
185
            free_delta_index(self._index)
190
        if source_location != 0:
52
186
            self._index = index
191
            index = create_delta_index(src, self._index)
53
192
            if index != NULL:
54
193
                free_delta_index(self._index)
55
194
                self._index = index
56
195
57
196
    cdef _populate_first_index(self):
58
197
        cdef delta_index *index
59
198
        if len(self._sources) != 1 or self._index != NULL:
60
199
            raise AssertionError('_populate_first_index should only be'
61
200
                ' called when we have a single source and no index yet')
62
201
63
202
        # We know that self._index is already NULL, so whatever
64
203
        # create_delta_index returns is fine
65
204
        self._index = create_delta_index(&self._source_infos[0], NULL)
66
205
        assert self._index != NULL
67
187
206
68
188
    cdef _expand_sources(self):
207
    cdef _expand_sources(self):
69
189
        raise RuntimeError('if we move self._source_infos, then we need to'
208
        raise RuntimeError('if we move self._source_infos, then we need to'
70
@@ -201,7 +220,10 @@
71
201
        cdef unsigned long delta_size
220
        cdef unsigned long delta_size
72
202
221
73
203
        if self._index == NULL:
222
        if self._index == NULL:
75
204
            return None
223
            if len(self._sources) == 0:
76
224
                return None
77
225
            # We were just lazy about generating the index
78
226
            self._populate_first_index()
79
205
227
80
206
        if not PyString_CheckExact(target_bytes):
228
        if not PyString_CheckExact(target_bytes):
81
207
            raise TypeError('target is not a str')
229
            raise TypeError('target is not a str')
82
208
230
83
=== modified file 'bzrlib/groupcompress.py'
84
--- bzrlib/groupcompress.py	2009-05-29 10:25:37 +0000
85
+++ bzrlib/groupcompress.py	2009-06-05 02:35:36 +0000
86
@@ -746,6 +746,14 @@
87
746
746
88
747
        After calling this, the compressor should no longer be used
747
        After calling this, the compressor should no longer be used
89
748
        """
748
        """
90
749
        # TODO: this causes us to 'bloat' to 2x the size of content in the
91
750
        #       group. This has an impact for 'commit' of large objects.
92
751
        #       One possibility is to use self._content_chunks, and be lazy and
93
752
        #       only fill out self._content as a full string when we actually
94
753
        #       need it. That would at least drop the peak memory consumption
95
754
        #       for 'commit' down to ~1x the size of the largest file, at a
96
755
        #       cost of increased complexity within this code. 2x is still <<
97
756
        #       3x the size of the largest file, so we are doing ok.
98
749
        content = ''.join(self.chunks)
757
        content = ''.join(self.chunks)
99
750
        self.chunks = None
758
        self.chunks = None
100
751
        self._delta_index = None
759
        self._delta_index = None
101
752
760
102
=== modified file 'bzrlib/tests/test__groupcompress.py'
103
--- bzrlib/tests/test__groupcompress.py	2009-04-29 05:53:21 +0000
104
+++ bzrlib/tests/test__groupcompress.py	2009-06-05 02:35:36 +0000
105
@@ -272,6 +272,25 @@
106
272
        di = self._gc_module.DeltaIndex('test text\n')
272
        di = self._gc_module.DeltaIndex('test text\n')
107
273
        self.assertEqual('DeltaIndex(1, 10)', repr(di))
273
        self.assertEqual('DeltaIndex(1, 10)', repr(di))
108
274
274
109
275
    def test_first_add_source_doesnt_index_until_make_delta(self):
110
276
        di = self._gc_module.DeltaIndex()
111
277
        self.assertFalse(di._has_index())
112
278
        di.add_source(_text1, 0)
113
279
        self.assertFalse(di._has_index())
114
280
        # However, asking to make a delta will trigger the index to be
115
281
        # generated, and will generate a proper delta
116
282
        delta = di.make_delta(_text2)
117
283
        self.assertTrue(di._has_index())
118
284
        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
119
285
120
286
    def test_second_add_source_triggers_make_index(self):
121
287
        di = self._gc_module.DeltaIndex()
122
288
        self.assertFalse(di._has_index())
123
289
        di.add_source(_text1, 0)
124
290
        self.assertFalse(di._has_index())
125
291
        di.add_source(_text2, 0)
126
292
        self.assertTrue(di._has_index())
127
293
128
275
    def test_make_delta(self):
294
    def test_make_delta(self):
129
276
        di = self._gc_module.DeltaIndex(_text1)
295
        di = self._gc_module.DeltaIndex(_text1)
130
277
        delta = di.make_delta(_text2)
296
        delta = di.make_delta(_text2)
Status:	Merged
Merged at revision:	not available
Proposed branch:	lp:~jameinel/bzr/1.16-no-first-delta-index
Merge into:	lp:~bzr/bzr/trunk-old
Diff against target:	130 lines
To merge this branch:	bzr merge lp:~jameinel/bzr/1.16-no-first-delta-index
Related bugs:	Link a bug report
Reviewer	Review Type	Date Requested	Status
Andrew Bennetts		2009-06-02	Approve on 2009-06-05
Review via email: mp+6991@code.launchpad.net