Merge lp:~gz/brz/groupscompress_pyx_tidy into lp:brz

Proposed by Martin Packman
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~gz/brz/groupscompress_pyx_tidy
Merge into: lp:brz
Diff against target: 309 lines (+57/-82)
3 files modified
breezy/bzr/_groupcompress_pyx.pyx (+45/-76)
breezy/bzr/diff-delta.c (+3/-6)
breezy/tests/test__groupcompress.py (+9/-0)
To merge this branch: bzr merge lp:~gz/brz/groupscompress_pyx_tidy
Reviewer Review Type Date Requested Status
Jelmer Vernooij Approve
Review via email: mp+358903@code.launchpad.net

Commit message

Tidy up _groupcompress_pyx

Description of the change

Use cimport over cdef extern for common libraries.

Remove 'safe' memory wrappers and use pymalloc for main allocation.

Change code that was avoiding issues with old Pyrex versions.

Remove unused diff-delta.c variables.

Add test for __sizeof__ implementation.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Looks reasonable.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/bzr/_groupcompress_pyx.pyx'
2--- breezy/bzr/_groupcompress_pyx.pyx 2018-08-18 19:43:40 +0000
3+++ breezy/bzr/_groupcompress_pyx.pyx 2018-11-16 13:31:49 +0000
4@@ -22,22 +22,26 @@
5 cdef extern from "python-compat.h":
6 pass
7
8-
9-cdef extern from "Python.h":
10- ctypedef struct PyObject:
11- pass
12- int PyBytes_CheckExact(object)
13- char * PyBytes_AS_STRING(object)
14- Py_ssize_t PyBytes_GET_SIZE(object)
15- object PyBytes_FromStringAndSize(char *, Py_ssize_t)
16-
17-
18-cdef extern from *:
19- ctypedef unsigned long size_t
20- void * malloc(size_t) nogil
21- void * realloc(void *, size_t) nogil
22- void free(void *) nogil
23- void memcpy(void *, void *, size_t) nogil
24+from libc.stdlib cimport (
25+ free,
26+ )
27+from libc.string cimport (
28+ memcpy,
29+ )
30+
31+from cpython.bytes cimport (
32+ PyBytes_AS_STRING,
33+ PyBytes_CheckExact,
34+ PyBytes_FromStringAndSize,
35+ PyBytes_GET_SIZE,
36+ )
37+from cpython.object cimport (
38+ PyObject,
39+ )
40+from cpython.mem cimport (
41+ PyMem_Free,
42+ PyMem_Malloc,
43+ )
44
45
46 cdef extern from "delta.h":
47@@ -78,29 +82,6 @@
48 unsigned int rabin_hash (unsigned char *data)
49
50
51-cdef void *safe_malloc(size_t count) except NULL:
52- cdef void *result
53- result = malloc(count)
54- if result == NULL:
55- raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
56- return result
57-
58-
59-cdef void *safe_realloc(void * old, size_t count) except NULL:
60- cdef void *result
61- result = realloc(old, count)
62- if result == NULL:
63- raise MemoryError('Failed to reallocate to %d bytes of memory'
64- % (count,))
65- return result
66-
67-
68-cdef int safe_free(void **val) except -1:
69- assert val != NULL
70- if val[0] != NULL:
71- free(val[0])
72- val[0] = NULL
73-
74 def make_delta_index(source):
75 return DeltaIndex(source)
76
77@@ -130,10 +111,7 @@
78
79 cdef class DeltaIndex:
80
81- # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
82- # isn't performance critical
83- # cdef readonly list _sources
84- cdef readonly object _sources
85+ cdef readonly list _sources
86 cdef source_info *_source_infos
87 cdef delta_index *_index
88 cdef public unsigned long _source_offset
89@@ -144,8 +122,10 @@
90 self._sources = []
91 self._index = NULL
92 self._max_num_sources = 65000
93- self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
94- * self._max_num_sources)
95+ self._source_infos = <source_info *>PyMem_Malloc(
96+ sizeof(source_info) * self._max_num_sources)
97+ if self._source_infos == NULL:
98+ raise MemoryError('failed to allocate memory for DeltaIndex')
99 self._source_offset = 0
100 self._max_bytes_to_index = 0
101 if max_bytes_to_index is not None:
102@@ -157,18 +137,9 @@
103 def __sizeof__(self):
104 # We want to track the _source_infos allocations, but the referenced
105 # void* are actually tracked in _sources itself.
106- # XXX: Cython is capable of doing sizeof(class) and returning the size
107- # of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
108- # to do it manually. *sigh* Note that we might get it wrong
109- # because of alignment issues.
110- cdef Py_ssize_t size
111- # PyObject start, vtable *, 3 object pointers, 2 C ints
112- size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
113- + sizeof(unsigned long)
114- + sizeof(unsigned int))
115+ return (sizeof(DeltaIndex)
116 + (sizeof(source_info) * self._max_num_sources)
117 + sizeof_delta_index(self._index))
118- return size
119
120 def __repr__(self):
121 return '%s(%d, %d)' % (self.__class__.__name__,
122@@ -178,7 +149,7 @@
123 if self._index != NULL:
124 free_delta_index(self._index)
125 self._index = NULL
126- safe_free(<void **>&self._source_infos)
127+ PyMem_Free(self._source_infos)
128
129 def _has_index(self):
130 return (self._index != NULL)
131@@ -326,10 +297,6 @@
132 cdef _expand_sources(self):
133 raise RuntimeError('if we move self._source_infos, then we need to'
134 ' change all of the index pointers as well.')
135- self._max_num_sources = self._max_num_sources * 2
136- self._source_infos = <source_info *>safe_realloc(self._source_infos,
137- sizeof(source_info)
138- * self._max_num_sources)
139
140 def make_delta(self, target_bytes, max_delta_size=0):
141 """Create a delta from the current source to the target bytes."""
142@@ -399,7 +366,7 @@
143 return _apply_delta(source, source_size, delta, delta_size)
144
145
146-cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
147+cdef unsigned char *_decode_copy_instruction(unsigned char *data,
148 unsigned char cmd, unsigned int *offset,
149 unsigned int *length) nogil: # cannot_raise
150 """Decode a copy instruction from the next few bytes.
151@@ -417,38 +384,41 @@
152 size = 0
153 count = 0
154 if (cmd & 0x01):
155- off = bytes[count]
156+ off = data[count]
157 count = count + 1
158 if (cmd & 0x02):
159- off = off | (bytes[count] << 8)
160+ off = off | (data[count] << 8)
161 count = count + 1
162 if (cmd & 0x04):
163- off = off | (bytes[count] << 16)
164+ off = off | (data[count] << 16)
165 count = count + 1
166 if (cmd & 0x08):
167- off = off | (bytes[count] << 24)
168+ off = off | (data[count] << 24)
169 count = count + 1
170 if (cmd & 0x10):
171- size = bytes[count]
172+ size = data[count]
173 count = count + 1
174 if (cmd & 0x20):
175- size = size | (bytes[count] << 8)
176+ size = size | (data[count] << 8)
177 count = count + 1
178 if (cmd & 0x40):
179- size = size | (bytes[count] << 16)
180+ size = size | (data[count] << 16)
181 count = count + 1
182 if (size == 0):
183 size = 0x10000
184 offset[0] = off
185 length[0] = size
186- return bytes + count
187+ return data + count
188
189
190 cdef object _apply_delta(char *source, Py_ssize_t source_size,
191 char *delta, Py_ssize_t delta_size):
192 """common functionality between apply_delta and apply_delta_to_source."""
193- cdef unsigned char *data, *top
194- cdef unsigned char *dst_buf, *out, cmd
195+ cdef unsigned char *data
196+ cdef unsigned char *top
197+ cdef unsigned char *dst_buf
198+ cdef unsigned char *out
199+ cdef unsigned char cmd
200 cdef Py_ssize_t size
201 cdef unsigned int cp_off, cp_size
202 cdef int failed
203@@ -510,7 +480,6 @@
204 raise RuntimeError('Did not extract the number of bytes we expected'
205 ' we were left with %d bytes in "size", and top - data = %d'
206 % (size, <int>(top - data)))
207- return None
208
209 # *dst_size = out - dst_buf;
210 if (out - dst_buf) != PyBytes_GET_SIZE(result):
211@@ -567,7 +536,7 @@
212 return PyBytes_FromStringAndSize(<char *>c_bytes, count)
213
214
215-def decode_base128_int(bytes):
216+def decode_base128_int(data):
217 """Decode an integer from a 7-bit lsb encoding."""
218 cdef int offset
219 cdef int val
220@@ -579,11 +548,11 @@
221 offset = 0
222 val = 0
223 shift = 0
224- if not PyBytes_CheckExact(bytes):
225+ if not PyBytes_CheckExact(data):
226 raise TypeError('bytes is not a string')
227- c_bytes = <unsigned char*>PyBytes_AS_STRING(bytes)
228+ c_bytes = <unsigned char*>PyBytes_AS_STRING(data)
229 # We take off 1, because we have to be able to decode the non-expanded byte
230- num_low_bytes = PyBytes_GET_SIZE(bytes) - 1
231+ num_low_bytes = PyBytes_GET_SIZE(data) - 1
232 while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
233 val = val | ((c_bytes[offset] & 0x7F) << shift)
234 shift = shift + 7
235
236=== modified file 'breezy/bzr/diff-delta.c'
237--- breezy/bzr/diff-delta.c 2017-06-08 23:30:31 +0000
238+++ breezy/bzr/diff-delta.c 2018-11-16 13:31:49 +0000
239@@ -220,7 +220,7 @@
240 unsigned int i, j, hmask, memsize, fit_in_old, copied_count;
241 struct unpacked_index_entry *entry;
242 struct delta_index *index;
243- struct index_entry *packed_entry, **packed_hash, *old_entry, *copy_from;
244+ struct index_entry *packed_entry, **packed_hash, *old_entry;
245 struct index_entry null_entry = {0};
246 void *mem;
247
248@@ -334,7 +334,6 @@
249 * old_index->hash_mask? Would it make any real difference?
250 */
251 j = i & old_index->hash_mask;
252- copy_from = old_index->hash[j];
253 for (old_entry = old_index->hash[j];
254 old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL;
255 old_entry++) {
256@@ -905,7 +904,6 @@
257 int inscnt;
258 const unsigned char *ref_data, *ref_top, *data, *top;
259 unsigned char *out;
260- unsigned long source_size;
261
262 if (!trg_buf || !trg_size)
263 return DELTA_BUFFER_EMPTY;
264@@ -920,8 +918,6 @@
265 if (!out)
266 return DELTA_OUT_OF_MEMORY;
267
268- source_size = index->last_src->size + index->last_src->agg_offset;
269-
270 /* store target buffer size */
271 i = trg_size;
272 while (i >= 0x80) {
273@@ -1053,7 +1049,8 @@
274 */
275 assert(moff < msource->size);
276 moff += msource->agg_offset;
277- assert(moff + msize <= source_size);
278+ assert(moff + msize
279+ <= index->last_src->size + index->last_src->agg_offset);
280 if (moff & 0x000000ff)
281 out[outpos++] = moff >> 0, i |= 0x01;
282 if (moff & 0x0000ff00)
283
284=== modified file 'breezy/tests/test__groupcompress.py'
285--- breezy/tests/test__groupcompress.py 2018-08-18 19:43:40 +0000
286+++ breezy/tests/test__groupcompress.py 2018-11-16 13:31:49 +0000
287@@ -16,6 +16,8 @@
288
289 """Tests for the python and pyrex extensions of groupcompress"""
290
291+import sys
292+
293 from .. import (
294 tests,
295 )
296@@ -273,6 +275,13 @@
297 di = self._gc_module.DeltaIndex(b'test text\n')
298 self.assertEqual('DeltaIndex(1, 10)', repr(di))
299
300+ def test_sizeof(self):
301+ di = self._gc_module.DeltaIndex()
302+ # Exact value will depend on platform but should include sources
303+ # source_info is a pointer and two longs so at least 12 bytes
304+ lower_bound = di._max_num_sources * 12
305+ self.assertGreater(sys.getsizeof(di), lower_bound)
306+
307 def test__dump_no_index(self):
308 di = self._gc_module.DeltaIndex()
309 self.assertEqual(None, di._dump_index())

Subscribers

People subscribed via source and target branches