Merge lp:~gz/bzr/create_delta_index_api_change_633336 into lp:bzr
- create_delta_index_api_change_633336
- Merge into bzr.dev
Status: | Merged |
---|---|
Approved by: | Martin Packman |
Approved revision: | no longer in the source branch. |
Merged at revision: | 5731 |
Proposed branch: | lp:~gz/bzr/create_delta_index_api_change_633336 |
Merge into: | lp:bzr |
Diff against target: |
482 lines (+140/-70) 4 files modified
bzrlib/_groupcompress_pyx.pyx (+58/-20) bzrlib/delta.h (+35/-19) bzrlib/diff-delta.c (+43/-31) doc/en/release-notes/bzr-2.4.txt (+4/-0) |
To merge this branch: | bzr merge lp:~gz/bzr/create_delta_index_api_change_633336 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
John A Meinel | Approve | ||
Review via email: mp+54130@code.launchpad.net |
Commit message
Change diff-delta.c code so MemoryError can be raised from OOM rather than AssertionError
Description of the change
Based on the review of the initial version of this fix for bug 633336, this branch gives up of trying to be minimal and makes a more invasive change to the api of the problematic functions. By taking an outparam the return value is available to return a particular code so it's possible to distinguish malloc failures from other types of errors.
For more background see the previous review:
<https:/
Various alternatives were considered along the way, this seems like the cleanest option. Importing Python.h and using the python api so exceptions could be raised directly would also work, but the code->exception object adapter function added isn't too painful.
While I was at it, I also changed the interface of creat_delta which would silently ignore malloc failures, though it'd be harder to hit in practice as it's dealing in smaller chunks. The related TODO about cutting down the double-malloc is not addressed.
Does this want a NEWS entry under internals, or is this sufficiently deep that no one really cares?
Martin Packman (gz) wrote : | # |
sent to pqm by email
Martin Packman (gz) wrote : | # |
PQM has some kind of issue with the C. Either GCC doesn't like me, or there's Pyrex/Cython version incompatibility generating bad code:
I: [pqm-amd64 chroot] Running command: "cd /home/pqm/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
bzrlib/
error: command 'gcc' failed with exit status 1
Any ideas John?
Martin Packman (gz) wrote : | # |
sent to pqm by email
Preview Diff
1 | === modified file 'bzrlib/_groupcompress_pyx.pyx' |
2 | --- bzrlib/_groupcompress_pyx.pyx 2010-08-02 17:08:29 +0000 |
3 | +++ bzrlib/_groupcompress_pyx.pyx 2011-03-22 16:01:27 +0000 |
4 | @@ -46,13 +46,26 @@ |
5 | unsigned long agg_offset |
6 | struct delta_index: |
7 | pass |
8 | - delta_index * create_delta_index(source_info *src, delta_index *old) nogil |
9 | - delta_index * create_delta_index_from_delta(source_info *delta, |
10 | - delta_index *old) nogil |
11 | + ctypedef enum delta_result: |
12 | + DELTA_OK |
13 | + DELTA_OUT_OF_MEMORY |
14 | + DELTA_INDEX_NEEDED |
15 | + DELTA_SOURCE_EMPTY |
16 | + DELTA_SOURCE_BAD |
17 | + DELTA_BUFFER_EMPTY |
18 | + DELTA_SIZE_TOO_BIG |
19 | + delta_result create_delta_index(source_info *src, |
20 | + delta_index *old, |
21 | + delta_index **fresh) nogil |
22 | + delta_result create_delta_index_from_delta(source_info *delta, |
23 | + delta_index *old, |
24 | + delta_index **fresh) nogil |
25 | void free_delta_index(delta_index *index) nogil |
26 | - void *create_delta(delta_index *indexes, |
27 | - void *buf, unsigned long bufsize, |
28 | - unsigned long *delta_size, unsigned long max_delta_size) nogil |
29 | + delta_result create_delta(delta_index *indexes, |
30 | + void *buf, unsigned long bufsize, |
31 | + unsigned long *delta_size, |
32 | + unsigned long max_delta_size, |
33 | + void **delta_data) nogil |
34 | unsigned long get_delta_hdr_size(unsigned char **datap, |
35 | unsigned char *top) nogil |
36 | unsigned long sizeof_delta_index(delta_index *index) |
37 | @@ -86,6 +99,20 @@ |
38 | return DeltaIndex(source) |
39 | |
40 | |
41 | +cdef object _translate_delta_failure(delta_result result): |
42 | + if result == DELTA_OUT_OF_MEMORY: |
43 | + return MemoryError("Delta function failed to allocate memory") |
44 | + elif result == DELTA_INDEX_NEEDED: |
45 | + return ValueError("Delta function requires delta_index param") |
46 | + elif result == DELTA_SOURCE_EMPTY: |
47 | + return ValueError("Delta function given empty source_info param") |
48 | + elif result == DELTA_SOURCE_BAD: |
49 | + return RuntimeError("Delta function given invalid source_info param") |
50 | + elif result == DELTA_BUFFER_EMPTY: |
51 | + return ValueError("Delta function given empty buffer params") |
52 | + return AssertionError("Unrecognised delta result code: %d" % result) |
53 | + |
54 | + |
55 | cdef class DeltaIndex: |
56 | |
57 | # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object |
58 | @@ -147,6 +174,7 @@ |
59 | cdef char *c_delta |
60 | cdef Py_ssize_t c_delta_size |
61 | cdef delta_index *index |
62 | + cdef delta_result res |
63 | cdef unsigned int source_location |
64 | cdef source_info *src |
65 | cdef unsigned int num_indexes |
66 | @@ -165,9 +193,11 @@ |
67 | src.size = c_delta_size |
68 | src.agg_offset = self._source_offset + unadded_bytes |
69 | with nogil: |
70 | - index = create_delta_index_from_delta(src, self._index) |
71 | + res = create_delta_index_from_delta(src, self._index, &index) |
72 | + if res != DELTA_OK: |
73 | + raise _translate_delta_failure(res) |
74 | self._source_offset = src.agg_offset + src.size |
75 | - if index != NULL: |
76 | + if index != self._index: |
77 | free_delta_index(self._index) |
78 | self._index = index |
79 | |
80 | @@ -181,6 +211,7 @@ |
81 | cdef char *c_source |
82 | cdef Py_ssize_t c_source_size |
83 | cdef delta_index *index |
84 | + cdef delta_result res |
85 | cdef unsigned int source_location |
86 | cdef source_info *src |
87 | cdef unsigned int num_indexes |
88 | @@ -206,22 +237,27 @@ |
89 | # We delay creating the index on the first insert |
90 | if source_location != 0: |
91 | with nogil: |
92 | - index = create_delta_index(src, self._index) |
93 | - if index != NULL: |
94 | + res = create_delta_index(src, self._index, &index) |
95 | + if res != DELTA_OK: |
96 | + raise _translate_delta_failure(res) |
97 | + if index != self._index: |
98 | free_delta_index(self._index) |
99 | self._index = index |
100 | |
101 | cdef _populate_first_index(self): |
102 | cdef delta_index *index |
103 | + cdef delta_result res |
104 | if len(self._sources) != 1 or self._index != NULL: |
105 | raise AssertionError('_populate_first_index should only be' |
106 | ' called when we have a single source and no index yet') |
107 | |
108 | - # We know that self._index is already NULL, so whatever |
109 | - # create_delta_index returns is fine |
110 | + # We know that self._index is already NULL, so create_delta_index |
111 | + # will always create a new index unless there's a malloc failure |
112 | with nogil: |
113 | - self._index = create_delta_index(&self._source_infos[0], NULL) |
114 | - assert self._index != NULL |
115 | + res = create_delta_index(&self._source_infos[0], NULL, &index) |
116 | + if res != DELTA_OK: |
117 | + raise _translate_delta_failure(res) |
118 | + self._index = index |
119 | |
120 | cdef _expand_sources(self): |
121 | raise RuntimeError('if we move self._source_infos, then we need to' |
122 | @@ -238,6 +274,7 @@ |
123 | cdef void * delta |
124 | cdef unsigned long delta_size |
125 | cdef unsigned long c_max_delta_size |
126 | + cdef delta_result res |
127 | |
128 | if self._index == NULL: |
129 | if len(self._sources) == 0: |
130 | @@ -256,13 +293,14 @@ |
131 | # allocate the bytes into the final string |
132 | c_max_delta_size = max_delta_size |
133 | with nogil: |
134 | - delta = create_delta(self._index, |
135 | - target, target_size, |
136 | - &delta_size, c_max_delta_size) |
137 | + res = create_delta(self._index, target, target_size, |
138 | + &delta_size, c_max_delta_size, &delta) |
139 | result = None |
140 | - if delta: |
141 | + if res == DELTA_OK: |
142 | result = PyString_FromStringAndSize(<char *>delta, delta_size) |
143 | free(delta) |
144 | + elif res != DELTA_SIZE_TOO_BIG: |
145 | + raise _translate_delta_failure(res) |
146 | return result |
147 | |
148 | |
149 | @@ -369,8 +407,8 @@ |
150 | # Copy instruction |
151 | data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size) |
152 | if (cp_off + cp_size < cp_size or |
153 | - cp_off + cp_size > source_size or |
154 | - cp_size > size): |
155 | + cp_off + cp_size > <unsigned int>source_size or |
156 | + cp_size > <unsigned int>size): |
157 | failed = 1 |
158 | break |
159 | memcpy(out, source + cp_off, cp_size) |
160 | |
161 | === modified file 'bzrlib/delta.h' |
162 | --- bzrlib/delta.h 2009-04-09 20:23:07 +0000 |
163 | +++ bzrlib/delta.h 2011-03-22 16:01:27 +0000 |
164 | @@ -21,33 +21,48 @@ |
165 | aggregate source */ |
166 | }; |
167 | |
168 | + |
169 | +/* result type for functions that have multiple failure modes */ |
170 | +typedef enum { |
171 | + DELTA_OK, /* Success */ |
172 | + DELTA_OUT_OF_MEMORY, /* Could not allocate required memory */ |
173 | + DELTA_INDEX_NEEDED, /* A delta_index must be passed */ |
174 | + DELTA_SOURCE_EMPTY, /* A source_info had no content */ |
175 | + DELTA_SOURCE_BAD, /* A source_info had invalid or corrupt content */ |
176 | + DELTA_BUFFER_EMPTY, /* A buffer pointer and size */ |
177 | + DELTA_SIZE_TOO_BIG, /* Delta data is larger than the max requested */ |
178 | +} delta_result; |
179 | + |
180 | + |
181 | /* |
182 | * create_delta_index: compute index data from given buffer |
183 | * |
184 | - * This returns a pointer to a struct delta_index that should be passed to |
185 | - * subsequent create_delta() calls, or to free_delta_index(). A NULL pointer |
186 | - * is returned on failure. The given buffer must not be freed nor altered |
187 | - * before free_delta_index() is called. The returned pointer must be freed |
188 | - * using free_delta_index(). |
189 | + * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct |
190 | + * delta_index that should be passed to subsequent create_delta() calls, or to |
191 | + * free_delta_index(). Other values are a failure, and *fresh is unset. |
192 | + * The given buffer must not be freed nor altered before free_delta_index() is |
193 | + * called. The resultant struct must be freed using free_delta_index(). |
194 | */ |
195 | -extern struct delta_index * |
196 | +extern delta_result |
197 | create_delta_index(const struct source_info *src, |
198 | - struct delta_index *old); |
199 | + struct delta_index *old, |
200 | + struct delta_index **fresh); |
201 | |
202 | |
203 | /* |
204 | * create_delta_index_from_delta: compute index data from given buffer |
205 | * |
206 | - * This returns a pointer to a struct delta_index that should be passed to |
207 | - * subsequent create_delta() calls, or to free_delta_index(). A NULL pointer |
208 | - * is returned on failure. |
209 | + * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct |
210 | + * delta_index that should be passed to subsequent create_delta() calls, or to |
211 | + * free_delta_index(). Other values are a failure, and *fresh is unset. |
212 | * The bytes must be in the form of a delta structure, as generated by |
213 | * create_delta(). The generated index will only index the insert bytes, and |
214 | * not any of the control structures. |
215 | */ |
216 | -extern struct delta_index * |
217 | +extern delta_result |
218 | create_delta_index_from_delta(const struct source_info *delta, |
219 | - struct delta_index *old); |
220 | + struct delta_index *old, |
221 | + struct delta_index **fresh); |
222 | /* |
223 | * free_delta_index: free the index created by create_delta_index() |
224 | * |
225 | @@ -67,15 +82,16 @@ |
226 | * |
227 | * This function may be called multiple times with different buffers using |
228 | * the same delta_index pointer. If max_delta_size is non-zero and the |
229 | - * resulting delta is to be larger than max_delta_size then NULL is returned. |
230 | - * On success, a non-NULL pointer to the buffer with the delta data is |
231 | - * returned and *delta_size is updated with its size. The returned buffer |
232 | - * must be freed by the caller. |
233 | + * resulting delta is to be larger than max_delta_size then DELTA_SIZE_TOO_BIG |
234 | + * is returned. Otherwise on success, DELTA_OK is returned and *delta_data is |
235 | + * set to a new buffer with the delta data and *delta_size is updated with its |
236 | + * size. That buffer must be freed by the caller. |
237 | */ |
238 | -extern void * |
239 | +extern delta_result |
240 | create_delta(const struct delta_index *index, |
241 | - const void *buf, unsigned long bufsize, |
242 | - unsigned long *delta_size, unsigned long max_delta_size); |
243 | + const void *buf, unsigned long bufsize, |
244 | + unsigned long *delta_size, unsigned long max_delta_size, |
245 | + void **delta_data); |
246 | |
247 | /* the smallest possible delta size is 3 bytes |
248 | * Target size, Copy command, Copy length |
249 | |
250 | === modified file 'bzrlib/diff-delta.c' |
251 | --- bzrlib/diff-delta.c 2010-08-02 16:35:11 +0000 |
252 | +++ bzrlib/diff-delta.c 2011-03-22 16:01:27 +0000 |
253 | @@ -4,7 +4,7 @@ |
254 | * This code was greatly inspired by parts of LibXDiff from Davide Libenzi |
255 | * http://www.xmailserver.org/xdiff-lib.html |
256 | * |
257 | - * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007 |
258 | + * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007 |
259 | * Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009 |
260 | * |
261 | * This program is free software; you can redistribute it and/or modify |
262 | @@ -280,8 +280,11 @@ |
263 | if (fit_in_old) { |
264 | // fprintf(stderr, "Fit all %d entries into old index\n", |
265 | // copied_count); |
266 | - /* No need to allocate a new buffer */ |
267 | - return NULL; |
268 | + /* |
269 | + * No need to allocate a new buffer, but return old_index ptr so |
270 | + * callers can distinguish this from an OOM failure. |
271 | + */ |
272 | + return old_index; |
273 | } else { |
274 | // fprintf(stderr, "Fit only %d entries into old index," |
275 | // " reallocating\n", copied_count); |
276 | @@ -370,9 +373,10 @@ |
277 | } |
278 | |
279 | |
280 | -struct delta_index * |
281 | +delta_result |
282 | create_delta_index(const struct source_info *src, |
283 | - struct delta_index *old) |
284 | + struct delta_index *old, |
285 | + struct delta_index **fresh) |
286 | { |
287 | unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count; |
288 | unsigned int total_num_entries; |
289 | @@ -383,7 +387,7 @@ |
290 | unsigned long memsize; |
291 | |
292 | if (!src->buf || !src->size) |
293 | - return NULL; |
294 | + return DELTA_SOURCE_EMPTY; |
295 | buffer = src->buf; |
296 | |
297 | /* Determine index hash size. Note that indexing skips the |
298 | @@ -408,7 +412,7 @@ |
299 | sizeof(*entry) * total_num_entries; |
300 | mem = malloc(memsize); |
301 | if (!mem) |
302 | - return NULL; |
303 | + return DELTA_OUT_OF_MEMORY; |
304 | hash = mem; |
305 | mem = hash + hsize; |
306 | entry = mem; |
307 | @@ -419,7 +423,7 @@ |
308 | hash_count = calloc(hsize, sizeof(*hash_count)); |
309 | if (!hash_count) { |
310 | free(hash); |
311 | - return NULL; |
312 | + return DELTA_OUT_OF_MEMORY; |
313 | } |
314 | |
315 | /* then populate the index for the new data */ |
316 | @@ -450,16 +454,15 @@ |
317 | total_num_entries = limit_hash_buckets(hash, hash_count, hsize, |
318 | total_num_entries); |
319 | free(hash_count); |
320 | - if (old) { |
321 | - old->last_src = src; |
322 | - } |
323 | index = pack_delta_index(hash, hsize, total_num_entries, old); |
324 | free(hash); |
325 | + /* pack_delta_index only returns NULL on malloc failure */ |
326 | if (!index) { |
327 | - return NULL; |
328 | + return DELTA_OUT_OF_MEMORY; |
329 | } |
330 | index->last_src = src; |
331 | - return index; |
332 | + *fresh = index; |
333 | + return DELTA_OK; |
334 | } |
335 | |
336 | /* Take some entries, and put them into a custom hash. |
337 | @@ -679,9 +682,10 @@ |
338 | } |
339 | } |
340 | |
341 | -struct delta_index * |
342 | +delta_result |
343 | create_delta_index_from_delta(const struct source_info *src, |
344 | - struct delta_index *old_index) |
345 | + struct delta_index *old_index, |
346 | + struct delta_index **fresh) |
347 | { |
348 | unsigned int i, num_entries, max_num_entries, prev_val, num_inserted; |
349 | unsigned int hash_offset; |
350 | @@ -690,8 +694,10 @@ |
351 | struct delta_index *new_index; |
352 | struct index_entry *entry, *entries; |
353 | |
354 | + if (!old_index) |
355 | + return DELTA_INDEX_NEEDED; |
356 | if (!src->buf || !src->size) |
357 | - return NULL; |
358 | + return DELTA_SOURCE_EMPTY; |
359 | buffer = src->buf; |
360 | top = buffer + src->size; |
361 | |
362 | @@ -707,7 +713,7 @@ |
363 | /* allocate an array to hold whatever entries we find */ |
364 | entries = malloc(sizeof(*entry) * max_num_entries); |
365 | if (!entries) /* malloc failure */ |
366 | - return NULL; |
367 | + return DELTA_OUT_OF_MEMORY; |
368 | |
369 | /* then populate the index for the new data */ |
370 | prev_val = ~0; |
371 | @@ -774,16 +780,16 @@ |
372 | } |
373 | } |
374 | if (data != top) { |
375 | - /* Something was wrong with this delta */ |
376 | + /* The source_info data passed was corrupted or otherwise invalid */ |
377 | free(entries); |
378 | - return NULL; |
379 | + return DELTA_SOURCE_BAD; |
380 | } |
381 | if (num_entries == 0) { |
382 | /** Nothing to index **/ |
383 | free(entries); |
384 | - return NULL; |
385 | + *fresh = old_index; |
386 | + return DELTA_OK; |
387 | } |
388 | - assert(old_index != NULL); |
389 | old_index->last_src = src; |
390 | /* See if we can fill in these values into the holes in the array */ |
391 | entry = entries; |
392 | @@ -841,11 +847,15 @@ |
393 | new_index = create_index_from_old_and_new_entries(old_index, |
394 | entry, num_entries); |
395 | } else { |
396 | - new_index = NULL; |
397 | + new_index = old_index; |
398 | // fprintf(stderr, "inserted %d without resizing\n", num_inserted); |
399 | } |
400 | free(entries); |
401 | - return new_index; |
402 | + /* create_index_from_old_and_new_entries returns NULL on malloc failure */ |
403 | + if (!new_index) |
404 | + return DELTA_OUT_OF_MEMORY; |
405 | + *fresh = new_index; |
406 | + return DELTA_OK; |
407 | } |
408 | |
409 | void free_delta_index(struct delta_index *index) |
410 | @@ -868,10 +878,11 @@ |
411 | */ |
412 | #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) |
413 | |
414 | -void * |
415 | +delta_result |
416 | create_delta(const struct delta_index *index, |
417 | const void *trg_buf, unsigned long trg_size, |
418 | - unsigned long *delta_size, unsigned long max_size) |
419 | + unsigned long *delta_size, unsigned long max_size, |
420 | + void **delta_data) |
421 | { |
422 | unsigned int i, outpos, outsize, moff, val; |
423 | int msize; |
424 | @@ -882,9 +893,9 @@ |
425 | unsigned long source_size; |
426 | |
427 | if (!trg_buf || !trg_size) |
428 | - return NULL; |
429 | + return DELTA_BUFFER_EMPTY; |
430 | if (index == NULL) |
431 | - return NULL; |
432 | + return DELTA_INDEX_NEEDED; |
433 | |
434 | outpos = 0; |
435 | outsize = 8192; |
436 | @@ -892,7 +903,7 @@ |
437 | outsize = max_size + MAX_OP_SIZE + 1; |
438 | out = malloc(outsize); |
439 | if (!out) |
440 | - return NULL; |
441 | + return DELTA_OUT_OF_MEMORY; |
442 | |
443 | source_size = index->last_src->size + index->last_src->agg_offset; |
444 | |
445 | @@ -1071,7 +1082,7 @@ |
446 | out = realloc(out, outsize); |
447 | if (!out) { |
448 | free(tmp); |
449 | - return NULL; |
450 | + return DELTA_OUT_OF_MEMORY; |
451 | } |
452 | } |
453 | } |
454 | @@ -1081,11 +1092,12 @@ |
455 | |
456 | if (max_size && outpos > max_size) { |
457 | free(out); |
458 | - return NULL; |
459 | + return DELTA_SIZE_TOO_BIG; |
460 | } |
461 | |
462 | *delta_size = outpos; |
463 | - return out; |
464 | + *delta_data = out; |
465 | + return DELTA_OK; |
466 | } |
467 | |
468 | /* vim: et ts=4 sw=4 sts=4 |
469 | |
470 | === modified file 'doc/en/release-notes/bzr-2.4.txt' |
471 | --- doc/en/release-notes/bzr-2.4.txt 2011-03-22 12:29:53 +0000 |
472 | +++ doc/en/release-notes/bzr-2.4.txt 2011-03-22 16:01:27 +0000 |
473 | @@ -291,6 +291,10 @@ |
474 | by cacthing them so they can be re-raised in the controlling thread. It's |
475 | available in the ``bzrlib.cethread`` module. (Vincent Ladeuil) |
476 | |
477 | +* Correctly propogate malloc failures from diff-delta.c code as MemoryError |
478 | + so OOM conditions during groupcompress are clearly reported. This entailed a |
479 | + change to several function signatures. (Martin [gz], #633336) |
480 | + |
481 | * ``HookPoint.lazy_hook`` and ``Hooks.install_named_lazy_hook`` can install |
482 | hooks for which the callable is loaded lazily. (Jelmer Vernooij) |
483 |
So it certainly should have a NEWS entry. I'm happy with the change overall.