Merge lp:~gz/brz/py3_simple_set into lp:brz
- py3_simple_set
- Merge into trunk
Proposed by
Martin Packman
Status: | Merged |
---|---|
Approved by: | Martin Packman |
Approved revision: | no longer in the source branch. |
Merge reported by: | The Breezy Bot |
Merged at revision: | not available |
Proposed branch: | lp:~gz/brz/py3_simple_set |
Merge into: | lp:brz |
Diff against target: |
337 lines (+64/-84) 3 files modified
breezy/_simple_set_pyx.pxd (+3/-5) breezy/_simple_set_pyx.pyx (+51/-66) breezy/tests/test__simple_set.py (+10/-13) |
To merge this branch: | bzr merge lp:~gz/brz/py3_simple_set |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Jelmer Vernooij | Approve | ||
Review via email: mp+326113@code.launchpad.net |
Commit message
Update _simple_set_pyx to more modern cython
Description of the change
So simple set did actually compile on Python 3 already, this branch mostly just tidies things up a bit and makes cython do some more work for us. Ideally we'd remove the manual memory block usage, but that turned out to be a pretty large change.
I wonder a bit if this is the correct optimisation level, but can put off questions like that for now. I diffed the generated C and this ends up much the same as before.
To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'breezy/_simple_set_pyx.pxd' | |||
2 | --- breezy/_simple_set_pyx.pxd 2010-02-17 17:11:16 +0000 | |||
3 | +++ breezy/_simple_set_pyx.pxd 2017-06-21 22:37:45 +0000 | |||
4 | @@ -23,9 +23,7 @@ | |||
5 | 23 | eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key | 23 | eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key |
6 | 24 | """ | 24 | """ |
7 | 25 | 25 | ||
11 | 26 | cdef extern from "Python.h": | 26 | from cpython.object cimport PyObject |
9 | 27 | ctypedef struct PyObject: | ||
10 | 28 | pass | ||
12 | 29 | 27 | ||
13 | 30 | 28 | ||
14 | 31 | cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: | 29 | cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: |
15 | @@ -74,8 +72,8 @@ | |||
16 | 74 | # so we manage it manually | 72 | # so we manage it manually |
17 | 75 | 73 | ||
18 | 76 | cdef PyObject *_get(self, object key) except? NULL | 74 | cdef PyObject *_get(self, object key) except? NULL |
21 | 77 | cdef object _add(self, key) | 75 | cpdef object add(self, key) |
22 | 78 | cdef int _discard(self, key) except -1 | 76 | cpdef bint discard(self, key) except -1 |
23 | 79 | cdef int _insert_clean(self, PyObject *key) except -1 | 77 | cdef int _insert_clean(self, PyObject *key) except -1 |
24 | 80 | cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1 | 78 | cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1 |
25 | 81 | 79 | ||
26 | 82 | 80 | ||
27 | === modified file 'breezy/_simple_set_pyx.pyx' | |||
28 | --- breezy/_simple_set_pyx.pyx 2017-06-10 00:09:13 +0000 | |||
29 | +++ breezy/_simple_set_pyx.pyx 2017-06-21 22:37:45 +0000 | |||
30 | @@ -18,31 +18,25 @@ | |||
31 | 18 | 18 | ||
32 | 19 | from __future__ import absolute_import | 19 | from __future__ import absolute_import |
33 | 20 | 20 | ||
59 | 21 | cdef extern from "python-compat.h": | 21 | from cpython.object cimport ( |
60 | 22 | pass | 22 | hashfunc, |
61 | 23 | 23 | Py_EQ, | |
62 | 24 | cdef extern from "Python.h": | 24 | PyObject_Hash, |
63 | 25 | ctypedef unsigned long size_t | 25 | PyTypeObject, |
64 | 26 | ctypedef long (*hashfunc)(PyObject*) except -1 | 26 | Py_TYPE, |
65 | 27 | ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int) | 27 | richcmpfunc, |
66 | 28 | ctypedef int (*visitproc)(PyObject *, void *) | 28 | traverseproc, |
67 | 29 | ctypedef int (*traverseproc)(PyObject *, visitproc, void *) | 29 | visitproc, |
68 | 30 | int Py_EQ | 30 | ) |
69 | 31 | void Py_INCREF(PyObject *) | 31 | from cpython.mem cimport ( |
70 | 32 | void Py_DECREF(PyObject *) | 32 | PyMem_Malloc, |
71 | 33 | ctypedef struct PyTypeObject: | 33 | PyMem_Free, |
72 | 34 | hashfunc tp_hash | 34 | ) |
73 | 35 | richcmpfunc tp_richcompare | 35 | from cpython.ref cimport ( |
74 | 36 | traverseproc tp_traverse | 36 | Py_INCREF, |
75 | 37 | 37 | Py_DECREF, | |
76 | 38 | PyTypeObject *Py_TYPE(PyObject *) | 38 | ) |
77 | 39 | # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and | 39 | from libc.string cimport memset |
53 | 40 | # thus silently truncates to 32-bits on 64-bit machines. | ||
54 | 41 | long PyObject_Hash(PyObject *) except -1 | ||
55 | 42 | |||
56 | 43 | void *PyMem_Malloc(size_t nbytes) | ||
57 | 44 | void PyMem_Free(void *) | ||
58 | 45 | void memset(void *, int, size_t) | ||
78 | 46 | 40 | ||
79 | 47 | 41 | ||
80 | 48 | # Dummy is an object used to mark nodes that have been deleted. Since | 42 | # Dummy is an object used to mark nodes that have been deleted. Since |
81 | @@ -61,11 +55,9 @@ | |||
82 | 61 | _NotImplemented = NotImplemented | 55 | _NotImplemented = NotImplemented |
83 | 62 | 56 | ||
84 | 63 | 57 | ||
86 | 64 | cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1: | 58 | cdef int _is_equal(object this, long this_hash, object other) except -1: |
87 | 65 | cdef long other_hash | 59 | cdef long other_hash |
88 | 66 | 60 | ||
89 | 67 | if this == other: | ||
90 | 68 | return 1 | ||
91 | 69 | other_hash = PyObject_Hash(other) | 61 | other_hash = PyObject_Hash(other) |
92 | 70 | if other_hash != this_hash: | 62 | if other_hash != this_hash: |
93 | 71 | return 0 | 63 | return 0 |
94 | @@ -204,13 +196,14 @@ | |||
95 | 204 | """ | 196 | """ |
96 | 205 | cdef size_t i, n_lookup | 197 | cdef size_t i, n_lookup |
97 | 206 | cdef long the_hash | 198 | cdef long the_hash |
99 | 207 | cdef PyObject **table, **slot | 199 | cdef PyObject **table |
100 | 200 | cdef PyObject **slot | ||
101 | 208 | cdef Py_ssize_t mask | 201 | cdef Py_ssize_t mask |
102 | 209 | 202 | ||
103 | 210 | mask = self._mask | 203 | mask = self._mask |
104 | 211 | table = self._table | 204 | table = self._table |
105 | 212 | 205 | ||
107 | 213 | the_hash = PyObject_Hash(key) | 206 | the_hash = PyObject_Hash(<object>key) |
108 | 214 | i = the_hash | 207 | i = the_hash |
109 | 215 | for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever | 208 | for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever |
110 | 216 | slot = &table[i & mask] | 209 | slot = &table[i & mask] |
111 | @@ -236,7 +229,9 @@ | |||
112 | 236 | :return: The new size of the internal table | 229 | :return: The new size of the internal table |
113 | 237 | """ | 230 | """ |
114 | 238 | cdef Py_ssize_t new_size, n_bytes, remaining | 231 | cdef Py_ssize_t new_size, n_bytes, remaining |
116 | 239 | cdef PyObject **new_table, **old_table, **slot | 232 | cdef PyObject **new_table |
117 | 233 | cdef PyObject **old_table | ||
118 | 234 | cdef PyObject **slot | ||
119 | 240 | 235 | ||
120 | 241 | new_size = DEFAULT_SIZE | 236 | new_size = DEFAULT_SIZE |
121 | 242 | while new_size <= min_used and new_size > 0: | 237 | while new_size <= min_used and new_size > 0: |
122 | @@ -269,7 +264,7 @@ | |||
123 | 269 | slot = old_table | 264 | slot = old_table |
124 | 270 | while remaining > 0: | 265 | while remaining > 0: |
125 | 271 | if slot[0] == NULL: # unused slot | 266 | if slot[0] == NULL: # unused slot |
127 | 272 | pass | 267 | pass |
128 | 273 | elif slot[0] == _dummy: # dummy slot | 268 | elif slot[0] == _dummy: # dummy slot |
129 | 274 | remaining = remaining - 1 | 269 | remaining = remaining - 1 |
130 | 275 | else: # active slot | 270 | else: # active slot |
131 | @@ -279,22 +274,18 @@ | |||
132 | 279 | PyMem_Free(old_table) | 274 | PyMem_Free(old_table) |
133 | 280 | return new_size | 275 | return new_size |
134 | 281 | 276 | ||
136 | 282 | def add(self, key): | 277 | cpdef object add(self, key): |
137 | 283 | """Similar to set.add(), start tracking this key. | 278 | """Similar to set.add(), start tracking this key. |
139 | 284 | 279 | ||
140 | 285 | There is one small difference, which is that we return the object that | 280 | There is one small difference, which is that we return the object that |
141 | 286 | is stored at the given location. (which is closer to the | 281 | is stored at the given location. (which is closer to the |
142 | 287 | dict.setdefault() functionality.) | 282 | dict.setdefault() functionality.) |
143 | 288 | """ | 283 | """ |
153 | 289 | return self._add(key) | 284 | cdef PyObject **slot |
154 | 290 | 285 | cdef bint added | |
155 | 291 | cdef object _add(self, key): | 286 | |
156 | 292 | cdef PyObject **slot, *py_key | 287 | if (Py_TYPE(key).tp_richcompare == NULL |
157 | 293 | cdef int added | 288 | or Py_TYPE(key).tp_hash == NULL): |
149 | 294 | |||
150 | 295 | py_key = <PyObject *>key | ||
151 | 296 | if (Py_TYPE(py_key).tp_richcompare == NULL | ||
152 | 297 | or Py_TYPE(py_key).tp_hash == NULL): | ||
158 | 298 | raise TypeError('Types added to SimpleSet must implement' | 289 | raise TypeError('Types added to SimpleSet must implement' |
159 | 299 | ' both tp_richcompare and tp_hash') | 290 | ' both tp_richcompare and tp_hash') |
160 | 300 | added = 0 | 291 | added = 0 |
161 | @@ -302,15 +293,15 @@ | |||
162 | 302 | assert self._used < self._mask | 293 | assert self._used < self._mask |
163 | 303 | slot = _lookup(self, key) | 294 | slot = _lookup(self, key) |
164 | 304 | if (slot[0] == NULL): | 295 | if (slot[0] == NULL): |
166 | 305 | Py_INCREF(py_key) | 296 | Py_INCREF(key) |
167 | 306 | self._fill = self._fill + 1 | 297 | self._fill = self._fill + 1 |
168 | 307 | self._used = self._used + 1 | 298 | self._used = self._used + 1 |
170 | 308 | slot[0] = py_key | 299 | slot[0] = <PyObject *>key |
171 | 309 | added = 1 | 300 | added = 1 |
172 | 310 | elif (slot[0] == _dummy): | 301 | elif (slot[0] == _dummy): |
174 | 311 | Py_INCREF(py_key) | 302 | Py_INCREF(key) |
175 | 312 | self._used = self._used + 1 | 303 | self._used = self._used + 1 |
177 | 313 | slot[0] = py_key | 304 | slot[0] = <PyObject *>key |
178 | 314 | added = 1 | 305 | added = 1 |
179 | 315 | # No else: clause. If _lookup returns a pointer to | 306 | # No else: clause. If _lookup returns a pointer to |
180 | 316 | # a live object, then we already have a value at this location. | 307 | # a live object, then we already have a value at this location. |
181 | @@ -324,23 +315,18 @@ | |||
182 | 324 | # so we can still return it | 315 | # so we can still return it |
183 | 325 | return retval | 316 | return retval |
184 | 326 | 317 | ||
186 | 327 | def discard(self, key): | 318 | cpdef bint discard(self, key) except -1: |
187 | 328 | """Remove key from the set, whether it exists or not. | 319 | """Remove key from the set, whether it exists or not. |
188 | 329 | 320 | ||
189 | 330 | :return: False if the item did not exist, True if it did | 321 | :return: False if the item did not exist, True if it did |
190 | 331 | """ | 322 | """ |
197 | 332 | if self._discard(key): | 323 | cdef PyObject **slot |
192 | 333 | return True | ||
193 | 334 | return False | ||
194 | 335 | |||
195 | 336 | cdef int _discard(self, key) except -1: | ||
196 | 337 | cdef PyObject **slot, *py_key | ||
198 | 338 | 324 | ||
199 | 339 | slot = _lookup(self, key) | 325 | slot = _lookup(self, key) |
200 | 340 | if slot[0] == NULL or slot[0] == _dummy: | 326 | if slot[0] == NULL or slot[0] == _dummy: |
201 | 341 | return 0 | 327 | return 0 |
202 | 342 | self._used = self._used - 1 | 328 | self._used = self._used - 1 |
204 | 343 | Py_DECREF(slot[0]) | 329 | Py_DECREF(<object>slot[0]) |
205 | 344 | slot[0] = _dummy | 330 | slot[0] = _dummy |
206 | 345 | # PySet uses the heuristic: If more than 1/5 are dummies, then resize | 331 | # PySet uses the heuristic: If more than 1/5 are dummies, then resize |
207 | 346 | # them away | 332 | # them away |
208 | @@ -399,7 +385,6 @@ | |||
209 | 399 | if self.set is not None and self._used == self.set._used: | 385 | if self.set is not None and self._used == self.set._used: |
210 | 400 | return self.len | 386 | return self.len |
211 | 401 | return 0 | 387 | return 0 |
212 | 402 | |||
213 | 403 | 388 | ||
214 | 404 | 389 | ||
215 | 405 | cdef api SimpleSet SimpleSet_New(): | 390 | cdef api SimpleSet SimpleSet_New(): |
216 | @@ -461,12 +446,12 @@ | |||
217 | 461 | cdef size_t i, n_lookup | 446 | cdef size_t i, n_lookup |
218 | 462 | cdef Py_ssize_t mask | 447 | cdef Py_ssize_t mask |
219 | 463 | cdef long key_hash | 448 | cdef long key_hash |
221 | 464 | cdef PyObject **table, **slot, *cur, **free_slot, *py_key | 449 | cdef PyObject **table |
222 | 450 | cdef PyObject **slot | ||
223 | 451 | cdef PyObject *cur | ||
224 | 452 | cdef PyObject **free_slot | ||
225 | 465 | 453 | ||
230 | 466 | py_key = <PyObject *>key | 454 | key_hash = PyObject_Hash(key) |
227 | 467 | # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit | ||
228 | 468 | # (it treats hash() as returning an 'int' rather than a 'long') | ||
229 | 469 | key_hash = PyObject_Hash(py_key) | ||
231 | 470 | i = <size_t>key_hash | 455 | i = <size_t>key_hash |
232 | 471 | mask = self._mask | 456 | mask = self._mask |
233 | 472 | table = self._table | 457 | table = self._table |
234 | @@ -481,13 +466,13 @@ | |||
235 | 481 | return free_slot | 466 | return free_slot |
236 | 482 | else: | 467 | else: |
237 | 483 | return slot | 468 | return slot |
239 | 484 | if cur == py_key: | 469 | if cur == <PyObject *>key: |
240 | 485 | # Found an exact pointer to the key | 470 | # Found an exact pointer to the key |
241 | 486 | return slot | 471 | return slot |
242 | 487 | if cur == _dummy: | 472 | if cur == _dummy: |
243 | 488 | if free_slot == NULL: | 473 | if free_slot == NULL: |
244 | 489 | free_slot = slot | 474 | free_slot = slot |
246 | 490 | elif _is_equal(py_key, key_hash, cur): | 475 | elif _is_equal(key, key_hash, <object>cur): |
247 | 491 | # Both py_key and cur belong in this slot, return it | 476 | # Both py_key and cur belong in this slot, return it |
248 | 492 | return slot | 477 | return slot |
249 | 493 | i = i + 1 + n_lookup | 478 | i = i + 1 + n_lookup |
250 | @@ -519,8 +504,8 @@ | |||
251 | 519 | This may be the same object, or it may be an equivalent object. | 504 | This may be the same object, or it may be an equivalent object. |
252 | 520 | (consider dict.setdefault(key, key)) | 505 | (consider dict.setdefault(key, key)) |
253 | 521 | """ | 506 | """ |
256 | 522 | return _check_self(self)._add(key) | 507 | return _check_self(self).add(key) |
257 | 523 | 508 | ||
258 | 524 | 509 | ||
259 | 525 | cdef api int SimpleSet_Contains(object self, object key) except -1: | 510 | cdef api int SimpleSet_Contains(object self, object key) except -1: |
260 | 526 | """Is key present in self?""" | 511 | """Is key present in self?""" |
261 | @@ -535,7 +520,7 @@ | |||
262 | 535 | :return: 1 if there was an object present, 0 if there was not, and -1 on | 520 | :return: 1 if there was an object present, 0 if there was not, and -1 on |
263 | 536 | error. | 521 | error. |
264 | 537 | """ | 522 | """ |
266 | 538 | return _check_self(self)._discard(key) | 523 | return _check_self(self).discard(key) |
267 | 539 | 524 | ||
268 | 540 | 525 | ||
269 | 541 | cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: | 526 | cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: |
270 | 542 | 527 | ||
271 | === modified file 'breezy/tests/test__simple_set.py' | |||
272 | --- breezy/tests/test__simple_set.py 2017-05-25 01:35:55 +0000 | |||
273 | +++ breezy/tests/test__simple_set.py 2017-06-21 22:37:45 +0000 | |||
274 | @@ -35,7 +35,7 @@ | |||
275 | 35 | """A simple object which has a fixed hash value. | 35 | """A simple object which has a fixed hash value. |
276 | 36 | 36 | ||
277 | 37 | We could have used an 'int', but it turns out that Int objects don't | 37 | We could have used an 'int', but it turns out that Int objects don't |
279 | 38 | implement tp_richcompare... | 38 | implement tp_richcompare in Python 2. |
280 | 39 | """ | 39 | """ |
281 | 40 | 40 | ||
282 | 41 | def __init__(self, the_hash): | 41 | def __init__(self, the_hash): |
283 | @@ -69,12 +69,16 @@ | |||
284 | 69 | def __eq__(self, other): | 69 | def __eq__(self, other): |
285 | 70 | raise RuntimeError('I refuse to play nice') | 70 | raise RuntimeError('I refuse to play nice') |
286 | 71 | 71 | ||
287 | 72 | __hash__ = _Hashable.__hash__ | ||
288 | 73 | |||
289 | 72 | 74 | ||
290 | 73 | class _NoImplementCompare(_Hashable): | 75 | class _NoImplementCompare(_Hashable): |
291 | 74 | 76 | ||
292 | 75 | def __eq__(self, other): | 77 | def __eq__(self, other): |
293 | 76 | return NotImplemented | 78 | return NotImplemented |
294 | 77 | 79 | ||
295 | 80 | __hash__ = _Hashable.__hash__ | ||
296 | 81 | |||
297 | 78 | 82 | ||
298 | 79 | # Even though this is an extension, we don't permute the tests for a python | 83 | # Even though this is an extension, we don't permute the tests for a python |
299 | 80 | # version. As the plain python version is just a dict or set | 84 | # version. As the plain python version is just a dict or set |
300 | @@ -87,14 +91,6 @@ | |||
301 | 87 | _test_needs_features = [compiled_simpleset_feature] | 91 | _test_needs_features = [compiled_simpleset_feature] |
302 | 88 | module = _simple_set_pyx | 92 | module = _simple_set_pyx |
303 | 89 | 93 | ||
304 | 90 | def assertIn(self, obj, container): | ||
305 | 91 | self.assertTrue(obj in container, | ||
306 | 92 | '%s not found in %s' % (obj, container)) | ||
307 | 93 | |||
308 | 94 | def assertNotIn(self, obj, container): | ||
309 | 95 | self.assertTrue(obj not in container, | ||
310 | 96 | 'We found %s in %s' % (obj, container)) | ||
311 | 97 | |||
312 | 98 | def assertFillState(self, used, fill, mask, obj): | 94 | def assertFillState(self, used, fill, mask, obj): |
313 | 99 | self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask)) | 95 | self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask)) |
314 | 100 | 96 | ||
315 | @@ -281,9 +277,10 @@ | |||
316 | 281 | 277 | ||
317 | 282 | def test__resize(self): | 278 | def test__resize(self): |
318 | 283 | obj = self.module.SimpleSet() | 279 | obj = self.module.SimpleSet() |
322 | 284 | k1 = ('foo',) | 280 | # Need objects with exact hash as checking offset of <null> later |
323 | 285 | k2 = ('bar',) | 281 | k1 = _Hashable(501) |
324 | 286 | k3 = ('baz',) | 282 | k2 = _Hashable(591) |
325 | 283 | k3 = _Hashable(2051) | ||
326 | 287 | obj.add(k1) | 284 | obj.add(k1) |
327 | 288 | obj.add(k2) | 285 | obj.add(k2) |
328 | 289 | obj.add(k3) | 286 | obj.add(k3) |
329 | @@ -373,7 +370,7 @@ | |||
330 | 373 | all.add(key) | 370 | all.add(key) |
331 | 374 | self.assertEqual(sorted([k1, k2, k3]), sorted(all)) | 371 | self.assertEqual(sorted([k1, k2, k3]), sorted(all)) |
332 | 375 | iterator = iter(obj) | 372 | iterator = iter(obj) |
334 | 376 | next(iterator) | 373 | self.assertIn(next(iterator), all) |
335 | 377 | obj.add(('foo',)) | 374 | obj.add(('foo',)) |
336 | 378 | # Set changed size | 375 | # Set changed size |
337 | 379 | self.assertRaises(RuntimeError, next, iterator) | 376 | self.assertRaises(RuntimeError, next, iterator) |