Merge lp:~gz/brz/py3_simple_set into lp:brz

Proposed by Martin Packman
Status: Merged
Approved by: Martin Packman
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~gz/brz/py3_simple_set
Merge into: lp:brz
Diff against target: 337 lines (+64/-84)
3 files modified
breezy/_simple_set_pyx.pxd (+3/-5)
breezy/_simple_set_pyx.pyx (+51/-66)
breezy/tests/test__simple_set.py (+10/-13)
To merge this branch: bzr merge lp:~gz/brz/py3_simple_set
Reviewer Review Type Date Requested Status
Jelmer Vernooij Approve
Review via email: mp+326113@code.launchpad.net

Commit message

Update _simple_set_pyx to more modern cython

Description of the change

So simple set did actually compile on Python 3 already, this branch mostly just tidies things up a bit and makes cython do some more work for us. Ideally we'd remove the manual memory block usage, but that turned out to be a pretty large change.

I wonder a bit if this is the correct optimisation level, but can put off questions like that for now. I diffed the generated C and this ends up much the same as before.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'breezy/_simple_set_pyx.pxd'
--- breezy/_simple_set_pyx.pxd 2010-02-17 17:11:16 +0000
+++ breezy/_simple_set_pyx.pxd 2017-06-21 22:37:45 +0000
@@ -23,9 +23,7 @@
23 eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key23 eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
24"""24"""
2525
26cdef extern from "Python.h":26from cpython.object cimport PyObject
27 ctypedef struct PyObject:
28 pass
2927
3028
31cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:29cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
@@ -74,8 +72,8 @@
74 # so we manage it manually72 # so we manage it manually
7573
76 cdef PyObject *_get(self, object key) except? NULL74 cdef PyObject *_get(self, object key) except? NULL
77 cdef object _add(self, key)75 cpdef object add(self, key)
78 cdef int _discard(self, key) except -176 cpdef bint discard(self, key) except -1
79 cdef int _insert_clean(self, PyObject *key) except -177 cdef int _insert_clean(self, PyObject *key) except -1
80 cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -178 cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
8179
8280
=== modified file 'breezy/_simple_set_pyx.pyx'
--- breezy/_simple_set_pyx.pyx 2017-06-10 00:09:13 +0000
+++ breezy/_simple_set_pyx.pyx 2017-06-21 22:37:45 +0000
@@ -18,31 +18,25 @@
1818
19from __future__ import absolute_import19from __future__ import absolute_import
2020
21cdef extern from "python-compat.h":21from cpython.object cimport (
22 pass22 hashfunc,
2323 Py_EQ,
24cdef extern from "Python.h":24 PyObject_Hash,
25 ctypedef unsigned long size_t25 PyTypeObject,
26 ctypedef long (*hashfunc)(PyObject*) except -126 Py_TYPE,
27 ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)27 richcmpfunc,
28 ctypedef int (*visitproc)(PyObject *, void *)28 traverseproc,
29 ctypedef int (*traverseproc)(PyObject *, visitproc, void *)29 visitproc,
30 int Py_EQ30 )
31 void Py_INCREF(PyObject *)31from cpython.mem cimport (
32 void Py_DECREF(PyObject *)32 PyMem_Malloc,
33 ctypedef struct PyTypeObject:33 PyMem_Free,
34 hashfunc tp_hash34 )
35 richcmpfunc tp_richcompare35from cpython.ref cimport (
36 traverseproc tp_traverse36 Py_INCREF,
3737 Py_DECREF,
38 PyTypeObject *Py_TYPE(PyObject *)38 )
39 # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and39from libc.string cimport memset
40 # thus silently truncates to 32-bits on 64-bit machines.
41 long PyObject_Hash(PyObject *) except -1
42
43 void *PyMem_Malloc(size_t nbytes)
44 void PyMem_Free(void *)
45 void memset(void *, int, size_t)
4640
4741
48# Dummy is an object used to mark nodes that have been deleted. Since42# Dummy is an object used to mark nodes that have been deleted. Since
@@ -61,11 +55,9 @@
61_NotImplemented = NotImplemented55_NotImplemented = NotImplemented
6256
6357
64cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:58cdef int _is_equal(object this, long this_hash, object other) except -1:
65 cdef long other_hash59 cdef long other_hash
6660
67 if this == other:
68 return 1
69 other_hash = PyObject_Hash(other)61 other_hash = PyObject_Hash(other)
70 if other_hash != this_hash:62 if other_hash != this_hash:
71 return 063 return 0
@@ -204,13 +196,14 @@
204 """196 """
205 cdef size_t i, n_lookup197 cdef size_t i, n_lookup
206 cdef long the_hash198 cdef long the_hash
207 cdef PyObject **table, **slot199 cdef PyObject **table
200 cdef PyObject **slot
208 cdef Py_ssize_t mask201 cdef Py_ssize_t mask
209202
210 mask = self._mask203 mask = self._mask
211 table = self._table204 table = self._table
212205
213 the_hash = PyObject_Hash(key)206 the_hash = PyObject_Hash(<object>key)
214 i = the_hash207 i = the_hash
215 for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever208 for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
216 slot = &table[i & mask]209 slot = &table[i & mask]
@@ -236,7 +229,9 @@
236 :return: The new size of the internal table229 :return: The new size of the internal table
237 """230 """
238 cdef Py_ssize_t new_size, n_bytes, remaining231 cdef Py_ssize_t new_size, n_bytes, remaining
239 cdef PyObject **new_table, **old_table, **slot232 cdef PyObject **new_table
233 cdef PyObject **old_table
234 cdef PyObject **slot
240235
241 new_size = DEFAULT_SIZE236 new_size = DEFAULT_SIZE
242 while new_size <= min_used and new_size > 0:237 while new_size <= min_used and new_size > 0:
@@ -269,7 +264,7 @@
269 slot = old_table264 slot = old_table
270 while remaining > 0:265 while remaining > 0:
271 if slot[0] == NULL: # unused slot266 if slot[0] == NULL: # unused slot
272 pass 267 pass
273 elif slot[0] == _dummy: # dummy slot268 elif slot[0] == _dummy: # dummy slot
274 remaining = remaining - 1269 remaining = remaining - 1
275 else: # active slot270 else: # active slot
@@ -279,22 +274,18 @@
279 PyMem_Free(old_table)274 PyMem_Free(old_table)
280 return new_size275 return new_size
281276
282 def add(self, key):277 cpdef object add(self, key):
283 """Similar to set.add(), start tracking this key.278 """Similar to set.add(), start tracking this key.
284 279
285 There is one small difference, which is that we return the object that280 There is one small difference, which is that we return the object that
286 is stored at the given location. (which is closer to the281 is stored at the given location. (which is closer to the
287 dict.setdefault() functionality.)282 dict.setdefault() functionality.)
288 """283 """
289 return self._add(key)284 cdef PyObject **slot
290285 cdef bint added
291 cdef object _add(self, key):286
292 cdef PyObject **slot, *py_key287 if (Py_TYPE(key).tp_richcompare == NULL
293 cdef int added288 or Py_TYPE(key).tp_hash == NULL):
294
295 py_key = <PyObject *>key
296 if (Py_TYPE(py_key).tp_richcompare == NULL
297 or Py_TYPE(py_key).tp_hash == NULL):
298 raise TypeError('Types added to SimpleSet must implement'289 raise TypeError('Types added to SimpleSet must implement'
299 ' both tp_richcompare and tp_hash')290 ' both tp_richcompare and tp_hash')
300 added = 0291 added = 0
@@ -302,15 +293,15 @@
302 assert self._used < self._mask293 assert self._used < self._mask
303 slot = _lookup(self, key)294 slot = _lookup(self, key)
304 if (slot[0] == NULL):295 if (slot[0] == NULL):
305 Py_INCREF(py_key)296 Py_INCREF(key)
306 self._fill = self._fill + 1297 self._fill = self._fill + 1
307 self._used = self._used + 1298 self._used = self._used + 1
308 slot[0] = py_key299 slot[0] = <PyObject *>key
309 added = 1300 added = 1
310 elif (slot[0] == _dummy):301 elif (slot[0] == _dummy):
311 Py_INCREF(py_key)302 Py_INCREF(key)
312 self._used = self._used + 1303 self._used = self._used + 1
313 slot[0] = py_key304 slot[0] = <PyObject *>key
314 added = 1305 added = 1
315 # No else: clause. If _lookup returns a pointer to306 # No else: clause. If _lookup returns a pointer to
316 # a live object, then we already have a value at this location.307 # a live object, then we already have a value at this location.
@@ -324,23 +315,18 @@
324 # so we can still return it315 # so we can still return it
325 return retval316 return retval
326317
327 def discard(self, key):318 cpdef bint discard(self, key) except -1:
328 """Remove key from the set, whether it exists or not.319 """Remove key from the set, whether it exists or not.
329320
330 :return: False if the item did not exist, True if it did321 :return: False if the item did not exist, True if it did
331 """322 """
332 if self._discard(key):323 cdef PyObject **slot
333 return True
334 return False
335
336 cdef int _discard(self, key) except -1:
337 cdef PyObject **slot, *py_key
338324
339 slot = _lookup(self, key)325 slot = _lookup(self, key)
340 if slot[0] == NULL or slot[0] == _dummy:326 if slot[0] == NULL or slot[0] == _dummy:
341 return 0327 return 0
342 self._used = self._used - 1328 self._used = self._used - 1
343 Py_DECREF(slot[0])329 Py_DECREF(<object>slot[0])
344 slot[0] = _dummy330 slot[0] = _dummy
345 # PySet uses the heuristic: If more than 1/5 are dummies, then resize331 # PySet uses the heuristic: If more than 1/5 are dummies, then resize
346 # them away332 # them away
@@ -399,7 +385,6 @@
399 if self.set is not None and self._used == self.set._used:385 if self.set is not None and self._used == self.set._used:
400 return self.len386 return self.len
401 return 0387 return 0
402
403388
404389
405cdef api SimpleSet SimpleSet_New():390cdef api SimpleSet SimpleSet_New():
@@ -461,12 +446,12 @@
461 cdef size_t i, n_lookup446 cdef size_t i, n_lookup
462 cdef Py_ssize_t mask447 cdef Py_ssize_t mask
463 cdef long key_hash448 cdef long key_hash
464 cdef PyObject **table, **slot, *cur, **free_slot, *py_key449 cdef PyObject **table
450 cdef PyObject **slot
451 cdef PyObject *cur
452 cdef PyObject **free_slot
465453
466 py_key = <PyObject *>key454 key_hash = PyObject_Hash(key)
467 # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
468 # (it treats hash() as returning an 'int' rather than a 'long')
469 key_hash = PyObject_Hash(py_key)
470 i = <size_t>key_hash455 i = <size_t>key_hash
471 mask = self._mask456 mask = self._mask
472 table = self._table457 table = self._table
@@ -481,13 +466,13 @@
481 return free_slot466 return free_slot
482 else:467 else:
483 return slot468 return slot
484 if cur == py_key:469 if cur == <PyObject *>key:
485 # Found an exact pointer to the key470 # Found an exact pointer to the key
486 return slot471 return slot
487 if cur == _dummy:472 if cur == _dummy:
488 if free_slot == NULL:473 if free_slot == NULL:
489 free_slot = slot474 free_slot = slot
490 elif _is_equal(py_key, key_hash, cur):475 elif _is_equal(key, key_hash, <object>cur):
491 # Both py_key and cur belong in this slot, return it476 # Both py_key and cur belong in this slot, return it
492 return slot477 return slot
493 i = i + 1 + n_lookup478 i = i + 1 + n_lookup
@@ -519,8 +504,8 @@
519 This may be the same object, or it may be an equivalent object.504 This may be the same object, or it may be an equivalent object.
520 (consider dict.setdefault(key, key))505 (consider dict.setdefault(key, key))
521 """506 """
522 return _check_self(self)._add(key)507 return _check_self(self).add(key)
523 508
524509
525cdef api int SimpleSet_Contains(object self, object key) except -1:510cdef api int SimpleSet_Contains(object self, object key) except -1:
526 """Is key present in self?"""511 """Is key present in self?"""
@@ -535,7 +520,7 @@
535 :return: 1 if there was an object present, 0 if there was not, and -1 on520 :return: 1 if there was an object present, 0 if there was not, and -1 on
536 error.521 error.
537 """522 """
538 return _check_self(self)._discard(key)523 return _check_self(self).discard(key)
539524
540525
541cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:526cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
542527
=== modified file 'breezy/tests/test__simple_set.py'
--- breezy/tests/test__simple_set.py 2017-05-25 01:35:55 +0000
+++ breezy/tests/test__simple_set.py 2017-06-21 22:37:45 +0000
@@ -35,7 +35,7 @@
35 """A simple object which has a fixed hash value.35 """A simple object which has a fixed hash value.
3636
37 We could have used an 'int', but it turns out that Int objects don't37 We could have used an 'int', but it turns out that Int objects don't
38 implement tp_richcompare...38 implement tp_richcompare in Python 2.
39 """39 """
4040
41 def __init__(self, the_hash):41 def __init__(self, the_hash):
@@ -69,12 +69,16 @@
69 def __eq__(self, other):69 def __eq__(self, other):
70 raise RuntimeError('I refuse to play nice')70 raise RuntimeError('I refuse to play nice')
7171
72 __hash__ = _Hashable.__hash__
73
7274
73class _NoImplementCompare(_Hashable):75class _NoImplementCompare(_Hashable):
7476
75 def __eq__(self, other):77 def __eq__(self, other):
76 return NotImplemented78 return NotImplemented
7779
80 __hash__ = _Hashable.__hash__
81
7882
79# Even though this is an extension, we don't permute the tests for a python83# Even though this is an extension, we don't permute the tests for a python
80# version. As the plain python version is just a dict or set84# version. As the plain python version is just a dict or set
@@ -87,14 +91,6 @@
87 _test_needs_features = [compiled_simpleset_feature]91 _test_needs_features = [compiled_simpleset_feature]
88 module = _simple_set_pyx92 module = _simple_set_pyx
8993
90 def assertIn(self, obj, container):
91 self.assertTrue(obj in container,
92 '%s not found in %s' % (obj, container))
93
94 def assertNotIn(self, obj, container):
95 self.assertTrue(obj not in container,
96 'We found %s in %s' % (obj, container))
97
98 def assertFillState(self, used, fill, mask, obj):94 def assertFillState(self, used, fill, mask, obj):
99 self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))95 self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))
10096
@@ -281,9 +277,10 @@
281277
282 def test__resize(self):278 def test__resize(self):
283 obj = self.module.SimpleSet()279 obj = self.module.SimpleSet()
284 k1 = ('foo',)280 # Need objects with exact hash as checking offset of <null> later
285 k2 = ('bar',)281 k1 = _Hashable(501)
286 k3 = ('baz',)282 k2 = _Hashable(591)
283 k3 = _Hashable(2051)
287 obj.add(k1)284 obj.add(k1)
288 obj.add(k2)285 obj.add(k2)
289 obj.add(k3)286 obj.add(k3)
@@ -373,7 +370,7 @@
373 all.add(key)370 all.add(key)
374 self.assertEqual(sorted([k1, k2, k3]), sorted(all))371 self.assertEqual(sorted([k1, k2, k3]), sorted(all))
375 iterator = iter(obj)372 iterator = iter(obj)
376 next(iterator)373 self.assertIn(next(iterator), all)
377 obj.add(('foo',))374 obj.add(('foo',))
378 # Set changed size375 # Set changed size
379 self.assertRaises(RuntimeError, next, iterator)376 self.assertRaises(RuntimeError, next, iterator)

Subscribers

People subscribed via source and target branches