Merge lp:~gz/brz/py3_simple_set into lp:brz

Proposed by Martin Packman on 2017-06-21
Status: Merged
Approved by: Martin Packman on 2017-06-21
Approved revision: 6708
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~gz/brz/py3_simple_set
Merge into: lp:brz
Diff against target: 337 lines (+64/-84)
3 files modified
breezy/_simple_set_pyx.pxd (+3/-5)
breezy/_simple_set_pyx.pyx (+51/-66)
breezy/tests/test__simple_set.py (+10/-13)
To merge this branch: bzr merge lp:~gz/brz/py3_simple_set
Reviewer Review Type Date Requested Status
Jelmer Vernooij 2017-06-21 Approve on 2017-06-21
Review via email: mp+326113@code.launchpad.net

Commit message

Update _simple_set_pyx to more modern cython

Description of the change

So simple set did actually compile on Python 3 already, this branch mostly just tidies things up a bit and makes cython do some more work for us. Ideally we'd remove the manual memory block usage, but that turned out to be a pretty large change.

I wonder a bit if this is the correct optimisation level, but can put off questions like that for now. I diffed the generated C and this ends up much the same as before.

To post a comment you must log in.
Jelmer Vernooij (jelmer) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/_simple_set_pyx.pxd'
2--- breezy/_simple_set_pyx.pxd 2010-02-17 17:11:16 +0000
3+++ breezy/_simple_set_pyx.pxd 2017-06-21 22:37:45 +0000
4@@ -23,9 +23,7 @@
5 eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
6 """
7
8-cdef extern from "Python.h":
9- ctypedef struct PyObject:
10- pass
11+from cpython.object cimport PyObject
12
13
14 cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
15@@ -74,8 +72,8 @@
16 # so we manage it manually
17
18 cdef PyObject *_get(self, object key) except? NULL
19- cdef object _add(self, key)
20- cdef int _discard(self, key) except -1
21+ cpdef object add(self, key)
22+ cpdef bint discard(self, key) except -1
23 cdef int _insert_clean(self, PyObject *key) except -1
24 cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
25
26
27=== modified file 'breezy/_simple_set_pyx.pyx'
28--- breezy/_simple_set_pyx.pyx 2017-06-10 00:09:13 +0000
29+++ breezy/_simple_set_pyx.pyx 2017-06-21 22:37:45 +0000
30@@ -18,31 +18,25 @@
31
32 from __future__ import absolute_import
33
34-cdef extern from "python-compat.h":
35- pass
36-
37-cdef extern from "Python.h":
38- ctypedef unsigned long size_t
39- ctypedef long (*hashfunc)(PyObject*) except -1
40- ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)
41- ctypedef int (*visitproc)(PyObject *, void *)
42- ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
43- int Py_EQ
44- void Py_INCREF(PyObject *)
45- void Py_DECREF(PyObject *)
46- ctypedef struct PyTypeObject:
47- hashfunc tp_hash
48- richcmpfunc tp_richcompare
49- traverseproc tp_traverse
50-
51- PyTypeObject *Py_TYPE(PyObject *)
52- # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
53- # thus silently truncates to 32-bits on 64-bit machines.
54- long PyObject_Hash(PyObject *) except -1
55-
56- void *PyMem_Malloc(size_t nbytes)
57- void PyMem_Free(void *)
58- void memset(void *, int, size_t)
59+from cpython.object cimport (
60+ hashfunc,
61+ Py_EQ,
62+ PyObject_Hash,
63+ PyTypeObject,
64+ Py_TYPE,
65+ richcmpfunc,
66+ traverseproc,
67+ visitproc,
68+ )
69+from cpython.mem cimport (
70+ PyMem_Malloc,
71+ PyMem_Free,
72+ )
73+from cpython.ref cimport (
74+ Py_INCREF,
75+ Py_DECREF,
76+ )
77+from libc.string cimport memset
78
79
80 # Dummy is an object used to mark nodes that have been deleted. Since
81@@ -61,11 +55,9 @@
82 _NotImplemented = NotImplemented
83
84
85-cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
86+cdef int _is_equal(object this, long this_hash, object other) except -1:
87 cdef long other_hash
88
89- if this == other:
90- return 1
91 other_hash = PyObject_Hash(other)
92 if other_hash != this_hash:
93 return 0
94@@ -204,13 +196,14 @@
95 """
96 cdef size_t i, n_lookup
97 cdef long the_hash
98- cdef PyObject **table, **slot
99+ cdef PyObject **table
100+ cdef PyObject **slot
101 cdef Py_ssize_t mask
102
103 mask = self._mask
104 table = self._table
105
106- the_hash = PyObject_Hash(key)
107+ the_hash = PyObject_Hash(<object>key)
108 i = the_hash
109 for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
110 slot = &table[i & mask]
111@@ -236,7 +229,9 @@
112 :return: The new size of the internal table
113 """
114 cdef Py_ssize_t new_size, n_bytes, remaining
115- cdef PyObject **new_table, **old_table, **slot
116+ cdef PyObject **new_table
117+ cdef PyObject **old_table
118+ cdef PyObject **slot
119
120 new_size = DEFAULT_SIZE
121 while new_size <= min_used and new_size > 0:
122@@ -269,7 +264,7 @@
123 slot = old_table
124 while remaining > 0:
125 if slot[0] == NULL: # unused slot
126- pass
127+ pass
128 elif slot[0] == _dummy: # dummy slot
129 remaining = remaining - 1
130 else: # active slot
131@@ -279,22 +274,18 @@
132 PyMem_Free(old_table)
133 return new_size
134
135- def add(self, key):
136+ cpdef object add(self, key):
137 """Similar to set.add(), start tracking this key.
138-
139+
140 There is one small difference, which is that we return the object that
141 is stored at the given location. (which is closer to the
142 dict.setdefault() functionality.)
143 """
144- return self._add(key)
145-
146- cdef object _add(self, key):
147- cdef PyObject **slot, *py_key
148- cdef int added
149-
150- py_key = <PyObject *>key
151- if (Py_TYPE(py_key).tp_richcompare == NULL
152- or Py_TYPE(py_key).tp_hash == NULL):
153+ cdef PyObject **slot
154+ cdef bint added
155+
156+ if (Py_TYPE(key).tp_richcompare == NULL
157+ or Py_TYPE(key).tp_hash == NULL):
158 raise TypeError('Types added to SimpleSet must implement'
159 ' both tp_richcompare and tp_hash')
160 added = 0
161@@ -302,15 +293,15 @@
162 assert self._used < self._mask
163 slot = _lookup(self, key)
164 if (slot[0] == NULL):
165- Py_INCREF(py_key)
166+ Py_INCREF(key)
167 self._fill = self._fill + 1
168 self._used = self._used + 1
169- slot[0] = py_key
170+ slot[0] = <PyObject *>key
171 added = 1
172 elif (slot[0] == _dummy):
173- Py_INCREF(py_key)
174+ Py_INCREF(key)
175 self._used = self._used + 1
176- slot[0] = py_key
177+ slot[0] = <PyObject *>key
178 added = 1
179 # No else: clause. If _lookup returns a pointer to
180 # a live object, then we already have a value at this location.
181@@ -324,23 +315,18 @@
182 # so we can still return it
183 return retval
184
185- def discard(self, key):
186+ cpdef bint discard(self, key) except -1:
187 """Remove key from the set, whether it exists or not.
188
189 :return: False if the item did not exist, True if it did
190 """
191- if self._discard(key):
192- return True
193- return False
194-
195- cdef int _discard(self, key) except -1:
196- cdef PyObject **slot, *py_key
197+ cdef PyObject **slot
198
199 slot = _lookup(self, key)
200 if slot[0] == NULL or slot[0] == _dummy:
201 return 0
202 self._used = self._used - 1
203- Py_DECREF(slot[0])
204+ Py_DECREF(<object>slot[0])
205 slot[0] = _dummy
206 # PySet uses the heuristic: If more than 1/5 are dummies, then resize
207 # them away
208@@ -399,7 +385,6 @@
209 if self.set is not None and self._used == self.set._used:
210 return self.len
211 return 0
212-
213
214
215 cdef api SimpleSet SimpleSet_New():
216@@ -461,12 +446,12 @@
217 cdef size_t i, n_lookup
218 cdef Py_ssize_t mask
219 cdef long key_hash
220- cdef PyObject **table, **slot, *cur, **free_slot, *py_key
221+ cdef PyObject **table
222+ cdef PyObject **slot
223+ cdef PyObject *cur
224+ cdef PyObject **free_slot
225
226- py_key = <PyObject *>key
227- # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
228- # (it treats hash() as returning an 'int' rather than a 'long')
229- key_hash = PyObject_Hash(py_key)
230+ key_hash = PyObject_Hash(key)
231 i = <size_t>key_hash
232 mask = self._mask
233 table = self._table
234@@ -481,13 +466,13 @@
235 return free_slot
236 else:
237 return slot
238- if cur == py_key:
239+ if cur == <PyObject *>key:
240 # Found an exact pointer to the key
241 return slot
242 if cur == _dummy:
243 if free_slot == NULL:
244 free_slot = slot
245- elif _is_equal(py_key, key_hash, cur):
246+ elif _is_equal(key, key_hash, <object>cur):
247 # Both py_key and cur belong in this slot, return it
248 return slot
249 i = i + 1 + n_lookup
250@@ -519,8 +504,8 @@
251 This may be the same object, or it may be an equivalent object.
252 (consider dict.setdefault(key, key))
253 """
254- return _check_self(self)._add(key)
255-
256+ return _check_self(self).add(key)
257+
258
259 cdef api int SimpleSet_Contains(object self, object key) except -1:
260 """Is key present in self?"""
261@@ -535,7 +520,7 @@
262 :return: 1 if there was an object present, 0 if there was not, and -1 on
263 error.
264 """
265- return _check_self(self)._discard(key)
266+ return _check_self(self).discard(key)
267
268
269 cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
270
271=== modified file 'breezy/tests/test__simple_set.py'
272--- breezy/tests/test__simple_set.py 2017-05-25 01:35:55 +0000
273+++ breezy/tests/test__simple_set.py 2017-06-21 22:37:45 +0000
274@@ -35,7 +35,7 @@
275 """A simple object which has a fixed hash value.
276
277 We could have used an 'int', but it turns out that Int objects don't
278- implement tp_richcompare...
279+ implement tp_richcompare in Python 2.
280 """
281
282 def __init__(self, the_hash):
283@@ -69,12 +69,16 @@
284 def __eq__(self, other):
285 raise RuntimeError('I refuse to play nice')
286
287+ __hash__ = _Hashable.__hash__
288+
289
290 class _NoImplementCompare(_Hashable):
291
292 def __eq__(self, other):
293 return NotImplemented
294
295+ __hash__ = _Hashable.__hash__
296+
297
298 # Even though this is an extension, we don't permute the tests for a python
299 # version. As the plain python version is just a dict or set
300@@ -87,14 +91,6 @@
301 _test_needs_features = [compiled_simpleset_feature]
302 module = _simple_set_pyx
303
304- def assertIn(self, obj, container):
305- self.assertTrue(obj in container,
306- '%s not found in %s' % (obj, container))
307-
308- def assertNotIn(self, obj, container):
309- self.assertTrue(obj not in container,
310- 'We found %s in %s' % (obj, container))
311-
312 def assertFillState(self, used, fill, mask, obj):
313 self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))
314
315@@ -281,9 +277,10 @@
316
317 def test__resize(self):
318 obj = self.module.SimpleSet()
319- k1 = ('foo',)
320- k2 = ('bar',)
321- k3 = ('baz',)
322+ # Need objects with exact hash as checking offset of <null> later
323+ k1 = _Hashable(501)
324+ k2 = _Hashable(591)
325+ k3 = _Hashable(2051)
326 obj.add(k1)
327 obj.add(k2)
328 obj.add(k3)
329@@ -373,7 +370,7 @@
330 all.add(key)
331 self.assertEqual(sorted([k1, k2, k3]), sorted(all))
332 iterator = iter(obj)
333- next(iterator)
334+ self.assertIn(next(iterator), all)
335 obj.add(('foo',))
336 # Set changed size
337 self.assertRaises(RuntimeError, next, iterator)

Subscribers

People subscribed via source and target branches