Merge lp:~jameinel/bzr/2.1-simple-set into lp:bzr

Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/bzr/2.1-simple-set
Merge into: lp:bzr
Diff against target: 1146 lines, 8 files modified
  .bzrignore (+3/-0)
  NEWS (+7/-0)
  bzrlib/_simple_set_pyx.pxd (+91/-0)
  bzrlib/_simple_set_pyx.pyx (+600/-0)
  bzrlib/python-compat.h (+5/-0)
  bzrlib/tests/__init__.py (+1/-0)
  bzrlib/tests/test__simple_set.py (+371/-0)
  setup.py (+1/-0)
To merge this branch: bzr merge lp:~jameinel/bzr/2.1-simple-set
Reviewer: Andrew Bennetts (Approve)
Review via email: mp+13039@code.launchpad.net
Commit message
Description of the change
John A Meinel (jameinel) wrote:
Andrew Bennetts (spiv) wrote:
I'm very excited by this patch series!
This one is:
review approve
Although there are some comments you should look at, especially regarding
licensing...
John A Meinel wrote:
[...]
> === added file 'bzrlib/_simple_set_pyx.pxd'
[...]
> +"""Interface definition of a class like PySet but without caching the hash.
> +
> +This is generally useful when you want to 'intern' objects, etc. Note that this
> +differs from Set in that we:
> + 1) Don't have all of the .intersection, .difference, etc functions
> + 2) Do return the object from the set via queries
> + eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
> +"""
I feel a bit unsure about a type that has both add and __getitem__.
It's a slightly unusual mix of dict-like and set-like APIs. I think it's fine
after some thinking about it; I'm just noting that it seems a bit funny at
first. Although maybe .remove would be more set-like than .__delitem__?
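For concreteness, the hybrid API under discussion can be sketched in a few lines of plain Python (the class name and the dict-backed storage are illustrative stand-ins, not the Pyrex implementation in this branch):

```python
class InterningSet:
    """Sketch of the SimpleSet idea: a set that returns the stored object.

    Unlike the builtin set, queries hand back the object kept in the
    table, which is what makes it usable for interning.
    """

    def __init__(self):
        self._data = {}  # maps key -> the canonical (first-seen) object

    def add(self, key):
        # Return the already-stored equivalent if there is one,
        # otherwise store this object and return it.
        return self._data.setdefault(key, key)

    def discard(self, key):
        self._data.pop(key, None)

    def __getitem__(self, key):
        # Lookup returns the *stored* object, not the probe object.
        return self._data[key]

    def __len__(self):
        return len(self._data)
```

The unusual part Andrew notes is visible here: `add` behaves like a set operation, while `__getitem__` behaves like a dict lookup that returns the canonical key.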
> +cdef api object SimpleSet_Add(object self, object key)
> +cdef api SimpleSet SimpleSet_New()
> +cdef api object SimpleSet_Add(object self, object key)
That appears to be a duplicate declaration of SimpleSet_Add.
> +cdef api int SimpleSet_Contains(object self, object key) except -1
> +cdef api int SimpleSet_Discard(object self, object key) except -1
> +cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
> +cdef api Py_ssize_t SimpleSet_Size(object self) except -1
> +cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key)
>
> === added file 'bzrlib/_simple_set_pyx.pyx'
[...]
> +cdef object _dummy_obj
> +cdef PyObject *_dummy
> +_dummy_obj = object()
> +_dummy = <PyObject *>_dummy_obj
It's not very clear what _dummy is used for. I guess what's missing is some
text giving an overview of the data structure, although I suppose that's what
you mean to convey by the docstrings saying it is similar to the builtin
dict/set types.
Anyway, it appears _dummy is used to avoid resizing/compacting the
table for every single discard. [Part of why it would be nice to have some text
describing the data structure is so that there's some clear terminology to
use... the code pretty clearly talks about “tables” and “slots”, which seem
fairly clear, but then what's an “entry”?]
> +cdef int _is_equal(PyObject *this, long this_hash, PyObject *other):
> + cdef long other_hash
> + cdef PyObject *res
> +
> + if this == other:
> + return 1
> + other_hash = Py_TYPE(other).tp_hash(other)
What happens if 'other' is not hashable?
> + if other_hash != this_hash:
> + return 0
> + res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
Similarly, what if one of the richcompare calls raises an exception?
> +cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
[...]
> + cdef int _insert_clean(self, PyObject *key) except -1:
> + """Insert a key into self.table.
> +
> + This is only meant to be used during times like '_resize',
> + as it makes a lot of assumptions about keys not already being present,
> + and there being no dummy entries.
> + """
> + cdef size_t i, perturb, mask
> + cdef long the_hash
[...]
John A Meinel (jameinel) wrote:
Andrew Bennetts wrote:
...
>> + 1) Don't have all of the .intersection, .difference, etc functions
>> + 2) Do return the object from the set via queries
>> + eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
>> +"""
>
> I feel a bit unsure about a type that has both add and __getitem__.
> It's a slightly unusual mix of dict-like and set-like APIs. I think it's fine
> after some thinking about it, I'm just noting that it seems a bit funny at
> first. Although maybe .remove would be more set-like than .__delitem__?
So at the moment, the only apis I need for interning are:
add() and discard()
I could certainly remove the __getitem__ and __delitem__ if you are more
comfortable with it.
I want to have an api that fits what we need/want to use. As such, it
will probably be driven by actual use cases. And so far, there is only 1
of those...
I considered changing the semantics, such that "__getitem__" == "add()".
The main reason for that is because it avoids the getattr() overhead, as
__getitem__ is a slot in the type struct (tp_item) while .add() requires
an attribute lookup.
Which basically means that in an interning loop you would have:
for bar in group:
bar = dedup[bar]
rather than
bar = dedup.add(bar)
or the old form
bar = dedup.setdefault(bar, bar)
I would guess that doing
add = dedup.add
for bar in group:
bar = add(bar)
Is going to perform the same as the dedup[bar] form. But both should do
better than setdefault. (At a minimum, setdefault takes 2 parameters,
and thus has to create a tuple, etc.)
I don't know if the interpreter has further internal benefits to using
the __getitem__ form, since it is a known api that has to conform in
certain ways.
The big concern is that "dedup[bar]" can mutate 'dedup' and that is
potentially unexpected.
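The loop shapes above can be compared with a plain-Python stand-in (the `Dedup` class and its `__missing__` trick are assumptions for illustration, not code from this branch):

```python
class Dedup(dict):
    """Stand-in interning structure: keys map to their canonical object."""

    def add(self, key):
        # Readable form: return the stored equivalent, inserting if absent.
        return self.setdefault(key, key)

    def __missing__(self, key):
        # Makes dedup[key] insert on a miss -- the mutating __getitem__
        # semantics discussed above.
        self[key] = key
        return key


def intern_all(group, dedup):
    # Hoisting the bound method out of the loop avoids the per-iteration
    # attribute lookup, as suggested in the mail above.
    add = dedup.add
    return [add(bar) for bar in group]
```

Both `dedup.add(bar)` and `dedup[bar]` return the canonical object here; the `__getitem__` form is the one that surprises readers by mutating the container on a miss.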
>
>> +cdef api object SimpleSet_Add(object self, object key)
>> +cdef api SimpleSet SimpleSet_New()
>> +cdef api object SimpleSet_Add(object self, object key)
>
> That appears to be a duplicate declaration of SimpleSet_Add.
Thanks.
...
> [...]
>> +cdef object _dummy_obj
>> +cdef PyObject *_dummy
>> +_dummy_obj = object()
>> +_dummy = <PyObject *>_dummy_obj
>
> It's not very clear what _dummy is used for. I guess what's missing is some
> text giving an overview of the data structure, although I suppose that's what
> you mean to convey by the docstrings saying it is similar to the builtin
> dict/set types.
>
> Anyway, it appears _dummy is used to avoid resizing/compacting the
> table for every single discard. [Part of why it would be nice to have some text
> describing the data structure is so that there's some clear terminology to
> use... the code pretty clearly talks about “tables” and “slots”, which seem
> fairly clear, but then what's an “entry”?]
How's this:
# Data structure definition:
# This is a basic hash table using open addressing.
# http://en.wikipedia.org/wiki/Open_addressing
# Basically that means we keep an array of pointers to Python objects
# (called a table). Each location in the array is called a 'slot'.
#
# An empty slot holds a NULL pointer, a slot where there was an item
# which was then deleted will hold a pointer to _dummy, and a filled
# slot points at the actual object which fills that slot.
[...]
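The table layout John describes can be modeled in a few lines of Python. This sketch uses simple linear probing where the real `_lookup` perturbs the probe with the upper bits of the hash; `ToyTable` and `_DUMMY` are illustrative names:

```python
_DUMMY = object()  # marks a deleted slot so probe chains stay intact


class ToyTable:
    def __init__(self, size=8):
        # size must be a power of two so that `& mask` wraps the index
        self._slots = [None] * size
        self._mask = size - 1

    def _probe(self, key):
        """Yield slot indices starting at hash(key) & mask (linear probing)."""
        i = hash(key) & self._mask
        for _ in range(len(self._slots)):
            yield i
            i = (i + 1) & self._mask

    def add(self, key):
        free = None
        for i in self._probe(key):
            slot = self._slots[i]
            if slot is None:
                # Reaching None proves the key is absent; reuse the first
                # dummy slot seen, if any, instead of this one.
                if free is not None:
                    i = free
                self._slots[i] = key
                return key
            if slot is _DUMMY:
                if free is None:
                    free = i  # remember the first reusable slot
            elif slot == key:
                return slot   # hand back the stored object
        raise RuntimeError('table full')

    def discard(self, key):
        for i in self._probe(key):
            slot = self._slots[i]
            if slot is None:
                return False
            if slot is not _DUMMY and slot == key:
                self._slots[i] = _DUMMY  # keep the probe chain searchable
                return True
        return False
```

The `_DUMMY` sentinel plays exactly the role described above: a deleted slot can be reused for insertion, but a search must keep probing past it until a None slot proves absence.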
Andrew Bennetts (spiv) wrote:
John A Meinel wrote:
[...]
> I would guess that doing
>
> add = dedup.add
> for bar in group:
> bar = add(bar)
>
> Is going to perform the same as the dedup[bar] form. But both should do
> better than setdefault. (At a minimum, setdefault takes 2 parameters,
> and thus has to create a tuple, etc.)
>
> I don't know if the interpreter has further internal benefits to using
> the __getitem__ form, since it is a known api that has to conform in
> certain ways.
Right, I think you'd still see a bit of benefit in the __getitem__ form because
Python knows that that operator only takes one arg and so avoids packing and
unpacking an args tuple for it. (Like METH_O vs. METH_VARARGS.) ISTR some part
of the zope.interface C extension intentionally abuses an obscure no-arg
operator (maybe pos?) as that's the fastest way to invoke C code from Python.
> The big concern is that "dedup[bar]" can mutate 'dedup' and that is
> potentially unexpected.
Agreed. For readability my preference is .add, but obviously sufficiently large
performance benefits may override that. I don't *think* the boost from using
__getitem__ instead would be that much greater for this use, but I haven't
measured so I may be very wrong :)
[...]
> How's this:
>
> # Data structure definition:
Great!
> >> +cdef int _is_equal(PyObject *this, long this_hash, PyObject *other):
> >> + cdef long other_hash
> >> + cdef PyObject *res
> >> +
> >> + if this == other:
> >> + return 1
> >> + other_hash = Py_TYPE(other).tp_hash(other)
> >
> > What happens if 'other' is not hashable?
>
> Other will always be something that is already held in the internal
> structure, and thus has been hashed. 'this' has also already been hashed
> as part of inserting, and thus we've also checked that it can be hashed.
>
> I can change this to PyObject_Hash() if you feel strongly. I was
> avoiding a function call overhead, though it probably doesn't have a
> huge impact on performance.
Well, badly-behaved objects might successfully give a hash value the first time
and then get an error later. I do strongly lean towards paranoia in C code, so
I think PyObject_Hash is probably safest. You ought to be able to construct a
test that defines such a badly-behaved object to see just how bad the fallout
is. If segfaults are possible then definitely close that hole!
Also, PyObject_Hash is where Python implements the logic that an object with no
tp_hash in its type will use hash(id(obj)), so long as it doesn't have
tp_compare or tp_richcompare. I think this is pretty common, e.g. the module
type has no tp_hash. And given that you use the 'hash' global elsewhere in this
module there's definitely some chance for confusion here.
But for the case you're optimising for PyObject_Hash will just call the object's
tp_hash immediately and return the result, so it should be a minimal penalty,
just a C function call overhead.
The thing I was really looking for though was checking the result value of
tp_hash/PyObject_Hash: a return of -1 signals an error that needs to be
propagated.
> >> + if other_hash != this_hash:
> >> + return 0
> >> + res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
[...]
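The "badly-behaved object" Andrew suggests testing with can be sketched directly in Python. A real test would exercise `_simple_set_pyx`; here only the object itself is shown, to illustrate the failure mode (an object that hashes once and then refuses):

```python
class FlakyHash:
    """Hashes successfully once, then raises -- the kind of badly-behaved
    object the review suggests writing a test around."""

    def __init__(self):
        self.calls = 0

    def __hash__(self):
        self.calls += 1
        if self.calls > 1:
            raise RuntimeError('changed my mind about being hashable')
        return 42


obj = FlakyHash()
first = hash(obj)   # succeeds, as during the original insert
try:
    hash(obj)       # a later re-hash (e.g. during a lookup) fails
    failed = False
except RuntimeError:
    failed = True
```

The point of such a test is that the C code must surface the second failure as a Python exception, never as a silent wrong answer or a crash.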
John A Meinel (jameinel) wrote:
Andrew Bennetts wrote:
> John A Meinel wrote:
> [...]
>> I would guess that doing
>>
>> add = dedup.add
>> for bar in group:
>> bar = add(bar)
>>
>> Is going to perform the same as the dedup[bar] form. But both should do
>> better than setdefault. (At a minimum, setdefault takes 2 parameters,
>> and thus has to create a tuple, etc.)
>>
>> I don't know if the interpreter has further internal benefits to using
>> the __getitem__ form, since it is a known api that has to conform in
>> certain ways.
>
> Right, I think you'd still see a bit of benefit in the __getitem__ form because
> Python knows that that operator only takes one arg and so avoids packing and
> unpacking an args tuple for it. (Like METH_O vs. METH_VARARGS.) ISTR some part
> of the zope.interface C extension intentionally abuses an obscure no-arg
> operator (maybe pos?) as that's the fastest way to invoke C code from Python.
>
>> The big concern is that "dedup[bar]" can mutate 'dedup' and that is
>> potentially unexpected.
>
> Agreed. For readability my preference is .add, but obviously sufficiently large
> performance benefits may override that. I don't *think* the boost from using
> __getitem__ instead would be that much greater for this use, but I haven't
> measured so I may be very wrong :)
>
> [...]
>> How's this:
>>
>> # Data structure definition:
>
> Great!
>
>>>> +cdef int _is_equal(PyObject *this, long this_hash, PyObject *other):
>>>> + cdef long other_hash
>>>> + cdef PyObject *res
>>>> +
>>>> + if this == other:
>>>> + return 1
>>>> + other_hash = Py_TYPE(other).tp_hash(other)
>>> What happens if 'other' is not hashable?
>> Other will always be something that is already held in the internal
>> structure, and thus has been hashed. 'this' has also already been hashed
>> as part of inserting, and thus we've also checked that it can be hashed.
>>
>> I can change this to PyObject_Hash() if you feel strongly. I was
>> avoiding a function call overhead, though it probably doesn't have a
>> huge impact on performance.
>
> Well, badly-behaved objects might successfully give a hash value the first time
> and then get an error later. I do strongly lean towards paranoia in C code, so
> I think PyObject_Hash is probably safest. You ought to be able to construct a
> test that defines such a badly-behaved object to see just how bad the fallout
> is. If segfaults are possible then definitely close that hole!
No segfaults, but the potential to leave an error in the pipe. Which
causes random failures IIRC. (I forget the function where '-1' is maybe
an error, so you have to check PyErr_Occurred, etc.)
>
> Also, PyObject_Hash is where Python implements the logic that an object with no
> tp_hash in its type will use hash(id(obj)), so long as it doesn't have
> tp_compare or tp_richcompare. I think this is pretty common, e.g. the module
> type has no tp_hash. And given that you use the 'hash' global elsewhere in this
> module there's definitely some chance for confusion here.
So in my "_insert" I now have the assertion:
if (Py_TYPE(
or Py_TYPE(
John A Meinel (jameinel) wrote:
...
> Again, PyObject_IsTrue tries == Py_True as the very first thing, so the cost
> should be small. If you're worried a macro along the lines of:
>
> #define PY_IS_TRUE(o) ((o == Py_True) || PyObject_IsTrue(o))
>
> might make us both happy.
>
So it turns out that this fix actually causes crazy corruption. Specifically
assert PyObject_IsTrue(Py_NotImplemented)
passes.
In other words "Py_NotImplemented" evaluates to True.
You can also see that with:
>>> bool(NotImplemented)
True
So the code was doing:
if res == NULL:
return -1
if PyObject_IsTrue(res):
...
if res == Py_NotImplemented:
# reverse the comparison.
Anyway, I've tracked this down to some crazy interning issues I've been
seeing. (You have to get a hash collision *and* have objects that return
NotImplemented.)
John
=:->
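The root cause John describes is reproducible from plain Python. This check is hedged for newer interpreters, where truth-testing NotImplemented is deprecated (since 3.9) or may be disallowed outright:

```python
import warnings

# NotImplemented is a truthy singleton, which is exactly why the C code's
# identity check against Py_NotImplemented must come before any
# PyObject_IsTrue() call on a tp_richcompare result.
try:
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        truthy = bool(NotImplemented)
except TypeError:
    truthy = None  # Pythons that forbid truth-testing it entirely
```

On interpreters that still allow the truth test, `truthy` is True, matching John's observation above.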
Andrew Bennetts (spiv) wrote:
John A Meinel wrote:
[...]
> So it turns out that this fix actually causes crazy corruption. Specifically
>
> assert PyObject_IsTrue(Py_NotImplemented)
>
> passes.
Ah, yes, it would.
[...]
> So the code was doing:
>
> if res == NULL:
> return -1
> if PyObject_IsTrue(res):
> ...
> if res == Py_NotImplemented:
> # reverse the comparison.
>
So I guess you need to change this to:
if res == NULL:
    return -1
elif res == Py_NotImplemented:
    # reverse the comparison
    ...
elif PyObject_IsTrue(res):
    ...
And similarly for handling the res of the reversed comparison.
-Andrew.
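Andrew's reordering can be transliterated into a pure-Python sketch (`Opaque` and `is_equal` are illustrative names, not code from the branch):

```python
class Opaque:
    """__eq__ answers only for identity and returns NotImplemented
    otherwise, as many C types' tp_richcompare do."""

    def __eq__(self, other):
        return True if other is self else NotImplemented

    def __hash__(self):
        return 0  # contrived: forces the hash collision mentioned above


def is_equal(this, other):
    res = this.__eq__(other)
    # Check for NotImplemented *before* any truth test, since
    # NotImplemented is itself truthy.
    if res is NotImplemented:
        res = other.__eq__(this)  # reverse the comparison
        if res is NotImplemented:
            return False          # neither side knows: not equal
    return bool(res)
```

With the buggy ordering (truth test first), two distinct `Opaque` objects in the same probe chain would wrongly compare equal, which is the interning corruption John hit.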
Preview Diff
=== modified file '.bzrignore'
--- .bzrignore 2009-09-09 11:43:10 +0000
+++ .bzrignore 2009-10-12 16:51:14 +0000
@@ -58,6 +58,9 @@
 bzrlib/_known_graph_pyx.c
 bzrlib/_readdir_pyx.c
 bzrlib/_rio_pyx.c
+bzrlib/_simple_set_pyx.c
+bzrlib/_simple_set_pyx.h
+bzrlib/_simple_set_pyx_api.h
 bzrlib/_walkdirs_win32.c
 # built extension modules
 bzrlib/_*.dll
 
=== modified file 'NEWS'
--- NEWS 2009-10-08 23:44:40 +0000
+++ NEWS 2009-10-12 16:51:14 +0000
@@ -206,6 +206,13 @@
   repository or branch object is unlocked then relocked the same way.
   (Andrew Bennetts)
 
+* Added ``bzrlib._simple_set_pyx``. This is a hybrid between a Set and a
+  Dict (it only holds keys, but you can lookup the object located at a
+  given key). It has significantly reduced memory consumption versus the
+  builtin objects (1/2 the size of Set, 1/3rd the size of Dict). This will
+  be used as the interning structure for StaticTuple objects, as part of
+  an ongoing push to reduce peak memory consumption. (John Arbash Meinel)
+
 * ``BTreeLeafParser.extract_key`` has been tweaked slightly to reduce
   mallocs while parsing the index (approx 3=>1 mallocs per key read).
   This results in a 10% speedup while reading an index.
 
=== added file 'bzrlib/_simple_set_pyx.pxd'
--- bzrlib/_simple_set_pyx.pxd 1970-01-01 00:00:00 +0000
+++ bzrlib/_simple_set_pyx.pxd 2009-10-12 16:51:14 +0000
@@ -0,0 +1,91 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Interface definition of a class like PySet but without caching the hash.
+
+This is generally useful when you want to 'intern' objects, etc. Note that this
+differs from Set in that we:
+  1) Don't have all of the .intersection, .difference, etc functions
+  2) Do return the object from the set via queries
+     eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
+"""
+
+cdef extern from "Python.h":
+    ctypedef struct PyObject:
+        pass
+
+
+cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
+    """A class similar to PySet, but with simpler implementation.
+
+    The main advantage is that this class uses only 2N memory to store N
+    objects rather than 4N memory. The main trade-off is that we do not cache
+    the hash value of saved objects. As such, it is assumed that computing the
+    hash will be cheap (such as strings or tuples of strings, etc.)
+
+    This also differs in that you can get back the objects that are stored
+    (like a dict), but we also don't implement the complete list of 'set'
+    operations (difference, intersection, etc).
+    """
+    # Data structure definition:
+    #   This is a basic hash table using open addressing.
+    #       http://en.wikipedia.org/wiki/Open_addressing
+    #   Basically that means we keep an array of pointers to Python objects
+    #   (called a table). Each location in the array is called a 'slot'.
+    #
+    #   An empty slot holds a NULL pointer, a slot where there was an item
+    #   which was then deleted will hold a pointer to _dummy, and a filled slot
+    #   points at the actual object which fills that slot.
+    #
+    #   The table is always a power of two, and the default location where an
+    #   object is inserted is at hash(object) & (table_size - 1)
+    #
+    #   If there is a collision, then we search for another location. The
+    #   specific algorithm is in _lookup. We search until we:
+    #       find the object
+    #       find an equivalent object (by tp_richcompare(obj1, obj2, Py_EQ))
+    #       find a NULL slot
+    #
+    #   When an object is deleted, we set its slot to _dummy. this way we don't
+    #   have to track whether there was a collision, and find the corresponding
+    #   keys. (The collision resolution algorithm makes that nearly impossible
+    #   anyway, because it depends on the upper bits of the hash.)
+    #   The main effect of this, is that if we find _dummy, then we can insert
+    #   an object there, but we have to keep searching until we find NULL to
+    #   know that the object is not present elsewhere.
+
+    cdef Py_ssize_t _used   # active
+    cdef Py_ssize_t _fill   # active + dummy
+    cdef Py_ssize_t _mask   # Table contains (mask+1) slots, a power of 2
+    cdef PyObject **_table  # Pyrex/Cython doesn't support arrays to 'object'
+                            # so we manage it manually
+
+    cdef PyObject *_get(self, object key) except? NULL
+    cdef object _add(self, key)
+    cdef int _discard(self, key) except -1
+    cdef int _insert_clean(self, PyObject *key) except -1
+    cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
+
+
+# TODO: might want to export the C api here, though it is all available from
+#       the class object...
+cdef api SimpleSet SimpleSet_New()
+cdef api object SimpleSet_Add(object self, object key)
+cdef api int SimpleSet_Contains(object self, object key) except -1
+cdef api int SimpleSet_Discard(object self, object key) except -1
+cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
+cdef api Py_ssize_t SimpleSet_Size(object self) except -1
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key)
+
=== added file 'bzrlib/_simple_set_pyx.pyx'
--- bzrlib/_simple_set_pyx.pyx 1970-01-01 00:00:00 +0000
+++ bzrlib/_simple_set_pyx.pyx 2009-10-12 16:51:14 +0000
@@ -0,0 +1,600 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Definition of a class that is similar to Set with some small changes."""
+
+cdef extern from "python-compat.h":
+    pass
+
+cdef extern from "Python.h":
+    ctypedef unsigned long size_t
+    ctypedef long (*hashfunc)(PyObject*)
+    ctypedef PyObject *(*richcmpfunc)(PyObject *, PyObject *, int)
+    ctypedef int (*visitproc)(PyObject *, void *)
+    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
+    int Py_EQ
+    PyObject *Py_True
+    PyObject *Py_NotImplemented
+    void Py_INCREF(PyObject *)
+    void Py_DECREF(PyObject *)
+    ctypedef struct PyTypeObject:
+        hashfunc tp_hash
+        richcmpfunc tp_richcompare
+        traverseproc tp_traverse
+
+    PyTypeObject *Py_TYPE(PyObject *)
+    int PyObject_IsTrue(PyObject *)
+
+    void *PyMem_Malloc(size_t nbytes)
+    void PyMem_Free(void *)
+    void memset(void *, int, size_t)
+
+
+# Dummy is an object used to mark nodes that have been deleted. Since
+# collisions require us to move a node to an alternative location, if we just
+# set an entry to NULL on delete, we won't find any relocated nodes.
+# We have to use _dummy_obj because we need to keep a refcount to it, but we
+# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
+# over the code base.
+cdef object _dummy_obj
+cdef PyObject *_dummy
+_dummy_obj = object()
+_dummy = <PyObject *>_dummy_obj
+
+
+cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
+    cdef long other_hash
+    cdef PyObject *res
+
+    if this == other:
+        return 1
+    other_hash = Py_TYPE(other).tp_hash(other)
+    if other_hash == -1:
+        # Even though other successfully hashed in the past, it seems to have
+        # changed its mind, and failed this time, so propagate the failure.
+        return -1
+    if other_hash != this_hash:
+        return 0
+
+    # This implements a subset of the PyObject_RichCompareBool functionality.
+    # Namely it:
+    #   1) Doesn't try to do anything with old-style classes
+    #   2) Assumes that both objects have a tp_richcompare implementation, and
+    #      that if that is not enough to compare equal, then they are not
+    #      equal. (It doesn't try to cast them both to some intermediate form
+    #      that would compare equal.)
+    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
+    if res == NULL: # Exception
+        return -1
+    if PyObject_IsTrue(res):
+        Py_DECREF(res)
+        return 1
+    if res == Py_NotImplemented:
+        Py_DECREF(res)
+        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
+        if res == NULL:
+            return -1
+        if PyObject_IsTrue(res):
+            Py_DECREF(res)
+            return 1
+    Py_DECREF(res)
+    return 0
+
+
+cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
+    """This class can be used to track canonical forms for objects.
+
+    It is similar in function to the interned dictionary that is used by
+    strings. However:
+
+      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
+         of it
+      2) It only stores one reference to the object, rather than 2 (key vs
+         key:value)
+
+    As such, it uses 1/3rd the amount of memory to store a pointer to the
+    interned object.
+    """
+    # Attributes are defined in the .pxd file
+    DEF DEFAULT_SIZE=1024
+
+    def __init__(self):
+        cdef Py_ssize_t size, n_bytes
+
+        size = DEFAULT_SIZE
+        self._mask = size - 1
+        self._used = 0
+        self._fill = 0
+        n_bytes = sizeof(PyObject*) * size;
+        self._table = <PyObject **>PyMem_Malloc(n_bytes)
+        if self._table == NULL:
+            raise MemoryError()
+        memset(self._table, 0, n_bytes)
+
+    def __dealloc__(self):
+        if self._table != NULL:
+            PyMem_Free(self._table)
+            self._table = NULL
+
+    property used:
+        def __get__(self):
+            return self._used
+
+    property fill:
+        def __get__(self):
+            return self._fill
+
+    property mask:
+        def __get__(self):
+            return self._mask
+
+    def _memory_size(self):
+        """Return the number of bytes of memory consumed by this class."""
+        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
+
+    def __len__(self):
+        return self._used
+
+    def _test_lookup(self, key):
+        cdef PyObject **slot
+
+        slot = _lookup(self, key)
+        if slot[0] == NULL:
+            res = '<null>'
+        elif slot[0] == _dummy:
+            res = '<dummy>'
+        else:
+            res = <object>slot[0]
+        return <int>(slot - self._table), res
+
+    def __contains__(self, key):
+        """Is key present in this SimpleSet."""
+        cdef PyObject **slot
+
+        slot = _lookup(self, key)
+        if slot[0] == NULL or slot[0] == _dummy:
+            return False
+        return True
+
+    cdef PyObject *_get(self, object key) except? NULL:
+        """Return the object (or nothing) define at the given location."""
+        cdef PyObject **slot
+
+        slot = _lookup(self, key)
+        if slot[0] == NULL or slot[0] == _dummy:
+            return NULL
+        return slot[0]
+
+    def __getitem__(self, key):
+        """Return a stored item that is equivalent to key."""
+        cdef PyObject *py_val
+
+        py_val = self._get(key)
+        if py_val == NULL:
+            raise KeyError("Key %s is not present" % key)
+        val = <object>(py_val)
+        return val
+
+    cdef int _insert_clean(self, PyObject *key) except -1:
+        """Insert a key into self.table.
+
+        This is only meant to be used during times like '_resize',
+        as it makes a lot of assumptions about keys not already being present,
+        and there being no dummy entries.
+        """
+        cdef size_t i, n_lookup
+        cdef long the_hash
+        cdef PyObject **table, **slot
+        cdef Py_ssize_t mask
+
+        mask = self._mask
+        table = self._table
+
+        the_hash = Py_TYPE(key).tp_hash(key)
+        if the_hash == -1:
+            return -1
+        i = the_hash
+        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
+            slot = &table[i & mask]
+            if slot[0] == NULL:
+                slot[0] = key
+                self._fill = self._fill + 1
+                self._used = self._used + 1
+                return 1
+            i = i + 1 + n_lookup
+        raise RuntimeError('ran out of slots.')
+
+    def _py_resize(self, min_used):
+        """Do not use this directly, it is only exposed for testing."""
+        return self._resize(min_used)
+
+    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
+        """Resize the internal table.
+
+        The final table will be big enough to hold at least min_used entries.
+        We will copy the data from the existing table over, leaving out dummy
+        entries.
+
+        :return: The new size of the internal table
+        """
+        cdef Py_ssize_t new_size, n_bytes, remaining
+        cdef PyObject **new_table, **old_table, **slot
+
+        new_size = DEFAULT_SIZE
+        while new_size <= min_used and new_size > 0:
+            new_size = new_size << 1
+        # We rolled over our signed size field
+        if new_size <= 0:
+            raise MemoryError()
+        # Even if min_used == self._mask + 1, and we aren't changing the actual
+        # size, we will still run the algorithm so that dummy entries are
+        # removed
+        # TODO: Test this
+        # if new_size < self._used:
+        #     raise RuntimeError('cannot shrink SimpleSet to something'
+        #                        ' smaller than the number of used slots.')
+        n_bytes = sizeof(PyObject*) * new_size;
+        new_table = <PyObject **>PyMem_Malloc(n_bytes)
+        if new_table == NULL:
+            raise MemoryError()
+
+        old_table = self._table
+        self._table = new_table
+        memset(self._table, 0, n_bytes)
+        self._mask = new_size - 1
+        self._used = 0
+        remaining = self._fill
+        self._fill = 0
+
+        # Moving everything to the other table is refcount neutral, so we don't
+        # worry about it.
+        slot = old_table
397 | 265 | while remaining > 0: | ||
398 | 266 | if slot[0] == NULL: # unused slot | ||
399 | 267 | pass | ||
400 | 268 | elif slot[0] == _dummy: # dummy slot | ||
401 | 269 | remaining = remaining - 1 | ||
402 | 270 | else: # active slot | ||
403 | 271 | remaining = remaining - 1 | ||
404 | 272 | self._insert_clean(slot[0]) | ||
405 | 273 | slot = slot + 1 | ||
406 | 274 | PyMem_Free(old_table) | ||
407 | 275 | return new_size | ||
408 | 276 | |||
409 | 277 | def add(self, key): | ||
410 | 278 | """Similar to set.add(), start tracking this key. | ||
411 | 279 | |||
412 | 280 | There is one small difference: we return the object that | ||
413 | 281 | is stored at the given location (which is closer to the | ||
414 | 282 | dict.setdefault() functionality). | ||
415 | 283 | """ | ||
416 | 284 | return self._add(key) | ||
417 | 285 | |||
418 | 286 | cdef object _add(self, key): | ||
419 | 287 | cdef PyObject **slot, *py_key | ||
420 | 288 | cdef int added | ||
421 | 289 | |||
422 | 290 | py_key = <PyObject *>key | ||
423 | 291 | if (Py_TYPE(py_key).tp_richcompare == NULL | ||
424 | 292 | or Py_TYPE(py_key).tp_hash == NULL): | ||
425 | 293 | raise TypeError('Types added to SimpleSet must implement' | ||
426 | 294 | ' both tp_richcompare and tp_hash') | ||
427 | 295 | added = 0 | ||
428 | 296 | # We need at least one empty slot | ||
429 | 297 | assert self._used < self._mask | ||
430 | 298 | slot = _lookup(self, key) | ||
431 | 299 | if (slot[0] == NULL): | ||
432 | 300 | Py_INCREF(py_key) | ||
433 | 301 | self._fill = self._fill + 1 | ||
434 | 302 | self._used = self._used + 1 | ||
435 | 303 | slot[0] = py_key | ||
436 | 304 | added = 1 | ||
437 | 305 | elif (slot[0] == _dummy): | ||
438 | 306 | Py_INCREF(py_key) | ||
439 | 307 | self._used = self._used + 1 | ||
440 | 308 | slot[0] = py_key | ||
441 | 309 | added = 1 | ||
442 | 310 | # No else: clause. If _lookup returns a pointer to | ||
443 | 311 | # a live object, then we already have a value at this location. | ||
444 | 312 | retval = <object>(slot[0]) | ||
445 | 313 | # PySet and PyDict resize when they are 2/3rds full; we'll follow suit | ||
446 | 314 | if added and (self._fill * 3) >= ((self._mask + 1) * 2): | ||
447 | 315 | # However, we always resize to give a load factor of 2:1 | ||
448 | 316 | self._resize(self._used * 2) | ||
449 | 317 | # Even if we resized and ended up moving retval into a different slot, | ||
450 | 318 | # it is still the value that is held at the slot equivalent to 'key', | ||
451 | 319 | # so we can still return it | ||
452 | 320 | return retval | ||
453 | 321 | |||
454 | 322 | def discard(self, key): | ||
455 | 323 | """Remove key from the set, whether it exists or not. | ||
456 | 324 | |||
457 | 325 | :return: False if the item did not exist, True if it did | ||
458 | 326 | """ | ||
459 | 327 | if self._discard(key): | ||
460 | 328 | return True | ||
461 | 329 | return False | ||
462 | 330 | |||
463 | 331 | cdef int _discard(self, key) except -1: | ||
464 | 332 | cdef PyObject **slot, *py_key | ||
465 | 333 | |||
466 | 334 | slot = _lookup(self, key) | ||
467 | 335 | if slot[0] == NULL or slot[0] == _dummy: | ||
468 | 336 | return 0 | ||
469 | 337 | self._used = self._used - 1 | ||
470 | 338 | Py_DECREF(slot[0]) | ||
471 | 339 | slot[0] = _dummy | ||
472 | 340 | # PySet uses the heuristic: If more than 1/5 are dummies, then resize | ||
473 | 341 | # them away | ||
474 | 342 | # if ((so->_fill - so->_used) * 5 < so->mask) | ||
475 | 343 | # However, we are planning on using this as an interning structure, in | ||
476 | 344 | # which we will be putting a lot of objects. And we expect that large | ||
477 | 345 | # groups of them are going to have the same lifetime. | ||
478 | 346 | # Dummy entries hurt a little bit because they cause the lookup to keep | ||
479 | 347 | # searching, but resizing is also rather expensive | ||
480 | 348 | # For now, we'll just use their algorithm, but we may want to revisit | ||
481 | 349 | # it | ||
482 | 350 | if ((self._fill - self._used) * 5 > self._mask): | ||
483 | 351 | self._resize(self._used * 2) | ||
484 | 352 | return 1 | ||
485 | 353 | |||
486 | 354 | def __iter__(self): | ||
487 | 355 | return _SimpleSet_iterator(self) | ||
488 | 356 | |||
489 | 357 | |||
490 | 358 | cdef class _SimpleSet_iterator: | ||
491 | 359 | """Iterator over the SimpleSet structure.""" | ||
492 | 360 | |||
493 | 361 | cdef Py_ssize_t pos | ||
494 | 362 | cdef SimpleSet set | ||
495 | 363 | cdef Py_ssize_t _used # track if things have been mutated while iterating | ||
496 | 364 | cdef Py_ssize_t len # number of entries left | ||
497 | 365 | |||
498 | 366 | def __init__(self, obj): | ||
499 | 367 | self.set = obj | ||
500 | 368 | self.pos = 0 | ||
501 | 369 | self._used = self.set._used | ||
502 | 370 | self.len = self.set._used | ||
503 | 371 | |||
504 | 372 | def __iter__(self): | ||
505 | 373 | return self | ||
506 | 374 | |||
507 | 375 | def __next__(self): | ||
508 | 376 | cdef Py_ssize_t mask, i | ||
509 | 377 | cdef PyObject *key | ||
510 | 378 | |||
511 | 379 | if self.set is None: | ||
512 | 380 | raise StopIteration | ||
513 | 381 | if self.set._used != self._used: | ||
514 | 382 | # Force this exception to continue to be raised | ||
515 | 383 | self._used = -1 | ||
516 | 384 | raise RuntimeError("Set size changed during iteration") | ||
517 | 385 | if not SimpleSet_Next(self.set, &self.pos, &key): | ||
518 | 386 | self.set = None | ||
519 | 387 | raise StopIteration | ||
520 | 388 | # we found something | ||
521 | 389 | the_key = <object>key # INCREF | ||
522 | 390 | self.len = self.len - 1 | ||
523 | 391 | return the_key | ||
524 | 392 | |||
525 | 393 | def __length_hint__(self): | ||
526 | 394 | if self.set is not None and self._used == self.set._used: | ||
527 | 395 | return self.len | ||
528 | 396 | return 0 | ||
529 | 397 | |||
530 | 398 | |||
531 | 399 | |||
532 | 400 | cdef api SimpleSet SimpleSet_New(): | ||
533 | 401 | """Create a new SimpleSet object.""" | ||
534 | 402 | return SimpleSet() | ||
535 | 403 | |||
536 | 404 | |||
537 | 405 | cdef SimpleSet _check_self(object self): | ||
538 | 406 | """Check that the parameter is not None. | ||
539 | 407 | |||
540 | 408 | Pyrex/Cython will do type checking, but only to ensure that an object is | ||
541 | 409 | either the right type or None. You can say "object foo not None" for pure | ||
542 | 410 | python functions, but not for C functions. | ||
543 | 411 | So this is just a helper for all the apis that need to do the check. | ||
544 | 412 | """ | ||
545 | 413 | cdef SimpleSet true_self | ||
546 | 414 | if self is None: | ||
547 | 415 | raise TypeError('self must not be None') | ||
548 | 416 | true_self = self | ||
549 | 417 | return true_self | ||
550 | 418 | |||
551 | 419 | |||
552 | 420 | cdef PyObject **_lookup(SimpleSet self, object key) except NULL: | ||
553 | 421 | """Find the slot where 'key' would fit. | ||
554 | 422 | |||
555 | 423 | This is the same as a dict's 'lookup' function. | ||
556 | 424 | |||
557 | 425 | :param key: An object we are looking up | ||
558 | 426 | :param hash: The hash for key | ||
559 | 427 | :return: The location in self.table where key should be put. | ||
560 | 428 | location == NULL is an exception, but (*location) == NULL just | ||
561 | 429 | indicates the slot is empty and can be used. | ||
562 | 430 | """ | ||
563 | 431 | # This uses Quadratic Probing: | ||
564 | 432 | # http://en.wikipedia.org/wiki/Quadratic_probing | ||
565 | 433 | # with c1 = c2 = 1/2 | ||
566 | 434 | # This leads to probe locations at: | ||
567 | 435 | # h0 = hash(k1) | ||
568 | 436 | # h1 = h0 + 1 | ||
569 | 437 | # h2 = h0 + 3 = h1 + 1 + 1 | ||
570 | 438 | # h3 = h0 + 6 = h2 + 1 + 2 | ||
571 | 439 | # h4 = h0 + 10 = h3 + 1 + 3 | ||
572 | 440 | # Note that all of these are '& mask', but that is computed *after* the | ||
573 | 441 | # offset. | ||
574 | 442 | # This differs from the algorithm used by Set and Dict, which effectively | ||
575 | 443 | # use double-hashing, and a step size that starts large but dwindles to | ||
576 | 444 | # stepping one-by-one. | ||
577 | 445 | # This gives more 'locality' in that if you have a collision at offset X, | ||
578 | 446 | # the first fallback is X+1, which is fast to check. However, that means | ||
579 | 447 | # that an object w/ hash X+1 will also check there, and then X+2 next. | ||
580 | 448 | # However, for objects with differing hashes, their chains are different. | ||
581 | 449 | # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ... | ||
582 | 450 | # So different hashes diverge quickly. | ||
583 | 451 | # A bigger problem is that we *only* ever use the lowest bits of the hash | ||
584 | 452 | # So all integers (x + SIZE*N) will resolve into the same bucket, and all | ||
585 | 453 | # use the same collision resolution. We may want to try to find a way to | ||
586 | 454 | # incorporate the upper bits of the hash with quadratic probing. (For | ||
587 | 455 | # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.) | ||
588 | 456 | cdef size_t i, n_lookup | ||
589 | 457 | cdef Py_ssize_t mask | ||
590 | 458 | cdef long key_hash | ||
591 | 459 | cdef PyObject **table, **slot, *cur, **free_slot, *py_key | ||
592 | 460 | |||
593 | 461 | # hash is a signed long; we use it as an unsigned size_t offset | ||
594 | 462 | key_hash = hash(key) | ||
595 | 463 | i = <size_t>key_hash | ||
596 | 464 | mask = self._mask | ||
597 | 465 | table = self._table | ||
598 | 466 | free_slot = NULL | ||
599 | 467 | py_key = <PyObject *>key | ||
600 | 468 | for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever | ||
601 | 469 | slot = &table[i & mask] | ||
602 | 470 | cur = slot[0] | ||
603 | 471 | if cur == NULL: | ||
604 | 472 | # Found a blank spot | ||
605 | 473 | if free_slot != NULL: | ||
606 | 474 | # Did we find an earlier _dummy entry? | ||
607 | 475 | return free_slot | ||
608 | 476 | else: | ||
609 | 477 | return slot | ||
610 | 478 | if cur == py_key: | ||
611 | 479 | # Found an exact pointer to the key | ||
612 | 480 | return slot | ||
613 | 481 | if cur == _dummy: | ||
614 | 482 | if free_slot == NULL: | ||
615 | 483 | free_slot = slot | ||
616 | 484 | elif _is_equal(py_key, key_hash, cur): | ||
617 | 485 | # Both py_key and cur belong in this slot, return it | ||
618 | 486 | return slot | ||
619 | 487 | i = i + 1 + n_lookup | ||
620 | 488 | raise AssertionError('should never get here') | ||
621 | 489 | |||
622 | 490 | |||
623 | 491 | cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL: | ||
624 | 492 | """Find the slot where 'key' would fit. | ||
625 | 493 | |||
626 | 494 | This is the same as a dict's 'lookup' function. This is a private | ||
627 | 495 | api because mutating what you get without maintaining the other invariants | ||
628 | 496 | is a 'bad thing'. | ||
629 | 497 | |||
630 | 498 | :param key: An object we are looking up | ||
631 | 499 | :param hash: The hash for key | ||
632 | 500 | :return: The location in self._table where key should be put | ||
633 | 501 | should never be NULL, but may reference a NULL (PyObject*) | ||
634 | 502 | """ | ||
635 | 503 | return _lookup(_check_self(self), key) | ||
636 | 504 | |||
637 | 505 | |||
638 | 506 | cdef api object SimpleSet_Add(object self, object key): | ||
639 | 507 | """Add a key to the SimpleSet (set). | ||
640 | 508 | |||
641 | 509 | :param self: The SimpleSet to add the key to. | ||
642 | 510 | :param key: The key to be added. If the key is already present, | ||
643 | 511 | self will not be modified | ||
644 | 512 | :return: The current key stored at the location defined by 'key'. | ||
645 | 513 | This may be the same object, or it may be an equivalent object. | ||
646 | 514 | (consider dict.setdefault(key, key)) | ||
647 | 515 | """ | ||
648 | 516 | return _check_self(self)._add(key) | ||
649 | 517 | |||
650 | 518 | |||
651 | 519 | cdef api int SimpleSet_Contains(object self, object key) except -1: | ||
652 | 520 | """Is key present in self?""" | ||
653 | 521 | return (key in _check_self(self)) | ||
654 | 522 | |||
655 | 523 | |||
656 | 524 | cdef api int SimpleSet_Discard(object self, object key) except -1: | ||
657 | 525 | """Remove the object referenced at location 'key'. | ||
658 | 526 | |||
659 | 527 | :param self: The SimpleSet being modified | ||
660 | 528 | :param key: The key we are checking on | ||
661 | 529 | :return: 1 if there was an object present, 0 if there was not, and -1 on | ||
662 | 530 | error. | ||
663 | 531 | """ | ||
664 | 532 | return _check_self(self)._discard(key) | ||
665 | 533 | |||
666 | 534 | |||
667 | 535 | cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: | ||
668 | 536 | """Get a pointer to the object present at location 'key'. | ||
669 | 537 | |||
670 | 538 | This returns an object which is equal to key which was previously added to | ||
671 | 539 | self. This returns a borrowed reference, as it may also return NULL if no | ||
672 | 540 | value is present at that location. | ||
673 | 541 | |||
674 | 542 | :param key: The value we are looking for | ||
675 | 543 | :return: The object present at that location | ||
676 | 544 | """ | ||
677 | 545 | return _check_self(self)._get(key) | ||
678 | 546 | |||
679 | 547 | |||
680 | 548 | cdef api Py_ssize_t SimpleSet_Size(object self) except -1: | ||
681 | 549 | """Get the number of active entries in 'self'""" | ||
682 | 550 | return _check_self(self)._used | ||
683 | 551 | |||
684 | 552 | |||
685 | 553 | cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key): | ||
686 | 554 | """Walk over items in a SimpleSet. | ||
687 | 555 | |||
688 | 556 | :param pos: should be initialized to 0 by the caller, and will be updated | ||
689 | 557 | by this function | ||
690 | 558 | :param key: Will return a borrowed reference to key | ||
691 | 559 | :return: 0 if nothing left, 1 if we are returning a new value | ||
692 | 560 | """ | ||
693 | 561 | cdef Py_ssize_t i, mask | ||
694 | 562 | cdef SimpleSet true_self | ||
695 | 563 | cdef PyObject **table | ||
696 | 564 | true_self = _check_self(self) | ||
697 | 565 | i = pos[0] | ||
698 | 566 | if (i < 0): | ||
699 | 567 | return 0 | ||
700 | 568 | mask = true_self._mask | ||
701 | 569 | table = true_self._table | ||
702 | 570 | while (i <= mask and (table[i] == NULL or table[i] == _dummy)): | ||
703 | 571 | i = i + 1 | ||
704 | 572 | pos[0] = i + 1 | ||
705 | 573 | if (i > mask): | ||
706 | 574 | return 0 # All done | ||
707 | 575 | if (key != NULL): | ||
708 | 576 | key[0] = table[i] | ||
709 | 577 | return 1 | ||
710 | 578 | |||
711 | 579 | |||
712 | 580 | cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg): | ||
713 | 581 | """This is an implementation of 'tp_traverse' that hits the whole table. | ||
714 | 582 | |||
715 | 583 | Cython/Pyrex don't seem to let you define a tp_traverse, and they only | ||
716 | 584 | define one for you if you have an 'object' attribute. Since they don't | ||
717 | 585 | support C arrays of objects, we access the PyObject * directly. | ||
718 | 586 | """ | ||
719 | 587 | cdef Py_ssize_t pos | ||
720 | 588 | cdef PyObject *next_key | ||
721 | 589 | cdef int ret | ||
722 | 590 | |||
723 | 591 | pos = 0 | ||
724 | 592 | while SimpleSet_Next(self, &pos, &next_key): | ||
725 | 593 | ret = visit(next_key, arg) | ||
726 | 594 | if ret: | ||
727 | 595 | return ret | ||
728 | 596 | return 0 | ||
729 | 597 | |||
730 | 598 | # It is a little bit ugly to do this, but it works, and means that Meliae can | ||
731 | 599 | # dump the total memory consumed by all child objects. | ||
732 | 600 | (<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse | ||
733 | 0 | 601 | ||
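The probe order implemented by `_lookup` above (quadratic probing with c1 = c2 = 1/2, with the mask applied *after* the offset is added) can be sketched in plain Python. `probe_sequence` is a hypothetical helper written for illustration, not part of the patch:

```python
def probe_sequence(key_hash, mask, count):
    """Yield the first `count` slot indices _lookup would examine.

    Mirrors the loop in _lookup: the raw index grows quadratically
    (i = i + 1 + n_lookup) and the mask is applied after each offset.
    """
    i = key_hash
    for n_lookup in range(count):
        yield i & mask
        i = i + 1 + n_lookup

# With the default table size of 1024 (mask 0x3ff), hash 643 probes
# slots 643, 644, 646, 649, 653: offsets h0, h0+1, h0+3, h0+6, h0+10.
print(list(probe_sequence(643, 0x3ff, 5)))

# Hashes that are equal modulo the table size share the exact same
# chain, which is the weakness noted in the comments in _lookup.
print(list(probe_sequence(643 + 1024, 0x3ff, 5)))
```

This also shows why the tests below expect `_Hashable(643)` and `_Hashable(643 + 1024)` to collide on slot 643 and then fall back to 644.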
734 | === modified file 'bzrlib/python-compat.h' | |||
735 | --- bzrlib/python-compat.h 2009-06-10 03:56:49 +0000 | |||
736 | +++ bzrlib/python-compat.h 2009-10-12 16:51:14 +0000 | |||
737 | @@ -73,4 +73,9 @@ | |||
738 | 73 | #define snprintf _snprintf | 73 | #define snprintf _snprintf |
739 | 74 | #endif | 74 | #endif |
740 | 75 | 75 | ||
741 | 76 | /* Introduced in Python 2.6 */ | ||
742 | 77 | #ifndef Py_TYPE | ||
743 | 78 | # define Py_TYPE(o) ((o)->ob_type) | ||
744 | 79 | #endif | ||
745 | 80 | |||
746 | 76 | #endif /* _BZR_PYTHON_COMPAT_H */ | 81 | #endif /* _BZR_PYTHON_COMPAT_H */ |
747 | 77 | 82 | ||
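The grow/shrink heuristics used by `_add` and `_discard` in the .pyx file can be summarized as two predicates. The helper names below are mine, chosen for illustration; the thresholds are taken directly from the diff:

```python
def needs_grow(fill, mask):
    """_add resizes once the table (live entries + dummies) is 2/3 full."""
    return fill * 3 >= (mask + 1) * 2

def needs_shrink(fill, used, mask):
    """_discard resizes away dummies once they exceed ~1/5 of the table."""
    return (fill - used) * 5 > mask

# A fresh table has mask 0x3ff (1024 slots); the 683rd fill crosses the
# 2/3 threshold and triggers _resize(used * 2).
print(needs_grow(682, 0x3ff), needs_grow(683, 0x3ff))  # False True

# With a 1024-slot table, 205 dummy entries (fill - used) tip the
# shrink heuristic, 204 do not.
print(needs_shrink(205, 0, 0x3ff), needs_shrink(204, 0, 0x3ff))  # True False
```

Resizing to `used * 2` is what the comment "a load factor of 2:1" refers to: the new table always has at least twice as many slots as live entries.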
748 | === modified file 'bzrlib/tests/__init__.py' | |||
749 | --- bzrlib/tests/__init__.py 2009-10-08 01:50:30 +0000 | |||
750 | +++ bzrlib/tests/__init__.py 2009-10-12 16:51:14 +0000 | |||
751 | @@ -3688,6 +3688,7 @@ | |||
752 | 3688 | 'bzrlib.tests.test__groupcompress', | 3688 | 'bzrlib.tests.test__groupcompress', |
753 | 3689 | 'bzrlib.tests.test__known_graph', | 3689 | 'bzrlib.tests.test__known_graph', |
754 | 3690 | 'bzrlib.tests.test__rio', | 3690 | 'bzrlib.tests.test__rio', |
755 | 3691 | 'bzrlib.tests.test__simple_set', | ||
756 | 3691 | 'bzrlib.tests.test__walkdirs_win32', | 3692 | 'bzrlib.tests.test__walkdirs_win32', |
757 | 3692 | 'bzrlib.tests.test_ancestry', | 3693 | 'bzrlib.tests.test_ancestry', |
758 | 3693 | 'bzrlib.tests.test_annotate', | 3694 | 'bzrlib.tests.test_annotate', |
759 | 3694 | 3695 | ||
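The add()/[]/discard() semantics that the new test module below exercises can be mimicked in pure Python with a dict, much as the comment in that module notes. `PySimpleSet` is a hypothetical stand-in for illustration only (the real fallback is just a dict or set), built on `dict.setdefault(key, key)`:

```python
class PySimpleSet(object):
    """Illustrative pure-Python analogue of SimpleSet's interning API."""

    def __init__(self):
        self._d = {}

    def add(self, key):
        # Like SimpleSet.add(): return the stored equal object,
        # which is dict.setdefault(key, key) in disguise.
        return self._d.setdefault(key, key)

    def __getitem__(self, key):
        return self._d[key]

    def discard(self, key):
        # True if the key was present, False otherwise.
        return self._d.pop(key, None) is not None

interned = PySimpleSet()
# tuple([...]) rather than a literal, so the two objects are distinct
# (the same trick the tests use to keep refcounts honest).
k1 = tuple(['foo'])
k2 = tuple(['foo'])
assert k1 == k2 and k1 is not k2
assert interned.add(k1) is k1
assert interned.add(k2) is k1   # equal key already present: stored object wins
assert interned[k2] is k1
assert interned.discard(k1) is True
assert interned.discard(k2) is False
```

This is why `obj.add(k2)` in `test_add` is expected to return `k1` without incrementing `k2`'s refcount.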
760 | === added file 'bzrlib/tests/test__simple_set.py' | |||
761 | --- bzrlib/tests/test__simple_set.py 1970-01-01 00:00:00 +0000 | |||
762 | +++ bzrlib/tests/test__simple_set.py 2009-10-12 16:51:14 +0000 | |||
763 | @@ -0,0 +1,371 @@ | |||
764 | 1 | # Copyright (C) 2009 Canonical Ltd | ||
765 | 2 | # | ||
766 | 3 | # This program is free software; you can redistribute it and/or modify | ||
767 | 4 | # it under the terms of the GNU General Public License as published by | ||
768 | 5 | # the Free Software Foundation; either version 2 of the License, or | ||
769 | 6 | # (at your option) any later version. | ||
770 | 7 | # | ||
771 | 8 | # This program is distributed in the hope that it will be useful, | ||
772 | 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
773 | 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
774 | 11 | # GNU General Public License for more details. | ||
775 | 12 | # | ||
776 | 13 | # You should have received a copy of the GNU General Public License | ||
777 | 14 | # along with this program; if not, write to the Free Software | ||
778 | 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
779 | 16 | |||
780 | 17 | """Tests for the SimpleSet type.""" | ||
781 | 18 | |||
782 | 19 | import sys | ||
783 | 20 | |||
784 | 21 | from bzrlib import ( | ||
785 | 22 | errors, | ||
786 | 23 | osutils, | ||
787 | 24 | tests, | ||
788 | 25 | ) | ||
789 | 26 | |||
790 | 27 | try: | ||
791 | 28 | from bzrlib import _simple_set_pyx | ||
792 | 29 | except ImportError: | ||
793 | 30 | _simple_set_pyx = None | ||
794 | 31 | |||
795 | 32 | |||
796 | 33 | class _Hashable(object): | ||
797 | 34 | """A simple object which has a fixed hash value. | ||
798 | 35 | |||
799 | 36 | We could have used an 'int', but it turns out that int objects don't | ||
800 | 37 | implement tp_richcompare... | ||
801 | 38 | """ | ||
802 | 39 | |||
803 | 40 | def __init__(self, the_hash): | ||
804 | 41 | self.hash = the_hash | ||
805 | 42 | |||
806 | 43 | def __hash__(self): | ||
807 | 44 | return self.hash | ||
808 | 45 | |||
809 | 46 | def __eq__(self, other): | ||
810 | 47 | if not isinstance(other, _Hashable): | ||
811 | 48 | return NotImplemented | ||
812 | 49 | return other.hash == self.hash | ||
813 | 50 | |||
814 | 51 | |||
815 | 52 | class _BadSecondHash(_Hashable): | ||
816 | 53 | |||
817 | 54 | def __init__(self, the_hash): | ||
818 | 55 | _Hashable.__init__(self, the_hash) | ||
819 | 56 | self._first = True | ||
820 | 57 | |||
821 | 58 | def __hash__(self): | ||
822 | 59 | if self._first: | ||
823 | 60 | self._first = False | ||
824 | 61 | return self.hash | ||
825 | 62 | else: | ||
826 | 63 | raise ValueError('I can only be hashed once.') | ||
827 | 64 | |||
828 | 65 | |||
829 | 66 | class _BadCompare(_Hashable): | ||
830 | 67 | |||
831 | 68 | def __eq__(self, other): | ||
832 | 69 | raise RuntimeError('I refuse to play nice') | ||
833 | 70 | |||
834 | 71 | |||
835 | 72 | # Even though this is an extension, we don't permute the tests for a python | ||
836 | 73 | # version, as the plain python version would just be a dict or set | ||
837 | 74 | |||
838 | 75 | class _CompiledSimpleSet(tests.Feature): | ||
839 | 76 | |||
840 | 77 | def _probe(self): | ||
841 | 78 | if _simple_set_pyx is None: | ||
842 | 79 | return False | ||
843 | 80 | return True | ||
844 | 81 | |||
845 | 82 | def feature_name(self): | ||
846 | 83 | return 'bzrlib._simple_set_pyx' | ||
847 | 84 | |||
848 | 85 | CompiledSimpleSet = _CompiledSimpleSet() | ||
849 | 86 | |||
850 | 87 | |||
851 | 88 | class TestSimpleSet(tests.TestCase): | ||
852 | 89 | |||
853 | 90 | _test_needs_features = [CompiledSimpleSet] | ||
854 | 91 | module = _simple_set_pyx | ||
855 | 92 | |||
856 | 93 | def assertIn(self, obj, container): | ||
857 | 94 | self.assertTrue(obj in container, | ||
858 | 95 | '%s not found in %s' % (obj, container)) | ||
859 | 96 | |||
860 | 97 | def assertNotIn(self, obj, container): | ||
861 | 98 | self.assertTrue(obj not in container, | ||
862 | 99 | 'We found %s in %s' % (obj, container)) | ||
863 | 100 | |||
864 | 101 | def assertFillState(self, used, fill, mask, obj): | ||
865 | 102 | self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask)) | ||
866 | 103 | |||
867 | 104 | def assertLookup(self, offset, value, obj, key): | ||
868 | 105 | self.assertEqual((offset, value), obj._test_lookup(key)) | ||
869 | 106 | |||
870 | 107 | def assertRefcount(self, count, obj): | ||
871 | 108 | """Assert that the refcount for obj is what we expect. | ||
872 | 109 | |||
873 | 110 | Note that this automatically adjusts for the fact that calling | ||
874 | 111 | assertRefcount actually creates a new pointer, as does calling | ||
875 | 112 | sys.getrefcount. So pass the expected value *before* the call. | ||
876 | 113 | """ | ||
877 | 114 | # I'm not sure why the offset is 3, but I've checked that in the caller, | ||
878 | 115 | # an offset of 1 works, which is expected. Not sure why assertRefcount | ||
879 | 116 | # is incrementing/decrementing 2 times | ||
880 | 117 | self.assertEqual(count, sys.getrefcount(obj)-3) | ||
881 | 118 | |||
882 | 119 | def test_initial(self): | ||
883 | 120 | obj = self.module.SimpleSet() | ||
884 | 121 | self.assertEqual(0, len(obj)) | ||
885 | 122 | st = ('foo', 'bar') | ||
886 | 123 | self.assertFillState(0, 0, 0x3ff, obj) | ||
887 | 124 | |||
888 | 125 | def test__lookup(self): | ||
889 | 126 | # These are carefully chosen integers to force hash collisions in the | ||
890 | 127 | # algorithm, based on the initial set size of 1024 | ||
891 | 128 | obj = self.module.SimpleSet() | ||
892 | 129 | self.assertLookup(643, '<null>', obj, _Hashable(643)) | ||
893 | 130 | self.assertLookup(643, '<null>', obj, _Hashable(643 + 1024)) | ||
894 | 131 | self.assertLookup(643, '<null>', obj, _Hashable(643 + 50*1024)) | ||
895 | 132 | |||
896 | 133 | def test__lookup_collision(self): | ||
897 | 134 | obj = self.module.SimpleSet() | ||
898 | 135 | k1 = _Hashable(643) | ||
899 | 136 | k2 = _Hashable(643 + 1024) | ||
900 | 137 | self.assertLookup(643, '<null>', obj, k1) | ||
901 | 138 | self.assertLookup(643, '<null>', obj, k2) | ||
902 | 139 | obj.add(k1) | ||
903 | 140 | self.assertLookup(643, k1, obj, k1) | ||
904 | 141 | self.assertLookup(644, '<null>', obj, k2) | ||
905 | 142 | |||
906 | 143 | def test__lookup_after_resize(self): | ||
907 | 144 | obj = self.module.SimpleSet() | ||
908 | 145 | k1 = _Hashable(643) | ||
909 | 146 | k2 = _Hashable(643 + 1024) | ||
910 | 147 | obj.add(k1) | ||
911 | 148 | obj.add(k2) | ||
912 | 149 | self.assertLookup(643, k1, obj, k1) | ||
913 | 150 | self.assertLookup(644, k2, obj, k2) | ||
914 | 151 | obj._py_resize(2047) # resized to 2048 | ||
915 | 152 | self.assertEqual(2048, obj.mask + 1) | ||
916 | 153 | self.assertLookup(643, k1, obj, k1) | ||
917 | 154 | self.assertLookup(643+1024, k2, obj, k2) | ||
918 | 155 | obj._py_resize(1023) # resized back to 1024 | ||
919 | 156 | self.assertEqual(1024, obj.mask + 1) | ||
920 | 157 | self.assertLookup(643, k1, obj, k1) | ||
921 | 158 | self.assertLookup(644, k2, obj, k2) | ||
922 | 159 | |||
923 | 160 | def test_get_set_del_with_collisions(self): | ||
924 | 161 | obj = self.module.SimpleSet() | ||
925 | 162 | |||
926 | 163 | h1 = 643 | ||
927 | 164 | h2 = 643 + 1024 | ||
928 | 165 | h3 = 643 + 1024*50 | ||
929 | 166 | h4 = 643 + 1024*25 | ||
930 | 167 | h5 = 644 | ||
931 | 168 | h6 = 644 + 1024 | ||
932 | 169 | |||
933 | 170 | k1 = _Hashable(h1) | ||
934 | 171 | k2 = _Hashable(h2) | ||
935 | 172 | k3 = _Hashable(h3) | ||
936 | 173 | k4 = _Hashable(h4) | ||
937 | 174 | k5 = _Hashable(h5) | ||
938 | 175 | k6 = _Hashable(h6) | ||
939 | 176 | self.assertLookup(643, '<null>', obj, k1) | ||
940 | 177 | self.assertLookup(643, '<null>', obj, k2) | ||
941 | 178 | self.assertLookup(643, '<null>', obj, k3) | ||
942 | 179 | self.assertLookup(643, '<null>', obj, k4) | ||
943 | 180 | self.assertLookup(644, '<null>', obj, k5) | ||
944 | 181 | self.assertLookup(644, '<null>', obj, k6) | ||
945 | 182 | obj.add(k1) | ||
946 | 183 | self.assertIn(k1, obj) | ||
947 | 184 | self.assertNotIn(k2, obj) | ||
948 | 185 | self.assertNotIn(k3, obj) | ||
949 | 186 | self.assertNotIn(k4, obj) | ||
950 | 187 | self.assertLookup(643, k1, obj, k1) | ||
951 | 188 | self.assertLookup(644, '<null>', obj, k2) | ||
952 | 189 | self.assertLookup(644, '<null>', obj, k3) | ||
953 | 190 | self.assertLookup(644, '<null>', obj, k4) | ||
954 | 191 | self.assertLookup(644, '<null>', obj, k5) | ||
955 | 192 | self.assertLookup(644, '<null>', obj, k6) | ||
956 | 193 | self.assertIs(k1, obj[k1]) | ||
957 | 194 | self.assertIs(k2, obj.add(k2)) | ||
958 | 195 | self.assertIs(k2, obj[k2]) | ||
959 | 196 | self.assertLookup(643, k1, obj, k1) | ||
960 | 197 | self.assertLookup(644, k2, obj, k2) | ||
961 | 198 | self.assertLookup(646, '<null>', obj, k3) | ||
962 | 199 | self.assertLookup(646, '<null>', obj, k4) | ||
963 | 200 | self.assertLookup(645, '<null>', obj, k5) | ||
964 | 201 | self.assertLookup(645, '<null>', obj, k6) | ||
965 | 202 | self.assertLookup(643, k1, obj, _Hashable(h1)) | ||
966 | 203 | self.assertLookup(644, k2, obj, _Hashable(h2)) | ||
967 | 204 | self.assertLookup(646, '<null>', obj, _Hashable(h3)) | ||
968 | 205 | self.assertLookup(646, '<null>', obj, _Hashable(h4)) | ||
969 | 206 | self.assertLookup(645, '<null>', obj, _Hashable(h5)) | ||
970 | 207 | self.assertLookup(645, '<null>', obj, _Hashable(h6)) | ||
971 | 208 | obj.add(k3) | ||
972 | 209 | self.assertIs(k3, obj[k3]) | ||
973 | 210 | self.assertIn(k1, obj) | ||
974 | 211 | self.assertIn(k2, obj) | ||
975 | 212 | self.assertIn(k3, obj) | ||
976 | 213 | self.assertNotIn(k4, obj) | ||
977 | 214 | |||
978 | 215 | obj.discard(k1) | ||
979 | 216 | self.assertLookup(643, '<dummy>', obj, k1) | ||
980 | 217 | self.assertLookup(644, k2, obj, k2) | ||
981 | 218 | self.assertLookup(646, k3, obj, k3) | ||
982 | 219 | self.assertLookup(643, '<dummy>', obj, k4) | ||
983 | 220 | self.assertNotIn(k1, obj) | ||
984 | 221 | self.assertIn(k2, obj) | ||
985 | 222 | self.assertIn(k3, obj) | ||
986 | 223 | self.assertNotIn(k4, obj) | ||
987 | 224 | |||
988 | 225 | def test_add(self): | ||
989 | 226 | obj = self.module.SimpleSet() | ||
990 | 227 | self.assertFillState(0, 0, 0x3ff, obj) | ||
991 | 228 | # We use this clumsy notation, because otherwise the refcounts are off. | ||
992 | 229 | # I'm guessing the python compiler sees it is a static tuple, and adds | ||
993 | 230 | # it to the function variables, or somesuch | ||
994 | 231 | k1 = tuple(['foo']) | ||
995 | 232 | self.assertRefcount(1, k1) | ||
996 | 233 | self.assertIs(k1, obj.add(k1)) | ||
997 | 234 | self.assertFillState(1, 1, 0x3ff, obj) | ||
998 | 235 | self.assertRefcount(2, k1) | ||
999 | 236 | ktest = obj[k1] | ||
+        self.assertRefcount(3, k1)
+        self.assertIs(k1, ktest)
+        del ktest
+        self.assertRefcount(2, k1)
+        k2 = tuple(['foo'])
+        self.assertRefcount(1, k2)
+        self.assertIsNot(k1, k2)
+        # doesn't add anything, so the counters shouldn't be adjusted
+        self.assertIs(k1, obj.add(k2))
+        self.assertFillState(1, 1, 0x3ff, obj)
+        self.assertRefcount(2, k1) # not changed
+        self.assertRefcount(1, k2) # not incremented
+        self.assertIs(k1, obj[k1])
+        self.assertIs(k1, obj[k2])
+        self.assertRefcount(2, k1)
+        self.assertRefcount(1, k2)
+        # Deleting an entry should remove the fill, but not the used
+        obj.discard(k1)
+        self.assertFillState(0, 1, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        k3 = tuple(['bar'])
+        self.assertRefcount(1, k3)
+        self.assertIs(k3, obj.add(k3))
+        self.assertFillState(1, 2, 0x3ff, obj)
+        self.assertRefcount(2, k3)
+        self.assertIs(k2, obj.add(k2))
+        self.assertFillState(2, 2, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        self.assertRefcount(2, k2)
+        self.assertRefcount(2, k3)
+
+    def test_discard(self):
+        obj = self.module.SimpleSet()
+        k1 = tuple(['foo'])
+        k2 = tuple(['foo'])
+        k3 = tuple(['bar'])
+        self.assertRefcount(1, k1)
+        self.assertRefcount(1, k2)
+        self.assertRefcount(1, k3)
+        obj.add(k1)
+        self.assertRefcount(2, k1)
+        self.assertEqual(0, obj.discard(k3))
+        self.assertRefcount(1, k3)
+        obj.add(k3)
+        self.assertRefcount(2, k3)
+        self.assertEqual(1, obj.discard(k3))
+        self.assertRefcount(1, k3)
+
+    def test__resize(self):
+        obj = self.module.SimpleSet()
+        k1 = ('foo',)
+        k2 = ('bar',)
+        k3 = ('baz',)
+        obj.add(k1)
+        obj.add(k2)
+        obj.add(k3)
+        obj.discard(k2)
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(1024, obj._py_resize(500))
+        # Doesn't change the size, but does change the content
+        self.assertFillState(2, 2, 0x3ff, obj)
+        obj.add(k2)
+        obj.discard(k3)
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(4096, obj._py_resize(4095))
+        self.assertFillState(2, 2, 0xfff, obj)
+        self.assertIn(k1, obj)
+        self.assertIn(k2, obj)
+        self.assertNotIn(k3, obj)
+        obj.add(k2)
+        self.assertIn(k2, obj)
+        obj.discard(k2)
+        self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
+        self.assertFillState(1, 2, 0xfff, obj)
+        self.assertEqual(2048, obj._py_resize(1024))
+        self.assertFillState(1, 1, 0x7ff, obj)
+        self.assertEqual((591, '<null>'), obj._test_lookup(k2))
+
+    def test_second_hash_failure(self):
+        obj = self.module.SimpleSet()
+        k1 = _BadSecondHash(200)
+        k2 = _Hashable(200)
+        # Should only call hash() one time
+        obj.add(k1)
+        self.assertFalse(k1._first)
+        self.assertRaises(ValueError, obj.add, k2)
+
+    def test_richcompare_failure(self):
+        obj = self.module.SimpleSet()
+        k1 = _Hashable(200)
+        k2 = _BadCompare(200)
+        obj.add(k1)
+        # Tries to compare with k1, fails
+        self.assertRaises(RuntimeError, obj.add, k2)
+
+    def test_add_and_remove_lots_of_items(self):
+        obj = self.module.SimpleSet()
+        chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
+        for i in chars:
+            for j in chars:
+                k = (i, j)
+                obj.add(k)
+        num = len(chars)*len(chars)
+        self.assertFillState(num, num, 0x1fff, obj)
+        # Now delete all of the entries and it should shrink again
+        for i in chars:
+            for j in chars:
+                k = (i, j)
+                obj.discard(k)
+        # It should be back to 1024 wide mask, though there may still be some
+        # dummy values in there
+        self.assertFillState(0, obj.fill, 0x3ff, obj)
+        # but there should be fewer than 1/5th dummy entries
+        self.assertTrue(obj.fill < 1024 / 5)
+
+    def test__iter__(self):
+        obj = self.module.SimpleSet()
+        k1 = ('1',)
+        k2 = ('1', '2')
+        k3 = ('3', '4')
+        obj.add(k1)
+        obj.add(k2)
+        obj.add(k3)
+        all = set()
+        for key in obj:
+            all.add(key)
+        self.assertEqual(sorted([k1, k2, k3]), sorted(all))
+        iterator = iter(obj)
+        iterator.next()
+        obj.add(('foo',))
+        # Set changed size
+        self.assertRaises(RuntimeError, iterator.next)
+        # And even removing an item still causes it to fail
+        obj.discard(k2)
+        self.assertRaises(RuntimeError, iterator.next)
=== modified file 'setup.py'
--- setup.py	2009-10-01 03:46:41 +0000
+++ setup.py	2009-10-12 16:51:14 +0000
@@ -300,6 +300,7 @@
     add_pyrex_extension('bzrlib._chk_map_pyx', libraries=[z_lib])
     ext_modules.append(Extension('bzrlib._patiencediff_c',
                                  ['bzrlib/_patiencediff_c.c']))
+    add_pyrex_extension('bzrlib._simple_set_pyx')
 
 
 if unavailable_files:
This is the first in my series of patches to lower memory overhead with StaticTuple objects.
I picked this first because it wasn't strictly dependent on the other code, but I use it in the implementation of StaticTuple's interning.
Anyway, this introduces a "SimpleSet" class. For now I have a pyrex version but no pure-python version, as it is currently only used by StaticTuple. I may get to the point of implementing a Python version, though its api is fairly close to that of a set or a dict. The big difference is that we don't really have values like a dict, but a set() doesn't let you get at the object that is actually stored in its internal table.
So the three primary wins for this class are:
1) Don't cache the hash of every object. For the types of object we are putting in here, the hash should be relatively cheap to compute. Even further, though, the times when you need the hash are:
a) When inserting a new object, you need to know its hash, but you haven't cached it yet anyway.
b) When resolving a collision, you compare the hash to the cached value as a 'cheap' comparison.
However, the number of collisions depends on the quality of your hash, your collision avoidance algorithm, and the size of your table. We can't really change the hash function. We could use quadratic probing, but what sets use seems pretty good anyway (it mixes in more of the upper bits, so you probably get divergence faster, but you also lose locality...)
As for the size of the table: it takes the same number of bytes to cache a 'long' hash as it does to hold a 'PyObject *'. That means in the same memory you could either cache hashes *or* double your addressable space and halve the number of collisions.
2) Allow lookups, so we don't need to use a dict, which stores yet another pointer per entry. (For the same number of entries, this will generally be 1/3rd the size of the equivalent dict, and 1/2 the size of the equivalent set.)
3) Have a single function equivalent to 'key = dict.setdefault(key, key)'. At the C API level, you could use the dict's internal lookup function, which is rather private, or you would generally use PyDict_GetItem() followed by PyDict_SetItem().
With SimpleSet_Add(), the stored object is returned directly, so you only need a single lookup. (This only really matters in the case of collisions, where several probe steps would otherwise be repeated.)
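To make the hash-caching trade-off from point 1 concrete, here is a back-of-the-envelope sketch. The 8-byte sizes are my assumption for a 64-bit build; real CPython entry layouts vary by version, so treat this as illustrative arithmetic only.

```python
# Assumed slot sizes on a 64-bit build (not measured from CPython itself).
POINTER_SIZE = 8   # one 'PyObject *'
HASH_SIZE = 8      # one cached 'long' hash

slots = 1024
# CPython's set keeps a cached hash next to each key pointer.
set_style_bytes = slots * (HASH_SIZE + POINTER_SIZE)
# SimpleSet keeps only the key pointer per slot.
simple_set_bytes = slots * POINTER_SIZE

# Same memory budget: SimpleSet can afford twice as many slots, which
# halves the expected collision rate for a given number of entries.
assert set_style_bytes == 2 * simple_set_bytes
```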
Because of the memory savings, I may look into using this elsewhere in our code base. If I do, I will certainly implement a Python version (probably just subclassing dict and exposing a 'def add(self, key): return self.setdefault(key, key)' function, and whatever else I specifically need).
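A minimal sketch of what that pure-Python fallback might look like (hypothetical — no such class ships in this branch; the discard() return values just mirror what test_discard in the diff above expects):

```python
class PySimpleSet(dict):
    """Hypothetical pure-Python stand-in for the pyrex SimpleSet.

    Keys map to themselves, so a lookup hands back the object actually
    stored in the table, which is all the interning use case needs.
    """

    def add(self, key):
        # Return the previously stored equal key, or store this one.
        return self.setdefault(key, key)

    def discard(self, key):
        # Return 1 if something was removed, 0 otherwise.
        if key in self:
            del self[key]
            return 1
        return 0

obj = PySimpleSet()
k1, k2 = ('foo',), ('foo',)
assert obj.add(k1) is k1
assert obj.add(k2) is k1          # k2 interns to the already-saved k1
assert obj.discard(('bar',)) == 0
assert obj.discard(k1) == 1
```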
As for the specific memory savings here: dicts and sets are resized so that they average about 50% full (they resize at 67% full, etc.), and so is SimpleSet (theory says hash tables fall apart at about 80% full). The idea is to start interning the StaticTuple objects I'm creating. (For every key, you average at least 2 references: one for the key itself, and one for all of its children.)
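A small illustration of why interning equal keys pays off, using a plain dict as the intern table (hypothetical names and counts, stdlib only):

```python
# Build many equal-but-distinct 1-tuples, then intern them: the table
# ends up holding one object per distinct value, and every duplicate
# can be dropped by its caller.
cache = {}

def intern_key(key):
    # The 'key = dict.setdefault(key, key)' idiom that SimpleSet_Add()
    # performs with a single table probe.
    return cache.setdefault(key, key)

keys = [intern_key((str(i % 10),)) for i in range(1000)]

assert len(cache) == 10   # only 10 distinct tuple objects survive
assert all(k is keys[i % 10] for i, k in enumerate(keys))
```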
When loading all of Launchpad, that translates into about 500k strings and 1.1M interned StaticTuples (there are actually more, because of how the btree code uses tuples-of...