Merge lp:~jameinel/bzr/1.17-chk-multilevel into lp:~bzr/bzr/trunk-old

Proposed by John A Meinel
Status: Superseded
Proposed branch: lp:~jameinel/bzr/1.17-chk-multilevel
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 1246 lines
To merge this branch: bzr merge lp:~jameinel/bzr/1.17-chk-multilevel
Reviewer: bzr-core (status: Pending)
Review via email: mp+8100@code.launchpad.net

This proposal has been superseded by a proposal from 2009-07-02.

John A Meinel (jameinel) wrote:

This is a slightly better fix for bug #390563. The main changes are:

1) Rewrite iter_interesting_nodes using a class, rather than maintaining state between functions
2) Split up the processing of root nodes into:
  a) Read all uninteresting roots, and find uninteresting chks and items
  b) Filter out any interesting root keys that were found in (a)
  c) Read all remaining root pages and yield them
  d) Find interesting references that are not in (a) (and interesting items)
     Note the search key prefixes for these references and items, and queue
     them up to be returned later.
  e) Filter out uninteresting chk pages based on the search key prefixes in (d)

(d) and (e) are the main differences.
For (d), Robert found an edge case where we could return a non-interesting item (which would, in turn, be treated as a text key, and would then cause a failure during fetch) when one side had an immediate leaf node and the other side had a deep tree. The only real change is to defer returning interesting *items* until all uninteresting pages have been read.

(e) Previously we used the 'intersection of interesting references', whereas now we filter simply based on prefix. This is likely to be a small win when there are lots of uninteresting pages (lots of merge parents that are not being transmitted). A sketch of this prefix filter follows the change list below.

3) After processing the root nodes:
  a) Process the uninteresting queue until completion
  b) Process the interesting queue, filtering out all uninteresting items.
     (Some minor tweaks here, like doing batched '.difference()' checks rather than a per-record membership test; see the second sketch below.)
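
To illustrate (2e), here is a rough standalone sketch of the prefix filter, modelled on the _enqueue_uninteresting step in the diff below. The function and parameter names are simplified stand-ins, not the patch's own:

def enqueue_old_refs(old_prefix_refs, new_prefixes, old_queue):
    """Queue old-side chk references for walking, skipping any whose
    search-key prefix cannot overlap a prefix still wanted on the new side.

    :param old_prefix_refs: iterable of (prefix, chk_key) pairs found under
        the old (uninteresting) roots.
    :param new_prefixes: prefixes referenced from the new (interesting)
        roots, expanded to include every shorter leading substring as well.
    :param old_queue: list to receive the chk keys that still need walking.
    """
    for prefix, ref in old_prefix_refs:
        # If no leading substring of this prefix is wanted on the new side,
        # nothing under this old page can hide something we are about to
        # yield, so the page never needs to be read.
        if not any(prefix[:i] in new_prefixes
                   for i in range(len(prefix), 0, -1)):
            continue
        old_queue.append(ref)

# With only 'a...' keys changed on the new side, the old 'c' page is skipped:
queue = []
enqueue_old_refs([('a', ('sha1:old-a',)), ('c', ('sha1:old-c',))],
                 new_prefixes=set(['a', 'ab']), old_queue=queue)
# queue == [('sha1:old-a',)]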

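And a sketch of the queue processing in (3), again with hypothetical names; read_nodes(keys) stands in for a get_record_stream()-based read that yields (record, items, child_refs) for each stored page:

def flush_new_queue(read_nodes, new_refs, all_old_chks, all_old_items):
    """Walk the queued new-side references, pruning against the old side
    with one set.difference() call per batch rather than testing each
    record as it is processed.
    """
    refs = set(new_refs).difference(all_old_chks)
    seen = set(refs)
    while refs:
        next_refs = set()
        for record, items, child_refs in read_nodes(refs):
            # Leaf items still get filtered one page at a time...
            yield record, [item for item in items
                           if item not in all_old_items]
            next_refs.update(child_refs)
        # ...but chk references are pruned once per batch:
        # small.difference(large) scales with the small set.
        next_refs = next_refs.difference(all_old_chks)
        next_refs = next_refs.difference(seen)
        seen.update(next_refs)
        refs = next_refs
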
I looked at trying to use a heapq to unify things more with 'iter_changes', but the constraints are *very* different.

1) CHKMap.iter_changes() is symmetric, in that it needs to return a record for (source, target, extra); iter_interesting_nodes *only* needs to return information for the 'target' side.
2) CHKMap.iter_changes() only ever affects 2 records, a single source and a single target. iter_interesting_nodes is looking at a many-to-many comparison.
3) CHKMap.iter_changes() only cares about the actual keys, and not about sending the raw byte records to be transmitted for fetch.
4) iter_interesting_nodes is much more concerned about batching, because of its many-to-many nature and the fact that it is used for fetch. We really don't want to be doing a round trip per page.
5) The only gain from a heapq would be if we could figure out an algorithm for filtering out nodes that we thought we would want to read, but actually don't need. However,
  a) I do this at the root level, because it is fairly straightforward. So any further improvements only affect trees that are deeper than 2 levels (1 root, 1+ internal, 1 leaf), which only happens for trees with > 50k items.
  b) Reading extra chk pages isn't "incorrect", just "inefficient" (the stacking code ensures all referenced chk pages are present in the stacked-on branch, just like we ensure compression texts.)
  c) The best I could come up with would be to split the reads up by prefix. So you would page in the 'a' references, then the 'b' references, etc. However, that forces us to split the requests up even further, rather than issuing larger batched requests. (more...)


Robert Collins (lifeless) wrote:

Shallow immediate review:

The docstring for the class, and perhaps the class name, could be better.

In terms of sets, it's:
interesting - uninteresting

So perhaps
CHKMapDifference

as a class name. And for the docstring, something like

"""Iterate the stored pages and key,value pairs for (new - old).

This class provides a generator over the stored CHK pages and the
key,value pairs that are in any of the new maps and not in any of the
old maps.
"""

-Rob

Preview Diff

=== modified file 'bzrlib/chk_map.py'
--- bzrlib/chk_map.py 2009-06-26 09:24:34 +0000
+++ bzrlib/chk_map.py 2009-07-01 22:35:14 +0000
@@ -1398,106 +1398,202 @@
1398 return node1398 return node
13991399
14001400
1401def _find_children_info(store, interesting_keys, uninteresting_keys, pb):1401class InterestingNodeIterator(object):
1402 """Read the associated records, and determine what is interesting."""1402 """Determine the nodes and items that are 'interesting'
1403 uninteresting_keys = set(uninteresting_keys)1403
1404 chks_to_read = uninteresting_keys.union(interesting_keys)1404 This is defined as being present in the interesting set, but not being
1405 next_uninteresting = set()1405 present in the uninteresting set.
1406 next_interesting = set()1406 """
1407 next_interesting_intersection = None1407
1408 uninteresting_items = set()1408 def __init__(self, store, interesting_root_keys, uninteresting_root_keys,
1409 interesting_items = set()1409 search_key_func, pb=None):
1410 interesting_to_yield = []1410 self._store = store
1411 for record in store.get_record_stream(chks_to_read, 'unordered', True):1411 self._interesting_root_keys = interesting_root_keys
1412 # records_read.add(record.key())1412 self._uninteresting_root_keys = uninteresting_root_keys
1413 if pb is not None:1413 self._pb = pb
1414 pb.tick()1414 # All uninteresting chks that we have seen. By the time they are added
1415 bytes = record.get_bytes_as('fulltext')1415 # here, they should be either fully ignored, or queued up for
1416 # We don't care about search_key_func for this code, because we only1416 # processing
1417 # care about external references.1417 self._all_uninteresting_chks = set(self._uninteresting_root_keys)
1418 node = _deserialise(bytes, record.key, search_key_func=None)1418 # All items that we have seen from the uninteresting_root_keys
1419 if record.key in uninteresting_keys:1419 self._all_uninteresting_items = set()
1420 if type(node) is InternalNode:1420 # These are interesting items which were either read, or already in the
1421 next_uninteresting.update(node.refs())1421 # interesting queue (so we don't need to walk them again)
1422 else:1422 self._processed_interesting_refs = set()
1423 # We know we are at a LeafNode, so we can pass None for the1423 self._search_key_func = search_key_func
1424 # store1424
1425 uninteresting_items.update(node.iteritems(None))1425 # The uninteresting and interesting nodes to be searched
1426 else:1426 self._uninteresting_queue = []
1427 interesting_to_yield.append(record.key)1427 self._interesting_queue = []
1428 if type(node) is InternalNode:1428 # Holds the (key, value) items found when processing the root nodes,
1429 if next_interesting_intersection is None:1429 # waiting for the uninteresting nodes to be walked
1430 next_interesting_intersection = set(node.refs())1430 self._interesting_item_queue = []
1431 else:1431 self._state = None
1432 next_interesting_intersection = \1432
1433 next_interesting_intersection.intersection(node.refs())1433 def _read_nodes_from_store(self, keys):
1434 next_interesting.update(node.refs())1434 # We chose not to use _page_cache, because we think in terms of records
1435 else:1435 # to be yielded. Also, we expect to touch each page only 1 time during
1436 interesting_items.update(node.iteritems(None))1436 # this code. (We may want to evaluate saving the raw bytes into the
1437 return (next_uninteresting, uninteresting_items,1437 # page cache, which would allow a working tree update after the fetch
1438 next_interesting, interesting_to_yield, interesting_items,1438 # to not have to read the bytes again.)
1439 next_interesting_intersection)1439 stream = self._store.get_record_stream(keys, 'unordered', True)
14401440 for record in stream:
14411441 if self._pb is not None:
1442def _find_all_uninteresting(store, interesting_root_keys,1442 self._pb.tick()
1443 uninteresting_root_keys, pb):1443 if record.storage_kind == 'absent':
1444 """Determine the full set of uninteresting keys."""1444 raise errors.NoSuchRevision(self._store, record.key)
1445 # What about duplicates between interesting_root_keys and
1446 # uninteresting_root_keys?
1447 if not uninteresting_root_keys:
1448 # Shortcut case. We know there is nothing uninteresting to filter out
1449 # So we just let the rest of the algorithm do the work
1450 # We know there is nothing uninteresting, and we didn't have to read
1451 # any interesting records yet.
1452 return (set(), set(), set(interesting_root_keys), [], set())
1453 all_uninteresting_chks = set(uninteresting_root_keys)
1454 all_uninteresting_items = set()
1455
1456 # First step, find the direct children of both the interesting and
1457 # uninteresting set
1458 (uninteresting_keys, uninteresting_items,
1459 interesting_keys, interesting_to_yield,
1460 interesting_items, interesting_intersection,
1461 ) = _find_children_info(store, interesting_root_keys,
1462 uninteresting_root_keys,
1463 pb=pb)
1464 all_uninteresting_chks.update(uninteresting_keys)
1465 all_uninteresting_items.update(uninteresting_items)
1466 del uninteresting_items
1467 # Do not examine in detail pages common to all interesting trees.
1468 # Pages that are common to all interesting trees will have their
1469 # older versions found via the uninteresting tree traversal. Some pages
1470 # found via the interesting trees traversal will be uninteresting for
1471 # other of the interesting trees, which is why we require the pages to be
1472 # common for us to trim them.
1473 if interesting_intersection is not None:
1474 uninteresting_keys.difference_update(interesting_intersection)
1475
1476 # Second, find the full set of uninteresting bits reachable by the
1477 # uninteresting roots
1478 chks_to_read = uninteresting_keys
1479 while chks_to_read:
1480 next_chks = set()
1481 for record in store.get_record_stream(chks_to_read, 'unordered', False):
1482 # TODO: Handle 'absent'
1483 if pb is not None:
1484 pb.tick()
1485 bytes = record.get_bytes_as('fulltext')1445 bytes = record.get_bytes_as('fulltext')
1486 # We don't care about search_key_func for this code, because we1446 node = _deserialise(bytes, record.key,
1487 # only care about external references.1447 search_key_func=self._search_key_func)
1488 node = _deserialise(bytes, record.key, search_key_func=None)
1489 if type(node) is InternalNode:1448 if type(node) is InternalNode:
1490 # uninteresting_prefix_chks.update(node._items.iteritems())1449 # Note we don't have to do node.refs() because we know that
1491 chks = node._items.values()1450 # there are no children that have been pushed into this node
1492 # TODO: We remove the entries that are already in1451 prefix_refs = node._items.items()
1493 # uninteresting_chks ?1452 items = []
1494 next_chks.update(chks)
1495 all_uninteresting_chks.update(chks)
1496 else:1453 else:
1497 all_uninteresting_items.update(node._items.iteritems())1454 prefix_refs = []
1498 chks_to_read = next_chks1455 items = node._items.items()
1499 return (all_uninteresting_chks, all_uninteresting_items,1456 yield record, node, prefix_refs, items
1500 interesting_keys, interesting_to_yield, interesting_items)1457
1458 def _read_uninteresting_roots(self):
1459 uninteresting_chks_to_enqueue = []
1460 all_uninteresting_chks = self._all_uninteresting_chks
1461 for record, node, prefix_refs, items in \
1462 self._read_nodes_from_store(self._uninteresting_root_keys):
1463 # Uninteresting node
1464 prefix_refs = [p_r for p_r in prefix_refs
1465 if p_r[1] not in all_uninteresting_chks]
1466 new_refs = [p_r[1] for p_r in prefix_refs]
1467 all_uninteresting_chks.update(new_refs)
1468 self._all_uninteresting_items.update(items)
1469 # Queue up the uninteresting references
1470 # Don't actually put them in the 'to-read' queue until we have
1471 # finished checking the interesting references
1472 uninteresting_chks_to_enqueue.extend(prefix_refs)
1473 return uninteresting_chks_to_enqueue
1474
1475 def _enqueue_uninteresting(self, interesting_prefixes,
1476 uninteresting_chks_to_enqueue):
1477 # At this point, we have read all the uninteresting and interesting
1478 # items, so we can queue up the uninteresting stuff, knowing that we've
1479 # handled the interesting ones
1480 for prefix, ref in uninteresting_chks_to_enqueue:
1481 not_interesting = True
1482 for i in xrange(len(prefix), 0, -1):
1483 if prefix[:i] in interesting_prefixes:
1484 not_interesting = False
1485 break
1486 if not_interesting:
1487 # This prefix is not part of the remaining 'interesting set'
1488 continue
1489 self._uninteresting_queue.append(ref)
1490
1491 def _read_all_roots(self):
1492 """Read the root pages.
1493
1494 This is structured as a generator, so that the root records can be
1495 yielded up to whoever needs them without any buffering.
1496 """
1497 # This is the bootstrap phase
1498 if not self._uninteresting_root_keys:
1499 # With no uninteresting_root_keys we can just shortcut and be ready
1500 # for _flush_interesting_queue
1501 self._interesting_queue = list(self._interesting_root_keys)
1502 return
1503 uninteresting_chks_to_enqueue = self._read_uninteresting_roots()
1504 # filter out any root keys that are already known to be uninteresting
1505 interesting_keys = set(self._interesting_root_keys).difference(
1506 self._all_uninteresting_chks)
1507 # These are prefixes that are present in interesting_keys that we are
1508 # thinking to yield
1509 interesting_prefixes = set()
1510 # We are about to yield all of these, so we don't want them getting
1511 # added a second time
1512 processed_interesting_refs = self._processed_interesting_refs
1513 processed_interesting_refs.update(interesting_keys)
1514 for record, node, prefix_refs, items in \
1515 self._read_nodes_from_store(interesting_keys):
1516 # At this level, we now know all the uninteresting references
1517 # So we filter and queue up whatever is remaining
1518 prefix_refs = [p_r for p_r in prefix_refs
1519 if p_r[1] not in self._all_uninteresting_chks
1520 and p_r[1] not in processed_interesting_refs]
1521 refs = [p_r[1] for p_r in prefix_refs]
1522 interesting_prefixes.update([p_r[0] for p_r in prefix_refs])
1523 self._interesting_queue.extend(refs)
1524 # TODO: We can potentially get multiple items here, however the
1525 # current design allows for this, as callers will do the work
1526 # to make the results unique. We might profile whether we
1527 # gain anything by ensuring unique return values for items
1528 interesting_items = [item for item in items
1529 if item not in self._all_uninteresting_items]
1530 self._interesting_item_queue.extend(interesting_items)
1531 interesting_prefixes.update([self._search_key_func(item[0])
1532 for item in interesting_items])
1533 processed_interesting_refs.update(refs)
1534 yield record
1535 # For interesting_prefixes we have the full length prefixes queued up.
1536 # However, we also need possible prefixes. (If we have a known ref to
1537 # 'ab', then we also need to include 'a'.) So expand the
1538 # interesting_prefixes to include all shorter prefixes
1539 for prefix in list(interesting_prefixes):
1540 interesting_prefixes.update([prefix[:i]
1541 for i in xrange(1, len(prefix))])
1542 self._enqueue_uninteresting(interesting_prefixes,
1543 uninteresting_chks_to_enqueue)
1544
1545 def _flush_interesting_queue(self):
1546 # No need to maintain the heap invariant anymore, just pull things out
1547 # and process them
1548 refs = set(self._interesting_queue)
1549 self._interesting_queue = []
1550 # First pass, flush all interesting items and convert to using direct refs
1551 all_uninteresting_chks = self._all_uninteresting_chks
1552 processed_interesting_refs = self._processed_interesting_refs
1553 all_uninteresting_items = self._all_uninteresting_items
1554 interesting_items = [item for item in self._interesting_item_queue
1555 if item not in all_uninteresting_items]
1556 self._interesting_item_queue = []
1557 if interesting_items:
1558 yield None, interesting_items
1559 refs = refs.difference(all_uninteresting_chks)
1560 while refs:
1561 next_refs = set()
1562 next_refs_update = next_refs.update
1563 # Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
1564 # from 1m54s to 1m51s. Consider it.
1565 for record, _, p_refs, items in self._read_nodes_from_store(refs):
1566 items = [item for item in items
1567 if item not in all_uninteresting_items]
1568 yield record, items
1569 next_refs_update([p_r[1] for p_r in p_refs])
1570 next_refs = next_refs.difference(all_uninteresting_chks)
1571 next_refs = next_refs.difference(processed_interesting_refs)
1572 processed_interesting_refs.update(next_refs)
1573 refs = next_refs
1574
1575 def _process_next_uninteresting(self):
1576 # Since we don't filter uninteresting any further than during
1577 # _read_all_roots, process the whole queue in a single pass.
1578 refs = self._uninteresting_queue
1579 self._uninteresting_queue = []
1580 all_uninteresting_chks = self._all_uninteresting_chks
1581 for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
1582 self._all_uninteresting_items.update(items)
1583 refs = [r for _,r in prefix_refs if r not in all_uninteresting_chks]
1584 self._uninteresting_queue.extend(refs)
1585 all_uninteresting_chks.update(refs)
1586
1587 def _process_queues(self):
1588 while self._uninteresting_queue:
1589 self._process_next_uninteresting()
1590 return self._flush_interesting_queue()
1591
1592 def process(self):
1593 for record in self._read_all_roots():
1594 yield record, []
1595 for record, items in self._process_queues():
1596 yield record, items
15011597
15021598
1503def iter_interesting_nodes(store, interesting_root_keys,1599def iter_interesting_nodes(store, interesting_root_keys,
@@ -1514,72 +1610,11 @@
1514 :return: Yield1610 :return: Yield
1515 (interesting record, {interesting key:values})1611 (interesting record, {interesting key:values})
1516 """1612 """
1517 # TODO: consider that it may be more memory efficient to use the 20-byte1613 iterator = InterestingNodeIterator(store, interesting_root_keys,
1518 # sha1 string, rather than tuples of hexidecimal sha1 strings.1614 uninteresting_root_keys,
1519 # TODO: Try to factor out a lot of the get_record_stream() calls into a1615 search_key_func=store._search_key_func,
1520 # helper function similar to _read_bytes. This function should be1616 pb=pb)
1521 # able to use nodes from the _page_cache as well as actually1617 return iterator.process()
1522 # requesting bytes from the store.
1523
1524 (all_uninteresting_chks, all_uninteresting_items, interesting_keys,
1525 interesting_to_yield, interesting_items) = _find_all_uninteresting(store,
1526 interesting_root_keys, uninteresting_root_keys, pb)
1527
1528 # Now that we know everything uninteresting, we can yield information from
1529 # our first request
1530 interesting_items.difference_update(all_uninteresting_items)
1531 interesting_to_yield = set(interesting_to_yield) - all_uninteresting_chks
1532 if interesting_items:
1533 yield None, interesting_items
1534 if interesting_to_yield:
1535 # We request these records again, rather than buffering the root
1536 # records, most likely they are still in the _group_cache anyway.
1537 for record in store.get_record_stream(interesting_to_yield,
1538 'unordered', False):
1539 yield record, []
1540 all_uninteresting_chks.update(interesting_to_yield)
1541 interesting_keys.difference_update(all_uninteresting_chks)
1542
1543 chks_to_read = interesting_keys
1544 counter = 0
1545 while chks_to_read:
1546 next_chks = set()
1547 for record in store.get_record_stream(chks_to_read, 'unordered', False):
1548 counter += 1
1549 if pb is not None:
1550 pb.update('find chk pages', counter)
1551 # TODO: Handle 'absent'?
1552 bytes = record.get_bytes_as('fulltext')
1553 # We don't care about search_key_func for this code, because we
1554 # only care about external references.
1555 node = _deserialise(bytes, record.key, search_key_func=None)
1556 if type(node) is InternalNode:
1557 # all_uninteresting_chks grows large, as it lists all nodes we
1558 # don't want to process (including already seen interesting
1559 # nodes).
1560 # small.difference_update(large) scales O(large), but
1561 # small.difference(large) scales O(small).
1562 # Also, we know we just _deserialised this node, so we can
1563 # access the dict directly.
1564 chks = set(node._items.itervalues()).difference(
1565 all_uninteresting_chks)
1566 # Is set() and .difference_update better than:
1567 # chks = [chk for chk in node.refs()
1568 # if chk not in all_uninteresting_chks]
1569 next_chks.update(chks)
1570 # These are now uninteresting everywhere else
1571 all_uninteresting_chks.update(chks)
1572 interesting_items = []
1573 else:
1574 interesting_items = [item for item in node._items.iteritems()
1575 if item not in all_uninteresting_items]
1576 # TODO: Do we need to filter out items that we have already
1577 # seen on other pages? We don't really want to buffer the
1578 # whole thing, but it does mean that callers need to
1579 # understand they may get duplicate values.
1580 # all_uninteresting_items.update(interesting_items)
1581 yield record, interesting_items
1582 chks_to_read = next_chks
15831618
15841619
1585try:1620try:
15861621
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- bzrlib/repofmt/groupcompress_repo.py 2009-06-29 14:51:13 +0000
+++ bzrlib/repofmt/groupcompress_repo.py 2009-07-01 22:35:14 +0000
@@ -652,6 +652,9 @@
652 parents=False, is_locked=self.is_locked,652 parents=False, is_locked=self.is_locked,
653 inconsistency_fatal=False),653 inconsistency_fatal=False),
654 access=self._pack_collection.chk_index.data_access)654 access=self._pack_collection.chk_index.data_access)
655 search_key_name = self._format._serializer.search_key_name
656 search_key_func = chk_map.search_key_registry.get(search_key_name)
657 self.chk_bytes._search_key_func = search_key_func
655 # True when the repository object is 'write locked' (as opposed to the658 # True when the repository object is 'write locked' (as opposed to the
656 # physical lock only taken out around changes to the pack-names list.)659 # physical lock only taken out around changes to the pack-names list.)
657 # Another way to represent this would be a decorator around the control660 # Another way to represent this would be a decorator around the control
658661
=== modified file 'bzrlib/tests/test_chk_map.py'
--- bzrlib/tests/test_chk_map.py 2009-06-26 09:24:34 +0000
+++ bzrlib/tests/test_chk_map.py 2009-07-01 22:35:14 +0000
@@ -20,6 +20,7 @@
2020
21from bzrlib import (21from bzrlib import (
22 chk_map,22 chk_map,
23 groupcompress,
23 osutils,24 osutils,
24 tests,25 tests,
25 )26 )
@@ -59,17 +60,14 @@
59 self.assertCommonPrefix('', '', '')60 self.assertCommonPrefix('', '', '')
6061
6162
62class TestCaseWithStore(tests.TestCaseWithTransport):63class TestCaseWithStore(tests.TestCaseWithMemoryTransport):
6364
64 def get_chk_bytes(self):65 def get_chk_bytes(self):
65 # The easiest way to get a CHK store is a development6 repository and66 # The easiest way to get a CHK store is a development6 repository and
66 # then work with the chk_bytes attribute directly.67 # then work with the chk_bytes attribute directly.
67 repo = self.make_repository(".", format="development6-rich-root")68 factory = groupcompress.make_pack_factory(False, False, 1)
68 repo.lock_write()69 self.chk_bytes = factory(self.get_transport())
69 self.addCleanup(repo.unlock)70 return self.chk_bytes
70 repo.start_write_group()
71 self.addCleanup(repo.abort_write_group)
72 return repo.chk_bytes
7371
74 def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,72 def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
75 search_key_func=None):73 search_key_func=None):
@@ -97,6 +95,246 @@
97 return dict(node.iteritems(*args))95 return dict(node.iteritems(*args))
9896
9997
98class TestCaseWithExampleMaps(TestCaseWithStore):
99
100 def get_chk_bytes(self):
101 if getattr(self, '_chk_bytes', None) is None:
102 self._chk_bytes = super(TestCaseWithExampleMaps,
103 self).get_chk_bytes()
104 return self._chk_bytes
105
106 def get_map(self, a_dict, maximum_size=100, search_key_func=None):
107 c_map = self._get_map(a_dict, maximum_size=maximum_size,
108 chk_bytes=self.get_chk_bytes(),
109 search_key_func=search_key_func)
110 return c_map
111
112 def make_root_only_map(self, search_key_func=None):
113 return self.get_map({
114 ('aaa',): 'initial aaa content',
115 ('abb',): 'initial abb content',
116 }, search_key_func=search_key_func)
117
118 def make_root_only_aaa_ddd_map(self, search_key_func=None):
119 return self.get_map({
120 ('aaa',): 'initial aaa content',
121 ('ddd',): 'initial ddd content',
122 }, search_key_func=search_key_func)
123
124 def make_one_deep_map(self, search_key_func=None):
125 # Same as root_only_map, except it forces an InternalNode at the root
126 return self.get_map({
127 ('aaa',): 'initial aaa content',
128 ('abb',): 'initial abb content',
129 ('ccc',): 'initial ccc content',
130 ('ddd',): 'initial ddd content',
131 }, search_key_func=search_key_func)
132
133 def make_two_deep_map(self, search_key_func=None):
134 # Carefully chosen so that it creates a 2-deep map for both
135 # _search_key_plain and for _search_key_16
136 # Also so that things line up with make_one_deep_two_prefix_map
137 return self.get_map({
138 ('aaa',): 'initial aaa content',
139 ('abb',): 'initial abb content',
140 ('acc',): 'initial acc content',
141 ('ace',): 'initial ace content',
142 ('add',): 'initial add content',
143 ('adh',): 'initial adh content',
144 ('adl',): 'initial adl content',
145 ('ccc',): 'initial ccc content',
146 ('ddd',): 'initial ddd content',
147 }, search_key_func=search_key_func)
148
149 def make_one_deep_two_prefix_map(self, search_key_func=None):
150 """Create a map with one internal node, but references are extra long.
151
152 Otherwise has similar content to make_two_deep_map.
153 """
154 return self.get_map({
155 ('aaa',): 'initial aaa content',
156 ('add',): 'initial add content',
157 ('adh',): 'initial adh content',
158 ('adl',): 'initial adl content',
159 }, search_key_func=search_key_func)
160
161 def make_one_deep_one_prefix_map(self, search_key_func=None):
162 """Create a map with one internal node, but references are extra long.
163
164 Similar to make_one_deep_two_prefix_map, except the split is at the
165 first char, rather than the second.
166 """
167 return self.get_map({
168 ('add',): 'initial add content',
169 ('adh',): 'initial adh content',
170 ('adl',): 'initial adl content',
171 ('bbb',): 'initial bbb content',
172 }, search_key_func=search_key_func)
173
174
175class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
176 """Actual tests for the provided examples."""
177
178 def test_root_only_map_plain(self):
179 c_map = self.make_root_only_map()
180 self.assertEqualDiff(
181 "'' LeafNode\n"
182 " ('aaa',) 'initial aaa content'\n"
183 " ('abb',) 'initial abb content'\n",
184 c_map._dump_tree())
185
186 def test_root_only_map_16(self):
187 c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
188 self.assertEqualDiff(
189 "'' LeafNode\n"
190 " ('aaa',) 'initial aaa content'\n"
191 " ('abb',) 'initial abb content'\n",
192 c_map._dump_tree())
193
194 def test_one_deep_map_plain(self):
195 c_map = self.make_one_deep_map()
196 self.assertEqualDiff(
197 "'' InternalNode\n"
198 " 'a' LeafNode\n"
199 " ('aaa',) 'initial aaa content'\n"
200 " ('abb',) 'initial abb content'\n"
201 " 'c' LeafNode\n"
202 " ('ccc',) 'initial ccc content'\n"
203 " 'd' LeafNode\n"
204 " ('ddd',) 'initial ddd content'\n",
205 c_map._dump_tree())
206
207 def test_one_deep_map_16(self):
208 c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
209 self.assertEqualDiff(
210 "'' InternalNode\n"
211 " '2' LeafNode\n"
212 " ('ccc',) 'initial ccc content'\n"
213 " '4' LeafNode\n"
214 " ('abb',) 'initial abb content'\n"
215 " 'F' LeafNode\n"
216 " ('aaa',) 'initial aaa content'\n"
217 " ('ddd',) 'initial ddd content'\n",
218 c_map._dump_tree())
219
220 def test_root_only_aaa_ddd_plain(self):
221 c_map = self.make_root_only_aaa_ddd_map()
222 self.assertEqualDiff(
223 "'' LeafNode\n"
224 " ('aaa',) 'initial aaa content'\n"
225 " ('ddd',) 'initial ddd content'\n",
226 c_map._dump_tree())
227
228 def test_one_deep_map_16(self):
229 c_map = self.make_root_only_aaa_ddd_map(
230 search_key_func=chk_map._search_key_16)
231 # We use 'aaa' and 'ddd' because they happen to map to 'F' when using
232 # _search_key_16
233 self.assertEqualDiff(
234 "'' LeafNode\n"
235 " ('aaa',) 'initial aaa content'\n"
236 " ('ddd',) 'initial ddd content'\n",
237 c_map._dump_tree())
238
239 def test_two_deep_map_plain(self):
240 c_map = self.make_two_deep_map()
241 self.assertEqualDiff(
242 "'' InternalNode\n"
243 " 'a' InternalNode\n"
244 " 'aa' LeafNode\n"
245 " ('aaa',) 'initial aaa content'\n"
246 " 'ab' LeafNode\n"
247 " ('abb',) 'initial abb content'\n"
248 " 'ac' LeafNode\n"
249 " ('acc',) 'initial acc content'\n"
250 " ('ace',) 'initial ace content'\n"
251 " 'ad' LeafNode\n"
252 " ('add',) 'initial add content'\n"
253 " ('adh',) 'initial adh content'\n"
254 " ('adl',) 'initial adl content'\n"
255 " 'c' LeafNode\n"
256 " ('ccc',) 'initial ccc content'\n"
257 " 'd' LeafNode\n"
258 " ('ddd',) 'initial ddd content'\n",
259 c_map._dump_tree())
260
261 def test_two_deep_map_16(self):
262 c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
263 self.assertEqualDiff(
264 "'' InternalNode\n"
265 " '2' LeafNode\n"
266 " ('acc',) 'initial acc content'\n"
267 " ('ccc',) 'initial ccc content'\n"
268 " '4' LeafNode\n"
269 " ('abb',) 'initial abb content'\n"
270 " 'C' LeafNode\n"
271 " ('ace',) 'initial ace content'\n"
272 " 'F' InternalNode\n"
273 " 'F0' LeafNode\n"
274 " ('aaa',) 'initial aaa content'\n"
275 " 'F3' LeafNode\n"
276 " ('adl',) 'initial adl content'\n"
277 " 'F4' LeafNode\n"
278 " ('adh',) 'initial adh content'\n"
279 " 'FB' LeafNode\n"
280 " ('ddd',) 'initial ddd content'\n"
281 " 'FD' LeafNode\n"
282 " ('add',) 'initial add content'\n",
283 c_map._dump_tree())
284
285 def test_one_deep_two_prefix_map_plain(self):
286 c_map = self.make_one_deep_two_prefix_map()
287 self.assertEqualDiff(
288 "'' InternalNode\n"
289 " 'aa' LeafNode\n"
290 " ('aaa',) 'initial aaa content'\n"
291 " 'ad' LeafNode\n"
292 " ('add',) 'initial add content'\n"
293 " ('adh',) 'initial adh content'\n"
294 " ('adl',) 'initial adl content'\n",
295 c_map._dump_tree())
296
297 def test_one_deep_two_prefix_map_16(self):
298 c_map = self.make_one_deep_two_prefix_map(
299 search_key_func=chk_map._search_key_16)
300 self.assertEqualDiff(
301 "'' InternalNode\n"
302 " 'F0' LeafNode\n"
303 " ('aaa',) 'initial aaa content'\n"
304 " 'F3' LeafNode\n"
305 " ('adl',) 'initial adl content'\n"
306 " 'F4' LeafNode\n"
307 " ('adh',) 'initial adh content'\n"
308 " 'FD' LeafNode\n"
309 " ('add',) 'initial add content'\n",
310 c_map._dump_tree())
311
312 def test_one_deep_one_prefix_map_plain(self):
313 c_map = self.make_one_deep_one_prefix_map()
314 self.assertEqualDiff(
315 "'' InternalNode\n"
316 " 'a' LeafNode\n"
317 " ('add',) 'initial add content'\n"
318 " ('adh',) 'initial adh content'\n"
319 " ('adl',) 'initial adl content'\n"
320 " 'b' LeafNode\n"
321 " ('bbb',) 'initial bbb content'\n",
322 c_map._dump_tree())
323
324 def test_one_deep_one_prefix_map_16(self):
325 c_map = self.make_one_deep_one_prefix_map(
326 search_key_func=chk_map._search_key_16)
327 self.assertEqualDiff(
328 "'' InternalNode\n"
329 " '4' LeafNode\n"
330 " ('bbb',) 'initial bbb content'\n"
331 " 'F' LeafNode\n"
332 " ('add',) 'initial add content'\n"
333 " ('adh',) 'initial adh content'\n"
334 " ('adl',) 'initial adl content'\n",
335 c_map._dump_tree())
336
337
100class TestMap(TestCaseWithStore):338class TestMap(TestCaseWithStore):
101339
102 def assertHasABMap(self, chk_bytes):340 def assertHasABMap(self, chk_bytes):
@@ -1886,62 +2124,408 @@
1886# 1-4K get02124# 1-4K get0
18872125
18882126
1889class TestIterInterestingNodes(TestCaseWithStore):2127class TestInterestingNodeIterator(TestCaseWithExampleMaps):
18902128
1891 def get_chk_bytes(self):2129 def get_iterator(self, interesting_roots, uninteresting_roots,
1892 if getattr(self, '_chk_bytes', None) is None:2130 search_key_func=None):
1893 self._chk_bytes = super(TestIterInterestingNodes,2131 if search_key_func is None:
1894 self).get_chk_bytes()2132 search_key_func = chk_map._search_key_plain
1895 return self._chk_bytes2133 return chk_map.InterestingNodeIterator(self.get_chk_bytes(),
2134 interesting_roots, uninteresting_roots, search_key_func)
2135
2136 def test__init__(self):
2137 c_map = self.make_root_only_map()
2138 key1 = c_map.key()
2139 c_map.map(('aaa',), 'new aaa content')
2140 key2 = c_map._save()
2141 iterator = self.get_iterator([key2], [key1])
2142 self.assertEqual(set([key1]), iterator._all_uninteresting_chks)
2143 self.assertEqual([], iterator._uninteresting_queue)
2144 self.assertEqual([], iterator._interesting_queue)
2145
2146 def help__read_all_roots(self, search_key_func):
2147 c_map = self.make_root_only_map(search_key_func=search_key_func)
2148 key1 = c_map.key()
2149 c_map.map(('aaa',), 'new aaa content')
2150 key2 = c_map._save()
2151 iterator = self.get_iterator([key2], [key1], search_key_func)
2152 root_results = [record.key for record in iterator._read_all_roots()]
2153 self.assertEqual([key2], root_results)
2154 # We should have queued up only items that aren't in the uninteresting
2155 # set
2156 self.assertEqual([(('aaa',), 'new aaa content')],
2157 iterator._interesting_item_queue)
2158 self.assertEqual([], iterator._interesting_queue)
2159 # And there are no uninteresting references, so that queue should be
2160 # empty
2161 self.assertEqual([], iterator._uninteresting_queue)
2162
2163 def test__read_all_roots_plain(self):
2164 self.help__read_all_roots(search_key_func=chk_map._search_key_plain)
2165
2166 def test__read_all_roots_16(self):
2167 self.help__read_all_roots(search_key_func=chk_map._search_key_16)
2168
2169 def test__read_all_roots_skips_known_uninteresting(self):
2170 c_map = self.make_one_deep_map(chk_map._search_key_plain)
2171 key1 = c_map.key()
2172 c_map2 = self.make_root_only_map(chk_map._search_key_plain)
2173 key2 = c_map2.key()
2174 iterator = self.get_iterator([key2], [key1], chk_map._search_key_plain)
2175 root_results = [record.key for record in iterator._read_all_roots()]
2176 # We should have no results. key2 is completely contained within key1,
2177 # and we should have seen that in the first pass
2178 self.assertEqual([], root_results)
2179
2180 def test__read_all_roots_prepares_queues(self):
2181 c_map = self.make_one_deep_map(chk_map._search_key_plain)
2182 key1 = c_map.key()
2183 c_map._dump_tree() # load everything
2184 key1_a = c_map._root_node._items['a'].key()
2185 c_map.map(('abb',), 'new abb content')
2186 key2 = c_map._save()
2187 key2_a = c_map._root_node._items['a'].key()
2188 iterator = self.get_iterator([key2], [key1], chk_map._search_key_plain)
2189 root_results = [record.key for record in iterator._read_all_roots()]
2190 self.assertEqual([key2], root_results)
2191 # At this point, we should have queued up only the 'a' Leaf on both
2192 # sides, both 'c' and 'd' are known to not have changed on both sides
2193 self.assertEqual([key2_a], iterator._interesting_queue)
2194 self.assertEqual([], iterator._interesting_item_queue)
2195 self.assertEqual([key1_a], iterator._uninteresting_queue)
2196
2197 def test__read_all_roots_multi_interesting_prepares_queues(self):
2198 c_map = self.make_one_deep_map(chk_map._search_key_plain)
2199 key1 = c_map.key()
2200 c_map._dump_tree() # load everything
2201 key1_a = c_map._root_node._items['a'].key()
2202 key1_c = c_map._root_node._items['c'].key()
2203 c_map.map(('abb',), 'new abb content')
2204 key2 = c_map._save()
2205 key2_a = c_map._root_node._items['a'].key()
2206 key2_c = c_map._root_node._items['c'].key()
2207 c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
2208 chk_map._search_key_plain)
2209 c_map.map(('ccc',), 'new ccc content')
2210 key3 = c_map._save()
2211 key3_a = c_map._root_node._items['a'].key()
2212 key3_c = c_map._root_node._items['c'].key()
2213 iterator = self.get_iterator([key2, key3], [key1],
2214 chk_map._search_key_plain)
2215 root_results = [record.key for record in iterator._read_all_roots()]
2216 self.assertEqual(sorted([key2, key3]), sorted(root_results))
2217 # We should have queued up key2_a, and key3_c, but not key2_c or key3_c
2218 self.assertEqual([key2_a, key3_c], iterator._interesting_queue)
2219 self.assertEqual([], iterator._interesting_item_queue)
2220 # And we should have queued up both a and c for the uninteresting set
2221 self.assertEqual([key1_a, key1_c], iterator._uninteresting_queue)
2222
2223 def test__read_all_roots_different_depths(self):
2224 c_map = self.make_two_deep_map(chk_map._search_key_plain)
2225 c_map._dump_tree() # load everything
2226 key1 = c_map.key()
2227 key1_a = c_map._root_node._items['a'].key()
2228 key1_c = c_map._root_node._items['c'].key()
2229 key1_d = c_map._root_node._items['d'].key()
2230
2231 c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
2232 c_map2._dump_tree()
2233 key2 = c_map2.key()
2234 key2_aa = c_map2._root_node._items['aa'].key()
2235 key2_ad = c_map2._root_node._items['ad'].key()
2236
2237 iterator = self.get_iterator([key2], [key1], chk_map._search_key_plain)
2238 root_results = [record.key for record in iterator._read_all_roots()]
2239 self.assertEqual([key2], root_results)
2240 # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
2241 # present
2242 self.assertEqual([key1_a], iterator._uninteresting_queue)
2243 self.assertEqual([key2_aa, key2_ad], iterator._interesting_queue)
2244 self.assertEqual([], iterator._interesting_item_queue)
2245
2246 iterator = self.get_iterator([key1], [key2], chk_map._search_key_plain)
2247 root_results = [record.key for record in iterator._read_all_roots()]
2248 self.assertEqual([key1], root_results)
2249
2250 self.assertEqual([key2_aa, key2_ad], iterator._uninteresting_queue)
2251 self.assertEqual([key1_a, key1_c, key1_d], iterator._interesting_queue)
2252 self.assertEqual([], iterator._interesting_item_queue)
2253
2254 def test__read_all_roots_different_depths_16(self):
2255 c_map = self.make_two_deep_map(chk_map._search_key_16)
2256 c_map._dump_tree() # load everything
2257 key1 = c_map.key()
2258 key1_2 = c_map._root_node._items['2'].key()
2259 key1_4 = c_map._root_node._items['4'].key()
2260 key1_C = c_map._root_node._items['C'].key()
2261 key1_F = c_map._root_node._items['F'].key()
2262
2263 c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
2264 c_map2._dump_tree()
2265 key2 = c_map2.key()
2266 key2_F0 = c_map2._root_node._items['F0'].key()
2267 key2_F3 = c_map2._root_node._items['F3'].key()
2268 key2_F4 = c_map2._root_node._items['F4'].key()
2269 key2_FD = c_map2._root_node._items['FD'].key()
2270
2271 iterator = self.get_iterator([key2], [key1], chk_map._search_key_16)
2272 root_results = [record.key for record in iterator._read_all_roots()]
2273 self.assertEqual([key2], root_results)
2274 # Only the subset of keys that may be present should be queued up.
2275 self.assertEqual([key1_F], iterator._uninteresting_queue)
2276 self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
2277 sorted(iterator._interesting_queue))
2278 self.assertEqual([], iterator._interesting_item_queue)
2279
2280 iterator = self.get_iterator([key1], [key2], chk_map._search_key_16)
2281 root_results = [record.key for record in iterator._read_all_roots()]
2282 self.assertEqual([key1], root_results)
2283
2284 self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
2285 sorted(iterator._uninteresting_queue))
2286 self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
2287 sorted(iterator._interesting_queue))
2288 self.assertEqual([], iterator._interesting_item_queue)
2289
2290 def test__read_all_roots_mixed_depth(self):
2291 c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
2292 c_map._dump_tree() # load everything
2293 key1 = c_map.key()
2294 key1_aa = c_map._root_node._items['aa'].key()
2295 key1_ad = c_map._root_node._items['ad'].key()
2296
2297 c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
2298 c_map2._dump_tree()
2299 key2 = c_map2.key()
2300 key2_a = c_map2._root_node._items['a'].key()
2301 key2_b = c_map2._root_node._items['b'].key()
2302
2303 iterator = self.get_iterator([key2], [key1], chk_map._search_key_plain)
2304 root_results = [record.key for record in iterator._read_all_roots()]
2305 self.assertEqual([key2], root_results)
2306 # 'ad' matches exactly 'a' on the other side, so it should be removed,
2307 # and neither side should have it queued for walking
2308 self.assertEqual([], iterator._uninteresting_queue)
2309 self.assertEqual([key2_b], iterator._interesting_queue)
2310 self.assertEqual([], iterator._interesting_item_queue)
2311
2312 iterator = self.get_iterator([key1], [key2], chk_map._search_key_plain)
2313 root_results = [record.key for record in iterator._read_all_roots()]
2314 self.assertEqual([key1], root_results)
2315 # Note: This is technically not the 'true minimal' set that we could
2316 # use The reason is that 'a' was matched exactly to 'ad' (by sha
2317 # sum). However, the code gets complicated in the case of more
2318 # than one interesting key, so for now, we live with this
2319 # Consider revising, though benchmarking showing it to be a
2320 # real-world issue should be done
2321 self.assertEqual([key2_a], iterator._uninteresting_queue)
2322 # self.assertEqual([], iterator._uninteresting_queue)
2323 self.assertEqual([key1_aa], iterator._interesting_queue)
2324 self.assertEqual([], iterator._interesting_item_queue)
2325
2326 def test__read_all_roots_yields_extra_deep_records(self):
2327 # This is slightly controversial, as we will yield a chk page that we
2328 # might later on find out could be filtered out. (If a root node is
2329 # referenced deeper in the uninteresting set.)
2330 # However, even with stacking, we always have all chk pages that we
2331 # will need. So as long as we filter out the referenced keys, we'll
2332 # never run into problems.
2333 # This allows us to yield a root node record immediately, without any
2334 # buffering.
2335 c_map = self.make_two_deep_map(chk_map._search_key_plain)
2336 c_map._dump_tree() # load all keys
2337 key1 = c_map.key()
2338 key1_a = c_map._root_node._items['a'].key()
2339 c_map2 = self.get_map({
2340 ('acc',): 'initial acc content',
2341 ('ace',): 'initial ace content',
2342 }, maximum_size=100)
2343 self.assertEqualDiff(
2344 "'' LeafNode\n"
2345 " ('acc',) 'initial acc content'\n"
2346 " ('ace',) 'initial ace content'\n",
2347 c_map2._dump_tree())
2348 key2 = c_map2.key()
2349 iterator = self.get_iterator([key2], [key1], chk_map._search_key_plain)
2350 root_results = [record.key for record in iterator._read_all_roots()]
2351 self.assertEqual([key2], root_results)
2352 # However, even though we have yielded the root node to be fetched,
2353 # we should have enqued all of the chk pages to be walked, so that we
2354 # can find the keys if they are present
2355 self.assertEqual([key1_a], iterator._uninteresting_queue)
2356 self.assertEqual([(('acc',), 'initial acc content'),
2357 (('ace',), 'initial ace content'),
2358 ], iterator._interesting_item_queue)
2359
2360 def test__read_all_roots_multiple_targets(self):
2361 c_map = self.make_root_only_map()
2362 key1 = c_map.key()
2363 c_map = self.make_one_deep_map()
2364 key2 = c_map.key()
2365 c_map._dump_tree()
2366 key2_c = c_map._root_node._items['c'].key()
2367 key2_d = c_map._root_node._items['d'].key()
2368 c_map.map(('ccc',), 'new ccc value')
2369 key3 = c_map._save()
2370 key3_c = c_map._root_node._items['c'].key()
2371 iterator = self.get_iterator([key2, key3], [key1],
2372 chk_map._search_key_plain)
2373 root_results = [record.key for record in iterator._read_all_roots()]
2374 self.assertEqual(sorted([key2, key3]), sorted(root_results))
2375 self.assertEqual([], iterator._uninteresting_queue)
2376 # the key 'd' is interesting from key2 and key3, but should only be
2377 # entered into the queue 1 time
2378 self.assertEqual(sorted([key2_c, key3_c, key2_d]),
2379 sorted(iterator._interesting_queue))
2380 self.assertEqual([], iterator._interesting_item_queue)
2381
2382 def test__read_all_roots_no_uninteresting(self):
2383 # This is the 'initial branch' case. With nothing in the uninteresting
2384 # set, we can just queue up all root nodes into interesting queue, and
2385 # then have them fast-path flushed via _flush_interesting_queue
2386 c_map = self.make_two_deep_map()
2387 key1 = c_map.key()
2388 iterator = self.get_iterator([key1], [], chk_map._search_key_plain)
2389 root_results = [record.key for record in iterator._read_all_roots()]
2390 self.assertEqual([], root_results)
2391 self.assertEqual([], iterator._uninteresting_queue)
2392 self.assertEqual([key1], iterator._interesting_queue)
2393 self.assertEqual([], iterator._interesting_item_queue)
2394
2395 c_map2 = self.make_one_deep_map()
2396 key2 = c_map2.key()
2397 iterator = self.get_iterator([key1, key2], [],
2398 chk_map._search_key_plain)
2399 root_results = [record.key for record in iterator._read_all_roots()]
2400 self.assertEqual([], root_results)
2401 self.assertEqual([], iterator._uninteresting_queue)
2402 self.assertEqual(sorted([key1, key2]),
2403 sorted(iterator._interesting_queue))
2404 self.assertEqual([], iterator._interesting_item_queue)
2405
2406 def test__read_all_roots_no_uninteresting_16(self):
2407 c_map = self.make_two_deep_map(chk_map._search_key_16)
2408 key1 = c_map.key()
2409 iterator = self.get_iterator([key1], [], chk_map._search_key_16)
2410 root_results = [record.key for record in iterator._read_all_roots()]
2411 self.assertEqual([], root_results)
2412 self.assertEqual([], iterator._uninteresting_queue)
2413 self.assertEqual([key1], iterator._interesting_queue)
2414 self.assertEqual([], iterator._interesting_item_queue)
2415
2416 c_map2 = self.make_one_deep_map(chk_map._search_key_16)
2417 key2 = c_map2.key()
2418 iterator = self.get_iterator([key1, key2], [],
2419 chk_map._search_key_16)
2420 root_results = [record.key for record in iterator._read_all_roots()]
2421 self.assertEqual([], root_results)
2422 self.assertEqual([], iterator._uninteresting_queue)
2423 self.assertEqual(sorted([key1, key2]),
2424 sorted(iterator._interesting_queue))
2425 self.assertEqual([], iterator._interesting_item_queue)
2426
2427 def test__read_all_roots_multiple_uninteresting(self):
2428 c_map = self.make_two_deep_map()
2429 key1 = c_map.key()
2430 c_map._dump_tree() # load everything
2431 key1_a = c_map._root_node._items['a'].key()
2432 c_map.map(('ccc',), 'new ccc value')
2433 key2 = c_map._save()
2434 key2_a = c_map._root_node._items['a'].key()
2435 c_map.map(('add',), 'new add value')
2436 key3 = c_map._save()
2437 key3_a = c_map._root_node._items['a'].key()
2438 iterator = self.get_iterator([key3], [key1, key2],
2439 chk_map._search_key_plain)
2440 root_results = [record.key for record in iterator._read_all_roots()]
2441 self.assertEqual([key3], root_results)
2442 # the 'a' keys should not be queued up 2 times, since they are
2443 # identical
2444 self.assertEqual([key1_a], iterator._uninteresting_queue)
2445 self.assertEqual([key3_a], iterator._interesting_queue)
2446 self.assertEqual([], iterator._interesting_item_queue)
2447
2448 def test__process_next_uninteresting_batched_no_dupes(self):
2449 c_map = self.make_two_deep_map()
2450 key1 = c_map.key()
2451 c_map._dump_tree() # load everything
2452 key1_a = c_map._root_node._items['a'].key()
2453 key1_aa = c_map._root_node._items['a']._items['aa'].key()
2454 key1_ab = c_map._root_node._items['a']._items['ab'].key()
2455 key1_ac = c_map._root_node._items['a']._items['ac'].key()
2456 key1_ad = c_map._root_node._items['a']._items['ad'].key()
2457 c_map.map(('aaa',), 'new aaa value')
2458 key2 = c_map._save()
2459 key2_a = c_map._root_node._items['a'].key()
2460 key2_aa = c_map._root_node._items['a']._items['aa'].key()
2461 c_map.map(('acc',), 'new acc content')
2462 key3 = c_map._save()
2463 key3_a = c_map._root_node._items['a'].key()
2464 key3_ac = c_map._root_node._items['a']._items['ac'].key()
2465 iterator = self.get_iterator([key3], [key1, key2],
2466 chk_map._search_key_plain)
2467 root_results = [record.key for record in iterator._read_all_roots()]
2468 self.assertEqual([key3], root_results)
2469 self.assertEqual(sorted([key1_a, key2_a]),
2470 sorted(iterator._uninteresting_queue))
2471 self.assertEqual([key3_a], iterator._interesting_queue)
2472 self.assertEqual([], iterator._interesting_item_queue)
2473 iterator._process_next_uninteresting()
2474 # All of the uninteresting records should be brought in and queued up,
2475 # but we should not have any duplicates
2476 self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
2477 sorted(iterator._uninteresting_queue))
2478
2479
2480class TestIterInterestingNodes(TestCaseWithExampleMaps):
18962481
1897 def get_map_key(self, a_dict, maximum_size=10):2482 def get_map_key(self, a_dict, maximum_size=10):
1898 c_map = self._get_map(a_dict, maximum_size=maximum_size,2483 c_map = self.get_map(a_dict, maximum_size=maximum_size)
1899 chk_bytes=self.get_chk_bytes())
1900 return c_map.key()2484 return c_map.key()
19012485
1902 def assertIterInteresting(self, expected, interesting_keys,2486 def assertIterInteresting(self, records, items, interesting_keys,
1903 uninteresting_keys):2487 uninteresting_keys):
1904 """Check the result of iter_interesting_nodes.2488 """Check the result of iter_interesting_nodes.
19052489
1906 :param expected: A list of (record_keys, interesting_chk_pages,2490 Note that we no longer care how many steps are taken, etc, just that
1907 interesting key value pairs)2491 the right contents are returned.
2492
2493 :param records: A list of record keys that should be yielded
2494 :param items: A list of items (key,value) that should be yielded.
1908 """2495 """
1909 store = self.get_chk_bytes()2496 store = self.get_chk_bytes()
2497 store._search_key_func = chk_map._search_key_plain
1910 iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,2498 iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
1911 uninteresting_keys)2499 uninteresting_keys)
1912 nodes = list(iter_nodes)2500 record_keys = []
1913 for count, (exp, act) in enumerate(izip(expected, nodes)):2501 all_items = []
1914 exp_record, exp_items = exp2502 for record, new_items in iter_nodes:
1915 record, items = act2503 if record is not None:
1916 exp_tuple = (exp_record, sorted(exp_items))2504 record_keys.append(record.key)
1917 if record is None:2505 if new_items:
1918 act_tuple = (None, sorted(items))2506 all_items.extend(new_items)
1919 else:2507 self.assertEqual(sorted(records), sorted(record_keys))
1920 act_tuple = (record.key, sorted(items))2508 self.assertEqual(sorted(items), sorted(all_items))
1921 self.assertEqual(exp_tuple, act_tuple,
1922 'entry %d did not match expected' % count)
1923 self.assertEqual(len(expected), len(nodes))
19242509
1925 def test_empty_to_one_keys(self):2510 def test_empty_to_one_keys(self):
1926 target = self.get_map_key({('a',): 'content'})2511 target = self.get_map_key({('a',): 'content'})
1927 self.assertIterInteresting(2512 self.assertIterInteresting([target],
1928 [(target, [(('a',), 'content')]),2513 [(('a',), 'content')],
1929 ], [target], [])2514 [target], [])
19302515
1931 def test_none_to_one_key(self):2516 def test_none_to_one_key(self):
1932 basis = self.get_map_key({})2517 basis = self.get_map_key({})
1933 target = self.get_map_key({('a',): 'content'})2518 target = self.get_map_key({('a',): 'content'})
1934 self.assertIterInteresting(2519 self.assertIterInteresting([target],
1935 [(None, [(('a',), 'content')]),2520 [(('a',), 'content')],
1936 (target, []),2521 [target], [basis])
1937 ], [target], [basis])
19382522
1939 def test_one_to_none_key(self):2523 def test_one_to_none_key(self):
1940 basis = self.get_map_key({('a',): 'content'})2524 basis = self.get_map_key({('a',): 'content'})
1941 target = self.get_map_key({})2525 target = self.get_map_key({})
1942 self.assertIterInteresting(2526 self.assertIterInteresting([target],
1943 [(target, [])],2527 [],
1944 [target], [basis])2528 [target], [basis])
19452529
1946 def test_common_pages(self):2530 def test_common_pages(self):
1947 basis = self.get_map_key({('a',): 'content',2531 basis = self.get_map_key({('a',): 'content',
@@ -1964,10 +2548,9 @@
1964 target_map._dump_tree())2548 target_map._dump_tree())
1965 b_key = target_map._root_node._items['b'].key()2549 b_key = target_map._root_node._items['b'].key()
1966 # This should return the root node, and the node for the 'b' key2550 # This should return the root node, and the node for the 'b' key
1967 self.assertIterInteresting(2551 self.assertIterInteresting([target, b_key],
1968 [(target, []),2552 [(('b',), 'other content')],
1969 (b_key, [(('b',), 'other content')])],2553 [target], [basis])
1970 [target], [basis])
19712554
1972 def test_common_sub_page(self):2555 def test_common_sub_page(self):
1973 basis = self.get_map_key({('aaa',): 'common',2556 basis = self.get_map_key({('aaa',): 'common',
@@ -1991,12 +2574,11 @@
1991 # The key for the internal aa node2574 # The key for the internal aa node
1992 a_key = target_map._root_node._items['a'].key()2575 a_key = target_map._root_node._items['a'].key()
1993 # The key for the leaf aab node2576 # The key for the leaf aab node
2577 # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
1994 aab_key = target_map._root_node._items['a']._items['aab'].key()2578 aab_key = target_map._root_node._items['a']._items['aab'].key()
1995 self.assertIterInteresting(2579 self.assertIterInteresting([target, a_key, aab_key],
1996 [(target, []),2580 [(('aab',), 'new')],
1997 (a_key, []),2581 [target], [basis])
1998 (aab_key, [(('aab',), 'new')])],
1999 [target], [basis])
20002582
2001 def test_common_leaf(self):2583 def test_common_leaf(self):
2002 basis = self.get_map_key({})2584 basis = self.get_map_key({})
@@ -2040,29 +2622,22 @@
2040 a_key = target3_map._root_node._items['a'].key()2622 a_key = target3_map._root_node._items['a'].key()
2041 aac_key = target3_map._root_node._items['a']._items['aac'].key()2623 aac_key = target3_map._root_node._items['a']._items['aac'].key()
2042 self.assertIterInteresting(2624 self.assertIterInteresting(
2043 [(None, [(('aaa',), 'common')]),2625 [target1, target2, target3, a_key, aac_key, b_key],
2044 (target1, []),2626 [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
2045 (target2, []),2627 [target1, target2, target3], [basis])
2046 (target3, []),2628
2047 (b_key, [(('bbb',), 'new')]),2629 self.assertIterInteresting(
2048 (a_key, []),2630 [target2, target3, a_key, aac_key, b_key],
2049 (aac_key, [(('aac',), 'other')]),2631 [(('bbb',), 'new'), (('aac',), 'other')],
2050 ], [target1, target2, target3], [basis])2632 [target2, target3], [target1])
20512633
2052 self.assertIterInteresting(2634 # Technically, target1 could be filtered out, but since it is a root
2053 [(target2, []),2635 # node, we yield it immediately, rather than waiting to find out much
2054 (target3, []),2636 # later on.
2055 (b_key, [(('bbb',), 'new')]),2637 self.assertIterInteresting(
2056 (a_key, []),2638 [target1],
2057 (aac_key, [(('aac',), 'other')]),2639 [],
2058 ], [target2, target3], [target1])2640 [target1], [target3])
2059
2060 # This may be a case that we relax. A root node is a deep child of the
2061 # excluded set. The cost is buffering root nodes until we have
2062 # determined all possible exclusions. (Because a prefix of '', cannot
2063 # be excluded.)
2064 self.assertIterInteresting(
2065 [], [target1], [target3])
20662641
2067 def test_multiple_maps(self):2642 def test_multiple_maps(self):
2068 basis1 = self.get_map_key({('aaa',): 'common',2643 basis1 = self.get_map_key({('aaa',): 'common',
@@ -2111,13 +2686,9 @@
2111 # The key for the leaf bba node2686 # The key for the leaf bba node
2112 bba_key = target2_map._root_node._items['b']._items['bba'].key()2687 bba_key = target2_map._root_node._items['b']._items['bba'].key()
2113 self.assertIterInteresting(2688 self.assertIterInteresting(
2114 [(target1, []),2689 [target1, target2, a_key, aac_key, b_key, bba_key],
2115 (target2, []),2690 [(('aac',), 'target1'), (('bba',), 'target2')],
2116 (a_key, []),2691 [target1, target2], [basis1, basis2])
2117 (b_key, []),
2118 (aac_key, [(('aac',), 'target1')]),
2119 (bba_key, [(('bba',), 'target2')]),
2120 ], [target1, target2], [basis1, basis2])
21212692
2122 def test_multiple_maps_overlapping_common_new(self):2693 def test_multiple_maps_overlapping_common_new(self):
2123 # Test that when a node found through the interesting_keys iteration2694 # Test that when a node found through the interesting_keys iteration
@@ -2188,17 +2759,10 @@
2188 right_map._dump_tree())2759 right_map._dump_tree())
2189 # Keys from the right side target - none, the root is enough.2760 # Keys from the right side target - none, the root is enough.
2190 # Test behaviour2761 # Test behaviour
2191 self.expectFailure("we don't properly filter different depths",
2192 self.assertIterInteresting,
2193 [(left, []),
2194 (right, []),
2195 (l_d_key, [(('ddd',), 'change')]),
2196 ], [left, right], [basis])
2197 self.assertIterInteresting(2762 self.assertIterInteresting(
2198 [(left, []),2763 [right, left, l_d_key],
2199 (right, []),2764 [(('ddd',), 'change')],
2200 (l_d_key, [(('ddd',), 'change')]),2765 [left, right], [basis])
2201 ], [left, right], [basis])
22022766
2203 def test_multiple_maps_similar(self):2767 def test_multiple_maps_similar(self):
2204 # We want to have a depth=2 tree, with multiple entries in each leaf2768 # We want to have a depth=2 tree, with multiple entries in each leaf
@@ -2259,8 +2823,6 @@
2259 r_a_key = right_map._root_node._items['a'].key()2823 r_a_key = right_map._root_node._items['a'].key()
2260 r_c_key = right_map._root_node._items['c'].key()2824 r_c_key = right_map._root_node._items['c'].key()
2261 self.assertIterInteresting(2825 self.assertIterInteresting(
2262 [(left, []),2826 [right, left, l_a_key, r_c_key],
2263 (right, []),2827 [(('abb',), 'changed left'), (('cbb',), 'changed right')],
2264 (l_a_key, [(('abb',), 'changed left')]),2828 [left, right], [basis])
2265 (r_c_key, [(('cbb',), 'changed right')]),
2266 ], [left, right], [basis])