Merge lp:~lifeless/bzr/bug-514090 into lp:bzr/2.1

Proposed by Robert Collins
Status: Merged
Merged at revision: not available
Proposed branch: lp:~lifeless/bzr/bug-514090
Merge into: lp:bzr/2.1
Diff against target: 173 lines (+59/-20)
3 files modified
NEWS (+18/-0)
bzrlib/chk_map.py (+35/-14)
bzrlib/tests/test_chk_map.py (+6/-6)
To merge this branch: bzr merge lp:~lifeless/bzr/bug-514090
Reviewer: Martin Pool
Review status: Approve
Review via email: mp+18263@code.launchpad.net
Robert Collins (lifeless) wrote:

Yo. Thread race fixness FTW.

Martin Pool (mbp) wrote:

Makes sense as an emergency fix.

It would be nice to hang this off a specific object scope to be safer, or to rely on the app serializing access to a Repository.
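
For illustration, "hanging this off a specific object scope" might look roughly like the following sketch (hypothetical; RepositoryPageCache is an invented name, not part of this patch):

    from bzrlib import lru_cache

    _PAGE_CACHE_SIZE = 4*1024*1024

    class RepositoryPageCache(object):
        """Hypothetical page cache owned by one Repository-like object.

        Tying the cache to an object rather than to module-level state
        means its lifetime and visibility follow the owning object, so
        two threads only share a cache if they share the object.
        """

        def __init__(self, max_size=_PAGE_CACHE_SIZE):
            # We cache raw bytes, so len(value) sizes entries accurately.
            self._page_cache = lru_cache.LRUSizeCache(max_size)

        def __getitem__(self, key):
            return self._page_cache[key]  # KeyError on a miss

        def add(self, key, bytes):
            self._page_cache.add(key, bytes)

        def clear(self):
            self._page_cache.clear()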

review: Approve
Martin Packman (gz) wrote:

A small note: there's a long-standing bug in both mod_python and mod_wsgi where threading.local() objects only last for a single request:
<http://code.google.com/p/modwsgi/issues/detail?id=120>

I presume, however, that this doesn't affect Launchpad; at worst it would just make the cache useless.
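
The failure mode there should be quiet rather than crashy: a thread whose threading.local state was discarded just looks uninitialized, so the patch's _get_cache() would rebuild an empty cache on the next request. A simplified sketch of that behaviour (a plain dict stands in for the real LRUSizeCache):

    import threading

    _thread_caches = threading.local()

    def _get_cache():
        # In a brand-new thread, or one whose local state mod_wsgi threw
        # away between requests, the attribute is missing, so we silently
        # start over with an empty cache: still correct, just no reuse.
        cache = getattr(_thread_caches, 'page_cache', None)
        if cache is None:
            cache = {}
            _thread_caches.page_cache = cache
        return cache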

Robert Collins (lifeless) wrote:

We run with paste.deploy. Do you know if this affects that?

mod_wsgi being affected may cause this to hurt smart-server-over-http
users, but OTOH the cache shouldn't be very important between requests
anyway, as we try very hard to do all the heavy lifting in a single
smart verb.

Thanks a lot for mentioning this!

-Rob
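
For context, the race being fixed can be sketched with a hypothetical driver like this (illustrative only, not from the bzr test suite): several threads share the old module-level LRUSizeCache with no locking, so concurrent lookups and evictions can corrupt the LRU bookkeeping and surface as the KeyError described in the NEWS entry below.

    import threading
    from bzrlib import lru_cache

    # One small shared cache and no locks: the pre-patch situation.
    shared_cache = lru_cache.LRUSizeCache(10*1024)

    def worker():
        for i in range(100000):
            key = ('sha1:page-%d' % (i % 256),)
            try:
                shared_cache[key]
            except KeyError:
                # Another thread may be evicting entries right now; with
                # enough threads the unsynchronized LRU state can go bad
                # and raise KeyError from inside bzrlib.lru_cache.
                shared_cache[key] = 'x' * 64

    threads = [threading.Thread(target=worker) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()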

Preview Diff

=== modified file 'NEWS'
--- NEWS 2010-01-29 09:13:30 +0000
+++ NEWS 2010-01-29 15:23:14 +0000
@@ -5,6 +5,24 @@
 .. contents:: List of Releases
    :depth: 1
 
+bzr 2.1.0 (not released yet)
+############################
+
+:Codename:
+:2.1.0:
+
+Bug Fixes
+*********
+
+* Using the ``bzrlib.chk_map`` module from within multiple threads at the
+  same time was broken due to race conditions with a module level page
+  cache. This shows up as a KeyError in the ``bzrlib.lru_cache`` code with
+  ``bzrlib.chk_map`` in the backtrace, and can be triggered without using
+  the same high level objects such as ``bzrlib.repository.Repository``
+  from different threads. chk_map now uses a thread local cache which may
+  increase memory pressure on processes using threads.
+  (Robert Collins, John Arbash Meinel, #514090)
+
 bzr 2.1.0rc2
 ############
 
 
=== modified file 'bzrlib/chk_map.py'
--- bzrlib/chk_map.py 2009-11-02 17:27:52 +0000
+++ bzrlib/chk_map.py 2010-01-29 15:23:14 +0000
@@ -38,6 +38,7 @@
 """
 
 import heapq
+import threading
 
 from bzrlib import lazy_import
 lazy_import.lazy_import(globals(), """
@@ -59,11 +60,31 @@
 # If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
 # out, it takes 3.1MB to cache the layer.
 _PAGE_CACHE_SIZE = 4*1024*1024
-# We are caching bytes so len(value) is perfectly accurate
-_page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)
+# Per thread caches for 2 reasons:
+# - in the server we may be serving very different content, so we get less
+#   cache thrashing.
+# - we avoid locking on every cache lookup.
+_thread_caches = threading.local()
+# The page cache.
+_thread_caches.page_cache = None
+
+def _get_cache():
+    """Get the per-thread page cache.
+
+    We need a function to do this because in a new thread the _thread_caches
+    threading.local object does not have the cache initialized yet.
+    """
+    page_cache = getattr(_thread_caches, 'page_cache', None)
+    if page_cache is None:
+        # We are caching bytes so len(value) is perfectly accurate
+        page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)
+        _thread_caches.page_cache = page_cache
+    return page_cache
+
 
 def clear_cache():
-    _page_cache.clear()
+    _get_cache().clear()
+
 
 # If a ChildNode falls below this many bytes, we check for a remap
 _INTERESTING_NEW_SIZE = 50
@@ -161,11 +182,11 @@
 
     def _read_bytes(self, key):
         try:
-            return _page_cache[key]
+            return _get_cache()[key]
         except KeyError:
             stream = self._store.get_record_stream([key], 'unordered', True)
             bytes = stream.next().get_bytes_as('fulltext')
-            _page_cache[key] = bytes
+            _get_cache()[key] = bytes
             return bytes
 
     def _dump_tree(self, include_keys=False):
@@ -901,7 +922,7 @@
         bytes = ''.join(lines)
         if len(bytes) != self._current_size():
             raise AssertionError('Invalid _current_size')
-        _page_cache.add(self._key, bytes)
+        _get_cache().add(self._key, bytes)
         return [self._key]
 
     def refs(self):
@@ -1143,7 +1164,7 @@
         found_keys = set()
         for key in keys:
             try:
-                bytes = _page_cache[key]
+                bytes = _get_cache()[key]
             except KeyError:
                 continue
             else:
@@ -1174,7 +1195,7 @@
                 prefix, node_key_filter = keys[record.key]
                 node_and_filters.append((node, node_key_filter))
                 self._items[prefix] = node
-                _page_cache.add(record.key, bytes)
+                _get_cache().add(record.key, bytes)
             for info in node_and_filters:
                 yield info
 
@@ -1300,7 +1321,7 @@
             lines.append(serialised[prefix_len:])
         sha1, _, _ = store.add_lines((None,), (), lines)
         self._key = StaticTuple("sha1:" + sha1,).intern()
-        _page_cache.add(self._key, ''.join(lines))
+        _get_cache().add(self._key, ''.join(lines))
         yield self._key
 
     def _search_key(self, key):
@@ -1489,11 +1510,11 @@
         self._state = None
 
     def _read_nodes_from_store(self, keys):
-        # We chose not to use _page_cache, because we think in terms of records
-        # to be yielded. Also, we expect to touch each page only 1 time during
-        # this code. (We may want to evaluate saving the raw bytes into the
-        # page cache, which would allow a working tree update after the fetch
-        # to not have to read the bytes again.)
+        # We chose not to use _get_cache(), because we think in
+        # terms of records to be yielded. Also, we expect to touch each page
+        # only 1 time during this code. (We may want to evaluate saving the
+        # raw bytes into the page cache, which would allow a working tree
+        # update after the fetch to not have to read the bytes again.)
         as_st = StaticTuple.from_sequence
         stream = self._store.get_record_stream(keys, 'unordered', True)
         for record in stream:
 
=== modified file 'bzrlib/tests/test_chk_map.py'
--- bzrlib/tests/test_chk_map.py 2009-10-21 20:53:21 +0000
+++ bzrlib/tests/test_chk_map.py 2010-01-29 15:23:14 +0000
@@ -905,7 +905,7 @@
         # Unmapping the new node will check the existing nodes to see if they
         # would fit.
         # Clear the page cache so we ensure we have to read all the children
-        chk_map._page_cache.clear()
+        chk_map.clear_cache()
         chkmap.unmap(('aad',))
         self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
         self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
@@ -945,12 +945,12 @@
         # Now clear the page cache, and only include 2 of the children in the
         # cache
         aab_key = chkmap._root_node._items['aab']
-        aab_bytes = chk_map._page_cache[aab_key]
+        aab_bytes = chk_map._get_cache()[aab_key]
         aac_key = chkmap._root_node._items['aac']
-        aac_bytes = chk_map._page_cache[aac_key]
-        chk_map._page_cache.clear()
-        chk_map._page_cache[aab_key] = aab_bytes
-        chk_map._page_cache[aac_key] = aac_bytes
+        aac_bytes = chk_map._get_cache()[aac_key]
+        chk_map.clear_cache()
+        chk_map._get_cache()[aab_key] = aab_bytes
+        chk_map._get_cache()[aac_key] = aac_bytes
 
         # Unmapping the new node will check the nodes from the page cache
         # first, and not have to read in 'aaa'
