Merge lp:~jderose/microfiber/views into lp:microfiber

Proposed by Jason Gerard DeRose
Status: Merged
Merged at revision: 117
Proposed branch: lp:~jderose/microfiber/views
Merge into: lp:microfiber
Diff against target: 281 lines (+95/-20)
2 files modified
microfiber.py (+12/-3)
test_microfiber.py (+83/-17)
To merge this branch: bzr merge lp:~jderose/microfiber/views
Reviewer Review Type Date Requested Status
Jason Gerard DeRose Approve
Review via email: mp+113822@code.launchpad.net

Description of the change

Some misc API changes I grouped up here:

* Database.view() now includes reduce=False by default, which is really how the CouchDB API should work in the first place. For code that only uses a non-reduced view, it sucks for this code to have to know whether there is a reduce function. James and I discussed this a while back, so I'm trying it first in Microfiber, and then will add it to couch.js if we like it.

* Renamed new "non-atomic" Database.bulksave() to Database.save_many() because combined with Database.get_many(), it makes for a clearer, more consistent API.

* Renamed original "all-or-nothing" Database.bulksave2() back to Database.bulksave(). This way there is no API breakage, just in case anyone out there has been using this and wants the "all-or-nothing" behavior, understanding the implications. But I think we should deprecate this method soon. My idea with the convenience methods isn't to capture every scenario, but instead to just capture a few important patterns that might be tricky to get right using post(), get(), etc.

* Adds experimental id_slice_iter() function to give you all the ids from the rows in a view result, but chunked into groups of 25 (override with size=10 or whatever). This is for certain operations in Dmedia where we need to update a large number of docs, like the new Core.purge_store() method. So the idea is as you go through the view results, you use Database.get_many() to get 25 docs, update them all, and then save them back with Database.save_many(). This provides a huge performance improvement over get()/save() for each doc... 10x easily.

To post a comment you must log in.
Revision history for this message
Jason Gerard DeRose (jderose) wrote :

I'm gonna self-approve this as it's blocking other work.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'microfiber.py'
2--- microfiber.py 2012-07-06 06:59:53 +0000
3+++ microfiber.py 2012-07-07 12:53:19 +0000
4@@ -49,6 +49,7 @@
5 from urllib.parse import urlparse, urlencode, quote_plus
6 from http.client import HTTPConnection, HTTPSConnection, BadStatusLine
7 import threading
8+import math
9
10
11 __all__ = (
12@@ -234,6 +235,11 @@
13 return replication_body(name, peer, **kw)
14
15
16+def id_slice_iter(rows, size=25):
17+ for i in range(math.ceil(len(rows) / size)):
18+ yield [row['id'] for row in rows[i*size : (i+1)*size]]
19+
20+
21 class HTTPError(Exception):
22 """
23 Base class for custom `microfiber` exceptions.
24@@ -682,7 +688,8 @@
25 * `Database.server()` - return a `Server` pointing at same URL
26 * `Database.ensure()` - ensure the database exists
27 * `Database.save(doc)` - save to CouchDB, update doc _id & _rev in place
28- * `Database.bulksave(docs)` - as above, but with a list of docs
29+ * `Database.save_many(docs)` - as above, but with a list of docs
30+ * `Database.get_many(doc_ids)` - retrieve many docs at once
31 * `Datebase.view(design, view, **options)` - shortcut method, that's all
32 """
33 def __init__(self, name, env=SERVER):
34@@ -749,7 +756,7 @@
35 doc['_rev'] = r['rev']
36 return r
37
38- def bulksave(self, docs):
39+ def save_many(self, docs):
40 """
41 Bulk-save using non-atomic semantics, updates all _rev in-place.
42
43@@ -778,7 +785,7 @@
44 raise BulkConflict(conflicts, rows)
45 return rows
46
47- def bulksave2(self, docs):
48+ def bulksave(self, docs):
49 """
50 Bulk-save using all-or-nothing semantics, updates all _rev in-place.
51
52@@ -819,6 +826,8 @@
53
54 ``Database.get('_design', design, '_view', view, **options)``
55 """
56+ if 'reduce' not in options:
57+ options['reduce'] = False
58 return self.get('_design', design, '_view', view, **options)
59
60 def dump(self, fp, attachments=True):
61
62=== modified file 'test_microfiber.py'
63--- test_microfiber.py 2012-07-06 06:59:53 +0000
64+++ test_microfiber.py 2012-07-07 12:53:19 +0000
65@@ -46,6 +46,7 @@
66 usercouch = None
67
68 import microfiber
69+from microfiber import random_id
70 from microfiber import NotFound, MethodNotAllowed, Conflict, PreconditionFailed
71
72
73@@ -65,10 +66,6 @@
74 )
75
76
77-def random_id():
78- return b32encode(os.urandom(10)).decode('ascii')
79-
80-
81 def random_oauth():
82 return dict(
83 (k, random_id())
84@@ -92,7 +89,6 @@
85
86
87 assert is_microfiber_id(microfiber.random_id())
88-assert not is_microfiber_id(random_id())
89 assert not is_microfiber_id(test_id())
90
91
92@@ -560,6 +556,44 @@
93 }
94 )
95
96+ def test_id_slice_iter(self):
97+ ids = [random_id() for i in range(74)]
98+ rows = [{'id': _id} for _id in ids]
99+ chunks = list(microfiber.id_slice_iter(rows))
100+ self.assertEqual(len(chunks), 3)
101+ self.assertEqual(len(chunks[0]), 25)
102+ self.assertEqual(len(chunks[1]), 25)
103+ self.assertEqual(len(chunks[2]), 24)
104+ accum = []
105+ for chunk in chunks:
106+ accum.extend(chunk)
107+ self.assertEqual(accum, ids)
108+
109+ ids = [random_id() for i in range(75)]
110+ rows = [{'id': _id} for _id in ids]
111+ chunks = list(microfiber.id_slice_iter(rows))
112+ self.assertEqual(len(chunks), 3)
113+ self.assertEqual(len(chunks[0]), 25)
114+ self.assertEqual(len(chunks[1]), 25)
115+ self.assertEqual(len(chunks[2]), 25)
116+ accum = []
117+ for chunk in chunks:
118+ accum.extend(chunk)
119+ self.assertEqual(accum, ids)
120+
121+ ids = [random_id() for i in range(76)]
122+ rows = [{'id': _id} for _id in ids]
123+ chunks = list(microfiber.id_slice_iter(rows))
124+ self.assertEqual(len(chunks), 4)
125+ self.assertEqual(len(chunks[0]), 25)
126+ self.assertEqual(len(chunks[1]), 25)
127+ self.assertEqual(len(chunks[2]), 25)
128+ self.assertEqual(len(chunks[3]), 1)
129+ accum = []
130+ for chunk in chunks:
131+ accum.extend(chunk)
132+ self.assertEqual(accum, ids)
133+
134
135 class TestErrors(TestCase):
136 def test_errors(self):
137@@ -825,6 +859,38 @@
138 self.assertEqual(s._basic, 'foo')
139 self.assertEqual(s._oauth, 'bar')
140
141+ def test_view(self):
142+ class Mock(microfiber.Database):
143+ def get(self, *parts, **options):
144+ self._parts = parts
145+ self._options = options
146+ assert not hasattr(self, '_return')
147+ self._return = random_id()
148+ return self._return
149+
150+ db = Mock('mydb')
151+ self.assertEqual(db.view('foo', 'bar'), db._return)
152+ self.assertEqual(db._parts, ('_design', 'foo', '_view', 'bar'))
153+ self.assertEqual(db._options, {'reduce': False})
154+
155+ db = Mock('mydb')
156+ self.assertEqual(db.view('foo', 'bar', reduce=True), db._return)
157+ self.assertEqual(db._parts, ('_design', 'foo', '_view', 'bar'))
158+ self.assertEqual(db._options, {'reduce': True})
159+
160+ db = Mock('mydb')
161+ self.assertEqual(db.view('foo', 'bar', include_docs=True), db._return)
162+ self.assertEqual(db._parts, ('_design', 'foo', '_view', 'bar'))
163+ self.assertEqual(db._options, {'reduce': False, 'include_docs': True})
164+
165+ db = Mock('mydb')
166+ self.assertEqual(
167+ db.view('foo', 'bar', include_docs=True, reduce=True),
168+ db._return
169+ )
170+ self.assertEqual(db._parts, ('_design', 'foo', '_view', 'bar'))
171+ self.assertEqual(db._options, {'reduce': True, 'include_docs': True})
172+
173
174 class ReplicationTestCase(TestCase):
175 def setUp(self):
176@@ -1348,14 +1414,14 @@
177 }
178 )
179
180- def test_bulksave(self):
181+ def test_save_many(self):
182 db = microfiber.Database(self.db, self.env)
183 self.assertTrue(db.ensure())
184
185 # Test that doc['_id'] gets set automatically
186 markers = tuple(test_id() for i in range(10))
187 docs = [{'marker': m} for m in markers]
188- rows = db.bulksave(docs)
189+ rows = db.save_many(docs)
190 for (marker, doc, row) in zip(markers, docs, rows):
191 self.assertEqual(doc['marker'], marker)
192 self.assertEqual(doc['_id'], row['id'])
193@@ -1366,7 +1432,7 @@
194 # Test when doc['_id'] is already present
195 ids = tuple(test_id() for i in range(10))
196 docs = [{'_id': _id} for _id in ids]
197- rows = db.bulksave(docs)
198+ rows = db.save_many(docs)
199 for (_id, doc, row) in zip(ids, docs, rows):
200 self.assertEqual(doc['_id'], _id)
201 self.assertEqual(row['id'], _id)
202@@ -1377,7 +1443,7 @@
203 # Let's update all the docs
204 for doc in docs:
205 doc['x'] = 'foo'
206- rows = db.bulksave(docs)
207+ rows = db.save_many(docs)
208 for (_id, doc, row) in zip(ids, docs, rows):
209 self.assertEqual(doc['_id'], _id)
210 self.assertEqual(row['id'], _id)
211@@ -1404,7 +1470,7 @@
212 good.append(doc)
213
214 with self.assertRaises(microfiber.BulkConflict) as cm:
215- rows = db.bulksave(docs)
216+ rows = db.save_many(docs)
217 self.assertEqual(str(cm.exception), 'conflict on 5 docs')
218 self.assertEqual(cm.exception.conflicts, bad)
219 self.assertEqual(len(cm.exception.rows), 10)
220@@ -1424,14 +1490,14 @@
221 self.assertEqual(row['rev'], doc['_rev'])
222 self.assertEqual(real, doc)
223
224- def test_bulksave2(self):
225+ def test_bulksave(self):
226 db = microfiber.Database(self.db, self.env)
227 self.assertTrue(db.ensure())
228
229 # Test that doc['_id'] gets set automatically
230 markers = tuple(test_id() for i in range(10))
231 docs = [{'marker': m} for m in markers]
232- rows = db.bulksave2(docs)
233+ rows = db.bulksave(docs)
234 for (marker, doc, row) in zip(markers, docs, rows):
235 self.assertEqual(doc['marker'], marker)
236 self.assertEqual(doc['_id'], row['id'])
237@@ -1442,7 +1508,7 @@
238 # Test when doc['_id'] is already present
239 ids = tuple(test_id() for i in range(10))
240 docs = [{'_id': _id} for _id in ids]
241- rows = db.bulksave2(docs)
242+ rows = db.bulksave(docs)
243 for (_id, doc, row) in zip(ids, docs, rows):
244 self.assertEqual(doc['_id'], _id)
245 self.assertEqual(row['id'], _id)
246@@ -1453,7 +1519,7 @@
247 # Let's update all the docs
248 for doc in docs:
249 doc['x'] = 'foo'
250- rows = db.bulksave2(docs)
251+ rows = db.bulksave(docs)
252 for (_id, doc, row) in zip(ids, docs, rows):
253 self.assertEqual(doc['_id'], _id)
254 self.assertEqual(row['id'], _id)
255@@ -1472,7 +1538,7 @@
256 # Now let's update all the docs, test all-or-nothing behavior
257 for doc in docs:
258 doc['x'] = 'bar'
259- rows = db.bulksave2(docs)
260+ rows = db.bulksave(docs)
261 for (_id, doc, row) in zip(ids, docs, rows):
262 self.assertEqual(doc['_id'], _id)
263 self.assertEqual(row['id'], _id)
264@@ -1494,7 +1560,7 @@
265 # Now update all the docs again, realize all-or-nothing is a bad idea:
266 for doc in docs:
267 doc['x'] = 'baz'
268- rows = db.bulksave2(docs)
269+ rows = db.bulksave(docs)
270 for (i, row) in enumerate(rows):
271 _id = ids[i]
272 doc = docs[i]
273@@ -1516,7 +1582,7 @@
274
275 ids = tuple(test_id() for i in range(50))
276 docs = [{'_id': _id} for _id in ids]
277- db.bulksave(docs)
278+ db.save_many(docs)
279
280 # Test an empty doc_ids list
281 self.assertEqual(db.get_many([]), [])

Subscribers

People subscribed via source and target branches