Merge lp:~jderose/microfiber/better-bulk into lp:microfiber
- better-bulk
- Merge into trunk
Status: | Merged |
---|---|
Approved by: | James Raymond |
Approved revision: | 125 |
Merged at revision: | 112 |
Proposed branch: | lp:~jderose/microfiber/better-bulk |
Merge into: | lp:microfiber |
Diff against target: |
626 lines (+457/-67) 3 files modified
microfiber.py (+56/-5) setup.py (+4/-0) test_microfiber.py (+397/-62) |
To merge this branch: | bzr merge lp:~jderose/microfiber/better-bulk |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
James Raymond | Approve | ||
Review via email: mp+112982@code.launchpad.net |
Commit message
Description of the change
* Renames Database.bulksave() to Database.bulksave2()
* Adds new Database.bulksave() method that uses "non-atomic" semantics and raises the new BulkConflict exception when any documents have a conflict
* Adds a slew of tests that try to really get to the heart of CouchDB bulk update semantics so we better understand the behaviours (and know if they change)
* Adds experimental Database.get_many() method to make it easier to retrieve many docs at once. Not sure about the name. Database.bulkget() looks horrible, Database.bulk_get() looks off too (to me). As these use different special paths in the CouchDB REST API, it might actually be clearer to have a name that looks quite a bit different from Database.bulksave().
But still... how about Database.
* Adds new `./setup.py test --skip-slow` option to skip the 30-second connection timeout tests, but still run all the other live tests.
- 125. By Jason Gerard DeRose
-
Typo: lets => let's
James Raymond (jamesmr) : | # |
Preview Diff
1 | === modified file 'microfiber.py' |
2 | --- microfiber.py 2012-06-30 01:19:50 +0000 |
3 | +++ microfiber.py 2012-07-02 07:56:21 +0000 |
4 | @@ -310,6 +310,15 @@ |
5 | } |
6 | |
7 | |
8 | +class BulkConflict(Exception): |
9 | + def __init__(self, conflicts, rows): |
10 | + self.conflicts = conflicts |
11 | + self.rows = rows |
12 | + count = len(conflicts) |
13 | + msg = ('conflict on {} doc' if count == 1 else 'conflict on {} docs') |
14 | + super().__init__(msg.format(count)) |
15 | + |
16 | + |
17 | class FakeList(list): |
18 | __slots__ = ('_count', '_iterable') |
19 | |
20 | @@ -659,7 +668,6 @@ |
21 | try: |
22 | self.put(None) |
23 | return True |
24 | - |
25 | except PreconditionFailed: |
26 | return False |
27 | |
28 | @@ -697,18 +705,61 @@ |
29 | |
30 | def bulksave(self, docs): |
31 | """ |
32 | - POST a list of docs to _bulk_docs, update all _rev in place. |
33 | - |
34 | - This method works just like `Database.save()`, except on a whole list |
35 | - of docs all at once. |
36 | + Bulk-save using non-atomic semantics, updates all _rev in-place. |
37 | + |
38 | + This method is similar to `Database.save()`, except this method operates on |
39 | + a list of many docs at once. |
40 | + |
41 | + If there are conflicts, a `BulkConflict` exception will be raised, whose |
42 | + ``conflicts`` attribute will be a list of the documents for which there |
43 | + were conflicts. Your request will *not* have modified these conflicting |
44 | + documents in the database, similar to `Database.save()`. |
45 | + |
46 | + However, all non-conflicting documents will have been saved and their |
47 | + _rev updated in-place. |
48 | + """ |
49 | + for doc in filter(lambda d: '_id' not in d, docs): |
50 | + doc['_id'] = random_id() |
51 | + rows = self.post({'docs': docs}, '_bulk_docs') |
52 | + conflicts = [] |
53 | + for (doc, row) in zip(docs, rows): |
54 | + assert doc['_id'] == row['id'] |
55 | + if 'rev' in row: |
56 | + doc['_rev'] = row['rev'] |
57 | + else: |
58 | + conflicts.append(doc) |
59 | + if conflicts: |
60 | + raise BulkConflict(conflicts, rows) |
61 | + return rows |
62 | + |
63 | + def bulksave2(self, docs): |
64 | + """ |
65 | + Bulk-save using all-or-nothing semantics, updates all _rev in-place. |
66 | + |
67 | + This method is similar to `Database.save()`, except this method operates on |
68 | + a list of many docs at once. |
69 | + |
70 | + Note: for subtle reasons that take a while to explain, you probably |
71 | + don't want to use this method. |
72 | """ |
73 | for doc in filter(lambda d: '_id' not in d, docs): |
74 | doc['_id'] = random_id() |
75 | rows = self.post({'docs': docs, 'all_or_nothing': True}, '_bulk_docs') |
76 | for (doc, row) in zip(docs, rows): |
77 | + assert doc['_id'] == row['id'] |
78 | doc['_rev'] = row['rev'] |
79 | return rows |
80 | |
81 | + def get_many(self, doc_ids): |
82 | + """ |
83 | + Convenience method to retrieve multiple documents at once. |
84 | + |
85 | + As CouchDB has a rather large per-request overhead, retrieving multiple |
86 | + documents at once can greatly improve performance. |
87 | + """ |
88 | + result = self.post({'keys': doc_ids}, '_all_docs', include_docs=True) |
89 | + return [row['doc'] for row in result['rows']] |
90 | + |
91 | def view(self, design, view, **options): |
92 | """ |
93 | Shortcut for making a GET request to a view. |
94 | |
95 | === modified file 'setup.py' |
96 | --- setup.py 2011-12-08 23:38:16 +0000 |
97 | +++ setup.py 2012-07-02 07:56:21 +0000 |
98 | @@ -44,6 +44,7 @@ |
99 | |
100 | user_options = [ |
101 | ('no-live', None, 'skip live tests against tmp CouchDB instances'), |
102 | + ('skip-slow', None, 'skip only the slow 30 second live timeout test'), |
103 | ('auth=', None, |
104 | "live test with 'open', 'basic', or 'oauth'; default is 'basic'" |
105 | ), |
106 | @@ -51,6 +52,7 @@ |
107 | |
108 | def initialize_options(self): |
109 | self.no_live = 0 |
110 | + self.skip_slow = 0 |
111 | self.auth = 'basic' |
112 | |
113 | def finalize_options(self): |
114 | @@ -62,6 +64,8 @@ |
115 | os.environ['MICROFIBER_TEST_NO_LIVE'] = 'true' |
116 | else: |
117 | os.environ['MICROFIBER_TEST_AUTH'] = self.auth |
118 | + if self.skip_slow: |
119 | + os.environ['MICROFIBER_TEST_SKIP_SLOW'] = 'true' |
120 | |
121 | pynames = ['microfiber', 'test_microfiber'] |
122 | |
123 | |
124 | === modified file 'test_microfiber.py' |
125 | --- test_microfiber.py 2012-05-04 04:53:52 +0000 |
126 | +++ test_microfiber.py 2012-07-02 07:56:21 +0000 |
127 | @@ -38,6 +38,7 @@ |
128 | from urllib.parse import urlparse, urlencode |
129 | from http.client import HTTPConnection, HTTPSConnection |
130 | import threading |
131 | +from random import SystemRandom |
132 | |
133 | try: |
134 | import usercouch.misc |
135 | @@ -48,14 +49,39 @@ |
136 | from microfiber import NotFound, MethodNotAllowed, Conflict, PreconditionFailed |
137 | |
138 | |
139 | +random = SystemRandom() |
140 | + |
141 | # OAuth test string from http://oauth.net/core/1.0a/#anchor46 |
142 | BASE_STRING = 'GET&http%3A%2F%2Fphotos.example.net%2Fphotos&file%3Dvacation.jpg%26oauth_consumer_key%3Ddpf43f3p2l4k3l03%26oauth_nonce%3Dkllo9940pd9333jh%26oauth_signature_method%3DHMAC-SHA1%26oauth_timestamp%3D1191242096%26oauth_token%3Dnnch734d00sl2jdk%26oauth_version%3D1.0%26size%3Doriginal' |
143 | |
144 | +B32ALPHABET = frozenset('234567ABCDEFGHIJKLMNOPQRSTUVWXYZ') |
145 | + |
146 | + |
147 | +def is_microfiber_id(_id): |
148 | + assert isinstance(_id, str) |
149 | + return ( |
150 | + len(_id) == microfiber.RANDOM_B32LEN |
151 | + and set(_id).issubset(B32ALPHABET) |
152 | + ) |
153 | + |
154 | |
155 | def random_id(): |
156 | return b32encode(os.urandom(10)).decode('ascii') |
157 | |
158 | |
159 | +def test_id(): |
160 | + """ |
161 | + So we can tell our random test IDs from the ones microfiber.random_id() |
162 | + makes, we use 160-bit IDs instead of 120-bit. |
163 | + """ |
164 | + return b32encode(os.urandom(20)).decode('ascii') |
165 | + |
166 | + |
167 | +assert is_microfiber_id(microfiber.random_id()) |
168 | +assert not is_microfiber_id(random_id()) |
169 | +assert not is_microfiber_id(test_id()) |
170 | + |
171 | + |
172 | class FakeResponse(object): |
173 | def __init__(self, status, reason): |
174 | self.status = status |
175 | @@ -286,6 +312,22 @@ |
176 | ) |
177 | |
178 | |
179 | +class TestBulkConflict(TestCase): |
180 | + def test_init(self): |
181 | + conflicts = ['foo', 'bar'] |
182 | + rows = ['raz', 'jaz'] |
183 | + inst = microfiber.BulkConflict(conflicts, rows) |
184 | + self.assertEqual(str(inst), 'conflict on 2 docs') |
185 | + self.assertIs(inst.conflicts, conflicts) |
186 | + self.assertIs(inst.rows, rows) |
187 | + |
188 | + conflicts = ['hello'] |
189 | + inst = microfiber.BulkConflict(conflicts, rows) |
190 | + self.assertEqual(str(inst), 'conflict on 1 doc') |
191 | + self.assertIs(inst.conflicts, conflicts) |
192 | + self.assertIs(inst.rows, rows) |
193 | + |
194 | + |
195 | class TestCouchBase(TestCase): |
196 | klass = microfiber.CouchBase |
197 | |
198 | @@ -521,6 +563,9 @@ |
199 | klass = microfiber.CouchBase |
200 | |
201 | def test_bad_status_line(self): |
202 | + if os.environ.get('MICROFIBER_TEST_SKIP_SLOW') == 'true': |
203 | + self.skipTest('called with --skip-slow') |
204 | + |
205 | inst = self.klass(self.env) |
206 | |
207 | # Create database |
208 | @@ -804,66 +849,356 @@ |
209 | self.assertTrue(d['_rev'].startswith('1-')) |
210 | self.assertEqual(d['n'], i) |
211 | |
212 | + def test_bulk_non_atomic(self): |
213 | + """ |
214 | + Verify our assumptions about CouchDB "non-atomic" bulk semantics. |
215 | + |
216 | + Results: conflicting docs are not updated, and we know which docs were |
217 | + conflicting; non-conflicting doc get updated normally. |
218 | + |
219 | + Pro tip: use this! |
220 | + """ |
221 | + db = microfiber.Database(self.db, self.env) |
222 | + db.ensure() |
223 | + db.post({'_id': 'example'}) |
224 | + me = db.get('example') |
225 | + you = db.get('example') |
226 | + self.assertEqual(me, |
227 | + { |
228 | + '_id': 'example', |
229 | + '_rev': '1-967a00dff5e02add41819138abb3284d', |
230 | + } |
231 | + ) |
232 | + self.assertEqual(me, you) |
233 | + |
234 | + # you make a change, creating a conflict for me |
235 | + you['x'] = 'foo' |
236 | + db.save(you) |
237 | + self.assertEqual(db.get('example'), |
238 | + { |
239 | + '_id': 'example', |
240 | + '_rev': '2-047387155f2bb8c7cd80b0a5da505e9a', |
241 | + 'x': 'foo', |
242 | + } |
243 | + ) |
244 | + |
245 | + # me makes a change, what happens? |
246 | + me['y'] = 'bar' |
247 | + rows = db.post({'docs': [me]}, '_bulk_docs') |
248 | + self.assertEqual( |
249 | + rows, |
250 | + [{'id': 'example', 'error': 'conflict', 'reason': 'Document update conflict.'}] |
251 | + ) |
252 | + self.assertEqual(db.get('example'), |
253 | + { |
254 | + '_id': 'example', |
255 | + '_rev': '2-047387155f2bb8c7cd80b0a5da505e9a', |
256 | + 'x': 'foo', |
257 | + } |
258 | + ) |
259 | + |
260 | + def test_bulk_all_or_nothing(self): |
261 | + """ |
262 | + Verify our assumptions about CouchDB "all-or-nothing" bulk semantics. |
263 | + |
264 | + Results: subtle and surprising, unlikely what you want! Totally |
265 | + different behavior when both ends are at the same revision number vs |
266 | + when one is ahead in revision number! |
267 | + |
268 | + For example, in this case the last change wins: |
269 | + |
270 | + 1. Sue and Ann both get the "1-" rev of the "foo" doc |
271 | + 2. Sue saves/bulksaves a change in "foo", now at rev "2-" |
272 | + 3. Ann bulksaves a change in "foo" |
273 | + 4. Ann has the winning "2-" rev of "foo" |
274 | + |
275 | + But in this case, something totally different happens: |
276 | + |
277 | + 1. Sue and Ann both get the "1-" rev of the "foo" doc |
278 | + 2. Sue saves/bulksaves a change in "foo", now at rev "2-" |
279 | + 3. Sue saves/bulksaves a *2nd* change in "foo", now at rev "3-" |
280 | + 4. Ann bulksaves a change in "foo" |
281 | + 5. Ann thinks she has the winning "2-" rev of "foo", but Ann didn't |
282 | + make the last change according to rest of the world, and worse, |
283 | + Ann thinks her "2-" rev is the latest, when it's actually "3-" |
284 | + |
285 | + Pro tip: these are not the semantics you're looking for! |
286 | + """ |
287 | + db = microfiber.Database(self.db, self.env) |
288 | + db.ensure() |
289 | + db.post({'_id': 'example'}) |
290 | + me = db.get('example') |
291 | + you = db.get('example') |
292 | + self.assertEqual(me, |
293 | + { |
294 | + '_id': 'example', |
295 | + '_rev': '1-967a00dff5e02add41819138abb3284d', |
296 | + } |
297 | + ) |
298 | + self.assertEqual(me, you) |
299 | + |
300 | + # you make a change, creating a conflict for me |
301 | + you['x'] = 'foo' |
302 | + db.save(you) |
303 | + self.assertEqual(db.get('example'), |
304 | + { |
305 | + '_id': 'example', |
306 | + '_rev': '2-047387155f2bb8c7cd80b0a5da505e9a', |
307 | + 'x': 'foo', |
308 | + } |
309 | + ) |
310 | + |
311 | + # me makes a change, what happens? |
312 | + me['y'] = 'bar' |
313 | + rows = db.post({'docs': [me], 'all_or_nothing': True}, '_bulk_docs') |
314 | + self.assertEqual( |
315 | + rows, |
316 | + [{'id': 'example', 'rev': '2-34e30c39538299cfed3958f6692f794d'}] |
317 | + ) |
318 | + self.assertEqual(db.get('example'), |
319 | + { |
320 | + '_id': 'example', |
321 | + '_rev': '2-34e30c39538299cfed3958f6692f794d', |
322 | + 'y': 'bar', |
323 | + } |
324 | + ) |
325 | + |
326 | + # Seems like reasonable last-one-wins, right? Not so fast! Let's try |
327 | + # another example: |
328 | + db.post({'_id': 'example2'}) |
329 | + me = db.get('example2') |
330 | + you = db.get('example2') |
331 | + self.assertEqual(me, |
332 | + { |
333 | + '_id': 'example2', |
334 | + '_rev': '1-967a00dff5e02add41819138abb3284d', |
335 | + } |
336 | + ) |
337 | + self.assertEqual(me, you) |
338 | + |
339 | + # you make *two* changes, creating a conflict for me |
340 | + you['x'] = 'foo' |
341 | + db.save(you) |
342 | + self.assertEqual(db.get('example2'), |
343 | + { |
344 | + '_id': 'example2', |
345 | + '_rev': '2-047387155f2bb8c7cd80b0a5da505e9a', |
346 | + 'x': 'foo', |
347 | + } |
348 | + ) |
349 | + db.save(you) |
350 | + self.assertEqual(db.get('example2'), |
351 | + { |
352 | + '_id': 'example2', |
353 | + '_rev': '3-074e07f92324e448702162e585e718fb', |
354 | + 'x': 'foo', |
355 | + } |
356 | + ) |
357 | + |
358 | + # me makes a change, what happens? |
359 | + me['y'] = 'bar' |
360 | + rows = db.post({'docs': [me], 'all_or_nothing': True}, '_bulk_docs') |
361 | + self.assertEqual( |
362 | + rows, |
363 | + [{'id': 'example2', 'rev': '2-34e30c39538299cfed3958f6692f794d'}] |
364 | + ) |
365 | + self.assertEqual(db.get('example2'), |
366 | + { |
367 | + '_id': 'example2', |
368 | + '_rev': '3-074e07f92324e448702162e585e718fb', |
369 | + 'x': 'foo', |
370 | + } |
371 | + ) |
372 | + |
373 | def test_bulksave(self): |
374 | - inst = self.klass(self.db, self.env) |
375 | - |
376 | - self.assertRaises(NotFound, inst.get) |
377 | - self.assertEqual(inst.put(None), {'ok': True}) |
378 | - self.assertEqual(inst.get()['db_name'], self.db) |
379 | - self.assertRaises(PreconditionFailed, inst.put, None) |
380 | - |
381 | - docs = [{'_id': random_id(), 'foo': i} for i in range(1000)] |
382 | - copy = deepcopy(docs) |
383 | - rows = inst.bulksave(copy) |
384 | - self.assertIsInstance(rows, list) |
385 | - for (d, c) in zip(docs, copy): |
386 | - self.assertEqual(d['_id'], c['_id']) |
387 | - self.assertEqual(d['foo'], c['foo']) |
388 | - for (r, c) in zip(rows, copy): |
389 | - self.assertEqual(r['id'], c['_id']) |
390 | - self.assertEqual(r['rev'], c['_rev']) |
391 | - self.assertTrue(c['_rev'].startswith('1-')) |
392 | - |
393 | - old = docs |
394 | - docs = copy |
395 | - for d in docs: |
396 | - d['bar'] = random_id() |
397 | - copy = deepcopy(docs) |
398 | - rows = inst.bulksave(copy) |
399 | - for (d, c) in zip(docs, copy): |
400 | - self.assertEqual(d['_id'], c['_id']) |
401 | - self.assertLess(d['_rev'], c['_rev']) |
402 | - self.assertEqual(d['foo'], c['foo']) |
403 | - self.assertEqual(d['bar'], c['bar']) |
404 | - for (r, c) in zip(rows, copy): |
405 | - self.assertEqual(r['id'], c['_id']) |
406 | - self.assertEqual(r['rev'], c['_rev']) |
407 | - self.assertTrue(c['_rev'].startswith('2-')) |
408 | - |
409 | - # FIXME: Is CouchDB 1.0.1 broken in this regard... shouldn't this raise |
410 | - # ExpectationFailed? |
411 | - inst.bulksave(old) |
412 | - |
413 | - # Test compacting the db |
414 | - oldsize = inst.get()['disk_size'] |
415 | - self.assertEqual(inst.post(None, '_compact'), {'ok': True}) |
416 | - while True: |
417 | - time.sleep(1) |
418 | - if inst.get()['compact_running'] is False: |
419 | - break |
420 | - newsize = inst.get()['disk_size'] |
421 | - self.assertLess(newsize, oldsize) |
422 | - |
423 | - # Test that _id is generated if missing: |
424 | - docs = [{'n': i} for i in range(1000)] |
425 | - rows = inst.bulksave(docs) |
426 | - self.assertEqual(len(docs), len(rows)) |
427 | - i = 0 |
428 | - for (d, r) in zip(docs, rows): |
429 | - self.assertEqual(set(d), set(['_id', '_rev', 'n'])) |
430 | - self.assertEqual(d['_id'], r['id']) |
431 | - self.assertEqual(len(d['_id']), 24) |
432 | - self.assertEqual(d['_rev'], r['rev']) |
433 | - self.assertTrue(d['_rev'].startswith('1-')) |
434 | - self.assertEqual(d['n'], i) |
435 | - i += 1 |
436 | + db = microfiber.Database(self.db, self.env) |
437 | + self.assertTrue(db.ensure()) |
438 | + |
439 | + # Test that doc['_id'] gets set automatically |
440 | + markers = tuple(test_id() for i in range(10)) |
441 | + docs = [{'marker': m} for m in markers] |
442 | + rows = db.bulksave(docs) |
443 | + for (marker, doc, row) in zip(markers, docs, rows): |
444 | + self.assertEqual(doc['marker'], marker) |
445 | + self.assertEqual(doc['_id'], row['id']) |
446 | + self.assertEqual(doc['_rev'], row['rev']) |
447 | + self.assertTrue(doc['_rev'].startswith('1-')) |
448 | + self.assertTrue(is_microfiber_id(doc['_id'])) |
449 | + |
450 | + # Test when doc['_id'] is already present |
451 | + ids = tuple(test_id() for i in range(10)) |
452 | + docs = [{'_id': _id} for _id in ids] |
453 | + rows = db.bulksave(docs) |
454 | + for (_id, doc, row) in zip(ids, docs, rows): |
455 | + self.assertEqual(doc['_id'], _id) |
456 | + self.assertEqual(row['id'], _id) |
457 | + self.assertEqual(doc['_rev'], row['rev']) |
458 | + self.assertTrue(doc['_rev'].startswith('1-')) |
459 | + self.assertEqual(db.get(_id), doc) |
460 | + |
461 | + # Let's update all the docs |
462 | + for doc in docs: |
463 | + doc['x'] = 'foo' |
464 | + rows = db.bulksave(docs) |
465 | + for (_id, doc, row) in zip(ids, docs, rows): |
466 | + self.assertEqual(doc['_id'], _id) |
467 | + self.assertEqual(row['id'], _id) |
468 | + self.assertEqual(doc['_rev'], row['rev']) |
469 | + self.assertTrue(doc['_rev'].startswith('2-')) |
470 | + self.assertEqual(doc['x'], 'foo') |
471 | + self.assertEqual(db.get(_id), doc) |
472 | + |
473 | + # Let's update half the docs out-of-band to create conflicts |
474 | + for (i, doc) in enumerate(docs): |
475 | + if i % 2 == 0: |
476 | + d = deepcopy(doc) |
477 | + d['x'] = 'gotcha' |
478 | + db.post(d) |
479 | + |
480 | + # Now let's update all the docs, test for BulkConflict |
481 | + good = [] |
482 | + bad = [] |
483 | + for (i, doc) in enumerate(docs): |
484 | + doc['x'] = 'bar' |
485 | + if i % 2 == 0: |
486 | + bad.append(doc) |
487 | + else: |
488 | + good.append(doc) |
489 | + |
490 | + with self.assertRaises(microfiber.BulkConflict) as cm: |
491 | + rows = db.bulksave(docs) |
492 | + self.assertEqual(str(cm.exception), 'conflict on 5 docs') |
493 | + self.assertEqual(cm.exception.conflicts, bad) |
494 | + self.assertEqual(len(cm.exception.rows), 10) |
495 | + for (i, row) in enumerate(cm.exception.rows): |
496 | + _id = ids[i] |
497 | + doc = docs[i] |
498 | + real = db.get(_id) |
499 | + self.assertEqual(row['id'], _id) |
500 | + self.assertTrue(real['_rev'].startswith('3-')) |
501 | + if i % 2 == 0: |
502 | + self.assertEqual(real['x'], 'gotcha') |
503 | + self.assertEqual(doc['x'], 'bar') |
504 | + self.assertNotIn('rev', row) |
505 | + self.assertTrue(doc['_rev'].startswith('2-')) |
506 | + else: |
507 | + self.assertEqual(real['x'], 'bar') |
508 | + self.assertEqual(row['rev'], doc['_rev']) |
509 | + self.assertEqual(real, doc) |
510 | + |
511 | + def test_bulksave2(self): |
512 | + db = microfiber.Database(self.db, self.env) |
513 | + self.assertTrue(db.ensure()) |
514 | + |
515 | + # Test that doc['_id'] gets set automatically |
516 | + markers = tuple(test_id() for i in range(10)) |
517 | + docs = [{'marker': m} for m in markers] |
518 | + rows = db.bulksave2(docs) |
519 | + for (marker, doc, row) in zip(markers, docs, rows): |
520 | + self.assertEqual(doc['marker'], marker) |
521 | + self.assertEqual(doc['_id'], row['id']) |
522 | + self.assertEqual(doc['_rev'], row['rev']) |
523 | + self.assertTrue(doc['_rev'].startswith('1-')) |
524 | + self.assertTrue(is_microfiber_id(doc['_id'])) |
525 | + |
526 | + # Test when doc['_id'] is already present |
527 | + ids = tuple(test_id() for i in range(10)) |
528 | + docs = [{'_id': _id} for _id in ids] |
529 | + rows = db.bulksave2(docs) |
530 | + for (_id, doc, row) in zip(ids, docs, rows): |
531 | + self.assertEqual(doc['_id'], _id) |
532 | + self.assertEqual(row['id'], _id) |
533 | + self.assertEqual(doc['_rev'], row['rev']) |
534 | + self.assertTrue(doc['_rev'].startswith('1-')) |
535 | + self.assertEqual(db.get(_id), doc) |
536 | + |
537 | + # Let's update all the docs |
538 | + for doc in docs: |
539 | + doc['x'] = 'foo' |
540 | + rows = db.bulksave2(docs) |
541 | + for (_id, doc, row) in zip(ids, docs, rows): |
542 | + self.assertEqual(doc['_id'], _id) |
543 | + self.assertEqual(row['id'], _id) |
544 | + self.assertEqual(doc['_rev'], row['rev']) |
545 | + self.assertTrue(doc['_rev'].startswith('2-')) |
546 | + self.assertEqual(doc['x'], 'foo') |
547 | + self.assertEqual(db.get(_id), doc) |
548 | + |
549 | + # Let's update half the docs out-of-band to create conflicts |
550 | + for (i, doc) in enumerate(docs): |
551 | + if i % 2 == 0: |
552 | + d = deepcopy(doc) |
553 | + d['x'] = 'gotcha' |
554 | + db.save(d) |
555 | + |
556 | + # Now let's update all the docs, test all-or-nothing behavior |
557 | + for doc in docs: |
558 | + doc['x'] = 'bar' |
559 | + rows = db.bulksave2(docs) |
560 | + for (_id, doc, row) in zip(ids, docs, rows): |
561 | + self.assertEqual(doc['_id'], _id) |
562 | + self.assertEqual(row['id'], _id) |
563 | + self.assertEqual(doc['_rev'], row['rev']) |
564 | + self.assertTrue(doc['_rev'].startswith('3-')) |
565 | + self.assertEqual(doc['x'], 'bar') |
566 | + self.assertEqual(db.get(_id), doc) |
567 | + |
568 | + # Again update half the docs out-of-band, but this time saving twice |
569 | + # so the conflict is ahead in revision number: |
570 | + for (i, doc) in enumerate(docs): |
571 | + if i % 2 == 0: |
572 | + d = deepcopy(doc) |
573 | + d['x'] = 'gotcha' |
574 | + db.save(d) |
575 | + db.save(d) |
576 | + self.assertTrue(d['_rev'].startswith('5-')) |
577 | + |
578 | + # Now update all the docs again, realize all-or-nothing is a bad idea: |
579 | + for doc in docs: |
580 | + doc['x'] = 'baz' |
581 | + rows = db.bulksave2(docs) |
582 | + for (i, row) in enumerate(rows): |
583 | + _id = ids[i] |
584 | + doc = docs[i] |
585 | + real = db.get(_id) |
586 | + self.assertEqual(row['id'], _id) |
587 | + if i % 2 == 0: |
588 | + self.assertEqual(real['x'], 'gotcha') |
589 | + self.assertTrue(real['_rev'].startswith('5-')) |
590 | + self.assertTrue(row['rev'].startswith('4-')) |
591 | + else: |
592 | + self.assertEqual(real['x'], 'baz') |
593 | + self.assertTrue(real['_rev'].startswith('4-')) |
594 | + self.assertEqual(row['rev'], real['_rev']) |
595 | + self.assertEqual(doc, real) |
596 | + |
597 | + def test_get_many(self): |
598 | + db = microfiber.Database(self.db, self.env) |
599 | + self.assertTrue(db.ensure()) |
600 | + |
601 | + ids = tuple(test_id() for i in range(50)) |
602 | + docs = [{'_id': _id} for _id in ids] |
603 | + db.bulksave(docs) |
604 | + |
605 | + # Test an empty doc_ids list |
606 | + self.assertEqual(db.get_many([]), []) |
607 | + |
608 | + # Test a get_many on all the docs |
609 | + self.assertEqual(db.get_many(ids), docs) |
610 | + |
611 | + # Test with some random subsets |
612 | + rdocs = random.sample(docs, 40) |
613 | + self.assertEqual(db.get_many([d['_id'] for d in rdocs]), rdocs) |
614 | + |
615 | + rdocs = random.sample(docs, 20) |
616 | + self.assertEqual(db.get_many([d['_id'] for d in rdocs]), rdocs) |
617 | + |
618 | + rdocs = random.sample(docs, 10) |
619 | + self.assertEqual(db.get_many([d['_id'] for d in rdocs]), rdocs) |
620 | + |
621 | + # Test with duplicate ids |
622 | + self.assertEqual( |
623 | + db.get_many([ids[7], ids[7], ids[7]]), |
624 | + [docs[7], docs[7], docs[7]] |
625 | + ) |
626 | + |