Merge lp:~chipaca/u1db/prune-conflicts-automerges into lp:u1db

Proposed by John Lenton
Status: Rejected
Rejected by: Eric Casteleijn
Proposed branch: lp:~chipaca/u1db/prune-conflicts-automerges
Merge into: lp:u1db
Diff against target: 444 lines (+299/-14)
6 files modified
src/u1db.c (+37/-2)
u1db/backends/__init__.py (+8/-2)
u1db/backends/inmemory.py (+10/-1)
u1db/backends/sqlite_backend.py (+12/-8)
u1db/tests/test_backends.py (+55/-1)
u1db/tests/test_sync.py (+177/-0)
To merge this branch: bzr merge lp:~chipaca/u1db/prune-conflicts-automerges
Reviewer Review Type Date Requested Status
Samuele Pedroni Needs Fixing
Review via email: mp+107517@code.launchpad.net

Commit message

On sync, autoresolve conflicts whose content matches the new revision.

Description of the change

This makes _prune_conflicts autoresolve conflicts that have the same content as the new revision. E.g. if you have a document at rev a2 with a conflict a1b1, and you get a new revision a3 that has the same content as a1b1, the a1b1 conflict is resolved.

To post a comment you must log in.
316. By John Lenton

s/automerge/autoresolve/

Revision history for this message
John Lenton (chipaca) wrote :

Not sure why I was calling it 'automerge', when it's 'autoresolve'. Anyway, fixed that.

Revision history for this message
Samuele Pedroni (pedronis) wrote :

needs testing of the lines:

108 + if doc.rev != rev:
109 + # conflicts have been autoresolved
110 + state = 'superseded'

also some integration testing in test_sync how they behave in there, the idea is that the autoresolved document will be sent back if I understand correctly

review: Needs Fixing
317. By John Lenton

add explicit tests for the state returned by _put_doc_if_newer when automerging; add a sync test to test this autoresolving happening in the wild

Revision history for this message
John Lenton (chipaca) wrote :

The autoresolved document is not sent back by 'superseded'. Not sure if that is right or not.

318. By John Lenton

added integration tests

Revision history for this message
Samuele Pedroni (pedronis) wrote :

last annyoing detail, tests that really require save_conflicts=True (not sure which of the ones you added do or don't) need to live in LocalDatabaseWithConflictsTests, not LocalDatabaseTests

Revision history for this message
Eric Casteleijn (thisfred) wrote :

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/u1db.c'
2--- src/u1db.c 2012-05-29 15:49:21 +0000
3+++ src/u1db.c 2012-05-31 16:03:21 +0000
4@@ -678,10 +678,12 @@
5 prune_conflicts(u1database *db, u1db_document *doc,
6 u1db_vectorclock *new_vc)
7 {
8+ const char *local_replica_uid = NULL;
9 int status = U1DB_OK;
10+ int did_autoresolve = 0;
11 sqlite3_stmt *statement;
12 status = sqlite3_prepare_v2(db->sql_handle,
13- "SELECT doc_rev FROM conflicts WHERE doc_id = ?", -1,
14+ "SELECT doc_rev, content FROM conflicts WHERE doc_id = ?", -1,
15 &statement, NULL);
16 if (status != SQLITE_OK) { goto finish; }
17 status = sqlite3_bind_text(statement, 1, doc->doc_id, -1, SQLITE_TRANSIENT);
18@@ -689,11 +691,15 @@
19 status = sqlite3_step(statement);
20 while (status == SQLITE_ROW) {
21 const char *conflict_rev;
22+ const char *conflict_content;
23 u1db_vectorclock *conflict_vc;
24
25+ conflict_content = (const char*)sqlite3_column_text(statement, 1);
26 conflict_rev = (const char*)sqlite3_column_text(statement, 0);
27 conflict_vc = u1db__vectorclock_from_str(conflict_rev);
28- if (conflict_vc == NULL) {
29+ if (conflict_vc == NULL
30+ || (sqlite3_column_type(statement, 1) != SQLITE_NULL
31+ && conflict_content == NULL)) {
32 status = U1DB_NOMEM;
33 } else {
34 if (u1db__vectorclock_is_newer(new_vc, conflict_vc)) {
35@@ -702,6 +708,16 @@
36 // find out differently, update this to create a list of
37 // things to delete, then iterate over deleting them.
38 status = delete_conflict(db, doc->doc_id, conflict_rev);
39+ } else if ((doc->json == NULL && conflict_content == NULL)
40+ || (doc->json != NULL && conflict_content != NULL
41+ && strcmp(doc->json, conflict_content) == 0)) {
42+ did_autoresolve = 1;
43+ status = u1db__vectorclock_maximize(new_vc, conflict_vc);
44+ if (status != U1DB_OK) {
45+ u1db__free_vectorclock(&conflict_vc);
46+ goto finish;
47+ }
48+ status = delete_conflict(db, doc->doc_id, conflict_rev);
49 } else {
50 // There is an existing conflict that we do *not* supersede,
51 // make sure the document is marked conflicted
52@@ -716,6 +732,14 @@
53 }
54 if (status == SQLITE_DONE) {
55 status = U1DB_OK;
56+ } else if (status == U1DB_OK && did_autoresolve) {
57+ status = u1db_get_replica_uid(db, &local_replica_uid);
58+ if (status != SQLITE_OK) { goto finish; }
59+ status = u1db__vectorclock_increment(new_vc, local_replica_uid);
60+ if (status != SQLITE_OK) { goto finish; }
61+ free(doc->doc_rev);
62+ status = u1db__vectorclock_as_str(new_vc, &doc->doc_rev);
63+ if (status != SQLITE_OK) { goto finish; }
64 }
65 finish:
66 sqlite3_finalize(statement);
67@@ -772,9 +796,20 @@
68 }
69 if (u1db__vectorclock_is_newer(new_vc, stored_vc)) {
70 // Just take the newer version
71+ char *rev = strdup(doc->doc_rev);
72+ if (rev == NULL) {
73+ status = U1DB_NOMEM;
74+ goto finish;
75+ }
76 store = 1;
77 *state = U1DB_INSERTED;
78 status = prune_conflicts(db, doc, new_vc);
79+ // if the doc's rev has been updated, conflicts were autoresolved
80+ if (status == U1DB_OK && strcmp(rev, doc->doc_rev) != 0) {
81+ *state = U1DB_SUPERSEDED;
82+ }
83+ free(rev);
84+ if (status != U1DB_OK) { goto finish; }
85 } else if (u1db__vectorclock_is_newer(stored_vc, new_vc)) {
86 // The existing version is newer than the one supplied
87 store = 0;
88
89=== modified file 'u1db/backends/__init__.py'
90--- u1db/backends/__init__.py 2012-05-24 21:09:21 +0000
91+++ u1db/backends/__init__.py 2012-05-31 16:03:21 +0000
92@@ -22,6 +22,7 @@
93 import u1db
94 from u1db import (
95 errors,
96+ vectorclock,
97 )
98 import u1db.sync
99 from u1db.vectorclock import VectorClockRev
100@@ -102,9 +103,14 @@
101 else:
102 cur_vcr = VectorClockRev(cur_doc.rev)
103 if doc_vcr.is_newer(cur_vcr):
104- self._put_and_update_indexes(cur_doc, doc)
105+ rev = doc.rev
106 self._prune_conflicts(doc, doc_vcr)
107- state = 'inserted'
108+ if doc.rev != rev:
109+ # conflicts have been autoresolved
110+ state = 'superseded'
111+ else:
112+ state = 'inserted'
113+ self._put_and_update_indexes(cur_doc, doc)
114 elif doc.rev == cur_doc.rev:
115 # magical convergence
116 state = 'converged'
117
118=== modified file 'u1db/backends/inmemory.py'
119--- u1db/backends/inmemory.py 2012-05-30 19:42:52 +0000
120+++ u1db/backends/inmemory.py 2012-05-31 16:03:21 +0000
121@@ -148,12 +148,21 @@
122
123 def _prune_conflicts(self, doc, doc_vcr):
124 if self._has_conflicts(doc.doc_id):
125+ autoresolved = False
126 remaining_conflicts = []
127 cur_conflicts = self._conflicts[doc.doc_id]
128 for c_rev, c_doc in cur_conflicts:
129- if doc_vcr.is_newer(vectorclock.VectorClockRev(c_rev)):
130+ c_vcr = vectorclock.VectorClockRev(c_rev)
131+ if doc_vcr.is_newer(c_vcr):
132+ continue
133+ if doc.same_content_as(Document(doc.doc_id, c_rev, c_doc)):
134+ doc_vcr.maximize(c_vcr)
135+ autoresolved = True
136 continue
137 remaining_conflicts.append((c_rev, c_doc))
138+ if autoresolved:
139+ doc_vcr.increment(self._replica_uid)
140+ doc.rev = doc_vcr.as_str()
141 self._replace_conflicts(doc, remaining_conflicts)
142
143 def resolve_doc(self, doc, conflicted_doc_revs):
144
145=== modified file 'u1db/backends/sqlite_backend.py'
146--- u1db/backends/sqlite_backend.py 2012-05-30 19:42:52 +0000
147+++ u1db/backends/sqlite_backend.py 2012-05-31 16:03:21 +0000
148@@ -428,11 +428,6 @@
149 return [Document(doc_id, doc_rev, content)
150 for doc_rev, content in c.fetchall()]
151
152- def _get_conflict_revs(self, doc_id):
153- c = self._db_handle.cursor()
154- c.execute("SELECT doc_rev FROM conflicts WHERE doc_id = ?", (doc_id,))
155- return c.fetchall()
156-
157 def get_doc_conflicts(self, doc_id):
158 with self._db_handle:
159 conflict_docs = self._get_conflicts(doc_id)
160@@ -489,10 +484,19 @@
161
162 def _prune_conflicts(self, doc, doc_vcr):
163 if self._has_conflicts(doc.doc_id):
164+ autoresolved = False
165 c_revs_to_prune = []
166- for c_rev, in self._get_conflict_revs(doc.doc_id):
167- if doc_vcr.is_newer(vectorclock.VectorClockRev(c_rev)):
168- c_revs_to_prune.append(c_rev)
169+ for c_doc in self._get_conflicts(doc.doc_id):
170+ c_vcr = vectorclock.VectorClockRev(c_doc.rev)
171+ if doc_vcr.is_newer(c_vcr):
172+ c_revs_to_prune.append(c_doc.rev)
173+ elif doc.same_content_as(c_doc):
174+ c_revs_to_prune.append(c_doc.rev)
175+ doc_vcr.maximize(c_vcr)
176+ autoresolved = True
177+ if autoresolved:
178+ doc_vcr.increment(self._replica_uid)
179+ doc.rev = doc_vcr.as_str()
180 c = self._db_handle.cursor()
181 self._delete_conflicts(c, doc, c_revs_to_prune)
182
183
184=== modified file 'u1db/tests/test_backends.py'
185--- u1db/tests/test_backends.py 2012-05-30 19:42:52 +0000
186+++ u1db/tests/test_backends.py 2012-05-31 16:03:21 +0000
187@@ -361,7 +361,7 @@
188 self.assertEqual('superseded', state)
189 self.assertGetDoc(self.db, doc1.doc_id, doc1_rev2, simple_doc, False)
190
191- def test_put_doc_if_newer_automerge(self):
192+ def test_put_doc_if_newer_autoresolve(self):
193 doc1 = self.db.create_doc(simple_doc)
194 rev = doc1.rev
195 doc = self.make_document(doc1.doc_id, "whatever:1", doc1.get_json())
196@@ -372,6 +372,60 @@
197 self.assertTrue(v2.is_newer(vectorclock.VectorClockRev("whatever:1")))
198 self.assertTrue(v2.is_newer(vectorclock.VectorClockRev(rev)))
199
200+ def test_put_doc_if_newer_autoresolve_2(self):
201+ # this is an ordering variant of _3, but that already works
202+ # adding the test explicitly to catch the regression easily
203+ doc_a1 = self.db.create_doc(simple_doc)
204+ doc_a2 = self.make_document(doc_a1.doc_id, 'test:2', "{}")
205+ doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1',
206+ '{"a":"42"}')
207+ doc_a3 = self.make_document(doc_a1.doc_id, 'test:2|other:1', "{}")
208+ state, _ = self.db._put_doc_if_newer(doc_a2, True)
209+ self.assertEqual(state, 'inserted')
210+ state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
211+ self.assertEqual(state, 'conflicted')
212+ state, _ = self.db._put_doc_if_newer(doc_a3, True)
213+ self.assertEqual(state, 'inserted')
214+ self.assertFalse(self.db.get_doc(doc_a1.doc_id).has_conflicts)
215+
216+ def test_put_doc_if_newer_autoresolve_3(self):
217+ doc_a1 = self.db.create_doc(simple_doc)
218+ doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1', "{}")
219+ doc_a2 = self.make_document(doc_a1.doc_id, 'test:2', '{"a":"42"}')
220+ doc_a3 = self.make_document(doc_a1.doc_id, 'test:3', "{}")
221+ state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
222+ self.assertEqual(state, 'inserted')
223+ state, _ = self.db._put_doc_if_newer(doc_a2, True)
224+ self.assertEqual(state, 'conflicted')
225+ state, _ = self.db._put_doc_if_newer(doc_a3, True)
226+ self.assertEqual(state, 'superseded')
227+ doc = self.db.get_doc(doc_a1.doc_id, True)
228+ self.assertFalse(doc.has_conflicts)
229+ rev = vectorclock.VectorClockRev(doc.rev)
230+ rev_a3 = vectorclock.VectorClockRev('test:3')
231+ rev_a1b1 = vectorclock.VectorClockRev('test:1|other:1')
232+ self.assertTrue(rev.is_newer(rev_a3))
233+ self.assertTrue(rev.is_newer(rev_a1b1))
234+
235+ def test_put_doc_if_newer_autoresolve_4(self):
236+ doc_a1 = self.db.create_doc(simple_doc)
237+ doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1', None)
238+ doc_a2 = self.make_document(doc_a1.doc_id, 'test:2', '{"a":"42"}')
239+ doc_a3 = self.make_document(doc_a1.doc_id, 'test:3', None)
240+ state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
241+ self.assertEqual(state, 'inserted')
242+ state, _ = self.db._put_doc_if_newer(doc_a2, True)
243+ self.assertEqual(state, 'conflicted')
244+ state, _ = self.db._put_doc_if_newer(doc_a3, True)
245+ self.assertEqual(state, 'superseded')
246+ doc = self.db.get_doc(doc_a1.doc_id, True)
247+ self.assertFalse(doc.has_conflicts)
248+ rev = vectorclock.VectorClockRev(doc.rev)
249+ rev_a3 = vectorclock.VectorClockRev('test:3')
250+ rev_a1b1 = vectorclock.VectorClockRev('test:1|other:1')
251+ self.assertTrue(rev.is_newer(rev_a3))
252+ self.assertTrue(rev.is_newer(rev_a1b1))
253+
254 def test_put_doc_if_newer_already_converged(self):
255 orig_doc = '{"new": "doc"}'
256 doc1 = self.db.create_doc(orig_doc)
257
258=== modified file 'u1db/tests/test_sync.py'
259--- u1db/tests/test_sync.py 2012-05-24 21:09:21 +0000
260+++ u1db/tests/test_sync.py 2012-05-31 16:03:21 +0000
261@@ -438,6 +438,183 @@
262 self.assertTrue(v.is_newer(vectorclock.VectorClockRev(rev1)))
263 self.assertTrue(v.is_newer(vectorclock.VectorClockRev(rev2)))
264
265+ def test_sync_autoresolves_moar(self):
266+ # here we test that when a database that has a conflicted document is
267+ # the source of a sync, and the target database has a revision of the
268+ # conflicted document that is newer than the source database's, and
269+ # that target's database's document's content is the same as the
270+ # source's document's conflict's, the source's document's conflict gets
271+ # autoresolved, and the source's document's revision bumped.
272+ #
273+ # idea is as follows:
274+ # A B
275+ # a1 -
276+ # `------->
277+ # a1 a1
278+ # v v
279+ # a2 a1b1
280+ # `------->
281+ # a1b1+a2 a1b1
282+ # v
283+ # a1b1+a2 a1b2 (a1b2 has same content as a2)
284+ # `------->
285+ # a3b2 a1b2 (autoresolved)
286+ # `------->
287+ # a3b2 a3b2
288+ self.db1.create_doc(simple_doc, doc_id='doc')
289+ self.sync(self.db1, self.db2)
290+ for db, content in [(self.db1, '{}'), (self.db2, '{"hi": 42}')]:
291+ doc = db.get_doc('doc')
292+ doc.set_json(content)
293+ db.put_doc(doc)
294+ self.sync(self.db1, self.db2)
295+ # db1 and db2 now both have a doc of {hi:42}, but db1 has a conflict
296+ doc = self.db1.get_doc('doc')
297+ rev1 = doc.rev
298+ self.assertTrue(doc.has_conflicts)
299+ # set db2 to have a doc of {} (same as db1 before the conflict)
300+ doc = self.db2.get_doc('doc')
301+ doc.set_json('{}')
302+ self.db2.put_doc(doc)
303+ rev2 = doc.rev
304+ # sync it across
305+ self.sync(self.db1, self.db2)
306+ # tadaa!
307+ doc = self.db1.get_doc('doc')
308+ self.assertFalse(doc.has_conflicts)
309+ vec1 = vectorclock.VectorClockRev(rev1)
310+ vec2 = vectorclock.VectorClockRev(rev2)
311+ vec3 = vectorclock.VectorClockRev(doc.rev)
312+ self.assertTrue(vec3.is_newer(vec1))
313+ self.assertTrue(vec3.is_newer(vec2))
314+ # because the conflict is on the source, sync it another time
315+ self.sync(self.db1, self.db2)
316+ # make sure db2 now has the exact same thing
317+ self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
318+
319+ def test_sync_autoresolves_moar_backwards(self):
320+ # here we test that when a database that has a conflicted document is
321+ # the target of a sync, and the source database has a revision of the
322+ # conflicted document that is newer than the target database's, and
323+ # that source's database's document's content is the same as the
324+ # target's document's conflict's, the target's document's conflict gets
325+ # autoresolved, and the document's revision bumped.
326+ #
327+ # idea is as follows:
328+ # A B
329+ # a1 -
330+ # `------->
331+ # a1 a1
332+ # v v
333+ # a2 a1b1
334+ # `------->
335+ # a1b1+a2 a1b1
336+ # v
337+ # a1b1+a2 a1b2 (a1b2 has same content as a2)
338+ # <-------'
339+ # a3b2 a3b2 (autoresolved and propagated)
340+ self.db1.create_doc(simple_doc, doc_id='doc')
341+ self.sync(self.db1, self.db2)
342+ for db, content in [(self.db1, '{}'), (self.db2, '{"hi": 42}')]:
343+ doc = db.get_doc('doc')
344+ doc.set_json(content)
345+ db.put_doc(doc)
346+ self.sync(self.db1, self.db2)
347+ # db1 and db2 now both have a doc of {hi:42}, but db1 has a conflict
348+ doc = self.db1.get_doc('doc')
349+ rev1 = doc.rev
350+ self.assertTrue(doc.has_conflicts)
351+ revc = self.db1.get_doc_conflicts('doc')[-1].rev
352+ # set db2 to have a doc of {} (same as db1 before the conflict)
353+ doc = self.db2.get_doc('doc')
354+ doc.set_json('{}')
355+ self.db2.put_doc(doc)
356+ rev2 = doc.rev
357+ # sync it across
358+ self.sync(self.db2, self.db1)
359+ # tadaa!
360+ doc = self.db1.get_doc('doc')
361+ self.assertFalse(doc.has_conflicts)
362+ vec1 = vectorclock.VectorClockRev(rev1)
363+ vec2 = vectorclock.VectorClockRev(rev2)
364+ vec3 = vectorclock.VectorClockRev(doc.rev)
365+ vecc = vectorclock.VectorClockRev(revc)
366+ self.assertTrue(vec3.is_newer(vec1))
367+ self.assertTrue(vec3.is_newer(vec2))
368+ self.assertTrue(vec3.is_newer(vecc))
369+ # make sure db2 now has the exact same thing
370+ self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
371+
372+ def test_sync_autoresolves_moar_backwards_three(self):
373+ # same as autoresolves_moar_backwards, but with three databases (note
374+ # all the syncs go in the same direction -- this is a more natural
375+ # scenario):
376+ #
377+ # A B C
378+ # a1 - -
379+ # `------->
380+ # a1 a1 -
381+ # `------->
382+ # a1 a1 a1
383+ # v v
384+ # a2 a1b1 a1
385+ # `------------------->
386+ # a2 a1b1 a2
387+ # `------->
388+ # a2+a1b1 a2
389+ # v
390+ # a2 a2+a1b1 a2c1 (same as a1b1)
391+ # `------------------->
392+ # a2c1 a2+a1b1 a2c1
393+ # `------->
394+ # a2b2c1 a2b2c1 a2c1
395+ self.db3 = self.create_database('test3')
396+ self.db1.create_doc(simple_doc, doc_id='doc')
397+ self.sync(self.db1, self.db2)
398+ self.sync(self.db2, self.db3)
399+ for db, content in [(self.db2, '{"hi": 42}'),
400+ (self.db1, '{}'),
401+ ]:
402+ doc = db.get_doc('doc')
403+ doc.set_json(content)
404+ db.put_doc(doc)
405+ self.sync(self.db1, self.db3)
406+ self.sync(self.db2, self.db3)
407+ # db2 and db3 now both have a doc of {}, but db2 has a
408+ # conflict
409+ doc = self.db2.get_doc('doc')
410+ self.assertTrue(doc.has_conflicts)
411+ revc = self.db2.get_doc_conflicts('doc')[-1].rev
412+ self.assertEqual('{}', doc.get_json())
413+ self.assertEqual(self.db3.get_doc('doc').get_json(), doc.get_json())
414+ self.assertEqual(self.db3.get_doc('doc').rev, doc.rev)
415+ # set db3 to have a doc of {hi:42} (same as db2 before the conflict)
416+ doc = self.db3.get_doc('doc')
417+ doc.set_json('{"hi": 42}')
418+ self.db3.put_doc(doc)
419+ rev3 = doc.rev
420+ # sync it across to db1
421+ self.sync(self.db1, self.db3)
422+ # db1 now has hi:42, with a rev that is newer than db2's doc
423+ doc = self.db1.get_doc('doc')
424+ rev1 = doc.rev
425+ self.assertFalse(doc.has_conflicts)
426+ self.assertEqual('{"hi": 42}', doc.get_json())
427+ VCR=vectorclock.VectorClockRev
428+ self.assertTrue(VCR(rev1).is_newer(VCR(self.db2.get_doc('doc').rev)))
429+ # so sync it to db2
430+ self.sync(self.db1, self.db2)
431+ # tadaa!
432+ doc = self.db2.get_doc('doc')
433+ self.assertFalse(doc.has_conflicts)
434+ # db2's revision of the document is strictly newer than db1's before
435+ # the sync, and db3's before that sync way back when
436+ self.assertTrue(VCR(doc.rev).is_newer(VCR(rev1)))
437+ self.assertTrue(VCR(doc.rev).is_newer(VCR(rev3)))
438+ self.assertTrue(VCR(doc.rev).is_newer(VCR(revc)))
439+ # make sure both dbs now have the exact same thing
440+ self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
441+
442 def test_sync_puts_changes(self):
443 doc = self.db1.create_doc(simple_doc)
444 self.assertEqual(1, self.sync(self.db1, self.db2))

Subscribers

People subscribed via source and target branches