U1DB

Merge lp:~chipaca/u1db/prune-conflicts-automerges into lp:u1db

prune-conflicts-automerges
Merge into trunk

Proposed by John Lenton on 2012-05-26

Status:

Rejected

Rejected by:

Eric Casteleijn on 2012-06-18

Proposed branch:

lp:~chipaca/u1db/prune-conflicts-automerges

Merge into:

lp:u1db

Diff against target:

444 lines (+299/-14)

6 files modified

src/u1db.c (+37/-2)
u1db/backends/__init__.py (+8/-2)
u1db/backends/inmemory.py (+10/-1)
u1db/backends/sqlite_backend.py (+12/-8)
u1db/tests/test_backends.py (+55/-1)
u1db/tests/test_sync.py (+177/-0)

To merge this branch:

bzr merge lp:~chipaca/u1db/prune-conflicts-automerges

Undecided

Fix Released

Link a bug report

Reviewer	Review Type	Date Requested	Status
Samuele Pedroni		2012-05-26	Needs Fixing on 2012-05-29
Review via email: mp+107517@code.launchpad.net

Commit message

On sync, autoresolve conflicts whose content matches the new revision.

Description of the change

This makes _prune_conflicts autoresolve conflicts that have the same content as the new revision. E.g. if you have a document at rev a2 with a conflict a1b1, and you get a new revision a3 that has the same content as a1b1, the a1b1 conflict is resolved.

lp:~chipaca/u1db/prune-conflicts-automerges updated on 2012-05-26

316. By John Lenton on 2012-05-26: s/automerge/autoresolve/

Revision history for this message

John Lenton (chipaca) wrote on 2012-05-26:

Not sure why I was calling it 'automerge', when it's 'autoresolve'. Anyway, fixed that.

Revision history for this message

Samuele Pedroni (pedronis) wrote on 2012-05-29:

needs testing of the lines:

108 + if doc.rev != rev:
109 + # conflicts have been autoresolved
110 + state = 'superseded'

It also needs some integration testing in test_sync to show how they behave there; the idea is that the autoresolved document will be sent back, if I understand correctly.

review: Needs Fixing

lp:~chipaca/u1db/prune-conflicts-automerges updated on 2012-05-29

317. By John Lenton on 2012-05-29: add explicit tests for the state returned by _put_doc_if_newer when automerging; add a sync test to test this autoresolving happening in the wild

Revision history for this message

John Lenton (chipaca) wrote on 2012-05-29:

The autoresolved document is not sent back when the returned state is 'superseded'. Not sure if that is right or not.

lp:~chipaca/u1db/prune-conflicts-automerges updated on 2012-05-31

318. By John Lenton on 2012-05-31: added integration tests

Revision history for this message

Samuele Pedroni (pedronis) wrote on 2012-06-05:

One last annoying detail: tests that really require save_conflicts=True (not sure which of the ones you added do or don't) need to live in LocalDatabaseWithConflictsTests, not LocalDatabaseTests.

Revision history for this message

Eric Casteleijn (thisfred) wrote on 2012-06-18:

superseded by lp:~thisfred/u1db/prune-conflicts-automerges

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Christina A Reitbauer

John Lenton

Lucio Torre

Samuele Pedroni

Ubuntu One hackers

 === modified file 'src/u1db.c'
 --- src/u1db.c	2012-05-29 15:49:21 +0000
 +++ src/u1db.c	2012-05-31 16:03:21 +0000
@@ -678,10 +678,12 @@
  prune_conflicts(u1database *db, u1db_document *doc,
                  u1db_vectorclock *new_vc)
+ {
++    const char *local_replica_uid = NULL;
      int status = U1DB_OK;
++    int did_autoresolve = 0;
      sqlite3_stmt *statement;
      status = sqlite3_prepare_v2(db->sql_handle,
--        "SELECT doc_rev FROM conflicts WHERE doc_id = ?", -1,
++        "SELECT doc_rev, content FROM conflicts WHERE doc_id = ?", -1,
          &statement, NULL);
      if (status != SQLITE_OK) { goto finish; }
      status = sqlite3_bind_text(statement, 1, doc->doc_id, -1, SQLITE_TRANSIENT);
@@ -689,11 +691,15 @@
      status = sqlite3_step(statement);
      while (status == SQLITE_ROW) {
          const char *conflict_rev;
++        const char *conflict_content;
          u1db_vectorclock *conflict_vc;
++        conflict_content = (const char*)sqlite3_column_text(statement, 1);
          conflict_rev = (const char*)sqlite3_column_text(statement, 0);
          conflict_vc = u1db__vectorclock_from_str(conflict_rev);
--        if (conflict_vc == NULL) {
++        if (conflict_vc == NULL
++            || (sqlite3_column_type(statement, 1) != SQLITE_NULL
++                && conflict_content == NULL)) {
              status = U1DB_NOMEM;
          } else {
              if (u1db__vectorclock_is_newer(new_vc, conflict_vc)) {
@@ -702,6 +708,16 @@
                  //        find out differently, update this to create a list of
                  //        things to delete, then iterate over deleting them.
                  status = delete_conflict(db, doc->doc_id, conflict_rev);
++            } else if ((doc->json == NULL && conflict_content == NULL)
++                       || (doc->json != NULL && conflict_content != NULL
++                           && strcmp(doc->json, conflict_content) == 0)) {
++                did_autoresolve = 1;
++                status = u1db__vectorclock_maximize(new_vc, conflict_vc);
++                if (status != U1DB_OK) {
++                    u1db__free_vectorclock(&conflict_vc);
++                    goto finish;
++                }
++                status = delete_conflict(db, doc->doc_id, conflict_rev);
              } else {
                  // There is an existing conflict that we do *not* supersede,
                  // make sure the document is marked conflicted
@@ -716,6 +732,14 @@
+     }
      if (status == SQLITE_DONE) {
          status = U1DB_OK;
++    } else if (status == U1DB_OK && did_autoresolve) {
++        status = u1db_get_replica_uid(db, &local_replica_uid);
++        if (status != SQLITE_OK) { goto finish; }
++        status = u1db__vectorclock_increment(new_vc, local_replica_uid);
++        if (status != SQLITE_OK) { goto finish; }
++        free(doc->doc_rev);
++        status = u1db__vectorclock_as_str(new_vc, &doc->doc_rev);
++        if (status != SQLITE_OK) { goto finish; }
+     }
  finish:
      sqlite3_finalize(statement);
@@ -772,9 +796,20 @@
+         }
          if (u1db__vectorclock_is_newer(new_vc, stored_vc)) {
              // Just take the newer version
++            char *rev = strdup(doc->doc_rev);
++            if (rev == NULL) {
++                status = U1DB_NOMEM;
++                goto finish;
++            }
              store = 1;
              *state = U1DB_INSERTED;
              status = prune_conflicts(db, doc, new_vc);
++            // if the doc's rev has been updated, conflicts were autoresolved
++            if (status == U1DB_OK && strcmp(rev, doc->doc_rev) != 0) {
++                *state = U1DB_SUPERSEDED;
++            }
++            free(rev);
++            if (status != U1DB_OK) { goto finish; }
          } else if (u1db__vectorclock_is_newer(stored_vc, new_vc)) {
              // The existing version is newer than the one supplied
              store = 0;
 === modified file 'u1db/backends/__init__.py'
 --- u1db/backends/__init__.py	2012-05-24 21:09:21 +0000
 +++ u1db/backends/__init__.py	2012-05-31 16:03:21 +0000
@@ -22,6 +22,7 @@
  import u1db
  from u1db import (
      errors,
++    vectorclock,
+ )
  import u1db.sync
  from u1db.vectorclock import VectorClockRev
@@ -102,9 +103,14 @@
          else:
              cur_vcr = VectorClockRev(cur_doc.rev)
          if doc_vcr.is_newer(cur_vcr):
--            self._put_and_update_indexes(cur_doc, doc)
++            rev = doc.rev
              self._prune_conflicts(doc, doc_vcr)
--            state = 'inserted'
++            if doc.rev != rev:
++                # conflicts have been autoresolved
++                state = 'superseded'
++            else:
++                state = 'inserted'
++            self._put_and_update_indexes(cur_doc, doc)
          elif doc.rev == cur_doc.rev:
              # magical convergence
              state = 'converged'
 === modified file 'u1db/backends/inmemory.py'
 --- u1db/backends/inmemory.py	2012-05-30 19:42:52 +0000
 +++ u1db/backends/inmemory.py	2012-05-31 16:03:21 +0000
@@ -148,12 +148,21 @@
      def _prune_conflicts(self, doc, doc_vcr):
          if self._has_conflicts(doc.doc_id):
++            autoresolved = False
              remaining_conflicts = []
              cur_conflicts = self._conflicts[doc.doc_id]
              for c_rev, c_doc in cur_conflicts:
--                if doc_vcr.is_newer(vectorclock.VectorClockRev(c_rev)):
++                c_vcr = vectorclock.VectorClockRev(c_rev)
++                if doc_vcr.is_newer(c_vcr):
++                    continue
++                if doc.same_content_as(Document(doc.doc_id, c_rev, c_doc)):
++                    doc_vcr.maximize(c_vcr)
++                    autoresolved = True
                      continue
                  remaining_conflicts.append((c_rev, c_doc))
++            if autoresolved:
++                doc_vcr.increment(self._replica_uid)
++                doc.rev = doc_vcr.as_str()
              self._replace_conflicts(doc, remaining_conflicts)
      def resolve_doc(self, doc, conflicted_doc_revs):
 === modified file 'u1db/backends/sqlite_backend.py'
 --- u1db/backends/sqlite_backend.py	2012-05-30 19:42:52 +0000
 +++ u1db/backends/sqlite_backend.py	2012-05-31 16:03:21 +0000
@@ -428,11 +428,6 @@
          return [Document(doc_id, doc_rev, content)
                  for doc_rev, content in c.fetchall()]
--    def _get_conflict_revs(self, doc_id):
--        c = self._db_handle.cursor()
--        c.execute("SELECT doc_rev FROM conflicts WHERE doc_id = ?", (doc_id,))
--        return c.fetchall()
--
      def get_doc_conflicts(self, doc_id):
          with self._db_handle:
              conflict_docs = self._get_conflicts(doc_id)
@@ -489,10 +484,19 @@
      def _prune_conflicts(self, doc, doc_vcr):
          if self._has_conflicts(doc.doc_id):
++            autoresolved = False
              c_revs_to_prune = []
--            for c_rev, in self._get_conflict_revs(doc.doc_id):
--                if doc_vcr.is_newer(vectorclock.VectorClockRev(c_rev)):
--                    c_revs_to_prune.append(c_rev)
++            for c_doc in self._get_conflicts(doc.doc_id):
++                c_vcr = vectorclock.VectorClockRev(c_doc.rev)
++                if doc_vcr.is_newer(c_vcr):
++                    c_revs_to_prune.append(c_doc.rev)
++                elif doc.same_content_as(c_doc):
++                    c_revs_to_prune.append(c_doc.rev)
++                    doc_vcr.maximize(c_vcr)
++                    autoresolved = True
++            if autoresolved:
++                doc_vcr.increment(self._replica_uid)
++                doc.rev = doc_vcr.as_str()
              c = self._db_handle.cursor()
              self._delete_conflicts(c, doc, c_revs_to_prune)
 === modified file 'u1db/tests/test_backends.py'
 --- u1db/tests/test_backends.py	2012-05-30 19:42:52 +0000
 +++ u1db/tests/test_backends.py	2012-05-31 16:03:21 +0000
@@ -361,7 +361,7 @@
          self.assertEqual('superseded', state)
          self.assertGetDoc(self.db, doc1.doc_id, doc1_rev2, simple_doc, False)
--    def test_put_doc_if_newer_automerge(self):
++    def test_put_doc_if_newer_autoresolve(self):
          doc1 = self.db.create_doc(simple_doc)
          rev = doc1.rev
          doc = self.make_document(doc1.doc_id, "whatever:1", doc1.get_json())
@@ -372,6 +372,60 @@
          self.assertTrue(v2.is_newer(vectorclock.VectorClockRev("whatever:1")))
          self.assertTrue(v2.is_newer(vectorclock.VectorClockRev(rev)))
++    def test_put_doc_if_newer_autoresolve_2(self):
++        # this is an ordering variant of _3, but that already works
++        # adding the test explicitly to catch the regression easily
++        doc_a1 = self.db.create_doc(simple_doc)
++        doc_a2 = self.make_document(doc_a1.doc_id, 'test:2', "{}")
++        doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1',
++                                      '{"a":"42"}')
++        doc_a3 = self.make_document(doc_a1.doc_id, 'test:2|other:1', "{}")
++        state, _ = self.db._put_doc_if_newer(doc_a2, True)
++        self.assertEqual(state, 'inserted')
++        state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
++        self.assertEqual(state, 'conflicted')
++        state, _ = self.db._put_doc_if_newer(doc_a3, True)
++        self.assertEqual(state, 'inserted')
++        self.assertFalse(self.db.get_doc(doc_a1.doc_id).has_conflicts)
++
++    def test_put_doc_if_newer_autoresolve_3(self):
++        doc_a1 = self.db.create_doc(simple_doc)
++        doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1', "{}")
++        doc_a2 = self.make_document(doc_a1.doc_id, 'test:2',  '{"a":"42"}')
++        doc_a3 = self.make_document(doc_a1.doc_id, 'test:3', "{}")
++        state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
++        self.assertEqual(state, 'inserted')
++        state, _ = self.db._put_doc_if_newer(doc_a2, True)
++        self.assertEqual(state, 'conflicted')
++        state, _ = self.db._put_doc_if_newer(doc_a3, True)
++        self.assertEqual(state, 'superseded')
++        doc = self.db.get_doc(doc_a1.doc_id, True)
++        self.assertFalse(doc.has_conflicts)
++        rev = vectorclock.VectorClockRev(doc.rev)
++        rev_a3 = vectorclock.VectorClockRev('test:3')
++        rev_a1b1 = vectorclock.VectorClockRev('test:1|other:1')
++        self.assertTrue(rev.is_newer(rev_a3))
++        self.assertTrue(rev.is_newer(rev_a1b1))
++
++    def test_put_doc_if_newer_autoresolve_4(self):
++        doc_a1 = self.db.create_doc(simple_doc)
++        doc_a1b1 = self.make_document(doc_a1.doc_id, 'test:1|other:1', None)
++        doc_a2 = self.make_document(doc_a1.doc_id, 'test:2',  '{"a":"42"}')
++        doc_a3 = self.make_document(doc_a1.doc_id, 'test:3', None)
++        state, _ = self.db._put_doc_if_newer(doc_a1b1, True)
++        self.assertEqual(state, 'inserted')
++        state, _ = self.db._put_doc_if_newer(doc_a2, True)
++        self.assertEqual(state, 'conflicted')
++        state, _ = self.db._put_doc_if_newer(doc_a3, True)
++        self.assertEqual(state, 'superseded')
++        doc = self.db.get_doc(doc_a1.doc_id, True)
++        self.assertFalse(doc.has_conflicts)
++        rev = vectorclock.VectorClockRev(doc.rev)
++        rev_a3 = vectorclock.VectorClockRev('test:3')
++        rev_a1b1 = vectorclock.VectorClockRev('test:1|other:1')
++        self.assertTrue(rev.is_newer(rev_a3))
++        self.assertTrue(rev.is_newer(rev_a1b1))
++
      def test_put_doc_if_newer_already_converged(self):
          orig_doc = '{"new": "doc"}'
          doc1 = self.db.create_doc(orig_doc)
 === modified file 'u1db/tests/test_sync.py'
 --- u1db/tests/test_sync.py	2012-05-24 21:09:21 +0000
 +++ u1db/tests/test_sync.py	2012-05-31 16:03:21 +0000
@@ -438,6 +438,183 @@
          self.assertTrue(v.is_newer(vectorclock.VectorClockRev(rev1)))
          self.assertTrue(v.is_newer(vectorclock.VectorClockRev(rev2)))
++    def test_sync_autoresolves_moar(self):
++        # here we test that when a database that has a conflicted document is
++        # the source of a sync, and the target database has a revision of the
++        # conflicted document that is newer than the source database's, and
++        # that target's database's document's content is the same as the
++        # source's document's conflict's, the source's document's conflict gets
++        # autoresolved, and the source's document's revision bumped.
++        #
++        # idea is as follows:
++        # A          B
++        # a1         -
++        #   `------->
++        # a1         a1
++        # v          v
++        # a2         a1b1
++        #   `------->
++        # a1b1+a2    a1b1
++        #            v
++        # a1b1+a2    a1b2 (a1b2 has same content as a2)
++        #   `------->
++        # a3b2       a1b2 (autoresolved)
++        #   `------->
++        # a3b2       a3b2
++        self.db1.create_doc(simple_doc, doc_id='doc')
++        self.sync(self.db1, self.db2)
++        for db, content in [(self.db1, '{}'), (self.db2, '{"hi": 42}')]:
++            doc = db.get_doc('doc')
++            doc.set_json(content)
++            db.put_doc(doc)
++        self.sync(self.db1, self.db2)
++        # db1 and db2 now both have a doc of {hi:42}, but db1 has a conflict
++        doc = self.db1.get_doc('doc')
++        rev1 = doc.rev
++        self.assertTrue(doc.has_conflicts)
++        # set db2 to have a doc of {} (same as db1 before the conflict)
++        doc = self.db2.get_doc('doc')
++        doc.set_json('{}')
++        self.db2.put_doc(doc)
++        rev2 = doc.rev
++        # sync it across
++        self.sync(self.db1, self.db2)
++        # tadaa!
++        doc = self.db1.get_doc('doc')
++        self.assertFalse(doc.has_conflicts)
++        vec1 = vectorclock.VectorClockRev(rev1)
++        vec2 = vectorclock.VectorClockRev(rev2)
++        vec3 = vectorclock.VectorClockRev(doc.rev)
++        self.assertTrue(vec3.is_newer(vec1))
++        self.assertTrue(vec3.is_newer(vec2))
++        # because the conflict is on the source, sync it another time
++        self.sync(self.db1, self.db2)
++        # make sure db2 now has the exact same thing
++        self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
++
++    def test_sync_autoresolves_moar_backwards(self):
++        # here we test that when a database that has a conflicted document is
++        # the target of a sync, and the source database has a revision of the
++        # conflicted document that is newer than the target database's, and
++        # that source's database's document's content is the same as the
++        # target's document's conflict's, the target's document's conflict gets
++        # autoresolved, and the document's revision bumped.
++        #
++        # idea is as follows:
++        # A          B
++        # a1         -
++        #   `------->
++        # a1         a1
++        # v          v
++        # a2         a1b1
++        #   `------->
++        # a1b1+a2    a1b1
++        #            v
++        # a1b1+a2    a1b2 (a1b2 has same content as a2)
++        #   <-------'
++        # a3b2       a3b2 (autoresolved and propagated)
++        self.db1.create_doc(simple_doc, doc_id='doc')
++        self.sync(self.db1, self.db2)
++        for db, content in [(self.db1, '{}'), (self.db2, '{"hi": 42}')]:
++            doc = db.get_doc('doc')
++            doc.set_json(content)
++            db.put_doc(doc)
++        self.sync(self.db1, self.db2)
++        # db1 and db2 now both have a doc of {hi:42}, but db1 has a conflict
++        doc = self.db1.get_doc('doc')
++        rev1 = doc.rev
++        self.assertTrue(doc.has_conflicts)
++        revc = self.db1.get_doc_conflicts('doc')[-1].rev
++        # set db2 to have a doc of {} (same as db1 before the conflict)
++        doc = self.db2.get_doc('doc')
++        doc.set_json('{}')
++        self.db2.put_doc(doc)
++        rev2 = doc.rev
++        # sync it across
++        self.sync(self.db2, self.db1)
++        # tadaa!
++        doc = self.db1.get_doc('doc')
++        self.assertFalse(doc.has_conflicts)
++        vec1 = vectorclock.VectorClockRev(rev1)
++        vec2 = vectorclock.VectorClockRev(rev2)
++        vec3 = vectorclock.VectorClockRev(doc.rev)
++        vecc = vectorclock.VectorClockRev(revc)
++        self.assertTrue(vec3.is_newer(vec1))
++        self.assertTrue(vec3.is_newer(vec2))
++        self.assertTrue(vec3.is_newer(vecc))
++        # make sure db2 now has the exact same thing
++        self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
++
++    def test_sync_autoresolves_moar_backwards_three(self):
++        # same as autoresolves_moar_backwards, but with three databases (note
++        # all the syncs go in the same direction -- this is a more natural
++        # scenario):
++        #
++        # A          B          C
++        # a1         -          -
++        #   `------->
++        # a1         a1         -
++        #              `------->
++        # a1         a1         a1
++        # v          v
++        # a2         a1b1       a1
++        #  `------------------->
++        # a2         a1b1       a2
++        #              `------->
++        #            a2+a1b1    a2
++        #                       v
++        # a2         a2+a1b1    a2c1 (same as a1b1)
++        #  `------------------->
++        # a2c1       a2+a1b1    a2c1
++        #   `------->
++        # a2b2c1     a2b2c1     a2c1
++        self.db3 = self.create_database('test3')
++        self.db1.create_doc(simple_doc, doc_id='doc')
++        self.sync(self.db1, self.db2)
++        self.sync(self.db2, self.db3)
++        for db, content in [(self.db2, '{"hi": 42}'),
++                            (self.db1, '{}'),
++                            ]:
++            doc = db.get_doc('doc')
++            doc.set_json(content)
++            db.put_doc(doc)
++        self.sync(self.db1, self.db3)
++        self.sync(self.db2, self.db3)
++        # db2 and db3 now both have a doc of {}, but db2 has a
++        # conflict
++        doc = self.db2.get_doc('doc')
++        self.assertTrue(doc.has_conflicts)
++        revc = self.db2.get_doc_conflicts('doc')[-1].rev
++        self.assertEqual('{}', doc.get_json())
++        self.assertEqual(self.db3.get_doc('doc').get_json(), doc.get_json())
++        self.assertEqual(self.db3.get_doc('doc').rev, doc.rev)
++        # set db3 to have a doc of {hi:42} (same as db2 before the conflict)
++        doc = self.db3.get_doc('doc')
++        doc.set_json('{"hi": 42}')
++        self.db3.put_doc(doc)
++        rev3 = doc.rev
++        # sync it across to db1
++        self.sync(self.db1, self.db3)
++        # db1 now has hi:42, with a rev that is newer than db2's doc
++        doc = self.db1.get_doc('doc')
++        rev1 = doc.rev
++        self.assertFalse(doc.has_conflicts)
++        self.assertEqual('{"hi": 42}', doc.get_json())
++        VCR=vectorclock.VectorClockRev
++        self.assertTrue(VCR(rev1).is_newer(VCR(self.db2.get_doc('doc').rev)))
++        # so sync it to db2
++        self.sync(self.db1, self.db2)
++        # tadaa!
++        doc = self.db2.get_doc('doc')
++        self.assertFalse(doc.has_conflicts)
++        # db2's revision of the document is strictly newer than db1's before
++        # the sync, and db3's before that sync way back when
++        self.assertTrue(VCR(doc.rev).is_newer(VCR(rev1)))
++        self.assertTrue(VCR(doc.rev).is_newer(VCR(rev3)))
++        self.assertTrue(VCR(doc.rev).is_newer(VCR(revc)))
++        # make sure both dbs now have the exact same thing
++        self.assertEqual(self.db1.get_doc('doc'), self.db2.get_doc('doc'))
++
      def test_sync_puts_changes(self):
          doc = self.db1.create_doc(simple_doc)
          self.assertEqual(1, self.sync(self.db1, self.db2))