Merge lp:~pedronis/u1db/whats_changed_sorted_last_edits into lp:u1db

Proposed by Samuele Pedroni
Status: Merged
Approved by: Samuele Pedroni
Approved revision: 148
Merged at revision: 146
Proposed branch: lp:~pedronis/u1db/whats_changed_sorted_last_edits
Merge into: lp:u1db
Diff against target: 155 lines (+48/-21)
5 files modified
u1db/__init__.py (+6/-5)
u1db/backends/inmemory.py (+12/-2)
u1db/backends/sqlite_backend.py (+12/-7)
u1db/sync.py (+4/-2)
u1db/tests/test_backends.py (+14/-5)
To merge this branch: bzr merge lp:~pedronis/u1db/whats_changed_sorted_last_edits
Reviewer Review Type Date Requested Status
John A Meinel (community) Approve
Review via email: mp+85386@code.launchpad.net

Description of the change

have whats_changed return information about the generation of a change sorted, return only the last change for a given doc_id. (the idea is that it should be possible to make sync incremental by sending document across in the implied order)

To post a comment you must log in.
147. By Samuele Pedroni

pep8 fix

Revision history for this message
John A Meinel (jameinel) wrote :

A few small comments, everything would be fine to land the way it is. Just some thoughts:

38 + cur_generation = len(self._transaction_log)
39 + changes = []
40 + relevant_tail = self._transaction_log[old_generation:]
41 + relevant_tail.reverse()

If we want to be in-memory race safe, then doing:

relevant_tail = self._transaction_log[old_generation:]
cur_generation = old_generation + len(relevant_tail)

In the SQL implementation, you use changes[0][1] which is also getting it from the queried data, rather than assuming things don't change.

Also, instead of relevant_tail.reverse() and then iterating, just do:

for doc_id in reversed(relevant_tail):

That generates a reverse iterator, rather than the O(n) for actually reversing each item in the list.

review: Approve
148. By Samuele Pedroni

suggested improvements

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'u1db/__init__.py'
2--- u1db/__init__.py 2011-12-06 20:13:48 +0000
3+++ u1db/__init__.py 2011-12-13 09:31:23 +0000
4@@ -47,17 +47,18 @@
5 """
6
7 def whats_changed(self, old_generation):
8- """Return a list of entries that have changed since old_generation.
9+ """Return a list of documents that have changed since old_generation.
10 This allows APPS to only store a db generation before going
11 'offline', and then when coming back online they can use this
12 data to update whatever extra data they are storing.
13
14 :param old_generation: The generation of the database in the old
15 state.
16- :return: (cur_generation, set([doc_id]))
17- The current generation of the database, and the set of
18- document ids that were changed in between old_generation and
19- cur_generation
20+ :return: (cur_generation, [(doc_id, generation),...])
21+ The current generation of the database, and a list of of
22+ changed documents since old_generation, represented by tuples
23+ with for each document its doc_id and the generation corresponding
24+ to the last intervening change and sorted by generation
25 """
26 raise NotImplementedError(self.whats_changed)
27
28
29=== modified file 'u1db/backends/inmemory.py'
30--- u1db/backends/inmemory.py 2011-12-06 15:34:12 +0000
31+++ u1db/backends/inmemory.py 2011-12-13 09:31:23 +0000
32@@ -168,8 +168,18 @@
33 return result
34
35 def whats_changed(self, old_generation=0):
36- return (len(self._transaction_log),
37- set(self._transaction_log[old_generation:]))
38+ changes = []
39+ relevant_tail = self._transaction_log[old_generation:]
40+ cur_generation = old_generation + len(relevant_tail)
41+ seen = set()
42+ generation = cur_generation
43+ for doc_id in reversed(relevant_tail):
44+ if doc_id not in seen:
45+ changes.append((doc_id, generation))
46+ seen.add(doc_id)
47+ generation -= 1
48+ changes.reverse()
49+ return (cur_generation, changes)
50
51 def force_doc_sync_conflict(self, doc):
52 my_doc = self._get_doc(doc.doc_id)
53
54=== modified file 'u1db/backends/sqlite_backend.py'
55--- u1db/backends/sqlite_backend.py 2011-12-06 20:13:48 +0000
56+++ u1db/backends/sqlite_backend.py 2011-12-13 09:31:23 +0000
57@@ -354,15 +354,20 @@
58 def whats_changed(self, old_generation=0):
59 c = self._db_handle.cursor()
60 c.execute("SELECT generation, doc_id FROM transaction_log"
61- " WHERE generation > ?", (old_generation,))
62+ " WHERE generation > ? ORDER BY generation DESC",
63+ (old_generation,))
64 results = c.fetchall()
65 cur_gen = old_generation
66- doc_ids = set()
67- for gen, doc_id in results:
68- if gen > cur_gen:
69- cur_gen = gen
70- doc_ids.add(doc_id)
71- return cur_gen, doc_ids
72+ seen = set()
73+ changes = []
74+ for generation, doc_id in results:
75+ if doc_id not in seen:
76+ changes.append((doc_id, generation))
77+ seen.add(doc_id)
78+ if changes:
79+ cur_gen = changes[0][1] # max generation
80+ changes.reverse()
81+ return cur_gen, changes
82
83 def delete_doc(self, doc):
84 with self._db_handle:
85
86=== modified file 'u1db/sync.py'
87--- u1db/sync.py 2011-12-06 16:14:56 +0000
88+++ u1db/sync.py 2011-12-13 09:31:23 +0000
89@@ -92,7 +92,8 @@
90 (other_replica_uid, other_gen,
91 others_my_gen) = sync_target.get_sync_info(self.source._replica_uid)
92 # what's changed since that generation and this current gen
93- my_gen, changed_doc_ids = self.source.whats_changed(others_my_gen)
94+ my_gen, changes = self.source.whats_changed(others_my_gen)
95+ changed_doc_ids = set(doc_id for doc_id, _ in changes)
96 # prepare to send all the changed docs
97 docs_to_send = self.source.get_docs(changed_doc_ids,
98 check_for_conflicts=False)
99@@ -169,7 +170,8 @@
100 processing the returned documents.
101 """
102 self._last_known_generation = last_known_generation # for tests
103- gen, changed_doc_ids = self._db.whats_changed(last_known_generation)
104+ gen, changes = self._db.whats_changed(last_known_generation)
105+ changed_doc_ids = set(doc_id for doc_id, _ in changes)
106 self.new_gen = gen
107 seen_ids = self.seen_ids
108 # changed docs that weren't superseded by or converged with
109
110=== modified file 'u1db/tests/test_backends.py'
111--- u1db/tests/test_backends.py 2011-12-06 15:34:12 +0000
112+++ u1db/tests/test_backends.py 2011-12-13 09:31:23 +0000
113@@ -341,13 +341,13 @@
114 self.db.put_doc(doc)
115 self.assertEqual([doc.doc_id, doc.doc_id],
116 self.db._get_transaction_log())
117- self.assertEqual((2, set([doc.doc_id])), self.db.whats_changed())
118+ self.assertEqual((2, [(doc.doc_id, 2)]), self.db.whats_changed())
119
120 def test_delete_updates_transaction_log(self):
121 doc = self.db.create_doc(simple_doc)
122 db_gen, _ = self.db.whats_changed()
123 self.db.delete_doc(doc)
124- self.assertEqual((2, set([doc.doc_id])), self.db.whats_changed(db_gen))
125+ self.assertEqual((2, [(doc.doc_id, 2)]), self.db.whats_changed(db_gen))
126
127 def test_delete_then_put(self):
128 doc = self.db.create_doc(simple_doc)
129@@ -358,14 +358,23 @@
130 self.assertGetDoc(self.db, doc.doc_id, doc.rev, nested_doc, False)
131
132 def test_whats_changed_initial_database(self):
133- self.assertEqual((0, set()), self.db.whats_changed())
134+ self.assertEqual((0, []), self.db.whats_changed())
135
136 def test_whats_changed_returns_one_id_for_multiple_changes(self):
137 doc = self.db.create_doc(simple_doc)
138 doc.content = '{"new": "contents"}'
139 self.db.put_doc(doc)
140- self.assertEqual((2, set([doc.doc_id])), self.db.whats_changed())
141- self.assertEqual((2, set()), self.db.whats_changed(2))
142+ self.assertEqual((2, [(doc.doc_id, 2)]), self.db.whats_changed())
143+ self.assertEqual((2, []), self.db.whats_changed(2))
144+
145+ def test_whats_changed_returns_last_edits_ascending(self):
146+ doc = self.db.create_doc(simple_doc)
147+ doc1 = self.db.create_doc(simple_doc)
148+ doc.content = '{"new": "contents"}'
149+ self.db.delete_doc(doc1)
150+ self.db.put_doc(doc)
151+ self.assertEqual((4, [(doc1.doc_id, 3), (doc.doc_id, 4)]),
152+ self.db.whats_changed())
153
154
155 class DatabaseIndexTests(tests.DatabaseBaseTests):

Subscribers

People subscribed via source and target branches