Merge lp:~thisfred/u1db/document-too-big into lp:u1db

Proposed by Eric Casteleijn
Status: Merged
Approved by: Eric Casteleijn
Approved revision: 344
Merged at revision: 347
Proposed branch: lp:~thisfred/u1db/document-too-big
Merge into: lp:u1db
Diff against target: 604 lines (+249/-11)
13 files modified
include/u1db/u1db.h (+16/-3)
include/u1db/u1db_internal.h (+6/-0)
src/u1db.c (+119/-7)
u1db/__init__.py (+24/-0)
u1db/backends/__init__.py (+11/-0)
u1db/backends/inmemory.py (+1/-0)
u1db/backends/sqlite_backend.py (+1/-0)
u1db/errors.py (+6/-0)
u1db/remote/http_database.py (+12/-0)
u1db/remote/http_errors.py (+1/-0)
u1db/tests/c_backend_wrapper.pyx (+24/-1)
u1db/tests/test_backends.py (+19/-0)
u1db/tests/test_document.py (+9/-0)
To merge this branch: bzr merge lp:~thisfred/u1db/document-too-big
Reviewer Review Type Date Requested Status
John O'Brien (community) Approve
Review via email: mp+114046@code.launchpad.net

Commit message

Added get_size for documents, and set_document_size_limit for dbs.

Description of the change

This is step 1 of the document size restriction work. It defines and implements the API on all backends. It does not yet handle or test size violations on remote databases, mainly because testing that is turning out to be hard, so I plan to add it in a separate branch.

To post a comment you must log in.
Revision history for this message
Eric Casteleijn (thisfred) wrote :

working on memory leaks

Revision history for this message
John O'Brien (jdobrien) wrote :

Looks good to me.

review: Approve
Revision history for this message
Eric Casteleijn (thisfred) wrote :

memory leaks fixed

Revision history for this message
Ubuntu One Auto Pilot (otto-pilot) wrote :

Attempt to merge into lp:u1db failed due to conflicts:

text conflict in u1db/tests/test_sync.py

lp:~thisfred/u1db/document-too-big updated
344. By Eric Casteleijn

merged trunk, resolved conflict

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'include/u1db/u1db.h'
2--- include/u1db/u1db.h 2012-06-27 18:56:17 +0000
3+++ include/u1db/u1db.h 2012-07-10 17:17:20 +0000
4@@ -44,7 +44,8 @@
5 const char **key);
6 typedef int (*u1db_doc_gen_callback)(void *context, u1db_document *doc,
7 int gen, const char *trans_id);
8-typedef int (*u1db_doc_id_gen_callback)(void *context, const char *doc_id, int gen);
9+typedef int (*u1db_doc_id_gen_callback)(void *context, const char *doc_id,
10+ int gen);
11 typedef int (*u1db_trans_info_callback)(void *context, const char *doc_id,
12 int gen, const char *trans_id);
13
14@@ -73,6 +74,7 @@
15 #define U1DB_INVALID_TRANSACTION_ID -20
16 #define U1DB_INVALID_GENERATION -21
17 #define U1DB_TARGET_UNAVAILABLE -22
18+#define U1DB_DOCUMENT_TOO_BIG -23
19 #define U1DB_INTERNAL_ERROR -999
20
21 // Used by put_doc_if_newer
22@@ -99,6 +101,11 @@
23 int u1db_set_replica_uid(u1database *db, const char *replica_uid);
24
25 /**
26+ * Set the maximum allowed document size, in bytes, for this database.
27+ */
28+int u1db_set_document_size_limit(u1database *db, int limit);
29+
30+/**
31 * Get the replica_uid defined for this database.
32 *
33 * @param replica_uid (OUT) The unique identifier for this replica. The
34@@ -256,14 +263,20 @@
35
36
37 /**
38+ * Get the size of the document in bytes.
39+ */
40+int u1db_doc_get_size(u1db_document *doc);
41+
42+
43+/**
44 * Create an index that you can query for matching documents.
45 *
46 * @param index_name An identifier for this index.
47 * @param n_expressions The number of index expressions.
48 * @param exp0... The values to match in the index, all of these should be char*
49 */
50-int u1db_create_index(u1database *db, const char *index_name, int n_expressions,
51- ...);
52+int u1db_create_index(u1database *db, const char *index_name,
53+ int n_expressions, ...);
54
55
56 /**
57
58=== modified file 'include/u1db/u1db_internal.h'
59--- include/u1db/u1db_internal.h 2012-07-06 17:34:14 +0000
60+++ include/u1db/u1db_internal.h 2012-07-10 17:17:20 +0000
61@@ -31,6 +31,7 @@
62 {
63 sqlite3 *sql_handle;
64 char *replica_uid;
65+ int document_size_limit;
66 };
67
68 struct _u1query {
69@@ -231,6 +232,11 @@
70 int u1db__get_generation(u1database *db, int *generation);
71
72 /**
73+ * Internal API, Get the document size limit.
74+ */
75+int u1db__get_document_size_limit(u1database *db, int *limit);
76+
77+/**
78 * Internal API, Get the global database rev and transaction id.
79 */
80 int u1db__get_generation_info(u1database *db, int *generation,
81
82=== modified file 'src/u1db.c'
83--- src/u1db.c 2012-07-06 17:34:14 +0000
84+++ src/u1db.c 2012-07-10 17:17:20 +0000
85@@ -70,7 +70,8 @@
86 }
87 u1db__generate_hex_uuid(default_replica_uid);
88 u1db_set_replica_uid(db, default_replica_uid);
89- return SQLITE_OK;
90+ u1db_set_document_size_limit(db, 0);
91+ return U1DB_OK;
92 }
93
94 u1database *
95@@ -84,6 +85,8 @@
96 free(db);
97 return NULL;
98 }
99+ // TODO: surely this is not right? We should get the db sqlite, and only if
100+ // that fails because it's not there, should we initialize?!?
101 initialize(db);
102 return db;
103 }
104@@ -154,7 +157,39 @@
105 num_bytes = strlen(replica_uid);
106 db->replica_uid = (char *)calloc(1, num_bytes + 1);
107 memcpy(db->replica_uid, replica_uid, num_bytes + 1);
108- return 0;
109+ return U1DB_OK;
110+}
111+
112+int
113+u1db_set_document_size_limit(u1database *db, int limit)
114+{
115+ sqlite3_stmt *statement;
116+ int status, final_status;
117+
118+ status = sqlite3_prepare_v2(db->sql_handle,
119+ "INSERT OR REPLACE INTO u1db_config VALUES ('document_size_limit', ?)",
120+ -1, &statement, NULL);
121+ if (status != SQLITE_OK) {
122+ return status;
123+ }
124+ status = sqlite3_bind_int(statement, 1, limit);
125+ if (status != SQLITE_OK) {
126+ sqlite3_finalize(statement);
127+ return status;
128+ }
129+ status = sqlite3_step(statement);
130+ if (status != SQLITE_DONE) {
131+ sqlite3_finalize(statement);
132+ return status;
133+ }
134+ status = sqlite3_finalize(statement);
135+ if (status != SQLITE_OK) {
136+ return status;
137+ }
138+ // If we got this far, then document_size_limit has been properly set.
139+ // Copy it
140+ db->document_size_limit = limit;
141+ return U1DB_OK;
142 }
143
144 int
145@@ -165,7 +200,7 @@
146 const unsigned char *text;
147 if (db->replica_uid != NULL) {
148 *replica_uid = db->replica_uid;
149- return SQLITE_OK;
150+ return U1DB_OK;
151 }
152 status = sqlite3_prepare_v2(db->sql_handle,
153 "SELECT value FROM u1db_config WHERE name = 'replica_uid'", -1,
154@@ -181,7 +216,7 @@
155 if (status == SQLITE_DONE) {
156 // No replica_uid set yet
157 *replica_uid = NULL;
158- return SQLITE_OK;
159+ return U1DB_OK;
160 }
161 *replica_uid = "Failed to step prepared statement";
162 return status;
163@@ -196,7 +231,7 @@
164 db->replica_uid = (char *)calloc(1, num_bytes + 1);
165 memcpy(db->replica_uid, text, num_bytes+1);
166 *replica_uid = db->replica_uid;
167- return SQLITE_OK;
168+ return U1DB_OK;
169 }
170
171 static int
172@@ -482,6 +517,20 @@
173 return status;
174 }
175
176+static int
177+u1db__check_doc_size(u1database *db, u1db_document *doc)
178+{
179+ int status = U1DB_OK;
180+ int limit = 0;
181+ status = u1db__get_document_size_limit(db, &limit);
182+ if (status != U1DB_OK)
183+ return status;
184+ if (limit == 0)
185+ return U1DB_OK;
186+ if (u1db_doc_get_size(doc) <= limit)
187+ return U1DB_OK;
188+ return U1DB_DOCUMENT_TOO_BIG;
189+}
190
191 int
192 u1db_put_doc(u1database *db, u1db_document *doc)
193@@ -493,13 +542,16 @@
194 sqlite3_stmt *statement = NULL;
195
196 if (db == NULL || doc == NULL) {
197- // Bad parameter
198- return -1;
199+ return U1DB_INVALID_PARAMETER;
200 }
201 status = u1db__is_doc_id_valid(doc->doc_id);
202 if (status != U1DB_OK) {
203 return status;
204 }
205+ status = u1db__check_doc_size(db, doc);
206+ if (status != U1DB_OK) {
207+ return status;
208+ }
209 status = sqlite3_exec(db->sql_handle, "BEGIN", NULL, NULL, NULL);
210 if (status != SQLITE_OK) {
211 return status;
212@@ -1426,6 +1478,48 @@
213 }
214
215 int
216+u1db__get_document_size_limit(u1database *db, int *limit)
217+{
218+ int status = U1DB_OK;
219+ sqlite3_stmt *statement;
220+
221+ if (db == NULL || limit == NULL) {
222+ return U1DB_INVALID_PARAMETER;
223+ }
224+ // TODO: make sure we don't do this lookup every time in case the limit
225+ // really is 0.
226+ if (db->document_size_limit != 0) {
227+ *limit = db->document_size_limit;
228+ return U1DB_OK;
229+ }
230+ status = sqlite3_prepare_v2(db->sql_handle,
231+ "SELECT value FROM u1db_config WHERE name = 'document_size_limit'", -1,
232+ &statement, NULL);
233+ if(status != SQLITE_OK) {
234+ return status;
235+ }
236+ status = sqlite3_step(statement);
237+ if(status != SQLITE_ROW) {
238+ // TODO: Check return for failures
239+ sqlite3_finalize(statement);
240+ if (status == SQLITE_DONE) {
241+ // No document_size_limit set yet
242+ *limit = 0;
243+ db->document_size_limit = *limit;
244+ return U1DB_OK;
245+ }
246+ return status;
247+ }
248+ *limit = sqlite3_column_int(statement, 0);
249+ status = sqlite3_finalize(statement);
250+ if (status != SQLITE_OK) {
251+ return status;
252+ }
253+ db->document_size_limit = *limit;
254+ return U1DB_OK;
255+}
256+
257+int
258 u1db__get_generation_info(u1database *db, int *generation, char **trans_id)
259 {
260 int status;
261@@ -1914,6 +2008,24 @@
262 return status;
263 }
264
265+
266+int
267+u1db_doc_get_size(u1db_document *doc)
268+{
269+ int size = 0;
270+ if (doc->json != NULL) {
271+ size += strlen(doc->json);
272+ }
273+ if (doc->doc_id != NULL) {
274+ size += strlen(doc->doc_id);
275+ }
276+ if (doc->doc_rev != NULL) {
277+ size += strlen(doc->doc_rev);
278+ }
279+ return size;
280+}
281+
282+
283 int
284 u1db__is_doc_id_valid(const char *doc_id)
285 {
286
287=== modified file 'u1db/__init__.py'
288--- u1db/__init__.py 2012-07-05 15:50:36 +0000
289+++ u1db/__init__.py 2012-07-10 17:17:20 +0000
290@@ -65,6 +65,13 @@
291 """
292 raise NotImplementedError(self.set_document_factory)
293
294+ def set_document_size_limit(self, limit):
295+ """Set the maximum allowed document size for this database.
296+
297+ :param limit: Maximum allowed document size in bytes.
298+ """
299+ raise NotImplementedError(self.set_document_size_limit)
300+
301 def whats_changed(self, old_generation):
302 """Return a list of documents that have changed since old_generation.
303 This allows APPS to only store a db generation before going
304@@ -125,6 +132,9 @@
305 You can optionally specify the document identifier, but the document
306 must not already exist. See 'put_doc' if you want to override an
307 existing document.
308+ If the database specifies a maximum document size and the document
309+ exceeds it, create will fail and raise a DocumentTooBig exception.
310+
311 :param content: The JSON document string
312 :param doc_id: An optional identifier specifying the document id.
313 :return: Document
314@@ -134,6 +144,8 @@
315 def put_doc(self, doc):
316 """Update a document.
317 If the document currently has conflicts, put will fail.
318+ If the database specifies a maximum document size and the document
319+ exceeds it, put will fail and raise a DocumentTooBig exception.
320
321 :param doc: A Document with new content.
322 :return: new_doc_rev - The new revision identifier for the document.
323@@ -422,6 +434,18 @@
324 return self._json
325 return None
326
327+ def get_size(self):
328+ """Calculate the total size of the document."""
329+ size = 0
330+ json = self.get_json()
331+ if json:
332+ size += len(json)
333+ if self.rev:
334+ size += len(self.rev)
335+ if self.doc_id:
336+ size += len(self.doc_id)
337+ return size
338+
339 def set_json(self, json):
340 """Set the json serialization of this document."""
341 if json is not None:
342
343=== modified file 'u1db/backends/__init__.py'
344--- u1db/backends/__init__.py 2012-07-06 17:34:14 +0000
345+++ u1db/backends/__init__.py 2012-07-10 17:17:20 +0000
346@@ -36,6 +36,8 @@
347
348 class CommonBackend(u1db.Database):
349
350+ document_size_limit = 0
351+
352 def _allocate_doc_id(self):
353 """Generate a unique identifier for this document."""
354 return 'D-' + uuid.uuid4().hex # 'D-' stands for document
355@@ -52,6 +54,12 @@
356 if not check_doc_id_re.match(doc_id):
357 raise errors.InvalidDocId()
358
359+ def _check_doc_size(self, doc):
360+ if not self.document_size_limit:
361+ return
362+ if doc.get_size() > self.document_size_limit:
363+ raise errors.DocumentTooBig
364+
365 def _get_generation(self):
366 """Return the current generation.
367
368@@ -192,3 +200,6 @@
369 vcr.maximize(VectorClockRev(rev))
370 vcr.increment(self._replica_uid)
371 return vcr.as_str()
372+
373+ def set_document_size_limit(self, limit):
374+ self.document_size_limit = limit
375
376=== modified file 'u1db/backends/inmemory.py'
377--- u1db/backends/inmemory.py 2012-07-06 17:34:14 +0000
378+++ u1db/backends/inmemory.py 2012-07-10 17:17:20 +0000
379@@ -100,6 +100,7 @@
380 if doc.doc_id is None:
381 raise errors.InvalidDocId()
382 self._check_doc_id(doc.doc_id)
383+ self._check_doc_size(doc)
384 if self._has_conflicts(doc.doc_id):
385 raise errors.ConflictedDoc()
386 old_doc = self._get_doc(doc.doc_id)
387
388=== modified file 'u1db/backends/sqlite_backend.py'
389--- u1db/backends/sqlite_backend.py 2012-07-06 17:34:14 +0000
390+++ u1db/backends/sqlite_backend.py 2012-07-10 17:17:20 +0000
391@@ -340,6 +340,7 @@
392 if doc.doc_id is None:
393 raise errors.InvalidDocId()
394 self._check_doc_id(doc.doc_id)
395+ self._check_doc_size(doc)
396 with self._db_handle:
397 if self._has_conflicts(doc.doc_id):
398 raise errors.ConflictedDoc()
399
400=== modified file 'u1db/errors.py'
401--- u1db/errors.py 2012-07-04 15:44:36 +0000
402+++ u1db/errors.py 2012-07-10 17:17:20 +0000
403@@ -43,6 +43,12 @@
404 wire_description = "invalid document id"
405
406
407+class DocumentTooBig(U1DBError):
408+ """Document exceeds the maximum document size for this database."""
409+
410+ wire_description = "document too big"
411+
412+
413 class InvalidTransactionId(U1DBError):
414 """Invalid transaction for generation."""
415
416
417=== modified file 'u1db/remote/http_database.py'
418--- u1db/remote/http_database.py 2012-05-30 14:39:36 +0000
419+++ u1db/remote/http_database.py 2012-07-10 17:17:20 +0000
420@@ -41,10 +41,20 @@
421 def __init__(self, url, document_factory=None):
422 super(HTTPDatabase, self).__init__(url)
423 self._factory = document_factory or Document
424+ self.document_size_limit = 0
425
426 def set_document_factory(self, factory):
427 self._factory = factory
428
429+ def set_document_size_limit(self, limit):
430+ self.document_size_limit = limit
431+
432+ def _check_doc_size(self, doc):
433+ if not self.document_size_limit:
434+ return
435+ if doc.get_size() > self.document_size_limit:
436+ raise errors.DocumentTooBig
437+
438 @staticmethod
439 def open_database(url, create):
440 db = HTTPDatabase(url)
441@@ -75,6 +85,7 @@
442 def put_doc(self, doc):
443 if doc.doc_id is None:
444 raise errors.InvalidDocId()
445+ self._check_doc_size(doc)
446 params = {}
447 if doc.rev is not None:
448 params['old_rev'] = doc.rev
449@@ -105,6 +116,7 @@
450 def create_doc(self, content, doc_id=None):
451 if doc_id is None:
452 doc_id = 'D-%s' % (uuid.uuid4().hex,)
453+ self._check_doc_size(self._factory(doc_id, None, content))
454 res, headers = self._request_json('PUT', ['doc', doc_id], {},
455 content, 'application/json')
456 new_doc = self._factory(doc_id, res['rev'], content)
457
458=== modified file 'u1db/remote/http_errors.py'
459--- u1db/remote/http_errors.py 2012-07-04 15:43:00 +0000
460+++ u1db/remote/http_errors.py 2012-07-10 17:17:20 +0000
461@@ -25,6 +25,7 @@
462 wire_description_to_status = dict([
463 (errors.InvalidDocId.wire_description, 400),
464 (errors.Unauthorized.wire_description, 401),
465+ (errors.DocumentTooBig.wire_description, 403),
466 (errors.DatabaseDoesNotExist.wire_description, 404),
467 (errors.DocumentDoesNotExist.wire_description, 404),
468 (errors.DocumentAlreadyDeleted.wire_description, 404),
469
470=== modified file 'u1db/tests/c_backend_wrapper.pyx'
471--- u1db/tests/c_backend_wrapper.pyx 2012-07-06 17:34:14 +0000
472+++ u1db/tests/c_backend_wrapper.pyx 2012-07-10 17:17:20 +0000
473@@ -68,6 +68,7 @@
474 u1database * u1db_open(char *fname)
475 void u1db_free(u1database **)
476 int u1db_set_replica_uid(u1database *, char *replica_uid)
477+ int u1db_set_document_size_limit(u1database *, int limit)
478 int u1db_get_replica_uid(u1database *, const_char_ptr *replica_uid)
479 int u1db_create_doc(u1database *db, char *json, char *doc_id,
480 u1db_document **doc)
481@@ -128,6 +129,7 @@
482 int U1DB_DOCUMENT_DOES_NOT_EXIST
483 int U1DB_NOT_IMPLEMENTED
484 int U1DB_INVALID_JSON
485+ int U1DB_DOCUMENT_TOO_BIG
486 int U1DB_INVALID_VALUE_FOR_INDEX
487 int U1DB_INVALID_GLOBBING
488 int U1DB_BROKEN_SYNC_STREAM
489@@ -145,6 +147,7 @@
490
491 void u1db_free_doc(u1db_document **doc)
492 int u1db_doc_set_json(u1db_document *doc, char *json)
493+ int u1db_doc_get_size(u1db_document *doc)
494
495
496 cdef extern from "u1db/u1db_internal.h":
497@@ -203,6 +206,7 @@
498
499
500 int u1db__get_generation(u1database *, int *db_rev)
501+ int u1db__get_document_size_limit(u1database *, int *limit)
502 int u1db__get_generation_info(u1database *, int *db_rev, char **trans_id)
503 int u1db__get_trans_id_for_gen(u1database *, int db_rev, char **trans_id)
504 int u1db_validate_gen_and_trans_id(u1database *, int db_rev,
505@@ -487,6 +491,8 @@
506 def set_json(self, val):
507 u1db_doc_set_json(self._doc, val)
508
509+ def get_size(self):
510+ return u1db_doc_get_size(self._doc)
511
512 property has_conflicts:
513 def __get__(self):
514@@ -600,6 +606,8 @@
515 raise errors.Unavailable
516 if status == U1DB_INVALID_JSON:
517 raise errors.InvalidJSON
518+ if status == U1DB_DOCUMENT_TOO_BIG:
519+ raise errors.DocumentTooBig
520 raise RuntimeError('%s (status: %s)' % (context, status))
521
522
523@@ -881,9 +889,24 @@
524 cdef int status
525 status = u1db_set_replica_uid(self._db, replica_uid)
526 if status != 0:
527- raise RuntimeError('Machine_id could not be set to %s, error: %d'
528+ raise RuntimeError('replica_uid could not be set to %s, error: %d'
529 % (replica_uid, status))
530
531+ property document_size_limit:
532+ def __get__(self):
533+ cdef int limit
534+ handle_status("document_size_limit",
535+ u1db__get_document_size_limit(self._db, &limit))
536+ return limit
537+
538+ def set_document_size_limit(self, limit):
539+ cdef int status
540+ status = u1db_set_document_size_limit(self._db, limit)
541+ if status != 0:
542+ raise RuntimeError(
543+ "document_size_limit could not be set to %d, error: %d",
544+ (limit, status))
545+
546 def _allocate_doc_id(self):
547 cdef char *val
548 val = u1db__allocate_doc_id(self._db)
549
550=== modified file 'u1db/tests/test_backends.py'
551--- u1db/tests/test_backends.py 2012-07-06 17:34:14 +0000
552+++ u1db/tests/test_backends.py 2012-07-10 17:17:20 +0000
553@@ -150,6 +150,17 @@
554 doc = self.make_document('d\xc3\xa5c-id', None, simple_doc)
555 self.assertRaises(errors.InvalidDocId, self.db.put_doc, doc)
556
557+ def test_put_doc_refuses_oversized_documents(self):
558+ self.db.set_document_size_limit(1)
559+ doc = self.make_document('doc-id', None, simple_doc)
560+ self.assertRaises(errors.DocumentTooBig, self.db.put_doc, doc)
561+
562+ def test_create_doc_refuses_oversized_documents(self):
563+ self.db.set_document_size_limit(1)
564+ self.assertRaises(
565+ errors.DocumentTooBig, self.db.create_doc, simple_doc,
566+ doc_id='my_doc_id')
567+
568 def test_put_fails_with_bad_old_rev(self):
569 doc = self.db.create_doc(simple_doc, doc_id='my_doc_id')
570 old_rev = doc.rev
571@@ -265,6 +276,14 @@
572 self.db.put_doc(doc)
573 self.assertGetDoc(self.db, doc.doc_id, doc.rev, nested_doc, False)
574
575+ def test_set_document_size_limit_zero(self):
576+ self.db.set_document_size_limit(0)
577+ self.assertEqual(0, self.db.document_size_limit)
578+
579+ def test_set_document_size_limit(self):
580+ self.db.set_document_size_limit(1000000)
581+ self.assertEqual(1000000, self.db.document_size_limit)
582+
583
584 class LocalDatabaseTests(tests.DatabaseBaseTests):
585
586
587=== modified file 'u1db/tests/test_document.py'
588--- u1db/tests/test_document.py 2012-06-29 16:18:14 +0000
589+++ u1db/tests/test_document.py 2012-07-10 17:17:20 +0000
590@@ -71,6 +71,15 @@
591 errors.InvalidJSON, self.make_document, 'id', 'uid:1',
592 'not a json dictionary')
593
594+ def test_get_size(self):
595+ doc_a = self.make_document('a', 'b', '{"some": "content"}')
596+ self.assertEqual(
597+ len('a' + 'b' + '{"some": "content"}'), doc_a.get_size())
598+
599+ def test_get_size_empty_document(self):
600+ doc_a = self.make_document('a', 'b', None)
601+ self.assertEqual(len('a' + 'b'), doc_a.get_size())
602+
603
604 class TestPyDocument(tests.TestCase):
605

Subscribers

People subscribed via source and target branches