Merge lp:~thisfred/u1db/iterate-over-list-of-dicts into lp:u1db

Proposed by Eric Casteleijn
Status: Merged
Approved by: Eric Casteleijn
Approved revision: 372
Merged at revision: 366
Proposed branch: lp:~thisfred/u1db/iterate-over-list-of-dicts
Merge into: lp:u1db
Diff against target: 349 lines (+130/-69)
5 files modified
src/u1db_query.c (+74/-46)
u1db/query_parser.py (+25/-17)
u1db/tests/test_backends.py (+20/-0)
u1db/tests/test_query_parser.py (+10/-5)
u1db/tests/test_sqlite_backend.py (+1/-1)
To merge this branch: bzr merge lp:~thisfred/u1db/iterate-over-list-of-dicts
Reviewer Review Type Date Requested Status
Samuele Pedroni Approve
Review via email: mp+117120@code.launchpad.net

Commit message

Added support for indexing dictionaries in lists, so this becomes possible:

        doc = self.db.create_doc_from_json(
            '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]}]}')
        self.db.create_index('test-idx', 'foo.zap.qux')
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))

Description of the change

Added support for indexing dictionaries in lists, so this becomes possible:

        doc = self.db.create_doc_from_json(
            '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]}]}')
        self.db.create_index('test-idx', 'foo.zap.qux')
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))

To post a comment you must log in.
Revision history for this message
Samuele Pedroni (pedronis) wrote :

 + subfields = self.field.split('.')

I would move this to the constructor

255 + return extract_field(raw_doc, subfields)

+def extract_field(raw_doc, subfields):
206 + if not isinstance(raw_doc, dict):
207 + return []
208 + val = raw_doc.get(subfields.pop(0))

I would use an index argument and avoid the pop (especially pop(0)) and the copy

216 + subfields = []
this line is not needed

same for these:
223 + if val is None:
224 + return []

lines 209-210 cover that

368. By Eric Casteleijn

split in __init__

369. By Eric Casteleijn

split in __init__

370. By Eric Casteleijn

don't mutate list

Revision history for this message
Eric Casteleijn (thisfred) wrote :

Fixes pushed

371. By Eric Casteleijn

don't copy list around

372. By Eric Casteleijn

don't copy list around

Revision history for this message
Samuele Pedroni (pedronis) wrote :

+1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/u1db_query.c'
--- src/u1db_query.c 2012-07-27 15:17:04 +0000
+++ src/u1db_query.c 2012-07-31 14:58:20 +0000
@@ -252,60 +252,84 @@
252 op_combine, "combine", json_type_string, -1, JUST_EXPRESSION};252 op_combine, "combine", json_type_string, -1, JUST_EXPRESSION};
253253
254static int254static int
255extract_field_values(json_object *obj, const string_list *field_path,255extract_value(json_object *val, int value_type, string_list *values)
256 int value_type, string_list *values)
257{256{
258 string_list_item *item = NULL;257 int status = U1DB_OK;
258 int i, integer_value, boolean_value, length;
259 char string_value[MAX_INT_STR_LEN];259 char string_value[MAX_INT_STR_LEN];
260 struct array_list *list_val = NULL;
261 json_object *val = NULL;
262 int i, integer_value, boolean_value;
263 int status = U1DB_OK;
264 val = obj;
265 if (val == NULL)
266 goto finish;
267 for (item = field_path->head; item != NULL; item = item->next)
268 {
269 val = json_object_object_get(val, item->data);
270 if (val == NULL)
271 goto finish;
272 }
273 if (json_object_is_type(val, json_type_string) && value_type ==260 if (json_object_is_type(val, json_type_string) && value_type ==
274 json_type_string) {261 json_type_string) {
275 if ((status = append(values, json_object_get_string(val))) != U1DB_OK)262 status = append(values, json_object_get_string(val));
276 goto finish;263 goto finish;
277 } else if (json_object_is_type(val, json_type_int) && value_type ==264 }
265 if (json_object_is_type(val, json_type_int) && value_type ==
278 json_type_int) {266 json_type_int) {
279 integer_value = json_object_get_int(val);267 integer_value = json_object_get_int(val);
280 snprintf(string_value, MAX_INT_STR_LEN, "%d", integer_value);268 snprintf(string_value, MAX_INT_STR_LEN, "%d", integer_value);
281 if ((status = append(values, string_value)) != U1DB_OK)269 status = append(values, string_value);
282 goto finish;270 goto finish;
283 } else if (json_object_is_type(val, json_type_boolean) &&271 }
284 value_type == json_type_boolean) {272 if (json_object_is_type(val, json_type_boolean) &&
273 value_type == json_type_boolean) {
285 boolean_value = json_object_get_boolean(val);274 boolean_value = json_object_get_boolean(val);
286 if (boolean_value) {275 if (boolean_value) {
287 status = append(values, "1");276 status = append(values, "1");
288 if (status != U1DB_OK)
289 goto finish;
290 } else {277 } else {
291 status = append(values, "0");278 status = append(values, "0");
279 }
280 goto finish;
281 }
282 if (json_object_is_type(val, json_type_array)) {
283 length = json_object_array_length(val);
284 for (i = 0; i < length; i++) {
285 status = extract_value(
286 json_object_array_get_idx(val, i), value_type, values);
292 if (status != U1DB_OK)287 if (status != U1DB_OK)
293 goto finish;288 goto finish;
294 }289 }
295 } else if (json_object_is_type(val, json_type_array)) {290 }
296 // TODO: recursively check the types291finish:
297 list_val = json_object_get_array(val);292 return status;
298 for (i = 0; i < list_val->length; i++)293}
299 {294
300 if ((status = append(values, json_object_get_string(295static int
301 array_list_get_idx(296extract_field_values(json_object *obj, const string_list_item *field,
302 list_val, i)))) != U1DB_OK)297 int value_type, string_list *values)
303 goto finish;298{
304 }299 json_object *val = NULL;
305 }300 json_object *array_item = NULL;
306finish:301 int i, length;
307 return status;302 int status = U1DB_OK;
308}303
304 if (!json_object_is_type(obj, json_type_object)) {
305 goto finish;
306 }
307 val = json_object_object_get(obj, field->data);
308 if (val == NULL)
309 goto finish;
310 if (field->next != NULL) {
311 if (json_object_is_type(val, json_type_array)) {
312 length = json_object_array_length(val);
313 for (i = 0; i < length; i++) {
314 array_item = json_object_array_get_idx(val, i);
315 status = extract_field_values(
316 array_item, field->next, value_type, values);
317 if (status != U1DB_OK)
318 goto finish;
319 }
320 goto finish;
321 }
322 if (json_object_is_type(val, json_type_object)) {
323 status = extract_field_values(val, field->next, value_type, values);
324 goto finish;
325 }
326 goto finish;
327 }
328 status = extract_value(val, value_type, values);
329finish:
330 return status;
331}
332
309333
310static int334static int
311split(string_list *result, char *string, char splitter)335split(string_list *result, char *string, char splitter)
@@ -351,7 +375,7 @@
351 status = ((op_function)tree->op)(tree, obj, values);375 status = ((op_function)tree->op)(tree, obj, values);
352 } else {376 } else {
353 status = extract_field_values(377 status = extract_field_values(
354 obj, tree->field_path, json_type_string, values);378 obj, tree->field_path->head, json_type_string, values);
355 }379 }
356 return status;380 return status;
357}381}
@@ -411,7 +435,7 @@
411 if (status != U1DB_OK)435 if (status != U1DB_OK)
412 return status;436 return status;
413 status = extract_field_values(437 status = extract_field_values(
414 obj, node->field_path, json_type_int, values);438 obj, node->field_path->head, json_type_int, values);
415 if (status != U1DB_OK)439 if (status != U1DB_OK)
416 goto finish;440 goto finish;
417 node = node->next_sibling;441 node = node->next_sibling;
@@ -539,7 +563,7 @@
539 //booleans by extract_field_values.563 //booleans by extract_field_values.
540564
541 status = extract_field_values(565 status = extract_field_values(
542 obj, tree->first_child->field_path, json_type_boolean, values);566 obj, tree->first_child->field_path->head, json_type_boolean, values);
543 if (status != U1DB_OK)567 if (status != U1DB_OK)
544 goto finish;568 goto finish;
545 for (item = values->head; item != NULL; item = item->next)569 for (item = values->head; item != NULL; item = item->next)
@@ -1274,7 +1298,7 @@
1274 char *sep = NULL;1298 char *sep = NULL;
1275 parse_tree *node = NULL;1299 parse_tree *node = NULL;
1276 int status = U1DB_OK;1300 int status = U1DB_OK;
1277 int i;1301 int i, array_size;
12781302
1279 sep = get_token(tokens);1303 sep = get_token(tokens);
1280 if (sep == NULL || strcmp(sep, "(") != 0) {1304 if (sep == NULL || strcmp(sep, "(") != 0) {
@@ -1321,8 +1345,9 @@
1321 }1345 }
1322 }1346 }
1323 i = 0;1347 i = 0;
1348 array_size = sizeof(result->value_types) / sizeof(int);
1324 for (node = result->first_child; node != NULL; node = node->next_sibling) {1349 for (node = result->first_child; node != NULL; node = node->next_sibling) {
1325 node->arg_type = result->value_types[i % result->number_of_children];1350 node->arg_type = result->value_types[i % array_size];
1326 if (node->arg_type == EXPRESSION) {1351 if (node->arg_type == EXPRESSION) {
1327 status = to_getter(node);1352 status = to_getter(node);
1328 if (status != U1DB_OK)1353 if (status != U1DB_OK)
@@ -1538,7 +1563,8 @@
1538 if (ctx->obj == NULL || !json_object_is_type(ctx->obj, json_type_object)) {1563 if (ctx->obj == NULL || !json_object_is_type(ctx->obj, json_type_object)) {
1539 return U1DB_INVALID_JSON;1564 return U1DB_INVALID_JSON;
1540 }1565 }
1541 if ((status = init_list(&values)) != U1DB_OK)1566 status = init_list(&values);
1567 if (status != U1DB_OK)
1542 goto finish;1568 goto finish;
1543 status = get_values(tree, ctx->obj, values);1569 status = get_values(tree, ctx->obj, values);
1544 if (status != U1DB_OK)1570 if (status != U1DB_OK)
@@ -1550,8 +1576,10 @@
1550 goto finish;1576 goto finish;
1551 }1577 }
1552finish:1578finish:
1553 if (values != NULL)1579 if (values != NULL) {
1554 destroy_list(values);1580 destroy_list(values);
1581 values = NULL;
1582 }
1555 return status;1583 return status;
1556}1584}
15571585
15581586
=== modified file 'u1db/query_parser.py'
--- u1db/query_parser.py 2012-07-27 14:50:11 +0000
+++ u1db/query_parser.py 2012-07-31 14:58:20 +0000
@@ -53,6 +53,29 @@
53 return self.value53 return self.value
5454
5555
56def extract_field(raw_doc, subfields, index=0):
57 if not isinstance(raw_doc, dict):
58 return []
59 val = raw_doc.get(subfields[index])
60 if val is None:
61 return []
62 if index < len(subfields) - 1:
63 if isinstance(val, list):
64 results = []
65 for item in val:
66 results.extend(extract_field(item, subfields, index + 1))
67 return results
68 if isinstance(val, dict):
69 return extract_field(val, subfields, index + 1)
70 return []
71 if isinstance(val, dict):
72 return []
73 if isinstance(val, list):
74 # Strip anything in the list that isn't a simple type
75 return [v for v in val if not isinstance(v, (dict, list))]
76 return [val]
77
78
56class ExtractField(Getter):79class ExtractField(Getter):
57 """Extract a field from the document."""80 """Extract a field from the document."""
5881
@@ -69,25 +92,10 @@
69 :param field: a specifier for the field to return.92 :param field: a specifier for the field to return.
70 This is either a field name, or a dotted field name.93 This is either a field name, or a dotted field name.
71 """94 """
72 self.field = field95 self.field = field.split('.')
7396
74 def get(self, raw_doc):97 def get(self, raw_doc):
75 for subfield in self.field.split('.'):98 return extract_field(raw_doc, self.field)
76 if isinstance(raw_doc, dict):
77 raw_doc = raw_doc.get(subfield)
78 else:
79 return []
80 if isinstance(raw_doc, dict):
81 return []
82 if raw_doc is None:
83 result = []
84 elif isinstance(raw_doc, list):
85 # Strip anything in the list that isn't a simple type
86 result = [val for val in raw_doc
87 if not isinstance(val, (dict, list))]
88 else:
89 result = [raw_doc]
90 return result
9199
92100
93class Transformation(Getter):101class Transformation(Getter):
94102
=== modified file 'u1db/tests/test_backends.py'
--- u1db/tests/test_backends.py 2012-07-27 15:24:48 +0000
+++ u1db/tests/test_backends.py 2012-07-31 14:58:20 +0000
@@ -1470,6 +1470,26 @@
1470 self.db.create_index('test-idx', 'sub.foo.bar.baz.qux.fnord')1470 self.db.create_index('test-idx', 'sub.foo.bar.baz.qux.fnord')
1471 self.assertEqual([], self.db.get_from_index('test-idx', '*'))1471 self.assertEqual([], self.db.get_from_index('test-idx', '*'))
14721472
1473 def test_nested_traverses_lists(self):
1474 # subpath finds dicts in list
1475 doc = self.db.create_doc_from_json(
1476 '{"foo": [{"zap": "bar"}, {"zap": "baz"}]}')
1477 # subpath only finds dicts in list
1478 self.db.create_doc_from_json('{"foo": ["zap", "baz"]}')
1479 self.db.create_index('test-idx', 'foo.zap')
1480 self.assertEqual([doc], self.db.get_from_index('test-idx', 'bar'))
1481 self.assertEqual([doc], self.db.get_from_index('test-idx', 'baz'))
1482
1483 def test_nested_list_traversal(self):
1484 # subpath finds dicts in list
1485 doc = self.db.create_doc_from_json(
1486 '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]},'
1487 '{"zap": "baz"}]}')
1488 # subpath only finds dicts in list
1489 self.db.create_index('test-idx', 'foo.zap.qux')
1490 self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
1491 self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))
1492
1473 def test_index_list1(self):1493 def test_index_list1(self):
1474 self.db.create_index("index", "name")1494 self.db.create_index("index", "name")
1475 content = '{"name": ["foo", "bar"]}'1495 content = '{"name": ["foo", "bar"]}'
14761496
=== modified file 'u1db/tests/test_query_parser.py'
--- u1db/tests/test_query_parser.py 2012-07-26 13:35:50 +0000
+++ u1db/tests/test_query_parser.py 2012-07-31 14:58:20 +0000
@@ -179,6 +179,11 @@
179 def test_get_value_list_of_dicts(self):179 def test_get_value_list_of_dicts(self):
180 self.assertExtractField([], 'foo', {'foo': [{'zap': 'bar'}]})180 self.assertExtractField([], 'foo', {'foo': [{'zap': 'bar'}]})
181181
182 def test_get_value_list_of_dicts2(self):
183 self.assertExtractField(
184 ['bar', 'baz'], 'foo.zap',
185 {'foo': [{'zap': 'bar'}, {'zap': 'baz'}]})
186
182 def test_get_value_int(self):187 def test_get_value_int(self):
183 self.assertExtractField([9], 'foo', {'foo': 9})188 self.assertExtractField([9], 'foo', {'foo': 9})
184189
@@ -395,12 +400,12 @@
395 def test_parse_field(self):400 def test_parse_field(self):
396 getter = self.parse("a")401 getter = self.parse("a")
397 self.assertIsInstance(getter, query_parser.ExtractField)402 self.assertIsInstance(getter, query_parser.ExtractField)
398 self.assertEqual("a", getter.field)403 self.assertEqual(["a"], getter.field)
399404
400 def test_parse_dotted_field(self):405 def test_parse_dotted_field(self):
401 getter = self.parse("a.b")406 getter = self.parse("a.b")
402 self.assertIsInstance(getter, query_parser.ExtractField)407 self.assertIsInstance(getter, query_parser.ExtractField)
403 self.assertEqual("a.b", getter.field)408 self.assertEqual(["a", "b"], getter.field)
404409
405 def test_parse_dotted_field_nothing_after_dot(self):410 def test_parse_dotted_field_nothing_after_dot(self):
406 self.assertParseError("a.")411 self.assertParseError("a.")
@@ -427,12 +432,12 @@
427 getter = self.parse("lower(a)")432 getter = self.parse("lower(a)")
428 self.assertIsInstance(getter, query_parser.Lower)433 self.assertIsInstance(getter, query_parser.Lower)
429 self.assertIsInstance(getter.inner, query_parser.ExtractField)434 self.assertIsInstance(getter.inner, query_parser.ExtractField)
430 self.assertEqual("a", getter.inner.field)435 self.assertEqual(["a"], getter.inner.field)
431436
432 def test_parse_all(self):437 def test_parse_all(self):
433 getters = self.parse_all(["a", "b"])438 getters = self.parse_all(["a", "b"])
434 self.assertEqual(2, len(getters))439 self.assertEqual(2, len(getters))
435 self.assertIsInstance(getters[0], query_parser.ExtractField)440 self.assertIsInstance(getters[0], query_parser.ExtractField)
436 self.assertEqual("a", getters[0].field)441 self.assertEqual(["a"], getters[0].field)
437 self.assertIsInstance(getters[1], query_parser.ExtractField)442 self.assertIsInstance(getters[1], query_parser.ExtractField)
438 self.assertEqual("b", getters[1].field)443 self.assertEqual(["b"], getters[1].field)
439444
=== modified file 'u1db/tests/test_sqlite_backend.py'
--- u1db/tests/test_sqlite_backend.py 2012-07-19 19:50:58 +0000
+++ u1db/tests/test_sqlite_backend.py 2012-07-31 14:58:20 +0000
@@ -123,7 +123,7 @@
123 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')123 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')
124 g = self.db._parse_index_definition('fieldname')124 g = self.db._parse_index_definition('fieldname')
125 self.assertIsInstance(g, query_parser.ExtractField)125 self.assertIsInstance(g, query_parser.ExtractField)
126 self.assertEqual('fieldname', g.field)126 self.assertEqual(['fieldname'], g.field)
127127
128 def test__update_indexes(self):128 def test__update_indexes(self):
129 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')129 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')

Subscribers

People subscribed via source and target branches