Merge lp:~thisfred/u1db/iterate-over-list-of-dicts into lp:u1db

Proposed by Eric Casteleijn
Status: Merged
Approved by: Eric Casteleijn
Approved revision: 372
Merged at revision: 366
Proposed branch: lp:~thisfred/u1db/iterate-over-list-of-dicts
Merge into: lp:u1db
Diff against target: 349 lines (+130/-69)
5 files modified
src/u1db_query.c (+74/-46)
u1db/query_parser.py (+25/-17)
u1db/tests/test_backends.py (+20/-0)
u1db/tests/test_query_parser.py (+10/-5)
u1db/tests/test_sqlite_backend.py (+1/-1)
To merge this branch: bzr merge lp:~thisfred/u1db/iterate-over-list-of-dicts
Reviewer Review Type Date Requested Status
Samuele Pedroni Approve
Review via email: mp+117120@code.launchpad.net

Commit message

Added support for indexing dictionaries in lists, so this becomes possible:

        doc = self.db.create_doc_from_json(
            '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]}]}')
        self.db.create_index('test-idx', 'foo.zap.qux')
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))

Description of the change

Added support for indexing dictionaries in lists, so this becomes possible:

        doc = self.db.create_doc_from_json(
            '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]}]}')
        self.db.create_index('test-idx', 'foo.zap.qux')
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
        self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))

To post a comment you must log in.
Revision history for this message
Samuele Pedroni (pedronis) wrote :

 + subfields = self.field.split('.')

I would move this to the constructor

255 + return extract_field(raw_doc, subfields)

+def extract_field(raw_doc, subfields):
206 + if not isinstance(raw_doc, dict):
207 + return []
208 + val = raw_doc.get(subfields.pop(0))

I would use a index argument and avoid the pop (especially pop(0)) and copy

216 + subfields = []
this line is not needed

same for these:
223 + if val is None:
224 + return []

line 209-210 cover that

368. By Eric Casteleijn

split in __init__

369. By Eric Casteleijn

split in __init__

370. By Eric Casteleijn

don't mutate list

Revision history for this message
Eric Casteleijn (thisfred) wrote :

Fixes pushed

371. By Eric Casteleijn

don't copy list around

372. By Eric Casteleijn

don't copy list around

Revision history for this message
Samuele Pedroni (pedronis) wrote :

+1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/u1db_query.c'
2--- src/u1db_query.c 2012-07-27 15:17:04 +0000
3+++ src/u1db_query.c 2012-07-31 14:58:20 +0000
4@@ -252,60 +252,84 @@
5 op_combine, "combine", json_type_string, -1, JUST_EXPRESSION};
6
7 static int
8-extract_field_values(json_object *obj, const string_list *field_path,
9- int value_type, string_list *values)
10+extract_value(json_object *val, int value_type, string_list *values)
11 {
12- string_list_item *item = NULL;
13+ int status = U1DB_OK;
14+ int i, integer_value, boolean_value, length;
15 char string_value[MAX_INT_STR_LEN];
16- struct array_list *list_val = NULL;
17- json_object *val = NULL;
18- int i, integer_value, boolean_value;
19- int status = U1DB_OK;
20- val = obj;
21- if (val == NULL)
22- goto finish;
23- for (item = field_path->head; item != NULL; item = item->next)
24- {
25- val = json_object_object_get(val, item->data);
26- if (val == NULL)
27- goto finish;
28- }
29 if (json_object_is_type(val, json_type_string) && value_type ==
30 json_type_string) {
31- if ((status = append(values, json_object_get_string(val))) != U1DB_OK)
32- goto finish;
33- } else if (json_object_is_type(val, json_type_int) && value_type ==
34+ status = append(values, json_object_get_string(val));
35+ goto finish;
36+ }
37+ if (json_object_is_type(val, json_type_int) && value_type ==
38 json_type_int) {
39 integer_value = json_object_get_int(val);
40 snprintf(string_value, MAX_INT_STR_LEN, "%d", integer_value);
41- if ((status = append(values, string_value)) != U1DB_OK)
42- goto finish;
43- } else if (json_object_is_type(val, json_type_boolean) &&
44- value_type == json_type_boolean) {
45+ status = append(values, string_value);
46+ goto finish;
47+ }
48+ if (json_object_is_type(val, json_type_boolean) &&
49+ value_type == json_type_boolean) {
50 boolean_value = json_object_get_boolean(val);
51 if (boolean_value) {
52 status = append(values, "1");
53- if (status != U1DB_OK)
54- goto finish;
55 } else {
56 status = append(values, "0");
57+ }
58+ goto finish;
59+ }
60+ if (json_object_is_type(val, json_type_array)) {
61+ length = json_object_array_length(val);
62+ for (i = 0; i < length; i++) {
63+ status = extract_value(
64+ json_object_array_get_idx(val, i), value_type, values);
65 if (status != U1DB_OK)
66 goto finish;
67 }
68- } else if (json_object_is_type(val, json_type_array)) {
69- // TODO: recursively check the types
70- list_val = json_object_get_array(val);
71- for (i = 0; i < list_val->length; i++)
72- {
73- if ((status = append(values, json_object_get_string(
74- array_list_get_idx(
75- list_val, i)))) != U1DB_OK)
76- goto finish;
77- }
78- }
79-finish:
80- return status;
81-}
82+ }
83+finish:
84+ return status;
85+}
86+
87+static int
88+extract_field_values(json_object *obj, const string_list_item *field,
89+ int value_type, string_list *values)
90+{
91+ json_object *val = NULL;
92+ json_object *array_item = NULL;
93+ int i, length;
94+ int status = U1DB_OK;
95+
96+ if (!json_object_is_type(obj, json_type_object)) {
97+ goto finish;
98+ }
99+ val = json_object_object_get(obj, field->data);
100+ if (val == NULL)
101+ goto finish;
102+ if (field->next != NULL) {
103+ if (json_object_is_type(val, json_type_array)) {
104+ length = json_object_array_length(val);
105+ for (i = 0; i < length; i++) {
106+ array_item = json_object_array_get_idx(val, i);
107+ status = extract_field_values(
108+ array_item, field->next, value_type, values);
109+ if (status != U1DB_OK)
110+ goto finish;
111+ }
112+ goto finish;
113+ }
114+ if (json_object_is_type(val, json_type_object)) {
115+ status = extract_field_values(val, field->next, value_type, values);
116+ goto finish;
117+ }
118+ goto finish;
119+ }
120+ status = extract_value(val, value_type, values);
121+finish:
122+ return status;
123+}
124+
125
126 static int
127 split(string_list *result, char *string, char splitter)
128@@ -351,7 +375,7 @@
129 status = ((op_function)tree->op)(tree, obj, values);
130 } else {
131 status = extract_field_values(
132- obj, tree->field_path, json_type_string, values);
133+ obj, tree->field_path->head, json_type_string, values);
134 }
135 return status;
136 }
137@@ -411,7 +435,7 @@
138 if (status != U1DB_OK)
139 return status;
140 status = extract_field_values(
141- obj, node->field_path, json_type_int, values);
142+ obj, node->field_path->head, json_type_int, values);
143 if (status != U1DB_OK)
144 goto finish;
145 node = node->next_sibling;
146@@ -539,7 +563,7 @@
147 //booleans by extract_field_values.
148
149 status = extract_field_values(
150- obj, tree->first_child->field_path, json_type_boolean, values);
151+ obj, tree->first_child->field_path->head, json_type_boolean, values);
152 if (status != U1DB_OK)
153 goto finish;
154 for (item = values->head; item != NULL; item = item->next)
155@@ -1274,7 +1298,7 @@
156 char *sep = NULL;
157 parse_tree *node = NULL;
158 int status = U1DB_OK;
159- int i;
160+ int i, array_size;
161
162 sep = get_token(tokens);
163 if (sep == NULL || strcmp(sep, "(") != 0) {
164@@ -1321,8 +1345,9 @@
165 }
166 }
167 i = 0;
168+ array_size = sizeof(result->value_types) / sizeof(int);
169 for (node = result->first_child; node != NULL; node = node->next_sibling) {
170- node->arg_type = result->value_types[i % result->number_of_children];
171+ node->arg_type = result->value_types[i % array_size];
172 if (node->arg_type == EXPRESSION) {
173 status = to_getter(node);
174 if (status != U1DB_OK)
175@@ -1538,7 +1563,8 @@
176 if (ctx->obj == NULL || !json_object_is_type(ctx->obj, json_type_object)) {
177 return U1DB_INVALID_JSON;
178 }
179- if ((status = init_list(&values)) != U1DB_OK)
180+ status = init_list(&values);
181+ if (status != U1DB_OK)
182 goto finish;
183 status = get_values(tree, ctx->obj, values);
184 if (status != U1DB_OK)
185@@ -1550,8 +1576,10 @@
186 goto finish;
187 }
188 finish:
189- if (values != NULL)
190+ if (values != NULL) {
191 destroy_list(values);
192+ values = NULL;
193+ }
194 return status;
195 }
196
197
198=== modified file 'u1db/query_parser.py'
199--- u1db/query_parser.py 2012-07-27 14:50:11 +0000
200+++ u1db/query_parser.py 2012-07-31 14:58:20 +0000
201@@ -53,6 +53,29 @@
202 return self.value
203
204
205+def extract_field(raw_doc, subfields, index=0):
206+ if not isinstance(raw_doc, dict):
207+ return []
208+ val = raw_doc.get(subfields[index])
209+ if val is None:
210+ return []
211+ if index < len(subfields) - 1:
212+ if isinstance(val, list):
213+ results = []
214+ for item in val:
215+ results.extend(extract_field(item, subfields, index + 1))
216+ return results
217+ if isinstance(val, dict):
218+ return extract_field(val, subfields, index + 1)
219+ return []
220+ if isinstance(val, dict):
221+ return []
222+ if isinstance(val, list):
223+ # Strip anything in the list that isn't a simple type
224+ return [v for v in val if not isinstance(v, (dict, list))]
225+ return [val]
226+
227+
228 class ExtractField(Getter):
229 """Extract a field from the document."""
230
231@@ -69,25 +92,10 @@
232 :param field: a specifier for the field to return.
233 This is either a field name, or a dotted field name.
234 """
235- self.field = field
236+ self.field = field.split('.')
237
238 def get(self, raw_doc):
239- for subfield in self.field.split('.'):
240- if isinstance(raw_doc, dict):
241- raw_doc = raw_doc.get(subfield)
242- else:
243- return []
244- if isinstance(raw_doc, dict):
245- return []
246- if raw_doc is None:
247- result = []
248- elif isinstance(raw_doc, list):
249- # Strip anything in the list that isn't a simple type
250- result = [val for val in raw_doc
251- if not isinstance(val, (dict, list))]
252- else:
253- result = [raw_doc]
254- return result
255+ return extract_field(raw_doc, self.field)
256
257
258 class Transformation(Getter):
259
260=== modified file 'u1db/tests/test_backends.py'
261--- u1db/tests/test_backends.py 2012-07-27 15:24:48 +0000
262+++ u1db/tests/test_backends.py 2012-07-31 14:58:20 +0000
263@@ -1470,6 +1470,26 @@
264 self.db.create_index('test-idx', 'sub.foo.bar.baz.qux.fnord')
265 self.assertEqual([], self.db.get_from_index('test-idx', '*'))
266
267+ def test_nested_traverses_lists(self):
268+ # subpath finds dicts in list
269+ doc = self.db.create_doc_from_json(
270+ '{"foo": [{"zap": "bar"}, {"zap": "baz"}]}')
271+ # subpath only finds dicts in list
272+ self.db.create_doc_from_json('{"foo": ["zap", "baz"]}')
273+ self.db.create_index('test-idx', 'foo.zap')
274+ self.assertEqual([doc], self.db.get_from_index('test-idx', 'bar'))
275+ self.assertEqual([doc], self.db.get_from_index('test-idx', 'baz'))
276+
277+ def test_nested_list_traversal(self):
278+ # subpath finds dicts in list
279+ doc = self.db.create_doc_from_json(
280+ '{"foo": [{"zap": [{"qux": "fnord"}, {"qux": "zombo"}]},'
281+ '{"zap": "baz"}]}')
282+ # subpath only finds dicts in list
283+ self.db.create_index('test-idx', 'foo.zap.qux')
284+ self.assertEqual([doc], self.db.get_from_index('test-idx', 'fnord'))
285+ self.assertEqual([doc], self.db.get_from_index('test-idx', 'zombo'))
286+
287 def test_index_list1(self):
288 self.db.create_index("index", "name")
289 content = '{"name": ["foo", "bar"]}'
290
291=== modified file 'u1db/tests/test_query_parser.py'
292--- u1db/tests/test_query_parser.py 2012-07-26 13:35:50 +0000
293+++ u1db/tests/test_query_parser.py 2012-07-31 14:58:20 +0000
294@@ -179,6 +179,11 @@
295 def test_get_value_list_of_dicts(self):
296 self.assertExtractField([], 'foo', {'foo': [{'zap': 'bar'}]})
297
298+ def test_get_value_list_of_dicts2(self):
299+ self.assertExtractField(
300+ ['bar', 'baz'], 'foo.zap',
301+ {'foo': [{'zap': 'bar'}, {'zap': 'baz'}]})
302+
303 def test_get_value_int(self):
304 self.assertExtractField([9], 'foo', {'foo': 9})
305
306@@ -395,12 +400,12 @@
307 def test_parse_field(self):
308 getter = self.parse("a")
309 self.assertIsInstance(getter, query_parser.ExtractField)
310- self.assertEqual("a", getter.field)
311+ self.assertEqual(["a"], getter.field)
312
313 def test_parse_dotted_field(self):
314 getter = self.parse("a.b")
315 self.assertIsInstance(getter, query_parser.ExtractField)
316- self.assertEqual("a.b", getter.field)
317+ self.assertEqual(["a", "b"], getter.field)
318
319 def test_parse_dotted_field_nothing_after_dot(self):
320 self.assertParseError("a.")
321@@ -427,12 +432,12 @@
322 getter = self.parse("lower(a)")
323 self.assertIsInstance(getter, query_parser.Lower)
324 self.assertIsInstance(getter.inner, query_parser.ExtractField)
325- self.assertEqual("a", getter.inner.field)
326+ self.assertEqual(["a"], getter.inner.field)
327
328 def test_parse_all(self):
329 getters = self.parse_all(["a", "b"])
330 self.assertEqual(2, len(getters))
331 self.assertIsInstance(getters[0], query_parser.ExtractField)
332- self.assertEqual("a", getters[0].field)
333+ self.assertEqual(["a"], getters[0].field)
334 self.assertIsInstance(getters[1], query_parser.ExtractField)
335- self.assertEqual("b", getters[1].field)
336+ self.assertEqual(["b"], getters[1].field)
337
338=== modified file 'u1db/tests/test_sqlite_backend.py'
339--- u1db/tests/test_sqlite_backend.py 2012-07-19 19:50:58 +0000
340+++ u1db/tests/test_sqlite_backend.py 2012-07-31 14:58:20 +0000
341@@ -123,7 +123,7 @@
342 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')
343 g = self.db._parse_index_definition('fieldname')
344 self.assertIsInstance(g, query_parser.ExtractField)
345- self.assertEqual('fieldname', g.field)
346+ self.assertEqual(['fieldname'], g.field)
347
348 def test__update_indexes(self):
349 self.db = sqlite_backend.SQLitePartialExpandDatabase(':memory:')

Subscribers

People subscribed via source and target branches