Merge lp:~wgrant/launchpad/bulk-insert into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: Curtis Hovey
Approved revision: no longer in the source branch.
Merged at revision: 14851
Proposed branch: lp:~wgrant/launchpad/bulk-insert
Merge into: lp:launchpad
Diff against target: 212 lines (+139/-3)
2 files modified
lib/lp/services/database/bulk.py (+74/-2)
lib/lp/services/database/tests/test_bulk.py (+65/-1)
To merge this branch: bzr merge lp:~wgrant/launchpad/bulk-insert
Reviewer Review Type Date Requested Status
Curtis Hovey (community) code Needs Fixing
Review via email: mp+93930@code.launchpad.net

Commit message

[r=sinzui][no-qa] Introduce lp.services.database.bulk.create().

Description of the change

This branch adds lp.services.database.bulk.create(), a helper to insert multiple DB rows in a single statement.

It touches some of Storm's privates in inappropriate ways in order to handle References in a pretty (and typesafe) manner. Reference columns and values have to be flattened into (potentially compound) primary keys, and the methods to do that are private.

To post a comment you must log in.
Revision history for this message
Curtis Hovey (sinzui) wrote :

Change the assert on line 82. It should be explicitly raised if the method has an integrity issue. Is this an AssertionError? I read "The Storm columns to insert values into. Must be from a single class", which implies this is a ValueError. Maybe?
    if len(clses) == 1:
        raise ValueError('The Storm columns to insert values into must be from a single class.')

review: Needs Fixing (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/services/database/bulk.py'
--- lib/lp/services/database/bulk.py 2012-02-20 07:11:22 +0000
+++ lib/lp/services/database/bulk.py 2012-02-21 22:17:21 +0000
@@ -5,6 +5,7 @@
55
6__metaclass__ = type6__metaclass__ = type
7__all__ = [7__all__ = [
8 'create',
8 'load',9 'load',
9 'load_referencing',10 'load_referencing',
10 'load_related',11 'load_related',
@@ -13,14 +14,24 @@
1314
1415
15from collections import defaultdict16from collections import defaultdict
17from itertools import chain
16from functools import partial18from functools import partial
17from operator import attrgetter19from operator import (
20 attrgetter,
21 itemgetter,
22 )
1823
24from storm.databases.postgres import Returning
19from storm.expr import (25from storm.expr import (
20 And,26 And,
27 Insert,
21 Or,28 Or,
22 )29 )
23from storm.info import get_cls_info30from storm.info import (
31 get_cls_info,
32 get_obj_info,
33 )
34from storm.references import Reference
24from storm.store import Store35from storm.store import Store
25from zope.security.proxy import removeSecurityProxy36from zope.security.proxy import removeSecurityProxy
2637
@@ -153,3 +164,64 @@
153 for owning_object in owning_objects:164 for owning_object in owning_objects:
154 keys.update(map(partial(getattr, owning_object), foreign_keys))165 keys.update(map(partial(getattr, owning_object), foreign_keys))
155 return load(object_type, keys)166 return load(object_type, keys)
167
168
169def _dbify_value(col, val):
170 """Convert a value into a form that Storm can compile directly."""
171 if isinstance(col, Reference):
172 # References are mainly meant to be used as descriptors, so we
173 # have to perform a bit of evil here to turn the (potentially
174 # None) value into a sequence of primary key values.
175 if val is None:
176 return (None,) * len(col._relation._get_local_columns(col._cls))
177 else:
178 return col._relation.get_remote_variables(
179 get_obj_info(val).get_obj())
180 else:
181 return (col.variable_factory(value=val),)
182
183
184def _dbify_column(col):
185 """Convert a column into a form that Storm can compile directly."""
186 if isinstance(col, Reference):
187 # References are mainly meant to be used as descriptors, so we
188 # haver to perform a bit of evil here to turn the column into
189 # a sequence of primary key columns.
190 return col._relation._get_local_columns(col._cls)
191 else:
192 return (col,)
193
194
195def create(columns, values):
196 """Create a large number of objects efficiently.
197
198 :param cols: The Storm columns to insert values into. Must be from a
199 single class.
200 :param values: A list of lists of values for the columns.
201 :return: A list of the created objects.
202 """
203 # Flatten Reference faux-columns into their primary keys.
204 db_cols = list(chain.from_iterable(map(_dbify_column, columns)))
205 clses = set(col.cls for col in db_cols)
206 if len(clses) != 1:
207 raise ValueError(
208 "The Storm columns to insert values into must be from a single "
209 "class.")
210 [cls] = clses
211 primary_key = get_cls_info(cls).primary_key
212
213 # Mangle our value list into compilable values. Normal columns just
214 # get passed through the variable factory, while References get
215 # squashed into primary key variables.
216 db_values = [
217 list(chain.from_iterable(
218 _dbify_value(col, val) for col, val in zip(columns, value)))
219 for value in values]
220
221 result = IStore(cls).execute(
222 Returning(Insert(
223 db_cols, expr=db_values, primary_columns=primary_key)))
224 if len(primary_key) == 1:
225 return load(cls, map(itemgetter(0), result))
226 else:
227 return load(cls, result)
156228
=== modified file 'lib/lp/services/database/tests/test_bulk.py'
--- lib/lp/services/database/tests/test_bulk.py 2012-02-20 07:11:22 +0000
+++ lib/lp/services/database/tests/test_bulk.py 2012-02-21 22:17:21 +0000
@@ -5,16 +5,26 @@
55
6__metaclass__ = type6__metaclass__ = type
77
8import datetime
9
10from pytz import UTC
8from storm.exceptions import ClassInfoError11from storm.exceptions import ClassInfoError
9from storm.info import get_obj_info12from storm.info import get_obj_info
10from storm.store import Store13from storm.store import Store
14from testtools.matchers import Equals
11import transaction15import transaction
12from zope.security import (16from zope.security import (
13 checker,17 checker,
14 proxy,18 proxy,
15 )19 )
1620
21from lp.bugs.enum import BugNotificationLevel
17from lp.bugs.model.bug import BugAffectsPerson22from lp.bugs.model.bug import BugAffectsPerson
23from lp.bugs.model.bugsubscription import BugSubscription
24from lp.code.model.branchjob import (
25 BranchJob,
26 BranchJobType,
27 )
18from lp.code.model.branchsubscription import BranchSubscription28from lp.code.model.branchsubscription import BranchSubscription
19from lp.registry.model.person import Person29from lp.registry.model.person import Person
20from lp.services.database import bulk30from lp.services.database import bulk
@@ -27,12 +37,15 @@
27 FeatureFlag,37 FeatureFlag,
28 getFeatureStore,38 getFeatureStore,
29 )39 )
40from lp.services.job.model.job import Job
30from lp.soyuz.model.component import Component41from lp.soyuz.model.component import Component
31from lp.testing import (42from lp.testing import (
43 StormStatementRecorder,
32 TestCase,44 TestCase,
33 TestCaseWithFactory,45 TestCaseWithFactory,
34 )46 )
35from lp.testing.layers import DatabaseFunctionalLayer47from lp.testing.layers import DatabaseFunctionalLayer
48from lp.testing.matchers import HasQueryCount
3649
3750
38object_is_key = lambda thing: thing51object_is_key = lambda thing: thing
@@ -219,9 +232,60 @@
219 self.factory.makeBranch(),232 self.factory.makeBranch(),
220 self.factory.makeBranch(),233 self.factory.makeBranch(),
221 ]234 ]
222 expected = set(list(owned_objects[0].subscriptions) + 235 expected = set(list(owned_objects[0].subscriptions) +
223 list(owned_objects[1].subscriptions))236 list(owned_objects[1].subscriptions))
224 self.assertNotEqual(0, len(expected))237 self.assertNotEqual(0, len(expected))
225 self.assertEqual(expected,238 self.assertEqual(expected,
226 set(bulk.load_referencing(BranchSubscription, owned_objects,239 set(bulk.load_referencing(BranchSubscription, owned_objects,
227 ['branchID'])))240 ['branchID'])))
241
242
243class TestCreate(TestCaseWithFactory):
244
245 layer = DatabaseFunctionalLayer
246
247 def test_references_and_enums(self):
248 # create() correctly compiles plain types, enums and references.
249 bug = self.factory.makeBug()
250 people = [self.factory.makePerson() for i in range(5)]
251
252 wanted = [
253 (bug, person, person, datetime.datetime.now(UTC),
254 BugNotificationLevel.LIFECYCLE)
255 for person in people]
256
257 with StormStatementRecorder() as recorder:
258 subs = bulk.create(
259 (BugSubscription.bug, BugSubscription.person,
260 BugSubscription.subscribed_by, BugSubscription.date_created,
261 BugSubscription.bug_notification_level),
262 wanted)
263
264 self.assertThat(recorder, HasQueryCount(Equals(2)))
265 self.assertContentEqual(
266 wanted,
267 ((sub.bug, sub.person, sub.subscribed_by, sub.date_created,
268 sub.bug_notification_level) for sub in subs))
269
270 def test_null_reference(self):
271 # create() handles None as a Reference value.
272 job = Job()
273 IStore(Job).add(job)
274 wanted = [(None, job, BranchJobType.RECLAIM_BRANCH_SPACE)]
275 [branchjob] = bulk.create(
276 (BranchJob.branch, BranchJob.job, BranchJob.job_type),
277 wanted)
278 self.assertEqual(
279 wanted, [(branchjob.branch, branchjob.job, branchjob.job_type)])
280
281 def test_fails_on_multiple_classes(self):
282 # create() only inserts into columns on a single class.
283 self.assertRaises(
284 ValueError,
285 bulk.create, (BugSubscription.bug, BranchSubscription.branch), [])
286
287 def test_fails_on_reference_mismatch(self):
288 # create() handles Reference columns in a typesafe manner.
289 self.assertRaisesWithContent(
290 RuntimeError, "Property used in an unknown class",
291 bulk.create, (BugSubscription.bug,), [[self.factory.makeBranch()]])