Merge lp:~rvb/maas/bulk-load into lp:~maas-committers/maas/trunk

Proposed by Raphaël Badin
Status: Merged
Approved by: Raphaël Badin
Approved revision: no longer in the source branch.
Merged at revision: 1289
Proposed branch: lp:~rvb/maas/bulk-load
Merge into: lp:~maas-committers/maas/trunk
Diff against target: 199 lines (+117/-1)
7 files modified
src/maasserver/api.py (+3/-0)
src/maasserver/models/macaddress.py (+3/-0)
src/maasserver/models/managers.py (+30/-0)
src/maasserver/models/node.py (+3/-1)
src/maasserver/tests/models.py (+12/-0)
src/maasserver/tests/test_api.py (+24/-0)
src/maasserver/tests/test_managers.py (+42/-0)
To merge this branch: bzr merge lp:~rvb/maas/bulk-load
Reviewer Review Type Date Requested Status
Gavin Panella (community) Approve
Review via email: mp+130535@code.launchpad.net

Commit message

Load the mac addresses related to a node from the cache even if .iterator() is used. Preload the macaddresses and the tags related to a list of nodes when fetching the list of nodes via the api.

Description of the change

Load the related mac addresses in bulk. Preload the macaddresses and the tags related to a list of nodes when fetching the list of nodes via the api.

= Notes =

This took some time to figure out a clean way to fix that problem but I think this solution is quite all right and does not interfere too much with the rest of the code.

I tried using [tag.name for tag in self.tags.only('name')] in tag_names (to avoid loading the other fields) but I looks like the cache is not used in this case.

With that change in, a request to http://0.0.0.0:5240/api/1.0/nodes/?op=list with 8000 nodes now takes ~8s. It probably could be improved but I suspect the bottleneck is the json serialization now.

To post a comment you must log in.
Revision history for this message
Gavin Panella (allenap) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/maasserver/api.py'
2--- src/maasserver/api.py 2012-10-19 12:58:42 +0000
3+++ src/maasserver/api.py 2012-10-19 13:57:21 +0000
4@@ -873,6 +873,9 @@
5 request.user, NODE_PERMISSION.VIEW, ids=match_ids)
6 if match_macs is not None:
7 nodes = nodes.filter(macaddress__mac_address__in=match_macs)
8+ # Prefetch related macaddresses and tags.
9+ nodes = nodes.prefetch_related('macaddress_set__node')
10+ nodes = nodes.prefetch_related('tags')
11 return nodes.order_by('id')
12
13 @operation(idempotent=True)
14
15=== modified file 'src/maasserver/models/macaddress.py'
16--- src/maasserver/models/macaddress.py 2012-06-25 12:51:06 +0000
17+++ src/maasserver/models/macaddress.py 2012-10-19 13:57:21 +0000
18@@ -21,6 +21,7 @@
19 from maasserver import DefaultMeta
20 from maasserver.fields import MACAddressField
21 from maasserver.models.cleansave import CleanSave
22+from maasserver.models.managers import BulkManager
23 from maasserver.models.timestampedmodel import TimestampedModel
24
25
26@@ -38,6 +39,8 @@
27 mac_address = MACAddressField(unique=True)
28 node = ForeignKey('Node', editable=False)
29
30+ objects = BulkManager()
31+
32 class Meta(DefaultMeta):
33 verbose_name = "MAC address"
34 verbose_name_plural = "MAC addresses"
35
36=== added file 'src/maasserver/models/managers.py'
37--- src/maasserver/models/managers.py 1970-01-01 00:00:00 +0000
38+++ src/maasserver/models/managers.py 2012-10-19 13:57:21 +0000
39@@ -0,0 +1,30 @@
40+# Copyright 2012 Canonical Ltd. This software is licensed under the
41+# GNU Affero General Public License version 3 (see the file LICENSE).
42+
43+"""Custom MAAS manager classes."""
44+
45+from __future__ import (
46+ absolute_import,
47+ print_function,
48+ unicode_literals,
49+ )
50+
51+__metaclass__ = type
52+__all__ = [
53+ 'BulkManager',
54+ ]
55+
56+
57+from django.db.models import Manager
58+
59+
60+class BulkManager(Manager):
61+ """A Manager which loads objects from the cache if it's populated.
62+
63+ Even when iterator() is explicitely called (which happens in piston when
64+ related collections are fetched), this manager will fetch objects in bulk
65+ if the cache is populated (i.e. if prefetch_related was used).
66+ """
67+
68+ def iterator(self):
69+ return self.all()
70
71=== modified file 'src/maasserver/models/node.py'
72--- src/maasserver/models/node.py 2012-10-15 08:01:19 +0000
73+++ src/maasserver/models/node.py 2012-10-19 13:57:21 +0000
74@@ -492,7 +492,9 @@
75 return self.system_id
76
77 def tag_names(self):
78- return self.tags.values_list('name', flat=True)
79+ # We don't use self.tags.values_list here because this does not
80+ # take advantage of the cache.
81+ return [tag.name for tag in self.tags.all()]
82
83 def clean_status(self):
84 """Check a node's status transition against the node-status FSM."""
85
86=== modified file 'src/maasserver/tests/models.py'
87--- src/maasserver/tests/models.py 2012-09-18 14:37:28 +0000
88+++ src/maasserver/tests/models.py 2012-10-19 13:57:21 +0000
89@@ -17,12 +17,14 @@
90
91 from django.db.models import (
92 CharField,
93+ ForeignKey,
94 Model,
95 )
96 from maasserver.fields import (
97 JSONObjectField,
98 XMLField,
99 )
100+from maasserver.models.managers import BulkManager
101 from maasserver.models.timestampedmodel import TimestampedModel
102
103
104@@ -53,3 +55,13 @@
105 class FieldChangeTestModel(Model):
106 name1 = CharField(max_length=255, unique=False)
107 name2 = CharField(max_length=255, unique=False)
108+
109+
110+class BulkManagerParentTestModel(Model):
111+ pass
112+
113+
114+class BulkManagerTestModel(Model):
115+ parent = ForeignKey('BulkManagerParentTestModel', editable=False)
116+
117+ objects = BulkManager()
118
119=== modified file 'src/maasserver/tests/test_api.py'
120--- src/maasserver/tests/test_api.py 2012-10-19 12:58:42 +0000
121+++ src/maasserver/tests/test_api.py 2012-10-19 13:57:21 +0000
122@@ -1657,6 +1657,30 @@
123 [node1.system_id, node2.system_id],
124 extract_system_ids(parsed_result))
125
126+ def create_nodes(self, nodegroup, nb):
127+ [factory.make_node(nodegroup=nodegroup, mac=True)
128+ for i in range(nb)]
129+
130+ def test_GET_list_nodes_issues_constant_number_of_queries(self):
131+ nodegroup = factory.make_node_group()
132+ self.create_nodes(nodegroup, 10)
133+ num_queries1, response1 = self.getNumQueries(
134+ self.client.get, self.get_uri('nodes/'), {'op': 'list'})
135+ self.create_nodes(nodegroup, 10)
136+ num_queries2, response2 = self.getNumQueries(
137+ self.client.get, self.get_uri('nodes/'), {'op': 'list'})
138+ # Make sure the responses are ok as it's not useful to compare the
139+ # number of queries if they are not.
140+ self.assertEqual(
141+ [httplib.OK, httplib.OK, 10, 20],
142+ [
143+ response1.status_code,
144+ response2.status_code,
145+ len(extract_system_ids(json.loads(response1.content))),
146+ len(extract_system_ids(json.loads(response2.content))),
147+ ])
148+ self.assertEqual(num_queries1, num_queries2)
149+
150 def test_GET_list_without_nodes_returns_empty_list(self):
151 # If there are no nodes to list, the "list" op still works but
152 # returns an empty list.
153
154=== added file 'src/maasserver/tests/test_managers.py'
155--- src/maasserver/tests/test_managers.py 1970-01-01 00:00:00 +0000
156+++ src/maasserver/tests/test_managers.py 2012-10-19 13:57:21 +0000
157@@ -0,0 +1,42 @@
158+# Copyright 2012 Canonical Ltd. This software is licensed under the
159+# GNU Affero General Public License version 3 (see the file LICENSE).
160+
161+"""Test maasserver model managers."""
162+
163+from __future__ import (
164+ absolute_import,
165+ print_function,
166+ unicode_literals,
167+ )
168+
169+__metaclass__ = type
170+__all__ = []
171+
172+from maasserver.testing.testcase import TestModelTestCase
173+from maasserver.tests.models import (
174+ BulkManagerParentTestModel, BulkManagerTestModel)
175+
176+
177+class BulkManagerTest(TestModelTestCase):
178+
179+ app = 'maasserver.tests'
180+
181+ def test_manager_iterator_uses_cache(self):
182+ parents = set()
183+ for i in range(3):
184+ parents.add(BulkManagerParentTestModel.objects.create())
185+ for i in range(10):
186+ for parent in parents:
187+ BulkManagerTestModel.objects.create(parent=parent)
188+ parents = BulkManagerParentTestModel.objects.all().prefetch_related(
189+ 'bulkmanagertestmodel_set')
190+ # Only two queries are used to fetch all the objects:
191+ # One to fetch the parents, one to fetch the childrens (the query from
192+ # the prefetch_related statement).
193+ # Even if we call iterator() on the related objects, the cache is
194+ # used because BulkManagerTestModel has a manager based on
195+ # BulkManager.
196+ self.assertNumQueries(
197+ 2,
198+ lambda: [list(parent.bulkmanagertestmodel_set.iterator())
199+ for parent in parents])