Merge lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223 into lp:launchpad

Proposed by Gavin Panella
Status: Merged
Approved by: Graham Binns
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223
Merge into: lp:launchpad
Diff against target: 221 lines (+166/-2)
3 files modified
lib/lp/bugs/interfaces/bugwatch.py (+31/-0)
lib/lp/bugs/model/bugwatch.py (+29/-1)
lib/lp/bugs/tests/test_bugwatch.py (+106/-1)
To merge this branch: bzr merge lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223
Reviewer Review Type Date Requested Status
Graham Binns (community) code Approve
Review via email: mp+24134@code.launchpad.net

Commit message

New methods for IBugWatchSet - bulkSetStatus() and bulkAddActivity() - which will be used to improve performance in checkwatches.

Description of the change

This branch adds two new methods to BugWatchSet: bulkSetStatus() and bulkAddActivity().

In checkwatches it's often necessary to update many 100s (maybe 1000s) of bug watches at a time, especially when the remote bug tracker is down. Currently checkwatches iterates through the materialized bug watch objects (from Storm), updating their attributes and calling addActivity(), before committing. This is horrendously slow because (a) these bug watch objects may need reloading because they were materialized in a separate transaction and reloads are done one at a time, (b) they will be saved one at a time, and (c) the activity is added one row at a time. For even a modest 100 bug watches this will result in 100 SELECT, 100 UPDATE and 100 INSERT statements. This branch reduces this to 1 UPDATE and 1 INSERT statement.

These methods were conceived with checkwatches in mind, but have not been stitched in yet. Graham is doing some major refactoring in the checkwatches packages so I'm deferring that work until later.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) wrote :

<gmb> allenap, I think bulkSetStatus() is a little confusing, because it's called ...SetStatus() but it in fact mutates last_error_type. However, given that we should rename that at some point I'd settle for an XXX to the effect of "yes, this sounds confusing, it isn't"
 (and a bug)
<gmb> allenap, Other than that, the branch looks brilliant. r=me.

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/bugs/interfaces/bugwatch.py'
2--- lib/lp/bugs/interfaces/bugwatch.py 2010-03-23 12:55:05 +0000
3+++ lib/lp/bugs/interfaces/bugwatch.py 2010-04-27 08:44:29 +0000
4@@ -288,6 +288,37 @@
5 :type bug_watch_ids: An iterable of `int`s, or `None`.
6 """
7
8+ # XXX: GavinPanella bug=570277 2010-04-26: In bulkSetError() the
9+ # last_error_type argument accepts the same values as the result
10+ # argument to bulkAddActivity(). Using different terms for
11+ # essentially the same thing is confusing.
12+
13+ def bulkSetError(bug_watches, last_error_type=None):
14+ """Efficiently update the status of the given bug watches.
15+
16+ Sets the `last_error_type` field as instructed, updates
17+ `lastchecked` to now and resets `next_check` to None, all in
18+ the most efficient way possible.
19+
20+ :param bug_watches: An iterable of `IBugWatch` objects or
21+ primary keys for the same.
22+ :param last_error_type: A member of `BugWatchActivityStatus`
23+ or None.
24+ """
25+
26+ def bulkAddActivity(bug_watches, result=None, message=None, oops_id=None):
27+ """Efficiently add activity for the given bug watches.
28+
29+ Add `BugWatchActivity` records for the given bug watches in
30+ the most efficient way possible.
31+
32+ :param bug_watches: An iterable of `IBugWatch` objects or
33+ primary keys for the same.
34+ :param result: See `IBugWatch.addActivity`.
35+ :param message: See `IBugWatch.addActivity`.
36+ :param oops_id: See `IBugWatch.addActivity`.
37+ """
38+
39
40 class NoBugTrackerFound(Exception):
41 """No bug tracker with the base_url is registered in Launchpad."""
42
43=== modified file 'lib/lp/bugs/model/bugwatch.py'
44--- lib/lp/bugs/model/bugwatch.py 2010-04-09 15:04:19 +0000
45+++ lib/lp/bugs/model/bugwatch.py 2010-04-27 08:44:29 +0000
46@@ -33,7 +33,7 @@
47 from canonical.database.constants import UTC_NOW
48 from canonical.database.datetimecol import UtcDateTimeCol
49 from canonical.database.enumcol import EnumCol
50-from canonical.database.sqlbase import SQLBase
51+from canonical.database.sqlbase import SQLBase, sqlvalues
52 from canonical.launchpad.database.message import Message
53 from canonical.launchpad.helpers import shortlist
54 from canonical.launchpad.interfaces.launchpad import ILaunchpadCelebrities
55@@ -641,6 +641,34 @@
56 query = query.find(In(BugWatch.id, bug_watch_ids))
57 return query
58
59+ def bulkSetError(self, bug_watches, last_error_type=None):
60+ """See `IBugWatchSet`."""
61+ bug_watch_ids = set(
62+ (bug_watch.id if IBugWatch.providedBy(bug_watch) else bug_watch)
63+ for bug_watch in bug_watches)
64+ bug_watches_in_database = IStore(BugWatch).find(
65+ BugWatch, In(BugWatch.id, list(bug_watch_ids)))
66+ bug_watches_in_database.set(
67+ lastchecked=UTC_NOW,
68+ last_error_type=last_error_type,
69+ next_check=None)
70+
71+ def bulkAddActivity(self, bug_watches, result=None, message=None,
72+ oops_id=None):
73+ """See `IBugWatchSet`."""
74+ bug_watch_ids = set(
75+ (bug_watch.id if IBugWatch.providedBy(bug_watch) else bug_watch)
76+ for bug_watch in bug_watches)
77+ insert_activity_statement = (
78+ "INSERT INTO BugWatchActivity"
79+ " (bug_watch, result, message, oops_id) "
80+ "SELECT BugWatch.id, %s, %s, %s FROM BugWatch"
81+ " WHERE BugWatch.id IN %s"
82+ )
83+ IStore(BugWatch).execute(
84+ insert_activity_statement % sqlvalues(
85+ result, message, oops_id, bug_watch_ids))
86+
87
88 class BugWatchActivity(Storm):
89 """See `IBugWatchActivity`."""
90
91=== modified file 'lib/lp/bugs/tests/test_bugwatch.py'
92--- lib/lp/bugs/tests/test_bugwatch.py 2010-04-13 01:49:42 +0000
93+++ lib/lp/bugs/tests/test_bugwatch.py 2010-04-27 08:44:29 +0000
94@@ -13,6 +13,8 @@
95 from zope.component import getUtility
96 from zope.security.proxy import removeSecurityProxy
97
98+from canonical.database.constants import UTC_NOW
99+
100 from canonical.launchpad.ftests import login, ANONYMOUS
101 from canonical.launchpad.scripts.garbo import BugWatchActivityPruner
102 from canonical.launchpad.scripts.logger import QuietFakeLogger
103@@ -23,7 +25,8 @@
104 from lp.bugs.interfaces.bugtask import BugTaskImportance, BugTaskStatus
105 from lp.bugs.interfaces.bugtracker import BugTrackerType, IBugTrackerSet
106 from lp.bugs.interfaces.bugwatch import (
107- IBugWatchSet, NoBugTrackerFound, UnrecognizedBugTrackerURL)
108+ BugWatchActivityStatus, IBugWatchSet, NoBugTrackerFound,
109+ UnrecognizedBugTrackerURL)
110 from lp.bugs.scripts.checkwatches.scheduler import MAX_SAMPLE_SIZE
111 from lp.registry.interfaces.person import IPersonSet
112
113@@ -434,6 +437,108 @@
114 bug_watch.id for bug_watch in bug_watches_limited])))
115
116
117+class TestBugWatchSetBulkOperations(TestCaseWithFactory):
118+ """Tests for the bugwatch updating system."""
119+
120+ layer = LaunchpadZopelessLayer
121+
122+ def setUp(self):
123+ super(TestBugWatchSetBulkOperations, self).setUp()
124+ self.bug_watches = [
125+ self.factory.makeBugWatch(remote_bug='alice'),
126+ self.factory.makeBugWatch(remote_bug='bob'),
127+ ]
128+ for bug_watch in self.bug_watches:
129+ bug_watch.lastchecked = None
130+ bug_watch.last_error_type = None
131+ bug_watch.next_check = UTC_NOW
132+
133+ def _checkStatusOfBugWatches(
134+ self, last_checked_is_null, next_check_is_null, last_error_type):
135+ for bug_watch in self.bug_watches:
136+ self.failUnlessEqual(
137+ last_checked_is_null, bug_watch.lastchecked is None)
138+ self.failUnlessEqual(
139+ next_check_is_null, bug_watch.next_check is None)
140+ self.failUnlessEqual(
141+ last_error_type, bug_watch.last_error_type)
142+
143+ def test_bulkSetError(self):
144+ # Called with only bug watches, bulkSetError() marks the
145+ # watches as checked without error, and unscheduled.
146+ getUtility(IBugWatchSet).bulkSetError(self.bug_watches)
147+ self._checkStatusOfBugWatches(False, True, None)
148+
149+ def test_bulkSetError_with_error(self):
150+ # Called with bug watches and an error, bulkSetError() marks
151+ # the watches with the given error, and unschedules them.
152+ error = BugWatchActivityStatus.BUG_NOT_FOUND
153+ getUtility(IBugWatchSet).bulkSetError(self.bug_watches, error)
154+ self._checkStatusOfBugWatches(False, True, error)
155+
156+ def test_bulkSetError_with_id_list(self):
157+ # The ids of bug watches to update can be passed in.
158+ getUtility(IBugWatchSet).bulkSetError(
159+ [bug_watch.id for bug_watch in self.bug_watches])
160+ self._checkStatusOfBugWatches(False, True, None)
161+
162+ def test_bulkSetError_with_mixed_list(self):
163+ # The list passed in can contain a mix of bug watches and
164+ # their ids.
165+ getUtility(IBugWatchSet).bulkSetError(
166+ [bug_watch.id for bug_watch in self.bug_watches[::2]] +
167+ [bug_watch for bug_watch in self.bug_watches[1::2]])
168+ self._checkStatusOfBugWatches(False, True, None)
169+
170+ def test_bulkSetError_with_iterator(self):
171+ # Any iterator can be passed in.
172+ getUtility(IBugWatchSet).bulkSetError(
173+ (bug_watch for bug_watch in self.bug_watches))
174+ self._checkStatusOfBugWatches(False, True, None)
175+
176+ def _checkActivityForBugWatches(self, result, message, oops_id):
177+ for bug_watch in self.bug_watches:
178+ latest_activity = bug_watch.activity.first()
179+ self.failUnlessEqual(result, latest_activity.result)
180+ self.failUnlessEqual(message, latest_activity.message)
181+ self.failUnlessEqual(oops_id, latest_activity.oops_id)
182+
183+ def test_bulkAddActivity(self):
184+ # Called with only bug watches, bulkAddActivity() adds
185+ # successful activity records for the given bug watches.
186+ getUtility(IBugWatchSet).bulkAddActivity(self.bug_watches)
187+ self._checkActivityForBugWatches(None, None, None)
188+
189+ def test_bulkAddActivity_with_error(self):
190+ # Called with additional error information, bulkAddActivity()
191+ # adds appropriate and identical activity records for each of
192+ # the given bug watches.
193+ error = BugWatchActivityStatus.PRIVATE_REMOTE_BUG
194+ getUtility(IBugWatchSet).bulkAddActivity(
195+ self.bug_watches, error, "Forbidden", "OOPS-1234")
196+ self._checkActivityForBugWatches(error, "Forbidden", "OOPS-1234")
197+
198+ def test_bulkAddActivity_with_id_list(self):
199+ # The ids of bug watches can be passed in.
200+ getUtility(IBugWatchSet).bulkAddActivity(
201+ [bug_watch.id for bug_watch in self.bug_watches])
202+ self._checkActivityForBugWatches(None, None, None)
203+
204+ def test_bulkAddActivity_with_mixed_list(self):
205+ # The list passed in can contain a mix of bug watches and
206+ # their ids.
207+ getUtility(IBugWatchSet).bulkAddActivity(
208+ [bug_watch.id for bug_watch in self.bug_watches[::2]] +
209+ [bug_watch for bug_watch in self.bug_watches[1::2]])
210+ self._checkActivityForBugWatches(None, None, None)
211+
212+ def test_bulkAddActivity_with_iterator(self):
213+ # Any iterator can be passed in.
214+ getUtility(IBugWatchSet).bulkAddActivity(
215+ (bug_watch for bug_watch in self.bug_watches))
216+ self._checkActivityForBugWatches(None, None, None)
217+
218+
219 class TestBugWatchBugTasks(TestCaseWithFactory):
220
221 layer = DatabaseFunctionalLayer