Merge lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223 into lp:launchpad

Proposed by Gavin Panella
Status: Merged
Approved by: Graham Binns
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223
Merge into: lp:launchpad
Diff against target: 221 lines (+166/-2)
3 files modified
lib/lp/bugs/interfaces/bugwatch.py (+31/-0)
lib/lp/bugs/model/bugwatch.py (+29/-1)
lib/lp/bugs/tests/test_bugwatch.py (+106/-1)
To merge this branch: bzr merge lp:~allenap/launchpad/early-batching-bulk-methods-bug-509223
Reviewer Review Type Date Requested Status
Graham Binns (community) code Approve
Review via email: mp+24134@code.launchpad.net

Commit message

New methods for IBugWatchSet - bulkSetStatus() and bulkAddActivity() - which will be used to improve performance in checkwatches.

Description of the change

This branch adds two new methods to BugWatchSet: bulkSetStatus() and bulkAddActivity().

In checkwatches it's often necessary to update many 100s (maybe 1000s) of bug watches at a time, especially when the remote bug tracker is down. Currently checkwatches iterates through the materialized bug watch objects (from Storm), updating their attributes and calling addActivity(), before committing. This is horrendously slow because (a) these bug watch objects may need reloading because they were materialized in a separate transaction and reloads are done one at a time, (b) they will be saved one at a time, and (c) the activity is added one row at a time. For even a modest 100 bug watches this will result in 100 SELECT, 100 UPDATE and 100 INSERT statements. This branch reduces this to 1 UPDATE and 1 INSERT statement.

These methods were conceived with checkwatches in mind, but have not been stitched in yet. Graham is doing some major refactoring in the checkwatches packages so I'm deferring that work until later.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) wrote :

<gmb> allenap, I think bulkSetStatus() is a little confusing, because it's called ...SetStatus() but it in fact mutates last_error_type. However, given that we should rename that at some point I'd settle for an XXX to the effect of "yes, this sounds confusing, it isn't"
 (and a bug)
<gmb> allenap, Other than that, the branch looks brilliant. r=me.

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/bugs/interfaces/bugwatch.py'
--- lib/lp/bugs/interfaces/bugwatch.py 2010-03-23 12:55:05 +0000
+++ lib/lp/bugs/interfaces/bugwatch.py 2010-04-27 08:44:29 +0000
@@ -288,6 +288,37 @@
288 :type bug_watch_ids: An iterable of `int`s, or `None`.288 :type bug_watch_ids: An iterable of `int`s, or `None`.
289 """289 """
290290
291 # XXX: GavinPanella bug=570277 2010-04-26: In bulkSetError() the
292 # last_error_type argument accepts the same values as the result
293 # argument to bulkAddActivity(). Using different terms for
294 # essentially the same thing is confusing.
295
296 def bulkSetError(bug_watches, last_error_type=None):
297 """Efficiently update the status of the given bug watches.
298
299 Sets the `last_error_type` field as instructed, updates
300 `lastchecked` to now and resets `next_check` to None, all in
301 the most efficient way possible.
302
303 :param bug_watches: An iterable of `IBugWatch` objects or
304 primary keys for the same.
305 :param last_error_type: A member of `BugWatchActivityStatus`
306 or None.
307 """
308
309 def bulkAddActivity(bug_watches, result=None, message=None, oops_id=None):
310 """Efficiently add activity for the given bug watches.
311
312 Add `BugWatchActivity` records for the given bug watches in
313 the most efficient way possible.
314
315 :param bug_watches: An iterable of `IBugWatch` objects or
316 primary keys for the same.
317 :param result: See `IBugWatch.addActivity`.
318 :param message: See `IBugWatch.addActivity`.
319 :param oops_id: See `IBugWatch.addActivity`.
320 """
321
291322
292class NoBugTrackerFound(Exception):323class NoBugTrackerFound(Exception):
293 """No bug tracker with the base_url is registered in Launchpad."""324 """No bug tracker with the base_url is registered in Launchpad."""
294325
=== modified file 'lib/lp/bugs/model/bugwatch.py'
--- lib/lp/bugs/model/bugwatch.py 2010-04-09 15:04:19 +0000
+++ lib/lp/bugs/model/bugwatch.py 2010-04-27 08:44:29 +0000
@@ -33,7 +33,7 @@
33from canonical.database.constants import UTC_NOW33from canonical.database.constants import UTC_NOW
34from canonical.database.datetimecol import UtcDateTimeCol34from canonical.database.datetimecol import UtcDateTimeCol
35from canonical.database.enumcol import EnumCol35from canonical.database.enumcol import EnumCol
36from canonical.database.sqlbase import SQLBase36from canonical.database.sqlbase import SQLBase, sqlvalues
37from canonical.launchpad.database.message import Message37from canonical.launchpad.database.message import Message
38from canonical.launchpad.helpers import shortlist38from canonical.launchpad.helpers import shortlist
39from canonical.launchpad.interfaces.launchpad import ILaunchpadCelebrities39from canonical.launchpad.interfaces.launchpad import ILaunchpadCelebrities
@@ -641,6 +641,34 @@
641 query = query.find(In(BugWatch.id, bug_watch_ids))641 query = query.find(In(BugWatch.id, bug_watch_ids))
642 return query642 return query
643643
644 def bulkSetError(self, bug_watches, last_error_type=None):
645 """See `IBugWatchSet`."""
646 bug_watch_ids = set(
647 (bug_watch.id if IBugWatch.providedBy(bug_watch) else bug_watch)
648 for bug_watch in bug_watches)
649 bug_watches_in_database = IStore(BugWatch).find(
650 BugWatch, In(BugWatch.id, list(bug_watch_ids)))
651 bug_watches_in_database.set(
652 lastchecked=UTC_NOW,
653 last_error_type=last_error_type,
654 next_check=None)
655
656 def bulkAddActivity(self, bug_watches, result=None, message=None,
657 oops_id=None):
658 """See `IBugWatchSet`."""
659 bug_watch_ids = set(
660 (bug_watch.id if IBugWatch.providedBy(bug_watch) else bug_watch)
661 for bug_watch in bug_watches)
662 insert_activity_statement = (
663 "INSERT INTO BugWatchActivity"
664 " (bug_watch, result, message, oops_id) "
665 "SELECT BugWatch.id, %s, %s, %s FROM BugWatch"
666 " WHERE BugWatch.id IN %s"
667 )
668 IStore(BugWatch).execute(
669 insert_activity_statement % sqlvalues(
670 result, message, oops_id, bug_watch_ids))
671
644672
645class BugWatchActivity(Storm):673class BugWatchActivity(Storm):
646 """See `IBugWatchActivity`."""674 """See `IBugWatchActivity`."""
647675
=== modified file 'lib/lp/bugs/tests/test_bugwatch.py'
--- lib/lp/bugs/tests/test_bugwatch.py 2010-04-13 01:49:42 +0000
+++ lib/lp/bugs/tests/test_bugwatch.py 2010-04-27 08:44:29 +0000
@@ -13,6 +13,8 @@
13from zope.component import getUtility13from zope.component import getUtility
14from zope.security.proxy import removeSecurityProxy14from zope.security.proxy import removeSecurityProxy
1515
16from canonical.database.constants import UTC_NOW
17
16from canonical.launchpad.ftests import login, ANONYMOUS18from canonical.launchpad.ftests import login, ANONYMOUS
17from canonical.launchpad.scripts.garbo import BugWatchActivityPruner19from canonical.launchpad.scripts.garbo import BugWatchActivityPruner
18from canonical.launchpad.scripts.logger import QuietFakeLogger20from canonical.launchpad.scripts.logger import QuietFakeLogger
@@ -23,7 +25,8 @@
23from lp.bugs.interfaces.bugtask import BugTaskImportance, BugTaskStatus25from lp.bugs.interfaces.bugtask import BugTaskImportance, BugTaskStatus
24from lp.bugs.interfaces.bugtracker import BugTrackerType, IBugTrackerSet26from lp.bugs.interfaces.bugtracker import BugTrackerType, IBugTrackerSet
25from lp.bugs.interfaces.bugwatch import (27from lp.bugs.interfaces.bugwatch import (
26 IBugWatchSet, NoBugTrackerFound, UnrecognizedBugTrackerURL)28 BugWatchActivityStatus, IBugWatchSet, NoBugTrackerFound,
29 UnrecognizedBugTrackerURL)
27from lp.bugs.scripts.checkwatches.scheduler import MAX_SAMPLE_SIZE30from lp.bugs.scripts.checkwatches.scheduler import MAX_SAMPLE_SIZE
28from lp.registry.interfaces.person import IPersonSet31from lp.registry.interfaces.person import IPersonSet
2932
@@ -434,6 +437,108 @@
434 bug_watch.id for bug_watch in bug_watches_limited])))437 bug_watch.id for bug_watch in bug_watches_limited])))
435438
436439
440class TestBugWatchSetBulkOperations(TestCaseWithFactory):
441 """Tests for the bugwatch updating system."""
442
443 layer = LaunchpadZopelessLayer
444
445 def setUp(self):
446 super(TestBugWatchSetBulkOperations, self).setUp()
447 self.bug_watches = [
448 self.factory.makeBugWatch(remote_bug='alice'),
449 self.factory.makeBugWatch(remote_bug='bob'),
450 ]
451 for bug_watch in self.bug_watches:
452 bug_watch.lastchecked = None
453 bug_watch.last_error_type = None
454 bug_watch.next_check = UTC_NOW
455
456 def _checkStatusOfBugWatches(
457 self, last_checked_is_null, next_check_is_null, last_error_type):
458 for bug_watch in self.bug_watches:
459 self.failUnlessEqual(
460 last_checked_is_null, bug_watch.lastchecked is None)
461 self.failUnlessEqual(
462 next_check_is_null, bug_watch.next_check is None)
463 self.failUnlessEqual(
464 last_error_type, bug_watch.last_error_type)
465
466 def test_bulkSetError(self):
467 # Called with only bug watches, bulkSetError() marks the
468 # watches as checked without error, and unscheduled.
469 getUtility(IBugWatchSet).bulkSetError(self.bug_watches)
470 self._checkStatusOfBugWatches(False, True, None)
471
472 def test_bulkSetError_with_error(self):
473 # Called with bug watches and an error, bulkSetError() marks
474 # the watches with the given error, and unschedules them.
475 error = BugWatchActivityStatus.BUG_NOT_FOUND
476 getUtility(IBugWatchSet).bulkSetError(self.bug_watches, error)
477 self._checkStatusOfBugWatches(False, True, error)
478
479 def test_bulkSetError_with_id_list(self):
480 # The ids of bug watches to update can be passed in.
481 getUtility(IBugWatchSet).bulkSetError(
482 [bug_watch.id for bug_watch in self.bug_watches])
483 self._checkStatusOfBugWatches(False, True, None)
484
485 def test_bulkSetError_with_mixed_list(self):
486 # The list passed in can contain a mix of bug watches and
487 # their ids.
488 getUtility(IBugWatchSet).bulkSetError(
489 [bug_watch.id for bug_watch in self.bug_watches[::2]] +
490 [bug_watch for bug_watch in self.bug_watches[1::2]])
491 self._checkStatusOfBugWatches(False, True, None)
492
493 def test_bulkSetError_with_iterator(self):
494 # Any iterator can be passed in.
495 getUtility(IBugWatchSet).bulkSetError(
496 (bug_watch for bug_watch in self.bug_watches))
497 self._checkStatusOfBugWatches(False, True, None)
498
499 def _checkActivityForBugWatches(self, result, message, oops_id):
500 for bug_watch in self.bug_watches:
501 latest_activity = bug_watch.activity.first()
502 self.failUnlessEqual(result, latest_activity.result)
503 self.failUnlessEqual(message, latest_activity.message)
504 self.failUnlessEqual(oops_id, latest_activity.oops_id)
505
506 def test_bulkAddActivity(self):
507 # Called with only bug watches, bulkAddActivity() adds
508 # successful activity records for the given bug watches.
509 getUtility(IBugWatchSet).bulkAddActivity(self.bug_watches)
510 self._checkActivityForBugWatches(None, None, None)
511
512 def test_bulkAddActivity_with_error(self):
513 # Called with additional error information, bulkAddActivity()
514 # adds appropriate and identical activity records for each of
515 # the given bug watches.
516 error = BugWatchActivityStatus.PRIVATE_REMOTE_BUG
517 getUtility(IBugWatchSet).bulkAddActivity(
518 self.bug_watches, error, "Forbidden", "OOPS-1234")
519 self._checkActivityForBugWatches(error, "Forbidden", "OOPS-1234")
520
521 def test_bulkAddActivity_with_id_list(self):
522 # The ids of bug watches can be passed in.
523 getUtility(IBugWatchSet).bulkAddActivity(
524 [bug_watch.id for bug_watch in self.bug_watches])
525 self._checkActivityForBugWatches(None, None, None)
526
527 def test_bulkAddActivity_with_mixed_list(self):
528 # The list passed in can contain a mix of bug watches and
529 # their ids.
530 getUtility(IBugWatchSet).bulkAddActivity(
531 [bug_watch.id for bug_watch in self.bug_watches[::2]] +
532 [bug_watch for bug_watch in self.bug_watches[1::2]])
533 self._checkActivityForBugWatches(None, None, None)
534
535 def test_bulkAddActivity_with_iterator(self):
536 # Any iterator can be passed in.
537 getUtility(IBugWatchSet).bulkAddActivity(
538 (bug_watch for bug_watch in self.bug_watches))
539 self._checkActivityForBugWatches(None, None, None)
540
541
437class TestBugWatchBugTasks(TestCaseWithFactory):542class TestBugWatchBugTasks(TestCaseWithFactory):
438543
439 layer = DatabaseFunctionalLayer544 layer = DatabaseFunctionalLayer