Merge lp:~edwin-grubbs/launchpad/bug-436974-series-timeouts into lp:launchpad

Proposed by Edwin Grubbs
Status: Merged
Approved by: Edwin Grubbs
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~edwin-grubbs/launchpad/bug-436974-series-timeouts
Merge into: lp:launchpad
Diff against target: 217 lines
2 files modified
lib/lp/bugs/model/bugtask.py (+29/-14)
lib/lp/registry/browser/product.py (+57/-59)
To merge this branch: bzr merge lp:~edwin-grubbs/launchpad/bug-436974-series-timeouts
Reviewer Review Type Date Requested Status
Michael Nelson (community) code Approve
Review via email: mp+13397@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Edwin Grubbs (edwin-grubbs) wrote :

Summary
-------

Optimize the $productseries/+series page.

Implementation details
----------------------

Eliminated a query for product release files by making it lazy load,
since that data is not used on the +series page.

Improved the query for BugTask.getStatusCountsForProductSeries(), which
is called for every active series in the product.

Tests
-----

./bin/test -vv -t xx-productseries-series.txt

Demo and Q/A
------------

* Open http://launchpad.dev/bzr/+series
  * This should not timeout on staging or edge.

Revision history for this message
Michael Nelson (michael.nelson) wrote :
Download full text (10.5 KiB)

Hi Edwin,

I've marked this as Needs Fixing because there are a few things that look like oversights - but I might be wrong. Other than that, just a few comments. See below.

Thanks!

> === modified file 'lib/lp/bugs/model/bugtask.py'
> --- lib/lp/bugs/model/bugtask.py 2009-10-01 19:50:54 +0000
> +++ lib/lp/bugs/model/bugtask.py 2009-10-15 09:32:02 +0000
> @@ -2021,22 +2021,37 @@
> """See `IBugTaskSet`."""
> bug_privacy_filter = get_bug_privacy_filter(user)
> if bug_privacy_filter != "":
> - bug_privacy_filter = ' AND ' + bug_privacy_filter
> + bug_privacy_filter = 'AND ' + bug_privacy_filter
> cur = cursor()
> - condition = """
> - (BugTask.productseries = %s
> - OR Milestone.productseries = %s)
> - """ % sqlvalues(product_series, product_series)
> +
> + # The union is actually much faster than a LEFT JOIN with the
> + # Milestone table, since postgres optimizes it to perform index
> + # scans instead of sequential scans on the BugTask table.

Wow - was there any way you were able to verify that locally? That's
really interesting.

> query = """
> - SELECT BugTask.status, count(*)
> - FROM BugTask
> - JOIN Bug ON BugTask.bug = Bug.id
> - LEFT JOIN Milestone ON BugTask.milestone = Milestone.id
> - WHERE
> - %s
> - %s
> - GROUP BY BugTask.status
> - """ % (condition, bug_privacy_filter)
> + SELECT status, count(*)
> + FROM (
> + SELECT BugTask.status
> + FROM BugTask
> + JOIN Bug ON BugTask.bug = Bug.id
> + WHERE
> + BugTask.productseries = %(series)s
> + %(privacy)s
> +
> + UNION ALL
> +
> + SELECT BugTask.status
> + FROM BugTask
> + JOIN Bug ON BugTask.bug = Bug.id
> + JOIN Milestone ON BugTask.milestone = Milestone.id
> + WHERE
> + BugTask.productseries IS NULL
> + AND Milestone.productseries = %(series)s
> + %(privacy)s
> + ) AS subquery
> + GROUP BY status
> + """ % dict(series=quote(product_series),
> + privacy=bug_privacy_filter)
> +
> cur.execute(query)
> return cur.fetchall()
>
>
> === modified file 'lib/lp/registry/browser/product.py'
> --- lib/lp/registry/browser/product.py 2009-10-08 15:54:09 +0000
> +++ lib/lp/registry/browser/product.py 2009-10-15 09:32:02 +0000
> @@ -558,14 +558,28 @@
> def __init__(self, product):
> self.product = product
> self.serieses = []
> - self.series_by_id = {}
> + for series in self.product.serieses:
> + series_with_releases = SeriesWithReleases(series, parent=self)
> + self.serieses.append(series_with_releases)
> + if self.product.development_focus == series:
> + self.development_focus = series_with_releases
>
> - def...

review: Needs Fixing (code)
Revision history for this message
Edwin Grubbs (edwin-grubbs) wrote :
Download full text (17.4 KiB)

Hi Michael,

Thanks for the review. Incremental diff included.

On Thu, Oct 15, 2009 at 5:51 AM, Michael Nelson
<email address hidden> wrote:
> Review: Needs Fixing code
> Hi Edwin,
>
> I've marked this as Needs Fixing because there are a few things that look like oversights - but I might be wrong. Other than that, just a few comments. See below.
>
> Thanks!
>
>> === modified file 'lib/lp/bugs/model/bugtask.py'
>> --- lib/lp/bugs/model/bugtask.py      2009-10-01 19:50:54 +0000
>> +++ lib/lp/bugs/model/bugtask.py      2009-10-15 09:32:02 +0000
>> @@ -2021,22 +2021,37 @@
>>          """See `IBugTaskSet`."""
>>          bug_privacy_filter = get_bug_privacy_filter(user)
>>          if bug_privacy_filter != "":
>> -            bug_privacy_filter = ' AND ' + bug_privacy_filter
>> +            bug_privacy_filter = 'AND ' + bug_privacy_filter
>>          cur = cursor()
>> -        condition = """
>> -            (BugTask.productseries = %s
>> -                 OR Milestone.productseries = %s)
>> -            """ % sqlvalues(product_series, product_series)
>> +
>> +        # The union is actually much faster than a LEFT JOIN with the
>> +        # Milestone table, since postgres optimizes it to perform index
>> +        # scans instead of sequential scans on the BugTask table.
>
> Wow - was there any way you were able to verify that locally? That's
> really interesting.

It actually runs a little bit slower locally, but on staging's db it runs 50
to 170 times as fast. I can show you the output of EXPLAIN ANALYZE if
you are interested.

>>          query = """
>> -            SELECT BugTask.status, count(*)
>> -            FROM BugTask
>> -                JOIN Bug ON BugTask.bug = Bug.id
>> -                LEFT JOIN Milestone ON BugTask.milestone = Milestone.id
>> -            WHERE
>> -                %s
>> -                %s
>> -            GROUP BY BugTask.status
>> -            """ % (condition, bug_privacy_filter)
>> +            SELECT status, count(*)
>> +            FROM (
>> +                SELECT BugTask.status
>> +                FROM BugTask
>> +                    JOIN Bug ON BugTask.bug = Bug.id
>> +                WHERE
>> +                    BugTask.productseries = %(series)s
>> +                    %(privacy)s
>> +
>> +                UNION ALL
>> +
>> +                SELECT BugTask.status
>> +                FROM BugTask
>> +                    JOIN Bug ON BugTask.bug = Bug.id
>> +                    JOIN Milestone ON BugTask.milestone = Milestone.id
>> +                WHERE
>> +                    BugTask.productseries IS NULL
>> +                    AND Milestone.productseries = %(series)s
>> +                    %(privacy)s
>> +                ) AS subquery
>> +            GROUP BY status
>> +            """ % dict(series=quote(product_series),
>> +                       privacy=bug_privacy_filter)
>> +
>>          cur.execute(query)
>>          return cur.fetchall()
>>
>>
>> === modified file 'lib/lp/registry/browser/product.py'
>> --- lib/lp/registry/browser/product.py        2009-10-08 15:54:09 +0000
>> +++ lib/lp/registry/browser/product.py        2009-10-15 09:32:02 +0000
>> @@ -558,14 +558,28 @@
>>      def __...

Revision history for this message
Michael Nelson (michael.nelson) wrote :

> Hi Michael,
>
> Thanks for the review. Incremental diff included.
>

Thanks for making me laugh with your comments :)

Just a typo in your comment below. Great work!

> === modified file 'lib/lp/registry/browser/product.py'
> --- lib/lp/registry/browser/product.py 2009-10-13 02:44:40 +0000
> +++ lib/lp/registry/browser/product.py 2009-10-15 15:38:50 +0000
> @@ -643,22 +640,25 @@
> self.parent = parent
> self._files = None
>
> - @cachedproperty
> + @property
> def files(self):
> + """Cache the release files for all the releases in the product."""
> if self._files is None:
> # Get all of the files for all of the releases. The query
> # returns all releases sorted properly.
> product = self.parent.parent
> - decorated_releases = product.release_by_id.values()
> + release_delegates = product.release_by_id.values()
> files = getUtility(IProductReleaseSet).getFilesForReleases(
> - decorated_releases)
> - for decorated_release in decorated_releases:
> - decorated_release._files = []
> + release_delegates)
> + for release_delegate in release_delegates:
> + release_delegate._files = []
> for file in files:
> id = file.productrelease.id
> - decorated_release = product.release_by_id[id]
> - decorated_release._files.append(file)
> + release_delegate = product.release_by_id[id]
> + release_delegate._files.append(file)
>
> + # self._files was set above, since self is actually in the
> + # decorated_releases variable.

Should be release_delegates now ^^^^ to avoid confusion :)

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/bugs/model/bugtask.py'
--- lib/lp/bugs/model/bugtask.py 2009-10-01 19:50:54 +0000
+++ lib/lp/bugs/model/bugtask.py 2009-10-16 14:08:13 +0000
@@ -2021,22 +2021,37 @@
2021 """See `IBugTaskSet`."""2021 """See `IBugTaskSet`."""
2022 bug_privacy_filter = get_bug_privacy_filter(user)2022 bug_privacy_filter = get_bug_privacy_filter(user)
2023 if bug_privacy_filter != "":2023 if bug_privacy_filter != "":
2024 bug_privacy_filter = ' AND ' + bug_privacy_filter2024 bug_privacy_filter = 'AND ' + bug_privacy_filter
2025 cur = cursor()2025 cur = cursor()
2026 condition = """2026
2027 (BugTask.productseries = %s2027 # The union is actually much faster than a LEFT JOIN with the
2028 OR Milestone.productseries = %s)2028 # Milestone table, since postgres optimizes it to perform index
2029 """ % sqlvalues(product_series, product_series)2029 # scans instead of sequential scans on the BugTask table.
2030 query = """2030 query = """
2031 SELECT BugTask.status, count(*)2031 SELECT status, count(*)
2032 FROM BugTask2032 FROM (
2033 JOIN Bug ON BugTask.bug = Bug.id2033 SELECT BugTask.status
2034 LEFT JOIN Milestone ON BugTask.milestone = Milestone.id2034 FROM BugTask
2035 WHERE2035 JOIN Bug ON BugTask.bug = Bug.id
2036 %s2036 WHERE
2037 %s2037 BugTask.productseries = %(series)s
2038 GROUP BY BugTask.status2038 %(privacy)s
2039 """ % (condition, bug_privacy_filter)2039
2040 UNION ALL
2041
2042 SELECT BugTask.status
2043 FROM BugTask
2044 JOIN Bug ON BugTask.bug = Bug.id
2045 JOIN Milestone ON BugTask.milestone = Milestone.id
2046 WHERE
2047 BugTask.productseries IS NULL
2048 AND Milestone.productseries = %(series)s
2049 %(privacy)s
2050 ) AS subquery
2051 GROUP BY status
2052 """ % dict(series=quote(product_series),
2053 privacy=bug_privacy_filter)
2054
2040 cur.execute(query)2055 cur.execute(query)
2041 return cur.fetchall()2056 return cur.fetchall()
20422057
20432058
=== modified file 'lib/lp/registry/browser/product.py'
--- lib/lp/registry/browser/product.py 2009-10-08 15:54:09 +0000
+++ lib/lp/registry/browser/product.py 2009-10-16 14:08:13 +0000
@@ -549,8 +549,9 @@
549 cached locally and simply returned.549 cached locally and simply returned.
550 """550 """
551551
552 # These need to be predeclared to avoid delegates taking them552 # `serieses` and `development_focus` need to be declared as class
553 # over.553 # attributes so that this class will not delegate the actual instance
554 # variables to self.product, which would bypass the caching.
554 serieses = None555 serieses = None
555 development_focus = None556 development_focus = None
556 delegates(IProduct, 'product')557 delegates(IProduct, 'product')
@@ -558,17 +559,24 @@
558 def __init__(self, product):559 def __init__(self, product):
559 self.product = product560 self.product = product
560 self.serieses = []561 self.serieses = []
561 self.series_by_id = {}562 for series in self.product.serieses:
562563 series_with_releases = SeriesWithReleases(series, parent=self)
563 def setSeries(self, serieses):564 self.serieses.append(series_with_releases)
564 """Set the serieses to the provided collection."""565 if self.product.development_focus == series:
565 self.serieses = serieses566 self.development_focus = series_with_releases
566 self.series_by_id = dict(567
567 (series.id, series) for series in self.serieses)568 # Get all of the releases for all of the serieses in a single
568569 # query. The query sorts the releases properly so we know the
569 def getSeriesById(self, id):570 # resulting list is sorted correctly.
570 """Look up and return a ProductSeries by id."""571 series_by_id = dict((series.id, series) for series in self.serieses)
571 return self.series_by_id[id]572 self.release_by_id = {}
573 milestones_and_releases = list(
574 self.product.getMilestonesAndReleases())
575 for milestone, release in milestones_and_releases:
576 series = series_by_id[milestone.productseries.id]
577 release_delegate = ReleaseWithFiles(release, parent=series)
578 series.addRelease(release_delegate)
579 self.release_by_id[release.id] = release_delegate
572580
573581
574class SeriesWithReleases:582class SeriesWithReleases:
@@ -579,13 +587,17 @@
579 cached locally and simply returned.587 cached locally and simply returned.
580 """588 """
581589
582 # These need to be predeclared to avoid delegates taking them590 # `parent` and `releases` need to be declared as class attributes so that
583 # over.591 # this class will not delegate the actual instance variables to
592 # self.series, which would bypass the caching for self.releases and would
593 # raise an AttributeError for self.parent.
594 parent = None
584 releases = None595 releases = None
585 delegates(IProductSeries, 'series')596 delegates(IProductSeries, 'series')
586597
587 def __init__(self, series):598 def __init__(self, series, parent):
588 self.series = series599 self.series = series
600 self.parent = parent
589 self.releases = []601 self.releases = []
590602
591 def addRelease(self, release):603 def addRelease(self, release):
@@ -617,17 +629,37 @@
617 cached locally and simply returned.629 cached locally and simply returned.
618 """630 """
619631
620 # These need to be predeclared to avoid delegates taking them632 # `parent` needs to be declared as class attributes so that
621 # over.633 # this class will not delegate the actual instance variables to
622 files = None634 # self.release, which would raise an AttributeError.
635 parent = None
623 delegates(IProductRelease, 'release')636 delegates(IProductRelease, 'release')
624637
625 def __init__(self, release):638 def __init__(self, release, parent):
626 self.release = release639 self.release = release
627 self.files = []640 self.parent = parent
628641 self._files = None
629 def addFile(self, file):642
630 self.files.append(file)643 @property
644 def files(self):
645 """Cache the release files for all the releases in the product."""
646 if self._files is None:
647 # Get all of the files for all of the releases. The query
648 # returns all releases sorted properly.
649 product = self.parent.parent
650 release_delegates = product.release_by_id.values()
651 files = getUtility(IProductReleaseSet).getFilesForReleases(
652 release_delegates)
653 for release_delegate in release_delegates:
654 release_delegate._files = []
655 for file in files:
656 id = file.productrelease.id
657 release_delegate = product.release_by_id[id]
658 release_delegate._files.append(file)
659
660 # self._files was set above, since self is actually in the
661 # release_delegates variable.
662 return self._files
631663
632 @property664 @property
633 def name_with_codename(self):665 def name_with_codename(self):
@@ -653,41 +685,7 @@
653 Decorated classes are created, and they contain cached data685 Decorated classes are created, and they contain cached data
654 obtained with a few queries rather than many iterated queries.686 obtained with a few queries rather than many iterated queries.
655 """687 """
656 # Create the decorated product and set the list of series.688 return ProductWithSeries(self.context)
657 original_product = self.context
658
659 product_with_series = ProductWithSeries(original_product)
660 serieses = []
661 for series in original_product.serieses:
662 series_with_releases = SeriesWithReleases(series)
663 serieses.append(series_with_releases)
664 if original_product.development_focus == series:
665 product_with_series.development_focus = series_with_releases
666
667 product_with_series.setSeries(serieses)
668
669 # Get all of the releases for all of the serieses in a single
670 # query. The query sorts the releases properly so we know the
671 # resulting list is sorted correctly.
672 release_by_id = {}
673 milestones_and_releases = list(
674 original_product.getMilestonesAndReleases())
675 for milestone, release in milestones_and_releases:
676 series = product_with_series.getSeriesById(
677 milestone.productseries.id)
678 decorated_release = ReleaseWithFiles(release)
679 series.addRelease(decorated_release)
680 release_by_id[release.id] = decorated_release
681
682 # Get all of the files for all of the releases. The query
683 # returns all releases sorted properly.
684 releases = [release for milestone, release in milestones_and_releases]
685 files = getUtility(IProductReleaseSet).getFilesForReleases(releases)
686 for file in files:
687 release = release_by_id[file.productrelease.id]
688 release.addFile(file)
689
690 return product_with_series
691689
692 def deleteFiles(self, releases):690 def deleteFiles(self, releases):
693 """Delete the selected files from the set of releases.691 """Delete the selected files from the set of releases.