Merge lp:~lifeless/launchpad/ppr-move into lp:launchpad

Proposed by Robert Collins
Status: Merged
Approved by: Robert Collins
Approved revision: no longer in the source branch.
Merged at revision: 15780
Proposed branch: lp:~lifeless/launchpad/ppr-move
Merge into: lp:launchpad
Diff against target: 1955 lines (+0/-1931)
5 files modified
lib/lp/scripts/utilities/pageperformancereport.py (+0/-1236)
lib/lp/scripts/utilities/tests/test_pageperformancereport.py (+0/-483)
utilities/page-performance-report-daily.sh (+0/-115)
utilities/page-performance-report.ini (+0/-79)
utilities/page-performance-report.py (+0/-18)
To merge this branch: bzr merge lp:~lifeless/launchpad/ppr-move
Reviewer: Robert Collins (community)
Review status: Approve
Review via email: mp+118873@code.launchpad.net

Commit message

PPR is now in lp-dev-utils; remove it from LP so we don't have two copies.

Description of the change

PPR is now in lp-dev-utils; remove it from LP so we don't have two copies.

Robert Collins (lifeless) wrote:

Self-review, nothing to see here. We might want to bring the ini file back, I suppose, but lp-dev-utils is still LP-focused, so for now keep it all together.

review: Approve

Preview Diff

1=== removed file 'lib/lp/scripts/utilities/pageperformancereport.py'
2--- lib/lp/scripts/utilities/pageperformancereport.py 2012-06-29 08:40:05 +0000
3+++ lib/lp/scripts/utilities/pageperformancereport.py 1970-01-01 00:00:00 +0000
4@@ -1,1236 +0,0 @@
5-# Copyright 2010 Canonical Ltd. This software is licensed under the
6-# GNU Affero General Public License version 3 (see the file LICENSE).
7-
8-"""Page performance report generated from zserver trace logs."""
9-
10-__metaclass__ = type
11-__all__ = ['main']
12-
13-import bz2
14-from cgi import escape as html_quote
15-from ConfigParser import RawConfigParser
16-import copy
17-import cPickle
18-import csv
19-from datetime import datetime
20-import gzip
21-import math
22-import os.path
23-import re
24-import textwrap
25-from textwrap import dedent
26-import time
27-
28-import simplejson as json
29-import sre_constants
30-import zc.zservertracelog.tracereport
31-
32-from lp.scripts.helpers import LPOptionParser
33-from lp.services.config import config
34-from lp.services.scripts.logger import log
35-
36-
37-class Request(zc.zservertracelog.tracereport.Request):
38- url = None
39- pageid = None
40- ticks = None
41- sql_statements = None
42- sql_seconds = None
43-
44- # Override the broken version in our superclass that always
45- # returns an integer.
46- @property
47- def app_seconds(self):
48- interval = self.app_time - self.start_app_time
49- return interval.seconds + interval.microseconds / 1000000.0
50-
51- # Override the broken version in our superclass that always
52- # returns an integer.
53- @property
54- def total_seconds(self):
55- interval = self.end - self.start
56- return interval.seconds + interval.microseconds / 1000000.0
57-
58-
59-class Category:
60- """A Category in our report.
61-
62- Requests belong to a Category if the URL matches a regular expression.
63- """
64-
65- def __init__(self, title, regexp):
66- self.title = title
67- self.regexp = regexp
68- self._compiled_regexp = re.compile(regexp, re.I | re.X)
69- self.partition = False
70-
71- def match(self, request):
72- """Return true when the request match this category."""
73- return self._compiled_regexp.search(request.url) is not None
74-
75- def __cmp__(self, other):
76- return cmp(self.title.lower(), other.title.lower())
77-
78- def __deepcopy__(self, memo):
79- # We provide __deepcopy__ because the module doesn't handle
80- # compiled regular expression by default.
81- return Category(self.title, self.regexp)
82-
83-
84-class OnlineStatsCalculator:
85- """Object that can compute count, sum, mean, variance and median.
86-
87- It computes these value incrementally and using minimal storage
88- using the Welford / Knuth algorithm described at
89- http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
90- """
91-
92- def __init__(self):
93- self.count = 0
94- self.sum = 0
95- self.M2 = 0.0 # Sum of square difference
96- self.mean = 0.0
97-
98- def update(self, x):
99- """Incrementally update the stats when adding x to the set.
100-
101- None values are ignored.
102- """
103- if x is None:
104- return
105- self.count += 1
106- self.sum += x
107- delta = x - self.mean
108- self.mean = float(self.sum)/self.count
109- self.M2 += delta*(x - self.mean)
110-
111- @property
112- def variance(self):
113- """Return the population variance."""
114- if self.count == 0:
115- return 0
116- else:
117- return self.M2/self.count
118-
119- @property
120- def std(self):
121- """Return the standard deviation."""
122- if self.count == 0:
123- return 0
124- else:
125- return math.sqrt(self.variance)
126-
127- def __add__(self, other):
128- """Adds this and another OnlineStatsCalculator.
129-
130- The result combines the stats of the two objects.
131- """
132- results = OnlineStatsCalculator()
133- results.count = self.count + other.count
134- results.sum = self.sum + other.sum
135- if self.count > 0 and other.count > 0:
136- # This is 2.1b in Chan, Tony F.; Golub, Gene H.; LeVeque,
137- # Randall J. (1979), "Updating Formulae and a Pairwise Algorithm
138- # for Computing Sample Variances.",
139- # Technical Report STAN-CS-79-773,
140- # Department of Computer Science, Stanford University,
141- # ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf .
142- results.M2 = self.M2 + other.M2 + (
143- (float(self.count) / (other.count * results.count)) *
144- ((float(other.count) / self.count) * self.sum - other.sum)**2)
145- else:
146- results.M2 = self.M2 + other.M2 # One of them is 0.
147- if results.count > 0:
148- results.mean = float(results.sum) / results.count
149- return results
150-
151-
152-class OnlineApproximateMedian:
153- """Approximate the median of a set of elements.
154-
155- This implements a space-efficient algorithm which only sees each value
156- once. (It will hold in memory log bucket_size of n elements.)
157-
158- It was described and analysed in
159- D. Cantone and M.Hofri,
160- "Analysis of An Approximate Median Selection Algorithm"
161- ftp://ftp.cs.wpi.edu/pub/techreports/pdf/06-17.pdf
162-
163- This algorithm is similar to Tukey's median of medians technique.
164- It will compute the median among bucket_size values. And the median among
165- those.
166- """
167-
168- def __init__(self, bucket_size=9):
169- """Creates a new estimator.
170-
171- It approximates the median by finding the median among each
172- successive bucket_size element. And then using these medians for other
173- rounds of selection.
174-
175- The bucket size should be a low odd-integer.
176- """
177- self.bucket_size = bucket_size
178- # Index of the median in a completed bucket.
179- self.median_idx = (bucket_size-1)//2
180- self.buckets = []
181-
182- def update(self, x, order=0):
183- """Update with x."""
184- if x is None:
185- return
186-
187- i = order
188- while True:
189- # Create bucket on demand.
190- if i >= len(self.buckets):
191- for n in range((i+1)-len(self.buckets)):
192- self.buckets.append([])
193- bucket = self.buckets[i]
194- bucket.append(x)
195- if len(bucket) == self.bucket_size:
196- # Select the median in this bucket, and promote it.
197- x = sorted(bucket)[self.median_idx]
198- # Free the bucket for the next round.
199- del bucket[:]
200- i += 1
201- continue
202- else:
203- break
204-
205- @property
206- def median(self):
207- """Return the median."""
208- # Find the 'weighted' median by assigning a weight to each
209- # element proportional to how far they have been selected.
210- candidates = []
211- total_weight = 0
212- for i, bucket in enumerate(self.buckets):
213- weight = self.bucket_size ** i
214- for x in bucket:
215- total_weight += weight
216- candidates.append([x, weight])
217- if len(candidates) == 0:
218- return 0
219-
220- # Each weight is the equivalent of having the candidates appear
221- # that number of times in the array.
222- # So buckets like [[1, 2], [2, 3], [4, 2]] would be expanded to
223- # [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4,
224- # 4, 4, 4, 4, 4] and we find the median of that list (2).
225- # We don't expand the items to conserve memory.
226- median = (total_weight-1) / 2
227- weighted_idx = 0
228- for x, weight in sorted(candidates):
229- weighted_idx += weight
230- if weighted_idx > median:
231- return x
232-
233- def __add__(self, other):
234- """Merge two approximators together.
235-
236- All candidates from the other are merged through the standard
237- algorithm, starting at the same level. So an item that went through
238- two rounds of selection, will be compared with other items having
239- gone through the same number of rounds.
240- """
241- results = OnlineApproximateMedian(self.bucket_size)
242- results.buckets = copy.deepcopy(self.buckets)
243- for i, bucket in enumerate(other.buckets):
244- for x in bucket:
245- results.update(x, i)
246- return results
247-
248-
249-class Stats:
250- """Bag to hold and compute request statistics.
251-
252- All times are in seconds.
253- """
254- total_hits = 0 # Total hits.
255-
256- total_time = 0 # Total time spent rendering.
257- mean = 0 # Mean time per hit.
258- median = 0 # Median time per hit.
259- std = 0 # Standard deviation per hit.
260- histogram = None # # Request times histogram.
261-
262- total_sqltime = 0 # Total time spent waiting for SQL to process.
263- mean_sqltime = 0 # Mean time spend waiting for SQL to process.
264- median_sqltime = 0 # Median time spend waiting for SQL to process.
265- std_sqltime = 0 # Standard deviation of SQL time.
266-
267- total_sqlstatements = 0 # Total number of SQL statements issued.
268- mean_sqlstatements = 0
269- median_sqlstatements = 0
270- std_sqlstatements = 0
271-
272- @property
273- def ninetyninth_percentile_time(self):
274- """Time under which 99% of requests are rendered.
275-
276- This is estimated as 3 std deviations from the mean. Given that
277- in a daily report, many URLs or PageIds won't have 100 requests, it's
278- more useful to use this estimator.
279- """
280- return self.mean + 3*self.std
281-
282- @property
283- def ninetyninth_percentile_sqltime(self):
284- """SQL time under which 99% of requests are rendered.
285-
286- This is estimated as 3 std deviations from the mean.
287- """
288- return self.mean_sqltime + 3*self.std_sqltime
289-
290- @property
291- def ninetyninth_percentile_sqlstatements(self):
292- """Number of SQL statements under which 99% of requests are rendered.
293-
294- This is estimated as 3 std deviations from the mean.
295- """
296- return self.mean_sqlstatements + 3*self.std_sqlstatements
297-
298- def text(self):
299- """Return a textual version of the stats."""
300- return textwrap.dedent("""
301- <Stats for %d requests:
302- Time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f
303- SQL time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f
304- SQL stmt: total=%.f; mean=%.2f; median=%.f; std=%.2f
305- >""" % (
306- self.total_hits, self.total_time, self.mean, self.median,
307- self.std, self.total_sqltime, self.mean_sqltime,
308- self.median_sqltime, self.std_sqltime,
309- self.total_sqlstatements, self.mean_sqlstatements,
310- self.median_sqlstatements, self.std_sqlstatements))
311-
312-
313-class OnlineStats(Stats):
314- """Implementation of stats that can be computed online.
315-
316- You call update() for each request and the stats are updated incrementally
317- with minimum storage space.
318- """
319-
320- def __init__(self, histogram_width, histogram_resolution):
321- self.time_stats = OnlineStatsCalculator()
322- self.time_median_approximate = OnlineApproximateMedian()
323- self.sql_time_stats = OnlineStatsCalculator()
324- self.sql_time_median_approximate = OnlineApproximateMedian()
325- self.sql_statements_stats = OnlineStatsCalculator()
326- self.sql_statements_median_approximate = OnlineApproximateMedian()
327- self.histogram = Histogram(histogram_width, histogram_resolution)
328-
329- @property
330- def total_hits(self):
331- return self.time_stats.count
332-
333- @property
334- def total_time(self):
335- return self.time_stats.sum
336-
337- @property
338- def mean(self):
339- return self.time_stats.mean
340-
341- @property
342- def median(self):
343- return self.time_median_approximate.median
344-
345- @property
346- def std(self):
347- return self.time_stats.std
348-
349- @property
350- def total_sqltime(self):
351- return self.sql_time_stats.sum
352-
353- @property
354- def mean_sqltime(self):
355- return self.sql_time_stats.mean
356-
357- @property
358- def median_sqltime(self):
359- return self.sql_time_median_approximate.median
360-
361- @property
362- def std_sqltime(self):
363- return self.sql_time_stats.std
364-
365- @property
366- def total_sqlstatements(self):
367- return self.sql_statements_stats.sum
368-
369- @property
370- def mean_sqlstatements(self):
371- return self.sql_statements_stats.mean
372-
373- @property
374- def median_sqlstatements(self):
375- return self.sql_statements_median_approximate.median
376-
377- @property
378- def std_sqlstatements(self):
379- return self.sql_statements_stats.std
380-
381- def update(self, request):
382- """Update the stats based on request."""
383- self.time_stats.update(request.app_seconds)
384- self.time_median_approximate.update(request.app_seconds)
385- self.sql_time_stats.update(request.sql_seconds)
386- self.sql_time_median_approximate.update(request.sql_seconds)
387- self.sql_statements_stats.update(request.sql_statements)
388- self.sql_statements_median_approximate.update(request.sql_statements)
389- self.histogram.update(request.app_seconds)
390-
391- def __add__(self, other):
392- """Merge another OnlineStats with this one."""
393- results = copy.deepcopy(self)
394- results.time_stats += other.time_stats
395- results.time_median_approximate += other.time_median_approximate
396- results.sql_time_stats += other.sql_time_stats
397- results.sql_time_median_approximate += (
398- other.sql_time_median_approximate)
399- results.sql_statements_stats += other.sql_statements_stats
400- results.sql_statements_median_approximate += (
401- other.sql_statements_median_approximate)
402- results.histogram = self.histogram + other.histogram
403- return results
404-
405-
406-class Histogram:
407- """A simple object to compute histogram of a value."""
408-
409- @staticmethod
410- def from_bins_data(data):
411- """Create an histogram from existing bins data."""
412- assert data[0][0] == 0, "First bin should start at zero."
413-
414- hist = Histogram(len(data), data[1][0])
415- for idx, bin in enumerate(data):
416- hist.count += bin[1]
417- hist.bins[idx][1] = bin[1]
418-
419- return hist
420-
421- def __init__(self, bins_count, bins_size):
422- """Create a new histogram.
423-
424- The histogram will count the frequency of values in bins_count bins
425- of bins_size each.
426- """
427- self.count = 0
428- self.bins_count = bins_count
429- self.bins_size = bins_size
430- self.bins = []
431- for x in range(bins_count):
432- self.bins.append([x*bins_size, 0])
433-
434- @property
435- def bins_relative(self):
436- """Return the bins with the frequency expressed as a ratio."""
437- return [[x, float(f)/self.count] for x, f in self.bins]
438-
439- def update(self, value):
440- """Update the histogram for this value.
441-
442- All values higher than the last bin minimum are counted in that last
443- bin.
444- """
445- self.count += 1
446- idx = int(min(self.bins_count-1, value / self.bins_size))
447- self.bins[idx][1] += 1
448-
449- def __repr__(self):
450- """A string representation of this histogram."""
451- return "<Histogram %s>" % self.bins
452-
453- def __eq__(self, other):
454- """Two histogram are equals if they have the same bins content."""
455- if not isinstance(other, Histogram):
456- return False
457-
458- if self.bins_count != other.bins_count:
459- return False
460-
461- if self.bins_size != other.bins_size:
462- return False
463-
464- for idx, other_bin in enumerate(other.bins):
465- if self.bins[idx][1] != other_bin[1]:
466- return False
467-
468- return True
469-
470- def __add__(self, other):
471- """Add the frequency of the other histogram to this one.
472-
473- The resulting histogram has the same bins_size than this one.
474- If the other one has a bigger bins_size, we'll assume an even
475- distribution and distribute the frequency across the smaller bins. If
476- it has a lower bin_size, we'll aggregate its bins into the larger
477- ones. We only support different bins_size if the ratio can be
478- expressed as the ratio between 1 and an integer.
479-
480- The resulting histogram is as wide as the widest one.
481- """
482- ratio = float(other.bins_size) / self.bins_size
483- bins_count = max(self.bins_count, math.ceil(other.bins_count * ratio))
484- total = Histogram(int(bins_count), self.bins_size)
485- total.count = self.count + other.count
486-
487- # Copy our bins into the total
488- for idx, bin in enumerate(self.bins):
489- total.bins[idx][1] = bin[1]
490-
491- assert int(ratio) == ratio or int(1/ratio) == 1/ratio, (
492- "We only support different bins size when the ratio is an "
493- "integer to 1: "
494- % ratio)
495-
496- if ratio >= 1:
497- # We distribute the frequency across the bins.
498- # For example. if the ratio is 3:1, we'll add a third
499- # of the lower resolution bin to 3 of the higher one.
500- for other_idx, bin in enumerate(other.bins):
501- f = bin[1] / ratio
502- start = int(math.floor(other_idx * ratio))
503- end = int(start + ratio)
504- for idx in range(start, end):
505- total.bins[idx][1] += f
506- else:
507- # We need to collect the higher resolution bins into the
508- # corresponding lower one.
509- for other_idx, bin in enumerate(other.bins):
510- idx = int(other_idx * ratio)
511- total.bins[idx][1] += bin[1]
512-
513- return total
514-
515-
516-class RequestTimes:
517- """Collect statistics from requests.
518-
519- Statistics are updated by calling the add_request() method.
520-
521- Statistics for mean/stddev/total/median for request times, SQL times and
522- number of SQL statements are collected.
523-
524- They are grouped by Category, URL or PageID.
525- """
526-
527- def __init__(self, categories, options):
528- self.by_pageids = options.pageids
529- self.top_urls = options.top_urls
530- # We only keep in memory 50 times the number of URLs we want to
531- # return. The number of URLs can go pretty high (because of the
532- # distinct query parameters).
533- #
534- # Keeping all in memory at once is prohibitive. On a small but
535- # representative sample, keeping 50 times the possible number of
536- # candidates and culling to 90% on overflow, generated an identical
537- # report than keeping all the candidates in-memory.
538- #
539- # Keeping 10 times or culling at 90% generated a near-identical report
540- # (it differed a little in the tail.)
541- #
542- # The size/cull parameters might need to change if the requests
543- # distribution become very different than what it currently is.
544- self.top_urls_cache_size = self.top_urls * 50
545-
546- # Histogram has a bin per resolution up to our timeout
547- #(and an extra bin).
548- self.histogram_resolution = float(options.resolution)
549- self.histogram_width = int(
550- options.timeout / self.histogram_resolution) + 1
551- self.category_times = [
552- (category, OnlineStats(
553- self.histogram_width, self.histogram_resolution))
554- for category in categories]
555- self.url_times = {}
556- self.pageid_times = {}
557-
558- def add_request(self, request):
559- """Add request to the set of requests we collect stats for."""
560- matched = []
561- for category, stats in self.category_times:
562- if category.match(request):
563- stats.update(request)
564- if category.partition:
565- matched.append(category.title)
566-
567- if len(matched) > 1:
568- log.warning(
569- "Multiple partition categories matched by %s (%s)",
570- request.url, ", ".join(matched))
571- elif not matched:
572- log.warning("%s isn't part of the partition", request.url)
573-
574- if self.by_pageids:
575- pageid = request.pageid or 'Unknown'
576- stats = self.pageid_times.setdefault(
577- pageid, OnlineStats(
578- self.histogram_width, self.histogram_resolution))
579- stats.update(request)
580-
581- if self.top_urls:
582- stats = self.url_times.setdefault(
583- request.url, OnlineStats(
584- self.histogram_width, self.histogram_resolution))
585- stats.update(request)
586- # Whenever we have more URLs than we need to, discard 10%
587- # that is less likely to end up in the top.
588- if len(self.url_times) > self.top_urls_cache_size:
589- cutoff = int(self.top_urls_cache_size*0.90)
590- self.url_times = dict(
591- sorted(self.url_times.items(),
592- key=lambda (url, stats): stats.total_time,
593- reverse=True)[:cutoff])
594-
595- def get_category_times(self):
596- """Return the times for each category."""
597- return self.category_times
598-
599- def get_top_urls_times(self):
600- """Return the times for the Top URL by total time"""
601- # Sort the result by total time
602- return sorted(
603- self.url_times.items(),
604- key=lambda (url, stats): stats.total_time,
605- reverse=True)[:self.top_urls]
606-
607- def get_pageid_times(self):
608- """Return the times for the pageids."""
609- # Sort the result by pageid
610- return sorted(self.pageid_times.items())
611-
612- def __add__(self, other):
613- """Merge two RequestTimes together."""
614- results = copy.deepcopy(self)
615- for other_category, other_stats in other.category_times:
616- for i, (category, stats) in enumerate(self.category_times):
617- if category.title == other_category.title:
618- results.category_times[i] = (
619- category, stats + other_stats)
620- break
621- else:
622- results.category_times.append(
623- (other_category, copy.deepcopy(other_stats)))
624-
625- url_times = results.url_times
626- for url, stats in other.url_times.items():
627- if url in url_times:
628- url_times[url] += stats
629- else:
630- url_times[url] = copy.deepcopy(stats)
631- # Only keep top_urls_cache_size entries.
632- if len(self.url_times) > self.top_urls_cache_size:
633- self.url_times = dict(
634- sorted(
635- url_times.items(),
636- key=lambda (url, stats): stats.total_time,
637- reverse=True)[:self.top_urls_cache_size])
638-
639- pageid_times = results.pageid_times
640- for pageid, stats in other.pageid_times.items():
641- if pageid in pageid_times:
642- pageid_times[pageid] += stats
643- else:
644- pageid_times[pageid] = copy.deepcopy(stats)
645-
646- return results
647-
648-
649-def main():
650- parser = LPOptionParser("%prog [args] tracelog [...]")
651-
652- parser.add_option(
653- "-c", "--config", dest="config",
654- default=os.path.join(
655- config.root, "utilities", "page-performance-report.ini"),
656- metavar="FILE", help="Load configuration from FILE")
657- parser.add_option(
658- "--from", dest="from_ts", type="datetime",
659- default=None, metavar="TIMESTAMP",
660- help="Ignore log entries before TIMESTAMP")
661- parser.add_option(
662- "--until", dest="until_ts", type="datetime",
663- default=None, metavar="TIMESTAMP",
664- help="Ignore log entries after TIMESTAMP")
665- parser.add_option(
666- "--no-partition", dest="partition",
667- action="store_false", default=True,
668- help="Do not produce partition report")
669- parser.add_option(
670- "--no-categories", dest="categories",
671- action="store_false", default=True,
672- help="Do not produce categories report")
673- parser.add_option(
674- "--no-pageids", dest="pageids",
675- action="store_false", default=True,
676- help="Do not produce pageids report")
677- parser.add_option(
678- "--top-urls", dest="top_urls", type=int, metavar="N",
679- default=50, help="Generate report for top N urls by hitcount.")
680- parser.add_option(
681- "--directory", dest="directory",
682- default=os.getcwd(), metavar="DIR",
683- help="Output reports in DIR directory")
684- parser.add_option(
685- "--timeout", dest="timeout",
686- # Default to 9: our production timeout.
687- default=9, type="int", metavar="SECONDS",
688- help="The configured timeout value: used to determine high risk " +
689- "page ids. That would be pages which 99% under render time is "
690- "greater than timeoout - 2s. Default is %defaults.")
691- parser.add_option(
692- "--histogram-resolution", dest="resolution",
693- # Default to 0.5s
694- default=0.5, type="float", metavar="SECONDS",
695- help="The resolution of the histogram bin width. Detault to "
696- "%defaults.")
697- parser.add_option(
698- "--merge", dest="merge",
699- default=False, action='store_true',
700- help="Files are interpreted as pickled stats and are aggregated " +
701- "for the report.")
702-
703- options, args = parser.parse_args()
704-
705- if not os.path.isdir(options.directory):
706- parser.error("Directory %s does not exist" % options.directory)
707-
708- if len(args) == 0:
709- parser.error("At least one zserver tracelog file must be provided")
710-
711- if options.from_ts is not None and options.until_ts is not None:
712- if options.from_ts > options.until_ts:
713- parser.error(
714- "--from timestamp %s is before --until timestamp %s"
715- % (options.from_ts, options.until_ts))
716- if options.from_ts is not None or options.until_ts is not None:
717- if options.merge:
718- parser.error('--from and --until cannot be used with --merge')
719-
720- for filename in args:
721- if not os.path.exists(filename):
722- parser.error("Tracelog file %s not found." % filename)
723-
724- if not os.path.exists(options.config):
725- parser.error("Config file %s not found." % options.config)
726-
727- # Need a better config mechanism as ConfigParser doesn't preserve order.
728- script_config = RawConfigParser()
729- script_config.optionxform = str # Make keys case sensitive.
730- script_config.readfp(open(options.config))
731-
732- categories = [] # A list of Category, in report order.
733- for option in script_config.options('categories'):
734- regexp = script_config.get('categories', option)
735- try:
736- categories.append(Category(option, regexp))
737- except sre_constants.error as x:
738- log.fatal("Unable to compile regexp %r (%s)" % (regexp, x))
739- return 1
740- categories.sort()
741-
742- if len(categories) == 0:
743- parser.error("No data in [categories] section of configuration.")
744-
745- # Determine the categories making a partition of the requests
746- for option in script_config.options('partition'):
747- for category in categories:
748- if category.title == option:
749- category.partition = True
750- break
751- else:
752- log.warning(
753- "In partition definition: %s isn't a defined category",
754- option)
755-
756- times = RequestTimes(categories, options)
757-
758- if options.merge:
759- for filename in args:
760- log.info('Merging %s...' % filename)
761- f = bz2.BZ2File(filename, 'r')
762- times += cPickle.load(f)
763- f.close()
764- else:
765- parse(args, times, options)
766-
767- category_times = times.get_category_times()
768-
769- pageid_times = []
770- url_times= []
771- if options.top_urls:
772- url_times = times.get_top_urls_times()
773- if options.pageids:
774- pageid_times = times.get_pageid_times()
775-
776- def _report_filename(filename):
777- return os.path.join(options.directory, filename)
778-
779- # Partition report
780- if options.partition:
781- report_filename = _report_filename('partition.html')
782- log.info("Generating %s", report_filename)
783- partition_times = [
784- category_time
785- for category_time in category_times
786- if category_time[0].partition]
787- html_report(
788- open(report_filename, 'w'), partition_times, None, None,
789- histogram_resolution=options.resolution,
790- category_name='Partition')
791-
792- # Category only report.
793- if options.categories:
794- report_filename = _report_filename('categories.html')
795- log.info("Generating %s", report_filename)
796- html_report(
797- open(report_filename, 'w'), category_times, None, None,
798- histogram_resolution=options.resolution)
799-
800- # Pageid only report.
801- if options.pageids:
802- report_filename = _report_filename('pageids.html')
803- log.info("Generating %s", report_filename)
804- html_report(
805- open(report_filename, 'w'), None, pageid_times, None,
806- histogram_resolution=options.resolution)
807-
808- # Top URL only report.
809- if options.top_urls:
810- report_filename = _report_filename('top%d.html' % options.top_urls)
811- log.info("Generating %s", report_filename)
812- html_report(
813- open(report_filename, 'w'), None, None, url_times,
814- histogram_resolution=options.resolution)
815-
816- # Combined report.
817- if options.categories and options.pageids:
818- report_filename = _report_filename('combined.html')
819- html_report(
820- open(report_filename, 'w'),
821- category_times, pageid_times, url_times,
822- histogram_resolution=options.resolution)
823-
824- # Report of likely timeout candidates
825- report_filename = _report_filename('timeout-candidates.html')
826- log.info("Generating %s", report_filename)
827- html_report(
828- open(report_filename, 'w'), None, pageid_times, None,
829- options.timeout - 2,
830- histogram_resolution=options.resolution)
831-
832- # Save the times cache for later merging.
833- report_filename = _report_filename('stats.pck.bz2')
834- log.info("Saving times database in %s", report_filename)
835- stats_file = bz2.BZ2File(report_filename, 'w')
836- cPickle.dump(times, stats_file, protocol=cPickle.HIGHEST_PROTOCOL)
837- stats_file.close()
838-
839- # Output metrics for selected categories.
840- report_filename = _report_filename('metrics.dat')
841- log.info('Saving category_metrics %s', report_filename)
842- metrics_file = open(report_filename, 'w')
843- writer = csv.writer(metrics_file, delimiter=':')
844- date = options.until_ts or options.from_ts or datetime.utcnow()
845- date = time.mktime(date.timetuple())
846-
847- for option in script_config.options('metrics'):
848- name = script_config.get('metrics', option)
849- for category, stats in category_times:
850- if category.title == name:
851- writer.writerows([
852- ("%s_99" % option, "%f@%d" % (
853- stats.ninetyninth_percentile_time, date)),
854- ("%s_hits" % option, "%d@%d" % (stats.total_hits, date))])
855- break
856- else:
857- log.warning("Can't find category %s for metric %s" % (
858- option, name))
859- metrics_file.close()
860-
861- return 0
862-
863-
864-def smart_open(filename, mode='r'):
865- """Open a file, transparently handling compressed files.
866-
867- Compressed files are detected by file extension.
868- """
869- ext = os.path.splitext(filename)[1]
870- if ext == '.bz2':
871- return bz2.BZ2File(filename, 'r')
872- elif ext == '.gz':
873- return gzip.GzipFile(filename, 'r')
874- else:
875- return open(filename, mode)
876-
877-
878-class MalformedLine(Exception):
879- """A malformed line was found in the trace log."""
880-
881-
882-_ts_re = re.compile(
883- '^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:.(\d{6}))?$')
884-
885-
886-def parse_timestamp(ts_string):
887- match = _ts_re.search(ts_string)
888- if match is None:
889- raise ValueError("Invalid timestamp")
890- return datetime(
891- *(int(elem) for elem in match.groups() if elem is not None))
892-
893-
894-def parse(tracefiles, times, options):
895- requests = {}
896- total_requests = 0
897- for tracefile in tracefiles:
898- log.info('Processing %s', tracefile)
899- for line in smart_open(tracefile):
900- line = line.rstrip()
901- try:
902- record = line.split(' ', 7)
903- try:
904- record_type, request_id, date, time_ = record[:4]
905- except ValueError:
906- raise MalformedLine()
907-
908- if record_type == 'S':
909- # Short circuit - we don't care about these entries.
910- continue
911-
912- # Parse the timestamp.
913- ts_string = '%s %s' % (date, time_)
914- try:
915- dt = parse_timestamp(ts_string)
916- except ValueError:
917- raise MalformedLine(
918- 'Invalid timestamp %s' % repr(ts_string))
919-
920- # Filter entries by command line date range.
921- if options.from_ts is not None and dt < options.from_ts:
922- continue # Skip to next line.
923- if options.until_ts is not None and dt > options.until_ts:
924- break # Skip to next log file.
925-
926- args = record[4:]
927-
928- def require_args(count):
929- if len(args) < count:
930- raise MalformedLine()
931-
932- if record_type == 'B': # Request begins.
933- require_args(2)
934- requests[request_id] = Request(dt, args[0], args[1])
935- continue
936-
937- request = requests.get(request_id, None)
938- if request is None: # Just ignore partial records.
939- continue
940-
941- # Old stype extension record from Launchpad. Just
942- # contains the URL.
943- if (record_type == '-' and len(args) == 1
944- and args[0].startswith('http')):
945- request.url = args[0]
946-
947- # New style extension record with a prefix.
948- elif record_type == '-':
949- # Launchpad outputs several things as tracelog
950- # extension records. We include a prefix to tell
951- # them apart.
952- require_args(1)
953-
954- parse_extension_record(request, args)
955-
956- elif record_type == 'I': # Got request input.
957- require_args(1)
958- request.I(dt, args[0])
959-
960- elif record_type == 'C': # Entered application thread.
961- request.C(dt)
962-
963- elif record_type == 'A': # Application done.
964- require_args(2)
965- request.A(dt, args[0], args[1])
966-
967- elif record_type == 'E': # Request done.
968- del requests[request_id]
969- request.E(dt)
970- total_requests += 1
971- if total_requests % 10000 == 0:
972- log.debug("Parsed %d requests", total_requests)
973-
974- # Add the request to any matching categories.
975- times.add_request(request)
976- else:
977- raise MalformedLine('Unknown record type %s', record_type)
978- except MalformedLine as x:
979- log.error(
980- "Malformed line %s (%s)" % (repr(line), x))
981-
982-
983-def parse_extension_record(request, args):
984- """Decode a ZServer extension records and annotate request."""
985- prefix = args[0]
986-
987- if prefix == 'u':
988- request.url = ' '.join(args[1:]) or None
989- elif prefix == 'p':
990- request.pageid = ' '.join(args[1:]) or None
991- elif prefix == 't':
992- if len(args) != 4:
993- raise MalformedLine("Wrong number of arguments %s" % (args,))
994- request.sql_statements = int(args[2])
995- request.sql_seconds = float(args[3]) / 1000
996- else:
997- raise MalformedLine(
998- "Unknown extension prefix %s" % prefix)
999-
1000-
1001-def html_report(
1002- outf, category_times, pageid_times, url_times,
1003- ninetyninth_percentile_threshold=None, histogram_resolution=0.5,
1004- category_name='Category'):
1005- """Write an html report to outf.
1006-
1007- :param outf: A file object to write the report to.
1008- :param category_times: The time statistics for categories.
1009- :param pageid_times: The time statistics for pageids.
1010- :param url_times: The time statistics for the top XXX urls.
1011- :param ninetyninth_percentile_threshold: Lower threshold for inclusion of
1012- pages in the pageid section; pages where 99 percent of the requests are
1013- served under this threshold will not be included.
1014- :param histogram_resolution: used as the histogram bar width
1015- :param category_name: The name to use for category report. Defaults to
1016- 'Category'.
1017- """
1018-
1019- print >> outf, dedent('''\
1020- <!DOCTYPE html>
1021- <html>
1022- <head>
1023- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
1024- <title>Launchpad Page Performance Report %(date)s</title>
1025- <script language="javascript" type="text/javascript"
1026- src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.min.js"
1027- ></script>
1028- <script language="javascript" type="text/javascript"
1029- src="https://devpad.canonical.com/~lpqateam/ppr/js/jquery.appear-1.1.1.min.js"
1030- ></script>
1031- <script language="javascript" type="text/javascript"
1032- src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.flot.min.js"
1033- ></script>
1034- <script language="javascript" type="text/javascript"
1035- src="https://devpad.canonical.com/~lpqateam/ppr/js/sorttable.js"></script>
1036- <style type="text/css">
1037- h3 { font-weight: normal; font-size: 1em; }
1038- thead th { padding-left: 1em; padding-right: 1em; }
1039- .category-title { text-align: right; padding-right: 2em;
1040- max-width: 25em; }
1041- .regexp { font-size: x-small; font-weight: normal; }
1042- .mean { text-align: right; padding-right: 1em; }
1043- .median { text-align: right; padding-right: 1em; }
1044- .standard-deviation { text-align: right; padding-right: 1em; }
1045- .histogram { padding: 0.5em 1em; width:400px; height:250px; }
1046- .odd-row { background-color: #eeeeff; }
1047- .even-row { background-color: #ffffee; }
1048- table.sortable thead {
1049- background-color:#eee;
1050- color:#666666;
1051- font-weight: bold;
1052- cursor: default;
1053- }
1054- td.numeric {
1055- font-family: monospace;
1056- text-align: right;
1057- padding: 1em;
1058- }
1059- .clickable { cursor: hand; }
1060- .total-hits, .histogram, .median-sqltime,
1061- .median-sqlstatements { border-right: 1px dashed #000000; }
1062- </style>
1063- </head>
1064- <body>
1065- <h1>Launchpad Page Performance Report</h1>
1066- <h3>%(date)s</h3>
1067- ''' % {'date': time.ctime()})
1068-
1069- table_header = dedent('''\
1070- <table class="sortable page-performance-report">
1071- <caption align="top">Click on column headings to sort.</caption>
1072- <thead>
1073- <tr>
1074- <th class="clickable">Name</th>
1075-
1076- <th class="clickable">Total Hits</th>
1077-
1078- <th class="clickable">99% Under Time (secs)</th>
1079-
1080- <th class="clickable">Mean Time (secs)</th>
1081- <th class="clickable">Time Standard Deviation</th>
1082- <th class="clickable">Median Time (secs)</th>
1083- <th class="sorttable_nosort">Time Distribution</th>
1084-
1085- <th class="clickable">99% Under SQL Time (secs)</th>
1086- <th class="clickable">Mean SQL Time (secs)</th>
1087- <th class="clickable">SQL Time Standard Deviation</th>
1088- <th class="clickable">Median SQL Time (secs)</th>
1089-
1090- <th class="clickable">99% Under SQL Statements</th>
1091- <th class="clickable">Mean SQL Statements</th>
1092- <th class="clickable">SQL Statement Standard Deviation</th>
1093- <th class="clickable">Median SQL Statements</th>
1094-
1095- <th class="clickable">Hits * 99% Under SQL Statement</th>
1096- </tr>
1097- </thead>
1098- <tbody>
1099- ''')
1100- table_footer = "</tbody></table>"
1101-
1102- # Store our generated histograms to output Javascript later.
1103- histograms = []
1104-
1105- def handle_times(html_title, stats):
1106- histograms.append(stats.histogram)
1107- print >> outf, dedent("""\
1108- <tr>
1109- <th class="category-title">%s</th>
1110- <td class="numeric total-hits">%d</td>
1111- <td class="numeric 99pc-under-time">%.2f</td>
1112- <td class="numeric mean-time">%.2f</td>
1113- <td class="numeric std-time">%.2f</td>
1114- <td class="numeric median-time">%.2f</td>
1115- <td>
1116- <div class="histogram" id="histogram%d"></div>
1117- </td>
1118- <td class="numeric 99pc-under-sqltime">%.2f</td>
1119- <td class="numeric mean-sqltime">%.2f</td>
1120- <td class="numeric std-sqltime">%.2f</td>
1121- <td class="numeric median-sqltime">%.2f</td>
1122-
1123- <td class="numeric 99pc-under-sqlstatement">%.f</td>
1124- <td class="numeric mean-sqlstatements">%.2f</td>
1125- <td class="numeric std-sqlstatements">%.2f</td>
1126- <td class="numeric median-sqlstatements">%.2f</td>
1127-
1128- <td class="numeric high-db-usage">%.f</td>
1129- </tr>
1130- """ % (
1131- html_title,
1132- stats.total_hits, stats.ninetyninth_percentile_time,
1133- stats.mean, stats.std, stats.median,
1134- len(histograms) - 1,
1135- stats.ninetyninth_percentile_sqltime, stats.mean_sqltime,
1136- stats.std_sqltime, stats.median_sqltime,
1137- stats.ninetyninth_percentile_sqlstatements,
1138- stats.mean_sqlstatements,
1139- stats.std_sqlstatements, stats.median_sqlstatements,
1140- stats.ninetyninth_percentile_sqlstatements* stats.total_hits,
1141- ))
1142-
1143- # Table of contents
1144- print >> outf, '<ol>'
1145- if category_times:
1146- print >> outf, '<li><a href="#catrep">%s Report</a></li>' % (
1147- category_name)
1148- if pageid_times:
1149- print >> outf, '<li><a href="#pageidrep">Pageid Report</a></li>'
1150- if url_times:
1151- print >> outf, '<li><a href="#topurlrep">Top URL Report</a></li>'
1152- print >> outf, '</ol>'
1153-
1154- if category_times:
1155- print >> outf, '<h2 id="catrep">%s Report</h2>' % (
1156- category_name)
1157- print >> outf, table_header
1158- for category, times in category_times:
1159- html_title = '%s<br/><span class="regexp">%s</span>' % (
1160- html_quote(category.title), html_quote(category.regexp))
1161- handle_times(html_title, times)
1162- print >> outf, table_footer
1163-
1164- if pageid_times:
1165- print >> outf, '<h2 id="pageidrep">Pageid Report</h2>'
1166- print >> outf, table_header
1167- for pageid, times in pageid_times:
1168- if (ninetyninth_percentile_threshold is not None and
1169- (times.ninetyninth_percentile_time <
1170- ninetyninth_percentile_threshold)):
1171- continue
1172- handle_times(html_quote(pageid), times)
1173- print >> outf, table_footer
1174-
1175- if url_times:
1176- print >> outf, '<h2 id="topurlrep">Top URL Report</h2>'
1177- print >> outf, table_header
1178- for url, times in url_times:
1179- handle_times(html_quote(url), times)
1180- print >> outf, table_footer
1181-
1182- # Ourput the javascript to render our histograms nicely, replacing
1183- # the placeholder <div> tags output earlier.
1184- print >> outf, dedent("""\
1185- <script language="javascript" type="text/javascript">
1186- $(function () {
1187- var options = {
1188- series: {
1189- bars: {show: true, barWidth: %s}
1190- },
1191- xaxis: {
1192- tickFormatter: function (val, axis) {
1193- return val.toFixed(axis.tickDecimals) + "s";
1194- }
1195- },
1196- yaxis: {
1197- min: 0,
1198- max: 1,
1199- transform: function (v) {
1200- return Math.pow(Math.log(v*100+1)/Math.LN2, 0.5);
1201- },
1202- inverseTransform: function (v) {
1203- return Math.pow(Math.exp(v*100+1)/Math.LN2, 2);
1204- },
1205- tickDecimals: 1,
1206- tickFormatter: function (val, axis) {
1207- return (val * 100).toFixed(axis.tickDecimals) + "%%";
1208- },
1209- ticks: [0.001,0.01,0.10,0.50,1.0]
1210- },
1211- grid: {
1212- aboveData: true,
1213- labelMargin: 15
1214- }
1215- };
1216- """ % histogram_resolution)
1217-
1218- for i, histogram in enumerate(histograms):
1219- if histogram.count == 0:
1220- continue
1221- print >> outf, dedent("""\
1222- function plot_histogram_%(id)d() {
1223- var d = %(data)s;
1224-
1225- $.plot(
1226- $("#histogram%(id)d"),
1227- [{data: d}], options);
1228- }
1229- $('#histogram%(id)d').appear(function() {
1230- plot_histogram_%(id)d();
1231- });
1232-
1233- """ % {'id': i, 'data': json.dumps(histogram.bins_relative)})
1234-
1235- print >> outf, dedent("""\
1236- });
1237- </script>
1238- </body>
1239- </html>
1240- """)
1241
1242=== removed file 'lib/lp/scripts/utilities/tests/test_pageperformancereport.py'
1243--- lib/lp/scripts/utilities/tests/test_pageperformancereport.py 2011-08-12 11:37:08 +0000
1244+++ lib/lp/scripts/utilities/tests/test_pageperformancereport.py 1970-01-01 00:00:00 +0000
1245@@ -1,483 +0,0 @@
1246-# Copyright 2010 Canonical Ltd. This software is licensed under the
1247-# GNU Affero General Public License version 3 (see the file LICENSE).
1248-
1249-"""Test the pageperformancereport script."""
1250-
1251-__metaclass__ = type
1252-
1253-from lp.scripts.utilities.pageperformancereport import (
1254- Category,
1255- Histogram,
1256- OnlineApproximateMedian,
1257- OnlineStats,
1258- OnlineStatsCalculator,
1259- RequestTimes,
1260- Stats,
1261- )
1262-from lp.testing import TestCase
1263-
1264-
1265-class FakeOptions:
1266- timeout = 5
1267- db_file = None
1268- pageids = True
1269- top_urls = 3
1270- resolution = 1
1271-
1272- def __init__(self, **kwargs):
1273- """Assign all arguments as attributes."""
1274- self.__dict__.update(kwargs)
1275-
1276-
1277-class FakeRequest:
1278-
1279- def __init__(self, url, app_seconds, sql_statements=None,
1280- sql_seconds=None, pageid=None):
1281- self.url = url
1282- self.pageid = pageid
1283- self.app_seconds = app_seconds
1284- self.sql_statements = sql_statements
1285- self.sql_seconds = sql_seconds
1286-
1287-
1288-class FakeStats(Stats):
1289-
1290- def __init__(self, **kwargs):
1291- # Override the constructor to just store the values.
1292- self.__dict__.update(kwargs)
1293-
1294-
1295-FAKE_REQUESTS = [
1296- FakeRequest('/', 0.5, pageid='+root'),
1297- FakeRequest('/bugs', 4.5, 56, 3.0, pageid='+bugs'),
1298- FakeRequest('/bugs', 4.2, 56, 2.2, pageid='+bugs'),
1299- FakeRequest('/bugs', 5.5, 76, 4.0, pageid='+bugs'),
1300- FakeRequest('/ubuntu', 2.5, 6, 2.0, pageid='+distribution'),
1301- FakeRequest('/launchpad', 3.5, 3, 3.0, pageid='+project'),
1302- FakeRequest('/bzr', 2.5, 4, 2.0, pageid='+project'),
1303- FakeRequest('/bugs/1', 20.5, 567, 14.0, pageid='+bug'),
1304- FakeRequest('/bugs/1', 15.5, 567, 9.0, pageid='+bug'),
1305- FakeRequest('/bugs/5', 1.5, 30, 1.2, pageid='+bug'),
1306- FakeRequest('/lazr', 1.0, 16, 0.3, pageid='+project'),
1307- FakeRequest('/drizzle', 0.9, 11, 1.3, pageid='+project'),
1308- ]
1309-
1310-
1311-# The category stats computed for the above 12 requests.
1312-CATEGORY_STATS = [
1313- # Median is an approximation.
1314- # Real values are: 2.50, 2.20, 30
1315- (Category('All', ''), FakeStats(
1316- total_hits=12, total_time=62.60, mean=5.22, median=4.20, std=5.99,
1317- total_sqltime=42, mean_sqltime=3.82, median_sqltime=3.0,
1318- std_sqltime=3.89,
1319- total_sqlstatements=1392, mean_sqlstatements=126.55,
1320- median_sqlstatements=56, std_sqlstatements=208.94,
1321- histogram=[[0, 2], [1, 2], [2, 2], [3, 1], [4, 2], [5, 3]],
1322- )),
1323- (Category('Test', ''), FakeStats(
1324- histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]])),
1325- (Category('Bugs', ''), FakeStats(
1326- total_hits=6, total_time=51.70, mean=8.62, median=4.5, std=6.90,
1327- total_sqltime=33.40, mean_sqltime=5.57, median_sqltime=3,
1328- std_sqltime=4.52,
1329- total_sqlstatements=1352, mean_sqlstatements=225.33,
1330- median_sqlstatements=56, std_sqlstatements=241.96,
1331- histogram=[[0, 0], [1, 1], [2, 0], [3, 0], [4, 2], [5, 3]],
1332- )),
1333- ]
1334-
1335-
1336-# The top 3 URL stats computed for the above 12 requests.
1337-TOP_3_URL_STATS = [
1338- ('/bugs/1', FakeStats(
1339- total_hits=2, total_time=36.0, mean=18.0, median=15.5, std=2.50,
1340- total_sqltime=23.0, mean_sqltime=11.5, median_sqltime=9.0,
1341- std_sqltime=2.50,
1342- total_sqlstatements=1134, mean_sqlstatements=567.0,
1343- median_sqlstatements=567, std_statements=0,
1344- histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 2]],
1345- )),
1346- ('/bugs', FakeStats(
1347- total_hits=3, total_time=14.2, mean=4.73, median=4.5, std=0.56,
1348- total_sqltime=9.2, mean_sqltime=3.07, median_sqltime=3,
1349- std_sqltime=0.74,
1350- total_sqlstatements=188, mean_sqlstatements=62.67,
1351- median_sqlstatements=56, std_sqlstatements=9.43,
1352- histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 2], [5, 1]],
1353- )),
1354- ('/launchpad', FakeStats(
1355- total_hits=1, total_time=3.5, mean=3.5, median=3.5, std=0,
1356- total_sqltime=3.0, mean_sqltime=3, median_sqltime=3, std_sqltime=0,
1357- total_sqlstatements=3, mean_sqlstatements=3,
1358- median_sqlstatements=3, std_sqlstatements=0,
1359- histogram=[[0, 0], [1, 0], [2, 0], [3, 1], [4, 0], [5, 0]],
1360- )),
1361- ]
1362-
1363-
1364-# The pageid stats computed for the above 12 requests.
1365-PAGEID_STATS = [
1366- ('+bug', FakeStats(
1367- total_hits=3, total_time=37.5, mean=12.5, median=15.5, std=8.04,
1368- total_sqltime=24.2, mean_sqltime=8.07, median_sqltime=9,
1369- std_sqltime=5.27,
1370- total_sqlstatements=1164, mean_sqlstatements=388,
1371- median_sqlstatements=567, std_sqlstatements=253.14,
1372- histogram=[[0, 0], [1, 1], [2, 0], [3, 0], [4, 0], [5, 2]],
1373- )),
1374- ('+bugs', FakeStats(
1375- total_hits=3, total_time=14.2, mean=4.73, median=4.5, std=0.56,
1376- total_sqltime=9.2, mean_sqltime=3.07, median_sqltime=3,
1377- std_sqltime=0.74,
1378- total_sqlstatements=188, mean_sqlstatements=62.67,
1379- median_sqlstatements=56, std_sqlstatements=9.43,
1380- histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 2], [5, 1]],
1381- )),
1382- ('+distribution', FakeStats(
1383- total_hits=1, total_time=2.5, mean=2.5, median=2.5, std=0,
1384- total_sqltime=2.0, mean_sqltime=2, median_sqltime=2, std_sqltime=0,
1385- total_sqlstatements=6, mean_sqlstatements=6,
1386- median_sqlstatements=6, std_sqlstatements=0,
1387- histogram=[[0, 0], [1, 0], [2, 1], [3, 0], [4, 0], [5, 0]],
1388- )),
1389- ('+project', FakeStats(
1390- total_hits=4, total_time=7.9, mean=1.98, median=1, std=1.08,
1391- total_sqltime=6.6, mean_sqltime=1.65, median_sqltime=1.3,
1392- std_sqltime=0.99,
1393- total_sqlstatements=34, mean_sqlstatements=8.5,
1394- median_sqlstatements=4, std_sqlstatements=5.32,
1395- histogram=[[0, 1], [1, 1], [2, 1], [3, 1], [4, 0], [5, 0]],
1396- )),
1397- ('+root', FakeStats(
1398- total_hits=1, total_time=0.5, mean=0.5, median=0.5, std=0,
1399- histogram=[[0, 1], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]],
1400- )),
1401- ]
1402-
1403-
1404-class TestRequestTimes(TestCase):
1405- """Tests the RequestTimes backend."""
1406-
1407- def setUp(self):
1408- TestCase.setUp(self)
1409- self.categories = [
1410- Category('All', '.*'), Category('Test', '.*test.*'),
1411- Category('Bugs', '.*bugs.*')]
1412- self.db = RequestTimes(self.categories, FakeOptions())
1413-
1414- def setUpRequests(self):
1415- """Insert some requests into the db."""
1416- for r in FAKE_REQUESTS:
1417- self.db.add_request(r)
1418-
1419- def assertStatsAreEquals(self, expected, results):
1420- self.assertEquals(
1421- len(expected), len(results), 'Wrong number of results')
1422- for idx in range(len(results)):
1423- self.assertEquals(expected[idx][0], results[idx][0],
1424- "Wrong key for results %d" % idx)
1425- key = results[idx][0]
1426- self.assertEquals(expected[idx][1].text(), results[idx][1].text(),
1427- "Wrong stats for results %d (%s)" % (idx, key))
1428- self.assertEquals(
1429- Histogram.from_bins_data(expected[idx][1].histogram),
1430- results[idx][1].histogram,
1431- "Wrong histogram for results %d (%s)" % (idx, key))
1432-
1433- def test_get_category_times(self):
1434- self.setUpRequests()
1435- category_times = self.db.get_category_times()
1436- self.assertStatsAreEquals(CATEGORY_STATS, category_times)
1437-
1438- def test_get_url_times(self):
1439- self.setUpRequests()
1440- url_times = self.db.get_top_urls_times()
1441- self.assertStatsAreEquals(TOP_3_URL_STATS, url_times)
1442-
1443- def test_get_pageid_times(self):
1444- self.setUpRequests()
1445- pageid_times = self.db.get_pageid_times()
1446- self.assertStatsAreEquals(PAGEID_STATS, pageid_times)
1447-
1448- def test___add__(self):
1449- # Ensure that adding two RequestTimes together result in
1450- # a merge of their constituencies.
1451- db1 = self.db
1452- db2 = RequestTimes(self.categories, FakeOptions())
1453- db1.add_request(FakeRequest('/', 1.5, 5, 1.0, '+root'))
1454- db1.add_request(FakeRequest('/bugs', 3.5, 15, 1.0, '+bugs'))
1455- db2.add_request(FakeRequest('/bugs/1', 5.0, 30, 4.0, '+bug'))
1456- results = db1 + db2
1457- self.assertEquals(3, results.category_times[0][1].total_hits)
1458- self.assertEquals(0, results.category_times[1][1].total_hits)
1459- self.assertEquals(2, results.category_times[2][1].total_hits)
1460- self.assertEquals(1, results.pageid_times['+root'].total_hits)
1461- self.assertEquals(1, results.pageid_times['+bugs'].total_hits)
1462- self.assertEquals(1, results.pageid_times['+bug'].total_hits)
1463- self.assertEquals(1, results.url_times['/'].total_hits)
1464- self.assertEquals(1, results.url_times['/bugs'].total_hits)
1465- self.assertEquals(1, results.url_times['/bugs/1'].total_hits)
1466-
1467- def test_histogram_init_with_resolution(self):
1468- # Test that the resolution parameter increase the number of bins
1469- db = RequestTimes(
1470- self.categories, FakeOptions(timeout=4, resolution=1))
1471- self.assertEquals(5, db.histogram_width)
1472- self.assertEquals(1, db.histogram_resolution)
1473- db = RequestTimes(
1474- self.categories, FakeOptions(timeout=4, resolution=0.5))
1475- self.assertEquals(9, db.histogram_width)
1476- self.assertEquals(0.5, db.histogram_resolution)
1477- db = RequestTimes(
1478- self.categories, FakeOptions(timeout=4, resolution=2))
1479- self.assertEquals(3, db.histogram_width)
1480- self.assertEquals(2, db.histogram_resolution)
1481-
1482-
1483-class TestOnlineStats(TestCase):
1484- """Tests for the OnlineStats class."""
1485-
1486- def test___add__(self):
1487- # Ensure that adding two OnlineStats merge all their constituencies.
1488- stats1 = OnlineStats(4, 1)
1489- stats1.update(FakeRequest('/', 2.0, 5, 1.5))
1490- stats2 = OnlineStats(4, 1)
1491- stats2.update(FakeRequest('/', 1.5, 2, 3.0))
1492- stats2.update(FakeRequest('/', 5.0, 2, 2.0))
1493- results = stats1 + stats2
1494- self.assertEquals(3, results.total_hits)
1495- self.assertEquals(2, results.median)
1496- self.assertEquals(9, results.total_sqlstatements)
1497- self.assertEquals(2, results.median_sqlstatements)
1498- self.assertEquals(6.5, results.total_sqltime)
1499- self.assertEquals(2.0, results.median_sqltime)
1500- self.assertEquals(
1501- Histogram.from_bins_data([[0, 0], [1, 1], [2, 1], [3, 1]]),
1502- results.histogram)
1503-
1504-
1505-class TestOnlineStatsCalculator(TestCase):
1506- """Tests for the online stats calculator."""
1507-
1508- def setUp(self):
1509- TestCase.setUp(self)
1510- self.stats = OnlineStatsCalculator()
1511-
1512- def test_stats_for_empty_set(self):
1513- # Test the stats when there is no input.
1514- self.assertEquals(0, self.stats.count)
1515- self.assertEquals(0, self.stats.sum)
1516- self.assertEquals(0, self.stats.mean)
1517- self.assertEquals(0, self.stats.variance)
1518- self.assertEquals(0, self.stats.std)
1519-
1520- def test_stats_for_one_value(self):
1521- # Test the stats when adding one element.
1522- self.stats.update(5)
1523- self.assertEquals(1, self.stats.count)
1524- self.assertEquals(5, self.stats.sum)
1525- self.assertEquals(5, self.stats.mean)
1526- self.assertEquals(0, self.stats.variance)
1527- self.assertEquals(0, self.stats.std)
1528-
1529- def test_None_are_ignored(self):
1530- self.stats.update(None)
1531- self.assertEquals(0, self.stats.count)
1532-
1533- def test_stats_for_3_values(self):
1534- for x in [3, 6, 9]:
1535- self.stats.update(x)
1536- self.assertEquals(3, self.stats.count)
1537- self.assertEquals(18, self.stats.sum)
1538- self.assertEquals(6, self.stats.mean)
1539- self.assertEquals(6, self.stats.variance)
1540- self.assertEquals("2.45", "%.2f" % self.stats.std)
1541-
1542- def test___add___two_empty_together(self):
1543- stats2 = OnlineStatsCalculator()
1544- results = self.stats + stats2
1545- self.assertEquals(0, results.count)
1546- self.assertEquals(0, results.sum)
1547- self.assertEquals(0, results.mean)
1548- self.assertEquals(0, results.variance)
1549-
1550- def test___add___one_empty(self):
1551- stats2 = OnlineStatsCalculator()
1552- for x in [1, 2, 3]:
1553- self.stats.update(x)
1554- results = self.stats + stats2
1555- self.assertEquals(3, results.count)
1556- self.assertEquals(6, results.sum)
1557- self.assertEquals(2, results.mean)
1558- self.assertEquals(2, results.M2)
1559-
1560- def test___add__(self):
1561- stats2 = OnlineStatsCalculator()
1562- for x in [3, 6, 9]:
1563- self.stats.update(x)
1564- for x in [1, 2, 3]:
1565- stats2.update(x)
1566- results = self.stats + stats2
1567- self.assertEquals(6, results.count)
1568- self.assertEquals(24, results.sum)
1569- self.assertEquals(4, results.mean)
1570- self.assertEquals(44, results.M2)
1571-
1572-
1573-SHUFFLE_RANGE_100 = [
1574- 25, 79, 99, 76, 60, 63, 87, 77, 51, 82, 42, 96, 93, 58, 32, 66, 75,
1575- 2, 26, 22, 11, 73, 61, 83, 65, 68, 44, 81, 64, 3, 33, 34, 15, 1,
1576- 92, 27, 90, 74, 46, 57, 59, 31, 13, 19, 89, 29, 56, 94, 50, 49, 62,
1577- 37, 21, 35, 5, 84, 88, 16, 8, 23, 40, 6, 48, 10, 97, 0, 53, 17, 30,
1578- 18, 43, 86, 12, 71, 38, 78, 36, 7, 45, 47, 80, 54, 39, 91, 98, 24,
1579- 55, 14, 52, 20, 69, 85, 95, 28, 4, 9, 67, 70, 41, 72,
1580- ]
1581-
1582-
1583-class TestOnlineApproximateMedian(TestCase):
1584- """Tests for the approximate median computation."""
1585-
1586- def setUp(self):
1587- TestCase.setUp(self)
1588- self.estimator = OnlineApproximateMedian()
1589-
1590- def test_median_is_0_when_no_input(self):
1591- self.assertEquals(0, self.estimator.median)
1592-
1593- def test_median_is_true_median_for_n_lower_than_bucket_size(self):
1594- for x in range(9):
1595- self.estimator.update(x)
1596- self.assertEquals(4, self.estimator.median)
1597-
1598- def test_None_input_is_ignored(self):
1599- self.estimator.update(1)
1600- self.estimator.update(None)
1601- self.assertEquals(1, self.estimator.median)
1602-
1603- def test_approximate_median_is_good_enough(self):
1604- for x in SHUFFLE_RANGE_100:
1605- self.estimator.update(x)
1606-        # True median is 49.5; anything in [49, 51] is close enough :-)
1607-        self.assertIn(self.estimator.median, range(49, 52))
1608-
1609- def test___add__(self):
1610- median1 = OnlineApproximateMedian(3)
1611- median1.buckets = [[1, 3], [4, 5], [6, 3]]
1612- median2 = OnlineApproximateMedian(3)
1613- median2.buckets = [[], [3, 6], [3, 7]]
1614- results = median1 + median2
1615- self.assertEquals([[1, 3], [6], [3, 7], [4]], results.buckets)
1616-
1617-
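The estimator tested above bounds memory by keeping only small buckets of samples. A hypothetical sketch of that cascading-buckets ("remedian"-style) idea follows; the default bucket size, the collapse rule and the level-wise merge that test___add__ checks are simplified here, so treat it as an illustration rather than the removed implementation:

    class OnlineApproximateMedian:
        """Approximate the median of a stream with bounded memory."""

        def __init__(self, bucket_size=9):
            self.bucket_size = bucket_size
            self.buckets = [[]]

        def update(self, value, level=0):
            """Add a value; a full bucket collapses to its median one level up."""
            if value is None:
                return
            while level >= len(self.buckets):
                self.buckets.append([])
            bucket = self.buckets[level]
            bucket.append(value)
            if len(bucket) == self.bucket_size:
                bucket.sort()
                self.buckets[level] = []
                # Promote the bucket's median and recycle the bucket.
                self.update(bucket[self.bucket_size // 2], level + 1)

        @property
        def median(self):
            """Median of everything still held, or 0 when nothing was seen."""
            survivors = sorted(
                value for bucket in self.buckets for value in bucket)
            if not survivors:
                return 0
            return survivors[len(survivors) // 2]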
1618-class TestHistogram(TestCase):
1619- """Test the histogram computation."""
1620-
1621- def test__init__(self):
1622- hist = Histogram(4, 1)
1623- self.assertEquals(4, hist.bins_count)
1624- self.assertEquals(1, hist.bins_size)
1625- self.assertEquals([[0, 0], [1, 0], [2, 0], [3, 0]], hist.bins)
1626-
1627- def test__init__bins_size_float(self):
1628- hist = Histogram(9, 0.5)
1629- self.assertEquals(9, hist.bins_count)
1630- self.assertEquals(0.5, hist.bins_size)
1631- self.assertEquals(
1632- [[0, 0], [0.5, 0], [1.0, 0], [1.5, 0],
1633- [2.0, 0], [2.5, 0], [3.0, 0], [3.5, 0], [4.0, 0]], hist.bins)
1634-
1635- def test_update(self):
1636- hist = Histogram(4, 1)
1637- hist.update(1)
1638- self.assertEquals(1, hist.count)
1639- self.assertEquals([[0, 0], [1, 1], [2, 0], [3, 0]], hist.bins)
1640-
1641- hist.update(1.3)
1642- self.assertEquals(2, hist.count)
1643- self.assertEquals([[0, 0], [1, 2], [2, 0], [3, 0]], hist.bins)
1644-
1645- def test_update_float_bin_size(self):
1646- hist = Histogram(4, 0.5)
1647- hist.update(1.3)
1648- self.assertEquals([[0, 0], [0.5, 0], [1.0, 1], [1.5, 0]], hist.bins)
1649- hist.update(0.5)
1650- self.assertEquals([[0, 0], [0.5, 1], [1.0, 1], [1.5, 0]], hist.bins)
1651- hist.update(0.6)
1652- self.assertEquals([[0, 0], [0.5, 2], [1.0, 1], [1.5, 0]], hist.bins)
1653-
1654- def test_update_max_goes_in_last_bin(self):
1655- hist = Histogram(4, 1)
1656- hist.update(9)
1657- self.assertEquals([[0, 0], [1, 0], [2, 0], [3, 1]], hist.bins)
1658-
1659- def test_bins_relative(self):
1660- hist = Histogram(4, 1)
1661- for x in range(4):
1662- hist.update(x)
1663- self.assertEquals(
1664- [[0, 0.25], [1, 0.25], [2, 0.25], [3, 0.25]], hist.bins_relative)
1665-
1666- def test_from_bins_data(self):
1667- hist = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1668- self.assertEquals(4, hist.bins_count)
1669- self.assertEquals(1, hist.bins_size)
1670- self.assertEquals(6, hist.count)
1671- self.assertEquals([[0, 1], [1, 3], [2, 1], [3, 1]], hist.bins)
1672-
1673- def test___repr__(self):
1674- hist = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1675- self.assertEquals(
1676- "<Histogram [[0, 1], [1, 3], [2, 1], [3, 1]]>", repr(hist))
1677-
1678- def test___eq__(self):
1679- hist1 = Histogram(4, 1)
1680- hist2 = Histogram(4, 1)
1681- self.assertEquals(hist1, hist2)
1682-
1683- def test__eq___with_data(self):
1684- hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1685- hist2 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1686- self.assertEquals(hist1, hist2)
1687-
1688- def test___add__(self):
1689- hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1690- hist2 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1691- hist3 = Histogram.from_bins_data([[0, 2], [1, 6], [2, 2], [3, 2]])
1692- total = hist1 + hist2
1693- self.assertEquals(hist3, total)
1694- self.assertEquals(12, total.count)
1695-
1696- def test___add___uses_widest(self):
1697- # Make sure that the resulting histogram is as wide as the widest one.
1698- hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]])
1699- hist2 = Histogram.from_bins_data(
1700- [[0, 1], [1, 3], [2, 1], [3, 1], [4, 2], [5, 3]])
1701- hist3 = Histogram.from_bins_data(
1702- [[0, 2], [1, 6], [2, 2], [3, 2], [4, 2], [5, 3]])
1703- self.assertEquals(hist3, hist1 + hist2)
1704-
1705- def test___add___interpolate_lower_resolution(self):
1706- # Make sure that when the other histogram has a bigger bin_size
1707- # the frequency is correctly split across the different bins.
1708- hist1 = Histogram.from_bins_data(
1709- [[0, 1], [0.5, 3], [1.0, 1], [1.5, 1]])
1710- hist2 = Histogram.from_bins_data(
1711- [[0, 1], [1, 2], [2, 3], [3, 1], [4, 1]])
1712-
1713- hist3 = Histogram.from_bins_data(
1714- [[0, 1.5], [0.5, 3.5], [1.0, 2], [1.5, 2],
1715- [2.0, 1.5], [2.5, 1.5], [3.0, 0.5], [3.5, 0.5],
1716- [4.0, 0.5], [4.5, 0.5]])
1717- self.assertEquals(hist3, hist1 + hist2)
1718-
1719- def test___add___higher_resolution(self):
1720- # Make sure that when the other histogram has a smaller bin_size
1721- # the frequency is correctly added.
1722- hist1 = Histogram.from_bins_data([[0, 1], [1, 2], [2, 3]])
1723- hist2 = Histogram.from_bins_data(
1724- [[0, 1], [0.5, 3], [1.0, 1], [1.5, 1], [2.0, 3], [2.5, 1],
1725- [3, 4], [3.5, 2]])
1726-
1727- hist3 = Histogram.from_bins_data([[0, 5], [1, 4], [2, 7], [3, 6]])
1728- self.assertEquals(hist3, hist1 + hist2)
1729
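For completeness, the Histogram contract that TestHistogram pins down — fixed-width bins, overflow clamped into the last bin, relative frequencies, and reconstruction from raw bin data — can be sketched as below. The widening and interpolating __add__ covered by the last few tests is deliberately left out; this is a hedged illustration, not the code that moved to lp-dev-utils:

    class Histogram:
        """Fixed-width histogram; values past the last bin land in it."""

        @classmethod
        def from_bins_data(cls, data):
            """Rebuild a histogram from [[lower_edge, frequency], ...] pairs."""
            hist = cls(len(data), data[1][0] - data[0][0])
            for index, (_, frequency) in enumerate(data):
                hist.bins[index][1] = frequency
                hist.count += frequency
            return hist

        def __init__(self, bins_count, bins_size):
            self.count = 0
            self.bins_count = bins_count
            self.bins_size = bins_size
            self.bins = [[x * bins_size, 0] for x in range(bins_count)]

        @property
        def bins_relative(self):
            """The same bins with frequencies as fractions of the total."""
            return [[edge, float(frequency) / self.count]
                    for edge, frequency in self.bins]

        def update(self, value):
            """Count one observation, clamping overflow into the last bin."""
            index = min(int(value / self.bins_size), self.bins_count - 1)
            self.bins[index][1] += 1
            self.count += 1

        def __repr__(self):
            return "<Histogram %s>" % self.bins

        def __eq__(self, other):
            return self.bins == other.bins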
1730=== removed file 'utilities/page-performance-report-daily.sh'
1731--- utilities/page-performance-report-daily.sh 2011-05-04 17:32:17 +0000
1732+++ utilities/page-performance-report-daily.sh 1970-01-01 00:00:00 +0000
1733@@ -1,115 +0,0 @@
1734-#!/bin/sh
1735-
1736-#TZ=UTC # trace logs are still BST - blech
1737-
1738-CATEGORY=lpnet
1739-LOGS_ROOTS="/srv/launchpad.net-logs/production /srv/launchpad.net-logs/edge"
1740-OUTPUT_ROOT=${HOME}/public_html/ppr/lpnet
1741-DAY_FMT="+%Y-%m-%d"
1742-
1743-find_logs() {
1744- from=$1
1745- until=$2
1746-
1747- end_mtime_switch=
1748- days_to_end="$(expr `date +%j` - `date -d $until +%j` - 1)"
1749- if [ $days_to_end -gt 0 ]; then
1750- end_mtime_switch="-daystart -mtime +$days_to_end"
1751- fi
1752-
1753- find ${LOGS_ROOTS} \
1754- -maxdepth 2 -type f -newermt "$from - 1 day" $end_mtime_switch \
1755- -name launchpad-trace\* \
1756- | sort | xargs -x
1757-}
1758-
1759-# Find all the daily stats.pck.bz2 $from $until
1760-find_stats() {
1761- from=$1
1762- until=$2
1763-
1764- # Build a string of all the days within range.
1765- local dates
1766- local day
1767- day=$from
1768- while [ $day != $until ]; do
1769- dates="$dates $day"
1770- day=`date $DAY_FMT -d "$day + 1 day"`
1771- done
1772-
1773- # Use that to build a regex that will be used to select
1774- # the files to use.
1775- local regex
1776- regex="daily_(`echo $dates |sed -e 's/ /|/g'`)"
1777-
1778- find ${OUTPUT_ROOT} -name 'stats.pck.bz2' | egrep $regex
1779-}
1780-
1781-report() {
1782- type=$1
1783- from=$2
1784- until=$3
1785- link=$4
1786-
1787- local files
1788- local options
1789- if [ "$type" = "daily" ]; then
1790- files=`find_logs $from $until`
1791- options="--from=$from --until=$until"
1792- else
1793- files=`find_stats $from $until`
1794- options="--merge"
1795- fi
1796-
1797- local dir
1798- dir=${OUTPUT_ROOT}/`date -d $from +%Y-%m`/${type}_${from}_${until}
1799- mkdir -p ${dir}
1800-
1801- echo Generating report from $from until $until into $dir `date`
1802-
1803- ./page-performance-report.py -v --top-urls=200 --directory=${dir} \
1804- $options $files
1805-
1806- # Only do the linking if requested.
1807- if [ "$link" = "link" ]; then
1808- ln -sf ${dir}/partition.html \
1809- ${OUTPUT_ROOT}/latest-${type}-partition.html
1810- ln -sf ${dir}/categories.html \
1811- ${OUTPUT_ROOT}/latest-${type}-categories.html
1812- ln -sf ${dir}/pageids.html \
1813- ${OUTPUT_ROOT}/latest-${type}-pageids.html
1814- ln -sf ${dir}/combined.html \
1815- ${OUTPUT_ROOT}/latest-${type}-combined.html
1816- ln -sf ${dir}/metrics.dat ${OUTPUT_ROOT}/latest-${type}-metrics.dat
1817- ln -sf ${dir}/top200.html ${OUTPUT_ROOT}/latest-${type}-top200.html
1818- ln -sf ${dir}/timeout-candidates.html \
1819- ${OUTPUT_ROOT}/latest-${type}-timeout-candidates.html
1820- fi
1821-
1822- return 0
1823-}
1824-
1825-link=""
1826-if [ "$3" = "-l" ]; then
1827- link="link"
1828-fi
1829-
1830-if [ "$1" = '-d' ]; then
1831- report daily `date -d $2 $DAY_FMT` `date -d "$2 + 1 day" $DAY_FMT` $link
1832-elif [ "$1" = '-w' ]; then
1833- report weekly `date -d $2 $DAY_FMT` `date -d "$2 + 1 week" $DAY_FMT` $link
1834-elif [ "$1" = '-m' ]; then
1835- report monthly `date -d $2 $DAY_FMT` `date -d "$2 + 1 month" $DAY_FMT` $link
1836-else
1837- # Default invocation used from cron to generate latest one.
1838- now=`date $DAY_FMT`
1839- report daily `date -d yesterday $DAY_FMT` $now link
1840-
1841- if [ `date +%a` = 'Sun' ]; then
1842- report weekly `date -d 'last week' $DAY_FMT` $now link
1843- fi
1844-
1845- if [ `date +%d` = '01' ]; then
1846- report monthly `date -d 'last month' $DAY_FMT` $now link
1847- fi
1848-fi
1849
1850=== removed file 'utilities/page-performance-report.ini'
1851--- utilities/page-performance-report.ini 2011-09-21 21:12:02 +0000
1852+++ utilities/page-performance-report.ini 1970-01-01 00:00:00 +0000
1853@@ -1,79 +0,0 @@
1854-[categories]
1855-# Category -> Python regular expression.
1856-# Remember to quote ?, ., and + characters to match them literally.
1857-# 'kodos' is useful for interactively testing regular expressions.
1858-All Launchpad=.
1859-All Launchpad except operational pages=(?<!\+opstats|\+haproxy)$
1860-
1861-API=(^https?://api\.|/\+access-token$)
1862-Operational=(\+opstats|\+haproxy)$
1863-Web (Non API/non operational/non XML-RPC)=^https?://(?!api\.)
1864- [^/]+($|/
1865- (?!\+haproxy|\+opstats|\+access-token
1866- |((authserver|bugs|bazaar|codehosting|
1867- codeimportscheduler|mailinglists|softwarecenteragent|
1868- featureflags)/\w+$)))
1869-Other=^/
1870-
1871-Launchpad Frontpage=^https?://launchpad\.[^/]+(/index\.html)?$
1872-
1873-# Note that the bug text dump is served on the main launchpad domain
1874-# and we need to exclude it from the registry stats.
1875-Registry=^https?://launchpad\..*(?<!/\+text)(?<!/\+access-token)$
1876-Registry - Person Index=^https?://launchpad\.[^/]+/%7E[^/]+(/\+index)?$
1877-Registry - Pillar Index=^https?://launchpad\.[^/]+/\w[^/]*(/\+index)?$
1878-
1879-Answers=^https?://answers\.
1880-Answers - Front page=^https?://answers\.[^/]+(/questions/\+index)?$
1881-
1882-Blueprints=^https?://blueprints\.
1883-Blueprints - Front page=^https?://blueprints\.[^/]+(/specs/\+index)?$
1884-
1885-# Note that the bug text dump is not served on the bugs domain,
1886-# probably for hysterical reasons. This is why the bugs regexp is
1887-# confusing.
1888-Bugs=^https?://(bugs\.|.+/bugs/\d+/\+text$)
1889-Bugs - Front page=^https?://bugs\.[^/]+(/bugs/\+index)?$
1890-Bugs - Bug Page=^https?://bugs\.[^/]+/.+/\+bug/\d+(/\+index)?$
1891-Bugs - Pillar Index=^https?://bugs\.[^/]+/\w[^/]*(/\+bugs-index)?$
1892-Bugs - Search=^https?://bugs\.[^/]+/.+/\+bugs$
1893-Bugs - Text Dump=^https?://launchpad\..+/\+text$
1894-
1895-Code=^https?://code\.
1896-Code - Front page=^https?://code\.[^/]+(/\+code/\+index)?$
1897-Code - Pillar Branches=^https?://code\.[^/]+/\w[^/]*(/\+code-index)?$
1898-Code - Branch Page=^https?://code\.[^/]+/%7E[^/]+/[^/]+/[^/]+(/\+index)?$
1899-Code - Merge Proposal=^https?://code\.[^/]+/.+/\+merge/\d+(/\+index)$
1900-
1901-Soyuz - PPA Index=^https?://launchpad\.[^/]+/.+/\+archive/[^/]+(/\+index)?$
1902-
1903-Translations=^https?://translations\.
1904-Translations - Front page=^https?://translations\.[^/]+/translations/\+index$
1905-Translations - Overview=^https?://translations\..*/\+lang/\w+(/\+index)?$
1906-
1907-Public XML-RPC=^https://(launchpad|xmlrpc)[^/]+/bazaar/\w+$
1908-Private XML-RPC=^https://(launchpad|xmlrpc)[^/]+/
1909- (authserver|bugs|codehosting|
1910- codeimportscheduler|mailinglists|
1911- softwarecenteragent|featureflags)/\w+$
1912-
1913-[metrics]
1914-ppr_all=All Launchpad except operational pages
1915-ppr_web=Web (Non API/non operational/non XML-RPC)
1916-ppr_operational=Operational
1917-ppr_bugs=Bugs
1918-ppr_api=API
1919-ppr_code=Code
1920-ppr_public_xmlrpc=Public XML-RPC
1921-ppr_private_xmlrpc=Private XML-RPC
1922-ppr_translations=Translations
1923-ppr_registry=Registry
1924-ppr_other=Other
1925-
1926-[partition]
1927-API=
1928-Operational=
1929-Private XML-RPC=
1930-Public XML-RPC=
1931-Web (Non API/non operational/non XML-RPC)=
1932-Other=
1933
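With this ini file (and its kodos hint) gone from the tree, here is a hypothetical way to sanity-check one of the category patterns above from a Python prompt. The re.I | re.X flags are an assumption, suggested by the multi-line entries, which only make sense when whitespace inside the pattern is ignored:

    import re

    # The 'Bugs' pattern from the [categories] section, checked by hand.
    BUGS = re.compile(r"^https?://(bugs\.|.+/bugs/\d+/\+text$)", re.I | re.X)

    assert BUGS.search("https://bugs.launchpad.net/launchpad/+bug/1")
    assert BUGS.search("https://launchpad.net/bugs/123456/+text")
    assert not BUGS.search("https://answers.launchpad.net/launchpad")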
1934=== removed file 'utilities/page-performance-report.py'
1935--- utilities/page-performance-report.py 2010-04-27 19:48:39 +0000
1936+++ utilities/page-performance-report.py 1970-01-01 00:00:00 +0000
1937@@ -1,18 +0,0 @@
1938-#!/usr/bin/python -S
1939-#
1940-# Copyright 2010 Canonical Ltd. This software is licensed under the
1941-# GNU Affero General Public License version 3 (see the file LICENSE).
1942-
1943-"""Page performance report generated from zserver tracelogs."""
1944-
1945-__metaclass__ = type
1946-
1947-import _pythonpath
1948-
1949-import sys
1950-
1951-from lp.scripts.utilities.pageperformancereport import main
1952-
1953-
1954-if __name__ == '__main__':
1955- sys.exit(main())