Merge lp:~lifeless/launchpad/ppr-move into lp:launchpad
Proposed by: Robert Collins
Status: Merged
Approved by: Robert Collins
Approved revision: no longer in the source branch.
Merged at revision: 15780
Proposed branch: lp:~lifeless/launchpad/ppr-move
Merge into: lp:launchpad
Diff against target: 1955 lines (+0/-1931), 5 files modified
  lib/lp/scripts/utilities/pageperformancereport.py (+0/-1236)
  lib/lp/scripts/utilities/tests/test_pageperformancereport.py (+0/-483)
  utilities/page-performance-report-daily.sh (+0/-115)
  utilities/page-performance-report.ini (+0/-79)
  utilities/page-performance-report.py (+0/-18)
To merge this branch: bzr merge lp:~lifeless/launchpad/ppr-move
Related bugs: none
Reviewer: Robert Collins (community), status: Approve
Review via email: mp+118873@code.launchpad.net
Commit message
PPR is now in lp-dev-utils, remove it from LP so we don't have two copies.
Description of the change
PPR is now in lp-dev-utils, remove it from LP so we don't have two copies.
Preview Diff
1 | === removed file 'lib/lp/scripts/utilities/pageperformancereport.py' |
2 | --- lib/lp/scripts/utilities/pageperformancereport.py 2012-06-29 08:40:05 +0000 |
3 | +++ lib/lp/scripts/utilities/pageperformancereport.py 1970-01-01 00:00:00 +0000 |
4 | @@ -1,1236 +0,0 @@ |
5 | -# Copyright 2010 Canonical Ltd. This software is licensed under the |
6 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
7 | - |
8 | -"""Page performance report generated from zserver trace logs.""" |
9 | - |
10 | -__metaclass__ = type |
11 | -__all__ = ['main'] |
12 | - |
13 | -import bz2 |
14 | -from cgi import escape as html_quote |
15 | -from ConfigParser import RawConfigParser |
16 | -import copy |
17 | -import cPickle |
18 | -import csv |
19 | -from datetime import datetime |
20 | -import gzip |
21 | -import math |
22 | -import os.path |
23 | -import re |
24 | -import textwrap |
25 | -from textwrap import dedent |
26 | -import time |
27 | - |
28 | -import simplejson as json |
29 | -import sre_constants |
30 | -import zc.zservertracelog.tracereport |
31 | - |
32 | -from lp.scripts.helpers import LPOptionParser |
33 | -from lp.services.config import config |
34 | -from lp.services.scripts.logger import log |
35 | - |
36 | - |
37 | -class Request(zc.zservertracelog.tracereport.Request): |
38 | - url = None |
39 | - pageid = None |
40 | - ticks = None |
41 | - sql_statements = None |
42 | - sql_seconds = None |
43 | - |
44 | - # Override the broken version in our superclass that always |
45 | - # returns an integer. |
46 | - @property |
47 | - def app_seconds(self): |
48 | - interval = self.app_time - self.start_app_time |
49 | - return interval.seconds + interval.microseconds / 1000000.0 |
50 | - |
51 | - # Override the broken version in our superclass that always |
52 | - # returns an integer. |
53 | - @property |
54 | - def total_seconds(self): |
55 | - interval = self.end - self.start |
56 | - return interval.seconds + interval.microseconds / 1000000.0 |
57 | - |
58 | - |
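Both overrides exist because the superclass property truncates the interval to whole seconds; a short illustration (not code from this branch) of the fractional computation they use:

```python
from datetime import datetime, timedelta

start = datetime(2012, 8, 9, 13, 45, 5)
end = start + timedelta(seconds=1, microseconds=750000)
interval = end - start
# interval.seconds alone reports 1 (the truncation being worked
# around); adding the microseconds restores the fraction.
assert interval.seconds + interval.microseconds / 1000000.0 == 1.75
```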
59 | -class Category: |
60 | - """A Category in our report. |
61 | - |
62 | - Requests belong to a Category if the URL matches a regular expression. |
63 | - """ |
64 | - |
65 | - def __init__(self, title, regexp): |
66 | - self.title = title |
67 | - self.regexp = regexp |
68 | - self._compiled_regexp = re.compile(regexp, re.I | re.X) |
69 | - self.partition = False |
70 | - |
71 | - def match(self, request): |
72 | - """Return true when the request matches this category.""" |
73 | - return self._compiled_regexp.search(request.url) is not None |
74 | - |
75 | - def __cmp__(self, other): |
76 | - return cmp(self.title.lower(), other.title.lower()) |
77 | - |
78 | - def __deepcopy__(self, memo): |
79 | - # We provide __deepcopy__ because the copy module doesn't handle |
80 | - # compiled regular expressions by default. |
81 | - return Category(self.title, self.regexp) |
82 | - |
83 | - |
84 | -class OnlineStatsCalculator: |
85 | - """Object that can compute count, sum, mean, variance and median. |
86 | - |
87 | - It computes these values incrementally, using minimal storage, |
88 | - via the Welford / Knuth algorithm described at |
89 | - http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm |
90 | - """ |
91 | - |
92 | - def __init__(self): |
93 | - self.count = 0 |
94 | - self.sum = 0 |
95 | - self.M2 = 0.0 # Sum of square difference |
96 | - self.mean = 0.0 |
97 | - |
98 | - def update(self, x): |
99 | - """Incrementally update the stats when adding x to the set. |
100 | - |
101 | - None values are ignored. |
102 | - """ |
103 | - if x is None: |
104 | - return |
105 | - self.count += 1 |
106 | - self.sum += x |
107 | - delta = x - self.mean |
108 | - self.mean = float(self.sum)/self.count |
109 | - self.M2 += delta*(x - self.mean) |
110 | - |
111 | - @property |
112 | - def variance(self): |
113 | - """Return the population variance.""" |
114 | - if self.count == 0: |
115 | - return 0 |
116 | - else: |
117 | - return self.M2/self.count |
118 | - |
119 | - @property |
120 | - def std(self): |
121 | - """Return the standard deviation.""" |
122 | - if self.count == 0: |
123 | - return 0 |
124 | - else: |
125 | - return math.sqrt(self.variance) |
126 | - |
127 | - def __add__(self, other): |
128 | - """Adds this and another OnlineStatsCalculator. |
129 | - |
130 | - The result combines the stats of the two objects. |
131 | - """ |
132 | - results = OnlineStatsCalculator() |
133 | - results.count = self.count + other.count |
134 | - results.sum = self.sum + other.sum |
135 | - if self.count > 0 and other.count > 0: |
136 | - # This is 2.1b in Chan, Tony F.; Golub, Gene H.; LeVeque, |
137 | - # Randall J. (1979), "Updating Formulae and a Pairwise Algorithm |
138 | - # for Computing Sample Variances.", |
139 | - # Technical Report STAN-CS-79-773, |
140 | - # Department of Computer Science, Stanford University, |
141 | - # ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf . |
142 | - results.M2 = self.M2 + other.M2 + ( |
143 | - (float(self.count) / (other.count * results.count)) * |
144 | - ((float(other.count) / self.count) * self.sum - other.sum)**2) |
145 | - else: |
146 | - results.M2 = self.M2 + other.M2 # One of them is 0. |
147 | - if results.count > 0: |
148 | - results.mean = float(results.sum) / results.count |
149 | - return results |
150 | - |
151 | - |
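The update() and __add__ methods above implement Welford's incremental update and the Chan/Golub/LeVeque pairwise combination. A minimal Python 3 sketch (not code from this branch) that checks both paths against the standard library's batch formula:

```python
import statistics

class Welford:
    """Incremental count / mean / population variance."""
    def __init__(self):
        self.count, self.mean, self.m2 = 0, 0.0, 0.0

    def update(self, x):
        self.count += 1
        delta = x - self.mean
        self.mean += delta / self.count
        self.m2 += delta * (x - self.mean)  # old delta times new delta

    def merge(self, other):
        # Pairwise combination (2.1b in Chan et al.); algebraically the
        # same quantity as the M2 formula used in __add__ above.
        merged = Welford()
        merged.count = self.count + other.count
        delta = other.mean - self.mean
        merged.mean = self.mean + delta * other.count / merged.count
        merged.m2 = (self.m2 + other.m2 +
                     delta ** 2 * self.count * other.count / merged.count)
        return merged

data = [4.5, 4.2, 5.5, 2.5, 3.5, 2.5]
a, b = Welford(), Welford()
for x in data[:3]:
    a.update(x)
for x in data[3:]:
    b.update(x)
merged = a.merge(b)
assert abs(merged.m2 / merged.count - statistics.pvariance(data)) < 1e-9
```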
152 | -class OnlineApproximateMedian: |
153 | - """Approximate the median of a set of elements. |
154 | - |
155 | - This implements a space-efficient algorithm which only sees each value |
156 | - once. (It holds in memory on the order of log base bucket_size of n elements.) |
157 | - |
158 | - It was described and analysed in |
159 | - D. Cantone and M. Hofri, |
160 | - "Analysis of An Approximate Median Selection Algorithm" |
161 | - ftp://ftp.cs.wpi.edu/pub/techreports/pdf/06-17.pdf |
162 | - |
163 | - This algorithm is similar to Tukey's median of medians technique. |
164 | - It computes the median among each group of bucket_size values, and |
165 | - then the median among those medians. |
166 | - """ |
167 | - |
168 | - def __init__(self, bucket_size=9): |
169 | - """Creates a new estimator. |
170 | - |
171 | - It approximates the median by finding the median among each |
172 | - successive group of bucket_size elements, and then using these |
173 | - medians for further rounds of selection. |
174 | - |
175 | - The bucket size should be a small odd integer. |
176 | - """ |
177 | - self.bucket_size = bucket_size |
178 | - # Index of the median in a completed bucket. |
179 | - self.median_idx = (bucket_size-1)//2 |
180 | - self.buckets = [] |
181 | - |
182 | - def update(self, x, order=0): |
183 | - """Update with x.""" |
184 | - if x is None: |
185 | - return |
186 | - |
187 | - i = order |
188 | - while True: |
189 | - # Create bucket on demand. |
190 | - if i >= len(self.buckets): |
191 | - for n in range((i+1)-len(self.buckets)): |
192 | - self.buckets.append([]) |
193 | - bucket = self.buckets[i] |
194 | - bucket.append(x) |
195 | - if len(bucket) == self.bucket_size: |
196 | - # Select the median in this bucket, and promote it. |
197 | - x = sorted(bucket)[self.median_idx] |
198 | - # Free the bucket for the next round. |
199 | - del bucket[:] |
200 | - i += 1 |
201 | - continue |
202 | - else: |
203 | - break |
204 | - |
205 | - @property |
206 | - def median(self): |
207 | - """Return the median.""" |
208 | - # Find the 'weighted' median by assigning a weight to each |
209 | - # element proportional to how many rounds of selection it survived. |
210 | - candidates = [] |
211 | - total_weight = 0 |
212 | - for i, bucket in enumerate(self.buckets): |
213 | - weight = self.bucket_size ** i |
214 | - for x in bucket: |
215 | - total_weight += weight |
216 | - candidates.append([x, weight]) |
217 | - if len(candidates) == 0: |
218 | - return 0 |
219 | - |
220 | - # Each weight is the equivalent of having the candidates appear |
221 | - # that number of times in the array. |
222 | - # So buckets like [[1, 2], [2, 3], [4, 2]] would be expanded to |
223 | - # [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, |
224 | - # 4, 4, 4, 4, 4] and we find the median of that list (2). |
225 | - # We don't expand the items to conserve memory. |
226 | - median = (total_weight-1) / 2 |
227 | - weighted_idx = 0 |
228 | - for x, weight in sorted(candidates): |
229 | - weighted_idx += weight |
230 | - if weighted_idx > median: |
231 | - return x |
232 | - |
233 | - def __add__(self, other): |
234 | - """Merge two approximators together. |
235 | - |
236 | - All candidates from the other are merged through the standard |
237 | - algorithm, starting at the same level. So an item that went through |
238 | - two rounds of selection will be compared with other items that have |
239 | - gone through the same number of rounds. |
240 | - """ |
241 | - results = OnlineApproximateMedian(self.bucket_size) |
242 | - results.buckets = copy.deepcopy(self.buckets) |
243 | - for i, bucket in enumerate(other.buckets): |
244 | - for x in bucket: |
245 | - results.update(x, i) |
246 | - return results |
247 | - |
248 | - |
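The median property above never expands the candidates by weight; a sketch (illustration only) of that weighted-median walk, matching the worked expansion in the comments above:

```python
def weighted_median(pairs):
    """Median where each (value, weight) pair stands in for `weight`
    copies of `value`, without materialising the copies."""
    total = sum(weight for _, weight in pairs)
    target = (total - 1) // 2
    seen = 0
    for value, weight in sorted(pairs):
        seen += weight
        if seen > target:
            return value

# With bucket_size 3 the weights are 3**level: the buckets
# [[1, 2], [2, 3], [4, 2]] from the comment above yield these pairs,
# and the median of the 26 expanded values is 2.
pairs = [(1, 1), (2, 1), (2, 3), (3, 3), (4, 9), (2, 9)]
assert weighted_median(pairs) == 2
```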
249 | -class Stats: |
250 | - """Bag to hold and compute request statistics. |
251 | - |
252 | - All times are in seconds. |
253 | - """ |
254 | - total_hits = 0 # Total hits. |
255 | - |
256 | - total_time = 0 # Total time spent rendering. |
257 | - mean = 0 # Mean time per hit. |
258 | - median = 0 # Median time per hit. |
259 | - std = 0 # Standard deviation per hit. |
260 | - histogram = None # Request times histogram. |
261 | - |
262 | - total_sqltime = 0 # Total time spent waiting for SQL to process. |
263 | - mean_sqltime = 0 # Mean time spent waiting for SQL to process. |
264 | - median_sqltime = 0 # Median time spent waiting for SQL to process. |
265 | - std_sqltime = 0 # Standard deviation of SQL time. |
266 | - |
267 | - total_sqlstatements = 0 # Total number of SQL statements issued. |
268 | - mean_sqlstatements = 0 |
269 | - median_sqlstatements = 0 |
270 | - std_sqlstatements = 0 |
271 | - |
272 | - @property |
273 | - def ninetyninth_percentile_time(self): |
274 | - """Time under which 99% of requests are rendered. |
275 | - |
276 | - This is estimated as 3 std deviations from the mean. Given that |
277 | - many URLs or PageIds won't have 100 requests in a daily report, this |
278 | - estimator is more useful than the exact percentile. |
279 | - """ |
280 | - return self.mean + 3*self.std |
281 | - |
282 | - @property |
283 | - def ninetyninth_percentile_sqltime(self): |
284 | - """SQL time under which 99% of requests are rendered. |
285 | - |
286 | - This is estimated as 3 std deviations from the mean. |
287 | - """ |
288 | - return self.mean_sqltime + 3*self.std_sqltime |
289 | - |
290 | - @property |
291 | - def ninetyninth_percentile_sqlstatements(self): |
292 | - """Number of SQL statements under which 99% of requests are rendered. |
293 | - |
294 | - This is estimated as 3 std deviations from the mean. |
295 | - """ |
296 | - return self.mean_sqlstatements + 3*self.std_sqlstatements |
297 | - |
298 | - def text(self): |
299 | - """Return a textual version of the stats.""" |
300 | - return textwrap.dedent(""" |
301 | - <Stats for %d requests: |
302 | - Time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f |
303 | - SQL time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f |
304 | - SQL stmt: total=%.f; mean=%.2f; median=%.f; std=%.2f |
305 | - >""" % ( |
306 | - self.total_hits, self.total_time, self.mean, self.median, |
307 | - self.std, self.total_sqltime, self.mean_sqltime, |
308 | - self.median_sqltime, self.std_sqltime, |
309 | - self.total_sqlstatements, self.mean_sqlstatements, |
310 | - self.median_sqlstatements, self.std_sqlstatements)) |
311 | - |
312 | - |
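A note on the mean + 3*std proxy used by the percentile properties above: if render times were normally distributed it would cover about 99.9% of requests, and even with no distributional assumption Cantelli's inequality bounds the mass above mean + 3*std at 1/(1 + 3**2) = 10%. A sketch with invented numbers:

```python
mean, std = 0.8, 0.4           # seconds, as maintained online below
ninetyninth = mean + 3 * std   # 2.0s: at least ~90% of requests fall
                               # under this whatever the distribution
```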
313 | -class OnlineStats(Stats): |
314 | - """Implementation of stats that can be computed online. |
315 | - |
316 | - You call update() for each request and the stats are updated incrementally |
317 | - with minimum storage space. |
318 | - """ |
319 | - |
320 | - def __init__(self, histogram_width, histogram_resolution): |
321 | - self.time_stats = OnlineStatsCalculator() |
322 | - self.time_median_approximate = OnlineApproximateMedian() |
323 | - self.sql_time_stats = OnlineStatsCalculator() |
324 | - self.sql_time_median_approximate = OnlineApproximateMedian() |
325 | - self.sql_statements_stats = OnlineStatsCalculator() |
326 | - self.sql_statements_median_approximate = OnlineApproximateMedian() |
327 | - self.histogram = Histogram(histogram_width, histogram_resolution) |
328 | - |
329 | - @property |
330 | - def total_hits(self): |
331 | - return self.time_stats.count |
332 | - |
333 | - @property |
334 | - def total_time(self): |
335 | - return self.time_stats.sum |
336 | - |
337 | - @property |
338 | - def mean(self): |
339 | - return self.time_stats.mean |
340 | - |
341 | - @property |
342 | - def median(self): |
343 | - return self.time_median_approximate.median |
344 | - |
345 | - @property |
346 | - def std(self): |
347 | - return self.time_stats.std |
348 | - |
349 | - @property |
350 | - def total_sqltime(self): |
351 | - return self.sql_time_stats.sum |
352 | - |
353 | - @property |
354 | - def mean_sqltime(self): |
355 | - return self.sql_time_stats.mean |
356 | - |
357 | - @property |
358 | - def median_sqltime(self): |
359 | - return self.sql_time_median_approximate.median |
360 | - |
361 | - @property |
362 | - def std_sqltime(self): |
363 | - return self.sql_time_stats.std |
364 | - |
365 | - @property |
366 | - def total_sqlstatements(self): |
367 | - return self.sql_statements_stats.sum |
368 | - |
369 | - @property |
370 | - def mean_sqlstatements(self): |
371 | - return self.sql_statements_stats.mean |
372 | - |
373 | - @property |
374 | - def median_sqlstatements(self): |
375 | - return self.sql_statements_median_approximate.median |
376 | - |
377 | - @property |
378 | - def std_sqlstatements(self): |
379 | - return self.sql_statements_stats.std |
380 | - |
381 | - def update(self, request): |
382 | - """Update the stats based on request.""" |
383 | - self.time_stats.update(request.app_seconds) |
384 | - self.time_median_approximate.update(request.app_seconds) |
385 | - self.sql_time_stats.update(request.sql_seconds) |
386 | - self.sql_time_median_approximate.update(request.sql_seconds) |
387 | - self.sql_statements_stats.update(request.sql_statements) |
388 | - self.sql_statements_median_approximate.update(request.sql_statements) |
389 | - self.histogram.update(request.app_seconds) |
390 | - |
391 | - def __add__(self, other): |
392 | - """Merge another OnlineStats with this one.""" |
393 | - results = copy.deepcopy(self) |
394 | - results.time_stats += other.time_stats |
395 | - results.time_median_approximate += other.time_median_approximate |
396 | - results.sql_time_stats += other.sql_time_stats |
397 | - results.sql_time_median_approximate += ( |
398 | - other.sql_time_median_approximate) |
399 | - results.sql_statements_stats += other.sql_statements_stats |
400 | - results.sql_statements_median_approximate += ( |
401 | - other.sql_statements_median_approximate) |
402 | - results.histogram = self.histogram + other.histogram |
403 | - return results |
404 | - |
405 | - |
406 | -class Histogram: |
407 | - """A simple object to compute the histogram of a value.""" |
408 | - |
409 | - @staticmethod |
410 | - def from_bins_data(data): |
411 | - """Create a histogram from existing bins data.""" |
412 | - assert data[0][0] == 0, "First bin should start at zero." |
413 | - |
414 | - hist = Histogram(len(data), data[1][0]) |
415 | - for idx, bin in enumerate(data): |
416 | - hist.count += bin[1] |
417 | - hist.bins[idx][1] = bin[1] |
418 | - |
419 | - return hist |
420 | - |
421 | - def __init__(self, bins_count, bins_size): |
422 | - """Create a new histogram. |
423 | - |
424 | - The histogram will count the frequency of values in bins_count bins |
425 | - of bins_size each. |
426 | - """ |
427 | - self.count = 0 |
428 | - self.bins_count = bins_count |
429 | - self.bins_size = bins_size |
430 | - self.bins = [] |
431 | - for x in range(bins_count): |
432 | - self.bins.append([x*bins_size, 0]) |
433 | - |
434 | - @property |
435 | - def bins_relative(self): |
436 | - """Return the bins with the frequency expressed as a ratio.""" |
437 | - return [[x, float(f)/self.count] for x, f in self.bins] |
438 | - |
439 | - def update(self, value): |
440 | - """Update the histogram for this value. |
441 | - |
442 | - All values higher than the last bin minimum are counted in that last |
443 | - bin. |
444 | - """ |
445 | - self.count += 1 |
446 | - idx = int(min(self.bins_count-1, value / self.bins_size)) |
447 | - self.bins[idx][1] += 1 |
448 | - |
449 | - def __repr__(self): |
450 | - """A string representation of this histogram.""" |
451 | - return "<Histogram %s>" % self.bins |
452 | - |
453 | - def __eq__(self, other): |
454 | - """Two histograms are equal if they have the same bin contents.""" |
455 | - if not isinstance(other, Histogram): |
456 | - return False |
457 | - |
458 | - if self.bins_count != other.bins_count: |
459 | - return False |
460 | - |
461 | - if self.bins_size != other.bins_size: |
462 | - return False |
463 | - |
464 | - for idx, other_bin in enumerate(other.bins): |
465 | - if self.bins[idx][1] != other_bin[1]: |
466 | - return False |
467 | - |
468 | - return True |
469 | - |
470 | - def __add__(self, other): |
471 | - """Add the frequency of the other histogram to this one. |
472 | - |
473 | - The resulting histogram has the same bins_size as this one. |
474 | - If the other one has a bigger bins_size, we'll assume an even |
475 | - distribution and distribute the frequency across the smaller bins. If |
476 | - it has a lower bins_size, we'll aggregate its bins into the larger |
477 | - ones. We only support different bins_size if the ratio can be |
478 | - expressed as the ratio between 1 and an integer. |
479 | - |
480 | - The resulting histogram is as wide as the widest one. |
481 | - """ |
482 | - ratio = float(other.bins_size) / self.bins_size |
483 | - bins_count = max(self.bins_count, math.ceil(other.bins_count * ratio)) |
484 | - total = Histogram(int(bins_count), self.bins_size) |
485 | - total.count = self.count + other.count |
486 | - |
487 | - # Copy our bins into the total |
488 | - for idx, bin in enumerate(self.bins): |
489 | - total.bins[idx][1] = bin[1] |
490 | - |
491 | - assert int(ratio) == ratio or int(1/ratio) == 1/ratio, ( |
492 | - "We only support different bins_size values when the ratio " |
493 | - "is an integer to 1: %s" |
494 | - % ratio) |
495 | - |
496 | - if ratio >= 1: |
497 | - # We distribute the frequency across the bins. |
498 | - # For example, if the ratio is 3:1, we'll add a third |
499 | - # of the lower resolution bin to 3 of the higher one. |
500 | - for other_idx, bin in enumerate(other.bins): |
501 | - f = bin[1] / ratio |
502 | - start = int(math.floor(other_idx * ratio)) |
503 | - end = int(start + ratio) |
504 | - for idx in range(start, end): |
505 | - total.bins[idx][1] += f |
506 | - else: |
507 | - # We need to collect the higher resolution bins into the |
508 | - # corresponding lower one. |
509 | - for other_idx, bin in enumerate(other.bins): |
510 | - idx = int(other_idx * ratio) |
511 | - total.bins[idx][1] += bin[1] |
512 | - |
513 | - return total |
514 | - |
515 | - |
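The ratio handling in __add__ above either spreads a coarse bin's count evenly over the finer bins it covers, or collapses fine bins into the matching coarse one. A worked sketch (values invented) of the 2:1 spread case:

```python
# Merging 1.0s-wide bins into 0.5s-wide bins (ratio 2): each coarse
# count is split evenly across the two finer bins it spans.
coarse = [[0.0, 4], [1.0, 6]]                    # bins_size = 1.0
fine = [[0.0, 0], [0.5, 0], [1.0, 0], [1.5, 0]]  # bins_size = 0.5
ratio = 2
for idx, (_, count) in enumerate(coarse):
    share = count / ratio
    for j in range(idx * ratio, (idx + 1) * ratio):
        fine[j][1] += share
assert [f for _, f in fine] == [2.0, 2.0, 3.0, 3.0]
```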
516 | -class RequestTimes: |
517 | - """Collect statistics from requests. |
518 | - |
519 | - Statistics are updated by calling the add_request() method. |
520 | - |
521 | - Statistics for mean/stddev/total/median for request times, SQL times and |
522 | - number of SQL statements are collected. |
523 | - |
524 | - They are grouped by Category, URL or PageID. |
525 | - """ |
526 | - |
527 | - def __init__(self, categories, options): |
528 | - self.by_pageids = options.pageids |
529 | - self.top_urls = options.top_urls |
530 | - # We only keep in memory 50 times the number of URLs we want to |
531 | - # return. The number of URLs can go pretty high (because of the |
532 | - # distinct query parameters). |
533 | - # |
534 | - # Keeping all in memory at once is prohibitive. On a small but |
535 | - # representative sample, keeping 50 times the possible number of |
536 | - # candidates and culling to 90% on overflow generated a report |
537 | - # identical to keeping all the candidates in memory. |
538 | - # |
539 | - # Keeping 10 times or culling at 90% generated a near-identical report |
540 | - # (it differed a little in the tail.) |
541 | - # |
542 | - # The size/cull parameters might need to change if the request |
543 | - # distribution becomes very different from what it currently is. |
544 | - self.top_urls_cache_size = self.top_urls * 50 |
545 | - |
546 | - # The histogram has one bin per resolution step up to our timeout |
547 | - # (and an extra bin). |
548 | - self.histogram_resolution = float(options.resolution) |
549 | - self.histogram_width = int( |
550 | - options.timeout / self.histogram_resolution) + 1 |
551 | - self.category_times = [ |
552 | - (category, OnlineStats( |
553 | - self.histogram_width, self.histogram_resolution)) |
554 | - for category in categories] |
555 | - self.url_times = {} |
556 | - self.pageid_times = {} |
557 | - |
558 | - def add_request(self, request): |
559 | - """Add request to the set of requests we collect stats for.""" |
560 | - matched = [] |
561 | - for category, stats in self.category_times: |
562 | - if category.match(request): |
563 | - stats.update(request) |
564 | - if category.partition: |
565 | - matched.append(category.title) |
566 | - |
567 | - if len(matched) > 1: |
568 | - log.warning( |
569 | - "Multiple partition categories matched by %s (%s)", |
570 | - request.url, ", ".join(matched)) |
571 | - elif not matched: |
572 | - log.warning("%s isn't part of the partition", request.url) |
573 | - |
574 | - if self.by_pageids: |
575 | - pageid = request.pageid or 'Unknown' |
576 | - stats = self.pageid_times.setdefault( |
577 | - pageid, OnlineStats( |
578 | - self.histogram_width, self.histogram_resolution)) |
579 | - stats.update(request) |
580 | - |
581 | - if self.top_urls: |
582 | - stats = self.url_times.setdefault( |
583 | - request.url, OnlineStats( |
584 | - self.histogram_width, self.histogram_resolution)) |
585 | - stats.update(request) |
586 | - # Whenever we have more URLs than we need, discard the 10% |
587 | - # least likely to end up in the top. |
588 | - if len(self.url_times) > self.top_urls_cache_size: |
589 | - cutoff = int(self.top_urls_cache_size*0.90) |
590 | - self.url_times = dict( |
591 | - sorted(self.url_times.items(), |
592 | - key=lambda (url, stats): stats.total_time, |
593 | - reverse=True)[:cutoff]) |
594 | - |
595 | - def get_category_times(self): |
596 | - """Return the times for each category.""" |
597 | - return self.category_times |
598 | - |
599 | - def get_top_urls_times(self): |
600 | - """Return the times for the top URLs by total time.""" |
601 | - # Sort the result by total time |
602 | - return sorted( |
603 | - self.url_times.items(), |
604 | - key=lambda (url, stats): stats.total_time, |
605 | - reverse=True)[:self.top_urls] |
606 | - |
607 | - def get_pageid_times(self): |
608 | - """Return the times for the pageids.""" |
609 | - # Sort the result by pageid |
610 | - return sorted(self.pageid_times.items()) |
611 | - |
612 | - def __add__(self, other): |
613 | - """Merge two RequestTimes together.""" |
614 | - results = copy.deepcopy(self) |
615 | - for other_category, other_stats in other.category_times: |
616 | - for i, (category, stats) in enumerate(self.category_times): |
617 | - if category.title == other_category.title: |
618 | - results.category_times[i] = ( |
619 | - category, stats + other_stats) |
620 | - break |
621 | - else: |
622 | - results.category_times.append( |
623 | - (other_category, copy.deepcopy(other_stats))) |
624 | - |
625 | - url_times = results.url_times |
626 | - for url, stats in other.url_times.items(): |
627 | - if url in url_times: |
628 | - url_times[url] += stats |
629 | - else: |
630 | - url_times[url] = copy.deepcopy(stats) |
631 | - # Only keep top_urls_cache_size entries. |
632 | - if len(self.url_times) > self.top_urls_cache_size: |
633 | - self.url_times = dict( |
634 | - sorted( |
635 | - url_times.items(), |
636 | - key=lambda (url, stats): stats.total_time, |
637 | - reverse=True)[:self.top_urls_cache_size]) |
638 | - |
639 | - pageid_times = results.pageid_times |
640 | - for pageid, stats in other.pageid_times.items(): |
641 | - if pageid in pageid_times: |
642 | - pageid_times[pageid] += stats |
643 | - else: |
644 | - pageid_times[pageid] = copy.deepcopy(stats) |
645 | - |
646 | - return results |
647 | - |
648 | - |
649 | -def main(): |
650 | - parser = LPOptionParser("%prog [args] tracelog [...]") |
651 | - |
652 | - parser.add_option( |
653 | - "-c", "--config", dest="config", |
654 | - default=os.path.join( |
655 | - config.root, "utilities", "page-performance-report.ini"), |
656 | - metavar="FILE", help="Load configuration from FILE") |
657 | - parser.add_option( |
658 | - "--from", dest="from_ts", type="datetime", |
659 | - default=None, metavar="TIMESTAMP", |
660 | - help="Ignore log entries before TIMESTAMP") |
661 | - parser.add_option( |
662 | - "--until", dest="until_ts", type="datetime", |
663 | - default=None, metavar="TIMESTAMP", |
664 | - help="Ignore log entries after TIMESTAMP") |
665 | - parser.add_option( |
666 | - "--no-partition", dest="partition", |
667 | - action="store_false", default=True, |
668 | - help="Do not produce partition report") |
669 | - parser.add_option( |
670 | - "--no-categories", dest="categories", |
671 | - action="store_false", default=True, |
672 | - help="Do not produce categories report") |
673 | - parser.add_option( |
674 | - "--no-pageids", dest="pageids", |
675 | - action="store_false", default=True, |
676 | - help="Do not produce pageids report") |
677 | - parser.add_option( |
678 | - "--top-urls", dest="top_urls", type=int, metavar="N", |
679 | - default=50, help="Generate report for top N URLs by total time.") |
680 | - parser.add_option( |
681 | - "--directory", dest="directory", |
682 | - default=os.getcwd(), metavar="DIR", |
683 | - help="Output reports in DIR directory") |
684 | - parser.add_option( |
685 | - "--timeout", dest="timeout", |
686 | - # Default to 9: our production timeout. |
687 | - default=9, type="int", metavar="SECONDS", |
688 | - help="The configured timeout value: used to determine high risk " + |
689 | - "page ids, i.e. pages whose 99%-under render time is " |
690 | - "greater than timeout - 2s. Default is %default.") |
691 | - parser.add_option( |
692 | - "--histogram-resolution", dest="resolution", |
693 | - # Default to 0.5s |
694 | - default=0.5, type="float", metavar="SECONDS", |
695 | - help="The resolution of the histogram bin width. Default is " |
696 | - "%default.") |
697 | - parser.add_option( |
698 | - "--merge", dest="merge", |
699 | - default=False, action='store_true', |
700 | - help="Files are interpreted as pickled stats and are aggregated " + |
701 | - "for the report.") |
702 | - |
703 | - options, args = parser.parse_args() |
704 | - |
705 | - if not os.path.isdir(options.directory): |
706 | - parser.error("Directory %s does not exist" % options.directory) |
707 | - |
708 | - if len(args) == 0: |
709 | - parser.error("At least one zserver tracelog file must be provided") |
710 | - |
711 | - if options.from_ts is not None and options.until_ts is not None: |
712 | - if options.from_ts > options.until_ts: |
713 | - parser.error( |
714 | - "--from timestamp %s is after --until timestamp %s" |
715 | - % (options.from_ts, options.until_ts)) |
716 | - if options.from_ts is not None or options.until_ts is not None: |
717 | - if options.merge: |
718 | - parser.error('--from and --until cannot be used with --merge') |
719 | - |
720 | - for filename in args: |
721 | - if not os.path.exists(filename): |
722 | - parser.error("Tracelog file %s not found." % filename) |
723 | - |
724 | - if not os.path.exists(options.config): |
725 | - parser.error("Config file %s not found." % options.config) |
726 | - |
727 | - # Need a better config mechanism as ConfigParser doesn't preserve order. |
728 | - script_config = RawConfigParser() |
729 | - script_config.optionxform = str # Make keys case sensitive. |
730 | - script_config.readfp(open(options.config)) |
731 | - |
732 | - categories = [] # A list of Category, in report order. |
733 | - for option in script_config.options('categories'): |
734 | - regexp = script_config.get('categories', option) |
735 | - try: |
736 | - categories.append(Category(option, regexp)) |
737 | - except sre_constants.error as x: |
738 | - log.fatal("Unable to compile regexp %r (%s)" % (regexp, x)) |
739 | - return 1 |
740 | - categories.sort() |
741 | - |
742 | - if len(categories) == 0: |
743 | - parser.error("No data in [categories] section of configuration.") |
744 | - |
745 | - # Determine the categories making a partition of the requests |
746 | - for option in script_config.options('partition'): |
747 | - for category in categories: |
748 | - if category.title == option: |
749 | - category.partition = True |
750 | - break |
751 | - else: |
752 | - log.warning( |
753 | - "In partition definition: %s isn't a defined category", |
754 | - option) |
755 | - |
756 | - times = RequestTimes(categories, options) |
757 | - |
758 | - if options.merge: |
759 | - for filename in args: |
760 | - log.info('Merging %s...' % filename) |
761 | - f = bz2.BZ2File(filename, 'r') |
762 | - times += cPickle.load(f) |
763 | - f.close() |
764 | - else: |
765 | - parse(args, times, options) |
766 | - |
767 | - category_times = times.get_category_times() |
768 | - |
769 | - pageid_times = [] |
770 | - url_times = [] |
771 | - if options.top_urls: |
772 | - url_times = times.get_top_urls_times() |
773 | - if options.pageids: |
774 | - pageid_times = times.get_pageid_times() |
775 | - |
776 | - def _report_filename(filename): |
777 | - return os.path.join(options.directory, filename) |
778 | - |
779 | - # Partition report |
780 | - if options.partition: |
781 | - report_filename = _report_filename('partition.html') |
782 | - log.info("Generating %s", report_filename) |
783 | - partition_times = [ |
784 | - category_time |
785 | - for category_time in category_times |
786 | - if category_time[0].partition] |
787 | - html_report( |
788 | - open(report_filename, 'w'), partition_times, None, None, |
789 | - histogram_resolution=options.resolution, |
790 | - category_name='Partition') |
791 | - |
792 | - # Category only report. |
793 | - if options.categories: |
794 | - report_filename = _report_filename('categories.html') |
795 | - log.info("Generating %s", report_filename) |
796 | - html_report( |
797 | - open(report_filename, 'w'), category_times, None, None, |
798 | - histogram_resolution=options.resolution) |
799 | - |
800 | - # Pageid only report. |
801 | - if options.pageids: |
802 | - report_filename = _report_filename('pageids.html') |
803 | - log.info("Generating %s", report_filename) |
804 | - html_report( |
805 | - open(report_filename, 'w'), None, pageid_times, None, |
806 | - histogram_resolution=options.resolution) |
807 | - |
808 | - # Top URL only report. |
809 | - if options.top_urls: |
810 | - report_filename = _report_filename('top%d.html' % options.top_urls) |
811 | - log.info("Generating %s", report_filename) |
812 | - html_report( |
813 | - open(report_filename, 'w'), None, None, url_times, |
814 | - histogram_resolution=options.resolution) |
815 | - |
816 | - # Combined report. |
817 | - if options.categories and options.pageids: |
818 | - report_filename = _report_filename('combined.html') |
819 | - html_report( |
820 | - open(report_filename, 'w'), |
821 | - category_times, pageid_times, url_times, |
822 | - histogram_resolution=options.resolution) |
823 | - |
824 | - # Report of likely timeout candidates |
825 | - report_filename = _report_filename('timeout-candidates.html') |
826 | - log.info("Generating %s", report_filename) |
827 | - html_report( |
828 | - open(report_filename, 'w'), None, pageid_times, None, |
829 | - options.timeout - 2, |
830 | - histogram_resolution=options.resolution) |
831 | - |
832 | - # Save the times cache for later merging. |
833 | - report_filename = _report_filename('stats.pck.bz2') |
834 | - log.info("Saving times database in %s", report_filename) |
835 | - stats_file = bz2.BZ2File(report_filename, 'w') |
836 | - cPickle.dump(times, stats_file, protocol=cPickle.HIGHEST_PROTOCOL) |
837 | - stats_file.close() |
838 | - |
839 | - # Output metrics for selected categories. |
840 | - report_filename = _report_filename('metrics.dat') |
841 | - log.info('Saving category_metrics %s', report_filename) |
842 | - metrics_file = open(report_filename, 'w') |
843 | - writer = csv.writer(metrics_file, delimiter=':') |
844 | - date = options.until_ts or options.from_ts or datetime.utcnow() |
845 | - date = time.mktime(date.timetuple()) |
846 | - |
847 | - for option in script_config.options('metrics'): |
848 | - name = script_config.get('metrics', option) |
849 | - for category, stats in category_times: |
850 | - if category.title == name: |
851 | - writer.writerows([ |
852 | - ("%s_99" % option, "%f@%d" % ( |
853 | - stats.ninetyninth_percentile_time, date)), |
854 | - ("%s_hits" % option, "%d@%d" % (stats.total_hits, date))]) |
855 | - break |
856 | - else: |
857 | - log.warning("Can't find category %s for metric %s" % ( |
858 | - name, option)) |
859 | - metrics_file.close() |
860 | - |
861 | - return 0 |
862 | - |
863 | - |
864 | -def smart_open(filename, mode='r'): |
865 | - """Open a file, transparently handling compressed files. |
866 | - |
867 | - Compressed files are detected by file extension. |
868 | - """ |
869 | - ext = os.path.splitext(filename)[1] |
870 | - if ext == '.bz2': |
871 | - return bz2.BZ2File(filename, 'r') |
872 | - elif ext == '.gz': |
873 | - return gzip.GzipFile(filename, 'r') |
874 | - else: |
875 | - return open(filename, mode) |
876 | - |
877 | - |
878 | -class MalformedLine(Exception): |
879 | - """A malformed line was found in the trace log.""" |
880 | - |
881 | - |
882 | -_ts_re = re.compile( |
883 | - '^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:\.(\d{6}))?$') |
884 | - |
885 | - |
886 | -def parse_timestamp(ts_string): |
887 | - match = _ts_re.search(ts_string) |
888 | - if match is None: |
889 | - raise ValueError("Invalid timestamp") |
890 | - return datetime( |
891 | - *(int(elem) for elem in match.groups() if elem is not None)) |
892 | - |
893 | - |
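For illustration, the two timestamp shapes the regexp above accepts, with and without the six-digit fractional part (using parse_timestamp as defined above):

```python
from datetime import datetime

assert parse_timestamp('2012-08-09 13:45:05.123456') == datetime(
    2012, 8, 9, 13, 45, 5, 123456)
assert parse_timestamp('2012-08-09 13:45:05') == datetime(
    2012, 8, 9, 13, 45, 5)
```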
894 | -def parse(tracefiles, times, options): |
895 | - requests = {} |
896 | - total_requests = 0 |
897 | - for tracefile in tracefiles: |
898 | - log.info('Processing %s', tracefile) |
899 | - for line in smart_open(tracefile): |
900 | - line = line.rstrip() |
901 | - try: |
902 | - record = line.split(' ', 7) |
903 | - try: |
904 | - record_type, request_id, date, time_ = record[:4] |
905 | - except ValueError: |
906 | - raise MalformedLine() |
907 | - |
908 | - if record_type == 'S': |
909 | - # Short circuit - we don't care about these entries. |
910 | - continue |
911 | - |
912 | - # Parse the timestamp. |
913 | - ts_string = '%s %s' % (date, time_) |
914 | - try: |
915 | - dt = parse_timestamp(ts_string) |
916 | - except ValueError: |
917 | - raise MalformedLine( |
918 | - 'Invalid timestamp %s' % repr(ts_string)) |
919 | - |
920 | - # Filter entries by command line date range. |
921 | - if options.from_ts is not None and dt < options.from_ts: |
922 | - continue # Skip to next line. |
923 | - if options.until_ts is not None and dt > options.until_ts: |
924 | - break # Skip to next log file. |
925 | - |
926 | - args = record[4:] |
927 | - |
928 | - def require_args(count): |
929 | - if len(args) < count: |
930 | - raise MalformedLine() |
931 | - |
932 | - if record_type == 'B': # Request begins. |
933 | - require_args(2) |
934 | - requests[request_id] = Request(dt, args[0], args[1]) |
935 | - continue |
936 | - |
937 | - request = requests.get(request_id, None) |
938 | - if request is None: # Just ignore partial records. |
939 | - continue |
940 | - |
941 | - # Old style extension record from Launchpad. Just |
942 | - # contains the URL. |
943 | - if (record_type == '-' and len(args) == 1 |
944 | - and args[0].startswith('http')): |
945 | - request.url = args[0] |
946 | - |
947 | - # New style extension record with a prefix. |
948 | - elif record_type == '-': |
949 | - # Launchpad outputs several things as tracelog |
950 | - # extension records. We include a prefix to tell |
951 | - # them apart. |
952 | - require_args(1) |
953 | - |
954 | - parse_extension_record(request, args) |
955 | - |
956 | - elif record_type == 'I': # Got request input. |
957 | - require_args(1) |
958 | - request.I(dt, args[0]) |
959 | - |
960 | - elif record_type == 'C': # Entered application thread. |
961 | - request.C(dt) |
962 | - |
963 | - elif record_type == 'A': # Application done. |
964 | - require_args(2) |
965 | - request.A(dt, args[0], args[1]) |
966 | - |
967 | - elif record_type == 'E': # Request done. |
968 | - del requests[request_id] |
969 | - request.E(dt) |
970 | - total_requests += 1 |
971 | - if total_requests % 10000 == 0: |
972 | - log.debug("Parsed %d requests", total_requests) |
973 | - |
974 | - # Add the request to any matching categories. |
975 | - times.add_request(request) |
976 | - else: |
977 | - raise MalformedLine('Unknown record type %s' % record_type) |
978 | - except MalformedLine as x: |
979 | - log.error( |
980 | - "Malformed line %s (%s)" % (repr(line), x)) |
981 | - |
982 | - |
983 | -def parse_extension_record(request, args): |
984 | - """Decode a ZServer extension record and annotate the request.""" |
985 | - prefix = args[0] |
986 | - |
987 | - if prefix == 'u': |
988 | - request.url = ' '.join(args[1:]) or None |
989 | - elif prefix == 'p': |
990 | - request.pageid = ' '.join(args[1:]) or None |
991 | - elif prefix == 't': |
992 | - if len(args) != 4: |
993 | - raise MalformedLine("Wrong number of arguments %s" % (args,)) |
994 | - request.sql_statements = int(args[2]) |
995 | - request.sql_seconds = float(args[3]) / 1000 |
996 | - else: |
997 | - raise MalformedLine( |
998 | - "Unknown extension prefix %s" % prefix) |
999 | - |
1000 | - |
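A sketch of how the extension records above annotate a request; the concrete field values are invented, but the shapes follow the parser: 'u' carries the URL, 'p' the pageid, and 't' a statement count and SQL time in milliseconds.

```python
class FakeReq:
    """Stand-in exposing just the attributes the parser sets."""
    url = None
    pageid = None
    sql_statements = None
    sql_seconds = None

req = FakeReq()
parse_extension_record(req, ['u', 'https://launchpad.net/bugs/1'])
parse_extension_record(req, ['p', '+bug'])
parse_extension_record(req, ['t', '0', '56', '3000'])
assert req.url == 'https://launchpad.net/bugs/1'
assert req.pageid == '+bug'
assert req.sql_statements == 56 and req.sql_seconds == 3.0
```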
1001 | -def html_report( |
1002 | - outf, category_times, pageid_times, url_times, |
1003 | - ninetyninth_percentile_threshold=None, histogram_resolution=0.5, |
1004 | - category_name='Category'): |
1005 | - """Write an html report to outf. |
1006 | - |
1007 | - :param outf: A file object to write the report to. |
1008 | - :param category_times: The time statistics for categories. |
1009 | - :param pageid_times: The time statistics for pageids. |
1010 | - :param url_times: The time statistics for the top N URLs. |
1011 | - :param ninetyninth_percentile_threshold: Lower threshold for inclusion of |
1012 | - pages in the pageid section; pages where 99 percent of the requests are |
1013 | - served under this threshold will not be included. |
1014 | - :param histogram_resolution: used as the histogram bar width |
1015 | - :param category_name: The name to use for category report. Defaults to |
1016 | - 'Category'. |
1017 | - """ |
1018 | - |
1019 | - print >> outf, dedent('''\ |
1020 | - <!DOCTYPE html> |
1021 | - <html> |
1022 | - <head> |
1023 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> |
1024 | - <title>Launchpad Page Performance Report %(date)s</title> |
1025 | - <script language="javascript" type="text/javascript" |
1026 | - src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.min.js" |
1027 | - ></script> |
1028 | - <script language="javascript" type="text/javascript" |
1029 | - src="https://devpad.canonical.com/~lpqateam/ppr/js/jquery.appear-1.1.1.min.js" |
1030 | - ></script> |
1031 | - <script language="javascript" type="text/javascript" |
1032 | - src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.flot.min.js" |
1033 | - ></script> |
1034 | - <script language="javascript" type="text/javascript" |
1035 | - src="https://devpad.canonical.com/~lpqateam/ppr/js/sorttable.js"></script> |
1036 | - <style type="text/css"> |
1037 | - h3 { font-weight: normal; font-size: 1em; } |
1038 | - thead th { padding-left: 1em; padding-right: 1em; } |
1039 | - .category-title { text-align: right; padding-right: 2em; |
1040 | - max-width: 25em; } |
1041 | - .regexp { font-size: x-small; font-weight: normal; } |
1042 | - .mean { text-align: right; padding-right: 1em; } |
1043 | - .median { text-align: right; padding-right: 1em; } |
1044 | - .standard-deviation { text-align: right; padding-right: 1em; } |
1045 | - .histogram { padding: 0.5em 1em; width:400px; height:250px; } |
1046 | - .odd-row { background-color: #eeeeff; } |
1047 | - .even-row { background-color: #ffffee; } |
1048 | - table.sortable thead { |
1049 | - background-color:#eee; |
1050 | - color:#666666; |
1051 | - font-weight: bold; |
1052 | - cursor: default; |
1053 | - } |
1054 | - td.numeric { |
1055 | - font-family: monospace; |
1056 | - text-align: right; |
1057 | - padding: 1em; |
1058 | - } |
1059 | - .clickable { cursor: hand; } |
1060 | - .total-hits, .histogram, .median-sqltime, |
1061 | - .median-sqlstatements { border-right: 1px dashed #000000; } |
1062 | - </style> |
1063 | - </head> |
1064 | - <body> |
1065 | - <h1>Launchpad Page Performance Report</h1> |
1066 | - <h3>%(date)s</h3> |
1067 | - ''' % {'date': time.ctime()}) |
1068 | - |
1069 | - table_header = dedent('''\ |
1070 | - <table class="sortable page-performance-report"> |
1071 | - <caption align="top">Click on column headings to sort.</caption> |
1072 | - <thead> |
1073 | - <tr> |
1074 | - <th class="clickable">Name</th> |
1075 | - |
1076 | - <th class="clickable">Total Hits</th> |
1077 | - |
1078 | - <th class="clickable">99% Under Time (secs)</th> |
1079 | - |
1080 | - <th class="clickable">Mean Time (secs)</th> |
1081 | - <th class="clickable">Time Standard Deviation</th> |
1082 | - <th class="clickable">Median Time (secs)</th> |
1083 | - <th class="sorttable_nosort">Time Distribution</th> |
1084 | - |
1085 | - <th class="clickable">99% Under SQL Time (secs)</th> |
1086 | - <th class="clickable">Mean SQL Time (secs)</th> |
1087 | - <th class="clickable">SQL Time Standard Deviation</th> |
1088 | - <th class="clickable">Median SQL Time (secs)</th> |
1089 | - |
1090 | - <th class="clickable">99% Under SQL Statements</th> |
1091 | - <th class="clickable">Mean SQL Statements</th> |
1092 | - <th class="clickable">SQL Statement Standard Deviation</th> |
1093 | - <th class="clickable">Median SQL Statements</th> |
1094 | - |
1095 | - <th class="clickable">Hits * 99% Under SQL Statement</th> |
1096 | - </tr> |
1097 | - </thead> |
1098 | - <tbody> |
1099 | - ''') |
1100 | - table_footer = "</tbody></table>" |
1101 | - |
1102 | - # Store our generated histograms to output Javascript later. |
1103 | - histograms = [] |
1104 | - |
1105 | - def handle_times(html_title, stats): |
1106 | - histograms.append(stats.histogram) |
1107 | - print >> outf, dedent("""\ |
1108 | - <tr> |
1109 | - <th class="category-title">%s</th> |
1110 | - <td class="numeric total-hits">%d</td> |
1111 | - <td class="numeric 99pc-under-time">%.2f</td> |
1112 | - <td class="numeric mean-time">%.2f</td> |
1113 | - <td class="numeric std-time">%.2f</td> |
1114 | - <td class="numeric median-time">%.2f</td> |
1115 | - <td> |
1116 | - <div class="histogram" id="histogram%d"></div> |
1117 | - </td> |
1118 | - <td class="numeric 99pc-under-sqltime">%.2f</td> |
1119 | - <td class="numeric mean-sqltime">%.2f</td> |
1120 | - <td class="numeric std-sqltime">%.2f</td> |
1121 | - <td class="numeric median-sqltime">%.2f</td> |
1122 | - |
1123 | - <td class="numeric 99pc-under-sqlstatement">%.f</td> |
1124 | - <td class="numeric mean-sqlstatements">%.2f</td> |
1125 | - <td class="numeric std-sqlstatements">%.2f</td> |
1126 | - <td class="numeric median-sqlstatements">%.2f</td> |
1127 | - |
1128 | - <td class="numeric high-db-usage">%.f</td> |
1129 | - </tr> |
1130 | - """ % ( |
1131 | - html_title, |
1132 | - stats.total_hits, stats.ninetyninth_percentile_time, |
1133 | - stats.mean, stats.std, stats.median, |
1134 | - len(histograms) - 1, |
1135 | - stats.ninetyninth_percentile_sqltime, stats.mean_sqltime, |
1136 | - stats.std_sqltime, stats.median_sqltime, |
1137 | - stats.ninetyninth_percentile_sqlstatements, |
1138 | - stats.mean_sqlstatements, |
1139 | - stats.std_sqlstatements, stats.median_sqlstatements, |
1140 | - stats.ninetyninth_percentile_sqlstatements * stats.total_hits, |
1141 | - )) |
1142 | - |
1143 | - # Table of contents |
1144 | - print >> outf, '<ol>' |
1145 | - if category_times: |
1146 | - print >> outf, '<li><a href="#catrep">%s Report</a></li>' % ( |
1147 | - category_name) |
1148 | - if pageid_times: |
1149 | - print >> outf, '<li><a href="#pageidrep">Pageid Report</a></li>' |
1150 | - if url_times: |
1151 | - print >> outf, '<li><a href="#topurlrep">Top URL Report</a></li>' |
1152 | - print >> outf, '</ol>' |
1153 | - |
1154 | - if category_times: |
1155 | - print >> outf, '<h2 id="catrep">%s Report</h2>' % ( |
1156 | - category_name) |
1157 | - print >> outf, table_header |
1158 | - for category, times in category_times: |
1159 | - html_title = '%s<br/><span class="regexp">%s</span>' % ( |
1160 | - html_quote(category.title), html_quote(category.regexp)) |
1161 | - handle_times(html_title, times) |
1162 | - print >> outf, table_footer |
1163 | - |
1164 | - if pageid_times: |
1165 | - print >> outf, '<h2 id="pageidrep">Pageid Report</h2>' |
1166 | - print >> outf, table_header |
1167 | - for pageid, times in pageid_times: |
1168 | - if (ninetyninth_percentile_threshold is not None and |
1169 | - (times.ninetyninth_percentile_time < |
1170 | - ninetyninth_percentile_threshold)): |
1171 | - continue |
1172 | - handle_times(html_quote(pageid), times) |
1173 | - print >> outf, table_footer |
1174 | - |
1175 | - if url_times: |
1176 | - print >> outf, '<h2 id="topurlrep">Top URL Report</h2>' |
1177 | - print >> outf, table_header |
1178 | - for url, times in url_times: |
1179 | - handle_times(html_quote(url), times) |
1180 | - print >> outf, table_footer |
1181 | - |
1182 | - # Output the JavaScript to render our histograms nicely, replacing |
1183 | - # the placeholder <div> tags output earlier. |
1184 | - print >> outf, dedent("""\ |
1185 | - <script language="javascript" type="text/javascript"> |
1186 | - $(function () { |
1187 | - var options = { |
1188 | - series: { |
1189 | - bars: {show: true, barWidth: %s} |
1190 | - }, |
1191 | - xaxis: { |
1192 | - tickFormatter: function (val, axis) { |
1193 | - return val.toFixed(axis.tickDecimals) + "s"; |
1194 | - } |
1195 | - }, |
1196 | - yaxis: { |
1197 | - min: 0, |
1198 | - max: 1, |
1199 | - transform: function (v) { |
1200 | - return Math.pow(Math.log(v*100+1)/Math.LN2, 0.5); |
1201 | - }, |
1202 | - inverseTransform: function (v) { |
1203 | - return (Math.pow(2, v*v) - 1) / 100; |
1204 | - }, |
1205 | - tickDecimals: 1, |
1206 | - tickFormatter: function (val, axis) { |
1207 | - return (val * 100).toFixed(axis.tickDecimals) + "%%"; |
1208 | - }, |
1209 | - ticks: [0.001,0.01,0.10,0.50,1.0] |
1210 | - }, |
1211 | - grid: { |
1212 | - aboveData: true, |
1213 | - labelMargin: 15 |
1214 | - } |
1215 | - }; |
1216 | - """ % histogram_resolution) |
1217 | - |
1218 | - for i, histogram in enumerate(histograms): |
1219 | - if histogram.count == 0: |
1220 | - continue |
1221 | - print >> outf, dedent("""\ |
1222 | - function plot_histogram_%(id)d() { |
1223 | - var d = %(data)s; |
1224 | - |
1225 | - $.plot( |
1226 | - $("#histogram%(id)d"), |
1227 | - [{data: d}], options); |
1228 | - } |
1229 | - $('#histogram%(id)d').appear(function() { |
1230 | - plot_histogram_%(id)d(); |
1231 | - }); |
1232 | - |
1233 | - """ % {'id': i, 'data': json.dumps(histogram.bins_relative)}) |
1234 | - |
1235 | - print >> outf, dedent("""\ |
1236 | - }); |
1237 | - </script> |
1238 | - </body> |
1239 | - </html> |
1240 | - """) |
1241 | |
1242 | === removed file 'lib/lp/scripts/utilities/tests/test_pageperformancereport.py' |
1243 | --- lib/lp/scripts/utilities/tests/test_pageperformancereport.py 2011-08-12 11:37:08 +0000 |
1244 | +++ lib/lp/scripts/utilities/tests/test_pageperformancereport.py 1970-01-01 00:00:00 +0000 |
1245 | @@ -1,483 +0,0 @@ |
1246 | -# Copyright 2010 Canonical Ltd. This software is licensed under the |
1247 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
1248 | - |
1249 | -"""Test the pageperformancereport script.""" |
1250 | - |
1251 | -__metaclass__ = type |
1252 | - |
1253 | -from lp.scripts.utilities.pageperformancereport import ( |
1254 | - Category, |
1255 | - Histogram, |
1256 | - OnlineApproximateMedian, |
1257 | - OnlineStats, |
1258 | - OnlineStatsCalculator, |
1259 | - RequestTimes, |
1260 | - Stats, |
1261 | - ) |
1262 | -from lp.testing import TestCase |
1263 | - |
1264 | - |
1265 | -class FakeOptions: |
1266 | - timeout = 5 |
1267 | - db_file = None |
1268 | - pageids = True |
1269 | - top_urls = 3 |
1270 | - resolution = 1 |
1271 | - |
1272 | - def __init__(self, **kwargs): |
1273 | - """Assign all arguments as attributes.""" |
1274 | - self.__dict__.update(kwargs) |
1275 | - |
1276 | - |
1277 | -class FakeRequest: |
1278 | - |
1279 | - def __init__(self, url, app_seconds, sql_statements=None, |
1280 | - sql_seconds=None, pageid=None): |
1281 | - self.url = url |
1282 | - self.pageid = pageid |
1283 | - self.app_seconds = app_seconds |
1284 | - self.sql_statements = sql_statements |
1285 | - self.sql_seconds = sql_seconds |
1286 | - |
1287 | - |
1288 | -class FakeStats(Stats): |
1289 | - |
1290 | - def __init__(self, **kwargs): |
1291 | - # Override the constructor to just store the values. |
1292 | - self.__dict__.update(kwargs) |
1293 | - |
1294 | - |
1295 | -FAKE_REQUESTS = [ |
1296 | - FakeRequest('/', 0.5, pageid='+root'), |
1297 | - FakeRequest('/bugs', 4.5, 56, 3.0, pageid='+bugs'), |
1298 | - FakeRequest('/bugs', 4.2, 56, 2.2, pageid='+bugs'), |
1299 | - FakeRequest('/bugs', 5.5, 76, 4.0, pageid='+bugs'), |
1300 | - FakeRequest('/ubuntu', 2.5, 6, 2.0, pageid='+distribution'), |
1301 | - FakeRequest('/launchpad', 3.5, 3, 3.0, pageid='+project'), |
1302 | - FakeRequest('/bzr', 2.5, 4, 2.0, pageid='+project'), |
1303 | - FakeRequest('/bugs/1', 20.5, 567, 14.0, pageid='+bug'), |
1304 | - FakeRequest('/bugs/1', 15.5, 567, 9.0, pageid='+bug'), |
1305 | - FakeRequest('/bugs/5', 1.5, 30, 1.2, pageid='+bug'), |
1306 | - FakeRequest('/lazr', 1.0, 16, 0.3, pageid='+project'), |
1307 | - FakeRequest('/drizzle', 0.9, 11, 1.3, pageid='+project'), |
1308 | - ] |
1309 | - |
1310 | - |
1311 | -# The category stats computed for the above 12 requests. |
1312 | -CATEGORY_STATS = [ |
1313 | - # Median is an approximation. |
1314 | - # Real values are: 2.50, 2.20, 30 |
1315 | - (Category('All', ''), FakeStats( |
1316 | - total_hits=12, total_time=62.60, mean=5.22, median=4.20, std=5.99, |
1317 | - total_sqltime=42, mean_sqltime=3.82, median_sqltime=3.0, |
1318 | - std_sqltime=3.89, |
1319 | - total_sqlstatements=1392, mean_sqlstatements=126.55, |
1320 | - median_sqlstatements=56, std_sqlstatements=208.94, |
1321 | - histogram=[[0, 2], [1, 2], [2, 2], [3, 1], [4, 2], [5, 3]], |
1322 | - )), |
1323 | - (Category('Test', ''), FakeStats( |
1324 | - histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]])), |
1325 | - (Category('Bugs', ''), FakeStats( |
1326 | - total_hits=6, total_time=51.70, mean=8.62, median=4.5, std=6.90, |
1327 | - total_sqltime=33.40, mean_sqltime=5.57, median_sqltime=3, |
1328 | - std_sqltime=4.52, |
1329 | - total_sqlstatements=1352, mean_sqlstatements=225.33, |
1330 | - median_sqlstatements=56, std_sqlstatements=241.96, |
1331 | - histogram=[[0, 0], [1, 1], [2, 0], [3, 0], [4, 2], [5, 3]], |
1332 | - )), |
1333 | - ] |
1334 | - |
1335 | - |
1336 | -# The top 3 URL stats computed for the above 12 requests. |
1337 | -TOP_3_URL_STATS = [ |
1338 | - ('/bugs/1', FakeStats( |
1339 | - total_hits=2, total_time=36.0, mean=18.0, median=15.5, std=2.50, |
1340 | - total_sqltime=23.0, mean_sqltime=11.5, median_sqltime=9.0, |
1341 | - std_sqltime=2.50, |
1342 | - total_sqlstatements=1134, mean_sqlstatements=567.0, |
1343 | - median_sqlstatements=567, std_sqlstatements=0, |
1344 | - histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 2]], |
1345 | - )), |
1346 | - ('/bugs', FakeStats( |
1347 | - total_hits=3, total_time=14.2, mean=4.73, median=4.5, std=0.56, |
1348 | - total_sqltime=9.2, mean_sqltime=3.07, median_sqltime=3, |
1349 | - std_sqltime=0.74, |
1350 | - total_sqlstatements=188, mean_sqlstatements=62.67, |
1351 | - median_sqlstatements=56, std_sqlstatements=9.43, |
1352 | - histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 2], [5, 1]], |
1353 | - )), |
1354 | - ('/launchpad', FakeStats( |
1355 | - total_hits=1, total_time=3.5, mean=3.5, median=3.5, std=0, |
1356 | - total_sqltime=3.0, mean_sqltime=3, median_sqltime=3, std_sqltime=0, |
1357 | - total_sqlstatements=3, mean_sqlstatements=3, |
1358 | - median_sqlstatements=3, std_sqlstatements=0, |
1359 | - histogram=[[0, 0], [1, 0], [2, 0], [3, 1], [4, 0], [5, 0]], |
1360 | - )), |
1361 | - ] |
1362 | - |
1363 | - |
1364 | -# The pageid stats computed for the above 12 requests. |
1365 | -PAGEID_STATS = [ |
1366 | - ('+bug', FakeStats( |
1367 | - total_hits=3, total_time=37.5, mean=12.5, median=15.5, std=8.04, |
1368 | - total_sqltime=24.2, mean_sqltime=8.07, median_sqltime=9, |
1369 | - std_sqltime=5.27, |
1370 | - total_sqlstatements=1164, mean_sqlstatements=388, |
1371 | - median_sqlstatements=567, std_sqlstatements=253.14, |
1372 | - histogram=[[0, 0], [1, 1], [2, 0], [3, 0], [4, 0], [5, 2]], |
1373 | - )), |
1374 | - ('+bugs', FakeStats( |
1375 | - total_hits=3, total_time=14.2, mean=4.73, median=4.5, std=0.56, |
1376 | - total_sqltime=9.2, mean_sqltime=3.07, median_sqltime=3, |
1377 | - std_sqltime=0.74, |
1378 | - total_sqlstatements=188, mean_sqlstatements=62.67, |
1379 | - median_sqlstatements=56, std_sqlstatements=9.43, |
1380 | - histogram=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 2], [5, 1]], |
1381 | - )), |
1382 | - ('+distribution', FakeStats( |
1383 | - total_hits=1, total_time=2.5, mean=2.5, median=2.5, std=0, |
1384 | - total_sqltime=2.0, mean_sqltime=2, median_sqltime=2, std_sqltime=0, |
1385 | - total_sqlstatements=6, mean_sqlstatements=6, |
1386 | - median_sqlstatements=6, std_sqlstatements=0, |
1387 | - histogram=[[0, 0], [1, 0], [2, 1], [3, 0], [4, 0], [5, 0]], |
1388 | - )), |
1389 | - ('+project', FakeStats( |
1390 | - total_hits=4, total_time=7.9, mean=1.98, median=1, std=1.08, |
1391 | - total_sqltime=6.6, mean_sqltime=1.65, median_sqltime=1.3, |
1392 | - std_sqltime=0.99, |
1393 | - total_sqlstatements=34, mean_sqlstatements=8.5, |
1394 | - median_sqlstatements=4, std_sqlstatements=5.32, |
1395 | - histogram=[[0, 1], [1, 1], [2, 1], [3, 1], [4, 0], [5, 0]], |
1396 | - )), |
1397 | - ('+root', FakeStats( |
1398 | - total_hits=1, total_time=0.5, mean=0.5, median=0.5, std=0, |
1399 | - histogram=[[0, 1], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], |
1400 | - )), |
1401 | - ] |
1402 | - |
1403 | - |
1404 | -class TestRequestTimes(TestCase): |
1405 | - """Tests the RequestTimes backend.""" |
1406 | - |
1407 | - def setUp(self): |
1408 | - TestCase.setUp(self) |
1409 | - self.categories = [ |
1410 | - Category('All', '.*'), Category('Test', '.*test.*'), |
1411 | - Category('Bugs', '.*bugs.*')] |
1412 | - self.db = RequestTimes(self.categories, FakeOptions()) |
1413 | - |
1414 | - def setUpRequests(self): |
1415 | - """Insert some requests into the db.""" |
1416 | - for r in FAKE_REQUESTS: |
1417 | - self.db.add_request(r) |
1418 | - |
1419 | - def assertStatsAreEquals(self, expected, results): |
1420 | - self.assertEquals( |
1421 | - len(expected), len(results), 'Wrong number of results') |
1422 | - for idx in range(len(results)): |
1423 | - self.assertEquals(expected[idx][0], results[idx][0], |
1424 | - "Wrong key for results %d" % idx) |
1425 | - key = results[idx][0] |
1426 | - self.assertEquals(expected[idx][1].text(), results[idx][1].text(), |
1427 | - "Wrong stats for results %d (%s)" % (idx, key)) |
1428 | - self.assertEquals( |
1429 | - Histogram.from_bins_data(expected[idx][1].histogram), |
1430 | - results[idx][1].histogram, |
1431 | - "Wrong histogram for results %d (%s)" % (idx, key)) |
1432 | - |
1433 | - def test_get_category_times(self): |
1434 | - self.setUpRequests() |
1435 | - category_times = self.db.get_category_times() |
1436 | - self.assertStatsAreEquals(CATEGORY_STATS, category_times) |
1437 | - |
1438 | - def test_get_url_times(self): |
1439 | - self.setUpRequests() |
1440 | - url_times = self.db.get_top_urls_times() |
1441 | - self.assertStatsAreEquals(TOP_3_URL_STATS, url_times) |
1442 | - |
1443 | - def test_get_pageid_times(self): |
1444 | - self.setUpRequests() |
1445 | - pageid_times = self.db.get_pageid_times() |
1446 | - self.assertStatsAreEquals(PAGEID_STATS, pageid_times) |
1447 | - |
1448 | - def test___add__(self): |
1449 | - # Ensure that adding two RequestTimes together results in
1450 | - # a merge of their constituents.
1451 | - db1 = self.db |
1452 | - db2 = RequestTimes(self.categories, FakeOptions()) |
1453 | - db1.add_request(FakeRequest('/', 1.5, 5, 1.0, '+root')) |
1454 | - db1.add_request(FakeRequest('/bugs', 3.5, 15, 1.0, '+bugs')) |
1455 | - db2.add_request(FakeRequest('/bugs/1', 5.0, 30, 4.0, '+bug')) |
1456 | - results = db1 + db2 |
1457 | - self.assertEquals(3, results.category_times[0][1].total_hits) |
1458 | - self.assertEquals(0, results.category_times[1][1].total_hits) |
1459 | - self.assertEquals(2, results.category_times[2][1].total_hits) |
1460 | - self.assertEquals(1, results.pageid_times['+root'].total_hits) |
1461 | - self.assertEquals(1, results.pageid_times['+bugs'].total_hits) |
1462 | - self.assertEquals(1, results.pageid_times['+bug'].total_hits) |
1463 | - self.assertEquals(1, results.url_times['/'].total_hits) |
1464 | - self.assertEquals(1, results.url_times['/bugs'].total_hits) |
1465 | - self.assertEquals(1, results.url_times['/bugs/1'].total_hits) |
1466 | - |
1467 | - def test_histogram_init_with_resolution(self): |
1468 | - # Test that the resolution parameter increases the number of bins.
1469 | - db = RequestTimes( |
1470 | - self.categories, FakeOptions(timeout=4, resolution=1)) |
1471 | - self.assertEquals(5, db.histogram_width) |
1472 | - self.assertEquals(1, db.histogram_resolution) |
1473 | - db = RequestTimes( |
1474 | - self.categories, FakeOptions(timeout=4, resolution=0.5)) |
1475 | - self.assertEquals(9, db.histogram_width) |
1476 | - self.assertEquals(0.5, db.histogram_resolution) |
1477 | - db = RequestTimes( |
1478 | - self.categories, FakeOptions(timeout=4, resolution=2)) |
1479 | - self.assertEquals(3, db.histogram_width) |
1480 | - self.assertEquals(2, db.histogram_resolution) |
1481 | - |
1482 | - |
1483 | -class TestOnlineStats(TestCase): |
1484 | - """Tests for the OnlineStats class.""" |
1485 | - |
1486 | - def test___add__(self): |
1487 | - # Ensure that adding two OnlineStats merges all their constituents.
1488 | - stats1 = OnlineStats(4, 1) |
1489 | - stats1.update(FakeRequest('/', 2.0, 5, 1.5)) |
1490 | - stats2 = OnlineStats(4, 1) |
1491 | - stats2.update(FakeRequest('/', 1.5, 2, 3.0)) |
1492 | - stats2.update(FakeRequest('/', 5.0, 2, 2.0)) |
1493 | - results = stats1 + stats2 |
1494 | - self.assertEquals(3, results.total_hits) |
1495 | - self.assertEquals(2, results.median) |
1496 | - self.assertEquals(9, results.total_sqlstatements) |
1497 | - self.assertEquals(2, results.median_sqlstatements) |
1498 | - self.assertEquals(6.5, results.total_sqltime) |
1499 | - self.assertEquals(2.0, results.median_sqltime) |
1500 | - self.assertEquals( |
1501 | - Histogram.from_bins_data([[0, 0], [1, 1], [2, 1], [3, 1]]), |
1502 | - results.histogram) |
1503 | - |
1504 | - |
1505 | -class TestOnlineStatsCalculator(TestCase): |
1506 | - """Tests for the online stats calculator.""" |
1507 | - |
1508 | - def setUp(self): |
1509 | - TestCase.setUp(self) |
1510 | - self.stats = OnlineStatsCalculator() |
1511 | - |
1512 | - def test_stats_for_empty_set(self): |
1513 | - # Test the stats when there is no input. |
1514 | - self.assertEquals(0, self.stats.count) |
1515 | - self.assertEquals(0, self.stats.sum) |
1516 | - self.assertEquals(0, self.stats.mean) |
1517 | - self.assertEquals(0, self.stats.variance) |
1518 | - self.assertEquals(0, self.stats.std) |
1519 | - |
1520 | - def test_stats_for_one_value(self): |
1521 | - # Test the stats when adding one element. |
1522 | - self.stats.update(5) |
1523 | - self.assertEquals(1, self.stats.count) |
1524 | - self.assertEquals(5, self.stats.sum) |
1525 | - self.assertEquals(5, self.stats.mean) |
1526 | - self.assertEquals(0, self.stats.variance) |
1527 | - self.assertEquals(0, self.stats.std) |
1528 | - |
1529 | - def test_None_are_ignored(self): |
1530 | - self.stats.update(None) |
1531 | - self.assertEquals(0, self.stats.count) |
1532 | - |
1533 | - def test_stats_for_3_values(self): |
1534 | - for x in [3, 6, 9]: |
1535 | - self.stats.update(x) |
1536 | - self.assertEquals(3, self.stats.count) |
1537 | - self.assertEquals(18, self.stats.sum) |
1538 | - self.assertEquals(6, self.stats.mean) |
1539 | - self.assertEquals(6, self.stats.variance) |
1540 | - self.assertEquals("2.45", "%.2f" % self.stats.std) |
1541 | - |
1542 | - def test___add___two_empty_together(self): |
1543 | - stats2 = OnlineStatsCalculator() |
1544 | - results = self.stats + stats2 |
1545 | - self.assertEquals(0, results.count) |
1546 | - self.assertEquals(0, results.sum) |
1547 | - self.assertEquals(0, results.mean) |
1548 | - self.assertEquals(0, results.variance) |
1549 | - |
1550 | - def test___add___one_empty(self): |
1551 | - stats2 = OnlineStatsCalculator() |
1552 | - for x in [1, 2, 3]: |
1553 | - self.stats.update(x) |
1554 | - results = self.stats + stats2 |
1555 | - self.assertEquals(3, results.count) |
1556 | - self.assertEquals(6, results.sum) |
1557 | - self.assertEquals(2, results.mean) |
1558 | - self.assertEquals(2, results.M2) |
1559 | - |
1560 | - def test___add__(self): |
1561 | - stats2 = OnlineStatsCalculator() |
1562 | - for x in [3, 6, 9]: |
1563 | - self.stats.update(x) |
1564 | - for x in [1, 2, 3]: |
1565 | - stats2.update(x) |
1566 | - results = self.stats + stats2 |
1567 | - self.assertEquals(6, results.count) |
1568 | - self.assertEquals(24, results.sum) |
1569 | - self.assertEquals(4, results.mean) |
1570 | - self.assertEquals(44, results.M2) |
1571 | - |
1572 | - |
1573 | -SHUFFLE_RANGE_100 = [ |
1574 | - 25, 79, 99, 76, 60, 63, 87, 77, 51, 82, 42, 96, 93, 58, 32, 66, 75, |
1575 | - 2, 26, 22, 11, 73, 61, 83, 65, 68, 44, 81, 64, 3, 33, 34, 15, 1, |
1576 | - 92, 27, 90, 74, 46, 57, 59, 31, 13, 19, 89, 29, 56, 94, 50, 49, 62, |
1577 | - 37, 21, 35, 5, 84, 88, 16, 8, 23, 40, 6, 48, 10, 97, 0, 53, 17, 30, |
1578 | - 18, 43, 86, 12, 71, 38, 78, 36, 7, 45, 47, 80, 54, 39, 91, 98, 24, |
1579 | - 55, 14, 52, 20, 69, 85, 95, 28, 4, 9, 67, 70, 41, 72, |
1580 | - ] |
1581 | - |
1582 | - |
1583 | -class TestOnlineApproximateMedian(TestCase): |
1584 | - """Tests for the approximate median computation.""" |
1585 | - |
1586 | - def setUp(self): |
1587 | - TestCase.setUp(self) |
1588 | - self.estimator = OnlineApproximateMedian() |
1589 | - |
1590 | - def test_median_is_0_when_no_input(self): |
1591 | - self.assertEquals(0, self.estimator.median) |
1592 | - |
1593 | - def test_median_is_true_median_for_n_lower_than_bucket_size(self): |
1594 | - for x in range(9): |
1595 | - self.estimator.update(x) |
1596 | - self.assertEquals(4, self.estimator.median) |
1597 | - |
1598 | - def test_None_input_is_ignored(self): |
1599 | - self.estimator.update(1) |
1600 | - self.estimator.update(None) |
1601 | - self.assertEquals(1, self.estimator.median) |
1602 | - |
1603 | - def test_approximate_median_is_good_enough(self): |
1604 | - for x in SHUFFLE_RANGE_100: |
1605 | - self.estimator.update(x) |
1606 | - # True median is 50; 49 is good enough :-)
1607 | - self.assertIn(self.estimator.median, range(49, 52))
1608 | - |
1609 | - def test___add__(self): |
1610 | - median1 = OnlineApproximateMedian(3) |
1611 | - median1.buckets = [[1, 3], [4, 5], [6, 3]] |
1612 | - median2 = OnlineApproximateMedian(3) |
1613 | - median2.buckets = [[], [3, 6], [3, 7]] |
1614 | - results = median1 + median2 |
1615 | - self.assertEquals([[1, 3], [6], [3, 7], [4]], results.buckets) |
1616 | - |
1617 | - |
1618 | -class TestHistogram(TestCase): |
1619 | - """Test the histogram computation.""" |
1620 | - |
1621 | - def test__init__(self): |
1622 | - hist = Histogram(4, 1) |
1623 | - self.assertEquals(4, hist.bins_count) |
1624 | - self.assertEquals(1, hist.bins_size) |
1625 | - self.assertEquals([[0, 0], [1, 0], [2, 0], [3, 0]], hist.bins) |
1626 | - |
1627 | - def test__init__bins_size_float(self): |
1628 | - hist = Histogram(9, 0.5) |
1629 | - self.assertEquals(9, hist.bins_count) |
1630 | - self.assertEquals(0.5, hist.bins_size) |
1631 | - self.assertEquals( |
1632 | - [[0, 0], [0.5, 0], [1.0, 0], [1.5, 0], |
1633 | - [2.0, 0], [2.5, 0], [3.0, 0], [3.5, 0], [4.0, 0]], hist.bins) |
1634 | - |
1635 | - def test_update(self): |
1636 | - hist = Histogram(4, 1) |
1637 | - hist.update(1) |
1638 | - self.assertEquals(1, hist.count) |
1639 | - self.assertEquals([[0, 0], [1, 1], [2, 0], [3, 0]], hist.bins) |
1640 | - |
1641 | - hist.update(1.3) |
1642 | - self.assertEquals(2, hist.count) |
1643 | - self.assertEquals([[0, 0], [1, 2], [2, 0], [3, 0]], hist.bins) |
1644 | - |
1645 | - def test_update_float_bin_size(self): |
1646 | - hist = Histogram(4, 0.5) |
1647 | - hist.update(1.3) |
1648 | - self.assertEquals([[0, 0], [0.5, 0], [1.0, 1], [1.5, 0]], hist.bins) |
1649 | - hist.update(0.5) |
1650 | - self.assertEquals([[0, 0], [0.5, 1], [1.0, 1], [1.5, 0]], hist.bins) |
1651 | - hist.update(0.6) |
1652 | - self.assertEquals([[0, 0], [0.5, 2], [1.0, 1], [1.5, 0]], hist.bins) |
1653 | - |
1654 | - def test_update_max_goes_in_last_bin(self): |
1655 | - hist = Histogram(4, 1) |
1656 | - hist.update(9) |
1657 | - self.assertEquals([[0, 0], [1, 0], [2, 0], [3, 1]], hist.bins) |
1658 | - |
1659 | - def test_bins_relative(self): |
1660 | - hist = Histogram(4, 1) |
1661 | - for x in range(4): |
1662 | - hist.update(x) |
1663 | - self.assertEquals( |
1664 | - [[0, 0.25], [1, 0.25], [2, 0.25], [3, 0.25]], hist.bins_relative) |
1665 | - |
1666 | - def test_from_bins_data(self): |
1667 | - hist = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1668 | - self.assertEquals(4, hist.bins_count) |
1669 | - self.assertEquals(1, hist.bins_size) |
1670 | - self.assertEquals(6, hist.count) |
1671 | - self.assertEquals([[0, 1], [1, 3], [2, 1], [3, 1]], hist.bins) |
1672 | - |
1673 | - def test___repr__(self): |
1674 | - hist = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1675 | - self.assertEquals( |
1676 | - "<Histogram [[0, 1], [1, 3], [2, 1], [3, 1]]>", repr(hist)) |
1677 | - |
1678 | - def test___eq__(self): |
1679 | - hist1 = Histogram(4, 1) |
1680 | - hist2 = Histogram(4, 1) |
1681 | - self.assertEquals(hist1, hist2) |
1682 | - |
1683 | - def test__eq___with_data(self): |
1684 | - hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1685 | - hist2 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1686 | - self.assertEquals(hist1, hist2) |
1687 | - |
1688 | - def test___add__(self): |
1689 | - hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1690 | - hist2 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1691 | - hist3 = Histogram.from_bins_data([[0, 2], [1, 6], [2, 2], [3, 2]]) |
1692 | - total = hist1 + hist2 |
1693 | - self.assertEquals(hist3, total) |
1694 | - self.assertEquals(12, total.count) |
1695 | - |
1696 | - def test___add___uses_widest(self): |
1697 | - # Make sure that the resulting histogram is as wide as the widest one. |
1698 | - hist1 = Histogram.from_bins_data([[0, 1], [1, 3], [2, 1], [3, 1]]) |
1699 | - hist2 = Histogram.from_bins_data( |
1700 | - [[0, 1], [1, 3], [2, 1], [3, 1], [4, 2], [5, 3]]) |
1701 | - hist3 = Histogram.from_bins_data( |
1702 | - [[0, 2], [1, 6], [2, 2], [3, 2], [4, 2], [5, 3]]) |
1703 | - self.assertEquals(hist3, hist1 + hist2) |
1704 | - |
1705 | - def test___add___interpolate_lower_resolution(self): |
1706 | - # Make sure that when the other histogram has a bigger bin_size |
1707 | - # the frequency is correctly split across the different bins. |
1708 | - hist1 = Histogram.from_bins_data( |
1709 | - [[0, 1], [0.5, 3], [1.0, 1], [1.5, 1]]) |
1710 | - hist2 = Histogram.from_bins_data( |
1711 | - [[0, 1], [1, 2], [2, 3], [3, 1], [4, 1]]) |
1712 | - |
1713 | - hist3 = Histogram.from_bins_data( |
1714 | - [[0, 1.5], [0.5, 3.5], [1.0, 2], [1.5, 2], |
1715 | - [2.0, 1.5], [2.5, 1.5], [3.0, 0.5], [3.5, 0.5], |
1716 | - [4.0, 0.5], [4.5, 0.5]]) |
1717 | - self.assertEquals(hist3, hist1 + hist2) |
1718 | - |
1719 | - def test___add___higher_resolution(self): |
1720 | - # Make sure that when the other histogram has a smaller bin_size |
1721 | - # the frequency is correctly added. |
1722 | - hist1 = Histogram.from_bins_data([[0, 1], [1, 2], [2, 3]]) |
1723 | - hist2 = Histogram.from_bins_data( |
1724 | - [[0, 1], [0.5, 3], [1.0, 1], [1.5, 1], [2.0, 3], [2.5, 1], |
1725 | - [3, 4], [3.5, 2]]) |
1726 | - |
1727 | - hist3 = Histogram.from_bins_data([[0, 5], [1, 4], [2, 7], [3, 6]]) |
1728 | - self.assertEquals(hist3, hist1 + hist2) |
1729 | |
1730 | === removed file 'utilities/page-performance-report-daily.sh' |
1731 | --- utilities/page-performance-report-daily.sh 2011-05-04 17:32:17 +0000 |
1732 | +++ utilities/page-performance-report-daily.sh 1970-01-01 00:00:00 +0000 |
1733 | @@ -1,115 +0,0 @@ |
1734 | -#!/bin/sh |
1735 | - |
1736 | -#TZ=UTC # trace logs are still BST - blech |
1737 | - |
1738 | -CATEGORY=lpnet |
1739 | -LOGS_ROOTS="/srv/launchpad.net-logs/production /srv/launchpad.net-logs/edge" |
1740 | -OUTPUT_ROOT=${HOME}/public_html/ppr/lpnet |
1741 | -DAY_FMT="+%Y-%m-%d" |
1742 | - |
1743 | -find_logs() { |
1744 | - from=$1 |
1745 | - until=$2 |
1746 | - |
1747 | - end_mtime_switch= |
1748 | - days_to_end="$(expr `date +%j` - `date -d $until +%j` - 1)" |
1749 | - if [ $days_to_end -gt 0 ]; then |
1750 | - end_mtime_switch="-daystart -mtime +$days_to_end" |
1751 | - fi |
1752 | - |
1753 | - find ${LOGS_ROOTS} \ |
1754 | - -maxdepth 2 -type f -newermt "$from - 1 day" $end_mtime_switch \ |
1755 | - -name launchpad-trace\* \ |
1756 | - | sort | xargs -x |
1757 | -} |
1758 | - |
1759 | -# Find all the daily stats.pck.bz2 $from $until |
1760 | -find_stats() { |
1761 | - from=$1 |
1762 | - until=$2 |
1763 | - |
1764 | - # Build a string of all the days within range. |
1765 | - local dates |
1766 | - local day |
1767 | - day=$from |
1768 | - while [ $day != $until ]; do |
1769 | - dates="$dates $day" |
1770 | - day=`date $DAY_FMT -d "$day + 1 day"` |
1771 | - done |
1772 | - |
1773 | - # Use that to build a regex that will be used to select |
1774 | - # the files to use. |
1775 | - local regex |
1776 | - regex="daily_(`echo $dates |sed -e 's/ /|/g'`)" |
1777 | - |
1778 | - find ${OUTPUT_ROOT} -name 'stats.pck.bz2' | egrep $regex |
1779 | -} |
1780 | - |
1781 | -report() { |
1782 | - type=$1 |
1783 | - from=$2 |
1784 | - until=$3 |
1785 | - link=$4 |
1786 | - |
1787 | - local files |
1788 | - local options |
1789 | - if [ "$type" = "daily" ]; then |
1790 | - files=`find_logs $from $until` |
1791 | - options="--from=$from --until=$until" |
1792 | - else |
1793 | - files=`find_stats $from $until` |
1794 | - options="--merge" |
1795 | - fi |
1796 | - |
1797 | - local dir |
1798 | - dir=${OUTPUT_ROOT}/`date -d $from +%Y-%m`/${type}_${from}_${until} |
1799 | - mkdir -p ${dir} |
1800 | - |
1801 | - echo Generating report from $from until $until into $dir `date` |
1802 | - |
1803 | - ./page-performance-report.py -v --top-urls=200 --directory=${dir} \ |
1804 | - $options $files |
1805 | - |
1806 | - # Only do the linking if requested. |
1807 | - if [ "$link" = "link" ]; then |
1808 | - ln -sf ${dir}/partition.html \ |
1809 | - ${OUTPUT_ROOT}/latest-${type}-partition.html |
1810 | - ln -sf ${dir}/categories.html \ |
1811 | - ${OUTPUT_ROOT}/latest-${type}-categories.html |
1812 | - ln -sf ${dir}/pageids.html \ |
1813 | - ${OUTPUT_ROOT}/latest-${type}-pageids.html |
1814 | - ln -sf ${dir}/combined.html \ |
1815 | - ${OUTPUT_ROOT}/latest-${type}-combined.html |
1816 | - ln -sf ${dir}/metrics.dat ${OUTPUT_ROOT}/latest-${type}-metrics.dat |
1817 | - ln -sf ${dir}/top200.html ${OUTPUT_ROOT}/latest-${type}-top200.html |
1818 | - ln -sf ${dir}/timeout-candidates.html \ |
1819 | - ${OUTPUT_ROOT}/latest-${type}-timeout-candidates.html |
1820 | - fi |
1821 | - |
1822 | - return 0 |
1823 | -} |
1824 | - |
1825 | -link=""  # 'local' is only valid inside a function in sh
1826 | -if [ "$3" = "-l" ]; then |
1827 | - link="link" |
1828 | -fi |
1829 | - |
1830 | -if [ "$1" = '-d' ]; then |
1831 | - report daily `date -d $2 $DAY_FMT` `date -d "$2 + 1 day" $DAY_FMT` $link |
1832 | -elif [ "$1" = '-w' ]; then |
1833 | - report weekly `date -d $2 $DAY_FMT` `date -d "$2 + 1 week" $DAY_FMT` $link |
1834 | -elif [ "$1" = '-m' ]; then |
1835 | - report monthly `date -d $2 $DAY_FMT` `date -d "$2 + 1 month" $DAY_FMT` $link |
1836 | -else |
1837 | - # Default invocation used from cron to generate latest one. |
1838 | - now=`date $DAY_FMT` |
1839 | - report daily `date -d yesterday $DAY_FMT` $now link |
1840 | - |
1841 | - if [ `date +%a` = 'Sun' ]; then |
1842 | - report weekly `date -d 'last week' $DAY_FMT` $now link |
1843 | - fi |
1844 | - |
1845 | - if [ `date +%d` = '01' ]; then |
1846 | - report monthly `date -d 'last month' $DAY_FMT` $now link |
1847 | - fi |
1848 | -fi |
1849 | |
1850 | === removed file 'utilities/page-performance-report.ini' |
1851 | --- utilities/page-performance-report.ini 2011-09-21 21:12:02 +0000 |
1852 | +++ utilities/page-performance-report.ini 1970-01-01 00:00:00 +0000 |
1853 | @@ -1,79 +0,0 @@ |
1854 | -[categories] |
1855 | -# Category -> Python regular expression. |
1856 | -# Remember to quote ?, ., + & ? characters to match literally.
1857 | -# 'kodos' is useful for interactively testing regular expressions. |
1858 | -All Launchpad=. |
1859 | -All Launchpad except operational pages=(?<!\+opstats|\+haproxy)$ |
1860 | - |
1861 | -API=(^https?://api\.|/\+access-token$) |
1862 | -Operational=(\+opstats|\+haproxy)$ |
1863 | -Web (Non API/non operational/non XML-RPC)=^https?://(?!api\.) |
1864 | - [^/]+($|/ |
1865 | - (?!\+haproxy|\+opstats|\+access-token |
1866 | - |((authserver|bugs|bazaar|codehosting| |
1867 | - codeimportscheduler|mailinglists|softwarecenteragent| |
1868 | - featureflags)/\w+$))) |
1869 | -Other=^/ |
1870 | - |
1871 | -Launchpad Frontpage=^https?://launchpad\.[^/]+(/index\.html)?$ |
1872 | - |
1873 | -# Note that the bug text dump is served on the main launchpad domain |
1874 | -# and we need to exclude it from the registry stats.
1875 | -Registry=^https?://launchpad\..*(?<!/\+text)(?<!/\+access-token)$ |
1876 | -Registry - Person Index=^https?://launchpad\.[^/]+/%7E[^/]+(/\+index)?$ |
1877 | -Registry - Pillar Index=^https?://launchpad\.[^/]+/\w[^/]*(/\+index)?$ |
1878 | - |
1879 | -Answers=^https?://answers\. |
1880 | -Answers - Front page=^https?://answers\.[^/]+(/questions/\+index)?$ |
1881 | - |
1882 | -Blueprints=^https?://blueprints\. |
1883 | -Blueprints - Front page=^https?://blueprints\.[^/]+(/specs/\+index)?$ |
1884 | - |
1885 | -# Note that the bug text dump is not served on the bugs domain, |
1886 | -# probably for hysterical reasons. This is why the bugs regexp is |
1887 | -# confusing. |
1888 | -Bugs=^https?://(bugs\.|.+/bugs/\d+/\+text$) |
1889 | -Bugs - Front page=^https?://bugs\.[^/]+(/bugs/\+index)?$ |
1890 | -Bugs - Bug Page=^https?://bugs\.[^/]+/.+/\+bug/\d+(/\+index)?$ |
1891 | -Bugs - Pillar Index=^https?://bugs\.[^/]+/\w[^/]*(/\+bugs-index)?$ |
1892 | -Bugs - Search=^https?://bugs\.[^/]+/.+/\+bugs$ |
1893 | -Bugs - Text Dump=^https?://launchpad\..+/\+text$ |
1894 | - |
1895 | -Code=^https?://code\. |
1896 | -Code - Front page=^https?://code\.[^/]+(/\+code/\+index)?$ |
1897 | -Code - Pillar Branches=^https?://code\.[^/]+/\w[^/]*(/\+code-index)?$ |
1898 | -Code - Branch Page=^https?://code\.[^/]+/%7E[^/]+/[^/]+/[^/]+(/\+index)?$ |
1899 | -Code - Merge Proposal=^https?://code\.[^/]+/.+/\+merge/\d+(/\+index)$ |
1900 | - |
1901 | -Soyuz - PPA Index=^https?://launchpad\.[^/]+/.+/\+archive/[^/]+(/\+index)?$ |
1902 | - |
1903 | -Translations=^https?://translations\. |
1904 | -Translations - Front page=^https?://translations\.[^/]+/translations/\+index$ |
1905 | -Translations - Overview=^https?://translations\..*/\+lang/\w+(/\+index)?$ |
1906 | - |
1907 | -Public XML-RPC=^https://(launchpad|xmlrpc)[^/]+/bazaar/\w+$ |
1908 | -Private XML-RPC=^https://(launchpad|xmlrpc)[^/]+/ |
1909 | - (authserver|bugs|codehosting| |
1910 | - codeimportscheduler|mailinglists| |
1911 | - softwarecenteragent|featureflags)/\w+$ |
1912 | - |
1913 | -[metrics] |
1914 | -ppr_all=All Launchpad except operational pages |
1915 | -ppr_web=Web (Non API/non operational/non XML-RPC) |
1916 | -ppr_operational=Operational |
1917 | -ppr_bugs=Bugs |
1918 | -ppr_api=API |
1919 | -ppr_code=Code |
1920 | -ppr_public_xmlrpc=Public XML-RPC |
1921 | -ppr_private_xmlrpc=Private XML-RPC |
1922 | -ppr_translations=Translations |
1923 | -ppr_registry=Registry |
1924 | -ppr_other=Other |
1925 | - |
1926 | -[partition] |
1927 | -API= |
1928 | -Operational= |
1929 | -Private XML-RPC= |
1930 | -Public XML-RPC= |
1931 | -Web (Non API/non operational/non XML-RPC)= |
1932 | -Other= |
1933 | |
1934 | === removed file 'utilities/page-performance-report.py' |
1935 | --- utilities/page-performance-report.py 2010-04-27 19:48:39 +0000 |
1936 | +++ utilities/page-performance-report.py 1970-01-01 00:00:00 +0000 |
1937 | @@ -1,18 +0,0 @@ |
1938 | -#!/usr/bin/python -S |
1939 | -# |
1940 | -# Copyright 2010 Canonical Ltd. This software is licensed under the |
1941 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
1942 | - |
1943 | -"""Page performance report generated from zserver tracelogs.""" |
1944 | - |
1945 | -__metaclass__ = type |
1946 | - |
1947 | -import _pythonpath |
1948 | - |
1949 | -import sys |
1950 | - |
1951 | -from lp.scripts.utilities.pageperformancereport import main |
1952 | - |
1953 | - |
1954 | -if __name__ == '__main__': |
1955 | - sys.exit(main()) |
Self review; nothing to see here. We might want to bring the ini file back, I suppose, but lp-dev-utils is still LP-focused, so for now keep it all together.
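
For anyone following the code over to lp-dev-utils: the TestOnlineApproximateMedian tests above exercise a cascading-bucket median estimator. The sketch below illustrates that general technique only; the class name and the weighted final collapse are assumptions for illustration, not the removed Launchpad implementation.

    class CascadingMedian:
        """Approximate a running median with fixed-size buckets.

        When a bucket fills, its exact median is promoted to the next
        level, so memory stays bounded by bucket_size times the number
        of levels rather than growing with the stream.
        """

        def __init__(self, bucket_size=9):
            self.bucket_size = bucket_size
            self.buckets = [[]]

        def update(self, value):
            # Mirror the tests above: None inputs are ignored.
            if value is None:
                return
            self._insert(value, 0)

        def _insert(self, value, level):
            if level == len(self.buckets):
                self.buckets.append([])
            bucket = self.buckets[level]
            bucket.append(value)
            if len(bucket) == self.bucket_size:
                bucket.sort()
                median = bucket[self.bucket_size // 2]
                del bucket[:]
                self._insert(median, level + 1)

        @property
        def median(self):
            # A leftover value at level L stands in for roughly
            # bucket_size**L inputs, so weight it accordingly before
            # taking the median. Returns 0 when no input has been seen,
            # matching test_median_is_0_when_no_input.
            weighted = []
            for level, bucket in enumerate(self.buckets):
                weight = self.bucket_size ** level
                for value in bucket:
                    weighted.extend([value] * weight)
            if not weighted:
                return 0
            weighted.sort()
            return weighted[len(weighted) // 2]

With bucket_size=9 this gives the exact median for small inputs (the median of range(9) is 4, as the tests expect) and degrades gracefully to an approximation on longer streams such as SHUFFLE_RANGE_100.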