Merge lp:~lucio.torre/txstatsd/make-ewma-sampling-faster2 into lp:txstatsd

Proposed by Lucio Torre
Status: Merged
Approved by: Sidnei da Silva
Approved revision: 64
Merged at revision: 67
Proposed branch: lp:~lucio.torre/txstatsd/make-ewma-sampling-faster2
Merge into: lp:txstatsd
Diff against target: 155 lines (+48/-26)
2 files modified
txstatsd/stats/exponentiallydecayingsample.py (+24/-24)
txstatsd/tests/stats/test_exponentiallydecayingsample.py (+24/-2)
To merge this branch: bzr merge lp:~lucio.torre/txstatsd/make-ewma-sampling-faster2
Reviewer Review Type Date Requested Status
Sidnei da Silva Approve
Review via email: mp+90780@code.launchpad.net

Commit message

make ewma sampling take 23% of the time it takes now

Description of the change

make ewma sampling take 23% of the time it takes now

it also changes the rescale threshold so the difference is never bigger than 600 because:

In [21]: math.exp(717)
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)

/home/lucio/canonical/txstatsd/fix-memory-issues/<ipython console> in <module>()

OverflowError: math range error

To post a comment you must log in.
Revision history for this message
Sidnei da Silva (sidnei) wrote :

i *think* del self._values[:] gives a better hint to GC than self._values = [], but [citation needed]

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'txstatsd/stats/exponentiallydecayingsample.py'
2--- txstatsd/stats/exponentiallydecayingsample.py 2011-09-14 12:01:10 +0000
3+++ txstatsd/stats/exponentiallydecayingsample.py 2012-01-30 20:49:27 +0000
4@@ -1,4 +1,4 @@
5-
6+import bisect
7 import math
8 import random
9 import time
10@@ -19,10 +19,10 @@
11 library/publications/CormodeShkapenyukSrivastavaXu09.pdf>}
12 """
13
14- # 1 hour (in seconds)
15- RESCALE_THRESHOLD = 60 * 60
16+ # 10 minutes (in seconds)
17+ RESCALE_THRESHOLD = 60 * 10
18
19- def __init__(self, reservoir_size, alpha):
20+ def __init__(self, reservoir_size, alpha, wall_time=None):
21 """Creates a new C{ExponentiallyDecayingSample}.
22
23 @param reservoir_size: The number of samples to keep in the sampling
24@@ -30,7 +30,7 @@
25 @parama alpha: The exponential decay factor; the higher this is,
26 the more biased the sample will be towards newer values.
27 """
28- self._values = dict()
29+ self._values = []
30 self.alpha = alpha
31 self.reservoir_size = reservoir_size
32
33@@ -38,14 +38,17 @@
34 self.start_time = 0
35 self.next_scale_time = 0
36
37+ if wall_time is None:
38+ wall_time = time.time
39+ self._wall_time = wall_time
40 self.clear()
41
42 def clear(self):
43- self._values.clear()
44+ self._values = []
45 self.count = 0
46 self.start_time = self.tick()
47 self.next_scale_time = (
48- time.time() + ExponentiallyDecayingSample.RESCALE_THRESHOLD)
49+ self._wall_time() + self.RESCALE_THRESHOLD)
50
51 def size(self):
52 return min(self.reservoir_size, self.count)
53@@ -64,27 +67,24 @@
54 self.count += 1
55 new_count = self.count
56 if new_count <= self.reservoir_size:
57- self._values[priority] = value
58+ bisect.insort(self._values, (priority, value))
59 else:
60- keys = sorted(self._values.keys())
61- first = keys[0]
62+ first = self._values[0][0]
63
64 if first < priority:
65- if priority not in self._values:
66- self._values[priority] = value
67- del self._values[first]
68+ bisect.insort(self._values, (priority, value))
69+ self._values = self._values[1:]
70
71- now = time.time()
72+ now = self._wall_time()
73 next = self.next_scale_time
74 if now >= next:
75 self.rescale(now, next)
76
77 def get_values(self):
78- keys = sorted(self._values.keys())
79- return [self._values[k] for k in keys]
80+ return [v for (k, v) in self._values]
81
82 def tick(self):
83- return time.time()
84+ return self._wall_time()
85
86 def weight(self, t):
87 return math.exp(self.alpha * t)
88@@ -113,12 +113,12 @@
89 """
90
91 self.next_scale_time = (
92- now + ExponentiallyDecayingSample.RESCALE_THRESHOLD)
93+ now + self.RESCALE_THRESHOLD)
94 old_start_time = self.start_time
95 self.start_time = self.tick()
96- keys = sorted(self._values.keys())
97- for k in keys:
98- v = self._values[k]
99- del self._values[k]
100- self._values[k * math.exp(-self.alpha *
101- (self.start_time - old_start_time))] = v
102+
103+ new_values = []
104+ for k, v in self._values:
105+ nk = k * math.exp(-self.alpha * (self.start_time - old_start_time))
106+ new_values.append((nk, v))
107+ self._values = new_values
108
109=== modified file 'txstatsd/tests/stats/test_exponentiallydecayingsample.py'
110--- txstatsd/tests/stats/test_exponentiallydecayingsample.py 2011-09-14 12:01:10 +0000
111+++ txstatsd/tests/stats/test_exponentiallydecayingsample.py 2012-01-30 20:49:27 +0000
112@@ -1,3 +1,4 @@
113+import random
114
115 from unittest import TestCase
116
117@@ -26,7 +27,7 @@
118 for i in population:
119 sample.update(i)
120
121- self.assertEqual(sample.size(), 10, 'Should have 10 elements')
122+ self.assertEqual(sample.size(), 10)
123 self.assertEqual(len(sample.get_values()), 10,
124 'Should have 10 elements')
125 self.assertEqual(
126@@ -42,6 +43,27 @@
127 self.assertEqual(sample.size(), 100, 'Should have 100 elements')
128 self.assertEqual(len(sample.get_values()), 100,
129 'Should have 100 elements')
130+
131 self.assertEqual(
132 len(set(sample.get_values()).difference(set(population))), 0,
133- 'Should only have elements from the population')
134\ No newline at end of file
135+ 'Should only have elements from the population')
136+
137+ def test_ewma_sample_load(self):
138+
139+ _time = [10000]
140+
141+ def wtime():
142+ return _time[0]
143+
144+ sample = ExponentiallyDecayingSample(100, 0.99, wall_time=wtime)
145+ sample.RESCALE_THRESHOLD = 100
146+ sample.clear()
147+ for i in xrange(10000000):
148+ sample.update(random.normalvariate(0, 10))
149+ _time[0] += 1
150+
151+ self.assertEqual(sample.size(), 100)
152+ self.assertEqual(len(sample.get_values()), 100,
153+ 'Should have 100 elements')
154+ test_ewma_sample_load.skip = "takes too long to run"
155+

Subscribers

People subscribed via source and target branches