Merge lp:~beuno/txstatsd/distinct-plugin-webstats into lp:~txstatsd-dev/txstatsd/distinct-plugin

Proposed by Martin Albisetti
Status: Merged
Approved by: Sidnei da Silva
Approved revision: 13
Merged at revision: 11
Proposed branch: lp:~beuno/txstatsd/distinct-plugin-webstats
Merge into: lp:~txstatsd-dev/txstatsd/distinct-plugin
Diff against target: 307 lines (+214/-2)
2 files modified
distinctdb/distinctmetric.py (+74/-1)
distinctdb/tests/test_distinct.py (+140/-1)
To merge this branch: bzr merge lp:~beuno/txstatsd/distinct-plugin-webstats
Reviewer Review Type Date Requested Status
Sidnei da Silva Approve
Review via email: mp+96489@code.launchpad.net

Commit message

Get distinct user stats.

Description of the change

Get distinct user stats.

To post a comment you must log in.
Revision history for this message
Lucio Torre (lucio.torre) wrote :

1- There's no need to copy all the attributes of DistinctMetricReporter onto DistinctResource:
20 + def __init__(self, reporter):
21 + resource.Resource.__init__(self)
22 + self.reporter = reporter
23 + self.dsn = reporter.dsn
24 + self.prefix = reporter.prefix
25 + self.name = reporter.name
26 + self.bucket_size = reporter.bucket_size

2- t1 and t2 both fall in the same bucket! (It's only a ten-second difference.)

3- You should also exercise the distinct part of the query by adding a repeated value to the bucket and making sure it won't add to the count.

Revision history for this message
Lucio Torre (lucio.torre) wrote :

Maybe we want the get_distinct_count function to take a DateTime object instead of a timestamp? Or both?

Revision history for this message
Sidnei da Silva (sidnei) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'distinctdb/distinctmetric.py'
2--- distinctdb/distinctmetric.py 2012-03-08 16:30:49 +0000
3+++ distinctdb/distinctmetric.py 2012-03-14 18:11:18 +0000
4@@ -1,11 +1,14 @@
5+import json
6 import time
7 import threading
8+from functools import partial
9
10 import psycopg2
11 import redis
12
13 from zope.interface import implements
14-from twisted.internet import reactor
15+from twisted.internet import reactor, threads
16+from twisted.web import server, resource
17 from txstatsd.itxstatsd import IMetric
18
19 ONE_MINUTE = 60
20@@ -13,6 +16,42 @@
21 ONE_DAY = 24 * ONE_HOUR
22
23
class JSONMethodResource(resource.Resource):
    """Renders the result of calling C{name} on C{target} as a json result.

    The method is looked up on C{target} by name and run in a thread via
    C{deferToThread}; the request is finished from the deferred's callbacks.
    A successful call is written as C{{"result": ...}} and a failure as
    C{{"error": ...}}.
    """
    isLeaf = True

    def __init__(self, target, name):
        """
        @param target: object providing the method to call.
        @param name: name of the method on C{target} to call on each GET.
        """
        # Initialise the base class the same way DistinctResource does, so
        # the resource carries the state twisted.web sets up in __init__.
        resource.Resource.__init__(self)
        self.target = target
        self.name = name

    def _render_json_error(self, request, result):
        """Write the failure's error message as json and finish the request.

        @param request: the request being answered.
        @param result: the failure passed to the errback; only its
            C{getErrorMessage()} is sent to the client.
        """
        request.write(json.dumps({"error": result.getErrorMessage()}))
        request.finish()

    def _render_json_result(self, request, result):
        """Write the method's return value as json and finish the request.

        @param request: the request being answered.
        @param result: the value returned by the target method; it must be
            serializable by C{json.dumps}.
        """
        request.write(json.dumps({"result": result}))
        request.finish()

    def render_GET(self, request):
        """Return the web request asynchronously.

        The query arguments of the request are passed to the target method
        as keyword arguments.
        """
        # NOTE(review): twisted.web normally fills request.args with lists
        # of strings per key; they are forwarded untouched here, so the
        # target method sees whatever the request carries -- confirm that
        # callers / target methods cope with that shape.
        method = getattr(self.target, self.name)
        # Tolerate a request without parsed arguments (args left as None).
        kwargs = request.args or {}
        d = threads.deferToThread(method, **kwargs)
        # The errback is added after the callback so that a failure while
        # serializing the result is also reported as a json error.
        d.addCallback(partial(self._render_json_result, request))
        d.addErrback(partial(self._render_json_error, request))
        return server.NOT_DONE_YET
48+
49+
class DistinctResource(resource.Resource):
    """Web resource exposing statistics about unique values.

    Publishes two json children backed by methods of the reporter:
    C{top} (C{get_distinct_top_value}) and C{count} (C{get_distinct_count}).
    """

    def __init__(self, reporter):
        resource.Resource.__init__(self)
        self.reporter = reporter
        # (child path, reporter method) pairs, registered in this order.
        children = (
            ("top", "get_distinct_top_value"),
            ("count", "get_distinct_count"),
        )
        for child_path, method_name in children:
            child = JSONMethodResource(self.reporter, method_name)
            self.putChild(child_path, child)
58+
59+
60 class DistinctMetricReporter(object):
61 """
62 Keeps an mesurement of the distinct numbers of items seen and the times
63@@ -60,6 +99,10 @@
64 timestamp = self.wall_time_func()
65 return int(timestamp / (self.bucket_size))
66
67+ def getResource(self):
68+ """Return an http resource to represent this."""
69+ return DistinctResource(self)
70+
71 def process(self, fields):
72 self.update(fields[0])
73
74@@ -151,3 +194,33 @@
75 for item, value in items.iteritems():
76 metrics.append((self.prefix + self.name + item, value, timestamp))
77 return metrics
78+
79+ def get_distinct_count(self, since, until):
80+ """Get a distinct count for a path between certain ranges."""
81+ since_bucket = self.get_bucket_no(since)
82+ until_bucket = self.get_bucket_no(until)
83+ path = self.prefix + self.name
84+ c = psycopg2.connect(self.dsn)
85+ cr = c.cursor()
86+ cr.execute("SELECT COUNT(DISTINCT value) FROM points "
87+ "INNER JOIN paths ON (paths.id = points.path_id) "
88+ "WHERE paths.path = %s AND bucket BETWEEN %s AND %s", (
89+ path, since_bucket, until_bucket,))
90+ row = cr.fetchone()
91+ return row[0]
92+
93+ def get_distinct_top_value(self, since, until, how_many=20):
94+ """Get the top distinct values for a path between certain ranges."""
95+ since_bucket = self.get_bucket_no(since)
96+ until_bucket = self.get_bucket_no(until)
97+ path = self.prefix + self.name
98+ c = psycopg2.connect(self.dsn)
99+ cr = c.cursor()
100+ cr.execute("SELECT value, COUNT(value) AS cnt FROM points "
101+ "INNER JOIN paths ON (paths.id = points.path_id) "
102+ "WHERE paths.path = %s AND bucket BETWEEN %s AND %s"
103+ "GROUP BY value ORDER BY cnt DESC LIMIT %s", (
104+ path, since_bucket, until_bucket, how_many,))
105+ rows = cr.fetchall()
106+ return rows
107+
108
109=== modified file 'distinctdb/tests/test_distinct.py'
110--- distinctdb/tests/test_distinct.py 2012-03-08 16:30:49 +0000
111+++ distinctdb/tests/test_distinct.py 2012-03-14 18:11:18 +0000
112@@ -1,14 +1,17 @@
113 # Copyright (C) 2011 Canonical
114 # All Rights Reserved
115
116+import json
117 import ConfigParser
118 from cStringIO import StringIO
119 import os
120 import time
121+
122 try:
123 from subprocess import check_output
124 except ImportError:
125 import subprocess
126+
127 def check_output(args):
128 return subprocess.Popen(args,
129 stdout=subprocess.PIPE).communicate()[0]
130@@ -20,12 +23,32 @@
131 from twisted.internet import reactor
132 from twisted.plugin import getPlugins
133 from twisted.plugins import distinctdbplugin
134+from twisted.web.test.test_web import DummyRequest
135+
136 from txstatsd.itxstatsd import IMetricFactory
137 from txstatsd import service
138
139 from distinctdb import distinctmetric as distinct
140
141
class DummyReporter(object):
    """Stand-in reporter that records its calls and returns canned values.

    Every call appends a C{(method name, positional args tuple)} entry to
    C{self.called}.
    """

    def __init__(self):
        self.called = list()

    def _record(self, method_name, *call_args):
        """Remember that C{method_name} was invoked with C{call_args}."""
        self.called.append((method_name, call_args))

    def get_foo(self):
        self._record("get_foo")
        return "foo"

    def get_distinct_count(self, since, until):
        self._record("get_distinct_count", since, until)
        return 42

    def get_distinct_top_value(self, since, until, how_many=20):
        self._record("get_distinct_top_value", since, until, how_many)
        return [("one", 1), ("two", 1)]
158+
159+
160 class TestDistinctMetricReporter(TestCase):
161
162 def test_get_bucket_no(self):
163@@ -101,6 +124,53 @@
164 self.assertEquals(dmr.dsn, "dbdsn")
165
166
class TestJSONMethodResource(TestCase):
    """Tests for rendering a method's return value as json."""

    def test_render_result_as_json(self):
        """The value returned by the method is written as a json result."""
        request = DummyRequest([])
        json_resource = distinct.JSONMethodResource(DummyReporter(), "get_foo")

        def verify_body(_):
            body = "".join(request.written)
            self.assertEquals({"result": "foo"}, json.loads(body))

        # Register the checks before rendering; they fire once the request
        # is finished by the resource.
        finished = request.notifyFinish()
        finished.addCallback(verify_body)
        finished.addErrback(self.fail)
        request.render(json_resource)
        return finished
181+
182+
class TestDistinctResource(TestCase):
    """Tests for the children published by DistinctResource."""

    def _assert_child_renders(self, child_name, expected_result):
        """Render child C{child_name} and check the json body it writes.

        Returns the deferred that fires when the request is finished.
        """
        request = DummyRequest([])
        request.args = {"since": time.time(), "until": time.time() + 1}
        root = distinct.DistinctResource(DummyReporter())
        child = root.getChildWithDefault(child_name, request)
        expected_body = json.dumps({"result": expected_result})

        def verify_body(_):
            self.assertEquals(expected_body, "".join(request.written))

        finished = request.notifyFinish()
        finished.addCallback(verify_body)
        request.render(child)
        return finished

    def test_render_top_resource(self):
        return self._assert_child_renders("top", [("one", 1), ("two", 1)])

    def test_render_count_resource(self):
        return self._assert_child_renders("count", 42)
212+
213+
214 class TestPlugin(TestCase):
215
216 def test_factory(self):
217@@ -123,6 +193,15 @@
218 cr.execute("DELETE FROM points")
219 cr.execute("commit")
220
221+ def _create_test_data_points(self, name, bucket_no):
222+ """Easily create test data."""
223+ dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
224+ dmr.update(name)
225+ dmr._save_bucket(dmr.bucket, bucket_no)
226+
227+
228+class TestDatabaseMetricStorage(TestDatabase):
229+
230 def test_connect(self):
231 cr = self.conn.cursor()
232 cr.execute("SELECT 0")
233@@ -176,6 +255,67 @@
234 dmr2._save_bucket({}, 0)
235 self.assertEquals(dmr.metric_id, dmr2.metric_id)
236
237+ def test_get_distinct_count(self):
238+ """Test get_distinct_count."""
239+ dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
240+ t1 = time.time()
241+ # add a day
242+ t2 = time.time() + 60 * 60 * 24 + 1
243+ b1 = dmr.get_bucket_no(t1)
244+ b2 = dmr.get_bucket_no(t2)
245+ # Make sure we're using different buckets
246+ self.assertNotEqual(b1, b2)
247+ self._create_test_data_points("one", b1)
248+ count = dmr.get_distinct_count(t1, t2)
249+ self.assertEqual(count, 1)
250+ self._create_test_data_points("two", b1)
251+ count = dmr.get_distinct_count(t1, t2)
252+ self.assertEqual(count, 2)
253+ # Add it again, make sure the distinct is filtering it out
254+ self._create_test_data_points("two", b1)
255+ count = dmr.get_distinct_count(t1, t2)
256+ self.assertEqual(count, 2)
257+ # Now we add to a newer bucket, but still within range
258+ self._create_test_data_points("three", b2)
259+ count = dmr.get_distinct_count(t1, t2)
260+ self.assertEqual(count, 3)
261+ # Now to a older bucket, out of range
262+ self._create_test_data_points("zero", b1 - 1)
263+ count = dmr.get_distinct_count(t1, t2)
264+ self.assertEqual(count, 3)
265+ # Now to a newer bucket, out of range
266+ self._create_test_data_points("infinity", b2 + 1)
267+ count = dmr.get_distinct_count(t1, t2)
268+ self.assertEqual(count, 3)
269+
270+ def test_get_distinct_top_value(self):
271+ """Test get_distinct_top_value."""
272+ dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
273+ t1 = time.time()
274+ # add a day
275+ t2 = time.time() + 60 * 60 * 24 + 1
276+ b1 = dmr.get_bucket_no(t1)
277+ b2 = dmr.get_bucket_no(t2)
278+ # Make sure we're using different buckets
279+ self.assertNotEqual(b1, b2)
280+ self._create_test_data_points("one", b1)
281+ values = dmr.get_distinct_top_value(t1, t2)
282+ self.assertEqual(values, [("one", 1)])
283+ self._create_test_data_points("two", b1)
284+ values = dmr.get_distinct_top_value(t1, t2)
285+ self.assertEqual(values, [("one", 1), ("two", 1)])
286+ self._create_test_data_points("one", b1)
287+ values = dmr.get_distinct_top_value(t1, t2)
288+ self.assertEqual(values, [("one", 2), ("two", 1)])
289+ # Create a third, and make "two" have 2 values so "three" is last
290+ self._create_test_data_points("two", b1)
291+ self._create_test_data_points("three", b1)
292+ values = dmr.get_distinct_top_value(t1, t2)
293+ self.assertEqual(values, [("one", 2), ("two", 2), ("three", 1)])
294+ # Only get the top 2
295+ values = dmr.get_distinct_top_value(t1, t2, how_many=2)
296+ self.assertEqual(values, [("one", 2), ("two", 2)])
297+
298
299 class TestRedis(TestCase):
300
301@@ -246,6 +386,5 @@
302 dmr1 = distinct.DistinctMetricReporter("somename")
303 dmr2 = distinct.DistinctMetricReporter("othename")
304 dmr3 = distinct.DistinctMetricReporter("somename")
305-
306 self.assertEquals(dmr1.bucket_name_for(1), dmr3.bucket_name_for(1))
307 self.assertNotEquals(dmr1.bucket_name_for(1), dmr2.bucket_name_for(1))

Subscribers

People subscribed via source and target branches