txStatsD

Merge lp:~sidnei/txstatsd/distinct-plugin-webstats into lp:~beuno/txstatsd/distinct-plugin-webstats

distinct-plugin-webstats
Merge into distinct-plugin-webstats

Proposed by Sidnei da Silva on 2012-03-14

Status:	Merged
Merged at revision:	13
Proposed branch:	lp:~sidnei/txstatsd/distinct-plugin-webstats
Merge into:	lp:~beuno/txstatsd/distinct-plugin-webstats
Diff against target:	423 lines (+183/-115); 2 files modified: distinctdb/distinctmetric.py (+60/-46), distinctdb/tests/test_distinct.py (+123/-69)
To merge this branch:	bzr merge lp:~sidnei/txstatsd/distinct-plugin-webstats
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Martin Albisetti		2012-03-14	Pending
Review via email: mp+97457@code.launchpad.net

Description of the change

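Refactor so that "top" and "count" are served as child URLs (sub-resources) of DistinctResource: the query methods get_distinct_count and get_distinct_top_value move onto DistinctMetricReporter, and a new JSONMethodResource renders the result of each method as JSON.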

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Martin Albisetti

Sidnei da Silva

 === modified file 'distinctdb/distinctmetric.py'
 --- distinctdb/distinctmetric.py	2012-03-12 23:18:39 +0000
 +++ distinctdb/distinctmetric.py	2012-03-14 16:56:19 +0000
@@ -1,13 +1,13 @@
  import json
  import time
  import threading
++from functools import partial
  import psycopg2
  import redis
  from zope.interface import implements
--from twisted.internet import reactor
--from twisted.internet.task import deferLater
++from twisted.internet import reactor, threads
  from twisted.web import server, resource
  from txstatsd.itxstatsd import IMetric
@@ -16,56 +16,40 @@
  ONE_DAY = 24 * ONE_HOUR
++class JSONMethodResource(resource.Resource):
++    """Renders the result of calling C{name} on C{target} as a json result."""
++    isLeaf = True
++
++    def __init__(self, target, name):
++        self.target = target
++        self.name = name
++
++    def _render_json_error(self, request, result):
++        """Fetch the data, make the request return"""
++        request.write(json.dumps({"error": result.getErrorMessage()}))
++        request.finish()
++
++    def _render_json_result(self, request, result):
++        """Fetch the data, make the request return"""
++        request.write(json.dumps({"result": result}))
++        request.finish()
++
++    def render_GET(self, request):
++        """Return the web request asynchronously."""
++        d = threads.deferToThread(getattr(self.target, self.name), **request.args)
++        d.addCallback(partial(self._render_json_result, request))
++        d.addErrback(partial(self._render_json_error, request))
++        return server.NOT_DONE_YET
++
++
  class DistinctResource(resource.Resource):
      """Returns statistics about unique values."""
--    isLeaf = True
      def __init__(self, reporter):
          resource.Resource.__init__(self)
          self.reporter = reporter
--
--    def _return_request(self, request):
--        """Fetch the data, make the request return"""
--        #XXX: No idea how to properly distinguish which method to call
--        data = self.get_distinct_count(request.args['since'],
--                                       request.args['until'])
--        request.write(json.dumps(data))
--        request.finish()
--
--    def render_GET(self, request):
--        """Return the web request asynchronously."""
--        d = deferLater(reactor, 5, lambda: request)
--        d.addCallback(self._return_request)
--        return server.NOT_DONE_YET
--
--    def get_distinct_count(self, since, until):
--        """Get a distinct count for a path between certain ranges."""
--        since_bucket = self.reporter.get_bucket_no(since)
--        until_bucket = self.reporter.get_bucket_no(until)
--        path = self.reporter.prefix + self.reporter.name
--        c = psycopg2.connect(self.reporter.dsn)
--        cr = c.cursor()
--        cr.execute("SELECT COUNT(DISTINCT value) FROM points "
--                   "INNER JOIN paths ON (paths.id = points.path_id) "
--                   "WHERE paths.path = %s AND bucket BETWEEN %s AND %s", (
--                       path, since_bucket, until_bucket,))
--        row = cr.fetchone()
--        return row[0]
--
--    def get_distinct_top_value(self, since, until, how_many=20):
--        """Get the top distinct values for a path between certain ranges."""
--        since_bucket = self.reporter.get_bucket_no(since)
--        until_bucket = self.reporter.get_bucket_no(until)
--        path = self.reporter.prefix + self.reporter.name
--        c = psycopg2.connect(self.reporter.dsn)
--        cr = c.cursor()
--        cr.execute("SELECT value, COUNT(value) AS cnt FROM points "
--                   "INNER JOIN paths ON (paths.id = points.path_id) "
--                   "WHERE paths.path = %s AND bucket BETWEEN %s AND %s"
--                   "GROUP BY value ORDER BY cnt DESC LIMIT %s", (
--                       path, since_bucket, until_bucket, how_many,))
--        rows = cr.fetchall()
--        return rows
++        self.putChild("top", JSONMethodResource(self.reporter, "get_distinct_top_value"))
++        self.putChild("count", JSONMethodResource(self.reporter, "get_distinct_count"))
  class DistinctMetricReporter(object):
@@ -209,3 +193,33 @@
          for item, value in items.iteritems():
              metrics.append((self.prefix + self.name + item, value, timestamp))
          return metrics
++
++    def get_distinct_count(self, since, until):
++        """Get a distinct count for a path between certain ranges."""
++        since_bucket = self.get_bucket_no(since)
++        until_bucket = self.get_bucket_no(until)
++        path = self.prefix + self.name
++        c = psycopg2.connect(self.dsn)
++        cr = c.cursor()
++        cr.execute("SELECT COUNT(DISTINCT value) FROM points "
++                   "INNER JOIN paths ON (paths.id = points.path_id) "
++                   "WHERE paths.path = %s AND bucket BETWEEN %s AND %s", (
++                       path, since_bucket, until_bucket,))
++        row = cr.fetchone()
++        return row[0]
++
++    def get_distinct_top_value(self, since, until, how_many=20):
++        """Get the top distinct values for a path between certain ranges."""
++        since_bucket = self.get_bucket_no(since)
++        until_bucket = self.get_bucket_no(until)
++        path = self.prefix + self.name
++        c = psycopg2.connect(self.dsn)
++        cr = c.cursor()
++        cr.execute("SELECT value, COUNT(value) AS cnt FROM points "
++                   "INNER JOIN paths ON (paths.id = points.path_id) "
++                   "WHERE paths.path = %s AND bucket BETWEEN %s AND %s"
++                   "GROUP BY value ORDER BY cnt DESC LIMIT %s", (
++                       path, since_bucket, until_bucket, how_many,))
++        rows = cr.fetchall()
++        return rows
++
 === modified file 'distinctdb/tests/test_distinct.py'
 --- distinctdb/tests/test_distinct.py	2012-03-12 22:36:36 +0000
 +++ distinctdb/tests/test_distinct.py	2012-03-14 16:56:19 +0000
@@ -1,10 +1,12 @@
  # Copyright (C) 2011 Canonical
  # All Rights Reserved
++import json
  import ConfigParser
  from cStringIO import StringIO
  import os
  import time
++
  try:
      from subprocess import check_output
  except ImportError:
@@ -21,12 +23,32 @@
  from twisted.internet import reactor
  from twisted.plugin import getPlugins
  from twisted.plugins import distinctdbplugin
++from twisted.web.test.test_web import DummyRequest
++
  from txstatsd.itxstatsd import IMetricFactory
  from txstatsd import service
  from distinctdb import distinctmetric as distinct
++class DummyReporter(object):
++
++    def __init__(self):
++        self.called = []
++
++    def get_foo(self):
++        self.called.append(("get_foo", ()))
++        return "foo"
++
++    def get_distinct_count(self, since, until):
++        self.called.append(("get_distinct_count", (since, until)))
++        return 42
++
++    def get_distinct_top_value(self, since, until, how_many=20):
++        self.called.append(("get_distinct_top_value", (since, until, how_many)))
++        return [("one", 1), ("two", 1)]
++
++
  class TestDistinctMetricReporter(TestCase):
      def test_get_bucket_no(self):
@@ -102,8 +124,62 @@
          self.assertEquals(dmr.dsn, "dbdsn")
++class TestJSONMethodResource(TestCase):
++
++    def test_render_result_as_json(self):
++        reporter = DummyReporter()
++        request = DummyRequest([])
++        resource = distinct.JSONMethodResource(reporter, "get_foo")
++        def check(result):
++            self.assertEquals({"result": "foo"},
++                              json.loads("".join(request.written)))
++        d = request.notifyFinish()
++        d.addCallback(check)
++        d.addErrback(self.fail)
++        request.render(resource)
++        return d
++
++
  class TestDistinctResource(TestCase):
++    def test_render_top_resource(self):
++        reporter = DummyReporter()
++        request = DummyRequest([])
++        request.args = {"since": time.time(), "until": time.time() + 1}
++        resource = distinct.DistinctResource(reporter)
++        child_resource = resource.getChildWithDefault("top", request)
++        def check(result):
++            self.assertEquals(json.dumps({"result": [("one", 1), ("two", 1)]}),
++                              "".join(request.written))
++        d = request.notifyFinish()
++        d.addCallback(check)
++        request.render(child_resource)
++        return d
++
++    def test_render_count_resource(self):
++        reporter = DummyReporter()
++        request = DummyRequest([])
++        request.args = {"since": time.time(), "until": time.time() + 1}
++        resource = distinct.DistinctResource(reporter)
++        child_resource = resource.getChildWithDefault("count", request)
++        def check(result):
++            self.assertEquals(json.dumps({"result": 42}),
++                              "".join(request.written))
++        d = request.notifyFinish()
++        d.addCallback(check)
++        request.render(child_resource)
++        return d
++
++
++class TestPlugin(TestCase):
++
++    def test_factory(self):
++        self.assertTrue(distinctdbplugin.distinct_metric_factory in \
++                        list(getPlugins(IMetricFactory)))
++
++
++class TestDatabase(TestCase):
++
      def setUp(self):
          rootdir = check_output(["bzr", "root"]).strip()
          dsn_file = os.path.join(rootdir, "tmp", "pg.dsn")
@@ -123,10 +199,45 @@
          dmr.update(name)
          dmr._save_bucket(dmr.bucket, bucket_no)
++
++class TestDatabaseMetricStorage(TestDatabase):
++
++    def test_connect(self):
++        cr = self.conn.cursor()
++        cr.execute("SELECT 0")
++        result = cr.fetchall()
++        self.assertTrue(result, [(0,)])
++
++    def test_create_metric_id(self):
++        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
++        dmr._save_bucket({}, 0)
++        cr = self.conn.cursor()
++        cr.execute("SELECT * FROM paths WHERE path = 'test'")
++        cr.execute("SELECT * FROM paths")
++        self.assertEquals(len(cr.fetchall()), 1)
++
++    def test_find_saved_data(self):
++        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
++        dmr.update("one")
++        dmr.update("one")
++        dmr.update("two")
++        dmr._save_bucket(dmr.bucket, 0)
++        cr = self.conn.cursor()
++        cr.execute("SELECT * FROM points ORDER BY value")
++        rows = cr.fetchall()
++        self.assertEquals(rows, [(dmr.metric_id, 0, "one", 2),
++                                 (dmr.metric_id, 0, "two", 1)])
++
++    def test_load_metric_id(self):
++        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
++        dmr._save_bucket({}, 0)
++        dmr2 = distinct.DistinctMetricReporter("test", dsn=self.dsn)
++        dmr2._save_bucket({}, 0)
++        self.assertEquals(dmr.metric_id, dmr2.metric_id)
++
      def test_get_distinct_count(self):
          """Test get_distinct_count."""
          dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dr = distinct.DistinctResource(dmr)
          t1 = time.time()
          # add a day
          t2 = time.time() + 60 * 60 * 24 + 1
@@ -135,32 +246,31 @@
          # Make sure we're using different buckets
          self.assertNotEqual(b1, b2)
          self._create_test_data_points("one", b1)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 1)
          self._create_test_data_points("two", b1)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 2)
          # Add it again, make sure the distinct is filtering it out
          self._create_test_data_points("two", b1)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 2)
          # Now we add to a newer bucket, but still within range
          self._create_test_data_points("three", b2)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 3)
          # Now to a older bucket, out of range
          self._create_test_data_points("zero", b1 - 1)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 3)
          # Now to a newer bucket, out of range
          self._create_test_data_points("infinity", b2 + 1)
--        count = dr.get_distinct_count(t1, t2)
++        count = dmr.get_distinct_count(t1, t2)
          self.assertEqual(count, 3)
      def test_get_distinct_top_value(self):
          """Test get_distinct_top_value."""
          dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dr = distinct.DistinctResource(dmr)
          t1 = time.time()
          # add a day
          t2 = time.time() + 60 * 60 * 24 + 1
@@ -169,80 +279,24 @@
          # Make sure we're using different buckets
          self.assertNotEqual(b1, b2)
          self._create_test_data_points("one", b1)
--        values = dr.get_distinct_top_value(t1, t2)
++        values = dmr.get_distinct_top_value(t1, t2)
          self.assertEqual(values, [("one", 1)])
          self._create_test_data_points("two", b1)
--        values = dr.get_distinct_top_value(t1, t2)
++        values = dmr.get_distinct_top_value(t1, t2)
          self.assertEqual(values, [("one", 1), ("two", 1)])
          self._create_test_data_points("one", b1)
--        values = dr.get_distinct_top_value(t1, t2)
++        values = dmr.get_distinct_top_value(t1, t2)
          self.assertEqual(values, [("one", 2), ("two", 1)])
          # Create a third, and make "two" have 2 values so "three" is last
          self._create_test_data_points("two", b1)
          self._create_test_data_points("three", b1)
--        values = dr.get_distinct_top_value(t1, t2)
++        values = dmr.get_distinct_top_value(t1, t2)
          self.assertEqual(values, [("one", 2), ("two", 2), ("three", 1)])
          # Only get the top 2
--        values = dr.get_distinct_top_value(t1, t2, how_many=2)
++        values = dmr.get_distinct_top_value(t1, t2, how_many=2)
          self.assertEqual(values, [("one", 2), ("two", 2)])
--class TestPlugin(TestCase):
--
--    def test_factory(self):
--        self.assertTrue(distinctdbplugin.distinct_metric_factory in \
--                        list(getPlugins(IMetricFactory)))
--
--
--class TestDatabase(TestCase):
--
--    def setUp(self):
--        rootdir = check_output(["bzr", "root"]).strip()
--        dsn_file = os.path.join(rootdir, "tmp", "pg.dsn")
--        self.dsn = open(dsn_file).read()
--        self.conn = psycopg2.connect(self.dsn)
--
--    def tearDown(self):
--        cr = self.conn.cursor()
--        cr.execute("rollback")
--        cr.execute("DELETE FROM paths")
--        cr.execute("DELETE FROM points")
--        cr.execute("commit")
--
--    def test_connect(self):
--        cr = self.conn.cursor()
--        cr.execute("SELECT 0")
--        result = cr.fetchall()
--        self.assertTrue(result, [(0,)])
--
--    def test_create_metric_id(self):
--        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dmr._save_bucket({}, 0)
--        cr = self.conn.cursor()
--        cr.execute("SELECT * FROM paths WHERE path = 'test'")
--        cr.execute("SELECT * FROM paths")
--        self.assertEquals(len(cr.fetchall()), 1)
--
--    def test_find_saved_data(self):
--        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dmr.update("one")
--        dmr.update("one")
--        dmr.update("two")
--        dmr._save_bucket(dmr.bucket, 0)
--        cr = self.conn.cursor()
--        cr.execute("SELECT * FROM points ORDER BY value")
--        rows = cr.fetchall()
--        self.assertEquals(rows, [(dmr.metric_id, 0, "one", 2),
--                                 (dmr.metric_id, 0, "two", 1)])
--
--    def test_load_metric_id(self):
--        dmr = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dmr._save_bucket({}, 0)
--        dmr2 = distinct.DistinctMetricReporter("test", dsn=self.dsn)
--        dmr2._save_bucket({}, 0)
--        self.assertEquals(dmr.metric_id, dmr2.metric_id)
--
--
  class TestRedis(TestCase):
      def setUp(self):

txStatsD

Merge lp:~sidnei/txstatsd/distinct-plugin-webstats into lp:~beuno/txstatsd/distinct-plugin-webstats

Commit message

Description of the change

Preview Diff

Subscribers