txStatsD

Merge lp:~lucio.torre/txstatsd/use-uniform-sampling into lp:txstatsd

use-uniform-sampling
Merge into trunk

Proposed by Lucio Torre on 2012-05-22

Status:	Merged
Approved by:	Sidnei da Silva on 2012-05-24
Approved revision:	91
Merged at revision:	91
Proposed branch:	lp:~lucio.torre/txstatsd/use-uniform-sampling
Merge into:	lp:txstatsd
Diff against target:	278 lines (+71/-79) 4 files modified txstatsd/metrics/timermetric.py (+22/-29) txstatsd/server/configurableprocessor.py (+2/-1) txstatsd/tests/metrics/test_timermetric.py (+6/-22) txstatsd/tests/test_configurableprocessor.py (+41/-27)
To merge this branch:	bzr merge lp:~lucio.torre/txstatsd/use-uniform-sampling
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Sidnei da Silva		2012-05-22	Approve on 2012-05-24
Review via email: mp+106880@code.launchpad.net

Commit message

Drop exponential sampling for uniform sampling.

Description of the change

Drop exponential sampling in favor of uniform sampling.
Between each report() call we create a bucket where we sample uniformly from the values seen in the period.
This will produce spikier metrics, but with more information and a more exact rate; it also means dropping the Xmin_rate metrics (1min_rate, 5min_rate, 15min_rate).

It is also cheaper on the CPU.

Revision history for this message

Sidnei da Silva (sidnei) wrote on 2012-05-24:

Looks good, works great. Not much improvement in CPU, but meh. +1!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Lucio Torre

txStatsD Developers

 === modified file 'txstatsd/metrics/timermetric.py'
 --- txstatsd/metrics/timermetric.py	2012-02-09 02:02:42 +0000
 +++ txstatsd/metrics/timermetric.py	2012-05-22 19:16:23 +0000
@@ -6,8 +6,8 @@
  from txstatsd.metrics.histogrammetric import HistogramMetricReporter
  from txstatsd.metrics.metermetric import MeterMetricReporter
  from txstatsd.metrics.metric import Metric
--from txstatsd.stats.exponentiallydecayingsample \
--    import ExponentiallyDecayingSample
++from txstatsd.stats.uniformsample \
++    import UniformSample
  class TimerMetric(Metric):
@@ -69,30 +69,25 @@
              prefix += "."
          self.prefix = prefix
--        sample = ExponentiallyDecayingSample(1028, 0.015)
++        sample = UniformSample(1028)
          self.histogram = HistogramMetricReporter(sample)
--        self.meter = MeterMetricReporter(
--            "calls", wall_time_func=self.wall_time_func)
++        # total number of values seen
++        self.count = 0
          self.clear()
--    def clear(self):
++    def clear(self, timestamp=None):
          """Clears all recorded durations."""
          self.histogram.clear()
--
--    def count(self):
--        return self.histogram.count
--
--    def fifteen_minute_rate(self):
--        return self.meter.fifteen_minute_rate()
--
--    def five_minute_rate(self):
--        return self.meter.five_minute_rate()
--
--    def mean_rate(self):
--        return self.meter.mean_rate()
--
--    def one_minute_rate(self):
--        return self.meter.one_minute_rate()
++        if timestamp is None:
++            timestamp = self.wall_time_func()
++        self.last_time = float(timestamp)
++
++    def rate(self, timestamp):
++        """The number of values seen since last clear."""
++        dt = (timestamp - self.last_time)
++        if dt == 0:
++            return 0
++        return self.histogram.count / dt
      def max(self):
          """Returns the longest recorded duration."""
@@ -132,13 +127,12 @@
          @param duration: The length of the duration in seconds.
          """
++        self.count += 1
          if duration >= 0:
              self.histogram.update(duration)
--            self.meter.mark()
      def tick(self):
--        """Updates the moving averages."""
--        self.meter.tick()
++        pass
      def report(self, timestamp):
          # median, 75, 95, 98, 99, 99.9 percentile
@@ -150,12 +144,11 @@
                   ".stddev": self.std_dev(),
                   ".99percentile": percentiles[4],
                   ".999percentile": percentiles[5],
--                 ".count": self.meter.count,
--                 ".1min_rate": self.meter.one_minute_rate(),
--                 ".5min_rate": self.meter.five_minute_rate(),
--                 ".15min_rate": self.meter.fifteen_minute_rate()}
--
++                 ".count": self.count,
++                 ".rate": self.rate(timestamp),
++            }
          for item, value in items.iteritems():
              metrics.append((self.prefix + self.name + item,
                              round(value, 6), timestamp))
++        self.clear(timestamp)
          return metrics
 === modified file 'txstatsd/server/configurableprocessor.py'
 --- txstatsd/server/configurableprocessor.py	2012-02-07 21:11:42 +0000
 +++ txstatsd/server/configurableprocessor.py	2012-05-22 19:16:23 +0000
@@ -40,7 +40,8 @@
      def compose_timer_metric(self, key, duration):
          if not key in self.timer_metrics:
--            metric = TimerMetricReporter(key, prefix=self.message_prefix)
++            metric = TimerMetricReporter(key,
++                wall_time_func=self.time_function, prefix=self.message_prefix)
              self.timer_metrics[key] = metric
          self.timer_metrics[key].update(duration)
 === modified file 'txstatsd/tests/metrics/test_timermetric.py'
 --- txstatsd/tests/metrics/test_timermetric.py	2011-09-14 12:01:10 +0000
 +++ txstatsd/tests/metrics/test_timermetric.py	2012-05-22 19:16:23 +0000
@@ -1,5 +1,6 @@
  import math
++import time
  from twisted.trial.unittest import TestCase
@@ -9,7 +10,6 @@
  class TestBlankTimerMetric(TestCase):
      def setUp(self):
          self.timer = TimerMetricReporter('test')
--        self.timer.tick()
      def test_max(self):
          self.assertEqual(
@@ -28,7 +28,7 @@
      def test_count(self):
          self.assertEqual(
--            self.timer.count(), 0,
++            self.timer.count, 0,
              'Should have a count of zero')
      def test_std_dev(self):
@@ -54,26 +54,11 @@
              percentiles[4], 0,
              'Should have p99.9 of zero')
--    def test_mean_rate(self):
--        self.assertEqual(
--            self.timer.mean_rate(), 0,
--            'Should have a mean rate of zero')
--
--    def test_one_minute_rate(self):
--        self.assertEqual(
--            self.timer.one_minute_rate(), 0,
++    def test_rate(self):
++        self.assertEqual(
++            self.timer.rate(time.time()), 0,
              'Should have a one-minute rate of zero`')
--    def test_five_minute_rate(self):
--        self.assertEqual(
--            self.timer.five_minute_rate(), 0,
--            'Should have a five-minute rate of zero')
--
--    def test_fifteen_minute_rate(self):
--        self.assertEqual(
--            self.timer.fifteen_minute_rate(), 0,
--            'Should have a fifteen-minute rate of zero')
--
      def test_no_values(self):
          self.assertEqual(
              len(self.timer.get_values()), 0,
@@ -83,7 +68,6 @@
  class TestTimingSeriesEvents(TestCase):
      def setUp(self):
          self.timer = TimerMetricReporter('test')
--        self.timer.tick()
          self.timer.update(10)
          self.timer.update(20)
          self.timer.update(20)
@@ -92,7 +76,7 @@
      def test_count(self):
          self.assertEqual(
--            self.timer.count(), 5,
++            self.timer.count, 5,
              'Should record the count')
      def test_min(self):
 === modified file 'txstatsd/tests/test_configurableprocessor.py'
 --- txstatsd/tests/test_configurableprocessor.py	2012-05-14 19:18:27 +0000
 +++ txstatsd/tests/test_configurableprocessor.py	2012-05-22 19:16:23 +0000
@@ -64,29 +64,40 @@
          If a single timer with a single data point is present, all
          percentiles will be set to the same value.
          """
++
++        _now = 40
++
          configurable_processor = ConfigurableMessageProcessor(
--            time_function=lambda: 42)
++            time_function=lambda: _now)
          configurable_processor.process("glork:24|ms")
++        _now = 42
++
          messages = configurable_processor.flush()
--
--        self.assertEqual(('glork.15min_rate', 0.0, 42), messages[0])
--        self.assertEqual(('glork.1min_rate', 0.0, 42), messages[1])
--        self.assertEqual(('glork.5min_rate', 0.0, 42), messages[2])
--        self.assertEqual(("glork.999percentile", 24.0, 42), messages[3])
--        self.assertEqual(("glork.99percentile", 24.0, 42), messages[4])
--        self.assertEqual(("glork.count", 1., 42), messages[5])
--        self.assertEqual(("glork.max", 24.0, 42), messages[6])
--        self.assertEqual(("glork.mean", 24.0, 42), messages[7])
--        self.assertEqual(("glork.min", 24.0, 42), messages[8])
--        self.assertEqual(("glork.stddev", 0.0, 42), messages[9])
++        messages.sort()
++
++        expected = [
++            ("glork.999percentile", 24.0, 42),
++            ("glork.99percentile", 24.0, 42),
++            ('glork.count', 1.0, 42),
++            ("glork.max", 24.0, 42),
++            ("glork.mean", 24.0, 42),
++            ("glork.min", 24.0, 42),
++            ('glork.rate', 0.5, 42),
++            ("glork.stddev", 0.0, 42),
++            ]
++        expected.sort()
++
++        for e, f in zip(expected, messages):
++            self.assertEqual(e, f)
      def test_flush_single_timer_multiple_times(self):
          """
          Test reporting of multiple timer metric samples.
          """
++        _now = 40
          configurable_processor = ConfigurableMessageProcessor(
--            time_function=lambda: 42)
++            time_function=lambda: _now)
          configurable_processor.process("glork:4|ms")
          configurable_processor.update_metrics()
@@ -101,21 +112,24 @@
          configurable_processor.process("glork:42|ms")
          configurable_processor.update_metrics()
++        _now = 42
          messages = configurable_processor.flush()
--
--        self.assertEqual(('glork.15min_rate', 0.20000000000000001, 42),
--            messages[0])
--        self.assertEqual(('glork.1min_rate', 0.20000000000000001, 42),
--            messages[1])
--        self.assertEqual(('glork.5min_rate', 0.20000000000000001, 42),
--            messages[2])
--        self.assertEqual(("glork.999percentile", 42.0, 42), messages[3])
--        self.assertEqual(("glork.99percentile", 42.0, 42), messages[4])
--        self.assertEqual(('glork.count', 6.0, 42), messages[5])
--        self.assertEqual(("glork.max", 42.0, 42), messages[6])
--        self.assertEqual(("glork.mean", 18.0, 42), messages[7])
--        self.assertEqual(("glork.min", 4.0, 42), messages[8])
--        self.assertEqual(("glork.stddev", 13.490738, 42), messages[9])
++        messages.sort()
++
++        expected = [
++            ("glork.999percentile", 42.0, 42),
++            ("glork.99percentile", 42.0, 42),
++            ('glork.count', 6.0, 42),
++            ("glork.max", 42.0, 42),
++            ("glork.mean", 18.0, 42),
++            ("glork.min", 4.0, 42),
++            ('glork.rate', 3, 42),
++            ("glork.stddev", 13.490738, 42),
++            ]
++        expected.sort()
++
++        for e, f in zip(expected, messages):
++            self.assertEqual(e, f)
  class FlushMeterMetricMessagesTest(TestCase):