diff --git a/plugins/inputs/statsd/README.md b/plugins/inputs/statsd/README.md index 2e3c37b4006f3..4ea4bcc1676c6 100644 --- a/plugins/inputs/statsd/README.md +++ b/plugins/inputs/statsd/README.md @@ -183,6 +183,8 @@ metric type: for that stat during that interval. - `statsd__mean`: The mean is the average of all values statsd saw for that stat during that interval. + - `statsd__median`: The median is the middle of all values statsd saw + for that stat during that interval. - `statsd__stddev`: The stddev is the sample standard deviation of all values statsd saw for that stat during that interval. - `statsd__sum`: The sum is the sample sum of all values statsd saw diff --git a/plugins/inputs/statsd/running_stats.go b/plugins/inputs/statsd/running_stats.go index e33749b2c2179..08f24fe8861fd 100644 --- a/plugins/inputs/statsd/running_stats.go +++ b/plugins/inputs/statsd/running_stats.go @@ -7,6 +7,7 @@ import ( ) const defaultPercentileLimit = 1000 +const defaultMedianLimit = 1000 // RunningStats calculates a running mean, variance, standard deviation, // lower bound, upper bound, count, and can calculate estimated percentiles. @@ -31,12 +32,19 @@ type RunningStats struct { // cache if we have sorted the list so that we never re-sort a sorted list, // which can have very bad performance. - sorted bool + SortedPerc bool + + // Array used to calculate estimated median values + // We will store a maximum of MedLimit values, at which point we will start + // slicing old values + med []float64 + MedLimit int + MedInsertIndex int } func (rs *RunningStats) AddValue(v float64) { // Whenever a value is added, the list is no longer sorted. - rs.sorted = false + rs.SortedPerc = false if rs.n == 0 { rs.k = v @@ -45,7 +53,12 @@ func (rs *RunningStats) AddValue(v float64) { if rs.PercLimit == 0 { rs.PercLimit = defaultPercentileLimit } + if rs.MedLimit == 0 { + rs.MedLimit = defaultMedianLimit + rs.MedInsertIndex = 0 + } rs.perc = make([]float64, 0, rs.PercLimit) + rs.med = make([]float64, 0, rs.MedLimit) } // These are used for the running mean and variance @@ -69,12 +82,35 @@ func (rs *RunningStats) AddValue(v float64) { // Reached limit, choose random index to overwrite in the percentile array rs.perc[rand.Intn(len(rs.perc))] = v } + + if len(rs.med) < rs.MedLimit { + rs.med = append(rs.med, v) + } else { + // Reached limit, start over + rs.med[rs.MedInsertIndex] = v + } + rs.MedInsertIndex = (rs.MedInsertIndex + 1) % rs.MedLimit } func (rs *RunningStats) Mean() float64 { return rs.k + rs.ex/float64(rs.n) } +func (rs *RunningStats) Median() float64 { + // Need to sort for median, but keep temporal order + var values []float64 + values = append(values, rs.med...) + sort.Float64s(values) + count := len(values) + if count == 0 { + return 0 + } else if count%2 == 0 { + return (values[count/2-1] + values[count/2]) / 2 + } else { + return values[count/2] + } +} + func (rs *RunningStats) Variance() float64 { return (rs.ex2 - (rs.ex*rs.ex)/float64(rs.n)) / float64(rs.n) } @@ -104,9 +140,9 @@ func (rs *RunningStats) Percentile(n float64) float64 { n = 100 } - if !rs.sorted { + if !rs.SortedPerc { sort.Float64s(rs.perc) - rs.sorted = true + rs.SortedPerc = true } i := float64(len(rs.perc)) * n / float64(100) diff --git a/plugins/inputs/statsd/running_stats_test.go b/plugins/inputs/statsd/running_stats_test.go index 2cf987a69bbf1..267f9e156a09e 100644 --- a/plugins/inputs/statsd/running_stats_test.go +++ b/plugins/inputs/statsd/running_stats_test.go @@ -17,6 +17,9 @@ func TestRunningStats_Single(t *testing.T) { if rs.Mean() != 10.1 { t.Errorf("Expected %v, got %v", 10.1, rs.Mean()) } + if rs.Median() != 10.1 { + t.Errorf("Expected %v, got %v", 10.1, rs.Median()) + } if rs.Upper() != 10.1 { t.Errorf("Expected %v, got %v", 10.1, rs.Upper()) } @@ -61,6 +64,9 @@ func TestRunningStats_Duplicate(t *testing.T) { if rs.Mean() != 10.1 { t.Errorf("Expected %v, got %v", 10.1, rs.Mean()) } + if rs.Median() != 10.1 { + t.Errorf("Expected %v, got %v", 10.1, rs.Median()) + } if rs.Upper() != 10.1 { t.Errorf("Expected %v, got %v", 10.1, rs.Upper()) } @@ -105,6 +111,9 @@ func TestRunningStats(t *testing.T) { if rs.Mean() != 15.9375 { t.Errorf("Expected %v, got %v", 15.9375, rs.Mean()) } + if rs.Median() != 10.5 { + t.Errorf("Expected %v, got %v", 10.5, rs.Median()) + } if rs.Upper() != 45 { t.Errorf("Expected %v, got %v", 45, rs.Upper()) } @@ -164,3 +173,24 @@ func TestRunningStats_PercentileLimit(t *testing.T) { func fuzzyEqual(a, b, epsilon float64) bool { return math.Abs(a-b) <= epsilon } + +// Test that the median limit is respected and MedInsertIndex is properly incrementing index. +func TestRunningStats_MedianLimitIndex(t *testing.T) { + rs := RunningStats{} + rs.MedLimit = 10 + values := []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + + for _, v := range values { + rs.AddValue(v) + } + + if rs.Count() != 11 { + t.Errorf("Expected %v, got %v", 11, rs.Count()) + } + if len(rs.med) != 10 { + t.Errorf("Expected %v, got %v", 10, len(rs.med)) + } + if rs.MedInsertIndex != 1 { + t.Errorf("Expected %v, got %v", 0, rs.MedInsertIndex) + } +} diff --git a/plugins/inputs/statsd/statsd.go b/plugins/inputs/statsd/statsd.go index 97dca4656062c..da3a0b0159a32 100644 --- a/plugins/inputs/statsd/statsd.go +++ b/plugins/inputs/statsd/statsd.go @@ -253,6 +253,7 @@ func (s *Statsd) Gather(acc telegraf.Accumulator) error { prefix = fieldName + "_" } fields[prefix+"mean"] = stats.Mean() + fields[prefix+"median"] = stats.Median() fields[prefix+"stddev"] = stats.Stddev() fields[prefix+"sum"] = stats.Sum() fields[prefix+"upper"] = stats.Upper() diff --git a/plugins/inputs/statsd/statsd_test.go b/plugins/inputs/statsd/statsd_test.go index 22d6ee4e30901..9adf3c9f58835 100644 --- a/plugins/inputs/statsd/statsd_test.go +++ b/plugins/inputs/statsd/statsd_test.go @@ -419,6 +419,7 @@ func TestParse_Timings(t *testing.T) { "count": int64(5), "lower": float64(1), "mean": float64(3), + "median": float64(1), "stddev": float64(4), "sum": float64(15), "upper": float64(11), @@ -913,6 +914,7 @@ func TestParse_DataDogTags(t *testing.T) { "count": 10, "lower": float64(3), "mean": float64(3), + "median": float64(3), "stddev": float64(0), "sum": float64(30), "upper": float64(3), @@ -1211,6 +1213,7 @@ func TestParse_TimingsMultipleFieldsWithTemplate(t *testing.T) { "success_count": int64(5), "success_lower": float64(1), "success_mean": float64(3), + "success_median": float64(1), "success_stddev": float64(4), "success_sum": float64(15), "success_upper": float64(11), @@ -1219,6 +1222,7 @@ func TestParse_TimingsMultipleFieldsWithTemplate(t *testing.T) { "error_count": int64(5), "error_lower": float64(2), "error_mean": float64(6), + "error_median": float64(2), "error_stddev": float64(8), "error_sum": float64(30), "error_upper": float64(22), @@ -1259,6 +1263,7 @@ func TestParse_TimingsMultipleFieldsWithoutTemplate(t *testing.T) { "count": int64(5), "lower": float64(1), "mean": float64(3), + "median": float64(1), "stddev": float64(4), "sum": float64(15), "upper": float64(11), @@ -1268,6 +1273,7 @@ func TestParse_TimingsMultipleFieldsWithoutTemplate(t *testing.T) { "count": int64(5), "lower": float64(2), "mean": float64(6), + "median": float64(2), "stddev": float64(8), "sum": float64(30), "upper": float64(22),