Skip to content

Commit

Permalink
Only increment ruler warning eval metric for non PromQL warnings (#7592)
Browse files Browse the repository at this point in the history
  • Loading branch information
yeya24 authored Aug 7, 2024
1 parent 08af5d7 commit 7364836
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
### Fixed

* [#7511](https://github.com/thanos-io/thanos/pull/7511) Query Frontend: fix doubled gzip compression for response body.
* [#7592](https://github.com/thanos-io/thanos/pull/7592) Ruler: Only increment `thanos_rule_evaluation_with_warnings_total` metric for non PromQL warnings.

### Added

Expand Down
31 changes: 24 additions & 7 deletions cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ import (
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/agent"
"github.com/prometheus/prometheus/tsdb/wlog"
"gopkg.in/yaml.v2"

"github.com/thanos-io/objstore"
"github.com/thanos-io/objstore/client"
objstoretracing "github.com/thanos-io/objstore/tracing/opentracing"
"github.com/thanos-io/promql-engine/execution/parse"
"gopkg.in/yaml.v2"

"github.com/thanos-io/thanos/pkg/alert"
v1 "github.com/thanos-io/thanos/pkg/api/rule"
Expand All @@ -54,6 +54,7 @@ import (
"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/discovery/dns"
"github.com/thanos-io/thanos/pkg/errutil"
"github.com/thanos-io/thanos/pkg/extannotations"
"github.com/thanos-io/thanos/pkg/extgrpc"
"github.com/thanos-io/thanos/pkg/extkingpin"
"github.com/thanos-io/thanos/pkg/extprom"
Expand Down Expand Up @@ -292,7 +293,7 @@ func newRuleMetrics(reg *prometheus.Registry) *RuleMetrics {
m.ruleEvalWarnings = factory.NewCounterVec(
prometheus.CounterOpts{
Name: "thanos_rule_evaluation_with_warnings_total",
Help: "The total number of rule evaluation that were successful but had warnings which can indicate partial error.",
Help: "The total number of rule evaluation that were successful but had non PromQL warnings which can indicate partial error.",
}, []string{"strategy"},
)
m.ruleEvalWarnings.WithLabelValues(strings.ToLower(storepb.PartialResponseStrategy_ABORT.String()))
Expand Down Expand Up @@ -939,6 +940,8 @@ func queryFuncCreator(
level.Error(logger).Log("err", err, "query", qs)
continue
}

warns = filterOutPromQLWarnings(warns, logger, qs)
if len(warns) > 0 {
ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc()
// TODO(bwplotka): Propagate those to UI, probably requires changing rule manager code ):
Expand Down Expand Up @@ -970,12 +973,13 @@ func queryFuncCreator(
continue
}

if len(result.Warnings) > 0 {
warnings := make([]string, 0, len(result.Warnings))
for _, warn := range result.Warnings {
warnings = append(warnings, warn.Error())
}
warnings = filterOutPromQLWarnings(warnings, logger, qs)
if len(warnings) > 0 {
ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc()
warnings := make([]string, 0, len(result.Warnings))
for _, w := range result.Warnings {
warnings = append(warnings, w.Error())
}
level.Warn(logger).Log("warnings", strings.Join(warnings, ", "), "query", qs)
}

Expand Down Expand Up @@ -1081,3 +1085,16 @@ func validateTemplate(tmplStr string) error {
}
return nil
}

// Filter out PromQL related warnings from warning response and keep store related warnings only.
func filterOutPromQLWarnings(warns []string, logger log.Logger, query string) []string {
storeWarnings := make([]string, 0, len(warns))
for _, warn := range warns {
if extannotations.IsPromQLAnnotation(warn) {
level.Warn(logger).Log("warning", warn, "query", query)
continue
}
storeWarnings = append(storeWarnings, warn)
}
return storeWarnings
}
60 changes: 60 additions & 0 deletions cmd/thanos/rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import (
"testing"

"github.com/efficientgo/core/testutil"
"github.com/go-kit/log"
"github.com/prometheus/prometheus/util/annotations"

"github.com/thanos-io/thanos/pkg/extpromql"
)

func Test_parseFlagLabels(t *testing.T) {
Expand Down Expand Up @@ -110,3 +114,59 @@ func Test_tableLinkForExpression(t *testing.T) {
testutil.Equals(t, resStr, td.expectStr)
}
}

func TestFilterOutPromQLWarnings(t *testing.T) {
logger := log.NewNopLogger()
query := "foo"
expr, err := extpromql.ParseExpr(`rate(prometheus_build_info[5m])`)
testutil.Ok(t, err)
possibleCounterInfo := annotations.NewPossibleNonCounterInfo("foo", expr.PositionRange())
badBucketLabelWarning := annotations.NewBadBucketLabelWarning("foo", "0.99", expr.PositionRange())
for _, tc := range []struct {
name string
warnings []string
expected []string
}{
{
name: "nil warning",
expected: make([]string, 0),
},
{
name: "empty warning",
warnings: make([]string, 0),
expected: make([]string, 0),
},
{
name: "no PromQL warning",
warnings: []string{
"some_warning_message",
},
expected: []string{
"some_warning_message",
},
},
{
name: "PromQL warning",
warnings: []string{
possibleCounterInfo.Error(),
},
expected: make([]string, 0),
},
{
name: "filter out all PromQL warnings",
warnings: []string{
possibleCounterInfo.Error(),
badBucketLabelWarning.Error(),
"some_warning_message",
},
expected: []string{
"some_warning_message",
},
},
} {
t.Run(tc.name, func(t *testing.T) {
output := filterOutPromQLWarnings(tc.warnings, logger, query)
testutil.Equals(t, tc.expected, output)
})
}
}

0 comments on commit 7364836

Please sign in to comment.