Skip to content

Commit

Permalink
add the tenant level limit for api response
Browse files Browse the repository at this point in the history
  • Loading branch information
qinxx108 authored and anna-tran committed Sep 18, 2024
1 parent 6dfdb09 commit 8adc244
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 7 deletions.
6 changes: 6 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3460,6 +3460,12 @@ query_rejection:
# CLI flag: -alertmanager.max-alerts-size-bytes
[alertmanager_max_alerts_size_bytes: <int> | default = 0]


# Maximum total number of alerts that the alert manager read api can return. 0 =
# no limit.
# CLI flag: -alertmanager.read-api-max-alerts-counts
[alertmanager_read_api_max_alerts_counts: <int> | default = 0]

# list of rule groups to disable
[disabled_rule_groups: <list of DisabledRuleGroup> | default = []]
```
Expand Down
63 changes: 63 additions & 0 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"sync"
"time"

"github.com/prometheus/alertmanager/api/v2/models"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/pkg/errors"
Expand Down Expand Up @@ -45,6 +47,7 @@ import (
"github.com/prometheus/alertmanager/timeinterval"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/alertmanager/ui"
amcallback "github.com/prometheus/alertmanager/util/callback"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
commoncfg "github.com/prometheus/common/config"
Expand Down Expand Up @@ -265,10 +268,16 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
callback = newAlertsLimiter(am.cfg.UserID, am.cfg.Limits, reg)
}
am.alerts, err = mem.NewAlerts(context.Background(), am.marker, am.cfg.GCInterval, callback, am.logger, am.registry)

if err != nil {
return nil, fmt.Errorf("failed to create alerts: %v", err)
}

var apiCallback amcallback.Callback
if am.cfg.Limits != nil {
apiCallback = newAPIResponseLimiter(am.cfg.UserID, am.cfg.Limits)
}

am.api, err = api.New(api.Options{
Alerts: am.alerts,
Silences: am.silences,
Expand All @@ -281,6 +290,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
return am.dispatcher.Groups(f1, f2)
},
Concurrency: am.cfg.APIConcurrency,
APICallback: apiCallback,
})
if err != nil {
return nil, fmt.Errorf("failed to create api: %v", err)
Expand Down Expand Up @@ -765,3 +775,56 @@ func alertSize(alert model.Alert) int {
size += len(alert.GeneratorURL)
return size
}

// apiResponseLimiter truncates Alertmanager read API responses so that they
// contain no more alerts than the tenant's AlertmanagerReadAPIMaxAlertsCount
// limit before they are returned to the caller.
type apiResponseLimiter struct {
// tenant is the ID passed to limits when looking up the read API alert limit.
tenant string
// limits supplies the per-tenant AlertmanagerReadAPIMaxAlertsCount value.
limits Limits
}

// newAPIResponseLimiter builds an apiResponseLimiter that looks up the read
// API alert limit for the given tenant in limits.
func newAPIResponseLimiter(tenant string, limits Limits) *apiResponseLimiter {
	return &apiResponseLimiter{tenant: tenant, limits: limits}
}

// V2GetAlertsCallback returns at most AlertmanagerReadAPIMaxAlertsCount
// leading alerts for the limiter's tenant. A limit of 0 (or less) means no
// limit, in which case the alerts are returned unchanged. The error is always
// nil.
func (a *apiResponseLimiter) V2GetAlertsCallback(alerts models.GettableAlerts) (models.GettableAlerts, error) {
	maxAlerts := a.limits.AlertmanagerReadAPIMaxAlertsCount(a.tenant)

	// Nothing to trim when the limit is disabled or not exceeded.
	if maxAlerts <= 0 || len(alerts) <= maxAlerts {
		return alerts, nil
	}
	return alerts[:maxAlerts], nil
}

// V2GetAlertGroupsCallback caps the total number of alerts returned across all
// alert groups at the tenant's AlertmanagerReadAPIMaxAlertsCount limit.
//
// Groups are visited in order. Each group contributes as many of its leading
// alerts as the remaining budget allows; once the budget is used up, the
// remaining groups are dropped entirely. A limit of 0 (or less) disables the
// cap and every group is returned as-is.
//
// The groups passed in are never modified: when a group has to be trimmed, a
// shallow copy carrying the shortened alert list is returned in its place, so
// callers that keep a reference to the input still see the full alert lists.
// The error is always nil.
func (a *apiResponseLimiter) V2GetAlertGroupsCallback(alertgroups models.AlertGroups) (models.AlertGroups, error) {
	alertLimit := a.limits.AlertmanagerReadAPIMaxAlertsCount(a.tenant)
	limited := alertLimit > 0

	res := make(models.AlertGroups, 0, len(alertgroups))
	remaining := alertLimit // only consulted when limited is true

	for _, ag := range alertgroups {
		if limited {
			if remaining <= 0 {
				break
			}
			if len(ag.Alerts) > remaining {
				// Trim a copy rather than the caller's group. The three-index
				// slice also caps capacity so that a later append on the result
				// cannot overwrite the alerts that were cut off.
				trimmed := *ag
				trimmed.Alerts = ag.Alerts[:remaining:remaining]
				ag = &trimmed
			}
			remaining -= len(ag.Alerts)
		}
		res = append(res, ag)
	}

	return res, nil
}
156 changes: 156 additions & 0 deletions pkg/alertmanager/alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import (
"testing"
"time"

"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/util/callback"

"github.com/go-kit/log"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/types"
Expand Down Expand Up @@ -236,3 +239,156 @@ func testLimiter(t *testing.T, limits Limits, ops []callbackOp) {
assert.Equal(t, op.expectedTotalSize, totalSize, "wrong total size, op %d", ix)
}
}

// TestAPIResponseLimiter_V2GetAlertsCallback verifies that V2GetAlertsCallback
// returns the leading alerts up to the configured read API limit, and all of
// them when no limit is configured.
func TestAPIResponseLimiter_V2GetAlertsCallback(t *testing.T) {
	alerts := []*models.GettableAlert{
		{
			Alert: models.Alert{
				Labels: models.LabelSet{"alertname": "alert1"},
			},
		},
		{
			Alert: models.Alert{
				Labels: models.LabelSet{"alertname": "alert2"},
			},
		},
		{
			Alert: models.Alert{
				Labels: models.LabelSet{"alertname": "alert3"},
			},
		},
	}

	for _, tc := range []struct {
		name     string
		anames   []string
		callback callback.Callback
	}{
		{
			name:     "no call back",
			anames:   []string{"alert1", "alert2", "alert3"},
			callback: newAPIResponseLimiter("test", &mockAlertManagerLimits{}),
		},
		{
			name:     "callback: only return 1 alert",
			anames:   []string{"alert1"},
			callback: newAPIResponseLimiter("test", &mockAlertManagerLimits{maxAPIAlertsCount: 1}),
		},
		{
			name:     "callback: only return 2 alerts",
			anames:   []string{"alert1", "alert2"},
			callback: newAPIResponseLimiter("test", &mockAlertManagerLimits{maxAPIAlertsCount: 2}),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			res, err := tc.callback.V2GetAlertsCallback(alerts)
			// The limiter is expected to truncate, never to fail.
			require.NoError(t, err)

			// Kept as an empty (non-nil) slice so it compares equal to an
			// empty expectation.
			anames := []string{}
			for _, a := range res {
				name, ok := a.Labels["alertname"]
				if ok {
					anames = append(anames, string(name))
				}
			}
			require.Equal(t, tc.anames, anames)
		})
	}
}

// TestAPIResponseLimiter_V2GetAlertGroupsCallback verifies that
// V2GetAlertGroupsCallback caps the total number of alerts across groups at
// the configured read API limit, keeping groups and alerts in order.
//
// Every case builds its own input groups, so the outcome of one case cannot
// depend on how the callback treated the input of an earlier one.
func TestAPIResponseLimiter_V2GetAlertGroupsCallback(t *testing.T) {
	// newAlerts returns one alert per name, labelled with that alertname.
	newAlerts := func(names ...string) []*models.GettableAlert {
		alerts := make([]*models.GettableAlert, 0, len(names))
		for _, n := range names {
			alerts = append(alerts, &models.GettableAlert{
				Alert: models.Alert{
					Labels: models.LabelSet{"alertname": n},
				},
			})
		}
		return alerts
	}

	// groupsWithAlerts returns two groups holding three alerts each.
	groupsWithAlerts := func() []*models.AlertGroup {
		return []*models.AlertGroup{
			{
				Alerts: newAlerts("alert1", "alert2", "alert3"),
				Labels: models.LabelSet{"ag": "ag1"},
			},
			{
				Alerts: newAlerts("alert4", "alert5", "alert6"),
				Labels: models.LabelSet{"ag": "ag2"},
			},
		}
	}

	// groupWithoutAlerts returns a single group whose alert list is unset.
	groupWithoutAlerts := func() []*models.AlertGroup {
		return []*models.AlertGroup{
			{
				Labels: models.LabelSet{"ag": "ag1"},
			},
		}
	}

	for _, tc := range []struct {
		inputGroup func() []*models.AlertGroup
		name       string
		anames     []string
		callback   callback.Callback
	}{
		{
			inputGroup: groupsWithAlerts,
			name:       "no call back",
			anames:     []string{"alert1", "alert2", "alert3", "alert4", "alert5", "alert6"},
			callback:   newAPIResponseLimiter("test", &mockAlertManagerLimits{}),
		},
		{
			inputGroup: groupsWithAlerts,
			name:       "callback: only return 3 alert",
			anames:     []string{"alert1", "alert2", "alert3"},
			callback:   newAPIResponseLimiter("test", &mockAlertManagerLimits{maxAPIAlertsCount: 3}),
		},
		{
			inputGroup: groupsWithAlerts,
			name:       "callback: only return 5 alerts",
			anames:     []string{"alert1", "alert2", "alert3", "alert4", "alert5"},
			callback:   newAPIResponseLimiter("test", &mockAlertManagerLimits{maxAPIAlertsCount: 5}),
		},
		{
			inputGroup: groupWithoutAlerts,
			// Distinct name: this case previously reused the name of the one above.
			name:     "callback: group without alerts",
			anames:   []string{},
			callback: newAPIResponseLimiter("test", &mockAlertManagerLimits{maxAPIAlertsCount: 5}),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			res, err := tc.callback.V2GetAlertGroupsCallback(tc.inputGroup())
			// The limiter is expected to truncate, never to fail.
			require.NoError(t, err)

			// Kept as an empty (non-nil) slice so it compares equal to an
			// empty expectation.
			anames := []string{}
			for _, a := range res {
				for _, b := range a.Alerts {
					name, ok := b.Labels["alertname"]
					if ok {
						anames = append(anames, string(name))
					}
				}
			}
			require.Equal(t, tc.anames, anames)
		})
	}
}
3 changes: 3 additions & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ type Limits interface {
// AlertmanagerMaxAlertsSizeBytes returns total max size of alerts that tenant can have active at the same time. 0 = no limit.
// Size of the alert is computed from alert labels, annotations and generator URL.
AlertmanagerMaxAlertsSizeBytes(tenant string) int

// AlertmanagerReadAPIMaxAlertsCount returns the maximum total number of alerts that the Alertmanager read API can return for the tenant. 0 = no limit.
AlertmanagerReadAPIMaxAlertsCount(tenant string) int
}

// A MultitenantAlertmanager manages Alertmanager instances for multiple
Expand Down
5 changes: 5 additions & 0 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2219,6 +2219,7 @@ type mockAlertManagerLimits struct {
maxDispatcherAggregationGroups int
maxAlertsCount int
maxAlertsSizeBytes int
maxAPIAlertsCount int
}

func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
Expand Down Expand Up @@ -2260,3 +2261,7 @@ func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsCount(_ string) int {
// AlertmanagerMaxAlertsSizeBytes returns the mock's configured
// maxAlertsSizeBytes value; the tenant argument is ignored.
func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsSizeBytes(_ string) int {
return m.maxAlertsSizeBytes
}

// AlertmanagerReadAPIMaxAlertsCount returns the mock's configured
// maxAPIAlertsCount value; the tenant argument is ignored.
func (m *mockAlertManagerLimits) AlertmanagerReadAPIMaxAlertsCount(_ string) int {
return m.maxAPIAlertsCount
}
21 changes: 14 additions & 7 deletions pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,15 @@ type Limits struct {
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
AlertmanagerMaxTemplatesCount int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
AlertmanagerMaxTemplateSizeBytes int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
AlertmanagerMaxAlertsCount int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
AlertmanagerMaxAlertsSizeBytes int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
DisabledRuleGroups DisabledRuleGroups `yaml:"disabled_rule_groups" json:"disabled_rule_groups" doc:"nocli|description=list of rule groups to disable"`
AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
AlertmanagerMaxTemplatesCount int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
AlertmanagerMaxTemplateSizeBytes int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
AlertmanagerMaxAlertsCount int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
AlertmanagerMaxAlertsSizeBytes int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
AlertmanagerReadAPIMaxAlertsCount int `yaml:"alertmanager_read_api_max_alerts_counts" json:"alertmanager_read_api_max_alerts_counts"`

DisabledRuleGroups DisabledRuleGroups `yaml:"disabled_rule_groups" json:"disabled_rule_groups" doc:"nocli|description=list of rule groups to disable"`
}

// RegisterFlags adds the flags required to config this to the given FlagSet
Expand Down Expand Up @@ -295,6 +297,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.")
f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
f.IntVar(&l.AlertmanagerReadAPIMaxAlertsCount, "alertmanager.read-api-max-alerts-counts", 0, "Maximum total number of alerts that the alert manager read api can return. 0 = no limit.")
}

// Validate the limits config and returns an error if the validation
Expand Down Expand Up @@ -934,6 +937,10 @@ func (o *Overrides) DisabledRuleGroups(userID string) DisabledRuleGroups {
return DisabledRuleGroups{}
}

// AlertmanagerReadAPIMaxAlertsCount returns the maximum total number of alerts
// that the Alertmanager read API can return, taken from the limits resolved
// for userID by GetOverridesForUser. 0 = no limit.
func (o *Overrides) AlertmanagerReadAPIMaxAlertsCount(userID string) int {
return o.GetOverridesForUser(userID).AlertmanagerReadAPIMaxAlertsCount
}

// GetOverridesForUser returns the per-tenant limits with overrides.
func (o *Overrides) GetOverridesForUser(userID string) *Limits {
if o.tenantLimits != nil {
Expand Down

0 comments on commit 8adc244

Please sign in to comment.