Skip to content

Commit

Permalink
Alertmanager: Register grafana state size on sync (#9295)
Browse files Browse the repository at this point in the history
* Register grafana state size on sync

* fetch state and register size first

* delete user label in case state not found
  • Loading branch information
titolins authored Sep 18, 2024
1 parent 355dcb6 commit a93ded8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
30 changes: 20 additions & 10 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,20 @@ func (cfg *MultitenantAlertmanagerConfig) CheckExternalURL(alertmanagerHTTPPrefi
}

// multitenantAlertmanagerMetrics groups the Prometheus gauge vectors owned by
// the multitenant Alertmanager. Instances are created by
// newMultitenantAlertmanagerMetrics, which registers the gauges on the
// supplied registerer.
type multitenantAlertmanagerMetrics struct {
// grafanaStateSize is exported as cortex_alertmanager_grafana_state_size_bytes
// and is labelled by "user". syncStates sets it to the Size() of the fetched
// Grafana state and deletes the user's series when the store reports the
// state as not found.
grafanaStateSize *prometheus.GaugeVec
// lastReloadSuccessful is exported as
// cortex_alertmanager_config_last_reload_successful.
// NOTE(review): label set and the values written are not visible in this
// diff — presumably per-user, 1 on success / 0 on failure; confirm.
lastReloadSuccessful *prometheus.GaugeVec
// lastReloadSuccessfulTimestamp: NOTE(review): presumably the timestamp of
// the last successful configuration reload; its registration is outside
// the visible part of the diff — confirm name, labels and unit.
lastReloadSuccessfulTimestamp *prometheus.GaugeVec
}

func newMultitenantAlertmanagerMetrics(reg prometheus.Registerer) *multitenantAlertmanagerMetrics {
m := &multitenantAlertmanagerMetrics{}

m.grafanaStateSize = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "alertmanager_grafana_state_size_bytes",
Help: "Size of the grafana alertmanager state.",
}, []string{"user"})

m.lastReloadSuccessful = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "alertmanager_config_last_reload_successful",
Expand Down Expand Up @@ -737,6 +744,19 @@ func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs)

// syncStates promotes/unpromotes the Grafana state and updates the 'promoted' flag if needed.
func (am *MultitenantAlertmanager) syncStates(ctx context.Context, cfg amConfig) error {
// fetching grafana state first so we can register its size independently of it being promoted or not
s, err := am.store.GetFullGrafanaState(ctx, cfg.User)
if err != nil {
if errors.Is(err, alertspb.ErrNotFound) {
// This is expected if the state was already promoted.
level.Debug(am.logger).Log("msg", "grafana state not found, skipping promotion", "user", cfg.User)
am.multitenantMetrics.grafanaStateSize.DeleteLabelValues(cfg.User)
return nil
}
return err
}
am.multitenantMetrics.grafanaStateSize.WithLabelValues(cfg.User).Set(float64(s.State.Size()))

am.alertmanagersMtx.Lock()
userAM, ok := am.alertmanagers[cfg.User]
am.alertmanagersMtx.Unlock()
Expand All @@ -757,16 +777,6 @@ func (am *MultitenantAlertmanager) syncStates(ctx context.Context, cfg amConfig)

// Promote the Grafana Alertmanager state and update the usingGrafanaState flag.
level.Debug(am.logger).Log("msg", "promoting Grafana state", "user", cfg.User)
s, err := am.store.GetFullGrafanaState(ctx, cfg.User)
if err != nil {
if errors.Is(err, alertspb.ErrNotFound) {
// This is expected if the state was already promoted.
level.Debug(am.logger).Log("msg", "grafana state not found, skipping promotion", "user", cfg.User)
return nil
}
return err
}

// Translate Grafana state keys to Mimir state keys.
for i, p := range s.State.Parts {
switch p.Key {
Expand Down
2 changes: 2 additions & 0 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3221,6 +3221,8 @@ func TestSyncStates(t *testing.T) {
prometheus.NewPedanticRegistry(),
)

require.NoError(t, store.SetFullGrafanaState(ctx, test.cfg.User, alertspb.FullStateDesc{}))

require.NoError(t, am.setConfig(amConfig{
AlertConfigDesc: alertspb.AlertConfigDesc{
User: test.cfg.User,
Expand Down

0 comments on commit a93ded8

Please sign in to comment.