Skip to content

Commit

Permalink
Receive: make tsdb stats limit configurable (thanos-io#6437)
Browse files Browse the repository at this point in the history
* Receive: make tsdb stats limit configurable

Signed-off-by: Michael Hoffmann <[email protected]>

* Receive: make tsdb stats limit configurable

Signed-off-by: Michael Hoffmann <[email protected]>

---------

Signed-off-by: Michael Hoffmann <[email protected]>
  • Loading branch information
MichaHoffmann authored and HC Zhu committed Jun 27, 2023
1 parent c4b8676 commit 0c88bbe
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Added

- [#6437](https://github.com/thanos-io/thanos/pull/6437) Receive: make tenant stats limit configurable
- [#6369](https://github.com/thanos-io/thanos/pull/6369) Receive: add az-aware replication support for Ketama algorithm
- [#6185](https://github.com/thanos-io/thanos/pull/6185) Tracing: tracing in OTLP support configuring service_name.
- [#6192](https://github.com/thanos-io/thanos/pull/6192) Store: add flag `bucket-web-label` to select the label to use as timeline title in web UI
Expand Down
2 changes: 1 addition & 1 deletion docs/components/receive.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The [Thanos Receive Controller](https://github.com/observatorium/thanos-receive-

## TSDB stats

Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual Tenants. The output format of the endpoint is compatible with [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats).
Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual tenants. Use the `limit` query parameter to change the number of stats returned (the default is 10). The output format of the endpoint is compatible with the [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats).

Note that each Thanos Receive will only expose local stats and replicated series will not be included in the response.

Expand Down
29 changes: 27 additions & 2 deletions pkg/receive/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"fmt"
"io"
stdlog "log"
"math"
"net"
"net/http"
"path"
Expand Down Expand Up @@ -54,12 +55,16 @@ const (
DefaultTenantHeader = "THANOS-TENANT"
// DefaultTenant is the default value used when no tenant is passed via the tenant header.
DefaultTenant = "default-tenant"
// DefaultStatsLimit is the default value used for limiting tenant stats.
DefaultStatsLimit = 10
// DefaultTenantLabel is the default label-name used when no tenant is passed via the tenant header.
DefaultTenantLabel = "tenant_id"
// DefaultReplicaHeader is the default header used to designate the replica count of a write request.
DefaultReplicaHeader = "THANOS-REPLICA"
// AllTenantsQueryParam is the query parameter for getting TSDB stats for all tenants.
AllTenantsQueryParam = "all_tenants"
// LimitStatsQueryParam is the query parameter for limiting the amount of returned TSDB stats.
LimitStatsQueryParam = "limit"
// Labels for metrics.
labelSuccess = "success"
labelError = "error"
Expand Down Expand Up @@ -280,6 +285,21 @@ func (h *Handler) testReady(f http.HandlerFunc) http.HandlerFunc {
}
}

// getStatsLimitParameter extracts the TSDB stats limit from the request's
// LimitStatsQueryParam query parameter.
//
// It returns DefaultStatsLimit when the parameter is absent or empty. An error
// is returned when the value is not a base-10 integer, does not fit into an
// int, or is negative. On error the returned limit is 0 and must not be used.
func getStatsLimitParameter(r *http.Request) (int, error) {
	statsLimitStr := r.URL.Query().Get(LimitStatsQueryParam)
	if statsLimitStr == "" {
		return DefaultStatsLimit, nil
	}
	// A bitSize of 0 makes ParseInt range-check the value against the
	// platform's int, so out-of-range input already surfaces as an error here.
	statsLimit, err := strconv.ParseInt(statsLimitStr, 10, 0)
	if err != nil {
		return 0, fmt.Errorf("unable to parse '%s' parameter: %w", LimitStatsQueryParam, err)
	}
	// Defensive guard before the narrowing conversion to int below.
	if statsLimit > math.MaxInt {
		return 0, fmt.Errorf("'%s' parameter is larger than %d", LimitStatsQueryParam, math.MaxInt)
	}
	// The limit is handed on as a result count to the TSDB stats call; a
	// negative count has no meaning there, so reject it as bad input instead
	// of forwarding it.
	if statsLimit < 0 {
		return 0, fmt.Errorf("'%s' parameter must not be negative, got %d", LimitStatsQueryParam, statsLimit)
	}
	return int(statsLimit), nil
}

func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusapi.TenantStats, *api.ApiError) {
if !h.isReady() {
return nil, &api.ApiError{Typ: api.ErrorInternal, Err: fmt.Errorf("service unavailable")}
Expand All @@ -292,15 +312,20 @@ func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusap
return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err}
}

statsLimit, err := getStatsLimitParameter(r)
if err != nil {
return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err}
}

if getAllTenantStats {
return h.options.TSDBStats.TenantStats(statsByLabelName), nil
return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName), nil
}

if tenantID == "" {
tenantID = h.options.DefaultTenantID
}

return h.options.TSDBStats.TenantStats(statsByLabelName, tenantID), nil
return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName, tenantID), nil
}

// Close stops the Handler.
Expand Down
48 changes: 48 additions & 0 deletions pkg/receive/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"strconv"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -1520,3 +1521,50 @@ func TestRelabel(t *testing.T) {
})
}
}

func TestGetStatsLimitParameter(t *testing.T) {
t.Run("invalid limit parameter, not integer", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

q := r.URL.Query()
q.Add(LimitStatsQueryParam, "abc")
r.URL.RawQuery = q.Encode()

_, err = getStatsLimitParameter(r)
testutil.NotOk(t, err)
})
t.Run("invalid limit parameter, too large", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

q := r.URL.Query()
q.Add(LimitStatsQueryParam, strconv.FormatUint(math.MaxInt+1, 10))
r.URL.RawQuery = q.Encode()

_, err = getStatsLimitParameter(r)
testutil.NotOk(t, err)
})
t.Run("not present returns default", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

limit, err := getStatsLimitParameter(r)
testutil.Ok(t, err)
testutil.Equals(t, limit, DefaultStatsLimit)
})
t.Run("if present and valid, the parameter is returned", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

const givenLimit = 20

q := r.URL.Query()
q.Add(LimitStatsQueryParam, strconv.FormatUint(givenLimit, 10))
r.URL.RawQuery = q.Encode()

limit, err := getStatsLimitParameter(r)
testutil.Ok(t, err)
testutil.Equals(t, limit, givenLimit)
})
}
6 changes: 3 additions & 3 deletions pkg/receive/multitsdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import (
type TSDBStats interface {
// TenantStats returns TSDB head stats for the given tenants.
// If no tenantIDs are provided, stats for all tenants are returned.
TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats
TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats
}

type MultiTSDB struct {
Expand Down Expand Up @@ -518,7 +518,7 @@ func (t *MultiTSDB) TSDBExemplars() map[string]*exemplars.TSDB {
return res
}

func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats {
func (t *MultiTSDB) TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats {
t.mtx.RLock()
defer t.mtx.RUnlock()
if len(tenantIDs) == 0 {
Expand All @@ -545,7 +545,7 @@ func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) []
if db == nil {
return
}
stats := db.Head().Stats(statsByLabelName, 10)
stats := db.Head().Stats(statsByLabelName, limit)

mu.Lock()
defer mu.Unlock()
Expand Down
2 changes: 1 addition & 1 deletion pkg/receive/multitsdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ func TestMultiTSDBStats(t *testing.T) {
testutil.Ok(t, appendSample(m, "baz", time.Now()))
testutil.Equals(t, 3, len(m.TSDBLocalClients()))

stats := m.TenantStats(labels.MetricName, test.tenants...)
stats := m.TenantStats(10, labels.MetricName, test.tenants...)
testutil.Equals(t, test.expectedStats, len(stats))
})
}
Expand Down

0 comments on commit 0c88bbe

Please sign in to comment.