Skip to content

Commit

Permalink
metrics: add metrics for plan replayer and historical stats (#40271)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yisaer committed Jan 4, 2023
1 parent 3e65e9b commit f483b39
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 2 deletions.
8 changes: 8 additions & 0 deletions domain/historical_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,16 @@ package domain

import (
"github.com/pingcap/errors"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/statistics/handle"
)

// Pre-bound children of metrics.HistoricalStatsCounter for the "generate"
// operation. They are resolved once during package initialization so call
// sites only need to call Inc(). The label values are given in
// (type, result) order.
var (
// generateHistoricalStatsSuccessCounter is incremented by DumpHistoricalStats
// after RecordHistoricalStatsToStorage returns without an error.
generateHistoricalStatsSuccessCounter = metrics.HistoricalStatsCounter.WithLabelValues("generate", "success")
// generateHistoricalStatsFailedCounter is incremented by DumpHistoricalStats
// when RecordHistoricalStatsToStorage returns an error.
generateHistoricalStatsFailedCounter = metrics.HistoricalStatsCounter.WithLabelValues("generate", "fail")
)

// HistoricalStatsWorker is the worker that dumps historical stats.
type HistoricalStatsWorker struct {
tblCH chan int64
Expand Down Expand Up @@ -52,8 +58,10 @@ func (w *HistoricalStatsWorker) DumpHistoricalStats(tableID int64, statsHandle *
return errors.Errorf("cannot get DBInfo by TableID %d", tableID)
}
if _, err := statsHandle.RecordHistoricalStatsToStorage(dbInfo.Name.O, tblInfo); err != nil {
generateHistoricalStatsFailedCounter.Inc()
return errors.Errorf("record table %s.%s's historical stats failed", dbInfo.Name.O, tblInfo.Name.O)
}
generateHistoricalStatsSuccessCounter.Inc()
return nil
}

Expand Down
12 changes: 11 additions & 1 deletion domain/plan_replayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/pingcap/tidb/bindinfo"
"github.com/pingcap/tidb/domain/infosync"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/parser"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -167,6 +168,13 @@ func insertPlanReplayerSuccessStatusRecord(ctx context.Context, sctx sessionctx.
}
}

// Pre-bound plan replayer metrics used by the capture path. The counter
// children are resolved once during package initialization; label values are
// given in (type, result) order.
var (
// planReplayerCaptureTaskSendCounter is incremented by SendTask on the path
// that returns true.
planReplayerCaptureTaskSendCounter = metrics.PlanReplayerTaskCounter.WithLabelValues("capture", "send")
// planReplayerCaptureTaskDiscardCounter is incremented by SendTask in its
// default branch, where the task is discarded because the task channel is
// full.
planReplayerCaptureTaskDiscardCounter = metrics.PlanReplayerTaskCounter.WithLabelValues("capture", "discard")

// planReplayerRegisterTaskGauge is set by CollectPlanReplayerTask to the
// number of tasks it has just passed to setupTasks.
planReplayerRegisterTaskGauge = metrics.PlanReplayerRegisterTaskGauge
)

type planReplayerHandle struct {
*planReplayerTaskCollectorHandle
*planReplayerTaskDumpHandle
Expand All @@ -181,9 +189,10 @@ func (h *planReplayerHandle) SendTask(task *PlanReplayerDumpTask) bool {
if !task.IsContinuesCapture {
h.planReplayerTaskCollectorHandle.removeTask(task.PlanReplayerTaskKey)
}
planReplayerCaptureTaskSendCounter.Inc()
return true
default:
// TODO: add metrics here
planReplayerCaptureTaskDiscardCounter.Inc()
// directly discard the task if the task channel is full in order not to block the query process
logutil.BgLogger().Warn("discard one plan replayer dump task",
zap.String("sql-digest", task.SQLDigest), zap.String("plan-digest", task.PlanDigest))
Expand Down Expand Up @@ -221,6 +230,7 @@ func (h *planReplayerTaskCollectorHandle) CollectPlanReplayerTask() error {
}
}
h.setupTasks(tasks)
planReplayerRegisterTaskGauge.Set(float64(len(tasks)))
return nil
}

Expand Down
9 changes: 9 additions & 0 deletions domain/plan_replayer_dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/pingcap/tidb/bindinfo"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/sessionctx"
Expand Down Expand Up @@ -145,6 +146,11 @@ func (tne *tableNameExtractor) handleIsView(t *ast.TableName) (bool, error) {
return true, nil
}

// Pre-bound children of metrics.PlanReplayerTaskCounter for the "dump" task
// type. Label values are given in (type, result) order.
var (
// planReplayerDumpTaskSuccess is incremented by DumpPlanReplayerInfo on the
// branch where no dump error was recorded.
planReplayerDumpTaskSuccess = metrics.PlanReplayerTaskCounter.WithLabelValues("dump", "success")
// planReplayerDumpTaskFailed is incremented by DumpPlanReplayerInfo on the
// branch where the dump error message is captured.
planReplayerDumpTaskFailed = metrics.PlanReplayerTaskCounter.WithLabelValues("dump", "fail")
)

// DumpPlanReplayerInfo will dump the information about sqls.
// The files will be organized into the following format:
/*
Expand Down Expand Up @@ -212,6 +218,9 @@ func DumpPlanReplayerInfo(ctx context.Context, sctx sessionctx.Context,
zap.Strings("sqls", sqls))
}
errMsg = err.Error()
planReplayerDumpTaskFailed.Inc()
} else {
planReplayerDumpTaskSuccess.Inc()
}
err1 := zw.Close()
if err1 != nil {
Expand Down
229 changes: 229 additions & 0 deletions metrics/grafana/tidb.json
Original file line number Diff line number Diff line change
Expand Up @@ -14337,6 +14337,235 @@
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 184
},
"hiddenSeries": false,
"id": 236,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.11",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tidb_plan_replayer_task{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"dump\"}[1m])) by (result)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "dump-task-{{result}}",
"refId": "A",
"step": 30
},
{
"exemplar": true,
"expr": "sum(rate(tidb_plan_replayer_task{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"capture\"}[1m])) by (result)",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "capture-task-{{result}}",
"refId": "B",
"step": 30
},
{
"exemplar": true,
"expr": "avg(tidb_plan_replayer_register_task{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})",
"hide": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "register-task",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Plan Replayer Task OPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 184
},
"hiddenSeries": false,
"id": 237,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.11",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tidb_statistics_historical_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"generate\"}[1m])) by (result)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "generate-{{result}}",
"refId": "A",
"step": 30
},
{
"exemplar": true,
"expr": "sum(rate(tidb_statistics_historical_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"dump\"}[1m])) by (result)",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "dump-{{result}}",
"refId": "B",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Historical Stats OPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"repeat": null,
Expand Down
4 changes: 4 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ func RegisterMetrics() {

prometheus.MustRegister(EMACPUUsageGauge)

prometheus.MustRegister(HistoricalStatsCounter)
prometheus.MustRegister(PlanReplayerTaskCounter)
prometheus.MustRegister(PlanReplayerRegisterTaskGauge)

tikvmetrics.InitMetrics(TiDB, TiKVClient)
tikvmetrics.RegisterMetrics()
tikvmetrics.TiKVPanicCounter = PanicCounter // reset tidb metrics for tikv metrics
Expand Down
21 changes: 21 additions & 0 deletions metrics/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,25 @@ var (
Name: "stats_healthy",
Help: "Gauge of stats healthy",
}, []string{LblType})

// HistoricalStatsCounter counts historical stats operations, partitioned by
// the LblType and LblResult labels. It is exposed as
// tidb_statistics_historical_stats.
HistoricalStatsCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "tidb",
Subsystem: "statistics",
Name: "historical_stats",
Help: "counter of the historical stats operation",
}, []string{LblType, LblResult})

// PlanReplayerTaskCounter counts plan replayer tasks, partitioned by the
// LblType and LblResult labels. It is exposed as tidb_plan_replayer_task.
// The vector is shared by more than one task type (for example "capture" and
// "dump"), so the help text describes tasks in general rather than captured
// tasks only.
PlanReplayerTaskCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "tidb",
Subsystem: "plan_replayer",
Name: "task",
Help: "counter of plan replayer tasks by type and result",
}, []string{LblType, LblResult})

// PlanReplayerRegisterTaskGauge reports the number of registered plan
// replayer tasks. It is an unlabelled gauge exposed as
// tidb_plan_replayer_register_task.
PlanReplayerRegisterTaskGauge = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "tidb",
Subsystem: "plan_replayer",
Name: "register_task",
Help: "gauge of plan replayer registered task",
})
)
Loading

0 comments on commit f483b39

Please sign in to comment.