Skip to content

Commit

Permalink
Storages: Add metrics in RateLimiter (#8636) (#8644)
Browse files Browse the repository at this point in the history
ref #8563
  • Loading branch information
ti-chi-bot authored Jan 18, 2024
1 parent 4219a63 commit 7618b28
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 24 deletions.
12 changes: 11 additions & 1 deletion dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,17 @@ namespace DB
M(tiflash_storage_read_thread_seconds, "Bucketed histogram of read thread", Histogram, \
F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_mpp_task_manager, "The gauge of mpp task manager", Gauge, \
F(type_mpp_query_count, {"type", "mpp_query_count"}))
F(type_mpp_query_count, {"type", "mpp_query_count"})) \
M(tiflash_storage_io_limiter_pending_seconds, "I/O limiter pending duration in seconds", Histogram, \
F(type_fg_read, {{"type", "fg_read"}}, ExpBuckets{0.001, 2, 20}), \
F(type_bg_read, {{"type", "bg_read"}}, ExpBuckets{0.001, 2, 20}), \
F(type_fg_write, {{"type", "fg_write"}}, ExpBuckets{0.001, 2, 20}), \
F(type_bg_write, {{"type", "bg_write"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_storage_io_limiter_pending_count, "I/O limiter pending count", Counter, \
F(type_fg_read, {"type", "fg_read"}), \
F(type_bg_read, {"type", "bg_read"}), \
F(type_fg_write, {"type", "fg_write"}), \
F(type_bg_write, {"type", "bg_write"}))

// clang-format on

Expand Down
53 changes: 46 additions & 7 deletions dbms/src/Encryption/RateLimiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

#include <cassert>
#include <fstream>
#include <magic_enum.hpp>

namespace CurrentMetrics
{
Expand Down Expand Up @@ -100,6 +99,48 @@ inline CurrentMetrics::Increment pendingRequestMetrics(LimiterType type)
}
}

// Observe the pending duration (in seconds) of a request on the
// `tiflash_storage_io_limiter_pending_seconds` histogram series that matches
// the limiter type. Types other than the four handled below are ignored.
void metricPendingDuration(LimiterType type, double second)
{
    if (type == LimiterType::FG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_read).Observe(second);
    }
    else if (type == LimiterType::BG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_read).Observe(second);
    }
    else if (type == LimiterType::FG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_write).Observe(second);
    }
    else if (type == LimiterType::BG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_write).Observe(second);
    }
    // Any other limiter type has no dedicated series: nothing to record.
}

// Increment the `tiflash_storage_io_limiter_pending_count` counter series that
// matches the limiter type. Types other than the four handled below are ignored.
void metricPendingCount(LimiterType type)
{
    if (type == LimiterType::FG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_read).Increment();
    }
    else if (type == LimiterType::BG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_read).Increment();
    }
    else if (type == LimiterType::FG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_write).Increment();
    }
    else if (type == LimiterType::BG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_write).Increment();
    }
    // Any other limiter type has no dedicated series: nothing to count.
}

WriteLimiter::WriteLimiter(Int64 rate_limit_per_sec_, LimiterType type_, UInt64 refill_period_ms_)
: refill_period_ms{refill_period_ms_}
, refill_balance_per_period{calculateRefillBalancePerPeriod(rate_limit_per_sec_)}
Expand All @@ -120,11 +161,11 @@ void WriteLimiter::request(Int64 bytes)
{
std::unique_lock lock(request_mutex);

if (stop)
if (unlikely(stop))
return;

// 0 means no limit
if (!refill_balance_per_period)
if (unlikely(!refill_balance_per_period))
return;

metricRequestBytes(type, bytes);
Expand All @@ -136,7 +177,8 @@ void WriteLimiter::request(Int64 bytes)
Stopwatch sw_pending;
Int64 wait_times = 0;
auto pending_request = pendingRequestMetrics(type);

metricPendingCount(type);
SCOPE_EXIT({ metricPendingDuration(type, sw_pending.elapsedSeconds()); });
// request cannot be satisfied at this moment, enqueue
Request r(bytes);
req_queue.push_back(&r);
Expand Down Expand Up @@ -709,13 +751,10 @@ IOLimitTuner::IOLimitTuner(

IOLimitTuner::TuneResult IOLimitTuner::tune() const
{
auto msg = fmt::format("limiter {} write {} read {}", limiterCount(), writeLimiterCount(), readLimiterCount());
if (limiterCount() < 2)
{
LOG_TRACE(log, "{} NOT need to tune.", msg);
return {0, 0, false, 0, 0, false};
}
LOG_INFO(log, "{} need to tune.", msg);
if (bg_write_stat)
{
LOG_DEBUG(log, "bg_write_stat => {}", bg_write_stat->toString());
Expand Down
4 changes: 1 addition & 3 deletions dbms/src/Encryption/RateLimiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
#include <chrono>
#include <condition_variable>
#include <magic_enum.hpp>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>

// TODO: separate IO utility(i.e. FileProvider, RateLimiter) from Encryption directory
Expand Down Expand Up @@ -153,7 +151,7 @@ class WriteLimiter

std::mutex request_mutex;

LimiterType type;
const LimiterType type;

Stopwatch stat_stop_watch;
UInt64 alloc_bytes;
Expand Down
4 changes: 3 additions & 1 deletion dbms/src/Server/StorageConfigParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#pragma once

#include <Common/Logger.h>
#include <Core/Types.h>

#include <tuple>
Expand All @@ -31,6 +30,9 @@ class LayeredConfiguration;

namespace DB
{
class Logger;
using LoggerPtr = std::shared_ptr<Logger>;

struct StorageIORateLimitConfig
{
public:
Expand Down
156 changes: 144 additions & 12 deletions metrics/grafana/tiflash_summary.json
Original file line number Diff line number Diff line change
Expand Up @@ -7157,7 +7157,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter",
"title": "I/O Limiter Throughput",
"tooltip": {
"shared": true,
"sort": 0,
Expand Down Expand Up @@ -7201,7 +7201,7 @@
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "I/O Limiter pending tasks.",
"description": "The storage I/O limiter metrics.",
"fieldConfig": {
"defaults": {},
"overrides": []
Expand All @@ -7212,7 +7212,117 @@
"h": 8,
"w": 12,
"x": 12,
"y": 64
"y": 61
},
"hiddenSeries": false,
"id": 266,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.11",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:563",
"alias": "/-/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tiflash_storage_io_limiter_pending_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)",
"format": "time_series",
"instant": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter Pending Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:230",
"decimals": 0,
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:231",
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "I/O Limiter current pending count.",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 70
},
"hiddenSeries": false,
"id": 86,
Expand All @@ -7239,7 +7349,12 @@
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"seriesOverrides": [
{
"alias": "/pending/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
Expand All @@ -7249,47 +7364,64 @@
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "other-{{instance}}",
"refId": "A"
"legendFormat": "other-current-{{instance}}",
"refId": "A",
"hide": true
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "bgwrite-{{instance}}",
"legendFormat": "bgwrite-current-{{instance}}",
"refId": "B"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "fgwrite-{{instance}}",
"legendFormat": "fgwrite-current-{{instance}}",
"refId": "C"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "bgread-{{instance}}",
"legendFormat": "bgread-current-{{instance}}",
"refId": "D"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "fgread-{{instance}}",
"legendFormat": "fgread-current-{{instance}}",
"refId": "E"
},
{
"exemplar": true,
"expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (le, type) / 1000000000)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-pending-max",
"refId": "F"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-pending-P99",
"refId": "G"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter Pending Tasks",
"title": "I/O Limiter Current Pending Count and Duration",
"tooltip": {
"shared": true,
"sort": 0,
Expand All @@ -7314,7 +7446,7 @@
"show": true
},
{
"format": "short",
"format": "s",
"label": null,
"logBase": 1,
"max": null,
Expand Down

0 comments on commit 7618b28

Please sign in to comment.