Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Storages: Add metrics in RateLimiter (#8636) #8644

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,17 @@ namespace DB
M(tiflash_storage_read_thread_seconds, "Bucketed histogram of read thread", Histogram, \
F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_mpp_task_manager, "The gauge of mpp task manager", Gauge, \
F(type_mpp_query_count, {"type", "mpp_query_count"}))
F(type_mpp_query_count, {"type", "mpp_query_count"})) \
M(tiflash_storage_io_limiter_pending_seconds, "I/O limiter pending duration in seconds", Histogram, \
F(type_fg_read, {{"type", "fg_read"}}, ExpBuckets{0.001, 2, 20}), \
F(type_bg_read, {{"type", "bg_read"}}, ExpBuckets{0.001, 2, 20}), \
F(type_fg_write, {{"type", "fg_write"}}, ExpBuckets{0.001, 2, 20}), \
F(type_bg_write, {{"type", "bg_write"}}, ExpBuckets{0.001, 2, 20})) \
M(tiflash_storage_io_limiter_pending_count, "I/O limiter pending count", Counter, \
F(type_fg_read, {"type", "fg_read"}), \
F(type_bg_read, {"type", "bg_read"}), \
F(type_fg_write, {"type", "fg_write"}), \
F(type_bg_write, {"type", "bg_write"}))

// clang-format on

Expand Down
53 changes: 46 additions & 7 deletions dbms/src/Encryption/RateLimiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

#include <cassert>
#include <fstream>
#include <magic_enum.hpp>

namespace CurrentMetrics
{
Expand Down Expand Up @@ -100,6 +99,48 @@ inline CurrentMetrics::Increment pendingRequestMetrics(LimiterType type)
}
}

// Observe one pending duration, `second`, on the
// `tiflash_storage_io_limiter_pending_seconds` histogram, using the label
// that corresponds to the limiter `type`.
//
// Only FG_READ, BG_READ, FG_WRITE and BG_WRITE are recorded; any other
// `LimiterType` value is silently ignored.
void metricPendingDuration(LimiterType type, double second)
{
    if (type == LimiterType::FG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_read).Observe(second);
    }
    else if (type == LimiterType::BG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_read).Observe(second);
    }
    else if (type == LimiterType::FG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_write).Observe(second);
    }
    else if (type == LimiterType::BG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_write).Observe(second);
    }
    // Any other limiter type has no matching label: nothing is observed.
}

// Increment the `tiflash_storage_io_limiter_pending_count` counter for the
// label that corresponds to the limiter `type`.
//
// Only FG_READ, BG_READ, FG_WRITE and BG_WRITE are counted; any other
// `LimiterType` value is silently ignored.
void metricPendingCount(LimiterType type)
{
    if (type == LimiterType::FG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_read).Increment();
    }
    else if (type == LimiterType::BG_READ)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_read).Increment();
    }
    else if (type == LimiterType::FG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_write).Increment();
    }
    else if (type == LimiterType::BG_WRITE)
    {
        GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_write).Increment();
    }
    // Any other limiter type has no matching label: nothing is counted.
}

WriteLimiter::WriteLimiter(Int64 rate_limit_per_sec_, LimiterType type_, UInt64 refill_period_ms_)
: refill_period_ms{refill_period_ms_}
, refill_balance_per_period{calculateRefillBalancePerPeriod(rate_limit_per_sec_)}
Expand All @@ -120,11 +161,11 @@ void WriteLimiter::request(Int64 bytes)
{
std::unique_lock lock(request_mutex);

if (stop)
if (unlikely(stop))
return;

// 0 means no limit
if (!refill_balance_per_period)
if (unlikely(!refill_balance_per_period))
return;

metricRequestBytes(type, bytes);
Expand All @@ -136,7 +177,8 @@ void WriteLimiter::request(Int64 bytes)
Stopwatch sw_pending;
Int64 wait_times = 0;
auto pending_request = pendingRequestMetrics(type);

metricPendingCount(type);
SCOPE_EXIT({ metricPendingDuration(type, sw_pending.elapsedSeconds()); });
// request cannot be satisfied at this moment, enqueue
Request r(bytes);
req_queue.push_back(&r);
Expand Down Expand Up @@ -709,13 +751,10 @@ IOLimitTuner::IOLimitTuner(

IOLimitTuner::TuneResult IOLimitTuner::tune() const
{
auto msg = fmt::format("limiter {} write {} read {}", limiterCount(), writeLimiterCount(), readLimiterCount());
if (limiterCount() < 2)
{
LOG_TRACE(log, "{} NOT need to tune.", msg);
return {0, 0, false, 0, 0, false};
}
LOG_INFO(log, "{} need to tune.", msg);
if (bg_write_stat)
{
LOG_DEBUG(log, "bg_write_stat => {}", bg_write_stat->toString());
Expand Down
4 changes: 1 addition & 3 deletions dbms/src/Encryption/RateLimiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
#include <chrono>
#include <condition_variable>
#include <magic_enum.hpp>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>

// TODO: separate IO utility(i.e. FileProvider, RateLimiter) from Encryption directory
Expand Down Expand Up @@ -153,7 +151,7 @@ class WriteLimiter

std::mutex request_mutex;

LimiterType type;
const LimiterType type;

Stopwatch stat_stop_watch;
UInt64 alloc_bytes;
Expand Down
4 changes: 3 additions & 1 deletion dbms/src/Server/StorageConfigParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#pragma once

#include <Common/Logger.h>
#include <Core/Types.h>

#include <tuple>
Expand All @@ -31,6 +30,9 @@ class LayeredConfiguration;

namespace DB
{
class Logger;
using LoggerPtr = std::shared_ptr<Logger>;

struct StorageIORateLimitConfig
{
public:
Expand Down
156 changes: 144 additions & 12 deletions metrics/grafana/tiflash_summary.json
Original file line number Diff line number Diff line change
Expand Up @@ -7157,7 +7157,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter",
"title": "I/O Limiter Throughput",
"tooltip": {
"shared": true,
"sort": 0,
Expand Down Expand Up @@ -7201,7 +7201,7 @@
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "I/O Limiter pending tasks.",
"description": "The storage I/O limiter metrics.",
"fieldConfig": {
"defaults": {},
"overrides": []
Expand All @@ -7212,7 +7212,117 @@
"h": 8,
"w": 12,
"x": 12,
"y": 64
"y": 61
},
"hiddenSeries": false,
"id": 266,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.11",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:563",
"alias": "/-/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tiflash_storage_io_limiter_pending_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)",
"format": "time_series",
"instant": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter Pending Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:230",
"decimals": 0,
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:231",
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "I/O Limiter current pending count.",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 70
},
"hiddenSeries": false,
"id": 86,
Expand All @@ -7239,7 +7349,12 @@
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"seriesOverrides": [
{
"alias": "/pending/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
Expand All @@ -7249,47 +7364,64 @@
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "other-{{instance}}",
"refId": "A"
"legendFormat": "other-current-{{instance}}",
"refId": "A",
"hide": true
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "bgwrite-{{instance}}",
"legendFormat": "bgwrite-current-{{instance}}",
"refId": "B"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "fgwrite-{{instance}}",
"legendFormat": "fgwrite-current-{{instance}}",
"refId": "C"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "bgread-{{instance}}",
"legendFormat": "bgread-current-{{instance}}",
"refId": "D"
},
{
"exemplar": true,
"expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "fgread-{{instance}}",
"legendFormat": "fgread-current-{{instance}}",
"refId": "E"
},
{
"exemplar": true,
"expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (le, type) / 1000000000)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-pending-max",
"refId": "F"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-pending-P99",
"refId": "G"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "I/O Limiter Pending Tasks",
"title": "I/O Limiter Current Pending Count and Duration",
"tooltip": {
"shared": true,
"sort": 0,
Expand All @@ -7314,7 +7446,7 @@
"show": true
},
{
"format": "short",
"format": "s",
"label": null,
"logBase": 1,
"max": null,
Expand Down