diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 1c7f46c26a4..76a738cd0ee 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -277,7 +277,17 @@ namespace DB M(tiflash_storage_read_thread_seconds, "Bucketed histogram of read thread", Histogram, \ F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_mpp_task_manager, "The gauge of mpp task manager", Gauge, \ - F(type_mpp_query_count, {"type", "mpp_query_count"})) + F(type_mpp_query_count, {"type", "mpp_query_count"})) \ + M(tiflash_storage_io_limiter_pending_seconds, "I/O limiter pending duration in seconds", Histogram, \ + F(type_fg_read, {{"type", "fg_read"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_bg_read, {{"type", "bg_read"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_fg_write, {{"type", "fg_write"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_bg_write, {{"type", "bg_write"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_io_limiter_pending_count, "I/O limiter pending count", Counter, \ + F(type_fg_read, {"type", "fg_read"}), \ + F(type_bg_read, {"type", "bg_read"}), \ + F(type_fg_write, {"type", "fg_write"}), \ + F(type_bg_write, {"type", "bg_write"})) // clang-format on diff --git a/dbms/src/Encryption/RateLimiter.cpp b/dbms/src/Encryption/RateLimiter.cpp index ab45c84ea6c..54bb02a483a 100644 --- a/dbms/src/Encryption/RateLimiter.cpp +++ b/dbms/src/Encryption/RateLimiter.cpp @@ -23,7 +23,6 @@ #include #include -#include namespace CurrentMetrics { @@ -100,6 +99,48 @@ inline CurrentMetrics::Increment pendingRequestMetrics(LimiterType type) } } +void metricPendingDuration(LimiterType type, double second) +{ + switch (type) + { + case LimiterType::FG_READ: + GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_read).Observe(second); + break; + case LimiterType::BG_READ: + GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_read).Observe(second); + break; + case LimiterType::FG_WRITE: + GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_fg_write).Observe(second); + break; + case LimiterType::BG_WRITE: + GET_METRIC(tiflash_storage_io_limiter_pending_seconds, type_bg_write).Observe(second); + break; + default: + break; + } +} + +void metricPendingCount(LimiterType type) +{ + switch (type) + { + case LimiterType::FG_READ: + GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_read).Increment(); + break; + case LimiterType::BG_READ: + GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_read).Increment(); + break; + case LimiterType::FG_WRITE: + GET_METRIC(tiflash_storage_io_limiter_pending_count, type_fg_write).Increment(); + break; + case LimiterType::BG_WRITE: + GET_METRIC(tiflash_storage_io_limiter_pending_count, type_bg_write).Increment(); + break; + default: + break; + } +} + WriteLimiter::WriteLimiter(Int64 rate_limit_per_sec_, LimiterType type_, UInt64 refill_period_ms_) : refill_period_ms{refill_period_ms_} , refill_balance_per_period{calculateRefillBalancePerPeriod(rate_limit_per_sec_)} @@ -120,11 +161,11 @@ void WriteLimiter::request(Int64 bytes) { std::unique_lock lock(request_mutex); - if (stop) + if (unlikely(stop)) return; // 0 means no limit - if (!refill_balance_per_period) + if (unlikely(!refill_balance_per_period)) return; metricRequestBytes(type, bytes); @@ -136,7 +177,8 @@ void WriteLimiter::request(Int64 bytes) Stopwatch sw_pending; Int64 wait_times = 0; auto pending_request = pendingRequestMetrics(type); - + metricPendingCount(type); + SCOPE_EXIT({ metricPendingDuration(type, sw_pending.elapsedSeconds()); }); // request cannot be satisfied at this moment, enqueue Request r(bytes); req_queue.push_back(&r); @@ -709,13 +751,10 @@ IOLimitTuner::IOLimitTuner( IOLimitTuner::TuneResult IOLimitTuner::tune() const { - auto msg = fmt::format("limiter {} write {} read {}", limiterCount(), writeLimiterCount(), readLimiterCount()); if (limiterCount() < 2) { - LOG_TRACE(log, "{} NOT need to tune.", msg); return {0, 0, false, 0, 0, false}; } - LOG_INFO(log, "{} need to tune.", msg); if (bg_write_stat) { LOG_DEBUG(log, "bg_write_stat => {}", bg_write_stat->toString()); diff --git a/dbms/src/Encryption/RateLimiter.h b/dbms/src/Encryption/RateLimiter.h index af369907b40..ec212ab719b 100644 --- a/dbms/src/Encryption/RateLimiter.h +++ b/dbms/src/Encryption/RateLimiter.h @@ -23,9 +23,7 @@ #include #include #include -#include #include -#include #include // TODO: separate IO utility(i.e. FileProvider, RateLimiter) from Encryption directory @@ -153,7 +151,7 @@ class WriteLimiter std::mutex request_mutex; - LimiterType type; + const LimiterType type; Stopwatch stat_stop_watch; UInt64 alloc_bytes; diff --git a/dbms/src/Server/StorageConfigParser.h b/dbms/src/Server/StorageConfigParser.h index 83b017d6cb4..095b113f528 100644 --- a/dbms/src/Server/StorageConfigParser.h +++ b/dbms/src/Server/StorageConfigParser.h @@ -14,7 +14,6 @@ #pragma once -#include #include #include @@ -31,6 +30,9 @@ class LayeredConfiguration; namespace DB { +class Logger; +using LoggerPtr = std::shared_ptr; + struct StorageIORateLimitConfig { public: diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 1d50ea738c6..61430d4fb25 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -7157,7 +7157,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "I/O Limiter", + "title": "I/O Limiter Throughput", "tooltip": { "shared": true, "sort": 0, @@ -7201,7 +7201,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "I/O Limiter pending tasks.", + "description": "The storage I/O limiter metrics.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -7212,7 +7212,117 @@ "h": 8, "w": 12, "x": 12, - "y": 64 + "y": 61 + }, + "hiddenSeries": false, + "id": 266, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:563", + "alias": "/-/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_io_limiter_pending_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Limiter Pending Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:230", + "decimals": 0, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:231", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "I/O Limiter current pending count.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 }, "hiddenSeries": false, "id": 86, @@ -7239,7 +7349,12 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/pending/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -7249,15 +7364,16 @@ "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "other-{{instance}}", - "refId": "A" + "legendFormat": "other-current-{{instance}}", + "refId": "A", + "hide": true }, { "exemplar": true, "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", - "legendFormat": "bgwrite-{{instance}}", + "legendFormat": "bgwrite-current-{{instance}}", "refId": "B" }, { @@ -7265,7 +7381,7 @@ "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", - "legendFormat": "fgwrite-{{instance}}", + "legendFormat": "fgwrite-current-{{instance}}", "refId": "C" }, { @@ -7273,7 +7389,7 @@ "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", - "legendFormat": "bgread-{{instance}}", + "legendFormat": "bgread-current-{{instance}}", "refId": "D" }, { @@ -7281,15 +7397,31 @@ "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", - "legendFormat": "fgread-{{instance}}", + "legendFormat": "fgread-current-{{instance}}", "refId": "E" + }, + { + "exemplar": true, + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (le, type) / 1000000000)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}-pending-max", + "refId": "F" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "{{type}}-pending-P99", + "refId": "G" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "I/O Limiter Pending Tasks", + "title": "I/O Limiter Current Pending Count and Duration", "tooltip": { "shared": true, "sort": 0, @@ -7314,7 +7446,7 @@ "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null,