Skip to content

Commit

Permalink
feat: APM资源topo开发 (#2843)
Browse files Browse the repository at this point in the history
Co-authored-by: rxwycdh <[email protected]>
Co-authored-by: xlc <[email protected]>
Co-authored-by: 闪烁 <[email protected]>
  • Loading branch information
4 people authored Sep 10, 2024
1 parent 51ce449 commit 19ff702
Show file tree
Hide file tree
Showing 47 changed files with 2,540 additions and 1,859 deletions.
13 changes: 8 additions & 5 deletions bkmonitor/packages/apm_web/calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,11 @@ def range_cal(self, metric_result):
"""
normal_ts = defaultdict(int)
error_ts = defaultdict(int)
all_ts = []

series = metric_result.get("series", [])
if not series:
return {"metrics": [], "series": []}
all_ts = [i[-1] for i in metric_result["series"][0]["datapoints"]]

for i, item in enumerate(metric_result.get("series", [])):
if not item.get("datapoints"):
Expand All @@ -193,16 +197,15 @@ def range_cal(self, metric_result):
):
error_ts[timestamp] += value

if i == 0:
all_ts.append(timestamp)

return {
"metrics": [],
"series": [
{
"datapoints": [
(round(error_ts.get(t, 0) / (normal_ts.get(t, 0) + error_ts.get(t, 0)), 2), t) for t in all_ts
],
]
if normal_ts or error_ts
else [],
"dimensions": {},
"target": "flow",
"type": "bar",
Expand Down
11 changes: 8 additions & 3 deletions bkmonitor/packages/apm_web/handlers/service_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_service_apdex_key(cls, bk_biz_id, app_name, service_name, nodes=None, ra

@classmethod
@using_cache(CacheType.APM(60 * 10))
def get_node(cls, bk_biz_id, app_name, service_name):
def get_node(cls, bk_biz_id, app_name, service_name, raise_exception=True):
"""获取 topoNode 节点信息"""
params = {
"bk_biz_id": bk_biz_id,
Expand All @@ -284,10 +284,15 @@ def get_node(cls, bk_biz_id, app_name, service_name):
try:
response = api.apm_api.query_topo_node(**params)
if not response:
raise ValueError(f"[ServiceHandler] 拓扑节点: {service_name} 不存在,请检查上报数据是否包含此服务")
if raise_exception:
raise ValueError(f"[ServiceHandler] 拓扑节点: {service_name} 不存在,请检查上报数据是否包含此服务")
else:
return None
return response[0]
except BKAPIError as e:
raise ValueError(f"[ServiceHandler] 查询拓扑节点信息失败,错误: {e}")
if raise_exception:
raise ValueError(f"[ServiceHandler] 查询拓扑节点信息失败,错误: {e}")
return None

@classmethod
def list_nodes(cls, bk_biz_id, app_name):
Expand Down
10 changes: 8 additions & 2 deletions bkmonitor/packages/apm_web/meta/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,13 @@ def key_name_map(self):
def perform_request(self, data):
node_info = ServiceHandler.get_node(data["bk_biz_id"], data["app_name"], data["service_name"])
if not node_info:
raise ValueError(f"节点: {data['service_name']} 暂未发现,请检查上报数据中是否包含此服务")
return [
{
"name": _("数据状态"),
"type": "string",
"value": _("无数据(暂未发现此服务)"),
}
]

instance_count = api.apm_api.query_instance(
bk_biz_id=data["bk_biz_id"],
Expand Down Expand Up @@ -1177,7 +1183,7 @@ def perform_request(self, validated_data):

return [
{
"name": _("类型"),
"name": _("接口类型"),
"type": "string",
"value": CategoryEnum.get_label_by_key(endpoint_info.get("category"))
if endpoint_info.get("category")
Expand Down
18 changes: 17 additions & 1 deletion bkmonitor/packages/apm_web/metric/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,21 @@ class StatisticsMetric(ChoicesEnum):
_choices_labels = (
(REQUEST_COUNT, "请求数"),
(ERROR_COUNT, "错误数"),
(AVG_DURATION, "平均耗时"),
(AVG_DURATION, "响应耗时"),
)


class ErrorMetricCategory(ChoicesEnum):
    """
    Category of the status code selected for the error-count metric.

    When a specific error code is ticked in the metric detail of the overview
    page, the request must also state whether that code is an http or a grpc
    status code, because the two are queried with different conditions.
    Corresponding chart config field: apm_time_series_category
    """

    HTTP = "http"
    GRPC = "grpc"

    _choices_labels = (
        (HTTP, "http 错误码"),
        # The label used to read "错误数" (error count), presumably copied from
        # the error-count metric label; it now names the category like HTTP does.
        (GRPC, "grpc 错误码"),
    )
25 changes: 19 additions & 6 deletions bkmonitor/packages/apm_web/metric/handler/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from apm_web.constants import TopoNodeKind
from apm_web.handlers.service_handler import ServiceHandler
from apm_web.metric.constants import StatisticsMetric
from apm_web.metric.constants import ErrorMetricCategory, StatisticsMetric
from apm_web.metric_handler import (
MetricHandler,
ServiceFlowAvgDuration,
Expand Down Expand Up @@ -86,6 +86,7 @@ class Template:
other_service_field_index: int = 1
unit: Callable = None
dimension: str = None
dimension_category: str = None
filter_dict: dict = field(default_factory=dict)

@classmethod
Expand Down Expand Up @@ -266,7 +267,7 @@ class ServiceMetricStatistics(BaseQuery):
virtual_service_name = _("其他服务")

@classmethod
def get_template(cls, metric_name, kind, dimension, service_name=None):
def get_template(cls, metric_name, kind, dimension, service_name=None, dimension_category=None):

dimension_metric = cls.template_mapping.get(metric_name, {}).get(dimension)
if not dimension_metric:
Expand All @@ -280,6 +281,7 @@ def get_template(cls, metric_name, kind, dimension, service_name=None):
res.dimension = dimension
if service_name:
res.filter_dict.update(res.get_filter_dict(service_name, kind))
res.dimension_category = dimension_category
return res

def __init__(self, *args, **kwargs):
Expand All @@ -299,16 +301,27 @@ def list(self, template: Template):
).get_range_values_mapping(ignore_keys=template.ignore_keys)
elif self.data_type == StatisticsMetric.ERROR_COUNT.value:
# 错误数的维度是错误码
if template.dimension_category not in ErrorMetricCategory.get_dict_choices():
raise ValueError(f"[指标统计] 查询错误码为: {template.dimension} 时需要指定来源类型 (Http / Rpc)")

if template.dimension_category == ErrorMetricCategory.HTTP.value:
if self.params.get("option_kind") == "caller":
wheres = [{"key": "from_span_http_status_code", "method": "eq", "value": template.dimension}]
else:
wheres = [{"key": "to_span_http_status_code", "method": "eq", "value": template.dimension}]
else:
if self.params.get("option_kind") == "caller":
wheres = [{"key": "from_span_grpc_status_code", "method": "eq", "value": template.dimension}]
else:
wheres = [{"key": "to_span_grpc_status_code", "method": "eq", "value": template.dimension}]

values_mapping = template.metric(
self.application,
self.start_time,
self.end_time,
filter_dict=template.filter_dict,
group_by=template.table_group_by,
where=[
{"key": "http_status_code", "method": "eq", "value": template.dimension},
{"key": "rpc_grpc_status_code", "method": "eq", "value": template.dimension, "condition": "or"},
],
where=wheres,
interval=get_interval_number(self.start_time, self.end_time),
).get_range_values_mapping(ignore_keys=template.ignore_keys)
else:
Expand Down
11 changes: 9 additions & 2 deletions bkmonitor/packages/apm_web/metric/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from apm_web.handlers.host_handler import HostHandler
from apm_web.handlers.service_handler import ServiceHandler
from apm_web.icon import get_icon
from apm_web.metric.constants import StatisticsMetric
from apm_web.metric.constants import ErrorMetricCategory, StatisticsMetric
from apm_web.metric.handler.statistics import ServiceMetricStatistics
from apm_web.metric.handler.top_n import get_top_n_query_type, load_top_n_handler
from apm_web.metric_handler import (
Expand Down Expand Up @@ -165,6 +165,7 @@ def perform_request(self, validate_data):
validate_data["bk_biz_id"],
validate_data["app_name"],
validate_data["service_name"],
raise_exception=False,
)
if not node:
return self.fill_unit(resource.grafana.graph_unify_query(unify_query_params), validate_data.get("unit"))
Expand Down Expand Up @@ -2469,13 +2470,19 @@ class RequestSerializer(serializers.Serializer):
data_type = serializers.ChoiceField(label="指标类型", choices=StatisticsMetric.get_choices())
# 请求数无维度 错误数维度为 总数量+状态码 响应耗时维度为 平均耗时+MAX/MIN/P90/...
dimension = serializers.CharField(label="下拉框维度", required=False, default="default")
dimension_category = serializers.ChoiceField(
label="下拉框维度分类",
choices=ErrorMetricCategory.get_choices(),
required=False,
)

def perform_request(self, validated_data):
template = ServiceMetricStatistics.get_template(
validated_data["data_type"],
validated_data.pop("option_kind"),
validated_data.get("option_kind"),
validated_data.pop("dimension"),
validated_data.get("service_name"),
validated_data.get("dimension_category"),
)
s = ServiceMetricStatistics(**validated_data)
return s.list(template)
35 changes: 34 additions & 1 deletion bkmonitor/packages/apm_web/topo/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,38 @@ class BarChartDataType(ChoicesEnum):
AVG_DURATION_CALLEE = "avg_duration_callee"
INSTANCE_COUNT = "instance_count"

# Error-count and duration chart types; each one has a caller and a callee variant.
# Every value must be unique: the value is what identifies the selected option.
ERROR_COUNT_CALLER = "error_count_caller"
ERROR_COUNT_CALLEE = "error_count_callee"
DURATION_MAX_CALLER = "duration_max_caller"
DURATION_MAX_CALLEE = "duration_max_callee"
# Fixed: this was "duration_min_callee", which collided with DURATION_MIN_CALLEE.
DURATION_MIN_CALLER = "duration_min_caller"
DURATION_MIN_CALLEE = "duration_min_callee"
DURATION_P50_CALLER = "duration_p50_caller"
DURATION_P50_CALLEE = "duration_p50_callee"
DURATION_P99_CALLER = "duration_p99_caller"
DURATION_P99_CALLEE = "duration_p99_callee"
DURATION_P95_CALLER = "duration_p95_caller"
DURATION_P95_CALLEE = "duration_p95_callee"

# Drop-down options are listed below
_choices_labels = (
(Apdex, _("apdex")),
(Alert, _("告警事件")),
(ErrorRateCaller, _("主调错误率")),
(ErrorRateCallee, _("被调错误率")),
(ErrorRate, _("错误率")),
(ERROR_COUNT_CALLER, _("主调错误数")),
(ERROR_COUNT_CALLEE, _("被调错误数")),
(DURATION_MAX_CALLER, _("主调最大耗时")),
(DURATION_MAX_CALLEE, _("被调最大耗时")),
(DURATION_MIN_CALLER, _("主调最小耗时")),
(DURATION_MIN_CALLEE, _("被调最小耗时")),
(DURATION_P50_CALLER, _("主调 P50 耗时")),
(DURATION_P50_CALLEE, _("被调 P50 耗时")),
(DURATION_P99_CALLER, _("主调 P99 耗时")),
(DURATION_P99_CALLEE, _("被调 P99 耗时")),
(DURATION_P95_CALLER, _("主调 P95 耗时")),
(DURATION_P95_CALLEE, _("被调 P95 耗时")),
)


Expand Down Expand Up @@ -76,10 +101,12 @@ class GraphViewType(ChoicesEnum):

TOPO = "topo"
TABLE = "table"
TOPO_DIFF = "topo_diff"

_choices_labels = (
(TOPO, _("视图")),
(TABLE, _("表格")),
(TOPO_DIFF, _("视图(对比模式)")),
)


Expand Down Expand Up @@ -130,5 +157,11 @@ class TopoLinkType(ChoicesEnum):
"""拓扑图中可供跳转的链接日期"""

ALERT = "alert"
TOPO_SOURCE = "topo_source"

_choices_labels = (ALERT, _("跳转到告警中心"))
# Declared as (value, label) pairs, the same shape as the other _choices_labels
# tuples; a flat tuple of four strings cannot be read as value -> label entries.
# NOTE(review): assumes ChoicesEnum consumes pairs — confirm against its implementation.
_choices_labels = (
    (ALERT, _("跳转到告警中心")),
    (TOPO_SOURCE, _("资源拓扑链接跳转处理")),
)
12 changes: 6 additions & 6 deletions bkmonitor/packages/apm_web/topo/handle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@


class BaseQuery:
def __init__(self, data_type, bk_biz_id, app_name, start_time, end_time, service_name=None, **extra_params):
def __init__(self, bk_biz_id, app_name, start_time, end_time, data_type=None, service_name=None, **extra_params):
self.bk_biz_id = bk_biz_id
self.app_name = app_name
self.data_type = data_type
self.start_time = start_time
self.end_time = end_time
self.delta = self.end_time - self.start_time
self.data_type = data_type
self.service_name = service_name
self.params = extra_params.get("extra_params") if extra_params else {}
self.params = extra_params if extra_params else {}

self.application = Application.objects.filter(bk_biz_id=bk_biz_id, app_name=app_name).get()
self.metrics_table = self.application.metric_result_table_id
Expand All @@ -33,14 +33,14 @@ def convert_metric_to_condition(self) -> [list]:
return [{"key": "service_name", "method": "eq", "value": [self.service_name]}] if self.service_name else []

def get_metric(self, metric_clz: Type[MetricHandler], **kwargs):
return metric_clz(**self.common_params, **kwargs)
return metric_clz(**self.common_params(), **kwargs)

@property
def common_params(self):
def common_params(self, **kwargs):
return {
"application": self.application,
"start_time": self.start_time,
"end_time": self.end_time,
**kwargs,
}


Expand Down
47 changes: 47 additions & 0 deletions bkmonitor/packages/apm_web/topo/handle/bar_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
"""
import copy
import itertools
import json
import urllib.parse
from dataclasses import asdict, dataclass, field
from typing import Dict, List

Expand Down Expand Up @@ -228,3 +230,48 @@ def get_service_overview_tab_link(cls, bk_biz_id, app_name, service_name, start_
f"to={end_time}&"
f"dashboardId={dashboard_id}"
)

@classmethod
def get_host_monitor_link(cls, bk_host_id, start_time, end_time):
    """Build the host-monitoring detail path for one host.

    :param bk_host_id: host id placed in the path segment
    :param start_time: range start; multiplied by 1000 for the ``from`` parameter
        (presumably seconds -> milliseconds, confirm with callers)
    :param end_time: range end; multiplied by 1000 for the ``to`` parameter
    :return: relative URL string
    """
    range_from = start_time * 1000
    range_to = end_time * 1000
    return "/performance/detail/{}?from={}&to={}".format(bk_host_id, range_from, range_to)

@classmethod
def get_pod_monitor_link(cls, bcs_cluster_id, namespace, pod, start_time, end_time):
    """Build the K8S monitoring detail link for one Pod.

    :param bcs_cluster_id: cluster id used for the ``filter-bcs_cluster_id`` parameter
    :param namespace: namespace used for the ``filter-namespace`` parameter
    :param pod: pod name, used both as ``filter-pod_name`` and as the search keyword
    :param start_time: range start; multiplied by 1000 for the ``from`` parameter
    :param end_time: range end; multiplied by 1000 for the ``to`` parameter
    :return: relative URL string
    """

    def _escape(value):
        # Percent-encode every reserved character so a value containing
        # '&', '#', '=' or spaces cannot break or inject query parameters.
        # Values made only of unreserved characters come out unchanged.
        return urllib.parse.quote(str(value), safe="")

    query_data = {"selectorSearch": [{"keyword": pod}]}
    encode_query = urllib.parse.quote(json.dumps(query_data))

    return (
        f"/k8s?filter-bcs_cluster_id={_escape(bcs_cluster_id)}&"
        f"filter-namespace={_escape(namespace)}&"
        f"filter-pod_name={_escape(pod)}&dashboardId=pod&sceneId=kubernetes&sceneType=detail&"
        f"from={start_time * 1000}&to={end_time * 1000}&"
        f"queryData={encode_query}"
    )

@classmethod
def get_service_monitor_link(cls, bcs_cluster_id, namespace, service, start_time, end_time):
    """Build the K8S monitoring detail link for one Service.

    :param bcs_cluster_id: cluster id used for the ``filter-bcs_cluster_id`` parameter
    :param namespace: namespace used for the ``filter-namespace`` parameter
    :param service: service name, used both as ``filter-service_name`` and as the search keyword
    :param start_time: range start; multiplied by 1000 for the ``from`` parameter
    :param end_time: range end; multiplied by 1000 for the ``to`` parameter
    :return: relative URL string
    """

    def _escape(value):
        # Percent-encode every reserved character so a value containing
        # '&', '#', '=' or spaces cannot break or inject query parameters.
        # Values made only of unreserved characters come out unchanged.
        return urllib.parse.quote(str(value), safe="")

    query_data = {"selectorSearch": [{"keyword": service}]}
    encode_query = urllib.parse.quote(json.dumps(query_data))

    return (
        f"/k8s?filter-bcs_cluster_id={_escape(bcs_cluster_id)}&"
        f"filter-namespace={_escape(namespace)}&"
        f"filter-service_name={_escape(service)}&"
        f"from={start_time * 1000}&to={end_time * 1000}&"
        f"dashboardId=service&sceneId=kubernetes&sceneType=detail&queryData={encode_query}"
    )
Loading

0 comments on commit 19ff702

Please sign in to comment.