diff --git a/bklog/app.yml b/bklog/app.yml
index d93ac3271..d00335f29 100644
--- a/bklog/app.yml
+++ b/bklog/app.yml
@@ -5,7 +5,7 @@ is_use_celery: True
 author: 蓝鲸智云
 introduction: 蓝鲸智云日志平台是为了解决运维场景中查询日志难的问题而推出的一款Saas,基于业界主流的全文检索引擎,通过蓝鲸智云的专属agent进行日志采集,无需登录各台机器,集中管理所有日志。
 introduction_en: BlueKing Log System is a SaaS to solve the problem that it is difficult to query logs in operation and maintenance scenarios, With full-text search engine based on the mainstream in the industry. It collects logs through BlueKing's exclusive agent without logging in to each machine and manages all logs centrally.
-version: 4.7.0
+version: 4.6.6
 category: 运维工具
 desktop:
   width: 1300
diff --git a/bklog/apps/api/modules/monitor.py b/bklog/apps/api/modules/monitor.py
index e5f0167d5..2fee8708e 100644
--- a/bklog/apps/api/modules/monitor.py
+++ b/bklog/apps/api/modules/monitor.py
@@ -81,14 +81,6 @@ def __init__(self):
             default_return_value=None,
             before_request=add_esb_info_before_request,
         )
-        self.save_alarm_strategy_v3 = DataAPI(
-            method="POST",
-            url=MONITOR_APIGATEWAY_ROOT + "save_alarm_strategy_v3/",
-            module=self.MODULE,
-            description="保存告警策略V3",
-            default_return_value=None,
-            before_request=add_esb_info_before_request,
-        )
         self.query_log_relation = DataAPI(
             method="POST",
             url=MONITOR_APIGATEWAY_ROOT + "query_log_relation",
diff --git a/bklog/apps/api/modules/utils.py b/bklog/apps/api/modules/utils.py
index e34a7b110..ec1c8fc2c 100644
--- a/bklog/apps/api/modules/utils.py
+++ b/bklog/apps/api/modules/utils.py
@@ -175,8 +175,7 @@ def add_esb_info_before_request(params):
 def add_esb_info_before_request_for_bkdata_token(params):  # pylint: disable=function-name-too-long
     req = get_request()
-    skip_check = getattr(req, "skip_check", False)
-    if settings.BKAPP_IS_BKLOG_API and not skip_check:
+    if settings.BKAPP_IS_BKLOG_API:
         auth_info = EsquerySearchPermissions.get_auth_info(req)
         if auth_info["bk_app_code"] in settings.ESQUERY_WHITE_LIST:
             # 在白名单内的 app 使用超级权限
diff --git a/bklog/apps/constants.py b/bklog/apps/constants.py
index e1180843a..1e260e900 100644
--- a/bklog/apps/constants.py
+++ b/bklog/apps/constants.py
@@ -177,13 +177,3 @@ class SpacePropertyEnum(ChoicesEnum):
     SPACE_TYPE = "space_type"
 
     _choices_labels = (SPACE_TYPE, _("空间类型"))
-
-
-class ApiTokenAuthType(ChoicesEnum):
-    """
-    API Token鉴权类型
-    """
-
-    GRAFANA = "Grafana"
-
-    _choices_labels = ((GRAFANA, _("Grafana")),)
diff --git a/bklog/apps/grafana/data_source.py b/bklog/apps/grafana/data_source.py
index ba79ac0ec..ac013fcb2 100644
--- a/bklog/apps/grafana/data_source.py
+++ b/bklog/apps/grafana/data_source.py
@@ -20,16 +20,13 @@ the project delivered to anyone in the future.
 """
 import json
-from copy import deepcopy
 from dataclasses import dataclass
 from typing import Any, Dict, List, Union
 
 from apps.api import BkDataQueryApi
-from apps.constants import ApiTokenAuthType
 from apps.feature_toggle.handlers.toggle import FeatureToggleObject
 from apps.feature_toggle.models import FeatureToggle
 from apps.feature_toggle.plugins.constants import GRAFANA_CUSTOM_ES_DATASOURCE
-from apps.log_commons.models import ApiAuthToken
 from apps.log_esquery.esquery.client.QueryClient import QueryClient
 from apps.log_esquery.esquery.client.QueryClientBkData import QueryClientBkData
 from apps.log_esquery.esquery.client.QueryClientEs import QueryClientEs
@@ -46,22 +43,13 @@ class CustomIndexSetESDataSource:
     """可以转换成Grafana DataSource的索引集"""
 
-    space_uid: str = ""
     index_set_id: int = 0
     index_set_name: str = ""
     time_field: str = DEFAULT_TIME_FIELD
-    token: str = ""
-
-    @classmethod
-    def get_token(cls, space_uid: str):
-        """获取token"""
-        token_obj, __ = ApiAuthToken.objects.get_or_create(space_uid=space_uid, type=ApiTokenAuthType.GRAFANA.value)
-        return token_obj.token
 
     @classmethod
     def list(cls, space_uid: str) -> List["CustomIndexSetESDataSource"]:
         """获取列表"""
-        token = cls.get_token(space_uid=space_uid)
         index_sets: List["CustomIndexSetESDataSource"] = []
         index_set_objs = LogIndexSet.objects.filter(space_uid=space_uid).iterator()
         for index_set_obj in index_set_objs:
@@ -73,13 +61,11 @@ def list(cls, space_uid: str) -> List["CustomIndexSetESDataSource"]:
                 continue
             index_sets.append(
                 cls(
-                    space_uid=space_uid,
                     index_set_id=index_set_obj.index_set_id,
                     index_set_name=cls.generate_datasource_name(
                         scenario_id=index_set_obj.scenario_id, index_set_name=index_set_obj.index_set_name
                     ),
                     time_field=index_set_obj.time_field,
-                    token=token,
                 )
             )
         return index_sets
@@ -96,25 +82,14 @@ def generate_datasource_name(scenario_id: str, index_set_name: str) -> str:
 
     def to_datasource(self) -> Datasource:
         """索引 -> Grafana ES数据源"""
-        json_data = {
-            "timeField": self.time_field,
-            # 因为监控的Grafana版本已经到10, 默认支持的ES版本是7.10+, 但是日志的Grafana是8, 兼容两边将自定义ES数据源的版本固定住7.10
-            "esVersion": "7.10.0",
-            "tlsSkipVerify": True,
-            "httpHeaderName1": "X-BKLOG-SPACE-UID",
-            "httpHeaderName2": "X-BKLOG-TOKEN",
-        }
+        json_data = {"timeField": self.time_field}
         return Datasource(
             name=self.index_set_name,
             database=str(self.index_set_id),
-            access="proxy",
+            access="direct",
             type="elasticsearch",
-            url=f"{settings.BK_IAM_RESOURCE_API_HOST}/grafana/custom_es_datasource",
+            url="custom_es_datasource",
             jsonData=json_data,
-            secureJsonData={
-                "httpHeaderValue1": self.space_uid,
-                "httpHeaderValue2": self.token,
-            },
         )
 
     @classmethod
@@ -149,52 +124,6 @@ def disable_space(cls, bk_biz_id: int):
         feature_toggle_obj.save()
 
 
-class ESBodyAdapter:
-    """该类用于兼容Grafana ES7的语法与日志检索的body查询语法"""
-
-    def __init__(self, body: Dict[str, Any]):
-        self.body = body
-
-    @staticmethod
-    def adapt_interval(body: Dict[str, Any] = None) -> Dict[str, Any]:
-        """
-        data_histogram的时间间隔字段名为fixed_interval, 但是我们的接口是interval
-        """
-        new_dict = {}
-        for k, v in body.items():
-            if k == "date_histogram" and "fixed_interval" in v:
-                v["interval"] = v.pop("fixed_interval")
-            if isinstance(v, dict):
-                new_dict[k] = ESBodyAdapter.adapt_interval(v)
-            else:
-                new_dict[k] = v
-        return new_dict
-
-    @staticmethod
-    def adapt_aggs(body: Dict[str, Any] = None):
-        """
-        聚合的时候, order的字段名为_key, 但是我们的接口是_term
-        """
-        if isinstance(body, dict):
-            for k, v in body.items():
-                if k == "aggs":
-                    for agg_key in v:
-                        if (
-                            "terms" in v[agg_key]
-                            and "order" in v[agg_key]["terms"]
-                            and "_key" in v[agg_key]["terms"]["order"]
-                        ):
-                            v[agg_key]["terms"]["order"] = {"_term": v[agg_key]["terms"]["order"]["_key"]}
-                ESBodyAdapter.adapt_aggs(v)
-
-    def adapt(self):
-        """适配Grafana ES请求"""
-        body = deepcopy(self.body)
-        body = self.adapt_interval(body=body)
-        self.adapt_aggs(body=body)
-        return body
-
-
 class CustomESDataSourceTemplate:
     """
     自定义ES数据源模板模板, 各个Scenario的数据源都继承这个模板
@@ -223,42 +152,10 @@ def get_index(self):
             ]
         )
 
-    @staticmethod
-    def compatible_mapping(mapping: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        兼容Grafana高版本只支持7.10+以上的情况, mapping结构需要调整
-        """
-        result = dict()
-        for index, value in mapping.items():
-            mapping_dict: dict = value.get("mappings", {})
-            # 日志接口返回的mapping结构中有一层是索引名,
-            if len(mapping_dict) == 1 and list(mapping_dict.keys())[0] in index:
-                result[index] = {"mappings": list(mapping_dict.values())[0]}
-                continue
-            result[index] = value
-        return result
-
     def mapping(self):
-        """
-        获取mapping
-        """
-        mapping = self._mapping()
-        return self.compatible_mapping(mapping=mapping)
-
-    def _mapping(self):
-        """
-        各个继承类如果需要自定义mapping, 重写这个方法
-        """
         return self.get_client().mapping(index=self.index)
 
     def query(self, body: Dict[str, Any]):
-        body = ESBodyAdapter(body=body).adapt()
-        return self._query(body=body)
-
-    def _query(self, body: Dict[str, Any]):
-        """
-        各个继承类如果需要自定义查询逻辑, 重写这个方法
-        """
         return self.get_client().query(index=self.index, body=body)
 
     def msearch(self, sql_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -309,10 +206,10 @@ def query_bkdata(self, body: Dict[str, Any] = None) -> List[Dict[str, Any]]:
         params.update({"bkdata_authentication_method": "user", "bk_username": "admin", "operator": "admin"})
         return BkDataQueryApi.query(params, request_cookies=False)["list"]
 
-    def _query(self, body: Dict[str, Any]):
+    def query(self, body: Dict[str, Any]):
         return self.query_bkdata(body=body)
 
-    def _mapping(self):
+    def mapping(self):
         return self.query_bkdata()
diff --git a/bklog/apps/grafana/handlers/query.py b/bklog/apps/grafana/handlers/query.py
index 1bf6387ed..5d9415c3e 100644
--- a/bklog/apps/grafana/handlers/query.py
+++ b/bklog/apps/grafana/handlers/query.py
@@ -34,10 +34,10 @@
 )
 from apps.iam import ActionEnum, Permission, ResourceEnum
 from apps.log_desensitize.handlers.desensitize import DesensitizeHandler
-from apps.log_desensitize.models import DesensitizeFieldConfig
+from apps.log_desensitize.handlers.utils import desensitize_params_init
+from apps.log_desensitize.models import DesensitizeConfig, DesensitizeFieldConfig
 from apps.log_search.constants import GlobalCategoriesEnum
 from apps.log_search.exceptions import BaseSearchIndexSetDataDoseNotExists
-from apps.log_search.handlers.index_set import IndexSetHandler
 from apps.log_search.handlers.search.aggs_handlers import AggsViewAdapter
 from apps.log_search.handlers.search.search_handlers_esquery import SearchHandler
 from apps.log_search.models import LogIndexSet, Scenario
@@ -59,7 +59,7 @@ class GrafanaQueryHandler:
         {"id": "min", "name": "MIN"},
         {"id": "max", "name": "MAX"},
         {"id": "avg", "name": "AVG"},
-        {"id": "cardinality", "name": "UNIQUE_COUNT"},
+        {"id": "cardinality", "name": "UNIQUE_COUNT"}
     ]
 
     CONDITION_CHOICES = [
@@ -124,7 +124,7 @@ def _get_buckets(self, records, record, dimensions, aggregations, metric_field,
             record[metric_field] = aggregations.get(metric_field).get("value")
             records.append(copy.deepcopy(record))
 
-    def _format_time_series(self, params, data, time_field, desensitize_configs=None):
+    def _format_time_series(self, params, data, time_field, desensitize_entities=None):
         """
         转换为Grafana TimeSeries的格式
         :param params: 请求参数
@@ -139,12 +139,9 @@ def _format_time_series(self, params, data, time_field, desensitize_configs=None):
         :rtype: list
         """
         formatted_data = defaultdict(list)
-        desensitize_configs = desensitize_configs or []
-        desensitize_handler = DesensitizeHandler(desensitize_configs)
         for record in data:
             # 字段脱敏处理
-            if desensitize_configs:
-                record = desensitize_handler.transform_dict(record)
+            record = DesensitizeHandler(desensitize_entities).transform_dict(record)
             dimensions = tuple(
                 sorted(
                     (key, value)
@@ -276,9 +273,7 @@ def query(self, query_dict: dict):
         self.check_panel_permission(query_dict["dashboard_id"], query_dict["panel_id"], query_dict["result_table_id"])
 
         # 初始化DB脱敏配置
-        desensitize_field_config_objs = DesensitizeFieldConfig.objects.filter(
-            index_set_id=query_dict["result_table_id"]
-        )
+        desensitize_field_config_objs = DesensitizeFieldConfig.objects.filter(index_set_id=query_dict["result_table_id"])
 
         desensitize_configs = [
             {
@@ -286,11 +281,12 @@ def query(self, query_dict: dict):
                 "rule_id": field_config_obj.rule_id or 0,
                 "operator": field_config_obj.operator,
                 "params": field_config_obj.params,
-                "match_pattern": field_config_obj.match_pattern,
-            }
-            for field_config_obj in desensitize_field_config_objs
+            } for field_config_obj in desensitize_field_config_objs
         ]
 
+        # 初始化脱敏工厂参数
+        desensitize_entities = desensitize_params_init(desensitize_configs=desensitize_configs)
+
         time_field = SearchHandler(query_dict["result_table_id"], {}).time_field
 
         # 如果是统计数量,则无需提供指标字段,用 _id 字段统计即可
@@ -336,7 +332,7 @@ def query(self, query_dict: dict):
             records = []
             self._get_buckets(records, {}, all_dimensions, result["aggregations"], query_dict["metric_field"])
 
-        records = self._format_time_series(query_dict, records, search_handler.time_field, desensitize_configs)
+        records = self._format_time_series(query_dict, records, search_handler.time_field, desensitize_entities)
 
         return records
@@ -406,9 +402,7 @@ def get_metric_list(self, category_id=None):
         space_uid = self.space_uid
         if not space_uid:
             return []
-
-        space_uids = IndexSetHandler.get_all_related_space_uids(space_uid)
-        index_set_list = LogIndexSet.objects.filter(space_uid__in=space_uids)
+        index_set_list = LogIndexSet.objects.filter(space_uid=space_uid)
 
         if category_id:
             index_set_list = index_set_list.filter(category_id=category_id)
@@ -689,19 +683,6 @@ def _query_dimension(self, params):
 
         return [{"label": v, "value": v} for v in dimension_values]
 
-    def _query_index_set(self, params):
-        """
-        查询维度
-        """
-        metrics = self.get_metric_list()
-
-        results = []
-
-        for group in metrics:
-            for metric in group["children"]:
-                results.append({"label": metric["name"], "value": metric["id"]})
-        return results
-
     def get_variable_value(self, variable_type, params):
         query_cmdb = partial(self._query_cmdb, variable_type=variable_type)
         query_processor = {
@@ -709,7 +690,6 @@ def get_variable_value(self, variable_type, params):
             "module": query_cmdb,
             "set": query_cmdb,
             "dimension": self._query_dimension,
-            "index_set": self._query_index_set,
         }
 
         if variable_type not in query_processor:
diff --git a/bklog/apps/grafana/serializers.py b/bklog/apps/grafana/serializers.py
index 18deff0e1..f40457674 100644
--- a/bklog/apps/grafana/serializers.py
+++ b/bklog/apps/grafana/serializers.py
@@ -31,7 +31,7 @@ class GetVariableFieldSerializer(serializers.Serializer):
 
 
 class GetVariableValueSerializer(serializers.Serializer):
     bk_biz_id = serializers.IntegerField(label=_("业务ID"))
-    type = serializers.ChoiceField(label=_("查询类型"), choices=["dimension", "host", "module", "set", "index_set"])
+    type = serializers.ChoiceField(label=_("查询类型"), choices=["dimension", "host", "module", "set"])
     params = serializers.DictField(label=_("查询参数"))
diff --git a/bklog/apps/log_clustering/components/collections/flow_component.py b/bklog/apps/log_clustering/components/collections/flow_component.py
index dca90b7aa..d5d0a4568 100644
--- a/bklog/apps/log_clustering/components/collections/flow_component.py
+++ b/bklog/apps/log_clustering/components/collections/flow_component.py
@@ -154,8 +154,12 @@ def _execute(self, data, parent_data):
         index_set_id = data.get_one_of_inputs("index_set_id")
         log_index_set = LogIndexSet.objects.filter(index_set_id=index_set_id).first()
         LogIndexSet.set_tag(log_index_set.index_set_id, InnerTag.CLUSTERING.value)
+        clustering_config = ClusteringConfig.get_by_index_set_id(index_set_id=index_set_id)
         if log_index_set:
-            ClusteringMonitorHandler(index_set_id=log_index_set.index_set_id).create_new_cls_strategy()
+            bk_biz_id = clustering_config.bk_biz_id
+            ClusteringMonitorHandler(
+                index_set_id=log_index_set.index_set_id, bk_biz_id=bk_biz_id
+            ).create_new_cls_strategy()
 
         return True
@@ -216,7 +220,8 @@ def _execute(self, data, parent_data):
         new_cls_index_set.save()
         log_index_set = LogIndexSet.objects.filter(index_set_id=new_cls_index_set.index_set_id).first()
         LogIndexSet.set_tag(log_index_set.index_set_id, InnerTag.CLUSTERING.value)
-        ClusteringMonitorHandler(index_set_id=log_index_set.index_set_id).create_new_cls_strategy()
+        bk_biz_id = clustering_config.bk_biz_id
+        ClusteringMonitorHandler(index_set_id=log_index_set.index_set_id, bk_biz_id=bk_biz_id).create_new_cls_strategy()
 
         return True
@@ -314,9 +319,6 @@ def _execute(self, data, parent_data):
         flow = DataFlowHandler().create_predict_flow(index_set_id=index_set_id)
         is_collect_index_set = bool(data.get_one_of_inputs("collector_config_id"))
         if is_collect_index_set:
-            # 添加索引集表标签
-            log_index_set = LogIndexSet.objects.filter(index_set_id=index_set_id).first()
-            LogIndexSet.set_tag(log_index_set.index_set_id, InnerTag.CLUSTERING.value)
             # 采集项要继续消费,能跟历史数据无缝衔接,避免丢数据
             consuming_mode = "continue"
         else:
diff --git a/bklog/apps/log_clustering/constants.py b/bklog/apps/log_clustering/constants.py
index 032bd1575..f23ed5056 100644
--- a/bklog/apps/log_clustering/constants.py
+++ b/bklog/apps/log_clustering/constants.py
@@ -78,23 +78,6 @@
 ]
 DEFAULT_CLUSTERING_ITEM_NAME = _("日志聚类新类(近24H)")
 DEFAULT_METRIC = "event_time"
-
-# 保存告警策略 v3部分参数
-DEFAULT_AGG_METHOD = "SUM"
-ITEM_NAME_CLUSTERING = "SUM(log_count)"
-DEFAULT_METRIC_CLUSTERING = "log_count"
-ALARM_INTERVAL_CLUSTERING = 7200
-AGG_DIMENSION = ["sensitivity", "signature"]
-AGG_CONDITION = [
-    {"key": "sensitivity", "dimension_name": "sensitivity", "value": ["__dist_05"], "method": "eq", "condition": "and"}
-]
-TRIGGER_CONFIG = {
-    "count": 1,
-    "check_window": 5,
-    "uptime": {"calendars": [], "time_ranges": [{"start": "00:00", "end": "23:59"}]},
-}
-
-
 DEFAULT_DETECTS = [
     {
         "level": 2,
@@ -111,6 +94,7 @@
         "alarm_interval": 1440,
         "send_recovery_alarm": False,
     }
+
 NOT_NEED_EDIT_NODES = ["format_signature"]
 
 DEFAULT_PATTERN_MONITOR_MSG = """{{content.level}}
diff --git a/bklog/apps/log_clustering/exceptions.py b/bklog/apps/log_clustering/exceptions.py
index b4290bbb5..5e9861423 100644
--- a/bklog/apps/log_clustering/exceptions.py
+++ b/bklog/apps/log_clustering/exceptions.py
@@ -19,9 +19,10 @@ We undertake not to change the open source license (MIT license) applicable to
 the current version of the project delivered to anyone in the future.
""" -from apps.exceptions import BaseException, ErrorCode from django.utils.translation import ugettext_lazy as _ +from apps.exceptions import BaseException, ErrorCode + # ================================================= # 日志聚类模块 # ================================================= @@ -90,8 +91,3 @@ class BkdataFieldsException(BaseClusteringException): class ModelReleaseNotFoundException(BaseClusteringException): ERROR_CODE = "013" MESSAGE = _("模型找不到对应的发布版本: {model_id}") - - -class IndexSetHasClsStrategyException(BaseClusteringException): - ERROR_CODE = "014" - MESSAGE = _("该索引集已经创建告警策略: {index_set_id}") diff --git a/bklog/apps/log_clustering/handlers/clustering_config.py b/bklog/apps/log_clustering/handlers/clustering_config.py index 63b8cb6a2..894db912b 100644 --- a/bklog/apps/log_clustering/handlers/clustering_config.py +++ b/bklog/apps/log_clustering/handlers/clustering_config.py @@ -56,10 +56,6 @@ from apps.utils.local import activate_request from apps.utils.log import logger from apps.utils.thread import generate_request -from bkm_space.api import SpaceApi -from bkm_space.define import SpaceTypeEnum -from bkm_space.errors import NoRelatedResourceError -from bkm_space.utils import bk_biz_id_to_space_uid from django.utils.translation import ugettext_lazy as _ @@ -120,9 +116,6 @@ def update_or_create(self, params: dict): bk_biz_id = params["bk_biz_id"] filter_rules = params["filter_rules"] signature_enable = params["signature_enable"] - # 非业务类型的项目空间业务 id 为负数,需要通过 Space 的关系拿到其关联的真正的业务ID。然后以这个关联业务ID在计算平台操作, 没有则不允许创建聚类 - related_space_pre_bk_biz_id = params["bk_biz_id"] - bk_biz_id = self.validate_bk_biz_id(related_space_pre_bk_biz_id) from apps.log_clustering.handlers.pipline_service.aiops_service import ( operator_aiops_service, ) @@ -208,7 +201,6 @@ def update_or_create(self, params: dict): signature_enable=signature_enable, source_rt_name=source_rt_name, category_id=category_id, - related_space_pre_bk_biz_id=related_space_pre_bk_biz_id, # 查询space关联的真实业务之前的业务id ) if signature_enable: self.create_service( @@ -378,22 +370,3 @@ def pre_check_fields(cls, fields, etl_config, clustering_fields): raise ValueError(BkdataFieldsException(BkdataFieldsException.MESSAGE.format(field=clustering_fields))) return True - - @staticmethod - def validate_bk_biz_id(bk_biz_id: int) -> int: - """ - 注入业务id校验 - :return: - """ - - # 业务id为正数,表示空间类型是bkcc,可以调用cmdb相关接口 - bk_biz_id = int(bk_biz_id) - if bk_biz_id > 0: - return bk_biz_id - # 业务id为负数,需要获取空间关联的真实业务id - space_uid = bk_biz_id_to_space_uid(bk_biz_id) - space = SpaceApi.get_related_space(space_uid, SpaceTypeEnum.BKCC.value) - if space: - return space.bk_biz_id - # 无业务关联的空间,不允许创建日志聚类 当前抛出异常 - raise NoRelatedResourceError(_(f"当前业务:{bk_biz_id}通过Space关系查询不到关联的真实业务ID,不允许创建日志聚类").format(bk_biz_id=bk_biz_id)) diff --git a/bklog/apps/log_clustering/handlers/clustering_monitor.py b/bklog/apps/log_clustering/handlers/clustering_monitor.py index cee2f9c98..1d5154fdd 100644 --- a/bklog/apps/log_clustering/handlers/clustering_monitor.py +++ b/bklog/apps/log_clustering/handlers/clustering_monitor.py @@ -21,17 +21,11 @@ """ from apps.api import MonitorApi -from apps.feature_toggle.handlers.toggle import FeatureToggleObject -from apps.feature_toggle.plugins.constants import BKDATA_CLUSTERING_TOGGLE from apps.log_clustering.constants import ( - AGG_CONDITION, - AGG_DIMENSION, AGGS_FIELD_PREFIX, - ALARM_INTERVAL_CLUSTERING, DEFAULT_ACTION_CONFIG, DEFAULT_ACTION_TYPE, DEFAULT_AGG_INTERVAL, - DEFAULT_AGG_METHOD, DEFAULT_AGG_METHOD_BKDATA, DEFAULT_ALGORITHMS, 
DEFAULT_CLUSTERING_ITEM_NAME, @@ -43,21 +37,15 @@ DEFAULT_EXPRESSION, DEFAULT_LABEL, DEFAULT_METRIC, - DEFAULT_METRIC_CLUSTERING, DEFAULT_NO_DATA_CONFIG, DEFAULT_PATTERN_MONITOR_MSG, DEFAULT_PATTERN_RECOVER_MSG, DEFAULT_SCENARIO, DEFAULT_TIME_FIELD, - ITEM_NAME_CLUSTERING, - TRIGGER_CONFIG, ActionEnum, StrategiesType, ) -from apps.log_clustering.exceptions import ( - ClusteringIndexSetNotExistException, - IndexSetHasClsStrategyException, -) +from apps.log_clustering.exceptions import ClusteringIndexSetNotExistException from apps.log_clustering.models import ClusteringConfig, SignatureStrategySettings from apps.log_clustering.utils.monitor import MonitorUtils from apps.log_search.models import LogIndexSet @@ -66,8 +54,9 @@ class ClusteringMonitorHandler(object): - def __init__(self, index_set_id): + def __init__(self, index_set_id, bk_biz_id: int): self.index_set_id = index_set_id + self.bk_biz_id = bk_biz_id self.index_set = LogIndexSet.objects.filter(index_set_id=self.index_set_id).first() if not self.index_set: raise ClusteringIndexSetNotExistException( @@ -75,12 +64,6 @@ def __init__(self, index_set_id): ) self.log_index_set_data, *_ = self.index_set.indexes self.clustering_config = ClusteringConfig.get_by_index_set_id(index_set_id=self.index_set_id) - self.bk_biz_id = ( - self.clustering_config.bk_biz_id - if not self.clustering_config.related_space_pre_bk_biz_id - else self.clustering_config.related_space_pre_bk_biz_id - ) - self.conf = FeatureToggleObject.toggle(BKDATA_CLUSTERING_TOGGLE).feature_config def update_strategies(self, pattern_level, actions): result = True @@ -203,147 +186,6 @@ def save_strategy( signature_strategy_settings.save() return strategy - @atomic - def save_clustering_strategy( - self, - pattern_level="", - signature="", - table_id=None, - metric="", - strategy_type=StrategiesType.NEW_CLS_strategy, # 新类告警 - ): - if SignatureStrategySettings.objects.filter( - index_set_id=self.index_set_id, strategy_type=strategy_type - ).exists(): - raise IndexSetHasClsStrategyException( - IndexSetHasClsStrategyException.MESSAGE.format(index_set_id=self.index_set_id) - ) - signature_strategy_settings = SignatureStrategySettings.objects.create( - **{ - "signature": signature, - "index_set_id": self.index_set_id, - "strategy_id": None, - "bk_biz_id": self.bk_biz_id, - "pattern_level": pattern_level, - "strategy_type": strategy_type, - } - ) - anomaly_template = DEFAULT_PATTERN_MONITOR_MSG.replace( - "__clustering_field__", self.clustering_config.clustering_fields - ) - label_index_set_id = self.clustering_config.new_cls_index_set_id or self.index_set_id - - name = _("{} - 日志数量异常告警").format(self.index_set.index_set_name) - labels = DEFAULT_LABEL.copy() - labels += [f"LogClustering/Count/{label_index_set_id}"] - items = [ - { - "name": ITEM_NAME_CLUSTERING, - "no_data_config": DEFAULT_NO_DATA_CONFIG, - "target": [], - "expression": DEFAULT_EXPRESSION, - "functions": [], - "origin_sql": "", - "query_configs": [ - { - "data_source_label": DEFAULT_DATA_SOURCE_LABEL_BKDATA, - "data_type_label": DEFAULT_DATA_TYPE_LABEL_BKDATA, - "alias": DEFAULT_EXPRESSION, - "result_table_id": table_id, - "agg_method": DEFAULT_AGG_METHOD, - "agg_interval": self.conf.get("agg_interval", 60), - "agg_dimension": AGG_DIMENSION, - "agg_condition": AGG_CONDITION, - "metric_field": metric, - "unit": "", - "metric_id": "bk_data.{table_id}.{metric}".format(table_id=table_id, metric=metric), - "index_set_id": "", - "query_string": "*", - "custom_event_name": "log_count", - "functions": [], - "time_field": 
"dtEventTimeStamp", - "bkmonitor_strategy_id": "log_count", - "alert_name": "log_count", - } - ], - "algorithms": [ - { - "level": 2, - "type": "IntelligentDetect", - "config": { - "plan_id": self.conf.get("algorithm_plan_id"), - "visual_type": "score", - "args": { - "$model_file_id": self.clustering_config.model_output_rt, # 预测节点输出 - "$new_class_alert_interval": "30", - "$new_class_alert_th": "1", - }, - }, - "unit_prefix": "", - } - ], - } - ] - detects = [ - { - "level": 2, - "expression": "", - "trigger_config": TRIGGER_CONFIG, - "recovery_config": {"check_window": 5}, - "connector": "and", - } - ] - notice = { - "config_id": 0, - "user_groups": [ - MonitorUtils.get_or_create_notice_group( - log_index_set_id=label_index_set_id, - bk_biz_id=self.bk_biz_id, - ) - ], - "signal": ["abnormal"], - "options": { - "converge_config": {"need_biz_converge": True}, - "exclude_notice_ways": {"recovered": [], "closed": [], "ack": []}, - "noise_reduce_config": {"is_enabled": False, "count": 10, "dimensions": []}, - "upgrade_config": {"is_enabled": False, "user_groups": []}, - "assign_mode": ["by_rule", "only_notice"], - "chart_image_enabled": False, - }, - "config": { - "interval_notify_mode": "standard", - "notify_interval": ALARM_INTERVAL_CLUSTERING, - "template": [ - { - "signal": "abnormal", - "message_tmpl": anomaly_template, - "title_tmpl": "{{business.bk_biz_name}} - {{alarm.name}}{{alarm.display_type}}", - } - ], - }, - } - strategy = MonitorApi.save_alarm_strategy_v3( - params={ - "type": "monitor", - "bk_biz_id": self.bk_biz_id, - "scenario": DEFAULT_SCENARIO, - "name": name, - "labels": labels, - "is_enabled": True, - "items": items, - "detects": detects, - "actions": [], - "notice": notice, - } - ) - strategy_id = strategy["id"] - signature_strategy_settings.strategy_id = strategy_id - signature_strategy_settings.save() - - self.clustering_config.log_count_agg_rt = f"{table_id}_{strategy_id}_plan_{self.conf.get('algorithm_plan_id')}" - self.clustering_config.save() - return strategy_id - def delete_strategy(self, strategy_id): MonitorApi.delete_alarm_strategy_v2(params={"bk_biz_id": self.bk_biz_id, "ids": [strategy_id]}) SignatureStrategySettings.objects.filter(strategy_id=strategy_id).delete() @@ -426,14 +268,3 @@ def create_new_cls_strategy(self): return self.save_strategy( table_id=table_id, metric=DEFAULT_METRIC, strategy_type=StrategiesType.NEW_CLS_strategy ) - - def create_clustering_new_cls_strategy(self): - # 取数量聚合后的结果表 - table_id = ( - self.clustering_config.new_cls_pattern_rt - if self.clustering_config.new_cls_pattern_rt - else self.clustering_config.log_count_aggregation_flow["log_count_aggregation"]["result_table_id"] - ) - return self.save_clustering_strategy( - table_id=table_id, metric=DEFAULT_METRIC_CLUSTERING, strategy_type=StrategiesType.NEW_CLS_strategy - ) diff --git a/bklog/apps/log_clustering/handlers/data_access/data_access.py b/bklog/apps/log_clustering/handlers/data_access/data_access.py index 6427e50f1..b4f3b9d8c 100644 --- a/bklog/apps/log_clustering/handlers/data_access/data_access.py +++ b/bklog/apps/log_clustering/handlers/data_access/data_access.py @@ -141,15 +141,6 @@ def create_or_update_bkdata_etl(self, collector_config_id, fields, etl_params): else: result_table_name = collector_config.collector_config_name_en - # 当用户使用了自定义字段作为时间字段,则会产生同名字段,需要去重 - fields_names = set() - dedupe_fields_config = [] - for field in fields_config: - field_name = field.get("alias_name") if field.get("alias_name") else field.get("field_name") - if field_name not in 
-                dedupe_fields_config.append(field)
-                fields_names.add(field_name)
-
         params = {
             "raw_data_id": clustering_config.bkdata_data_id,
             "result_table_name": result_table_name,
@@ -165,7 +156,7 @@ def create_or_update_bkdata_etl(self, collector_config_id, fields, etl_params):
                     "is_dimension": field.get("tag", "dimension") == "dimension",
                     "field_index": index,
                 }
-                for index, field in enumerate(dedupe_fields_config, 1)
+                for index, field in enumerate(fields_config, 1)
             ],
             "json_config": json.dumps(bkdata_json_config),
             "bk_username": self.conf.get("bk_username"),
diff --git a/bklog/apps/log_clustering/handlers/dataflow/data_cls.py b/bklog/apps/log_clustering/handlers/dataflow/data_cls.py
index 82d2064cc..00732d33d 100644
--- a/bklog/apps/log_clustering/handlers/dataflow/data_cls.py
+++ b/bklog/apps/log_clustering/handlers/dataflow/data_cls.py
@@ -426,8 +426,10 @@ class LogCountAggregationFlowCls(object):
     table_name_no_id: str
     result_table_id: str
    log_count_aggregation: RealTimeCls
+    log_count_pattern_format: RealTimeCls
     tspider_storage: TspiderStorageCls
     storage_type: str
+    hdfs_storage: HDFSStorageCls
     bk_biz_id: int
     cluster: str = ""
diff --git a/bklog/apps/log_clustering/handlers/dataflow/dataflow_handler.py b/bklog/apps/log_clustering/handlers/dataflow/dataflow_handler.py
index b954d9934..53e00a319 100644
--- a/bklog/apps/log_clustering/handlers/dataflow/dataflow_handler.py
+++ b/bklog/apps/log_clustering/handlers/dataflow/dataflow_handler.py
@@ -98,7 +98,6 @@
 )
 from apps.log_clustering.models import ClusteringConfig
 from apps.log_databus.models import CollectorConfig
-from apps.log_search.constants import InnerTag
 from apps.log_search.models import LogIndexSet
 from apps.utils.log import logger
 from django.conf import settings
@@ -142,8 +141,8 @@ def get_clustering_training_params(cls, clustering_config):
             "delimeter": clustering_config.delimeter,
             "max_log_length": clustering_config.max_log_length,
             "is_case_sensitive": clustering_config.is_case_sensitive,
-            "depth": clustering_config.depth,
-            "max_child": clustering_config.max_child,
+            "depth": OnlineTaskTrainingArgs.DEPTH,
+            "max_child": OnlineTaskTrainingArgs.MAX_CHILD,
             "use_offline_model": OnlineTaskTrainingArgs.USE_OFFLINE_MODEL,
             "max_dist_list": clustering_config.max_dist_list,
         }
@@ -1518,7 +1517,8 @@ def _init_predict_flow(
         all_fields_dict = self.get_fields_dict(clustering_config=clustering_config)
         # 从BkDataMetaApi 获取字段信息 用于回填模版
         all_fields = DataAccessHandler.get_fields(result_table_id=result_table_id)
-        exclude_message_fields = [field for field in all_fields if field["field_name"] != clustering_fields]
+        # all_fields 排除message字段 透传字段
+        exclude_message_fields = [field for field in all_fields if field["field_name"] != "message"]
         # 去除不聚类的字段 转换前所有字段 比如 meeage 转换成 log
         is_dimension_fields = [
             field["field_name"] for field in all_fields if field["field_name"] not in NOT_CONTAIN_SQL_FIELD_LIST
@@ -1753,16 +1753,25 @@ def _init_log_count_aggregation_flow(
             log_count_signatures=log_count_signatures,
             table_name_no_id=table_name_no_id,
             result_table_id=result_table_id,
+            # 日志数量统计
             log_count_aggregation=RealTimeCls(
                 fields="",
                 table_name=f"bklog_{index_set_id}_agg",
                 result_table_id=f"{bk_biz_id}_bklog_{index_set_id}_agg",
                 filter_rule=log_count_signatures_filter_rule,
             ),
+            # 日志Pattern打平
+            log_count_pattern_format=RealTimeCls(
+                fields="",
+                table_name=f"bklog_{index_set_id}_sign",
+                result_table_id=f"{bk_biz_id}_bklog_{index_set_id}_sign",
+                filter_rule=log_count_signatures_filter_rule,
+            ),
             tspider_storage=TspiderStorageCls(
                 cluster=self.conf.get("tspider_cluster"), expires=self.conf.get("log_count_tspider_expires")
             ),
             storage_type=storage_type,
+            hdfs_storage=HDFSStorageCls(table_name="", expires=self.conf.get("log_count_hdfs_expires")),
             bk_biz_id=bk_biz_id,
             cluster=self.get_model_available_storage_cluster(),
         )
@@ -1805,8 +1814,5 @@ def create_log_count_aggregation_flow(self, index_set_id):
 
         clustering_config.log_count_aggregation_flow = log_count_aggregation_flow_dict
         clustering_config.log_count_aggregation_flow_id = result["flow_id"]
-        clustering_config.new_cls_pattern_rt = log_count_aggregation_flow_dict["log_count_aggregation"][
-            "result_table_id"
-        ]
         clustering_config.save()
         return result
diff --git a/bklog/apps/log_clustering/migrations/0023_clusteringconfig_related_space_pre_bk_biz_id.py b/bklog/apps/log_clustering/migrations/0023_clusteringconfig_related_space_pre_bk_biz_id.py
deleted file mode 100644
index 206a7f5fc..000000000
--- a/bklog/apps/log_clustering/migrations/0023_clusteringconfig_related_space_pre_bk_biz_id.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Generated by Django 3.2.15 on 2023-09-06 07:35
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('log_clustering', '0022_auto_20230831_1658'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='clusteringconfig',
-            name='related_space_pre_bk_biz_id',
-            field=models.IntegerField(blank=True, null=True, verbose_name='关联空间业务id之前的业务id'),
-        ),
-    ]
diff --git a/bklog/apps/log_clustering/models.py b/bklog/apps/log_clustering/models.py
index 8fe8df52a..7abf612d4 100644
--- a/bklog/apps/log_clustering/models.py
+++ b/bklog/apps/log_clustering/models.py
@@ -85,7 +85,6 @@ class ClusteringConfig(SoftDeleteModel):
     clustering_fields = models.CharField(_("聚合字段"), max_length=128)
     filter_rules = models.JSONField(_("过滤规则"), null=True, blank=True)
     bk_biz_id = models.IntegerField(_("业务id"))
-    related_space_pre_bk_biz_id = models.IntegerField(_("关联空间业务id之前的业务id"), null=True, blank=True)
     pre_treat_flow = models.JSONField(_("预处理flow配置"), null=True, blank=True)
     new_cls_pattern_rt = models.CharField(_("新类结果表id"), max_length=255, default="", null=True, blank=True)
     new_cls_index_set_id = models.IntegerField(_("新聚类类索引集id"), null=True, blank=True)
diff --git a/bklog/apps/log_clustering/tasks/msg.py b/bklog/apps/log_clustering/tasks/msg.py
index 7cccf9162..8a76ad739 100644
--- a/bklog/apps/log_clustering/tasks/msg.py
+++ b/bklog/apps/log_clustering/tasks/msg.py
@@ -52,9 +52,7 @@ def send(index_set_id):
     msg = _("[待审批] 有新聚类创建,请关注!索引集id: {}, 索引集名称: {}, 业务id: {}, 业务名称: {}, 创建者: {}, 过去一天的数据量doc_count={}").format(
         index_set_id,
         log_index_set.index_set_name,
-        clustering_config.bk_biz_id
-        if not clustering_config.related_space_pre_bk_biz_id
-        else clustering_config.related_space_pre_bk_biz_id,
+        clustering_config.bk_biz_id,
         space.space_name,
         clustering_config.created_by,
         doc_count,
@@ -76,9 +74,7 @@ def send(index_set_id):
     ).format(
         index_set_id,
         log_index_set.index_set_name,
-        clustering_config.bk_biz_id
-        if not clustering_config.related_space_pre_bk_biz_id
-        else clustering_config.related_space_pre_bk_biz_id,
+        clustering_config.bk_biz_id,
         space.space_name,
         clustering_config.created_by,
         doc_count,
diff --git a/bklog/apps/log_clustering/tasks/sync_pattern.py b/bklog/apps/log_clustering/tasks/sync_pattern.py
index c3dc9194b..b8ce65ed5 100644
--- a/bklog/apps/log_clustering/tasks/sync_pattern.py
+++ b/bklog/apps/log_clustering/tasks/sync_pattern.py
@@ -40,7 +40,7 @@
 from celery.task import periodic_task, task
 
 
-@periodic_task(run_every=crontab(minute="*/10"))
+@periodic_task(run_every=crontab(hour="*/1"))
 def sync_pattern():
     model_ids = AiopsModel.objects.all().values_list("model_id", flat=True)
@@ -55,6 +55,7 @@
 
 @task(ignore_result=True)
 def sync(model_id=None, model_output_rt=None):
+
     if model_id:
         try:
             release_id = AiopsModelHandler().get_latest_released_id(model_id=model_id)
@@ -125,6 +126,7 @@ def get_pattern(content) -> list:
         origin_log = sensitive_pattern[ORIGIN_LOG_INDEX][0]
         pattern_str = ""
         for pattern in sensitive_pattern[PATTERN_INDEX]:
+
            if hasattr(pattern, "name"):
                 value = pattern.value
                 name = f"#{pattern.name}#"
diff --git a/bklog/apps/log_clustering/utils/monitor.py b/bklog/apps/log_clustering/utils/monitor.py
index 0f5a77e2a..c94cc535c 100644
--- a/bklog/apps/log_clustering/utils/monitor.py
+++ b/bklog/apps/log_clustering/utils/monitor.py
@@ -19,18 +19,16 @@ We undertake not to change the open source license (MIT license) applicable to
 the current version of the project delivered to anyone in the future.
 """
+from django.utils.translation import ugettext_lazy as _  # noqa
+
 from apps.api import MonitorApi
 from apps.feature_toggle.handlers.toggle import FeatureToggleObject
 from apps.feature_toggle.plugins.constants import BKDATA_CLUSTERING_TOGGLE
-from apps.log_clustering.constants import (
-    DEFAULT_NOTICE_WAY,
-    DEFAULT_NOTIFY_RECEIVER_TYPE,
-)
+from apps.log_clustering.constants import DEFAULT_NOTIFY_RECEIVER_TYPE, DEFAULT_NOTICE_WAY
 from apps.log_clustering.exceptions import ClusteringClosedException
 from apps.log_clustering.models import NoticeGroup
 from apps.log_databus.constants import ADMIN_REQUEST_USER, EMPTY_REQUEST_USER
 from apps.log_search.models import LogIndexSet
-from django.utils.translation import ugettext_lazy as _  # noqa
@@ -60,14 +58,12 @@ def get_or_create_notice_group(cls, log_index_set_id, bk_biz_id):
         notice_receiver = cls.generate_notice_receiver(receivers=maintainers, notice_tye=DEFAULT_NOTIFY_RECEIVER_TYPE)
         group = cls.save_notice_group(
             bk_biz_id=bk_biz_id,
-            name=_("{}_{}运维人员").format(log_index_set_id, log_index_set.index_set_name),
+            name=_("{}_{}运维人员").format(bk_biz_id, log_index_set.index_set_name),
             message="",
             notice_receiver=notice_receiver,
             notice_way=DEFAULT_NOTICE_WAY,
         )
-        NoticeGroup.objects.get_or_create(
-            index_set_id=log_index_set_id, notice_group_id=group["id"], bk_biz_id=bk_biz_id
-        )
+        NoticeGroup.objects.create(index_set_id=log_index_set_id, notice_group_id=group["id"], bk_biz_id=bk_biz_id)
         return group["id"]
 
     @classmethod
diff --git a/bklog/apps/log_clustering/views/clustering_config_views.py b/bklog/apps/log_clustering/views/clustering_config_views.py
index a6ae44af2..4ddf3596a 100644
--- a/bklog/apps/log_clustering/views/clustering_config_views.py
+++ b/bklog/apps/log_clustering/views/clustering_config_views.py
@@ -118,15 +118,6 @@ def fail_pipeline(self, request, *args, **kwargs):
         action_result = task_service.forced_fail(request.query_params.get("node_id", ""))
         return Response({"result": action_result.result, "message": action_result.message})
 
-    @detail_route(methods=["GET"], url_path="create_new_cls_strategy")
-    def create_clustering_new_cls_strategy(self, request, *args, index_set_id=None, **kwargs):
-        from apps.log_clustering.handlers.clustering_monitor import (
-            ClusteringMonitorHandler,
-        )
-
-        strategy_id = ClusteringMonitorHandler(index_set_id=index_set_id).create_clustering_new_cls_strategy()
-        return Response({"strategy_id": strategy_id})
-
     @detail_route(methods=["POST"])
     def create_or_update(self, request, *args, **kwargs):
         """
diff --git a/bklog/apps/log_clustering/views/clustering_monitor_views.py b/bklog/apps/log_clustering/views/clustering_monitor_views.py
index f21844ded..7da85cf61 100644
--- a/bklog/apps/log_clustering/views/clustering_monitor_views.py
+++ b/bklog/apps/log_clustering/views/clustering_monitor_views.py
@@ -19,16 +19,14 @@ We undertake not to change the open source license (MIT license) applicable to
 the current version of the project delivered to anyone in the future.
 """
+from rest_framework.response import Response
+
 from apps.generic import APIViewSet
 from apps.log_clustering.constants import StrategiesType
 from apps.log_clustering.handlers.clustering_monitor import ClusteringMonitorHandler
 from apps.log_clustering.models import SignatureStrategySettings
-from apps.log_clustering.serializers import (
-    UpdateNewClsStrategySerializer,
-    UpdateStrategiesSerializer,
-)
+from apps.log_clustering.serializers import UpdateStrategiesSerializer, UpdateNewClsStrategySerializer
 from apps.utils.drf import detail_route
-from rest_framework.response import Response
@@ -92,7 +90,7 @@ def update_strategies(self, request, *args, index_set_id=None, **kwargs):
         """
         params = self.params_valid(UpdateStrategiesSerializer)
         return Response(
-            ClusteringMonitorHandler(index_set_id=index_set_id).update_strategies(
+            ClusteringMonitorHandler(index_set_id=index_set_id, bk_biz_id=params["bk_biz_id"]).update_strategies(
                 pattern_level=params["pattern_level"], actions=params["actions"]
             )
         )
@@ -154,7 +152,7 @@ def update_new_cls_strategy(self, request, *args, index_set_id=None, **kwargs):
         """
         params = self.params_valid(UpdateNewClsStrategySerializer)
         return Response(
-            ClusteringMonitorHandler(index_set_id=index_set_id).update_new_cls_strategy(
+            ClusteringMonitorHandler(index_set_id=index_set_id, bk_biz_id=params["bk_biz_id"]).update_new_cls_strategy(
                 action=params["action"], strategy_id=params.get("strategy_id")
             )
         )
diff --git a/bklog/apps/log_commons/admin.py b/bklog/apps/log_commons/admin.py
deleted file mode 100644
index 32a1468a4..000000000
--- a/bklog/apps/log_commons/admin.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from apps.log_commons.models import ApiAuthToken
-from apps.utils.admin import AppModelAdmin
-from django.contrib import admin
-
-
-@admin.register(ApiAuthToken)
-class ApiAuthTokenAdmin(AppModelAdmin):
-    list_display = [
-        "id",
-        "type",
-        "token",
-        "space_uid",
-        "created_at",
-        "created_by",
-        "expire_time",
-    ]
-    search_fields = ["id", "space_uid", "type", "created_by"]
diff --git a/bklog/apps/log_commons/migrations/0001_initial.py b/bklog/apps/log_commons/migrations/0001_initial.py
deleted file mode 100644
index b78cd753b..000000000
--- a/bklog/apps/log_commons/migrations/0001_initial.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Generated by Django 3.2.15 on 2023-09-18 06:21
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    initial = True
-
-    dependencies = []
-
-    operations = [
-        migrations.CreateModel(
-            name='ApiAuthToken',
-            fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True, verbose_name='创建时间')),
-                ('created_by', models.CharField(default='', max_length=32, verbose_name='创建者')),
-                ('updated_at', models.DateTimeField(auto_now=True, db_index=True, null=True, verbose_name='更新时间')),
-                ('updated_by', models.CharField(blank=True, default='', max_length=32, verbose_name='修改者')),
-                (
-                    'space_uid',
-                    models.CharField(blank=True, db_index=True, default='', max_length=256, verbose_name='空间唯一标识'),
-                ),
-                ('token', models.CharField(default='JFsMANBMj0xZSZV8', max_length=32, verbose_name='鉴权令牌')),
-                ('type', models.CharField(choices=[('Grafana', 'Grafana')], max_length=32, verbose_name='鉴权类型')),
-                ('params', models.JSONField(default=dict, verbose_name='鉴权参数')),
-                ('expire_time', models.DateTimeField(default=None, null=True, verbose_name='过期时间')),
-            ],
-            options={
-                'verbose_name': 'API鉴权令牌',
-                'verbose_name_plural': 'API鉴权令牌',
-            },
-        ),
-    ]
diff --git a/bklog/apps/log_commons/migrations/0002_alter_apiauthtoken_token.py b/bklog/apps/log_commons/migrations/0002_alter_apiauthtoken_token.py
deleted file mode 100644
index f4c80026d..000000000
--- a/bklog/apps/log_commons/migrations/0002_alter_apiauthtoken_token.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Generated by Django 3.2.15 on 2023-10-12 07:29
-
-import apps.log_commons.models
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('log_commons', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.AlterField(
-            model_name='apiauthtoken',
-            name='token',
-            field=models.CharField(
-                default=apps.log_commons.models.get_random_string_16, max_length=32, verbose_name='鉴权令牌'
-            ),
-        ),
-    ]
diff --git a/bklog/apps/log_commons/migrations/__init__.py b/bklog/apps/log_commons/migrations/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/bklog/apps/log_commons/models.py b/bklog/apps/log_commons/models.py
deleted file mode 100644
index 177a01df4..000000000
--- a/bklog/apps/log_commons/models.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from datetime import datetime
-
-import pytz
-from apps.constants import ApiTokenAuthType
-from apps.models import OperateRecordModel
-from django.db import models
-from django.utils.crypto import get_random_string
-from django.utils.translation import ugettext_lazy as _
-
-
-def get_random_string_16() -> str:
-    """
-    获取16位随机字符串
-    :return:
-    """
-    return get_random_string(length=16)
-
-
-class ApiAuthToken(OperateRecordModel):
-    """API鉴权令牌"""
-
-    space_uid = models.CharField(_("空间唯一标识"), blank=True, default="", max_length=256, db_index=True)
-    token = models.CharField(_("鉴权令牌"), max_length=32, default=get_random_string_16)
-    type = models.CharField(_("鉴权类型"), max_length=32, choices=ApiTokenAuthType.get_choices())
-    params = models.JSONField(_("鉴权参数"), default=dict)
-    expire_time = models.DateTimeField(_("过期时间"), null=True, default=None)
-
-    class Meta:
-        verbose_name = _("API鉴权令牌")
-        verbose_name_plural = _("API鉴权令牌")
-
-    def is_expired(self):
-        """
-        判断token是否过期
-        """
-        # 未设置过期时间,不判断是否过期
-        if not self.expire_time:
-            return False
-        return self.expire_time < datetime.now(tz=pytz.utc)
diff --git a/bklog/apps/log_databus/constants.py b/bklog/apps/log_databus/constants.py
index 05d7ff6d5..292e8b1ce 100644
--- a/bklog/apps/log_databus/constants.py
+++ b/bklog/apps/log_databus/constants.py
@@ -519,6 +519,8 @@ class InfoTypeEnum(ChoicesEnum):
 DEFAULT_KAFKA_SECURITY_PROTOCOL = "PLAINTEXT"
 DEFAULT_KAFKA_SASL_MECHANISM = "PLAIN"
 
+TABLE_TRANSFER = "pushgateway_transfer_metircs.base"
+
 # 调用GSE的'接收端配置接口'以及'路由接口'时使用
 DEFAULT_GSE_API_PLAT_NAME = "bkmonitor"  # GSE分配给监控的平台名称,不随APP_CODE变更,请不要修改
diff --git a/bklog/apps/log_databus/handlers/check_collector/checker/es_checker.py b/bklog/apps/log_databus/handlers/check_collector/checker/es_checker.py
index 18ee318d4..4629b9481 100644
--- a/bklog/apps/log_databus/handlers/check_collector/checker/es_checker.py
+++ b/bklog/apps/log_databus/handlers/check_collector/checker/es_checker.py
@@ -20,8 +20,6 @@
 the project delivered to anyone in the future.
 """
 import datetime
-from datetime import timedelta
-from typing import Any, Dict, List
 
 from apps.api import TransferApi
 from apps.log_databus.constants import (
@@ -32,44 +30,25 @@
 from apps.log_databus.handlers.check_collector.checker.base_checker import Checker
 from apps.log_databus.handlers.storage import StorageHandler
 from apps.log_esquery.utils.es_client import get_es_client
-from apps.log_esquery.utils.es_route import EsRoute
 from apps.log_measure.exceptions import EsConnectFailException
-from apps.log_search.models import Scenario
+from apps.utils.time_handler import strftime_local
 from django.utils.translation import ugettext as _
 
 
-def get_next_date(date_str: str, interval: int) -> str:
-    """
-    获取索引最新的分片, 根据索引分片的时间戳, 如果下一个分片是未来时间, 则返回当前索引分片的时间
-    :param date_str: 日期字符串, 格式: %Y%m%d, 例如: 20210101
-    :param interval: 时间间隔, 例如: 1
-    :return: 下一时间间隔的日期, 例如: 20210102
-    """
-    date_format = "%Y%m%d"
-    date_obj = datetime.datetime.strptime(date_str, date_format)
-    now = datetime.datetime.now()
-    next_date_obj = date_obj + timedelta(days=interval)
-    if next_date_obj > now:
-        return date_str
-    return next_date_obj.strftime(date_format)
-
-
 class EsChecker(Checker):
     CHECKER_NAME = "es checker"
 
-    def __init__(self, table_id: str, bk_data_name: str, *args, **kwargs):
+    def __init__(self, table_id, bk_data_name, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.table_id = table_id
         self.bk_data_name = bk_data_name
         self.result_table = {}
         self.cluster_config = {}
         self.cluster_id = 0
-        self.retention: int = 0
         # 物理索引列表
         self.indices = []
         self.es_client = None
-        self.index_pattern = table_id.replace(".", "_")
-        self.latest_date: str = ""
+        self.index_pattern = ""
 
     def pre_run(self):
         try:
@@ -79,7 +58,6 @@ def pre_run(self):
             self.result_table = result.get(self.table_id, {})
             self.cluster_config = self.result_table.get("cluster_config", {})
             self.cluster_id = self.cluster_config.get("cluster_id", 0)
-            self.retention = self.result_table.get("storage_config", {}).get("retention", 0)
         except Exception as e:
             self.append_error_info(_("[TransferApi] [get_result_table_storage] 失败, err: {e}").format(e=e))
@@ -93,30 +71,44 @@ def get_indices(self):
         """
         获取物理索引的名称
         """
-        # 查该采集项的物理索引而不是该采集项所在集群的所有物理索引
-        result: List[Dict[str, Any]] = EsRoute(scenario_id=Scenario.LOG, indices=self.table_id).cat_indices()
-        self.indices = StorageHandler.sort_indices(result)
-
+        indices = None
+        # 可能会请求失败, 重试几次
+        for i in range(RETRY_TIMES):
+            try:
+                indices = StorageHandler(self.cluster_id).indices()
+                if indices is not None:
+                    break
+            except Exception as e:  # disable
+                self.append_warning_info(_("获取物理索引失败第{cnt}次, err: {e}").format(cnt=i + 1, e=e))
+        if not indices:
+            self.append_error_info(_("获取物理索引为空"))
+            return
+        for i in indices:
+            if i["index_pattern"] == self.bk_data_name or i["index_pattern"] == self.table_id.replace(".", "_"):
+                self.index_pattern = i["index_pattern"]
+                self.indices = i["indices"]
+                break
         if not self.indices:
             self.append_error_info(_("获取物理索引为空"))
             return
+
         for i in self.indices:
             self.append_normal_info(_("物理索引: {}, 健康: {}, 状态: {}").format(i["index"], i["health"], i["status"]))
 
         hot_node_count = 0
+
         for node in StorageHandler(self.cluster_id).cluster_nodes():
             if node.get("tag") == "hot":
                 hot_node_count += 1
 
         latest_indices = self.indices[0]
-        self.latest_date = latest_indices["index"].split("_")[-2]
query_body = {"size": 1} query_data = self.es_client.search(index=latest_indices["index"], body=query_body) latest_data = query_data.get("hits", {}).get("hits", []) latest_data = latest_data[0] if latest_data else None self.append_normal_info(_("最近物理索引:{} 最新一条数据为:{}").format(latest_indices["index"], latest_data)) - if int(latest_indices["pri"]) < hot_node_count: + if latest_indices["pri"] < hot_node_count: self.append_warning_info( _("最近物理索引分片数量小于热节点分片数量, 可能会造成性能问题, 当前索引分片数{}, 热节点分片数{}").format(latest_indices["pri"], hot_node_count) ) @@ -156,15 +148,18 @@ def get_index_alias(self): return index_alias_info_dict = self.es_client.indices.get_alias(index=[i["index"] for i in self.indices]) - now_datetime = get_next_date(date_str=self.latest_date, interval=self.retention) + now_datetime = strftime_local(datetime.datetime.now(), "%Y%m%d") now_read_index_alias = "{}_{}{}".format(self.index_pattern, now_datetime, INDEX_READ_SUFFIX) now_write_index_alias = "{}{}_{}".format(INDEX_WRITE_PREFIX, now_datetime, self.index_pattern) + now_read_index_alias_exist = False + now_write_index_alias_exist = False + for i in self.indices: # index 物理索引名 physical_index = i["index"] - aliases = index_alias_info_dict.get(physical_index, {}).get("aliases", {}) + aliases = index_alias_info_dict.get(physical_index) if not aliases: self.append_error_info(_("物理索引: {physical_index} 不存在alias别名").format(physical_index=physical_index)) continue @@ -175,7 +170,12 @@ def get_index_alias(self): ) return - if now_read_index_alias in aliases and now_write_index_alias in aliases: + if aliases.get(now_read_index_alias): + now_read_index_alias_exist = True + if aliases.get(now_write_index_alias): + now_write_index_alias_exist = True + + if now_read_index_alias_exist and now_write_index_alias_exist: self.append_normal_info( _("索引: [{index_pattern}] 当天[{now_datetime}]读写别名已成功创建").format( index_pattern=self.index_pattern, now_datetime=now_datetime diff --git a/bklog/apps/log_databus/handlers/check_collector/checker/kafka_checker.py b/bklog/apps/log_databus/handlers/check_collector/checker/kafka_checker.py index 968b96333..85bf736ef 100644 --- a/bklog/apps/log_databus/handlers/check_collector/checker/kafka_checker.py +++ b/bklog/apps/log_databus/handlers/check_collector/checker/kafka_checker.py @@ -33,8 +33,7 @@ from apps.log_databus.handlers.check_collector.checker.base_checker import Checker from django.conf import settings from django.utils.translation import ugettext as _ -from kafka import KafkaConsumer -from kafka.structs import TopicPartition +from kafka import KafkaConsumer, TopicPartition logger = logging.getLogger() @@ -106,7 +105,7 @@ def get_kafka_test_group_latest_log(self, kafka_info: dict): if len(log_content) == message_count: self.latest_log.extend(log_content) consumer.close() - break + return if _msg.offset == end_offset - 1: break @@ -120,8 +119,4 @@ def get_kafka_test_group_latest_log(self, kafka_info: dict): if not log_content: self.append_error_info(_("{host}:{port}, topic: {topic}, 无数据").format(host=host, port=port, topic=topic)) else: - self.append_normal_info( - _("{host}:{port}, topic: {topic}, 有数据, 数据采样: {example}").format( - host=host, port=port, topic=topic, example=json.dumps(log_content[0]) - ) - ) + self.append_normal_info(_("{host}:{port}, topic: {topic}, 有数据").format(host=host, port=port, topic=topic)) diff --git a/bklog/apps/log_databus/handlers/check_collector/checker/metadata_checker.py b/bklog/apps/log_databus/handlers/check_collector/checker/metadata_checker.py index 8cf82a40b..38f445ab9 100644 --- 
a/bklog/apps/log_databus/handlers/check_collector/checker/metadata_checker.py +++ b/bklog/apps/log_databus/handlers/check_collector/checker/metadata_checker.py @@ -22,12 +22,13 @@ import logging import time +from django.conf import settings + from apps.log_databus.constants import META_DATA_CRON_REFRESH_TASK_NAME_LIST from apps.log_databus.handlers.check_collector.checker.base_checker import Checker from apps.log_search.constants import TimeEnum from bk_monitor.api.client import Client from config.domains import MONITOR_APIGATEWAY_ROOT -from django.conf import settings logger = logging.getLogger() @@ -56,6 +57,9 @@ def check_cron_refresh_task(self): params = self._build_unify_query_params(task_name) try: result = self.bk_monitor_client.unify_query(data=params) + if result.get("result"): + self.append_error_info(f"task name: {task_name} have error : {result.get('message')}") + continue series = result.get("series", []) if not series: self.append_error_info(f"task name: {task_name} not have execute history") @@ -66,8 +70,6 @@ def check_cron_refresh_task(self): continue if len(datapoints) != int(TimeEnum.ONE_HOUR_SECOND.value / TimeEnum.ONE_MINUTE_SECOND.value): self.append_error_info(f"task name: {task_name} execute have omissions") - continue - self.append_normal_info(f"task name: {task_name} execute success") except Exception as e: self.append_error_info(str(e)) continue diff --git a/bklog/apps/log_databus/handlers/check_collector/checker/transfer_checker.py b/bklog/apps/log_databus/handlers/check_collector/checker/transfer_checker.py index e814cc369..8c3d96d70 100644 --- a/bklog/apps/log_databus/handlers/check_collector/checker/transfer_checker.py +++ b/bklog/apps/log_databus/handlers/check_collector/checker/transfer_checker.py @@ -26,7 +26,7 @@ import requests from apps.feature_toggle.handlers.toggle import FeatureToggleObject from apps.feature_toggle.plugins.constants import CHECK_COLLECTOR_CUSTOM_CONFIG -from apps.log_databus.constants import TRANSFER_METRICS, EtlConfig +from apps.log_databus.constants import TABLE_TRANSFER, TRANSFER_METRICS, EtlConfig from apps.log_databus.handlers.check_collector.checker.base_checker import Checker from apps.log_databus.handlers.etl_storage import EtlStorage from apps.log_databus.models import CleanStash @@ -83,38 +83,7 @@ def get_transfer_metric(self, metric_name: str): app_code = settings.APP_CODE app_secret = settings.SECRET_KEY monitor_host = MONITOR_APIGATEWAY_ROOT - bk_biz_id = settings.BKMONITOR_BK_BIZ_ID - - end_time = int(time.time()) - start_time = end_time - TimeEnum.FIVE_MINUTE_SECOND.value - params = { - "down_sample_range": "1m", - "step": "auto", - "start_time": start_time, - "end_time": end_time, - "expression": "a", - "alias": "a", - "name": f"SUM({metric_name})", - "query_configs": [ - { - "data_source_label": "custom", - "data_type_label": "time_series", - "data_label": "bk_monitor", - "metrics": [{"field": metric_name, "method": "SUM", "alias": "a"}], - "table": settings.TABLE_TRANSFER, - "group_by": [], - "where": [{"key": "data_id", "method": "eq", "value": [self.bk_data_id]}], - "interval": TimeEnum.ONE_MINUTE_SECOND.value, - "interval_unit": "s", - "time_field": "time", - "filter_dict": {}, - "functions": [], - } - ], - "target": [], - "bk_biz_id": bk_biz_id, - } - # 优先使用特性开关中配置 + if FeatureToggleObject.switch(CHECK_COLLECTOR_CUSTOM_CONFIG): transfer_custom_config = FeatureToggleObject.toggle(CHECK_COLLECTOR_CUSTOM_CONFIG).feature_config.get( "transfer_custom_config", {} @@ -124,29 +93,69 @@ def get_transfer_metric(self, 
metric_name: str): monitor_host = transfer_custom_config.get("monitor_host") bk_token = transfer_custom_config.get("bk_token") bk_biz_id = transfer_custom_config.get("bk_biz_id") - params["bk_biz_id"] = bk_biz_id + + end_time = int(time.time()) + start_time = end_time - TimeEnum.FIVE_MINUTE_SECOND.value + params_temp = { + "down_sample_range": "1m", + "step": "auto", + "start_time": start_time, + "end_time": end_time, + "expression": "a", + "display": True, + "query_configs": [ + { + "data_source_label": "bk_monitor", + "data_type_label": "time_series", + "metrics": [{"field": metric_name, "method": "SUM", "alias": "a"}], + "table": "", + "group_by": [], + "display": True, + "where": [{"key": "id", "method": "eq", "value": [self.bk_data_id]}], + "interval": TimeEnum.ONE_MINUTE_SECOND.value, + "interval_unit": "s", + "time_field": "time", + "filter_dict": {}, + "functions": [], + } + ], + "target": [], + "bk_biz_id": bk_biz_id, + } + headers = {"Content-Type": "application/json", "Cookie": f"bk_token={bk_token}"} + try: response = requests.post( - url=monitor_host + "time_series/unify_query/", data=json.dumps(params), headers=headers + url=monitor_host + "time_series/unify_query/", data=json.dumps(params_temp), headers=headers ) + result = response.json() + + if not result["result"]: + message = _("[请求监控接口] [unify query] 获取 {metric_name} 数据失败, err: {err}").format( + metric_name=metric_name, err=result["message"] + ) + logger.error(message) + self.append_warning_info(message) + return + + series = result["data"].get("series", []) + if not series: + message = _("[请求监控接口] [unify query] 获取 {metric_name} 数据为空").format(metric_name=metric_name) + self.append_warning_info(message) + return + + value, __ = series[0]["datapoints"][-1] + self.append_normal_info( + _("[请求监控接口] [unify query] {metric_name}: {value}").format(metric_name=metric_name, value=value) + ) except Exception as e: - message = _("[请求监控接口] [unify_query] 获取 {metric_name} 数据失败, err: {e}").format( + message = _("[请求监控接口] [unify query] 获取 {metric_name} 数据失败, err: {e}").format( metric_name=metric_name, e=e ) logger.error(message) self.append_warning_info(message) - return - # 直接请求的结果带有RESTFul的序列化格式 {result, code, message} - if not result["result"]: - message = _("[请求监控接口] [unify_query] 获取 {metric_name} 数据失败, err: {err}").format( - metric_name=metric_name, err=result["message"] - ) - logger.error(message) - self.append_warning_info(message) - return - series = result["data"].get("series", []) else: bk_monitor_client = Client( bk_app_code=app_code, @@ -155,27 +164,27 @@ def get_transfer_metric(self, metric_name: str): report_host=f"{settings.BKMONITOR_CUSTOM_PROXY_IP}/", bk_username="admin", ) + params = { + "sql": f"select sum({metric_name}) as {metric_name} from {TABLE_TRANSFER} \ + where time >= '1m' and id == {self.bk_data_id}" + } try: - result = bk_monitor_client.unify_query(data=params) + result = bk_monitor_client.get_ts_data(data=params) + for ts_data in result["list"]: + value = ts_data[metric_name] + if ts_data["id"] == self.bk_data_id: + self.append_normal_info( + _("[请求监控接口] [get_ts_data] {metric_name}: {value}").format( + metric_name=metric_name, value=value + ) + ) + return + message = _("[请求监控接口] [get_ts_data] 获取 {metric_name} 数据为空").format(metric_name=metric_name) + self.append_warning_info(message) + except Exception as e: - message = _("[请求监控接口] [unify_query] 获取 {metric_name} 数据失败, err: {e}").format( + message = _("[请求监控接口] [get_ts_data] 获取 {metric_name} 数据失败, err: {e}").format( metric_name=metric_name, e=e ) 
logger.error(message) self.append_warning_info(message) - return - series = result.get("series", []) - # 统一处理series - if not series or not series[0]["datapoints"]: - message = _("[请求监控接口] [unify_query] 获取 {metric_name} 数据为空").format(metric_name=metric_name) - self.append_warning_info(message) - return - - datapoints = [i[0] for i in series[0]["datapoints"] if i[0]] - if not datapoints: - message = _("[请求监控接口] [unify_query] 获取 {metric_name} 数据为空").format(metric_name=metric_name) - self.append_warning_info(message) - return - self.append_normal_info( - _("[请求监控接口] [unify_query] {metric_name}: {value}").format(metric_name=metric_name, value=max(datapoints)) - ) diff --git a/bklog/apps/log_databus/handlers/check_collector/handler.py b/bklog/apps/log_databus/handlers/check_collector/handler.py index 05fcc4853..7ed2805d6 100644 --- a/bklog/apps/log_databus/handlers/check_collector/handler.py +++ b/bklog/apps/log_databus/handlers/check_collector/handler.py @@ -63,8 +63,8 @@ def __init__( self.bk_data_name = None self.bk_data_id = None self.bk_biz_id = None - self.target_server: Dict[str, Any] = {} - self.collector_config: Optional[CollectorConfig] + self.target_server: Optional[Dict[str, Any]] = None + self.collector_config: Optional[CollectorConfig] = None self.gse_path = gse_path or os.environ.get("GSE_ROOT_PATH", GSE_PATH) self.ipc_path = ipc_path or os.environ.get("GSE_IPC_PATH", IPC_PATH) @@ -167,9 +167,7 @@ def execute_check(self): ) transfer_checker.run() - es_checker = EsChecker( - table_id=self.table_id, bk_data_name=self.bk_data_name, check_collector_record=self.record - ) + es_checker = EsChecker(self.table_id, self.bk_data_name, check_collector_record=self.record) es_checker.run() meta_data_checker = MetaDataChecker(check_collector_record=self.record) diff --git a/bklog/apps/log_databus/handlers/collector.py b/bklog/apps/log_databus/handlers/collector.py index 9e3c830bf..42fbc084f 100644 --- a/bklog/apps/log_databus/handlers/collector.py +++ b/bklog/apps/log_databus/handlers/collector.py @@ -129,12 +129,14 @@ from apps.log_databus.serializers import ContainerCollectorYamlSerializer from apps.log_databus.tasks.bkdata import async_create_bkdata_data_id from apps.log_esquery.utils.es_route import EsRoute -from apps.log_measure.events import NOTIFY_EVENT from apps.log_search.constants import ( CMDB_HOST_SEARCH_FIELDS, + DEFAULT_TIME_FIELD, CollectorScenarioEnum, CustomTypeEnum, GlobalCategoriesEnum, + TimeFieldTypeEnum, + TimeFieldUnitEnum, ) from apps.log_search.handlers.biz import BizHandler from apps.log_search.handlers.index_set import IndexSetHandler @@ -886,7 +888,6 @@ def update_or_create(self, params: dict) -> dict: if is_create: self._authorization_collector(self.data) - self.send_create_notify(self.data) try: collector_scenario = CollectorScenario.get_instance(self.data.collector_scenario_id) self._update_or_create_subscription( @@ -2370,7 +2371,7 @@ def custom_create( # create custom Log Group if custom_type == CustomTypeEnum.OTLP_LOG.value: self.create_custom_log_group(self.data) - self.send_create_notify(self.data) + return { "collector_config_id": self.data.collector_config_id, "index_set_id": self.data.index_set_id, @@ -2792,7 +2793,7 @@ def list_bcs_collector_without_rule(cls, bcs_cluster_id: str, bk_biz_id: int): """ 该函数是为了获取容器采集项, 但是不是通过BCS规则创建的采集项 """ - # 通用函数, 获取非BCS创建的容器采集项, 以及对应容器采集的map + # 过滤掉BCS创建的采集项 queryset = CollectorConfig.objects.filter( rule_id=0, environment=Environment.CONTAINER, @@ -3036,7 +3037,7 @@ def create_bcs_container_config(self, data,
bk_app_code="bk_bcs"): conf=conf, async_bkdata=False, ) - new_path_cls_index_set, new_std_cls_index_set = self.get_or_create_bcs_project_index_set( + new_path_cls_index_set, new_std_cls_index_set = self.create_or_update_bcs_project_index_set( bcs_project_id=data["project_id"], bcs_cluster_id=data["bcs_cluster_id"], bk_biz_id=data["bk_biz_id"], @@ -3103,9 +3104,6 @@ def create_bcs_container_config(self, data, bk_app_code="bk_bcs"): ) ContainerCollectorConfig.objects.bulk_create(container_collector_config_list) - - self.send_create_notify(path_collector_config) - return { "rule_id": bcs_rule.id, "rule_file_index_set_id": path_collector_config.index_set_id, @@ -3152,28 +3150,81 @@ def sync_bcs_container_bkdata_id(data: Dict[str, Any]): if data["rule_std_collector_config_id"]: async_create_bkdata_data_id.delay(data["rule_std_collector_config_id"]) + def create_or_update_bcs_project_index_set(self, bcs_project_id, bcs_cluster_id, bk_biz_id, storage_cluster_id): + space_uid = bk_biz_id_to_space_uid(bk_biz_id) + lower_cluster_id = self.convert_lower_cluster_id(bcs_cluster_id) + + src_index_list = LogIndexSet.objects.filter(space_uid=space_uid, bcs_project_id=bcs_project_id) + + path_index_set_name = f"{bcs_cluster_id}_path" + path_index_set = src_index_list.filter(index_set_name=path_index_set_name).first() + if not path_index_set: + path_index_set = IndexSetHandler.create( + index_set_name=path_index_set_name, + space_uid=space_uid, + storage_cluster_id=storage_cluster_id, + scenario_id=Scenario.ES, + view_roles=None, + indexes=[ + { + "bk_biz_id": bk_biz_id, + "result_table_id": build_result_table_id(bk_biz_id, f"{lower_cluster_id}_*_path_*").replace( + ".", "_" + ), + "result_table_name": path_index_set_name, + "time_field": DEFAULT_TIME_FIELD, + } + ], + username="admin", + category_id="kubernetes", + bcs_project_id=bcs_project_id, + is_editable=False, + time_field=DEFAULT_TIME_FIELD, + time_field_type=TimeFieldTypeEnum.DATE.value, + time_field_unit=TimeFieldUnitEnum.MILLISECOND.value, + ) + + std_index_set_name = f"{bcs_cluster_id}_std" + std_index_set = src_index_list.filter(index_set_name=std_index_set_name).first() + if not std_index_set: + std_index_set = IndexSetHandler.create( + index_set_name=std_index_set_name, + space_uid=space_uid, + storage_cluster_id=storage_cluster_id, + scenario_id=Scenario.ES, + view_roles=None, + indexes=[ + { + "bk_biz_id": bk_biz_id, + "result_table_id": build_result_table_id(bk_biz_id, f"{lower_cluster_id}_*_std_*").replace( + ".", "_" + ), + "result_table_name": std_index_set_name, + "time_field": DEFAULT_TIME_FIELD, + } + ], + username="admin", + category_id="kubernetes", + bcs_project_id=bcs_project_id, + is_editable=False, + time_field=DEFAULT_TIME_FIELD, + time_field_type=TimeFieldTypeEnum.DATE.value, + time_field_unit=TimeFieldUnitEnum.MILLISECOND.value, + ) + + return path_index_set, std_index_set + @staticmethod - def get_or_create_bcs_project_index_set(bcs_cluster_id, bk_biz_id, storage_cluster_id, bcs_project_id=""): + def convert_lower_cluster_id(bcs_cluster_id: str): """ - 获取或创建BCS项目索引集 + 将集群ID转换为小写 + 例如: BCS-K8S-12345 -> bcs_k8s_12345 """ - path_index_set = IndexSetHandler.get_or_create_bcs_project_path_index_set( - bcs_cluster_id=bcs_cluster_id, - bk_biz_id=bk_biz_id, - storage_cluster_id=storage_cluster_id, - bcs_project_id=bcs_project_id, - ) - std_index_set = IndexSetHandler.get_or_create_bcs_project_std_index_set( - bcs_cluster_id=bcs_cluster_id, - bk_biz_id=bk_biz_id, - storage_cluster_id=storage_cluster_id, - 
bcs_project_id=bcs_project_id, - ) - return path_index_set, std_index_set + return bcs_cluster_id.lower().replace("-", "_") @classmethod def generate_collector_config_name(cls, bcs_cluster_id, collector_config_name, collector_config_name_en): - lower_cluster_id = convert_lower_cluster_id(bcs_cluster_id) + lower_cluster_id = cls.convert_lower_cluster_id(bcs_cluster_id) return { "bcs_path_collector": { "collector_config_name": f"{bcs_cluster_id}_{collector_config_name}_path", @@ -3735,11 +3786,9 @@ def get_labels(self, topo_type, bcs_cluster_id, namespace, name): @classmethod def generate_label(cls, obj_dict): - if not obj_dict or not obj_dict["items"]: + if not obj_dict["items"]: return [] obj_item, *_ = obj_dict["items"] - if not obj_item["metadata"]["labels"]: - return [] return [ {"key": label_key, "value": label_valus} for label_key, label_valus in obj_item["metadata"]["labels"].items() @@ -4078,7 +4127,6 @@ def fast_create(self, params: dict) -> dict: params["table_id"] = params["collector_config_name_en"] index_set_id = self.create_or_update_clean_config(False, params).get("index_set_id", 0) - self.send_create_notify(self.data) return { "collector_config_id": self.data.collector_config_id, "bk_data_id": self.data.bk_data_id, @@ -4337,26 +4385,6 @@ def container_dict_configs_to_yaml( return yaml.safe_dump_all(result) - @classmethod - def send_create_notify(cls, collector_config: CollectorConfig): - try: - space = Space.objects.get(bk_biz_id=collector_config.bk_biz_id) - space_uid = space.space_uid - space_name = space.space_name - except Space.DoesNotExist: - space_uid = collector_config.bk_biz_id - space_name = collector_config.bk_biz_id - content = _("有新采集项创建,请关注!采集项ID: {}, 采集项名称: {}, 空间ID: {}, 空间名称: {}, 创建者: {}, 来源: {}").format( - collector_config.collector_config_id, - collector_config.collector_config_name, - space_uid, - space_name, - collector_config.created_by, - collector_config.bk_app_code, - ) - - NOTIFY_EVENT(content=content, dimensions={"space_uid": space_uid, "msg_type": "create_collector_config"}) - def get_data_link_id(bk_biz_id: int, data_link_id: int = 0) -> int: """ @@ -4439,11 +4467,3 @@ def build_result_table_id(bk_biz_id: int, collector_config_name_en: str) -> str: f"{settings.TABLE_SPACE_PREFIX}_{-bk_biz_id}_{settings.TABLE_ID_PREFIX}.{collector_config_name_en}" ) return result_table_id - - -def convert_lower_cluster_id(bcs_cluster_id: str): - """ - 将集群ID转换为小写 - 例如: BCS-K8S-12345 -> bcs_k8s_12345 - """ - return bcs_cluster_id.lower().replace("-", "_") diff --git a/bklog/apps/log_databus/handlers/collector_plugin/base.py b/bklog/apps/log_databus/handlers/collector_plugin/base.py index 34027b9f3..976ab1a25 100644 --- a/bklog/apps/log_databus/handlers/collector_plugin/base.py +++ b/bklog/apps/log_databus/handlers/collector_plugin/base.py @@ -147,7 +147,6 @@ def _update_or_create(self, params: dict) -> bool: "allocation_min_days": params.get("allocation_min_days", 0), "storage_replies": params.get("storage_replies", 1), "storage_shards_nums": params.get("storage_shards_nums", 1), - "storage_shards_size": params.get("storage_shards_size", 10), "etl_config": params.get("etl_config"), "etl_params": params.get("etl_params", {}), "fields": params.get("fields", []), diff --git a/bklog/apps/log_databus/handlers/collector_scenario/redis_slowlog.py b/bklog/apps/log_databus/handlers/collector_scenario/redis_slowlog.py index c6933cf0e..28d555ecc 100644 --- a/bklog/apps/log_databus/handlers/collector_scenario/redis_slowlog.py +++ 
b/bklog/apps/log_databus/handlers/collector_scenario/redis_slowlog.py @@ -21,7 +21,7 @@ """ from apps.feature_toggle.handlers.toggle import FeatureToggleObject from apps.feature_toggle.plugins.constants import IS_AUTO_DEPLOY_PLUGIN -from apps.log_databus.constants import EtlConfig, LogPluginInfo +from apps.log_databus.constants import LogPluginInfo, EtlConfig from apps.log_databus.handlers.collector_scenario.base import CollectorScenario from apps.utils.log import logger from django.utils.translation import ugettext as _ @@ -119,14 +119,7 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) """ return { "option": { - "es_unique_field_list": [ - "cloudId", - "serverIp", - "gseIndex", - "iterationIndex", - "bk_host_id", - "dtEventTimeStamp", - ], + "es_unique_field_list": ["cloudId", "serverIp", "gseIndex", "iterationIndex", "bk_host_id"], "separator_node_source": "", "separator_node_action": "", "separator_node_name": "", diff --git a/bklog/apps/log_databus/handlers/collector_scenario/row.py b/bklog/apps/log_databus/handlers/collector_scenario/row.py index 1b1a7d1fa..1abc7e038 100644 --- a/bklog/apps/log_databus/handlers/collector_scenario/row.py +++ b/bklog/apps/log_databus/handlers/collector_scenario/row.py @@ -21,7 +21,7 @@ """ from apps.feature_toggle.handlers.toggle import FeatureToggleObject from apps.feature_toggle.plugins.constants import IS_AUTO_DEPLOY_PLUGIN -from apps.log_databus.constants import EtlConfig, LogPluginInfo +from apps.log_databus.constants import LogPluginInfo, EtlConfig from apps.log_databus.handlers.collector_scenario.base import CollectorScenario from apps.log_databus.handlers.collector_scenario.utils import ( deal_collector_scenario_param, @@ -216,15 +216,7 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) """ return { "option": { - "es_unique_field_list": [ - "cloudId", - "serverIp", - "path", - "gseIndex", - "iterationIndex", - "bk_host_id", - "dtEventTimeStamp", - ], + "es_unique_field_list": ["cloudId", "serverIp", "path", "gseIndex", "iterationIndex", "bk_host_id"], "separator_node_source": "", "separator_node_action": "", "separator_node_name": "", diff --git a/bklog/apps/log_databus/handlers/collector_scenario/section.py b/bklog/apps/log_databus/handlers/collector_scenario/section.py index 0ce64f2f1..562cf00ca 100644 --- a/bklog/apps/log_databus/handlers/collector_scenario/section.py +++ b/bklog/apps/log_databus/handlers/collector_scenario/section.py @@ -21,7 +21,7 @@ """ from apps.feature_toggle.handlers.toggle import FeatureToggleObject from apps.feature_toggle.plugins.constants import IS_AUTO_DEPLOY_PLUGIN -from apps.log_databus.constants import EtlConfig, LogPluginInfo +from apps.log_databus.constants import LogPluginInfo, EtlConfig from apps.log_databus.handlers.collector_scenario.base import CollectorScenario from apps.log_databus.handlers.collector_scenario.utils import ( deal_collector_scenario_param, @@ -225,15 +225,7 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) """ return { "option": { - "es_unique_field_list": [ - "cloudId", - "serverIp", - "path", - "gseIndex", - "iterationIndex", - "bk_host_id", - "dtEventTimeStamp", - ], + "es_unique_field_list": ["cloudId", "serverIp", "path", "gseIndex", "iterationIndex", "bk_host_id"], "separator_node_source": "", "separator_node_action": "", "separator_node_name": "", diff --git a/bklog/apps/log_databus/handlers/collector_scenario/syslog.py b/bklog/apps/log_databus/handlers/collector_scenario/syslog.py 
index c9af69fe2..66d00549d 100644 --- a/bklog/apps/log_databus/handlers/collector_scenario/syslog.py +++ b/bklog/apps/log_databus/handlers/collector_scenario/syslog.py @@ -19,9 +19,10 @@ We undertake not to change the open source license (MIT license) applicable to the current version of the project delivered to anyone in the future. """ -from apps.log_databus.constants import EtlConfig, LogPluginInfo +from apps.log_databus.constants import LogPluginInfo, EtlConfig from apps.log_databus.handlers.collector_scenario import CollectorScenario from apps.log_databus.handlers.collector_scenario.utils import build_es_option_type + from django.utils.translation import ugettext as _ @@ -72,14 +73,7 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) """ built_in_config = { "option": { - "es_unique_field_list": [ - "cloudId", - "serverIp", - "gseIndex", - "iterationIndex", - "bk_host_id", - "dtEventTimeStamp", - ], + "es_unique_field_list": ["cloudId", "serverIp", "gseIndex", "iterationIndex"], "separator_node_source": "", "separator_node_action": "", "separator_node_name": "", @@ -161,40 +155,39 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) }, } if etl_config == EtlConfig.BK_LOG_TEXT: - built_in_config["fields"].extend( - [ - { - "field_name": "syslogSource", - "field_type": "object", - "tag": "dimension", - "alias_name": "log", - "description": "客户端信息", - "option": build_es_option_type("object", es_version), - }, - { - "field_name": "syslogLabel", - "field_type": "object", - "tag": "dimension", - "alias_name": "syslog", - "description": "严重程度", - "option": build_es_option_type("object", es_version), - }, - { - "field_name": "syslogEvent", - "field_type": "object", - "tag": "dimension", - "alias_name": "event", - "description": "日志级别", - "option": build_es_option_type("object", es_version), - }, - { - "field_name": "syslogProcess", - "field_type": "object", - "tag": "dimension", - "alias_name": "process", - "description": "应用程序", - "option": build_es_option_type("object", es_version), - }, - ] - ) + built_in_config["fields"].extend([ + { + "field_name": "syslogSource", + "field_type": "object", + "tag": "dimension", + "alias_name": "log", + "description": "客户端信息", + "option": build_es_option_type("object", es_version), + }, + { + "field_name": "syslogLabel", + "field_type": "object", + "tag": "dimension", + "alias_name": "syslog", + "description": "严重程度", + "option": build_es_option_type("object", es_version), + }, + { + "field_name": "syslogEvent", + "field_type": "object", + "tag": "dimension", + "alias_name": "event", + "description": "日志级别", + "option": build_es_option_type("object", es_version), + }, + { + "field_name": "syslogProcess", + "field_type": "object", + "tag": "dimension", + "alias_name": "process", + "description": "应用程序", + "option": build_es_option_type("object", es_version), + }, + + ]) return built_in_config diff --git a/bklog/apps/log_databus/handlers/collector_scenario/wineventlog.py b/bklog/apps/log_databus/handlers/collector_scenario/wineventlog.py index 668435041..651bdf68d 100644 --- a/bklog/apps/log_databus/handlers/collector_scenario/wineventlog.py +++ b/bklog/apps/log_databus/handlers/collector_scenario/wineventlog.py @@ -21,7 +21,7 @@ """ from apps.feature_toggle.handlers.toggle import FeatureToggleObject from apps.feature_toggle.plugins.constants import IS_AUTO_DEPLOY_PLUGIN -from apps.log_databus.constants import EtlConfig, LogPluginInfo +from apps.log_databus.constants import LogPluginInfo, 
EtlConfig from apps.log_databus.handlers.collector_scenario import CollectorScenario from apps.log_databus.handlers.collector_scenario.utils import build_es_option_type from apps.utils.log import logger @@ -109,17 +109,11 @@ def parse_steps(cls, steps): "winlog_level": first_event["level"].split(",") if first_event["level"] else [], "winlog_event_id": first_event["event_id"].split(",") if first_event["event_id"] else [], "winlog_source": local["provider_name"], - "winlog_content": [local["filters"][0]["conditions"][0]["key"]] if local["filters"] else [], + "winlog_content": [local["filters"][0]["conditions"][0]["key"]] if local["filters"] else [] } except (IndexError, KeyError, ValueError) as e: logger.exception(f"parse step config failed config => {steps},error => {e}") - return { - "winlog_name": [], - "winlog_level": [], - "winlog_event_id": [], - "winlog_source": [], - "winlog_content": [], - } + return {"winlog_name": [], "winlog_level": [], "winlog_event_id": [], "winlog_source": [], "winlog_content": []} @classmethod def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT): @@ -135,7 +129,6 @@ def get_built_in_config(cls, es_version="5.X", etl_config=EtlConfig.BK_LOG_TEXT) "winEventChannel", "winEventRecordId", "bk_host_id", - "dtEventTimeStamp", ], "separator_node_source": "", "separator_node_action": "", diff --git a/bklog/apps/log_databus/handlers/etl/bkbase.py b/bklog/apps/log_databus/handlers/etl/bkbase.py index 09d35131e..6a86d14de 100644 --- a/bklog/apps/log_databus/handlers/etl/bkbase.py +++ b/bklog/apps/log_databus/handlers/etl/bkbase.py @@ -23,6 +23,8 @@ import json from typing import Union +from django.conf import settings + from apps.api import BkDataDatabusApi from apps.log_databus.constants import BKDATA_ES_TYPE_MAP from apps.log_databus.exceptions import BKBASEStorageNotExistException @@ -33,7 +35,6 @@ from apps.log_databus.handlers.storage import StorageHandler from apps.log_databus.models import CollectorConfig, CollectorPlugin from apps.utils.local import get_request_username -from django.conf import settings class BKBaseEtlHandler(EtlHandler): @@ -94,15 +95,6 @@ def update_or_create(self, instance: Union[CollectorConfig, CollectorPlugin], pa bkdata_json_config = etl_storage.get_bkdata_etl_config(fields, etl_params, built_in_config) fields_config.append({"alias_name": "time", "field_name": "time", "option": {"es_type": "long"}}) - # 当用户使用了自定义字段作为时间字段,则会产生同名字段,需要去重 - fields_names = set() - dedupe_fields_config = [] - for field in fields_config: - field_name = field.get("alias_name") if field.get("alias_name") else field.get("field_name") - if field_name not in fields_names: - dedupe_fields_config.append(field) - fields_names.add(field_name) - bkdata_params = { "raw_data_id": instance.bk_data_id, "result_table_name": f"{settings.TABLE_ID_PREFIX}_{instance.get_en_name()}", @@ -119,7 +111,7 @@ def update_or_create(self, instance: Union[CollectorConfig, CollectorPlugin], pa "is_dimension": field.get("tag", "dimension") == "dimension", "field_index": index, } - for index, field in enumerate(dedupe_fields_config, 1) + for index, field in enumerate(fields_config, 1) ], "json_config": json.dumps(bkdata_json_config), } diff --git a/bklog/apps/log_databus/handlers/etl/transfer.py b/bklog/apps/log_databus/handlers/etl/transfer.py index f11a02437..92bc02c6b 100644 --- a/bklog/apps/log_databus/handlers/etl/transfer.py +++ b/bklog/apps/log_databus/handlers/etl/transfer.py @@ -19,6 +19,9 @@ We undertake not to change the open source license (MIT license) 
applicable to the current version of the project delivered to anyone in the future. """ +from django.conf import settings +from django.core.cache import cache + from apps.constants import UserOperationActionEnum, UserOperationTypeEnum from apps.decorators import user_operation_record from apps.log_clustering.handlers.clustering_config import ClusteringConfigHandler @@ -34,7 +37,6 @@ from apps.log_databus.models import CleanStash from apps.log_search.constants import CollectorScenarioEnum from apps.utils.local import get_request_username -from django.conf import settings class TransferEtlHandler(EtlHandler): @@ -78,14 +80,6 @@ def update_or_create( etl_params["separator_node_action"] = "" log_clustering_fields = CollectorScenario.log_clustering_fields(cluster_info["cluster_config"]["version"]) fields = CollectorScenario.fields_insert_field_index(source_fields=fields, dst_fields=log_clustering_fields) - - # 涉及到字段映射的,需要把前缀去掉,比如 bk_separator_object.abc => abc - for field in fields: - if "option" in field and "real_path" in field["option"]: - field["option"]["real_path"] = field["option"]["real_path"].replace( - f"{EtlStorage.separator_node_name}.", "" - ) - update_clustering_clean.delay(index_set_id=clustering_handler.data.index_set_id) # 暂时去掉这个效验逻辑,底下的逻辑都是幂等的,可以继续也必须继续往下走 diff --git a/bklog/apps/log_databus/handlers/etl_storage/base.py b/bklog/apps/log_databus/handlers/etl_storage/base.py index 952503caf..39ed16158 100644 --- a/bklog/apps/log_databus/handlers/etl_storage/base.py +++ b/bklog/apps/log_databus/handlers/etl_storage/base.py @@ -120,29 +120,6 @@ def get_result_table_fields(self, fields, etl_params, built_in_config, es_versio else {"es_type": "text"}, } ) - # 是否保留用户未定义字段 - if etl_params.get("retain_extra_json"): - field_list.append( - { - "field_name": "__ext_json", - "field_type": "object", - "tag": "dimension", - "alias_name": "ext_json", - "description": _("用户未定义JSON字段"), - "option": { - "es_type": "object", - "es_doc_values": True, - "es_include_in_all": False, - "real_path": f"{self.separator_node_name}.ext_json", - } - if es_version.startswith("5.") - else { - "es_type": "object", - "es_doc_values": True, - "real_path": f"{self.separator_node_name}.ext_json", - }, - }, - ) # 默认使用上报时间做为数据时间 time_field = built_in_config["time_field"] @@ -199,8 +176,7 @@ def get_result_table_fields(self, fields, etl_params, built_in_config, es_versio # 时间字段处理 if field["is_time"]: time_field["alias_name"] = source_field - if field_option.get("real_path"): - time_field["option"]["real_path"] = field_option["real_path"] + time_field["option"]["real_path"] = field_option["real_path"] time_field["option"]["time_zone"] = field["option"]["time_zone"] time_field["option"]["time_format"] = field["option"]["time_format"] time_field["option"]["field_index"] = field_option["field_index"] @@ -512,43 +488,6 @@ def _to_bkdata_assign(self, field): "type": BKDATA_ES_TYPE_MAP.get(field.get("option").get("es_type"), "string"), } - def _to_bkdata_assign_obj(self, field): - key = field.get("alias_name") - if not key: - key = field.get("field_name") - return { - "key": "__all_keys__", - "assign_to": key, - "type": BKDATA_ES_TYPE_MAP.get(field.get("option").get("es_type"), "string"), - } - - def _get_built_in_fields_type_fields(self, built_in_fields): - built_in_fields_type_object = [field for field in built_in_fields if field["field_type"] == "object"] - built_in_fields_no_type_object = [field for field in built_in_fields if field["field_type"] != "object"] - if len(built_in_fields_no_type_object) == 0: - 
access_built_in_fields_type_object = [] - else: - access_built_in_fields_type_object = [ - { - "type": "access", - "subtype": "access_obj", - "label": "label60f0af", - "key": field.get("alias_name") if field.get("alias_name") else field.get("field_name"), - "result": f'{field.get("alias_name") if field.get("alias_name") else field.get("field_name")}_json', - "default_type": "null", - "default_value": "", - "next": { - "type": "assign", - "subtype": "assign_json", - "label": "label2af98b", - "assign": [self._to_bkdata_assign_obj(field)], - "next": None, - }, - } - for field in built_in_fields_type_object - ] - return built_in_fields_type_object, built_in_fields_no_type_object, access_built_in_fields_type_object - def _to_bkdata_conf(self, time_field): return { "output_field_name": "timestamp", diff --git a/bklog/apps/log_databus/handlers/etl_storage/bk_log_delimiter.py b/bklog/apps/log_databus/handlers/etl_storage/bk_log_delimiter.py index 26188a61c..38315c599 100644 --- a/bklog/apps/log_databus/handlers/etl_storage/bk_log_delimiter.py +++ b/bklog/apps/log_databus/handlers/etl_storage/bk_log_delimiter.py @@ -22,21 +22,21 @@ import copy import json +from django.utils.translation import ugettext_lazy as _ + +from apps.utils.db import array_group from apps.exceptions import ValidationError +from apps.utils.log import logger +from apps.log_databus.constants import EtlConfig, BKDATA_ES_TYPE_MAP +from apps.log_databus.handlers.etl_storage import EtlStorage +from apps.log_databus.exceptions import EtlDelimiterParseException +from apps.log_databus.handlers.etl_storage.utils.transfer import preview from apps.log_databus.constants import ( - BKDATA_ES_TYPE_MAP, - ETL_DELIMITER_DELETE, ETL_DELIMITER_END, + ETL_DELIMITER_DELETE, ETL_DELIMITER_IGNORE, FIELD_TEMPLATE, - EtlConfig, ) -from apps.log_databus.exceptions import EtlDelimiterParseException -from apps.log_databus.handlers.etl_storage import EtlStorage -from apps.log_databus.handlers.etl_storage.utils.transfer import preview -from apps.utils.db import array_group -from apps.utils.log import logger -from django.utils.translation import ugettext_lazy as _ class BkLogDelimiterEtlStorage(EtlStorage): @@ -150,12 +150,6 @@ def parse_result_table_config(cls, result_table_config, result_table_storage=Non def get_bkdata_etl_config(self, fields, etl_params, built_in_config): retain_original_text = etl_params.get("retain_original_text", False) built_in_fields = built_in_config.get("fields", []) - ( - built_in_fields_type_object, - built_in_fields_no_type_object, - access_built_in_fields_type_object, - ) = self._get_built_in_fields_type_fields(built_in_fields) - result_table_fields = self.get_result_table_fields(fields, etl_params, copy.deepcopy(built_in_config)) time_field = result_table_fields.get("time_field") bkdata_fields = [field for field in fields if not field["is_delete"]] @@ -220,7 +214,7 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): ) + [ self._to_bkdata_assign(built_in_field) - for built_in_field in built_in_fields_no_type_object + for built_in_field in built_in_fields if built_in_field.get("flat_field", False) ], "type": "assign", @@ -248,11 +242,10 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): "next": None, "subtype": "assign_obj", "label": "labelf676c9", - "assign": self._get_bkdata_default_fields(built_in_fields_no_type_object, time_field), + "assign": self._get_bkdata_default_fields(built_in_fields, time_field), "type": "assign", }, - ] - + access_built_in_fields_type_object, + ], "name": 
"", "label": None, "type": "branch", diff --git a/bklog/apps/log_databus/handlers/etl_storage/bk_log_json.py b/bklog/apps/log_databus/handlers/etl_storage/bk_log_json.py index 50b27432c..99d2a2ec4 100644 --- a/bklog/apps/log_databus/handlers/etl_storage/bk_log_json.py +++ b/bklog/apps/log_databus/handlers/etl_storage/bk_log_json.py @@ -73,12 +73,6 @@ def get_result_table_config(self, fields, etl_params, built_in_config, es_versio def get_bkdata_etl_config(self, fields, etl_params, built_in_config): retain_original_text = etl_params.get("retain_original_text", False) built_in_fields = built_in_config.get("fields", []) - ( - built_in_fields_type_object, - built_in_fields_no_type_object, - access_built_in_fields_type_object, - ) = self._get_built_in_fields_type_fields(built_in_fields) - result_table_fields = self.get_result_table_fields(fields, etl_params, copy.deepcopy(built_in_config)) time_field = result_table_fields.get("time_field") bkdata_fields = [field for field in fields if not field["is_delete"]] @@ -137,7 +131,7 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): ) + [ self._to_bkdata_assign(built_in_field) - for built_in_field in built_in_fields_no_type_object + for built_in_field in built_in_fields if built_in_field.get("flat_field", False) ], "type": "assign", @@ -162,11 +156,10 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): "next": None, "subtype": "assign_obj", "label": "labelf676c9", - "assign": self._get_bkdata_default_fields(built_in_fields_no_type_object, time_field), + "assign": self._get_bkdata_default_fields(built_in_fields, time_field), "type": "assign", }, - ] - + access_built_in_fields_type_object, + ], "name": "", "label": None, "type": "branch", diff --git a/bklog/apps/log_databus/handlers/etl_storage/bk_log_regexp.py b/bklog/apps/log_databus/handlers/etl_storage/bk_log_regexp.py index a7454cc8f..b86410d94 100644 --- a/bklog/apps/log_databus/handlers/etl_storage/bk_log_regexp.py +++ b/bklog/apps/log_databus/handlers/etl_storage/bk_log_regexp.py @@ -22,11 +22,12 @@ import copy import re +from django.utils.translation import ugettext_lazy as _ + from apps.exceptions import ValidationError from apps.log_databus.constants import EtlConfig from apps.log_databus.handlers.etl_storage import EtlStorage from apps.log_databus.handlers.etl_storage.utils.transfer import preview -from django.utils.translation import ugettext_lazy as _ class BkLogRegexpEtlStorage(EtlStorage): @@ -101,12 +102,6 @@ def get_result_table_config(self, fields, etl_params, built_in_config, es_versio def get_bkdata_etl_config(self, fields, etl_params, built_in_config): retain_original_text = etl_params.get("retain_original_text", False) built_in_fields = built_in_config.get("fields", []) - ( - built_in_fields_type_object, - built_in_fields_no_type_object, - access_built_in_fields_type_object, - ) = self._get_built_in_fields_type_fields(built_in_fields) - result_table_fields = self.get_result_table_fields(fields, etl_params, copy.deepcopy(built_in_config)) time_field = result_table_fields.get("time_field") return { @@ -151,7 +146,7 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): ) + [ self._to_bkdata_assign(built_in_field) - for built_in_field in built_in_fields_no_type_object + for built_in_field in built_in_fields if built_in_field.get("flat_field", False) ], "next": None, @@ -199,11 +194,10 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): "type": "assign", "subtype": "assign_obj", "label": "labelf676c9", - 
"assign": self._get_bkdata_default_fields(built_in_fields_no_type_object, time_field), + "assign": self._get_bkdata_default_fields(built_in_fields, time_field), "next": None, }, - ] - + access_built_in_fields_type_object, + ], }, }, "conf": self._to_bkdata_conf(time_field), diff --git a/bklog/apps/log_databus/handlers/etl_storage/bk_log_text.py b/bklog/apps/log_databus/handlers/etl_storage/bk_log_text.py index 5ac958598..b767b43ea 100644 --- a/bklog/apps/log_databus/handlers/etl_storage/bk_log_text.py +++ b/bklog/apps/log_databus/handlers/etl_storage/bk_log_text.py @@ -69,12 +69,6 @@ def get_result_table_config(self, fields, etl_params, built_in_config, es_versio def get_bkdata_etl_config(self, fields, etl_params, built_in_config): built_in_fields = built_in_config.get("fields", []) - ( - built_in_fields_type_object, - built_in_fields_no_type_object, - access_built_in_fields_type_object, - ) = self._get_built_in_fields_type_fields(built_in_fields) - result_table_fields = self.get_result_table_fields(fields, etl_params, copy.deepcopy(built_in_config)) time_field = result_table_fields.get("time_field") @@ -97,7 +91,7 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): ] + [ self._to_bkdata_assign(built_in_field) - for built_in_field in built_in_fields_no_type_object + for built_in_field in built_in_fields if built_in_field.get("flat_field", False) ], "type": "assign", @@ -117,11 +111,10 @@ def get_bkdata_etl_config(self, fields, etl_params, built_in_config): "next": None, "subtype": "assign_obj", "label": "labelf676c9", - "assign": self._get_bkdata_default_fields(built_in_fields_no_type_object, time_field), + "assign": self._get_bkdata_default_fields(built_in_fields, time_field), "type": "assign", }, - ] - + access_built_in_fields_type_object, + ], "name": "", "label": None, "type": "branch", diff --git a/bklog/apps/log_databus/handlers/etl_storage/utils/darwin/transfer-min b/bklog/apps/log_databus/handlers/etl_storage/utils/darwin/transfer-min old mode 100644 new mode 100755 diff --git a/bklog/apps/log_databus/handlers/link.py b/bklog/apps/log_databus/handlers/link.py index 9723be887..8639a44a0 100644 --- a/bklog/apps/log_databus/handlers/link.py +++ b/bklog/apps/log_databus/handlers/link.py @@ -21,18 +21,14 @@ """ from apps.api import TransferApi -from apps.log_databus.constants import ( - KAFKA_CLUSTER_TYPE, - REGISTERED_SYSTEM_DEFAULT, - STORAGE_CLUSTER_TYPE, -) from apps.log_databus.exceptions import ( DataLinkConfigNotExistException, - EditLinkException, - SameLinkNameException, StorageNotExistException, + SameLinkNameException, + EditLinkException, ) from apps.log_databus.models import DataLinkConfig +from apps.log_databus.constants import KAFKA_CLUSTER_TYPE, STORAGE_CLUSTER_TYPE, REGISTERED_SYSTEM_DEFAULT class DataLinkHandler(object): @@ -50,7 +46,7 @@ def list(param): """ 获取所有链路信息 """ - link_objects = DataLinkConfig.objects.order_by("is_edge_transport", "-updated_at").all() + link_objects = DataLinkConfig.objects.all() if param.get("bk_biz_id"): link_objects = link_objects.filter(bk_biz_id__in=[0, param["bk_biz_id"]]) response = [ diff --git a/bklog/apps/log_desensitize/__init__.py b/bklog/apps/log_desensitize/__init__.py index c54405174..e69de29bb 100644 --- a/bklog/apps/log_desensitize/__init__.py +++ b/bklog/apps/log_desensitize/__init__.py @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. -Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 
-BK-LOG 蓝鲸日志平台 is licensed under the MIT License. -License for BK-LOG 蓝鲸日志平台: --------------------------------------------------------------------- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT -LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -We undertake not to change the open source license (MIT license) applicable to the current version of -the project delivered to anyone in the future. -""" \ No newline at end of file diff --git a/bklog/apps/log_desensitize/exceptions.py b/bklog/apps/log_desensitize/exceptions.py deleted file mode 100644 index 5e067b244..000000000 --- a/bklog/apps/log_desensitize/exceptions.py +++ /dev/null @@ -1,26 +0,0 @@ -from apps.exceptions import BaseException, ErrorCode -from django.utils.translation import ugettext_lazy as _ - -# ================================================= -# 日志脱敏 -# ================================================= - - -class BaseDesensitizeRuleException(BaseException): - MODULE_CODE = ErrorCode.BKLOG_COLLECTOR_CONFIG - MESSAGE = _("日志脱敏规则模块异常") - - -class DesensitizeRuleNotExistException(BaseDesensitizeRuleException): - ERROR_CODE = "001" - MESSAGE = _("脱敏规则 [{id}] 不存在") - - -class DesensitizeRuleNameExistException(BaseDesensitizeRuleException): - ERROR_CODE = "002" - MESSAGE = _("脱敏规则名称: [{name}] 已存在") - - -class DesensitizeRuleRegexCompileException(BaseDesensitizeRuleException): - ErrorCode = "003" - MESSAGE = _("脱敏规则(ID [{rule_id}] ): 正则表达式 [{pattern}] 编译失败") diff --git a/bklog/apps/log_desensitize/handlers/desensitize.py b/bklog/apps/log_desensitize/handlers/desensitize.py index 820260ab3..d41d7ee5b 100644 --- a/bklog/apps/log_desensitize/handlers/desensitize.py +++ b/bklog/apps/log_desensitize/handlers/desensitize.py @@ -21,18 +21,14 @@ """ import re -from django.db.models import Q +from typing import List from django.utils.translation import ugettext_lazy as _ + from apps.exceptions import ValidationError -from apps.log_desensitize.exceptions import ( - DesensitizeRuleNotExistException, - DesensitizeRuleNameExistException, - DesensitizeRuleRegexCompileException -) from apps.log_desensitize.handlers.desensitize_operator import OPERATOR_MAPPING +from apps.log_desensitize.handlers.entity.desensitize_config_entity import DesensitizeConfigEntity from apps.log_desensitize.models import DesensitizeRule -from apps.models import model_to_dict class DesensitizeHandler(object): @@ -41,68 +37,49 @@ class DesensitizeHandler(object): 接收配置规则的列表, 进行规则匹配, 并调用相关的脱敏算子进行处理, 规则列表以流水线的方式处理 """ - def __init__(self, desensitize_config_info): - - # 构建字段绑定的规则mapping {"field_a": [{"rule_id":1, "operator": "mask_shield"}]} - 
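# --- editor's note: illustrative sketch, not part of the patch --------------
# Shape of the field_rule_mapping structure that the outgoing __init__ below
# builds (placeholder values): one entry per field, each config carrying a
# compiled regex, an instantiated operator and a priority index, sorted so
# that a smaller sort_index is applied first:
field_rule_mapping = {
    "path": [
        {"rule_id": 1, "operator": "mask_shield", "params": {}, "sort_index": 0,
         "__regex__": None, "__id__": 1, "operator_obj": None},
    ],
}
# -----------------------------------------------------------------------------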
self.field_rule_mapping = dict() - - self.rule_ids = [_info["rule_id"] for _info in desensitize_config_info if _info.get("rule_id")] - - # 过滤出当前脱敏配置的关联规则中生效的规则 - self.effective_rule_ids = [] if not self.rule_ids else list(DesensitizeRule.objects.filter(id__in=self.rule_ids, is_active=True).values_list("id", flat=True)) - - # 脱敏配置的标志序号 - sign_num = 1 - - for _config in desensitize_config_info: - - # 如果绑定了脱敏规则 判断绑定的规则当前是否删除或者未启用 - rule_id = _config.get("rule_id") - - if rule_id and rule_id not in self.effective_rule_ids: - continue + def __init__(self, desensitize_config_list: List[DesensitizeConfigEntity]): + self.desensitize_config_list = desensitize_config_list + rule_ids = [_config.rule_id for _config in desensitize_config_list if _config.rule_id] - field_name = _config["field_name"] + # 初始化脱敏规则实例 + if rule_ids: + desensitize_rule_obj = DesensitizeRule.objects.filter(id__in=rule_ids) + desensitize_rule = {obj.id: obj for obj in desensitize_rule_obj} + else: + desensitize_rule = {} - operator = _config["operator"] + # 针对每一个配置,生成一个包含 "匹配模式", "匹配字段", "脱敏算子实例" + for entity in self.desensitize_config_list: - if not operator or not field_name: - continue + # 查询对应的脱敏规则 实例化算子 + if entity.rule_id: + rule_obj = desensitize_rule.get(entity.rule_id, DesensitizeRule) + entity.operator = rule_obj.operator + entity.params = rule_obj.params + entity.match_pattern = rule_obj.match_pattern or "" # 生成配置对应的算子实例 - if operator not in OPERATOR_MAPPING.keys(): - raise ValidationError(_("{} 算子能力尚未实现").format(operator)) + if entity.operator not in OPERATOR_MAPPING.keys(): + raise ValidationError(_("{} 算子能力尚未实现").format(entity.operator)) - if field_name not in self.field_rule_mapping.keys(): - self.field_rule_mapping[field_name] = list() - - operator_cls = OPERATOR_MAPPING[operator] + operator_cls = OPERATOR_MAPPING[entity.operator] # 实例化算子 - _config["operator_obj"] = operator_cls() if not _config["params"] else operator_cls(**_config["params"]) - - # 编译正则表达式 - try: - _config["__regex__"] = None if not _config.get("match_pattern") else re.compile(_config["match_pattern"]) - except re.error: - raise DesensitizeRuleRegexCompileException( - DesensitizeRuleRegexCompileException.MESSAGE.format( - rule_id=rule_id, - pattern=_config["match_pattern"] - ) - ) + entity.operator_obj = operator_cls() if not entity.params else operator_cls(**entity.params) - # 添加配置序号 兼容没有绑定脱敏规则的纯配置脱敏模式 - _config["__id__"] = sign_num + def transform_text(self, text: str = None): + """ + 纯文本格式处理 兼容快速调试 + """ - self.field_rule_mapping[field_name].append(_config) + if not text: + return "" - sign_num += 1 + # 文本处理 + for entity in self.desensitize_config_list: + text = self._match_transform(entity, text) - if self.field_rule_mapping: - # 对字段绑定的规则按照优先级排序 sort_index 越小的优先级越高 - for _field_name, _config in self.field_rule_mapping.items(): - self.field_rule_mapping[_field_name] = sorted(_config, key=lambda x: x["sort_index"]) + return text def transform_dict(self, log_content: dict = None): """ @@ -110,194 +87,32 @@ def transform_dict(self, log_content: dict = None): 处理字典类型 单条log内容的格式 {"field_a": 12345, "field_b": "abc"} 根据脱敏配置列表 desensitize_config_list 以流水线方式处理 log 字段的内容 """ - if not self.field_rule_mapping or not log_content: - return log_content - for _field, _rules in self.field_rule_mapping.items(): - text = log_content.get(_field) - if not text or not _rules: + for entity in self.desensitize_config_list: + + text = log_content.get(entity.field_name, None) + + if text is None: continue - log_content[_field] = self.transform(log=str(text), 
rules=_rules) + + # 文本处理 + text = self._match_transform(entity, str(text)) + + # 重新赋值 log_content + log_content[entity.field_name] = text return log_content @staticmethod - def _match_transform(rule: dict, text: str = "", context: dict = None): + def _match_transform(entity: DesensitizeConfigEntity, text: str = ""): """ 公共方法 匹配文本并进行算子处理 """ + pattern = entity.match_pattern or "" + match = re.match(pattern, text) + context = match.groupdict() if match else {} # 文本处理 - text = rule["operator_obj"].transform(text, context) + text = entity.operator_obj.transform(text, context) return text - - @staticmethod - def find_substrings_by_rule(log: str, rule: dict): - """ - 找出所有匹配正则的起止位置 - """ - - # 匹配表达式未指定的情况下 默认整个字段全部处理 - regex = rule.get("__regex__") - if not regex: - return [ - { - "src": log, - "start": 0, - "end": len(str(log)), - "group_dict": dict(), - "__id__": rule["__id__"], - "rule": rule - } - ] - - # 使用finditer()函数找到所有匹配的子串 - matches = regex.finditer(log) - - results = [] - # 输出匹配的子串及其起止位置 - for match in matches: - results.append({ - "src": match.group(), - "start": match.start(), - "end": match.end(), - "group_dict": match.groupdict(), - "__id__": rule["__id__"], - "rule": rule - }) - return results - - @staticmethod - def merge_substrings(first, second): - """ - 合并子串匹配结果,剔除出现重叠的子串 - """ - - def is_overlap(item1, item2): - return not (item1["start"] >= item2["end"] or item1["end"] < item2["start"]) - - result = first.copy() - - for second_item in second: - is_overlapping = False - - for first_item in first: - if is_overlap(first_item, second_item): - is_overlapping = True - break - - if not is_overlapping: - result.append(second_item) - - return result - - def transform(self, log: str, rules: list): - substrings = [] - for rule in rules: - rule_substrings = self.find_substrings_by_rule(log, rule) - substrings = self.merge_substrings(substrings, rule_substrings) - substrings.sort(key=lambda x: x["start"]) - - last_end = 0 - outputs = [] - for substring in substrings: - outputs.append(log[last_end:substring["start"]]) - # 文本处理 - _text = self._match_transform(substring["rule"], str(substring["src"]), substring["group_dict"]) - outputs.append(_text) - last_end = substring["end"] - - # 末尾补充 - outputs.append(log[last_end:len(log)]) - return "".join(outputs) - - -class DesensitizeRuleHandler(object): - """ - 脱敏规则 - """ - - def __init__(self, rule_id=None): - self.rule_id = rule_id - self.data = None - if rule_id: - try: - self.data = DesensitizeRule.objects.get(id=self.rule_id) - except DesensitizeRule.DoesNotExist: - raise DesensitizeRuleNotExistException( - DesensitizeRuleNotExistException.MESSAGE.format(id=self.rule_id) - ) - - def create_or_update(self, params: dict): - """ - 创建更新脱敏规则 - """ - # 重名校验 - query_params = {"rule_name": params["rule_name"]} - - if not self.data: - # 创建 - if params["is_public"]: - query_params.update({"is_public": True}) - else: - query_params.update({"space_uid": params["space_uid"]}) - _qs = DesensitizeRule.objects.filter(**query_params) - else: - # 更新 - if self.data.is_public: - query_params.update({"is_public": True}) - else: - query_params.update({"space_uid": self.data.space_uid}) - _qs = DesensitizeRule.objects.filter(**query_params).exclude(id=self.rule_id) - - if _qs.exists(): - raise DesensitizeRuleNameExistException( - DesensitizeRuleNameExistException.MESSAGE.format(name=params["rule_name"]) - ) - - model_field = { - "rule_name": params["rule_name"], - "operator": params["operator"], - "params": params["operator_params"], - "match_pattern": 
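# --- editor's note: illustrative sketch, not part of the patch --------------
# The removed transform() pipeline above works span-wise: collect each rule's
# regex match spans, drop later spans that overlap an earlier rule's span
# (merge_substrings), then rebuild the string with every surviving span run
# through its operator. A self-contained sketch of the overlap rule:
first = [{"start": 0, "end": 5}]  # span already claimed by a higher-priority rule
second = [{"start": 3, "end": 8}, {"start": 10, "end": 12}]
kept = first + [
    s for s in second
    if all(f["start"] >= s["end"] or f["end"] < s["start"] for f in first)
]
assert kept == first + [{"start": 10, "end": 12}]  # the overlapping span is dropped,
# mirroring is_overlap() in the removed merge_substrings()
# -----------------------------------------------------------------------------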
params["match_pattern"], - "match_fields": params["match_fields"], - } - - if not self.data: - # 创建脱敏规则 - model_field.update( - { - "is_public": params["is_public"], - "space_uid": params.get("space_uid") or "", - } - ) - obj = DesensitizeRule.objects.create(**model_field) - return model_to_dict(obj) - else: - # 更新脱敏规则 - DesensitizeRule.objects.filter(id=self.rule_id).update(**model_field) - return {"id": self.rule_id} - - def list(self, is_public: bool, space_uid: str): - """ - 脱敏规则列表 - """ - objs = DesensitizeRule.objects.filter().all() - - if space_uid: - # 返回全局规则&当前业务下的规则 - objs.filter(Q(is_public=is_public) | Q(space_uid=space_uid)) - else: - # 只返回全局规则 - objs.filter(is_public=is_public) - - return [model_to_dict(obj) for obj in objs] - - def retrieve(self): - """脱敏规则详情""" - return model_to_dict(self.data) - - def destroy(self): - """脱敏规则删除""" - self.data.delete() diff --git a/bklog/apps/log_desensitize/handlers/entity/__init__.py b/bklog/apps/log_desensitize/handlers/entity/__init__.py new file mode 100644 index 000000000..fcbe7eb8a --- /dev/null +++ b/bklog/apps/log_desensitize/handlers/entity/__init__.py @@ -0,0 +1,20 @@ +""" +Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +BK-LOG 蓝鲸日志平台 is licensed under the MIT License. +License for BK-LOG 蓝鲸日志平台: +-------------------------------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +We undertake not to change the open source license (MIT license) applicable to the current version of +the project delivered to anyone in the future. +""" \ No newline at end of file diff --git a/bklog/apps/log_desensitize/handlers/entity/desensitize_config_entity.py b/bklog/apps/log_desensitize/handlers/entity/desensitize_config_entity.py new file mode 100644 index 000000000..6086e1271 --- /dev/null +++ b/bklog/apps/log_desensitize/handlers/entity/desensitize_config_entity.py @@ -0,0 +1,37 @@ +""" +Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +BK-LOG 蓝鲸日志平台 is licensed under the MIT License. 
+License for BK-LOG 蓝鲸日志平台: +-------------------------------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +We undertake not to change the open source license (MIT license) applicable to the current version of +the project delivered to anyone in the future. +""" +from apps.log_desensitize.handlers.desensitize_operator.base import DesensitizeMethodBase +from dataclasses import dataclass, field + + +@dataclass +class DesensitizeConfigEntity(object): + """ + 脱敏配置实体 + """ + field_name: str = "" + operator: str = "" + params: dict = field(default_factory=dict) + match_pattern: str = "" + rule_id: int = 0 + operator_obj: DesensitizeMethodBase = None + exclude_rules: list = field(default_factory=list) + match_fields: list = field(default_factory=list) diff --git a/bklog/apps/log_desensitize/admin.py b/bklog/apps/log_desensitize/handlers/utils.py similarity index 54% rename from bklog/apps/log_desensitize/admin.py rename to bklog/apps/log_desensitize/handlers/utils.py index 3c29d3c35..c0dbb531c 100644 --- a/bklog/apps/log_desensitize/admin.py +++ b/bklog/apps/log_desensitize/handlers/utils.py @@ -19,58 +19,34 @@ We undertake not to change the open source license (MIT license) applicable to the current version of the project delivered to anyone in the future. 
""" -from django.contrib import admin -from apps.utils.admin import AppModelAdmin -from apps.log_desensitize.models import DesensitizeRule, DesensitizeConfig, DesensitizeFieldConfig +from typing import List +from apps.log_desensitize.handlers.entity.desensitize_config_entity import DesensitizeConfigEntity -@admin.register(DesensitizeRule) -class DesensitizeRuleAdmin(AppModelAdmin): - list_display = [ - "rule_name", - "operator", - "params", - "match_pattern", - "space_uid", - "is_public", - "match_fields", - "is_active", - "created_at", - "created_by", - "is_deleted", - ] - search_fields = [ - "rule_name", - "operator", - "space_uid", - "is_public", - "is_active" - ] +def desensitize_params_init(desensitize_configs: List = None): + """ + 脱敏工厂参数初始化逻辑 + """ -@admin.register(DesensitizeConfig) -class DesensitizeConfigAdmin(AppModelAdmin): - list_display = ["index_set_id", "text_fields", "created_at", "created_by"] - search_fields = ["index_set_id"] + if not desensitize_configs: + return [] + # 初始化脱敏工厂参数 + desensitize_entities = list() -@admin.register(DesensitizeFieldConfig) -class DesensitizeFieldConfigAdmin(AppModelAdmin): - list_display = [ - "index_set_id", - "field_name", - "rule_id", - "operator", - "params", - "match_pattern", - "created_at", - "created_by" - ] - search_fields = [ - "index_set_id", - "field_name", - "rule_id", - "operator", - "params", - "match_pattern", - ] + for _config in desensitize_configs: + # 初始化实体对象 + + desensitize_entity = DesensitizeConfigEntity( + field_name=_config.get("field_name"), + operator=_config.get("operator"), + params=_config.get("params"), + match_pattern=_config.get("match_pattern"), + rule_id=_config.get("rule_id"), + exclude_rules=_config.get("exclude_rules"), + match_fields=_config.get("match_fields") + ) + desensitize_entities.append(desensitize_entity) + + return desensitize_entities diff --git a/bklog/apps/log_desensitize/migrations/0002_desensitizefieldconfig_match_pattern.py b/bklog/apps/log_desensitize/migrations/0002_desensitizefieldconfig_match_pattern.py deleted file mode 100644 index 2ef7d7100..000000000 --- a/bklog/apps/log_desensitize/migrations/0002_desensitizefieldconfig_match_pattern.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.2.15 on 2023-09-01 05:42 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('log_desensitize', '0001_initial'), - ] - - operations = [ - migrations.AddField( - model_name='desensitizefieldconfig', - name='match_pattern', - field=models.TextField(blank=True, default='', null=True, verbose_name='匹配模式'), - ), - ] diff --git a/bklog/apps/log_desensitize/models.py b/bklog/apps/log_desensitize/models.py index ed9c9184b..d90bfc54d 100644 --- a/bklog/apps/log_desensitize/models.py +++ b/bklog/apps/log_desensitize/models.py @@ -65,7 +65,6 @@ class DesensitizeFieldConfig(OperateRecordModel): index_set_id = models.IntegerField(_("索引集ID"), db_index=True) field_name = models.CharField(_("字段名称"), max_length=64, blank=True, default="") rule_id = models.IntegerField(_("脱敏规则ID"), default=0, db_index=True) - match_pattern = models.TextField(_("匹配模式"), default="", null=True, blank=True) operator = models.CharField(_("脱敏算子"), max_length=64, choices=DesensitizeOperator.get_choices()) params = models.JSONField(_("脱敏参数"), default=dict, null=True) sort_index = models.IntegerField(_("优先级"), null=True, default=0) diff --git a/bklog/apps/log_desensitize/serializers.py b/bklog/apps/log_desensitize/serializers.py deleted file mode 100644 index 
5baf4eab8..000000000 --- a/bklog/apps/log_desensitize/serializers.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. -Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -BK-LOG 蓝鲸日志平台 is licensed under the MIT License. -License for BK-LOG 蓝鲸日志平台: --------------------------------------------------------------------- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT -LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -We undertake not to change the open source license (MIT license) applicable to the current version of -the project delivered to anyone in the future. -""" -import re - -from apps.exceptions import ValidationError - -from rest_framework import serializers -from django.utils.translation import ugettext_lazy as _ - -from apps.log_desensitize.constants import DesensitizeOperator -from apps.log_desensitize.handlers.desensitize_operator import OPERATOR_MAPPING -from bkm_space.serializers import SpaceUIDField - - -class DesensitizeRuleListSerializer(serializers.Serializer): - space_uid = SpaceUIDField(label=_("空间唯一标识"), required=False) - is_public = serializers.BooleanField(label=_("是否为全局规则"), required=False, default=True) - - def validate(self, attrs): - attrs = super().validate(attrs) - - if not attrs["is_public"] and not attrs["space_uid"]: - raise ValidationError(_("空间唯一标识不能为空")) - - return attrs - - -class DesensitizeRuleSerializer(serializers.Serializer): - rule_name = serializers.CharField(label=_("脱敏规则名称"), required=True, max_length=64) - match_fields = serializers.ListField(label=_("匹配字段名"), child=serializers.CharField(), required=False, default=list) - match_pattern = serializers.CharField(label=_("匹配表达式"), required=False, allow_null=True, default="") - space_uid = SpaceUIDField(label=_("空间唯一标识"), required=False) - operator = serializers.ChoiceField(label=_("脱敏算子"), choices=DesensitizeOperator.get_choices(), required=True) - operator_params = serializers.DictField(label=_("脱敏配置参数"), required=False) - is_public = serializers.BooleanField(label=_("是否为全局规则"), required=False, default=False) - - def validate(self, attrs): - attrs = super().validate(attrs) - - # 脱敏算子校验逻辑 - if not attrs.get("operator"): - raise ValidationError(_("脱敏算子不能为空")) - - # 校验正则表达式的合法性 - match_pattern = attrs.get("match_pattern") - if match_pattern: - try: - re.compile(match_pattern) - except re.error: - raise ValidationError(_("正则表达式 [{}] 不合法").format(match_pattern)) - - desensitize_cls = OPERATOR_MAPPING.get(attrs.get("operator")) - - if not desensitize_cls: - raise 
ValidationError(_("[{}] 脱敏算子类型暂未支持").format(attrs.get("operator"))) - - if not attrs.get("operator_params"): - return attrs - - desensitize_serializer = desensitize_cls.ParamsSerializer(data=attrs.get("operator_params"), many=False) - - # 脱敏参数校验 - desensitize_serializer.is_valid(raise_exception=True) - - data = desensitize_serializer.validated_data - - # 赋值 - attrs["operator_params"] = dict(data) - - return attrs diff --git a/bklog/apps/log_desensitize/urls.py b/bklog/apps/log_desensitize/urls.py index 3414ea80e..1096bcb78 100644 --- a/bklog/apps/log_desensitize/urls.py +++ b/bklog/apps/log_desensitize/urls.py @@ -22,11 +22,11 @@ from django.conf.urls import include, url from rest_framework import routers -from apps.log_desensitize.views import desensitize_rule_views +from apps.log_databus.views import collector_views router = routers.DefaultRouter(trailing_slash=True) -router.register(r"desensitize/rule", desensitize_rule_views.DesensitizeRuleViesSet, basename="rule") +router.register(r"collectors", collector_views.CollectorViewSet, basename="collectors") urlpatterns = [ diff --git a/bklog/apps/log_desensitize/views/desensitize_rule_views.py b/bklog/apps/log_desensitize/views/desensitize_rule_views.py deleted file mode 100644 index bf04caf8f..000000000 --- a/bklog/apps/log_desensitize/views/desensitize_rule_views.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. -Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -BK-LOG 蓝鲸日志平台 is licensed under the MIT License. -License for BK-LOG 蓝鲸日志平台: --------------------------------------------------------------------- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT -LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -We undertake not to change the open source license (MIT license) applicable to the current version of -the project delivered to anyone in the future. 
-""" -from apps.generic import ModelViewSet -from apps.log_desensitize.handlers.desensitize import DesensitizeRuleHandler -from apps.log_desensitize.models import DesensitizeRule - -from rest_framework.response import Response - -from apps.log_desensitize.serializers import DesensitizeRuleSerializer, DesensitizeRuleListSerializer - - -class DesensitizeRuleViesSet(ModelViewSet): - """ - 脱敏规则 - """ - lookup_field = "id" - model = DesensitizeRule - - def get_permissions(self): - return [] - - def list(self, request, *args, **kwargs): - """ - @api {GET} /api/v1/desensitize/rule/?space_uid=$space_uid&is_public=$is_public 脱敏规则列表 - @apiName desensitize_rule list - @apiGroup DesensitizeRule - @apiSuccess {String} rule_name 脱敏规则名称 - @apiSuccess {Array} match_fields 匹配的字段名列表 - @apiSuccess {String} match_pattern 匹配表达式 - @apiSuccess {String} operator 脱敏算子 可选字段 ‘mask_shield, text_replace’ - @apiSuccess {Json} operator_params 脱敏算子参数 - @apiSuccess {Int} operator_params.preserve_head 掩码屏蔽算子参数 保留前几位 默认 0 - @apiSuccess {Int} operator_params.preserve_tail 掩码屏蔽算子参数 保留后几位 默认 0 - @apiSuccess {String} operator_params.replace_mark 掩码屏蔽算子参数 替换符号 默认 * - @apiSuccess {String} operator_params.template_string 文本替换算子参数 替换模板 - @apiSuccess {Bool} is_active 是否启用 - @apiSuccess {Bool} is_public 是否全局规则 - @apiSuccess {Int} access_num 接入项总数 - @apiSuccess {Array} access_info 接入项 - @apiSuccess {String} access_info.scenario_id 接入场景 - @apiSuccess {String} access_info.scenario_name 接入场景名称 - @apiSuccess {Array} access_info.ids 接入场景ID列表 - @apiParamapiSuccess {Json} 返回示例: - { - "message": "", - "code": 0, - "data": [ - { - "id": 1, - "rule_name": "测试脱敏规则1", - "match_fields": ["phone", "number"] - "match_pattern": "\\d+", - "operator": "mask_shield", - "operator_params": { - "preserve_head": 1, - "preserve_tail": 2, - "replace_mark": "*" - }, - "is_active": true, - "is_public": false, - "access_num": 4439, - "access_info": [ - { - "scenario_id": "log", - "scenario_name": "采集接入", - "ids": [1,2,3] - }, - { - "scenario_id": "log", - "scenario_name": "自定义上报", - "ids": [4,5,6] - }, - { - "scenario_id": "bkdata", - "scenario_name": "数据平台", - "ids": [7,8,9] - }, - { - "scenario_id": "es", - "scenario_name": "第三方ES", - "ids": [10,11,12] - } - ] - } - ], - "result": true - } - """ - data = self.params_valid(DesensitizeRuleListSerializer) - return Response(DesensitizeRuleHandler().list(is_public=data["is_public"], space_uid=data["space_uid"])) - - def create(self, request, *args, **kwargs): - """ - @api {POST} /api/v1/desensitize/rule/ 创建脱敏规则 - @apiName desensitize_rule create - @apiGroup DesensitizeRule - @apiParam {String} space_uid 空间唯一标识 - @apiParam {String} rule_name 脱敏规则名称 - @apiParam {Array} match_fields 匹配的字段名列表 - @apiParam {String} match_pattern 匹配表达式 - @apiParam {String} operator 脱敏算子 可选字段 ‘mask_shield, text_replace’ - @apiParam {Json} operator_params 脱敏算子参数 - @apiParam {Int} operator_params.preserve_head 掩码屏蔽算子参数 保留前几位 默认 0 - @apiParam {Int} operator_params.preserve_tail 掩码屏蔽算子参数 保留后几位 默认 0 - @apiParam {String} operator_params.replace_mark 掩码屏蔽算子参数 替换符号 默认 * - @apiParam {String} operator_params.template_string 文本替换算子参数 替换模板 - @apiParam {Bool} is_public 是否为全局规则 - @apiParamExample {Json} 请求示例: - { - "space_uid": "bkcc__2", - "rule_name": "测试脱敏规则", - "match_fields": ["phone", "number"], - "match_pattern": "\\d+", - "operator": "mask_shield", - "operator_params": { - "preserve_head": 1, - "preserve_tail": 2, - "replace_mark": "*" - }, - "is_public": false - } - @apiSuccessExample {json} 成功返回 - { - "message": "", - "code": 0, - "data": { - 
"id": 1 - }, - "result": true - } - """ - data = self.params_valid(DesensitizeRuleSerializer) - return Response(DesensitizeRuleHandler().create_or_update(params=data)) - - def update(self, request, *args, id=None, **kwargs): - """ - @api {PUT} /api/v1/desensitize/rule/$rule_id/ 创建脱敏规则 - @apiName desensitize_rule update - @apiGroup DesensitizeRule - @apiParam {String} rule_name 脱敏规则名称 - @apiParam {Array} match_fields 匹配的字段名列表 - @apiParam {String} match_pattern 匹配表达式 - @apiParam {String} operator 脱敏算子 可选字段 ‘mask_shield, text_replace’ - @apiParam {Json} operator_params 脱敏算子参数 - @apiParam {Int} operator_params.preserve_head 掩码屏蔽算子参数 保留前几位 默认 0 - @apiParam {Int} operator_params.preserve_tail 掩码屏蔽算子参数 保留后几位 默认 0 - @apiParam {String} operator_params.replace_mark 掩码屏蔽算子参数 替换符号 默认 * - @apiParam {String} operator_params.template_string 文本替换算子参数 替换模板 - @apiParamExample {Json} 请求示例: - { - "rule_name": "测试脱敏规则", - "match_fields": ["phone", "number"], - "match_pattern": "\\d+", - "operator": "mask_shield", - "operator_params": { - "preserve_head": 1, - "preserve_tail": 2, - "replace_mark": "*" - } - } - @apiSuccessExample {json} 成功返回 - { - "message": "", - "code": 0, - "data": { - "id": 1 - }, - "result": true - } - """ - data = self.params_valid(DesensitizeRuleSerializer) - return Response(DesensitizeRuleHandler(rule_id=int(id)).create_or_update(params=data)) - - def retrieve(self, request, *args, id=None, **kwargs): - """ - @api {GET} /api/v1/desensitize/rule/{$rule_id}/ 脱敏规则详情 - @apiName desensitize_rule retrieve - @apiGroup DesensitizeRule - @apiParam {Int} rule_id 脱敏规则ID - @apiSuccess {String} rule_name 脱敏规则名称 - @apiSuccess {Array} match_fields 匹配的字段名列表 - @apiSuccess {String} match_pattern 匹配表达式 - @apiSuccess {String} operator 脱敏算子 可选字段 ‘mask_shield, text_replace’ - @apiSuccess {Json} operator_params 脱敏算子参数 - @apiSuccess {Int} operator_params.preserve_head 掩码屏蔽算子参数 保留前几位 默认 0 - @apiSuccess {Int} operator_params.preserve_tail 掩码屏蔽算子参数 保留后几位 默认 0 - @apiSuccess {String} operator_params.replace_mark 掩码屏蔽算子参数 替换符号 默认 * - @apiSuccess {String} operator_params.template_string 文本替换算子参数 替换模板 - @apiParamapiSuccess {Json} 返回示例: - { - "message": "", - "code": 0, - "data": { - "rule_name": "测试脱敏规则", - "match_fields": ["phone", "number"] - "match_pattern": "\\d+", - "operator": "mask_shield", - "operator_params": { - "preserve_head": 1, - "preserve_tail": 2, - "replace_mark": "*" - } - }, - "result": true - } - """ - return Response(DesensitizeRuleHandler(rule_id=int(id)).retrieve()) - - def destroy(self, request, *args, id=None, **kwargs): - """ - @api {DELETE} /api/v1/desensitize/rule/{$rule_id}/ 删除脱敏规则 - @apiName desensitize_rule delete - @apiGroup DesensitizeRule - @apiSuccessExample {json} 成功返回: - { - "result": true, - "data": null, - "code": 0, - "message": "" - } - """ - return Response(DesensitizeRuleHandler(rule_id=int(id)).destroy()) diff --git a/bklog/apps/log_desensitize/views/desensitize_views.py b/bklog/apps/log_desensitize/views/desensitize_views.py new file mode 100644 index 000000000..5330603c4 --- /dev/null +++ b/bklog/apps/log_desensitize/views/desensitize_views.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Tencent is pleased to support the open source community by making BK-LOG 蓝鲸日志平台 available. +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +BK-LOG 蓝鲸日志平台 is licensed under the MIT License. 
+License for BK-LOG 蓝鲸日志平台: +-------------------------------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +We undertake not to change the open source license (MIT license) applicable to the current version of +the project delivered to anyone in the future. +""" +from apps.generic import ModelViewSet +from apps.iam.handlers.drf import ViewBusinessPermission +from apps.log_desensitize.models import DesensitizeRule + + +class DesensitizeViesSet(ModelViewSet): + """ + 脱敏规则 + """ + lookup_field = "id" + model = DesensitizeRule + + def get_permissions(self): + return [ViewBusinessPermission()] + + def get_queryset(self): + qs = self.model.objects + return qs diff --git a/bklog/apps/log_esquery/esquery/esquery.py b/bklog/apps/log_esquery/esquery/esquery.py index 8ad878aef..e48a78390 100644 --- a/bklog/apps/log_esquery/esquery/esquery.py +++ b/bklog/apps/log_esquery/esquery/esquery.py @@ -20,36 +20,34 @@ the project delivered to anyone in the future. 
""" import json -from typing import Any, Dict, List, Tuple -from apps.log_esquery.esquery.builder.query_filter_builder import QueryFilterBuilder -from apps.log_esquery.esquery.builder.query_index_optimizer import QueryIndexOptimizer -from apps.log_esquery.esquery.builder.query_sort_builder import QuerySortBuilder -from apps.log_esquery.esquery.builder.query_string_builder import QueryStringBuilder -from apps.log_esquery.esquery.builder.query_time_builder import QueryTimeBuilder -from apps.log_esquery.esquery.client.QueryClient import QueryClient -from apps.log_esquery.esquery.dsl_builder.dsl_builder import DslBuilder +from typing import List, Dict, Any, Tuple +from dateutil import tz + from apps.log_esquery.type_constants import ( - type_addition, - type_index_set_string, type_search_dict, + type_index_set_string, type_time_range_dict, + type_addition, ) -from apps.log_search.exceptions import ( - ScenarioNotSupportedException, - ScenarioQueryIndexFailException, -) +from apps.log_esquery.esquery.builder.query_time_builder import QueryTimeBuilder +from apps.log_esquery.esquery.builder.query_string_builder import QueryStringBuilder +from apps.log_esquery.esquery.builder.query_filter_builder import QueryFilterBuilder +from apps.log_esquery.esquery.builder.query_index_optimizer import QueryIndexOptimizer +from apps.log_esquery.esquery.builder.query_sort_builder import QuerySortBuilder +from apps.log_esquery.esquery.dsl_builder.dsl_builder import DslBuilder from apps.log_search.models import Scenario, Space, SpaceApi +from apps.log_esquery.esquery.client.QueryClient import QueryClient from apps.utils.log import logger +from apps.log_search.exceptions import ScenarioQueryIndexFailException, ScenarioNotSupportedException from apps.utils.time_handler import generate_time_range from bkm_space.utils import bk_biz_id_to_space_uid -from dateutil import tz class EsQuery(object): def __init__(self, search_dict: type_search_dict): + self.search_dict: Dict[str, Any] = search_dict - self.include_nested_fields: bool = search_dict.get("include_nested_fields", True) def _init_common_args(self): # 初始刷查询场景类型 bkdata log 或者 es, 以及连接信息ID @@ -88,6 +86,7 @@ def _init_search_after_args(self): return search_after, track_total_hits def _optimizer(self, indices, scenario_id, start_time, end_time, time_zone, use_time_range): + # 优化query_string query_string: str = self.search_dict.get("query_string") query_string = QueryStringBuilder(query_string).query_string @@ -161,7 +160,7 @@ def search(self): indices, scenario_id, start_time, end_time, time_zone, use_time_range ) size, start, aggs, highlight, scroll, collapse = self._init_other_args() - mappings = self.mapping() if self.include_nested_fields else [] + mappings = self.mapping() # 调用DSL生成器 body = DslBuilder( @@ -308,9 +307,7 @@ def indices(self): if not relate_space_obj: continue relate_bk_biz_id = relate_space_obj.bk_biz_id - relate_indices = client.indices( - bk_biz_id=relate_bk_biz_id, result_table_id=indices, with_storage=with_storage - ) + relate_indices = client.indices(bk_biz_id=relate_bk_biz_id, result_table_id=indices, with_storage=with_storage) result.extend(relate_indices) return result diff --git a/bklog/apps/log_esquery/serializers.py b/bklog/apps/log_esquery/serializers.py index 9a5bf2178..d4f261545 100644 --- a/bklog/apps/log_esquery/serializers.py +++ b/bklog/apps/log_esquery/serializers.py @@ -20,18 +20,21 @@ the project delivered to anyone in the future. 
""" -from apps.exceptions import ValidationError +from django.utils.translation import ugettext_lazy as _ +from rest_framework import serializers + from apps.log_esquery.constants import ES_ROUTE_ALLOW_URL +from apps.log_search.models import Scenario +from apps.exceptions import ValidationError from apps.log_esquery.exceptions import ( BaseSearchIndexSetDataDoseNotExists, BaseSearchIndexSetException, BaseSearchIndexSetIdTimeFieldException, ) + +from apps.log_search.models import LogIndexSet from apps.log_search.constants import SCROLL -from apps.log_search.models import LogIndexSet, Scenario from apps.utils.cache import cache_one_minute -from django.utils.translation import ugettext_lazy as _ -from rest_framework import serializers class EsQuerySearchAttrSerializer(serializers.Serializer): @@ -48,10 +51,7 @@ class EsQuerySearchAttrSerializer(serializers.Serializer): time_field_type = serializers.CharField(required=False, default="date", allow_blank=True, allow_null=True) time_field_unit = serializers.CharField(required=False, default="second", allow_blank=True, allow_null=True) - use_time_range = serializers.BooleanField( - default=True, - label=_("默认使用time_range的方式检索"), - ) + use_time_range = serializers.BooleanField(default=True, label=_("默认使用time_range的方式检索"),) include_start_time = serializers.BooleanField(default=True, label=_("是否包含开始时间点(gte/gt)")) include_end_time = serializers.BooleanField(default=True, label=_("是否包含结束时间点(lte/lt)")) start_time = serializers.CharField(required=False, default="", allow_blank=True, allow_null=True) @@ -93,9 +93,6 @@ class EsQuerySearchAttrSerializer(serializers.Serializer): # 添加scroll参数 scroll = serializers.CharField(required=False, allow_null=True, allow_blank=True) - # 是否包含嵌套字段 - include_nested_fields = serializers.BooleanField(required=False, default=True) - def validate(self, attrs): attrs = super().validate(attrs) @@ -143,6 +140,8 @@ def deal_filter(self, attrs): if operator in [ "is one of", "is not one of", + "eq", + "neq", "=", "!=", "=~", @@ -150,7 +149,7 @@ def deal_filter(self, attrs): "contains", "not contains", "contains match phrase", - "not contains match phrase", + "not contains match phrase" ]: # 逗号分隔是存在问题的 new_value = value.split(",") diff --git a/bklog/apps/log_measure/tasks/report.py b/bklog/apps/log_measure/tasks/report.py index 19adef601..fa7192635 100644 --- a/bklog/apps/log_measure/tasks/report.py +++ b/bklog/apps/log_measure/tasks/report.py @@ -21,14 +21,16 @@ """ import time -from apps.log_measure.constants import COLLECTOR_IMPORT_PATHS -from apps.log_measure.utils.metric import MetricUtils -from bk_monitor.handler.monitor import BKMonitor -from bk_monitor.utils.metric import clear_registered_metrics +from django.conf import settings from celery.schedules import crontab from celery.task import periodic_task, task + +from apps.log_measure.utils.metric import MetricUtils +from apps.log_measure.constants import COLLECTOR_IMPORT_PATHS +from bk_monitor.utils.metric import clear_registered_metrics + from config.domains import MONITOR_APIGATEWAY_ROOT -from django.conf import settings +from bk_monitor.handler.monitor import BKMonitor @periodic_task(run_every=crontab(minute="*/1")) diff --git a/bklog/apps/log_search/constants.py b/bklog/apps/log_search/constants.py index cfe1b5a80..b8e71fcd3 100644 --- a/bklog/apps/log_search/constants.py +++ b/bklog/apps/log_search/constants.py @@ -348,7 +348,6 @@ class GlobalTypeEnum(ChoicesEnum): IS_K8S_DEPLOY = "is_k8s_deploy" PAAS_API_HOST = "paas_api_host" BK_DOMAIN = "bk_domain" - 
RETAIN_EXTRA_JSON = "retain_extra_json" _choices_labels = ( (CATEGORY, _("数据分类")), @@ -370,7 +369,6 @@ class GlobalTypeEnum(ChoicesEnum): (IS_K8S_DEPLOY, _("是否容器化部署")), (PAAS_API_HOST, _("网关地址")), (BK_DOMAIN, _("蓝鲸域名")), - (RETAIN_EXTRA_JSON, _("是否开启保留额外JSON字段开关")), ) @@ -857,6 +855,14 @@ class TimeFieldUnitEnum(ChoicesEnum): _choices_labels = ((SECOND, _("second")), (MILLISECOND, _("millisecond")), (MICROSECOND, _("microsecond"))) +# 时间单位倍数映射关系 +TIME_FIELD_MULTIPLE_MAPPING = { + TimeFieldUnitEnum.SECOND.value: 1000, + TimeFieldUnitEnum.MILLISECOND.value: 1, + TimeFieldUnitEnum.MICROSECOND.value: 1 / 1000, +} + + class FieldDataTypeEnum(ChoicesEnum): """ 字段类型 @@ -1153,7 +1159,6 @@ def get_choices_list_dict(cls) -> list: "gseIndex", "iterationIndex", "__ext", - "__ext_json", "log", "dtEventTimeStamp", "datetime", @@ -1254,6 +1259,17 @@ class UserFunctionGuideType(ChoicesEnum): _choices_keys = (SEARCH_FAVORITE,) +class IndexSetType(ChoicesEnum): + """ + 索引集类型 + """ + + SINGLE = "single" + UNION = "union" + + _choices_labels = ((SINGLE, _("单索引集")), (UNION, _("联合索引集"))) + + # 索引集无数据检查缓存前缀 INDEX_SET_NO_DATA_CHECK_PREFIX = "index_set_no_data_check_prefix" diff --git a/bklog/apps/log_search/decorators.py b/bklog/apps/log_search/decorators.py index 82861660e..ea89e8bf1 100644 --- a/bklog/apps/log_search/decorators.py +++ b/bklog/apps/log_search/decorators.py @@ -22,6 +22,7 @@ import functools import time +from apps.log_search.constants import IndexSetType from apps.log_search.models import UserIndexSetSearchHistory @@ -39,6 +40,7 @@ def wrapper(*args, **kwargs): # 更新查询耗时和记录history result.data["took"] = time_consume history_obj = result.data.get("history_obj") + union_search_history_obj = result.data.get("union_search_history_obj") if history_obj: UserIndexSetSearchHistory.objects.create( index_set_id=history_obj["index_set_id"], @@ -47,6 +49,17 @@ def wrapper(*args, **kwargs): duration=time_consume, ) del result.data["history_obj"] + + if union_search_history_obj: + UserIndexSetSearchHistory.objects.create( + index_set_ids=union_search_history_obj["index_set_ids"], + params=union_search_history_obj["params"], + search_type=union_search_history_obj["search_type"], + duration=time_consume, + index_set_type=IndexSetType.UNION.value + ) + del result.data["union_search_history_obj"] + return result return wrapper diff --git a/bklog/apps/log_search/exceptions.py b/bklog/apps/log_search/exceptions.py index 044e882c6..56256d078 100644 --- a/bklog/apps/log_search/exceptions.py +++ b/bklog/apps/log_search/exceptions.py @@ -232,11 +232,6 @@ class DesensitizeConfigCreateOrUpdateException(BaseIndexSetException): MESSAGE = _("创建或者更新索引集脱敏配置失败:{e}") -class DesensitizeRuleException(BaseIndexSetException): - ERROR_CODE = "326" - MESSAGE = _("[{field_name}] 选定的脱敏规则 [ID:{rule_id}] 不存在或未启用") - - # ================================================= # 管理-检索 # ================================================= @@ -432,6 +427,16 @@ class IntegerErrorException(BaseException): MESSAGE = _("{num} 不是一个合法的数值, 请输入合法数值") +class UnionSearchFieldsFailException(BaseSearchException): + ERROR_CODE = "439" + MESSAGE = _("联合检索获取索引Mapping字段信息失败") + + +class UnionSearchErrorException(BaseSearchException): + ERROR_CODE = "440" + MESSAGE = _("联合检索API异常") + + # ================================================= # 导出 # ================================================= diff --git a/bklog/apps/log_search/handlers/index_set.py b/bklog/apps/log_search/handlers/index_set.py index c7ddc4a58..39185a68a 100644 --- 
a/bklog/apps/log_search/handlers/index_set.py +++ b/bklog/apps/log_search/handlers/index_set.py @@ -21,26 +21,18 @@ """ import re from collections import defaultdict -from typing import Dict, List, Optional, Tuple +from typing import Optional from apps.api import BkLogApi, TransferApi from apps.constants import UserOperationActionEnum, UserOperationTypeEnum from apps.decorators import user_operation_record from apps.feature_toggle.handlers.toggle import feature_switch from apps.iam import Permission, ResourceEnum -from apps.log_databus.constants import ( - STORAGE_CLUSTER_TYPE, - ContainerCollectorType, - Environment, -) +from apps.log_databus.constants import STORAGE_CLUSTER_TYPE from apps.log_databus.handlers.storage import StorageHandler -from apps.log_databus.models import CollectorConfig, ContainerCollectorConfig +from apps.log_databus.models import CollectorConfig from apps.log_desensitize.constants import MODEL_TO_DICT_EXCLUDE_FIELD -from apps.log_desensitize.models import ( - DesensitizeConfig, - DesensitizeFieldConfig, - DesensitizeRule, -) +from apps.log_desensitize.models import DesensitizeConfig, DesensitizeFieldConfig from apps.log_esquery.utils.es_route import EsRoute from apps.log_search.constants import ( BKDATA_INDEX_RE, @@ -54,9 +46,6 @@ TimeFieldUnitEnum, ) from apps.log_search.exceptions import ( - DesensitizeConfigCreateOrUpdateException, - DesensitizeConfigDoseNotExistException, - DesensitizeRuleException, IndexCrossClusterException, IndexListDataException, IndexSetDoseNotExistException, @@ -68,6 +57,8 @@ ScenarioNotSupportedException, SearchUnKnowTimeField, UnauthorizedResultTableException, + DesensitizeConfigDoseNotExistException, + DesensitizeConfigCreateOrUpdateException, ) from apps.log_search.handlers.search.mapping_handlers import MappingHandlers from apps.log_search.models import ( @@ -87,12 +78,10 @@ from apps.utils.local import get_request_app_code, get_request_username from apps.utils.log import logger from apps.utils.thread import MultiExecuteFunc -from bkm_space.api import SpaceApi -from bkm_space.define import SpaceTypeEnum -from bkm_space.utils import bk_biz_id_to_space_uid, space_uid_to_bk_biz_id +from apps.utils.time_handler import strftime_local +from bkm_space.utils import space_uid_to_bk_biz_id from django.conf import settings from django.db import transaction -from django.db.models import Q from django.utils.translation import ugettext as _ @@ -131,30 +120,9 @@ def config(self, config_id: int): else: user_operation_record.delay(operation_record) - @classmethod - def get_all_related_space_uids(cls, space_uid): - """ - 获取当前空间所关联的所有空间ID列表,包括自身 - """ - space_uids = [space_uid] - space = SpaceApi.get_space_detail(space_uid=space_uid) - if space and space.space_type_id == SpaceTypeEnum.BKCC.value: - # 如果查询的是业务空间,则将其关联的其他类型空间的索引集也一并查询出来 - related_space_list = space.extend.get("resources") or [] - space_uids.extend( - [ - SpaceApi.gen_space_uid( - space_type=relate_space["resource_type"], space_id=relate_space["resource_id"] - ) - for relate_space in related_space_list - ] - ) - return space_uids - @classmethod def get_user_index_set(cls, space_uid, scenarios=None): - space_uids = cls.get_all_related_space_uids(space_uid) - index_sets = LogIndexSet.get_index_set(scenarios=scenarios, space_uids=space_uids) + index_sets = LogIndexSet.get_index_set(scenarios=scenarios, space_uid=space_uid) # 补充采集场景 collector_config_ids = [ index_set["collector_config_id"] for index_set in index_sets if index_set["collector_config_id"] @@ -571,88 +539,36 @@ def 
update_or_create_desensitize_config(self, params: dict): """ 创建或更新脱敏配置 """ + # 校验索引集是否存在 + + index_obj_exists = LogIndexSet.objects.filter(index_set_id=self.index_set_id).exists() + if not index_obj_exists: + raise IndexSetDoseNotExistException() try: - obj, created = DesensitizeConfig.objects.update_or_create( - index_set_id=self.index_set_id, defaults={"text_fields": params["text_fields"]} + DesensitizeConfig.objects.update_or_create( + index_set_id=self.index_set_id, + defaults={ + "text_fields": params["text_fields"] + } ) - # 兼容以脱敏配置直接入库 # 创建&更新脱敏字段配置信息 先删除在创建 - if not created: - DesensitizeFieldConfig.objects.filter(index_set_id=self.index_set_id, rule_id=0).delete() - - if not created: - # 更新操作 查询出当前业务下和全局的脱敏规则 包含已删除的 - rule_objs = DesensitizeRule.origin_objects.filter( - Q(space_uid=self.data.space_uid) | Q(is_public=True) - ).all() - else: - # 创建操作 查询出当前业务下和全局的脱敏规则 不包含已删除的 - rule_objs = DesensitizeRule.objects.filter(Q(space_uid=self.data.space_uid) | Q(is_public=True)).all() + DesensitizeFieldConfig.objects.filter(index_set_id=self.index_set_id).delete() - rules_mapping = {rule_obj.id: model_to_dict(rule_obj) for rule_obj in rule_objs} - - # 构建配置直接入库批量创建参数列表 + # 构建入库参数 bulk_create_list = list() for field_config in params["field_configs"]: - field_name = field_config.get("field_name") - sort_index = 0 - rule_ids = list() - for rule in field_config.get("rules"): - rule_id = rule.get("rule_id") - rule_state = rule.get("state") - model_params = { - "operator": "", - "params": "", - "match_pattern": "", - } - if rule_id: - # 根据规则状态执行不同的操作 - if rule_state in ["add", "update"]: - if rule_id not in rules_mapping.keys(): - raise DesensitizeRuleException( - DesensitizeRuleException.MESSAGE.format(field_name=field_name, rule_id=rule_id) - ) - # 读最新的配置入库 - rule_info = rules_mapping[rule_id] - model_params["operator"] = rule_info["operator"] - model_params["params"] = rule_info["params"] - model_params["match_pattern"] = rule_info["match_pattern"] - model_params["sort_index"] = sort_index - DesensitizeFieldConfig.objects.update_or_create( - index_set_id=self.index_set_id, - field_name=field_name, - rule_id=rule_id, - defaults=model_params, - ) - else: - # 只更新优先级 - DesensitizeFieldConfig.objects.filter( - index_set_id=self.index_set_id, field_name=field_name, rule_id=rule_id - ).update(sort_index=sort_index) - rule_ids.append(rule_id) - - else: - bulk_create_params = { - "index_set_id": self.index_set_id, - "field_name": field_name, - "rule_id": 0, - "operator": rule.get("operator"), - "params": rule.get("params"), - "sort_index": sort_index, - } - bulk_create_list.append(DesensitizeFieldConfig(**bulk_create_params)) - - sort_index += 1 - if rule_ids and not created: - # 处理删除的字段配置 删除字段绑定库里存在但是更新时不存在的规则ID - DesensitizeFieldConfig.objects.filter( - index_set_id=self.index_set_id, field_name=field_name - ).exclude(rule_id__in=rule_ids).delete() - - if bulk_create_list: - DesensitizeFieldConfig.objects.bulk_create(bulk_create_list) + model_params = { + "index_set_id": self.index_set_id, + "field_name": field_config.get("field_name"), + "rule_id": field_config.get("rule_id") or 0, + "operator": field_config.get("operator"), + "params": field_config.get("params"), + } + bulk_create_list.append(DesensitizeFieldConfig(**model_params)) + + DesensitizeFieldConfig.objects.bulk_create(bulk_create_list) except Exception as e: raise DesensitizeConfigCreateOrUpdateException(DesensitizeConfigCreateOrUpdateException.MESSAGE.format(e=e)) @@ -671,17 +587,8 @@ def desensitize_config_retrieve(self): # 构建返回数据 ret = 
model_to_dict(desensitize_obj) ret["field_configs"] = list() - - # 构建字段绑定的 rules {"field_name": [{rule_id: 1}]} - field_info_mapping = dict() - for _obj in desensitize_field_config_objs: - field_name = _obj.field_name - if field_name not in field_info_mapping.keys(): - field_info_mapping[field_name] = list() - field_info_mapping[field_name].append(model_to_dict(_obj, exclude=MODEL_TO_DICT_EXCLUDE_FIELD)) - - for _field_name, _rules in field_info_mapping.items(): - ret["field_configs"].append({"field_name": _field_name, "rules": _rules}) + for field_config_obj in desensitize_field_config_objs: + ret["field_configs"].append(model_to_dict(field_config_obj, exclude=MODEL_TO_DICT_EXCLUDE_FIELD)) return ret @@ -797,6 +704,7 @@ def replace( time_field_type=None, time_field_unit=None, ): + # 检查索引集是否存在 index_set_obj = LogIndexSet.objects.filter(index_set_name=index_set_name).first() if index_set_obj and index_set_obj.source_app_code != bk_app_code: @@ -898,243 +806,6 @@ def replace( return index_set - @staticmethod - def get_or_create_bcs_project_std_index_set(bcs_cluster_id, bk_biz_id, storage_cluster_id, bcs_project_id=""): - """ - 创建或获取 bcs project std 索引集 - """ - from apps.log_databus.handlers.collector import ( - build_result_table_id, - convert_lower_cluster_id, - ) - - space_uid = bk_biz_id_to_space_uid(bk_biz_id) - lower_cluster_id = convert_lower_cluster_id(bcs_cluster_id) - src_index_list = LogIndexSet.objects.filter(space_uid=space_uid) - std_index_set_name = f"{bcs_cluster_id}_std" - std_index_set = src_index_list.filter(index_set_name=std_index_set_name).first() - if not std_index_set: - std_index_set = IndexSetHandler.create( - index_set_name=std_index_set_name, - space_uid=space_uid, - storage_cluster_id=storage_cluster_id, - scenario_id=Scenario.ES, - view_roles=None, - indexes=[ - { - "bk_biz_id": bk_biz_id, - "result_table_id": build_result_table_id(bk_biz_id, f"{lower_cluster_id}_*_std_*").replace( - ".", "_" - ), - "result_table_name": std_index_set_name, - "time_field": DEFAULT_TIME_FIELD, - } - ], - username="admin", - category_id="kubernetes", - bcs_project_id=bcs_project_id, - is_editable=False, - time_field=DEFAULT_TIME_FIELD, - time_field_type=TimeFieldTypeEnum.DATE.value, - time_field_unit=TimeFieldUnitEnum.MILLISECOND.value, - ) - return std_index_set - - @staticmethod - def get_or_create_bcs_project_path_index_set(bcs_cluster_id, bk_biz_id, storage_cluster_id, bcs_project_id=""): - """ - 创建或获取 bcs project path 索引集 - """ - from apps.log_databus.handlers.collector import ( - build_result_table_id, - convert_lower_cluster_id, - ) - - space_uid = bk_biz_id_to_space_uid(bk_biz_id) - lower_cluster_id = convert_lower_cluster_id(bcs_cluster_id) - - src_index_list = LogIndexSet.objects.filter(space_uid=space_uid) - - path_index_set_name = f"{bcs_cluster_id}_path" - path_index_set = src_index_list.filter(index_set_name=path_index_set_name).first() - if not path_index_set: - path_index_set = IndexSetHandler.create( - index_set_name=path_index_set_name, - space_uid=space_uid, - storage_cluster_id=storage_cluster_id, - scenario_id=Scenario.ES, - view_roles=None, - indexes=[ - { - "bk_biz_id": bk_biz_id, - "result_table_id": build_result_table_id(bk_biz_id, f"{lower_cluster_id}_*_path_*").replace( - ".", "_" - ), - "result_table_name": path_index_set_name, - "time_field": DEFAULT_TIME_FIELD, - } - ], - username="admin", - category_id="kubernetes", - bcs_project_id=bcs_project_id, - is_editable=False, - time_field=DEFAULT_TIME_FIELD, - time_field_type=TimeFieldTypeEnum.DATE.value, - 
time_field_unit=TimeFieldUnitEnum.MILLISECOND.value, - ) - return path_index_set - - @classmethod - def list_non_bcs_cluster_indexes(cls, bcs_cluster_id: str, bk_biz_id: int) -> Dict[str, List[str]]: - """ - 获取非BCS创建的容器索引集, 按照std和path分类 - :param bcs_cluster_id: bcs集群ID - :param bk_biz_id: 业务ID - """ - indexes: Dict[str, List[str]] = {"std": [], "path": []} - # 通用函数, 获取非BCS创建的容器采集项, 以及对应容器采集的map - queryset = CollectorConfig.objects.filter( - rule_id=0, - environment=Environment.CONTAINER, - bk_biz_id=bk_biz_id, - bcs_cluster_id=bcs_cluster_id, - # 过滤掉未完成的采集项, 因为未完成的采集项table_id会为空 - table_id__isnull=False, - ) - collectors = queryset.all() - if not collectors: - return indexes - # 获取采集项对应的容器采集配置 - container_collector_config_queryset = ContainerCollectorConfig.objects.filter( - collector_config_id__in=list(collectors.values_list("collector_config_id", flat=True)), - collector_type__in=[ContainerCollectorType.CONTAINER, ContainerCollectorType.STDOUT], - ) - container_collector_configs = container_collector_config_queryset.all() - container_config_map: Dict[int, ContainerCollectorConfig] = { - c.collector_config_id: c for c in container_collector_configs - } - - for collector in collectors: - if not container_config_map.get(collector.collector_config_id): - continue - container_config = container_config_map[collector.collector_config_id] - result_table_id = "{table_id}_*".format(table_id=collector.table_id.replace(".", "_")) - if container_config.collector_type == ContainerCollectorType.STDOUT: - indexes["std"].append(result_table_id) - else: - indexes["path"].append(result_table_id) - return indexes - - @classmethod - def is_bcs_index_set(cls, index_set: LogIndexSet) -> Tuple[bool, Optional[CollectorConfig]]: - if not index_set.collector_config_id: - return False, None - # 判断是否是容器采集 - collector_config = ( - CollectorConfig.objects.filter( - collector_config_id=index_set.collector_config_id, - environment=Environment.CONTAINER, - ) - .exclude(rule_id=0) - .first() - ) - if not collector_config: - return False, None - return True, collector_config - - @classmethod - def is_container_index_set( - cls, index_set: LogIndexSet - ) -> Tuple[bool, Optional[CollectorConfig], Optional[ContainerCollectorConfig]]: - if not index_set.collector_config_id: - return False, None, None - # 判断是否是容器采集 - collector_config = CollectorConfig.objects.filter( - collector_config_id=index_set.collector_config_id, - environment=Environment.CONTAINER, - ).first() - if not collector_config: - return False, None, None - # 判断是否是std或者path的容器采集 - queryset = ContainerCollectorConfig.objects.filter( - collector_config_id=collector_config.collector_config_id, - collector_type__in=[ContainerCollectorType.CONTAINER, ContainerCollectorType.STDOUT], - ) - container_config = queryset.first() - if not container_config: - return False, None, None - return True, collector_config, container_config - - @classmethod - def sync_container_indexes(cls, index_set: LogIndexSet): - """ - 同步非BCS创建的索引, 将其添加到对应的BCS索引集中 - """ - is_container_index_set, collector_config, container_config = cls.is_container_index_set(index_set=index_set) - if not is_container_index_set: - return - indexes: Dict[str, List[str]] = IndexSetHandler.list_non_bcs_cluster_indexes( - bk_biz_id=collector_config.bk_biz_id, - bcs_cluster_id=collector_config.bcs_cluster_id, - ) - enable_std = container_config.collector_type == ContainerCollectorType.STDOUT - if enable_std: - bcs_index_set = cls.get_or_create_bcs_project_std_index_set( - 
bcs_cluster_id=collector_config.bcs_cluster_id, - bk_biz_id=collector_config.bk_biz_id, - storage_cluster_id=index_set.storage_cluster_id, - bcs_project_id=index_set.bcs_project_id, - ) - IndexSetHandler.sync_bcs_indexes(index_set=bcs_index_set, bcs_indexes=indexes, enable_std=True) - else: - bcs_index_set = cls.get_or_create_bcs_project_path_index_set( - bcs_cluster_id=collector_config.bcs_cluster_id, - bk_biz_id=collector_config.bk_biz_id, - storage_cluster_id=index_set.storage_cluster_id, - bcs_project_id=index_set.bcs_project_id, - ) - IndexSetHandler.sync_bcs_indexes(index_set=bcs_index_set, bcs_indexes=indexes, enable_std=False) - - @classmethod - def sync_bcs_indexes(cls, index_set: LogIndexSet, bcs_indexes: Dict[str, List[str]], enable_std: bool = True): - """ - 同步BCS创建的索引集索引, 拉取符合规则的所有std和path的索引集, 添加到这个索引集中 - """ - is_bcs_index_set, collector_config = cls.is_bcs_index_set(index_set=index_set) - if not is_bcs_index_set: - return - if enable_std: - indexes = bcs_indexes.get("std", []) - result_table_name = f"{collector_config.bcs_cluster_id}_std" - else: - indexes = bcs_indexes.get("path", []) - result_table_name = f"{collector_config.bcs_cluster_id}_path" - if not indexes: - return - # 获取BCS索引集已有的索引 - bcs_indexes = ( - LogIndexSetData.objects.filter(index_set_id=index_set.index_set_id) - .exclude(result_table_name=result_table_name) - .all() - ) - # 删除不在索引列表中的索引 - for index in bcs_indexes: - if index.result_table_id in bcs_indexes: - continue - index.delete() - # 获取要添加的索引 - diff_rt_id_list = list(set(indexes).difference({index.result_table_id for index in bcs_indexes})) - for rt in diff_rt_id_list: - LogIndexSetDataHandler( - index_set_data=index_set, - bk_biz_id=collector_config.bk_biz_id, - time_filed=DEFAULT_TIME_FIELD, - result_table_id=rt, - storage_cluster_id=index_set.storage_cluster_id, - result_table_name=result_table_name, - bk_username=get_request_username(), - ).add_index() - class BaseIndexSetHandler(object): scenario_id = None @@ -1238,7 +909,6 @@ def delete_index_set(self, index_set_obj): self.pre_delete() self.delete() - self.post_delete(index_set=index_set_obj) def pre_create(self): """ @@ -1445,33 +1115,10 @@ def post_create(self, index_set): class EsIndexSetHandler(BaseIndexSetHandler): scenario_id = Scenario.ES - def post_create(self, index_set: LogIndexSet): - super(EsIndexSetHandler, self).post_create(index_set) - is_bcs_index_set, collector_config = IndexSetHandler.is_bcs_index_set(index_set) - if not is_bcs_index_set: - return - indexes: Dict[str, List[str]] = IndexSetHandler.list_non_bcs_cluster_indexes( - bk_biz_id=collector_config.bk_biz_id, - bcs_cluster_id=collector_config.bcs_cluster_id, - ) - IndexSetHandler.sync_bcs_indexes(index_set=index_set, bcs_indexes=indexes, enable_std=True) - IndexSetHandler.sync_bcs_indexes(index_set=index_set, bcs_indexes=indexes, enable_std=False) - class LogIndexSetHandler(BaseIndexSetHandler): scenario_id = Scenario.LOG - def post_create(self, index_set: LogIndexSet): - """ - LOG场景创建索引集后,判断如果是容器采集索引, 且非BCS创建的容器采集索引, 则需要将该索引的通配形式添加到对应集群的std和path索引集中 - """ - super(LogIndexSetHandler, self).post_create(index_set) - IndexSetHandler.sync_container_indexes(index_set=index_set) - - def post_delete(self, index_set: LogIndexSet): - super(LogIndexSetHandler, self).post_delete(index_set) - IndexSetHandler.sync_container_indexes(index_set=index_set) - class LogIndexSetDataHandler(object): def __init__( diff --git a/bklog/apps/log_search/handlers/search/aggs_handlers.py b/bklog/apps/log_search/handlers/search/aggs_handlers.py index 
4d0bd88f1..77abb290e 100644 --- a/bklog/apps/log_search/handlers/search/aggs_handlers.py +++ b/bklog/apps/log_search/handlers/search/aggs_handlers.py @@ -32,6 +32,7 @@ ) from apps.utils.local import get_local_param from apps.utils.log import logger +from apps.utils.thread import MultiExecuteFunc from apps.utils.time_handler import ( DTEVENTTIMESTAMP_MULTIPLICATOR, generate_time_range, @@ -167,7 +168,10 @@ def date_histogram(cls, index_set_id, query_data: dict): time_format = cls.TIME_FORMAT_MAP.get(interval, cls.TIME_FORMAT) datetime_format = cls.DATETIME_FORMAT_MAP.get(interval, cls.DATETIME_FORMAT) - time_field, time_field_type, time_field_unit = SearchHandlerEsquery.init_time_field(index_set_id) + esquery_obj = SearchHandlerEsquery(index_set_id, copy.deepcopy(query_data)) + time_field = esquery_obj.time_field + time_field_type = esquery_obj.time_field_type + # https://github.com/elastic/elasticsearch/issues/42270 非date类型不支持timezone, time format也无效 if time_field_type == TimeFieldTypeEnum.DATE.value: min_value = start_time.timestamp * 1000 @@ -183,9 +187,9 @@ def date_histogram(cls, index_set_id, query_data: dict): ) else: num = 10 ** 3 - if time_field_unit == TimeFieldUnitEnum.SECOND.value: + if esquery_obj.time_field_unit == TimeFieldUnitEnum.SECOND.value: num = 1 - elif time_field_unit == TimeFieldUnitEnum.MICROSECOND.value: + elif esquery_obj.time_field_unit == TimeFieldUnitEnum.MICROSECOND.value: num = 10 ** 6 min_value = start_time.timestamp * num max_value = end_time.timestamp * num @@ -207,9 +211,9 @@ def date_histogram(cls, index_set_id, query_data: dict): if time_field_type != TimeFieldTypeEnum.DATE.value: buckets = result.get("aggregations", {}).get("group_by_histogram", {}).get("buckets", []) time_multiplicator = 1 / (10 ** 3) - if time_field_unit == TimeFieldUnitEnum.SECOND.value: + if esquery_obj.time_field_unit == TimeFieldUnitEnum.SECOND.value: time_multiplicator = 1 - elif time_field_unit == TimeFieldUnitEnum.MICROSECOND.value: + elif esquery_obj.time_field_unit == TimeFieldUnitEnum.MICROSECOND.value: time_multiplicator = 1 / (10 ** 6) for _buckets in buckets: _buckets["key_as_string"] = timestamp_to_timeformat( @@ -316,6 +320,7 @@ def date_histogram(self, index_set_id, query_data: dict): histogram_dict = {} labels = [] for _data in histogram_data.get("buckets", []): + # labels 横坐标时间轴 labels.append(_data.get("key_as_string")) @@ -333,6 +338,7 @@ def date_histogram(self, index_set_id, query_data: dict): # doc: key, count buckets = _data.get(field, {}).get("buckets", []) for _doc in buckets: + # 获取指标值和doc_count if metric_key == "doc_count": doc_count = doc_value = _doc.get("doc_count") or 0 @@ -385,6 +391,45 @@ def date_histogram(self, index_set_id, query_data: dict): return_data["aggs"] = self._del_empty_histogram(return_data["aggs"]) return return_data + @staticmethod + def union_search_date_histogram(query_data: dict): + index_set_ids = query_data.get("index_set_ids", []) + + # 多线程请求数据 + multi_execute_func = MultiExecuteFunc() + + for index_set_id in index_set_ids: + params = {"index_set_id": index_set_id, "query_data": query_data} + multi_execute_func.append( + result_key=f"union_search_date_histogram_{index_set_id}", + func=AggsViewAdapter().date_histogram, + params=params, + multi_func_params=True, + ) + + multi_result = multi_execute_func.run() + + buckets_info = dict() + # 处理返回结果 + for index_set_id in index_set_ids: + result = multi_result.get(f"union_search_date_histogram_{index_set_id}", {}) + aggs = result.get("aggs", {}) + if not aggs: + continue + buckets = 
aggs["group_by_histogram"]["buckets"] + for bucket in buckets: + key_as_string = bucket["key_as_string"] + if key_as_string not in buckets_info: + buckets_info[key_as_string] = bucket + else: + buckets_info[key_as_string]["doc_count"] += bucket["doc_count"] + + ret_data = ( + {"aggs": {"group_by_histogram": {"buckets": buckets_info.values()}}} if buckets_info else {"aggs": {}} + ) + + return ret_data + def _del_empty_histogram(self, aggs): """ 将对应data.count为空的label去除 diff --git a/bklog/apps/log_search/handlers/search/async_export_handlers.py b/bklog/apps/log_search/handlers/search/async_export_handlers.py index 39d0ac21e..fa9dd409e 100644 --- a/bklog/apps/log_search/handlers/search/async_export_handlers.py +++ b/bklog/apps/log_search/handlers/search/async_export_handlers.py @@ -31,6 +31,7 @@ MAX_GET_ATTENTION_SIZE, ExportStatus, ExportType, + IndexSetType, ) from apps.log_search.exceptions import ( MissAsyncExportException, @@ -51,9 +52,17 @@ class AsyncExportHandlers(object): - def __init__(self, index_set_id: int, bk_biz_id, search_dict: dict = None, export_fields=None): + def __init__( + self, + index_set_id: int = None, + bk_biz_id=None, + search_dict: dict = None, + export_fields=None, + index_set_ids: list = None, + ): self.index_set_id = index_set_id self.bk_biz_id = bk_biz_id + self.index_set_ids = index_set_ids if search_dict: self.search_dict = search_dict self.search_handler = SearchHandler( @@ -135,11 +144,16 @@ def _get_search_url(self): ) return search_url - def get_export_history(self, request, view, show_all=False): - # 这里当show_all为true的时候则给前端返回当前业务全部导出历史 - query_set = AsyncTask.objects.filter(bk_biz_id=self.bk_biz_id) - if not show_all: - query_set = query_set.filter(index_set_id=self.index_set_id) + def get_export_history(self, request, view, show_all=False, is_union_search=False): + if is_union_search: + query_set = AsyncTask.objects.filter(bk_biz_id=self.bk_biz_id, index_set_type=IndexSetType.UNION.value) + if not show_all: + query_set = query_set.filter(index_set_ids=self.index_set_ids) + else: + # 这里当show_all为true的时候则给前端返回当前业务全部导出历史 + query_set = AsyncTask.objects.filter(bk_biz_id=self.bk_biz_id, index_set_type=IndexSetType.SINGLE.value) + if not show_all: + query_set = query_set.filter(index_set_id=self.index_set_id) pg = DataPageNumberPagination() page_export_task_history = pg.paginate_queryset( queryset=query_set.order_by("-created_at", "created_by"), request=request, view=view @@ -149,21 +163,23 @@ def get_export_history(self, request, view, show_all=False): ) res = pg.get_paginated_response( [ - self.generate_export_history(model_to_dict(history), index_set_retention) + self.generate_export_history( + model_to_dict(history), index_set_retention, is_union_search=is_union_search + ) for history in page_export_task_history ] ) return res @classmethod - def generate_export_history(cls, export_task_history, index_set_retention): + def generate_export_history(cls, export_task_history, index_set_retention, is_union_search=False): download_able = cls.judge_download_able(export_task_history["export_status"]) retry_able = cls.judge_retry_able( export_task_history["end_time"], retention=index_set_retention.get(export_task_history["index_set_id"]) ) - return { + + res = { "id": export_task_history["id"], - "log_index_set_id": export_task_history["index_set_id"], "search_dict": export_task_history["request_param"], "start_time": export_task_history["start_time"], "end_time": export_task_history["end_time"], @@ -178,8 +194,20 @@ def generate_export_history(cls, 
export_task_history, index_set_retention): "export_completed_at": export_task_history["completed_at"], "download_able": download_able, "retry_able": retry_able, + "index_set_type": export_task_history["index_set_type"], } + if not is_union_search: + res.update({"log_index_set_id": export_task_history["index_set_id"]}) + else: + res.update( + { + "log_index_set_ids": export_task_history["index_set_ids"], + } + ) + + return res + @classmethod def judge_download_able(cls, status): if status == ExportStatus.DOWNLOAD_EXPIRED: diff --git a/bklog/apps/log_search/handlers/search/favorite_handlers.py b/bklog/apps/log_search/handlers/search/favorite_handlers.py index 18e9c5940..8692ed3d1 100644 --- a/bklog/apps/log_search/handlers/search/favorite_handlers.py +++ b/bklog/apps/log_search/handlers/search/favorite_handlers.py @@ -27,7 +27,7 @@ INDEX_SET_NOT_EXISTED, FavoriteGroupType, FavoriteListOrderType, - FavoriteVisibleType, + FavoriteVisibleType, IndexSetType, ) from apps.log_search.exceptions import ( FavoriteAlreadyExistException, @@ -74,25 +74,49 @@ def __init__(self, favorite_id: int = None, space_uid: str = None) -> None: def retrieve(self) -> dict: """收藏详情""" result = model_to_dict(self.data) - if LogIndexSet.objects.filter(index_set_id=result["index_set_id"]).exists(): - result["is_active"] = True - result["index_set_name"] = LogIndexSet.objects.get(index_set_id=result["index_set_id"]).index_set_name + if result["index_set_type"] == IndexSetType.UNION.value: + active_index_set_id_dict = { + i["index_set_id"]: {"index_set_name": i["index_set_name"], "is_active": i["is_active"]} + for i in LogIndexSet.objects.filter(index_set_id__in=result["index_set_ids"]).values( + "index_set_id", "index_set_name", "is_active" + ) + } + is_actives = [] + index_set_names = [] + for index_set_id in result["index_set_ids"]: + if active_index_set_id_dict.get(index_set_id): + is_actives.append(active_index_set_id_dict[index_set_id]["is_active"]) + index_set_names.append(active_index_set_id_dict[index_set_id]["index_set_name"]) + else: + is_actives.append(False) + index_set_names.append(INDEX_SET_NOT_EXISTED) + result["is_actives"] = is_actives + result["index_set_names"] = index_set_names else: - result["is_active"] = False - result["index_set_name"] = INDEX_SET_NOT_EXISTED + if LogIndexSet.objects.filter(index_set_id=result["index_set_id"]).exists(): + result["is_active"] = True + result["index_set_name"] = LogIndexSet.objects.get(index_set_id=result["index_set_id"]).index_set_name + else: + result["is_active"] = False + result["index_set_name"] = INDEX_SET_NOT_EXISTED result["query_string"] = generate_query_string(self.data.params) result["created_at"] = result["created_at"] result["updated_at"] = result["updated_at"] return result - def list_group_favorites(self, order_type: str = FavoriteListOrderType.NAME_ASC.value) -> list: + def list_group_favorites(self, order_type: str = FavoriteListOrderType.NAME_ASC.value, index_set_type: str = IndexSetType.SINGLE.value) -> list: """收藏栏分组后且排序后的收藏列表""" # 获取排序后的分组 groups = FavoriteGroupHandler(space_uid=self.space_uid).list() group_info = {i["id"]: i for i in groups} # 将收藏分组 - favorites = Favorite.get_user_favorite(space_uid=self.space_uid, username=self.username, order_type=order_type) + favorites = Favorite.get_user_favorite( + space_uid=self.space_uid, + username=self.username, + order_type=order_type, + index_set_type=index_set_type + ) favorites_by_group = defaultdict(list) for favorite in favorites: favorites_by_group[favorite["group_id"]].append(favorite) @@ 
-106,12 +130,17 @@ def list_group_favorites(self, order_type: str = FavoriteListOrderType.NAME_ASC. for group in groups ] - def list_favorites(self, order_type: str = FavoriteListOrderType.NAME_ASC.value) -> list: + def list_favorites(self, order_type: str = FavoriteListOrderType.NAME_ASC.value, index_set_type: str = IndexSetType.SINGLE.value) -> list: """管理界面列出根据name A-Z排序的所有收藏""" # 获取排序后的分组 groups = FavoriteGroupHandler(space_uid=self.space_uid).list() group_info = {i["id"]: i for i in groups} - favorites = Favorite.get_user_favorite(space_uid=self.space_uid, username=self.username, order_type=order_type) + favorites = Favorite.get_user_favorite( + space_uid=self.space_uid, + username=self.username, + order_type=order_type, + index_set_type=index_set_type + ) return [ { "id": fi["id"], @@ -147,6 +176,8 @@ def create_or_update( display_fields: list, index_set_id: int = None, group_id: int = None, + index_set_ids: list = None, + index_set_type: str = IndexSetType.SINGLE.value ) -> dict: # 构建params params = {"ip_chooser": ip_chooser, "addition": addition, "keyword": keyword, "search_fields": search_fields} @@ -200,6 +231,8 @@ def create_or_update( visible_type=visible_type, is_enable_display_fields=is_enable_display_fields, display_fields=display_fields, + index_set_ids=index_set_ids, + index_set_type=index_set_type ) return model_to_dict(self.data) diff --git a/bklog/apps/log_search/handlers/search/mapping_handlers.py b/bklog/apps/log_search/handlers/search/mapping_handlers.py index 7f78bd66b..440757c4a 100644 --- a/bklog/apps/log_search/handlers/search/mapping_handlers.py +++ b/bklog/apps/log_search/handlers/search/mapping_handlers.py @@ -50,7 +50,7 @@ Scenario, UserIndexSetFieldsConfig, ) -from apps.utils.cache import cache_one_minute, cache_ten_minute +from apps.utils.cache import cache_ten_minute from apps.utils.local import get_local_param, get_request_username from apps.utils.time_handler import generate_time_range from django.conf import settings @@ -210,7 +210,7 @@ def get_final_fields(self): fields_list = self._combine_description_field(fields_list) return self._combine_fields(fields_list) - def get_all_fields_by_index_id(self, scope=SearchScopeEnum.DEFAULT.value): + def get_all_fields_by_index_id(self, scope=SearchScopeEnum.DEFAULT.value, is_union_search=False): """ get_all_fields_by_index_id @param scope: @@ -220,6 +220,12 @@ def get_all_fields_by_index_id(self, scope=SearchScopeEnum.DEFAULT.value): # search_context情况,默认只显示log字段 # if scope in CONTEXT_SCOPE: # return self._get_context_fields(final_fields_list) + + # 其它情况 + default_config = self.get_or_create_default_config(scope=scope) + if is_union_search: + return final_fields_list, default_config.display_fields + username = get_request_username() user_index_set_config_obj = UserIndexSetFieldsConfig.get_config( index_set_id=self.index_set_id, username=username, scope=scope @@ -247,8 +253,6 @@ def get_all_fields_by_index_id(self, scope=SearchScopeEnum.DEFAULT.value): final_field["is_display"] = True return final_fields_list, display_fields_list - # 其它情况 - default_config = self.get_or_create_default_config(scope=scope) return final_fields_list, default_config.display_fields @atomic @@ -282,7 +286,7 @@ def get_default_sort_list( default_sort_tag: bool = False, ): """默认字段排序规则""" - time_field = cls._get_time_field(index_set_id) + time_field = cls.get_time_field(index_set_id) if scope in ["trace_detail", "trace_scatter"]: return [[time_field, "asc"]] if default_sort_tag and scenario_id == Scenario.BKDATA: @@ -300,7 +304,7 @@ def 
get_default_fields(self, scope=SearchScopeEnum.DEFAULT.value): _field["is_display"] = True return final_fields_list, ["log"] return final_fields_list, [] - display_fields_list = [self._get_time_field(self.index_set_id)] + display_fields_list = [self.get_time_field(self.index_set_id)] if self._get_object_field(final_fields_list): display_fields_list.append(self._get_object_field(final_fields_list)) display_fields_list.extend(self._get_text_fields(final_fields_list)) @@ -315,7 +319,7 @@ def get_default_fields(self, scope=SearchScopeEnum.DEFAULT.value): return final_fields_list, display_fields_list @classmethod - def _get_time_field(cls, index_set_id: int): + def get_time_field(cls, index_set_id: int): """获取索引时间字段""" index_set_obj: LogIndexSet = LogIndexSet.objects.filter(index_set_id=index_set_id).first() if index_set_obj.scenario_id in [Scenario.BKDATA, Scenario.LOG]: @@ -360,7 +364,6 @@ def _get_text_fields(self, final_fields_list: list): def _get_mapping(self): return self._get_latest_mapping(index_set_id=self.index_set_id) - @cache_one_minute("latest_mapping_key_{index_set_id}") def _get_latest_mapping(self, *, index_set_id): # noqa start_time, end_time = generate_time_range("1d", "", "", self.time_zone) latest_mapping = BkLogApi.mapping( @@ -875,6 +878,7 @@ def _analyze_path_fields(fields): return _("必须ip或者container_id字段") elif "gseIndex" in fields_list: + if "serverIp" not in fields_list: return _("必须serverIp字段") diff --git a/bklog/apps/log_search/handlers/search/pre_search_handlers.py b/bklog/apps/log_search/handlers/search/pre_search_handlers.py index bcc818baa..73eac6439 100644 --- a/bklog/apps/log_search/handlers/search/pre_search_handlers.py +++ b/bklog/apps/log_search/handlers/search/pre_search_handlers.py @@ -21,7 +21,6 @@ """ from apps.api import BkLogApi -from apps.log_search.constants import FieldDataTypeEnum from apps.log_search.handlers.search.mapping_handlers import MappingHandlers from apps.log_trace.handlers.trace_field_handlers import TraceMappingAdapter from apps.utils.cache import cache_one_day @@ -34,30 +33,23 @@ def __init__(self): @classmethod def pre_check_fields(cls, indices: str, scenario_id: str, storage_cluster_id: int) -> dict: trace_type = None - default_sort_tag: bool = False result_table_id_list: list = indices.split(",") if not result_table_id_list: - return {"default_sort_tag": default_sort_tag, "trace_type": trace_type} + return {"default_sort_tag": False, "trace_type": trace_type} result_table_id, *_ = result_table_id_list # get fields from cache fields = cls._get_fields( result_table_id=result_table_id, scenario_id=scenario_id, storage_cluster_id=storage_cluster_id ) - # 是否包含嵌套字段 - include_nested_fields: bool = [x["field_type"] for x in fields].count(FieldDataTypeEnum.NESTED.value) > 0 # Trace type trace_type = TraceMappingAdapter.adapter(fields) + fields_list: list = [x["field"] for x in fields] if ("gseindex" in fields_list and "_iteration_idx" in fields_list) or ( "gseIndex" in fields_list and "iterationIndex" in fields_list ): - default_sort_tag = True - return { - "default_sort_tag": default_sort_tag, - "trace_type": trace_type, - "fields_from_es": fields, - "include_nested_fields": include_nested_fields, - } + return {"default_sort_tag": True, "trace_type": trace_type, "fields_from_es": fields} + return {"default_sort_tag": False, "trace_type": trace_type, "fields_from_es": fields} @staticmethod @cache_one_day("fields_{result_table_id}") diff --git a/bklog/apps/log_search/handlers/search/search_handlers_esquery.py 
b/bklog/apps/log_search/handlers/search/search_handlers_esquery.py index 6d9e94633..e1136d813 100644 --- a/bklog/apps/log_search/handlers/search/search_handlers_esquery.py +++ b/bklog/apps/log_search/handlers/search/search_handlers_esquery.py @@ -20,18 +20,22 @@ the project delivered to anyone in the future. """ import copy +import functools import hashlib import json +from operator import itemgetter from typing import Any, Dict, List, Union from apps.api import BcsCcApi, BkLogApi, MonitorApi from apps.api.base import DataApiRetryClass from apps.exceptions import ApiRequestError, ApiResultError from apps.feature_toggle.handlers.toggle import FeatureToggleObject +from apps.iam import ActionEnum, Permission, ResourceEnum from apps.log_clustering.models import ClusteringConfig from apps.log_databus.constants import EtlConfig from apps.log_databus.models import CollectorConfig from apps.log_desensitize.handlers.desensitize import DesensitizeHandler +from apps.log_desensitize.handlers.utils import desensitize_params_init from apps.log_desensitize.models import DesensitizeConfig, DesensitizeFieldConfig from apps.log_search.constants import ( ASYNC_SORTED, @@ -44,7 +48,9 @@ MAX_RESULT_WINDOW, MAX_SEARCH_SIZE, SCROLL, + TIME_FIELD_MULTIPLE_MAPPING, FieldDataTypeEnum, + IndexSetType, OperatorEnum, TimeEnum, TimeFieldTypeEnum, @@ -61,7 +67,10 @@ SearchExceedMaxSizeException, SearchIndexNoTimeFieldException, SearchNotTimeFieldType, + SearchUnKnowTimeField, SearchUnKnowTimeFieldType, + UnionSearchErrorException, + UnionSearchFieldsFailException, ) from apps.log_search.handlers.es.dsl_bkdata_builder import ( DslBkDataCreateSearchContextBody, @@ -88,6 +97,7 @@ from apps.utils.local import get_local_param, get_request_username from apps.utils.log import logger from apps.utils.lucene import generate_query_string +from apps.utils.thread import MultiExecuteFunc from bkm_ipchooser.constants import CommonEnum from django.conf import settings from django.core.cache import cache @@ -146,8 +156,6 @@ def __init__( self.search_dict.update( PreSearchHandlers.pre_check_fields(self.indices, self.scenario_id, self.storage_cluster_id) ) - # 是否包含嵌套字段 - self.include_nested_fields: bool = self.search_dict.get("include_nested_fields", True) # 检索历史记录 self.addition = copy.deepcopy(search_dict.get("addition", [])) @@ -155,7 +163,7 @@ def __init__( self.use_time_range = search_dict.get("use_time_range", True) # 构建时间字段 - self.time_field, self.time_field_type, self.time_field_unit = self.init_time_field( + self.time_field, self.time_field_type, self.time_field_unit = self._init_time_field( index_set_id, self.scenario_id ) if not self.time_field: @@ -244,7 +252,7 @@ def __init__( self.request_username = get_request_username() # 透传脱敏配置 - self.desensitize_config_list = self.search_dict.get("desensitize_configs", []) + desensitize_configs = self.search_dict.get("desensitize_configs", []) # 初始化DB脱敏配置 desensitize_config_obj = DesensitizeConfig.objects.filter(index_set_id=self.index_set_id).first() @@ -259,18 +267,17 @@ def __init__( "rule_id": field_config_obj.rule_id or 0, "operator": field_config_obj.operator, "params": field_config_obj.params, - "match_pattern": field_config_obj.match_pattern, - "sort_index": field_config_obj.sort_index, } for field_config_obj in desensitize_field_config_objs ] - if field_configs: - self.desensitize_config_list.extend(field_configs) - # 初始化脱敏工厂对象 - self.desensitize_handler = DesensitizeHandler(self.desensitize_config_list) + desensitize_configs.extend(field_configs) + + # 初始化脱敏工厂参数 + 
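Request-supplied desensitize configs are now merged with the per-index-set configs stored in DB before desensitize_params_init builds the handler parameters. A sketch of the merge order (request configs first, DB field configs appended); what desensitize_params_init does internally is not shown in the diff and is assumed to convert these dicts into entities:

def merge_desensitize_configs(request_configs, db_field_configs):
    # request-level configs take the leading positions; stored configs follow
    merged = list(request_configs)
    merged.extend(db_field_configs)
    return merged

request_configs = [{"field_name": "ip", "operator": "mask_shield", "params": {}}]
db_configs = [{"field_name": "phone", "operator": "text_replace", "rule_id": 3, "params": {}}]
assert [c["field_name"] for c in merge_desensitize_configs(request_configs, db_configs)] == ["ip", "phone"]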
self.desensitize_config_list = desensitize_params_init(desensitize_configs=desensitize_configs) def fields(self, scope="default"): + is_union_search = self.search_dict.get("is_union_search", False) mapping_handlers = MappingHandlers( self.indices, self.index_set_id, @@ -280,8 +287,13 @@ def fields(self, scope="default"): start_time=self.start_time, end_time=self.end_time, ) - field_result, display_fields = mapping_handlers.get_all_fields_by_index_id(scope=scope) - sort_list: list = MappingHandlers.get_sort_list_by_index_id(index_set_id=self.index_set_id, scope=scope) + field_result, display_fields = mapping_handlers.get_all_fields_by_index_id( + scope=scope, is_union_search=is_union_search + ) + if not is_union_search: + sort_list: list = MappingHandlers.get_sort_list_by_index_id(index_set_id=self.index_set_id, scope=scope) + else: + sort_list = list() # 校验sort_list字段是否存在 field_result_list = [i["field_name"] for i in field_result] @@ -299,6 +311,10 @@ def fields(self, scope="default"): "time_field_unit": self.time_field_unit, "config": [], } + + if is_union_search: + return result_dict + for fields_config in [ self.bcs_web_console(field_result_list), self.bk_log_to_trace(), @@ -489,7 +505,6 @@ def search(self, search_type="default"): "time_field_unit": self.time_field_unit, "scroll": self.scroll, "collapse": self.collapse, - "include_nested_fields": self.include_nested_fields, } ) except ApiResultError as e: @@ -508,7 +523,9 @@ def search(self, search_type="default"): # 保存检索历史,按用户、索引集、检索条件缓存5分钟 # 保存首页检索和trace通用查询检索历史 - if search_type: + # 联合检索不保存单个索引集的检索历史 + is_union_search = self.search_dict.get("is_union_search", False) + if search_type and not is_union_search: self._save_history(result, search_type) # 补充scroll id @@ -587,6 +604,7 @@ def _can_scroll(self, result) -> bool: ) def _scroll(self, search_result): + scroll_result = copy.deepcopy(search_result) scroll_size = len(scroll_result["hits"]["hits"]) result_size = len(search_result["hits"]["hits"]) @@ -784,30 +802,49 @@ def _get_cache_key(basic_key, params): return cache_key @staticmethod - def search_history(index_set_id=None, **kwargs): + def search_history(index_set_id=None, index_set_ids=None, is_union_search=False, **kwargs): """ search_history @param index_set_id: + @param is_union_search: + @param index_set_ids: @param kwargs: @return: """ username = get_request_username() - if index_set_id: - history_obj = ( - UserIndexSetSearchHistory.objects.filter( - is_deleted=False, created_by=username, index_set_id=index_set_id, search_type="default" + if not is_union_search: + if index_set_id: + history_obj = ( + UserIndexSetSearchHistory.objects.filter( + is_deleted=False, + created_by=username, + index_set_id=index_set_id, + search_type="default", + index_set_type=IndexSetType.SINGLE.value, + ) + .order_by("-rank", "-created_at")[:10] + .values("id", "params") + ) + else: + history_obj = ( + UserIndexSetSearchHistory.objects.filter( + is_deleted=False, + search_type="default", + created_at__range=[kwargs["start_time"], kwargs["end_time"]], + index_set_type=IndexSetType.SINGLE.value, + ) + .order_by("created_by", "-created_at") + .values("id", "params", "created_by", "created_at") ) - .order_by("-rank", "-created_at")[:10] - .values("id", "params") - ) else: history_obj = ( UserIndexSetSearchHistory.objects.filter( is_deleted=False, search_type="default", - created_at__range=[kwargs["start_time"], kwargs["end_time"]], + index_set_ids=index_set_ids, + index_set_type=IndexSetType.UNION.value, ) - .order_by("created_by", "-created_at") + 
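search_history now branches three ways: union histories are keyed by the sorted index-set ID list, single histories by index_set_id, and a time-ranged scan covers the rest. A simplified in-memory mirror of that selection logic (plain dicts instead of the ORM):

def pick_history(records, index_set_id=None, index_set_ids=None, is_union_search=False):
    if is_union_search:
        # union histories match on the full (sorted) ID list
        return [r for r in records
                if r["index_set_type"] == "union" and r["index_set_ids"] == index_set_ids]
    if index_set_id is not None:
        return [r for r in records
                if r["index_set_type"] == "single" and r["index_set_id"] == index_set_id]
    return [r for r in records if r["index_set_type"] == "single"]

records = [
    {"id": 1, "index_set_type": "single", "index_set_id": 146},
    {"id": 2, "index_set_type": "union", "index_set_ids": [146, 147]},
]
assert pick_history(records, index_set_ids=[146, 147], is_union_search=True)[0]["id"] == 2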
.order_by("-rank", "-created_at")[:10] .values("id", "params", "created_by", "created_at") ) history_obj = SearchHandler._deal_repeat_history(history_obj) @@ -1081,10 +1118,7 @@ def _init_indices_str(self, index_set_id: int) -> str: ) raise BaseSearchIndexSetException(BaseSearchIndexSetException.MESSAGE.format(index_set_id=index_set_id)) - @staticmethod - def init_time_field(index_set_id: int, scenario_id: str = None) -> tuple: - if not scenario_id: - scenario_id = LogIndexSet.objects.filter(index_set_id=index_set_id).first().scenario_id + def _init_time_field(self, index_set_id: int, scenario_id: str) -> tuple: # get timestamp field if scenario_id in [Scenario.BKDATA, Scenario.LOG]: return "dtEventTimeStamp", TimeFieldTypeEnum.DATE.value, TimeFieldUnitEnum.SECOND.value @@ -1105,16 +1139,20 @@ def _init_sort(self) -> list: index_set_id = self.search_dict.get("index_set_id") # 获取用户对sort的排序需求 sort_list: List = self.search_dict.get("sort_list", []) + is_union_search = self.search_dict.get("is_union_search", False) + if sort_list: return sort_list - # 用户已设置排序规则 - username = get_request_username() + + # 用户已设置排序规则 (联合检索时不使用用户在单个索引集上设置的排序规则) scope = self.search_dict.get("search_type", "default") - config_obj = UserIndexSetFieldsConfig.get_config(index_set_id=index_set_id, username=username, scope=scope) - if config_obj: - sort_list = config_obj.sort_list - if sort_list: - return sort_list + if not is_union_search: + username = get_request_username() + config_obj = UserIndexSetFieldsConfig.get_config(index_set_id=index_set_id, username=username, scope=scope) + if config_obj: + sort_list = config_obj.sort_list + if sort_list: + return sort_list # 安全措施, 用户未设置排序规则,且未创建默认配置时, 使用默认排序规则 from apps.log_search.handlers.search.mapping_handlers import MappingHandlers @@ -1344,6 +1382,9 @@ def _deal_query_result(self, result_dict: dict) -> dict: # 脱敏处理 if self.desensitize_config_list: log = self._log_desensitize(log) + # 联合检索补充索引集信息 + if self.search_dict.get("is_union_search", False): + log["__index_set_id__"] = self.index_set_id log = self._add_cmdb_fields(log) if self.export_fields: new_origin_log = {} @@ -1365,7 +1406,8 @@ def _deal_query_result(self, result_dict: dict) -> dict: log_list.append(log) continue if not self.desensitize_config_list: - log = self._deal_object_highlight(log=log, highlight=hit["highlight"]) + for key in hit["highlight"]: + log[key] = "".join(hit["highlight"][key]) log_list.append(log) result.update( @@ -1381,27 +1423,6 @@ def _deal_query_result(self, result_dict: dict) -> dict: result.update({"aggs": agg_dict}) return result - @staticmethod - def nested_dict_from_dotted_key(dotted_dict: Dict[str, Any]) -> Dict[str, Any]: - result = {} - for key, value in dotted_dict.items(): - parts = key.split('.') - current_level = result - for part in parts[:-1]: - if part not in current_level: - current_level[part] = {} - current_level = current_level[part] - current_level[parts[-1]] = "".join(value) - return result - - def _deal_object_highlight(self, log: Dict[str, Any], highlight: Dict[str, Any]) -> Dict[str, Any]: - """ - 兼容Object类型字段的高亮 - ES层会返回打平后的高亮字段, 该函数将其高亮的字段更新至对应Object字段 - """ - log.update(self.nested_dict_from_dotted_key(dotted_dict=highlight)) - return log - def _log_desensitize(self, log: dict = None): """ 字段脱敏 @@ -1413,7 +1434,7 @@ def _log_desensitize(self, log: dict = None): log_content_tmp = copy.deepcopy(log) # 字段脱敏处理 - log = self.desensitize_handler.transform_dict(log) + log = DesensitizeHandler(self.desensitize_config_list).transform_dict(log) # 处理原文字段 if not 
self.text_fields: @@ -1424,8 +1445,8 @@ def _log_desensitize(self, log: dict = None): if text_field not in log.keys(): continue - for _config in self.desensitize_config_list: - field_name = _config["field_name"] + for entity in self.desensitize_config_list: + field_name = entity.field_name if field_name not in log.keys(): continue log[text_field] = log[text_field].replace(str(log_content_tmp[field_name]), str(log[field_name])) @@ -1487,6 +1508,7 @@ def _analyze_context_result( mark_gseIndex: int = None # pylint: disable=invalid-name ) -> Dict[str, Any]: + log_list_reversed: list = log_list if self.start < 0: log_list_reversed = list(reversed(log_list)) @@ -1690,3 +1712,307 @@ def _get_user_sorted_list(self, sorted_fields): user_sort_list.append([sorted_field, ASYNC_SORTED]) return user_sort_list + + +class UnionSearchHandler(object): + """ + 联合检索 + """ + + def __init__(self, search_dict=None): + if search_dict is None: + search_dict = {} + self.search_dict = search_dict + self.union_configs = search_dict.get("union_configs", []) + self.sort_list = search_dict.get("sort_list", []) + if search_dict.get("index_set_ids", []): + self.index_set_ids = list(set(search_dict["index_set_ids"])) + else: + self.index_set_ids = list({info["index_set_id"] for info in self.union_configs}) + + def _init_sort_list(self, index_set_id): + sort_list = self.search_dict.get("sort_list", []) + if not sort_list: + return sort_list + + new_sort_list = list() + # 判断是否指定了自定义的时间字段 + for sort_info in copy.deepcopy(sort_list): + _time_field, _sort = sort_info + if _time_field == "unionSearchTimeStamp": + sort_info[0] = MappingHandlers.get_time_field(index_set_id=index_set_id) + new_sort_list.append(sort_info) + + return new_sort_list + + def union_search(self, is_export=False): + + index_set_objs = LogIndexSet.objects.filter(index_set_id__in=self.index_set_ids) + if not index_set_objs: + raise BaseSearchIndexSetException( + BaseSearchIndexSetException.MESSAGE.format(index_set_id=self.index_set_ids) + ) + + # 权限校验逻辑 + self._iam_check() + + index_set_obj_mapping = {obj.index_set_id: obj for obj in index_set_objs} + + # 构建请求参数 + params = { + "ip_chooser": self.search_dict.get("ip_chooser"), + "bk_biz_id": self.search_dict.get("bk_biz_id"), + "addition": self.search_dict.get("addition"), + "start_time": self.search_dict.get("start_time"), + "end_time": self.search_dict.get("end_time"), + "time_range": self.search_dict.get("time_range"), + "keyword": self.search_dict.get("keyword"), + "size": self.search_dict.get("size"), + "is_union_search": True, + } + + multi_execute_func = MultiExecuteFunc() + if is_export: + for index_set_id in self.index_set_ids: + search_dict = copy.deepcopy(params) + search_dict["begin"] = self.search_dict.get("begin", 0) + search_dict["sort_list"] = self._init_sort_list(index_set_id=index_set_id) + search_handler = SearchHandler( + index_set_id=index_set_id, + search_dict=search_dict, + export_fields=self.search_dict.get("export_fields", []), + ) + multi_execute_func.append(f"union_search_{index_set_id}", search_handler.search) + else: + for union_config in self.union_configs: + search_dict = copy.deepcopy(params) + search_dict["begin"] = union_config.get("begin", 0) + search_dict["sort_list"] = self._init_sort_list(index_set_id=union_config["index_set_id"]) + search_handler = SearchHandler(index_set_id=union_config["index_set_id"], search_dict=search_dict) + multi_execute_func.append(f"union_search_{union_config['index_set_id']}", search_handler.search) + + # 执行线程 + multi_result = 
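_init_sort_list above rewrites the virtual unionSearchTimeStamp column into each index set's real time field before the per-index query is built. A sketch, assuming the caller has already resolved the real field via MappingHandlers.get_time_field:

import copy

def resolve_union_sort_list(sort_list, real_time_field):
    # substitute the placeholder column per index set; other sort entries pass through
    resolved = []
    for field, order in copy.deepcopy(sort_list):
        if field == "unionSearchTimeStamp":
            field = real_time_field
        resolved.append([field, order])
    return resolved

assert resolve_union_sort_list([["unionSearchTimeStamp", "desc"]], "dtEventTimeStamp") == [
    ["dtEventTimeStamp", "desc"]
]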
multi_execute_func.run() + + if not multi_result: + raise UnionSearchErrorException() + + # 处理返回结果 + result_log_list = list() + result_origin_log_list = list() + total = 0 + took = 0 + for index_set_id in self.index_set_ids: + ret = multi_result.get(f"union_search_{index_set_id}") + result_log_list.extend(ret["list"]) + result_origin_log_list.extend(ret["origin_log_list"]) + total += int(ret["total"]) + took = max(took, ret["took"]) + + # 数据排序处理 兼容第三方ES检索排序 + time_fields = set() + time_fields_type = set() + time_fields_unit = set() + for index_set_obj in index_set_objs: + if not index_set_obj.time_field or not index_set_obj.time_field_type or not index_set_obj.time_field_unit: + raise SearchUnKnowTimeField() + time_fields.add(index_set_obj.time_field) + time_fields_type.add(index_set_obj.time_field_type) + time_fields_unit.add(index_set_obj.time_field_unit) + + is_use_custom_time_field = False + + if len(time_fields) != 1 or len(time_fields_type) != 1 or len(time_fields_unit) != 1: + # 标准化时间字段 + is_use_custom_time_field = True + for info in result_log_list: + index_set_obj = index_set_obj_mapping.get(info["__index_set_id__"]) + num = TIME_FIELD_MULTIPLE_MAPPING.get(index_set_obj.time_field_unit, 1) + info["unionSearchTimeStamp"] = int(info[index_set_obj.time_field]) * num + + for info in result_origin_log_list: + index_set_obj = index_set_obj_mapping.get(info["__index_set_id__"]) + num = TIME_FIELD_MULTIPLE_MAPPING.get(index_set_obj.time_field_unit, 1) + info["unionSearchTimeStamp"] = int(info[index_set_obj.time_field]) * num + + if not self.sort_list: + # 默认使用时间字段排序 + if not is_use_custom_time_field: + # 时间字段相同 直接以相同时间字段为key进行排序 默认为降序 + result_log_list = sorted(result_log_list, key=itemgetter(list(time_fields)[0]), reverse=True) + result_origin_log_list = sorted( + result_origin_log_list, key=itemgetter(list(time_fields)[0]), reverse=True + ) + else: + # 时间字段/时间字段格式/时间字段单位不同 标准化时间字段作为key进行排序 标准字段单位为 millisecond + result_log_list = sorted(result_log_list, key=itemgetter("unionSearchTimeStamp"), reverse=True) + result_origin_log_list = sorted( + result_origin_log_list, key=itemgetter("unionSearchTimeStamp"), reverse=True + ) + else: + result_log_list = sorted(result_log_list, key=functools.cmp_to_key(self._sort_compare)) + result_origin_log_list = sorted(result_origin_log_list, key=functools.cmp_to_key(self._sort_compare)) + + # 处理分页 + result_log_list = result_log_list[: self.search_dict.get("size")] + result_origin_log_list = result_origin_log_list[: self.search_dict.get("size")] + + # 日志导出提前返回 + if is_export: + return {"origin_log_list": result_origin_log_list} + + # 统计返回的数据中各个索引集分别占了多少条数据 用于下次begin查询 + result_log_index_set_ids = [result_log["__index_set_id__"] for result_log in result_log_list] + + for union_config in self.union_configs: + union_config["begin"] = union_config["begin"] + result_log_index_set_ids.count(union_config["index_set_id"]) + + res = { + "total": total, + "took": took, + "list": result_log_list, + "origin_log_list": result_origin_log_list, + "union_configs": self.union_configs, + } + + # 保存联合检索检索历史 + self._save_union_search_history(res) + + return res + + def _sort_compare(self, x, y): + """ + 排序比较函数 + """ + for sort_info in self.sort_list: + field_name, order = sort_info + if x[field_name] != y[field_name]: + if order == "desc": + return (x[field_name] < y[field_name]) - (x[field_name] > y[field_name]) + else: + return (x[field_name] > y[field_name]) - (x[field_name] < y[field_name]) + return 0 + + def _iam_check(self): + """ + 权限校验逻辑 要求拥有所有索引集检索权限 + """ + if 
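Once the per-index searches return, rows from index sets with differing time units are normalized onto a shared millisecond timestamp, and each union_config's begin is advanced by however many rows its index set contributed to the returned page. The multiplier table below is an assumption for illustration; the diff imports TIME_FIELD_MULTIPLE_MAPPING without showing its contents:

# assumed unit -> milliseconds multipliers (actual mapping lives in constants.py)
ASSUMED_UNIT_MULTIPLIER = {"second": 1000, "millisecond": 1}

def normalize_ts(value, unit):
    return int(value) * ASSUMED_UNIT_MULTIPLIER.get(unit, 1)

def advance_begins(union_configs, page):
    # page rows carry __index_set_id__, stamped by _deal_query_result for union search
    for cfg in union_configs:
        cfg["begin"] += sum(1 for row in page if row["__index_set_id__"] == cfg["index_set_id"])

assert normalize_ts("1534825132", "second") == 1534825132000
cfgs = [{"index_set_id": 146, "begin": 0}, {"index_set_id": 147, "begin": 0}]
advance_begins(cfgs, [{"__index_set_id__": 146}, {"__index_set_id__": 146}, {"__index_set_id__": 147}])
assert cfgs[0]["begin"] == 2 and cfgs[1]["begin"] == 1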
settings.IGNORE_IAM_PERMISSION: + return True + client = Permission() + resources = [{"type": ResourceEnum.INDICES.id, "id": index_set_id} for index_set_id in self.index_set_ids] + resources = client.batch_make_resource(resources) + is_allowed = client.is_allowed(ActionEnum.SEARCH_LOG.id, resources, raise_exception=True) + return is_allowed + + def _save_union_search_history(self, result, search_type="default"): + params = { + "keyword": self.search_dict.get("keyword"), + "ip_chooser": self.search_dict.get("ip_chooser"), + "addition": self.search_dict.get("addition"), + "start_time": self.search_dict.get("start_time"), + "end_time": self.search_dict.get("end_time"), + "time_range": self.search_dict.get("time_range"), + } + + result.update( + { + "union_search_history_obj": { + "params": params, + "index_set_ids": sorted(self.index_set_ids), + "search_type": search_type, + } + } + ) + + return result + + @staticmethod + def union_search_fields(data): + """ + 获取字段mapping信息 + """ + index_set_ids = data.get("index_set_ids") + start_time = data.get("start_time", "") + end_time = data.get("end_time", "") + + index_set_objs = LogIndexSet.objects.filter(index_set_id__in=index_set_ids) + + if not index_set_objs: + raise BaseSearchIndexSetException(BaseSearchIndexSetException.MESSAGE.format(index_set_id=index_set_ids)) + + multi_execute_func = MultiExecuteFunc() + + # 构建请求参数 + params = {"start_time": start_time, "end_time": end_time, "is_union_search": True} + + for index_set_id in index_set_ids: + + search_handler = SearchHandler(index_set_id, params) + multi_execute_func.append(f"union_search_fields_{index_set_id}", search_handler.fields) + + multi_result = multi_execute_func.run() + + if not multi_result: + raise UnionSearchFieldsFailException() + + # 处理返回结果 + total_fields = list() + fields_info = dict() + union_field_names = list() + union_display_fields = list() + union_time_fields = set() + union_time_fields_type = set() + union_time_fields_unit = set() + for index_set_id in index_set_ids: + result = multi_result[f"union_search_fields_{index_set_id}"] + fields = result["fields"] + fields_info[index_set_id] = fields + display_fields = result["display_fields"] + for field_info in fields: + field_name = field_info["field_name"] + field_type = field_info["field_type"] + if field_name not in union_field_names: + total_fields.append(field_info) + union_field_names.append(field_info["field_name"]) + else: + # 判断字段类型是否一致 不一致则标记为类型冲突 + _index = union_field_names.index(field_name) + if field_type != total_fields[_index]["field_type"]: + total_fields[_index]["field_type"] = "conflict" + + # 处理默认显示字段 + union_display_fields.extend(display_fields) + + # 处理公共的默认显示字段 + union_display_fields = list( + {display_field for display_field in union_display_fields if union_display_fields.count(display_field) > 1} + ) + + # 处理时间字段 + for index_set_obj in index_set_objs: + if not index_set_obj.time_field or not index_set_obj.time_field_type or not index_set_obj.time_field_unit: + raise SearchUnKnowTimeField() + union_time_fields.add(index_set_obj.time_field) + union_time_fields_type.add(index_set_obj.time_field_type) + union_time_fields_unit.add(index_set_obj.time_field_unit) + + # 处理公共的时间字段 + if len(union_time_fields) != 1 or len(union_time_fields_type) != 1 or len(union_time_fields_unit) != 1: + time_field = "unionSearchTimeStamp" + time_field_type = "date" + time_field_unit = "millisecond" + else: + time_field = list(union_time_fields)[0] + time_field_type = list(union_time_fields_type)[0] + time_field_unit = 
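union_search_fields merges the per-index field lists: the first occurrence of a name claims the slot, and any later type mismatch downgrades the merged type to "conflict". A compact runnable mirror of that merge:

def merge_union_fields(per_index_fields):
    total, pos = [], {}
    for fields in per_index_fields:
        for f in fields:
            name = f["field_name"]
            if name not in pos:
                pos[name] = len(total)
                total.append(dict(f))
            elif f["field_type"] != total[pos[name]]["field_type"]:
                # same field name with a different type across index sets
                total[pos[name]]["field_type"] = "conflict"
    return total

merged = merge_union_fields([
    [{"field_name": "log", "field_type": "text"}],
    [{"field_name": "log", "field_type": "keyword"}],
])
assert merged[0]["field_type"] == "conflict"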
list(union_time_fields_unit)[0] + + ret = { + "fields": total_fields, + "fields_info": fields_info, + "display_fields": union_display_fields, + "time_field": time_field, + "time_field_type": time_field_type, + "time_field_unit": time_field_unit, + } + return ret diff --git a/bklog/apps/log_search/migrations/0068_auto_20230802_1608.py b/bklog/apps/log_search/migrations/0068_auto_20230802_1608.py new file mode 100644 index 000000000..7a96a7605 --- /dev/null +++ b/bklog/apps/log_search/migrations/0068_auto_20230802_1608.py @@ -0,0 +1,48 @@ +# Generated by Django 3.2.15 on 2023-08-02 08:08 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('log_search', '0067_auto_20230620_1609'), + ] + + operations = [ + migrations.AddField( + model_name='asynctask', + name='index_set_ids', + field=models.JSONField(default=list, null=True, verbose_name='索引集ID列表'), + ), + migrations.AddField( + model_name='asynctask', + name='index_set_type', + field=models.CharField(choices=[('single', '单索引集'), ('union', '联合索引集')], default='single', max_length=32, verbose_name='索引集类型'), + ), + migrations.AddField( + model_name='userindexsetsearchhistory', + name='index_set_ids', + field=models.JSONField(default=list, null=True, verbose_name='索引集ID列表'), + ), + migrations.AddField( + model_name='userindexsetsearchhistory', + name='index_set_type', + field=models.CharField(choices=[('single', '单索引集'), ('union', '联合索引集')], default='single', max_length=32, verbose_name='索引集类型'), + ), + migrations.AlterField( + model_name='asynctask', + name='index_set_id', + field=models.IntegerField(blank=True, null=True, verbose_name='索引集id'), + ), + migrations.AlterField( + model_name='asynctask', + name='scenario_id', + field=models.CharField(blank=True, max_length=64, null=True, verbose_name='接入场景'), + ), + migrations.AlterField( + model_name='userindexsetsearchhistory', + name='index_set_id', + field=models.IntegerField(default=None, null=True, verbose_name='索引集ID'), + ), + ] diff --git a/bklog/apps/log_search/migrations/0069_auto_20230803_1741.py b/bklog/apps/log_search/migrations/0069_auto_20230803_1741.py new file mode 100644 index 000000000..09ac92646 --- /dev/null +++ b/bklog/apps/log_search/migrations/0069_auto_20230803_1741.py @@ -0,0 +1,28 @@ +# Generated by Django 3.2.15 on 2023-08-03 09:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('log_search', '0068_auto_20230802_1608'), + ] + + operations = [ + migrations.AddField( + model_name='favorite', + name='index_set_ids', + field=models.JSONField(default=list, null=True, verbose_name='索引集ID列表'), + ), + migrations.AddField( + model_name='favorite', + name='index_set_type', + field=models.CharField(choices=[('single', '单索引集'), ('union', '联合索引集')], default='single', max_length=32, verbose_name='索引集类型'), + ), + migrations.AlterField( + model_name='favorite', + name='index_set_id', + field=models.IntegerField(default=None, null=True, verbose_name='索引集ID'), + ), + ] diff --git a/bklog/apps/log_search/models.py b/bklog/apps/log_search/models.py index ed0eb6519..9fb8d8a24 100644 --- a/bklog/apps/log_search/models.py +++ b/bklog/apps/log_search/models.py @@ -48,6 +48,7 @@ FieldDateFormatEnum, GlobalCategoriesEnum, GlobalTypeEnum, + IndexSetType, InnerTag, SearchScopeEnum, SeparatorEnum, @@ -163,7 +164,6 @@ def get_configs(cls) -> dict: ) # Cookie域名 configs[GlobalTypeEnum.BK_DOMAIN.value] = settings.BK_DOMAIN - configs[GlobalTypeEnum.RETAIN_EXTRA_JSON.value] = 
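The time-field consensus rule above: reuse the common (field, type, unit) triple when every index set agrees, otherwise fall back to the virtual unionSearchTimeStamp in milliseconds. As a standalone function:

def union_time_field(index_sets):
    fields = {i["time_field"] for i in index_sets}
    types = {i["time_field_type"] for i in index_sets}
    units = {i["time_field_unit"] for i in index_sets}
    if len(fields) == len(types) == len(units) == 1:
        return fields.pop(), types.pop(), units.pop()
    return "unionSearchTimeStamp", "date", "millisecond"

assert union_time_field([
    {"time_field": "dtEventTimeStamp", "time_field_type": "date", "time_field_unit": "second"},
    {"time_field": "time", "time_field_type": "date", "time_field_unit": "millisecond"},
]) == ("unionSearchTimeStamp", "date", "millisecond")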
settings.RETAIN_EXTRA_JSON return configs class Meta: @@ -477,14 +477,14 @@ def get_indexes(self, has_applied=None, project_info=True): ] @classmethod - def get_index_set(cls, index_set_ids=None, scenarios=None, space_uids=None, is_trace_log=False, show_indices=True): + def get_index_set(cls, index_set_ids=None, scenarios=None, space_uid=None, is_trace_log=False, show_indices=True): qs = cls.objects.filter(is_active=True) if index_set_ids: qs = qs.filter(index_set_id__in=index_set_ids) if scenarios and isinstance(scenarios, list): qs = qs.filter(scenario_id__in=scenarios) - if space_uids: - qs = qs.filter(space_uid__in=space_uids) + if space_uid: + qs = qs.filter(space_uid=space_uid) if is_trace_log: qs = qs.filter(is_trace_log=is_trace_log) @@ -676,11 +676,15 @@ class Meta: class UserIndexSetSearchHistory(SoftDeleteModel): - index_set_id = models.IntegerField(_("索引集ID")) + index_set_id = models.IntegerField(_("索引集ID"), null=True, default=None) params = JsonField(_("检索条件"), null=True, default=None) search_type = models.CharField(_("检索类型"), max_length=32, default="default") duration = models.FloatField(_("查询耗时"), null=True, default=None) rank = models.IntegerField(_("排序"), default=0) + index_set_ids = models.JSONField(_("索引集ID列表"), null=True, default=list) + index_set_type = models.CharField( + _("索引集类型"), max_length=32, choices=IndexSetType.get_choices(), default=IndexSetType.SINGLE.value + ) class Meta: verbose_name = _("索引集用户检索记录") @@ -777,13 +781,17 @@ class FavoriteSearch(SoftDeleteModel): class Favorite(OperateRecordModel): space_uid = models.CharField(_("空间唯一标识"), blank=True, default="", max_length=256, db_index=True) - index_set_id = models.IntegerField(_("索引集ID")) + index_set_id = models.IntegerField(_("索引集ID"), null=True, default=None) name = models.CharField(_("收藏名称"), max_length=255) group_id = models.IntegerField(_("收藏组ID"), db_index=True) params = JsonField(_("检索条件"), null=True, default=None) visible_type = models.CharField(_("可见类型"), max_length=64, choices=FavoriteVisibleType.get_choices()) # 个人 | 公开 is_enable_display_fields = models.BooleanField(_("是否同时显示字段"), default=False) display_fields = models.JSONField(_("显示字段"), blank=True, default=None) + index_set_ids = models.JSONField(_("索引集ID列表"), null=True, default=list) + index_set_type = models.CharField( + _("索引集类型"), max_length=32, choices=IndexSetType.get_choices(), default=IndexSetType.SINGLE.value + ) class Meta: verbose_name = _("检索收藏") @@ -793,12 +801,21 @@ class Meta: @classmethod def get_user_favorite( - cls, space_uid: str, username: str, order_type: str = FavoriteListOrderType.NAME_ASC.value + cls, + space_uid: str, + username: str, + order_type: str = FavoriteListOrderType.NAME_ASC.value, + index_set_type: str = IndexSetType.SINGLE.value, ) -> list: favorites = [] qs = cls.objects.filter( - Q(space_uid=space_uid, created_by=username, visible_type=FavoriteVisibleType.PRIVATE.value) - | Q(space_uid=space_uid, visible_type=FavoriteVisibleType.PUBLIC.value) + Q( + space_uid=space_uid, + created_by=username, + visible_type=FavoriteVisibleType.PRIVATE.value, + index_set_type=index_set_type, + ) + | Q(space_uid=space_uid, visible_type=FavoriteVisibleType.PUBLIC.value, index_set_type=index_set_type) ) if order_type == FavoriteListOrderType.NAME_ASC.value: qs = qs.order_by("name") @@ -807,7 +824,13 @@ def get_user_favorite( else: qs = qs.order_by("-updated_at") - index_set_id_list = list(qs.all().values_list("index_set_id", flat=True).distinct()) + if index_set_type == IndexSetType.SINGLE.value: + index_set_id_list = 
list(qs.all().values_list("index_set_id", flat=True).distinct()) + else: + index_set_id_list = list() + for obj in qs.all(): + index_set_id_list.extend(obj.index_set_ids) + index_set_id_list = list(set(index_set_id_list)) active_index_set_id_dict = { i["index_set_id"]: {"index_set_name": i["index_set_name"], "is_active": i["is_active"]} for i in LogIndexSet.objects.filter(index_set_id__in=index_set_id_list).values( @@ -816,12 +839,25 @@ def get_user_favorite( } for fi in qs.all(): fi_dict = model_to_dict(fi) - if active_index_set_id_dict.get(fi.index_set_id): - fi_dict["is_active"] = active_index_set_id_dict[fi.index_set_id]["is_active"] - fi_dict["index_set_name"] = active_index_set_id_dict[fi.index_set_id]["index_set_name"] + if index_set_type == IndexSetType.SINGLE.value: + if active_index_set_id_dict.get(fi.index_set_id): + fi_dict["is_active"] = active_index_set_id_dict[fi.index_set_id]["is_active"] + fi_dict["index_set_name"] = active_index_set_id_dict[fi.index_set_id]["index_set_name"] + else: + fi_dict["is_active"] = False + fi_dict["index_set_name"] = INDEX_SET_NOT_EXISTED else: - fi_dict["is_active"] = False - fi_dict["index_set_name"] = INDEX_SET_NOT_EXISTED + is_actives = [] + index_set_names = [] + for index_set_id in fi.index_set_ids: + if active_index_set_id_dict.get(index_set_id): + is_actives.append(active_index_set_id_dict[index_set_id]["is_active"]) + index_set_names.append(active_index_set_id_dict[index_set_id]["index_set_name"]) + else: + is_actives.append(False) + index_set_names.append(INDEX_SET_NOT_EXISTED) + fi_dict["is_actives"] = is_actives + fi_dict["index_set_names"] = index_set_names fi_dict["created_at"] = fi_dict["created_at"] fi_dict["updated_at"] = fi_dict["updated_at"] favorites.append(fi_dict) @@ -966,8 +1002,8 @@ class AsyncTask(OperateRecordModel): request_param = models.JSONField(_("检索请求参数")) sorted_param = models.JSONField(_("异步导出排序字段"), null=True, blank=True) - scenario_id = models.CharField(_("接入场景"), max_length=64) - index_set_id = models.IntegerField(_("索引集id")) + scenario_id = models.CharField(_("接入场景"), max_length=64, null=True, blank=True) + index_set_id = models.IntegerField(_("索引集id"), null=True, blank=True) result = models.BooleanField(_("异步导出结果"), default=False) failed_reason = models.TextField(_("异步导出异常原因"), null=True, blank=True) file_name = models.CharField(_("文件名"), max_length=256, null=True, blank=True) @@ -980,6 +1016,10 @@ class AsyncTask(OperateRecordModel): export_type = models.CharField(_("导出类型"), max_length=64, null=True, blank=True) bk_biz_id = models.IntegerField(_("业务ID"), null=True, default=None) completed_at = models.DateTimeField(_("任务完成时间"), null=True, blank=True) + index_set_ids = models.JSONField(_("索引集ID列表"), null=True, default=list) + index_set_type = models.CharField( + _("索引集类型"), max_length=32, choices=IndexSetType.get_choices(), default=IndexSetType.SINGLE.value + ) class Meta: db_table = "export_task" diff --git a/bklog/apps/log_search/serializers.py b/bklog/apps/log_search/serializers.py index 64a41c011..0d8d4bad2 100644 --- a/bklog/apps/log_search/serializers.py +++ b/bklog/apps/log_search/serializers.py @@ -33,6 +33,7 @@ from apps.log_search.constants import ( FavoriteListOrderType, FavoriteVisibleType, + IndexSetType, InstanceTypeEnum, TemplateType, ) @@ -156,10 +157,12 @@ class DesensitizeConfigSerializer(serializers.Serializer): """ rule_id = serializers.IntegerField(label=_("脱敏规则ID"), required=False) + field_name = serializers.CharField(label=_("字段名"), required=True) match_pattern = 
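For union favorites, get_user_favorite emits parallel is_actives / index_set_names lists instead of the single-index pair. A sketch; the placeholder string below stands in for the INDEX_SET_NOT_EXISTED constant:

def union_favorite_display(index_set_ids, active_map):
    # active_map: index_set_id -> {"is_active": bool, "index_set_name": str}
    is_actives, names = [], []
    for set_id in index_set_ids:
        info = active_map.get(set_id)
        is_actives.append(bool(info and info["is_active"]))
        names.append(info["index_set_name"] if info else "INDEX_SET_NOT_EXISTED")
    return is_actives, names

active = {146: {"is_active": True, "index_set_name": "nginx_access"}}
flags, names = union_favorite_display([146, 999], active)
assert flags == [True, False] and names == ["nginx_access", "INDEX_SET_NOT_EXISTED"]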
serializers.CharField(label=_("匹配模式"), required=False) operator = serializers.ChoiceField(label=_("脱敏算子"), choices=DesensitizeOperator.get_choices(), required=False) params = serializers.DictField(label=_("脱敏配置参数"), required=False) - state = serializers.CharField(label=_("规则状态"), required=False, default="add") + exclude_rules = serializers.ListField(child=serializers.IntegerField(), required=False) + match_fields = serializers.ListField(child=serializers.IntegerField(), required=False) def validate(self, attrs): attrs = super().validate(attrs) @@ -189,25 +192,8 @@ def validate(self, attrs): return attrs -class DesensitizeConfigsSerializer(serializers.Serializer): - field_name = serializers.CharField(label=_("字段名"), required=True) - rules = serializers.ListField(child=DesensitizeConfigSerializer(), required=True, allow_empty=False) - - def validate(self, attrs): - attrs = super().validate(attrs) - rules = attrs.get("rules") - field_name = attrs.get("field_name") - rule_ids = list() - for rule in rules: - rule_id = rule.get("rule_id") - if rule_id and rule_id in rule_ids: - raise ValidationError(_("【{}】字段绑定了多个相同的规则ID").format(field_name)) - - return attrs - - class CreateOrUpdateDesensitizeConfigSerializer(serializers.Serializer): - field_configs = serializers.ListField(child=DesensitizeConfigsSerializer(), required=True) + field_configs = serializers.ListField(child=DesensitizeConfigSerializer(), required=True) text_fields = serializers.ListField(child=serializers.CharField(), required=False) def validate(self, attrs): @@ -238,7 +224,12 @@ class SearchAttrSerializer(serializers.Serializer): aggs = serializers.DictField(required=False, default=dict) # 支持用户自定义排序 - sort_list = serializers.ListField(required=False, allow_null=True, allow_empty=True) + sort_list = serializers.ListField(required=False, allow_null=True, allow_empty=True, child=serializers.ListField()) + + # 脱敏配置 + desensitize_configs = serializers.ListSerializer( + label=_("脱敏配置"), required=False, child=DesensitizeConfigSerializer(), default=[] + ) is_scroll_search = serializers.BooleanField(label=_("是否scroll查询"), required=False, default=False) @@ -246,16 +237,37 @@ class SearchAttrSerializer(serializers.Serializer): is_return_doc_id = serializers.BooleanField(label=_("是否返回文档ID"), required=False, default=False) - # 脱敏配置 - desensitize_configs = serializers.ListSerializer( - label=_("脱敏配置"), required=False, child=DesensitizeConfigSerializer(), default=[] - ) - def validate(self, attrs): attrs = super().validate(attrs) + + # 校验sort_list + if attrs.get("sort_list"): + for sort_info in attrs.get("sort_list"): + field_name, order = sort_info + if order not in ["desc", "asc"]: + raise ValidationError(_("字段名【{}】的排序规则指定错误, 支持('desc', 降序),('asc', 升序)").format(field_name)) return attrs +class UnionConfigSerializer(serializers.Serializer): + index_set_id = serializers.IntegerField(label=_("索引集ID"), required=True) + begin = serializers.IntegerField(required=False, default=0) + + +class UnionSearchAttrSerializer(SearchAttrSerializer): + union_configs = serializers.ListField( + label=_("联合检索参数"), required=True, allow_empty=False, child=UnionConfigSerializer() + ) + + +class UnionSearchFieldsSerializer(serializers.Serializer): + start_time = serializers.DateTimeField(required=False, format="%Y-%m-%d %H:%M:%S") + end_time = serializers.DateTimeField(required=False, format="%Y-%m-%d %H:%M:%S") + index_set_ids = serializers.ListField( + label=_("索引集ID列表"), required=True, allow_empty=False, child=serializers.IntegerField() + ) + + class 
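SearchAttrSerializer now validates the order component of every sort_list entry. The core check, stripped of the DRF plumbing:

def validate_sort_list(sort_list):
    for field_name, order in sort_list:
        if order not in ("desc", "asc"):
            # mirrors the serializer's ValidationError for bad sort directions
            raise ValueError(f"invalid sort order {order!r} for field {field_name!r}")
    return sort_list

validate_sort_list([["dtEventTimeStamp", "desc"], ["gseIndex", "asc"]])  # passes silently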
UserSearchHistorySerializer(serializers.Serializer): start_time = serializers.DateTimeField(required=False, format="%Y-%m-%d %H:%M:%S") end_time = serializers.DateTimeField(required=False, format="%Y-%m-%d %H:%M:%S") @@ -341,6 +353,16 @@ def validate(self, attrs): {"start_time": start_time.strftime("%Y-%m-%d %H:%M:%S"), "end_time": end_time.strftime("%Y-%m-%d %H:%M:%S")} ) + if export_dict.get("index_set_ids"): + for index_set_id in export_dict.get("index_set_ids"): + try: + int(index_set_id) + except ValueError: + raise ValidationError(_("索引集ID类型错误")) + export_dict["index_set_ids"] = sorted( + [int(index_set_id) for index_set_id in export_dict.get("index_set_ids")] + ) + attrs["export_dict"] = json.dumps(export_dict) return attrs @@ -378,6 +400,42 @@ class GetExportHistorySerializer(serializers.Serializer): bk_biz_id = serializers.IntegerField(label=_("业务id")) +class UnionSearchGetExportHistorySerializer(serializers.Serializer): + page = serializers.IntegerField(label=_("页码")) + pagesize = serializers.IntegerField(label=_("页面大小")) + show_all = serializers.BooleanField(label=_("是否展示业务全量导出历史")) + index_set_ids = serializers.CharField(label=_("联合检索索引集ID列表")) + bk_biz_id = serializers.IntegerField(label=_("业务id")) + + def validate(self, attrs): + + # 索引集ID格式校验 + index_set_ids = attrs["index_set_ids"].split(",") + + for index_set_id in index_set_ids: + try: + int(index_set_id) + except ValueError: + raise ValidationError(_("索引集ID类型错误")) + return attrs + + +class UnionSearchHistorySerializer(serializers.Serializer): + index_set_ids = serializers.CharField(label=_("联合检索索引集ID列表")) + + def validate(self, attrs): + + # 索引集ID格式校验 + index_set_ids = attrs["index_set_ids"].split(",") + + for index_set_id in index_set_ids: + try: + int(index_set_id) + except ValueError: + raise ValidationError(_("索引集ID类型错误")) + return attrs + + class SourceDetectSerializer(serializers.Serializer): es_host = serializers.CharField(label=_("ES HOST")) es_port = serializers.IntegerField(label=_("ES 端口")) @@ -437,7 +495,7 @@ class CreateFavoriteSerializer(serializers.Serializer): space_uid = SpaceUIDField(label=_("空间唯一标识"), required=True) name = serializers.CharField(label=_("收藏组名"), max_length=256, required=True) - index_set_id = serializers.IntegerField(label=_("索引集ID"), required=True) + index_set_id = serializers.IntegerField(label=_("索引集ID"), required=False) group_id = serializers.IntegerField(label=_("收藏组ID"), required=False) visible_type = serializers.ChoiceField(choices=FavoriteVisibleType.get_choices(), required=True) ip_chooser = serializers.DictField(default={}, required=False) @@ -446,11 +504,26 @@ class CreateFavoriteSerializer(serializers.Serializer): search_fields = serializers.ListField(required=False, child=serializers.CharField(), default=[]) is_enable_display_fields = serializers.BooleanField(required=False, default=False) display_fields = serializers.ListField(required=False, child=serializers.CharField(), default=[]) + index_set_ids = serializers.ListField( + label=_("索引集ID列表"), required=False, child=serializers.IntegerField(), default=[] + ) + index_set_type = serializers.ChoiceField( + label=_("索引集类型"), required=False, choices=IndexSetType.get_choices(), default=IndexSetType.SINGLE.value + ) def validate(self, attrs): attrs = super().validate(attrs) if attrs["is_enable_display_fields"] and not attrs["display_fields"]: raise serializers.ValidationError(_("同时显示字段开启时, 显示字段不能为空")) + + if attrs["index_set_type"] == IndexSetType.SINGLE.value and not attrs.get("index_set_id"): + raise 
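Both union history serializers accept index_set_ids as a comma-separated string, reject non-numeric entries, and sort the IDs so that [1,2] and [2,1] resolve to the same history rows. An equivalent helper:

def parse_index_set_ids(raw):
    try:
        ids = [int(part) for part in raw.split(",")]
    except ValueError:
        raise ValueError("index_set_ids must be a comma-separated list of integers")
    return sorted(ids)

assert parse_index_set_ids("147,146") == [146, 147]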
serializers.ValidationError(_("索引集ID不能为空")) + elif attrs["index_set_type"] == IndexSetType.UNION.value and not attrs.get("index_set_ids"): + raise serializers.ValidationError(_("索引集ID列表不能为空")) + elif attrs["index_set_type"] == IndexSetType.UNION.value: + # 对index_set_ids排序处理 这里主要是为了兼容前端传递索引集列表ID顺序不一致问题 [1,2] [2,1] ->[1,2] + attrs["index_set_ids"] = sorted(attrs["index_set_ids"]) + return attrs @@ -502,6 +575,9 @@ class FavoriteListSerializer(serializers.Serializer): required=False, default=FavoriteListOrderType.UPDATED_AT_DESC.value, ) + index_set_type = serializers.ChoiceField( + label=_("索引集类型"), required=False, choices=IndexSetType.get_choices(), default=IndexSetType.SINGLE.value + ) class CreateFavoriteGroupSerializer(serializers.Serializer): diff --git a/bklog/apps/log_search/tasks/cmdb.py b/bklog/apps/log_search/tasks/cmdb.py index 74e62ebd6..8d937f201 100644 --- a/bklog/apps/log_search/tasks/cmdb.py +++ b/bklog/apps/log_search/tasks/cmdb.py @@ -22,7 +22,7 @@ from celery.task import periodic_task # noqa -@periodic_task(run_every=crontab(minute="0", hour="*/12")) +@periodic_task(run_every=crontab(hour="*/12")) def refresh_cmdb(): from apps.utils.core.cache.cmdb_host import CmdbHostCache diff --git a/bklog/apps/log_search/tasks/indexsetprecheck.py b/bklog/apps/log_search/tasks/indexsetprecheck.py index cda22083e..b75ddff2d 100644 --- a/bklog/apps/log_search/tasks/indexsetprecheck.py +++ b/bklog/apps/log_search/tasks/indexsetprecheck.py @@ -20,13 +20,12 @@ the project delivered to anyone in the future. """ -from typing import Dict, List - -from apps.api import BkDataMetaApi, TransferApi -from apps.log_search.models import LogIndexSet, LogIndexSetData, Scenario -from apps.utils.log import logger -from celery.schedules import crontab +from typing import List, Dict from celery.task import periodic_task +from celery.schedules import crontab +from apps.utils.log import logger +from apps.log_search.models import LogIndexSetData, LogIndexSet, Scenario +from apps.api import BkDataMetaApi, TransferApi class IndexSetPreCheckIns(object): @@ -240,7 +239,7 @@ def _update_pre_check_ret_to_db(cls, ret_dict: dict): ) -@periodic_task(run_every=crontab(minute="0", hour="*/12")) +@periodic_task(run_every=crontab(hour="*/12")) def index_set_pre_check(): IndexSetPreCheckIns.pre_check_indexset() return None diff --git a/bklog/apps/log_search/tasks/mapping.py b/bklog/apps/log_search/tasks/mapping.py index f6fe42d17..81dc43e3a 100644 --- a/bklog/apps/log_search/tasks/mapping.py +++ b/bklog/apps/log_search/tasks/mapping.py @@ -19,15 +19,16 @@ We undertake not to change the open source license (MIT license) applicable to the current version of the project delivered to anyone in the future. 
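A note on the crontab changes in cmdb.py and indexsetprecheck.py: celery's crontab defaults omitted fields to "*", so dropping minute="0" means these tasks fire every minute during hours 0 and 12 rather than twice a day (assuming stock celery semantics; sketch for comparison):

from celery.schedules import crontab

every_minute_of_hours_0_and_12 = crontab(hour="*/12")   # minute defaults to "*"
twice_per_day = crontab(minute="0", hour="*/12")        # the pre-change schedule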
""" +from celery.schedules import crontab +from celery.task import periodic_task, task + +from apps.utils.log import logger from apps.exceptions import ApiResultError from apps.log_search.constants import BkDataErrorCode from apps.log_search.models import LogIndexSet -from apps.utils.log import logger -from celery.schedules import crontab -from celery.task import periodic_task, task -@periodic_task(run_every=crontab(minute="*/10")) +@periodic_task(run_every=crontab(minute="*/5")) def sync_index_set_mapping_snapshot(): logger.info("[sync_index_set_mapping_snapshot] task publish start") index_set_list = LogIndexSet.objects.filter(is_active=True) diff --git a/bklog/apps/log_search/views/aggs_views.py b/bklog/apps/log_search/views/aggs_views.py index 15e19901b..eb7bb19b6 100644 --- a/bklog/apps/log_search/views/aggs_views.py +++ b/bklog/apps/log_search/views/aggs_views.py @@ -23,12 +23,12 @@ from rest_framework import serializers from rest_framework.response import Response -from apps.utils.drf import detail_route +from apps.utils.drf import detail_route, list_route from apps.generic import APIViewSet from apps.iam import ActionEnum, ResourceEnum from apps.iam.handlers.drf import InstanceActionPermission from apps.log_search.handlers.search.aggs_handlers import AggsViewAdapter -from apps.log_trace.serializers import AggsTermsSerializer, DateHistogramSerializer +from apps.log_trace.serializers import AggsTermsSerializer, DateHistogramSerializer, UnionSearchDateHistogramSerializer class AggsViewSet(APIViewSet): @@ -171,3 +171,80 @@ def date_histogram(self, request, index_set_id=None): """ data = self.params_valid(DateHistogramSerializer) return Response(AggsViewAdapter().date_histogram(index_set_id, data)) + + @list_route(methods=["POST"], url_path="aggs/union_search/date_histogram") + def union_search_date_histogram(self, request, *args, **kwargs): + """ + @api {post} /search/index_set/aggs/union_search/date_histogram/ 04_Trace-按照时间聚合 + @apiName date_agg_trace_log + @apiDescription 联合检索生成时间线曲线图 + @apiGroup 17_Trace + @apiParam {String} start_time 开始时间 + @apiParam {String} end_time 结束时间 + @apiParam {String} time_range 时间标识符符["15m", "30m", "1h", "4h", "12h", "1d", "customized"] + @apiParam {List} fields 需要聚合字段 需要请求的聚合字段doc_count [{"term_filed": "tags.result_code"}, + {"term_filed": "tags.local_service", "metric_type": "avg", "metric_field": "duration"}] + @apiParam {String} interval 聚合周期,默认为"auto",后台会具体调整 + @apiParam {String} keyword 搜索关键字 + @apiParam {Json} addition 搜索条件 + @apiParamExample {Json} 请求参数 + { + "keyword": "*", + "time_range": "customized", + "start_time": "2023-07-31 16:02:13", + "end_time": "2023-07-31 16:17:13", + "host_scopes": { + "modules": [], + "ips": "", + "target_nodes": [], + "target_node_type": "" + }, + "ip_chooser": {}, + "addition": [], + "begin": 0, + "size": 500, + "interval": "1m", + "pickerTimeRange": [ + "now-30m", + "now" + ], + "index_set_ids": [146,147] + } + + @apiSuccessExample {json} 成功返回: + { + "result": true, + "data": { + "aggs": { + "group_by_histogram": { + "buckets": [ + { + "key_as_string": "16:05", + "key": 1690790700000, + "doc_count": 6 + }, + { + "key_as_string": "16:06", + "key": 1690790760000, + "doc_count": 3 + }, + { + "key_as_string": "16:07", + "key": 1690790820000, + "doc_count": 3 + }, + { + "key_as_string": "16:08", + "key": 1690790880000, + "doc_count": 3 + } + ] + } + } + }, + "code": 0, + "message": "" + } + """ + data = self.params_valid(UnionSearchDateHistogramSerializer) + return 
Response(AggsViewAdapter().union_search_date_histogram(data)) diff --git a/bklog/apps/log_search/views/favorite_search_views.py b/bklog/apps/log_search/views/favorite_search_views.py index 26727e662..19f06302f 100644 --- a/bklog/apps/log_search/views/favorite_search_views.py +++ b/bklog/apps/log_search/views/favorite_search_views.py @@ -141,7 +141,10 @@ def list(self, request, *args, **kwargs): } """ data = self.params_valid(FavoriteListSerializer) - return Response(FavoriteHandler(space_uid=data.get("space_uid")).list_favorites(order_type=data["order_type"])) + return Response(FavoriteHandler(space_uid=data.get("space_uid")).list_favorites( + order_type=data["order_type"], + index_set_type=data["index_set_type"] + )) @list_route(methods=["GET"]) def list_by_group(self, request, *args, **kwargs): @@ -196,7 +199,10 @@ def list_by_group(self, request, *args, **kwargs): """ data = self.params_valid(FavoriteListSerializer) return Response( - FavoriteHandler(space_uid=data.get("space_uid")).list_group_favorites(order_type=data["order_type"]) + FavoriteHandler(space_uid=data.get("space_uid")).list_group_favorites( + order_type=data["order_type"], + index_set_type=data["index_set_type"] + ) ) def create(self, request, *args, **kwargs): @@ -279,6 +285,8 @@ def create(self, request, *args, **kwargs): is_enable_display_fields=data["is_enable_display_fields"], display_fields=data["display_fields"], group_id=data["group_id"], + index_set_ids=data["index_set_ids"], + index_set_type=data["index_set_type"], ) return Response(favorite_search) diff --git a/bklog/apps/log_search/views/search_views.py b/bklog/apps/log_search/views/search_views.py index 64b5b28a4..5e2ba3e27 100644 --- a/bklog/apps/log_search/views/search_views.py +++ b/bklog/apps/log_search/views/search_views.py @@ -43,6 +43,7 @@ RESULT_WINDOW_COST_TIME, ExportStatus, ExportType, + IndexSetType, SearchScopeEnum, ) from apps.log_search.decorators import search_history_record @@ -55,6 +56,7 @@ from apps.log_search.handlers.search.search_handlers_esquery import ( SearchHandler as SearchHandlerEsquery, ) +from apps.log_search.handlers.search.search_handlers_esquery import UnionSearchHandler from apps.log_search.models import AsyncTask, LogIndexSet from apps.log_search.permission import Permission from apps.log_search.serializers import ( @@ -66,6 +68,10 @@ SearchExportSerializer, SearchIndexSetScopeSerializer, SearchUserIndexSetConfigSerializer, + UnionSearchAttrSerializer, + UnionSearchFieldsSerializer, + UnionSearchGetExportHistorySerializer, + UnionSearchHistorySerializer, UpdateIndexSetFieldsConfigSerializer, ) from apps.utils.drf import detail_route, list_route @@ -966,3 +972,378 @@ def history(self, request, *args, **kwargs): """ index_set_id = kwargs.get("index_set_id") return Response(SearchHandlerEsquery.search_history(index_set_id)) + + @list_route(methods=["POST"], url_path="union_search") + @search_history_record + def union_search(self, request, *args, **kwargs): + """ + @api {post} /search/index_set/union_search/ 11_联合检索-日志内容 + @apiName union_search_log + @apiGroup 11_Search + @apiParam {String} start_time 开始时间 + @apiParam {String} end_time 结束时间 + @apiParam {String} time_range 时间标识符符["15m", "30m", "1h", "4h", "12h", "1d", "customized"] + @apiParam {String} keyword 搜索关键字 + @apiParam {Json} ip_chooser IP列表 + @apiParam {Array[Json]} addition 搜索条件 + @apiParam {Int} size 条数 + @apiParam {Array[Json]} union_configs 联合检索索引集配置 + @apiParam {Int} union_configs.index_set_id 索引集ID + @apiParam {Int} union_configs.begin 索引对应的滚动条数 + 
@apiParamExample {Json} 请求参数 + { + "start_time": "2019-06-11 00:00:00", + "end_time": "2019-06-12 11:11:11", + "time_range": "customized" + "keyword": "error", + "host_scopes": { + "modules": [ + { + "bk_obj_id": "module", + "bk_inst_id": 4 + }, + { + "bk_obj_id": "set", + "bk_inst_id": 4 + } + ], + "ips": "127.0.0.1, 127.0.0.2" + }, + "addition": [ + { + "key": "ip", + "method": "is", + "value": "127.0.0.1", + "condition": "and", (默认不传是and,只支持and or) + "type": "field" (默认field 目前支持field,其他无效) + } + ], + "size": 15, + "union_configs": [ + { + "index_set_id": 146, + "begin": 0 + }, + { + "index_set_id": 147, + "begin": 0 + } + ] + } + + @apiSuccessExample {json} 成功返回: + { + "message": "", + "code": 0, + "data": { + "total": 100, + "took": 0.29, + "list": [ + { + "srcDataId": "2087", + "dtEventTimeStamp": 1534825132000, + "moduleName": "公共组件->consul", + "log": "is_cluster-COMMON: ok", + "sequence": 1, + "dtEventTime": "2018-08-21 04:18:52", + "timestamp": 1534825132, + "serverIp": "127.0.0.1", + "errorCode": "0", + "gseIndex": 152358, + "dstDataId": "2087", + "worldId": "-1", + "logTime": "2018-08-21 12:18:52", + "path": "/tmp/health_check.log", + "platId": 0, + "localTime": "2018-08-21 04:18:00" + } + ], + "origin_log_list": [ + { + "srcDataId": "2087", + "dtEventTimeStamp": 1534825132000, + "moduleName": "公共组件->consul", + "log": "is_cluster-COMMON: ok", + "sequence": 1, + "dtEventTime": "2018-08-21 04:18:52", + "timestamp": 1534825132, + "serverIp": "127.0.0.1", + "errorCode": "0", + "gseIndex": 152358, + "dstDataId": "2087", + "worldId": "-1", + "logTime": "2018-08-21 12:18:52", + "path": "/tmp/health_check.log", + "platId": 0, + "localTime": "2018-08-21 04:18:00" + } + ], + "union_configs": [ + { + "index_set_id": 146, + "begin": 7 + }, + { + "index_set_id": 147, + "begin": 3 + } + ] + + }, + "result": true + } + """ + data = self.params_valid(UnionSearchAttrSerializer) + return Response(UnionSearchHandler(data).union_search()) + + @list_route(methods=["POST"], url_path="union_search/fields") + def union_search_fields(self, request, *args, **kwargs): + """ + @api {POST} /search/index_set/union_search/fields/?scope=search_context 联合检索-获取索引集配置 + @apiDescription 联合检索-获取字段Mapping字段信息 + @apiName union_search_fields + @apiGroup 11_Search + @apiParam {String} [start_time] 开始时间(非必填) + @apiParam {String} [end_time] 结束时间(非必填) + @apiParam {Array[Int]} [index_set_ids] 索引集ID + @apiSuccess {String} display_fields 列表页显示的字段 + @apiSuccess {String} fields.field_name 字段名 + @apiSuccess {String} fields.field_alias 字段中文称 (为空时会直接取description) + @apiSuccess {String} fields.description 字段说明 + @apiSuccess {String} fields.field_type 字段类型 + @apiSuccess {Bool} fields.is_display 是否显示给用户 + @apiSuccess {Bool} fields.is_editable 是否可以编辑(是否显示) + @apiSuccess {Bool} fields.es_doc_values 是否聚合字段 + @apiSuccess {Bool} fields.is_analyzed 是否分词字段 + @apiSuccess {String} time_field 时间字段 + @apiSuccess {String} time_field_type 时间字段类型 + @apiSuccess {String} time_field_unit 时间字段单位 + @apiSuccessExample {json} 成功返回: + { + "message": "", + "code": 0, + "data": { + "display_fields": ["dtEventTimeStamp", "log"], + "fields": [ + { + "field_name": "log", + "field_alias": "日志", + "field_type": "text", + "is_display": true, + "is_editable": true, + "description": "日志", + "es_doc_values": false + }, + { + "field_name": "dtEventTimeStamp", + "field_alias": "时间", + "field_type": "date", + "is_display": true, + "is_editable": true, + "description": "描述", + "es_doc_values": true + } + ], + }, + "result": true + } + """ + data = 
self.params_valid(UnionSearchFieldsSerializer) + return Response(UnionSearchHandler().union_search_fields(data)) + + @list_route(methods=["GET"], url_path="union_search/export") + def union_search_export(self, request, *args, **kwargs): + """ + @api {get} /search/index_set/union_search/export/ 14_联合检索-导出日志 + @apiName search_log_export + @apiGroup 11_Search + @apiParam {Dict} export_dict 序列化后的查询字典 + @apiParam {String} start_time 开始时间 + @apiParam {String} end_time 结束时间 + @apiParam {String} time_range 时间标识符符["15m", "30m", "1h", "4h", "12h", "1d", "customized"] + @apiParam {String} keyword 搜索关键字 + @apiParam {Json} ip IP列表 + @apiParam {Json} addition 搜索条件 + @apiParam {Int} start 起始位置 + @apiParam {Array} index_set_ids 索引集列表 + @apiDescription 直接下载结果 + @apiParamExample {Json} 请求参数 + /search/index_set/union_search/export/ + ?export_dict={"start_time":"2019-06-26 00:00:00","end_time":"2019-06-27 11:11:11","time_range":"customized", + "keyword":"error","host_scopes":{"modules":[{"bk_obj_id":"module","bk_inst_id":4}, + {"bk_obj_id":"set","bk_inst_id":4}],"ips":"127.0.0.1, 127.0.0.2"}, + "addition":[{"field":"ip","operator":"eq","value":[]}],"begin":0,"size":10000,"index_set_ids": [146, 147]} + + @apiSuccessExample text/plain 成功返回: + {"a": "good", "b": {"c": ["d", "e"]}} + {"a": "good", "b": {"c": ["d", "e"]}} + {"a": "good", "b": {"c": ["d", "e"]}} + """ + + params = self.params_valid(SearchExportSerializer).get("export_dict") + data = json.loads(params) + request_data = copy.deepcopy(data) + index_set_ids = sorted(data.get("index_set_ids", [])) + + output = StringIO() + search_handler = UnionSearchHandler(search_dict=data) + result = search_handler.union_search(is_export=True) + result_list = result.get("origin_log_list") + for item in result_list: + output.write(f"{json.dumps(item, ensure_ascii=False)}\n") + response = HttpResponse(output.getvalue()) + response["Content-Type"] = "application/x-msdownload" + + file_name = "bk_log_union_search_{}.txt".format("_".join([str(i) for i in index_set_ids])) + file_name = parse.quote(file_name, encoding="utf8") + file_name = parse.unquote(file_name, encoding="ISO8859_1") + response["Content-Disposition"] = 'attachment;filename="{}"'.format(file_name) + + # 保存下载历史 + AsyncTask.objects.create( + request_param=request_data, + result=True, + completed_at=timezone.now(), + export_status=ExportStatus.SUCCESS, + start_time=data["start_time"], + end_time=data["end_time"], + export_type=ExportType.SYNC, + index_set_ids=index_set_ids, + index_set_type=IndexSetType.UNION.value, + bk_biz_id=data.get("bk_biz_id"), + ) + + return response + + @list_route(methods=["GET"], url_path="union_search/export_history") + def union_search_get_export_history(self, request, *args, **kwargs): + """ + @api {get} /search/index_set/union_search/export_history/?page=1&pagesize=10 联合检索-导出历史 + @apiDescription 联合检索-导出历史 + @apiName export_history + @apiGroup 11_Search + @apiParam {Int} index_set_id 索引集id + @apiParam {Int} page 当前页 + @apiParam {Int} pagesize 页面大小 + @apiParam {Bool} show_all 是否展示所有历史 + @apiParam {String} index_set_ids 索引集ID "146,147" + @apiSuccess {Int} total 返回大小 + @apiSuccess {list} list 返回结果列表 + @apiSuccess {Int} list.id 导出历史任务id + @apiSuccess {Int} list.log_index_set_id 导出索引集id + @apiSuccess {Str} list.search_dict 导出请求参数 + @apiSuccess {Str} list.start_time 导出请求所选择开始时间 + @apiSuccess {Str} list.end_time 导出请求所选择结束时间 + @apiSuccess {Str} list.export_type 导出请求类型 + @apiSuccess {Str} list.export_status 导出状态 + @apiSuccess {Str} list.error_msg 导出请求异常原因 + @apiSuccess {Str} 
list.download_url 异步导出下载地址 + @apiSuccess {Str} list.export_pkg_name 异步导出打包名 + @apiSuccess {int} list.export_pkg_size 异步导出包大小 单位M + @apiSuccess {Str} list.export_created_at 异步导出创建时间 + @apiSuccess {Str} list.export_created_by 异步导出创建者 + @apiSuccess {Str} list.export_completed_at 异步导出成功时间 + @apiSuccess {Bool} list.download_able 是否可下载(不可下载禁用下载按钮且hover提示"下载链接过期") + @apiSuccess {Bool} list.retry_able 是否可重试(不可重试禁用对应按钮且hover提示"数据源过期") + @apiSuccessExample {json} 成功返回: + { + "result": true, + "data": { + "total": 1, + "list": [ + { + "id": 25, + "search_dict": { + "size": 100, + "begin": 0, + "keyword": "*", + "addition": [], + "end_time": "2023-08-02 17:26:33", + "interval": "auto", + "ip_chooser": {}, + "start_time": "2023-08-02 17:11:33", + "time_range": "customized", + "host_scopes": { + "ips": "", + "modules": [], + "target_nodes": [], + "target_node_type": "" + }, + "export_fields": [], + "index_set_ids": [ + 146, + 147 + ] + }, + "start_time": "2023-08-02 17:11:33", + "end_time": "2023-08-02 17:26:33", + "export_type": "sync", + "export_status": "success", + "error_msg": null, + "download_url": null, + "export_pkg_name": null, + "export_pkg_size": null, + "export_created_at": "2023-08-02T09:32:33.547018Z", + "export_created_by": "admin", + "export_completed_at": "2023-08-02T09:32:32.303892Z", + "download_able": true, + "retry_able": true, + "index_set_type": "union", + "index_set_ids": [ + 146, + 147 + ] + } + ] + }, + "code": 0, + "message": "" + } + """ + data = self.params_valid(UnionSearchGetExportHistorySerializer) + index_set_ids = sorted([int(index_set_id) for index_set_id in data["index_set_ids"].split(",")]) + return AsyncExportHandlers(index_set_ids=index_set_ids, bk_biz_id=data["bk_biz_id"]).get_export_history( + request=request, view=self, show_all=data["show_all"], is_union_search=True + ) + + @list_route(methods=["GET"], url_path="union_search/history") + def union_search_history(self, request, *args, **kwargs): + """ + @api {get} /search/index_set/union_search/history/ 06_搜索-检索历史 + @apiDescription 检索历史记录 + @apiName union_search_index_set_user_history + @apiGroup 11_Search + @apiSuccessExample {json} 成功返回: + { + "message": "", + "code": 0, + "data": [ + { + "id": 13, + "params": { + "keyword": "*", + "host_scopes": { + "modules": [ + { + "bk_inst_id": 25, + "bk_obj_id": "module" + } + ], + "ips": "127.0.0.1,127.0.0.2" + }, + "addition": [ + { + "field": "cloudId", + "operator": "is", + "value": "0" + } + ] + }, + "query_string": "keyword:* ADN modules:25 AND ips:127.0.0.1,127.0.0.2" + }], + "result": true + } + """ + data = self.params_valid(UnionSearchHistorySerializer) + index_set_ids = sorted([int(index_set_id) for index_set_id in data["index_set_ids"].split(",")]) + return Response(SearchHandlerEsquery.search_history(index_set_ids=index_set_ids, is_union_search=True)) diff --git a/bklog/apps/log_trace/serializers.py b/bklog/apps/log_trace/serializers.py index 5264cf186..a5addd61b 100644 --- a/bklog/apps/log_trace/serializers.py +++ b/bklog/apps/log_trace/serializers.py @@ -123,3 +123,6 @@ def validate(self, attrs): fields = serializers.ListField(child=DateHistogramFieldSerializer(), required=False, default=[]) interval = serializers.CharField(required=False, default="auto", max_length=16) + +class UnionSearchDateHistogramSerializer(DateHistogramSerializer): + index_set_ids = serializers.ListField(label=_("索引集ID列表"), required=True, allow_empty=False, child=serializers.IntegerField()) diff --git a/bklog/apps/middleware/api_token_middleware.py 
diff --git a/bklog/apps/middleware/api_token_middleware.py b/bklog/apps/middleware/api_token_middleware.py
deleted file mode 100644
index 8299aa1f1..000000000
--- a/bklog/apps/middleware/api_token_middleware.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from apps.log_commons.models import ApiAuthToken
-from blueapps.account import get_user_model
-from blueapps.account.middlewares import LoginRequiredMiddleware
-from django.contrib import auth
-from django.contrib.auth.backends import ModelBackend
-from django.http import HttpResponseForbidden
-
-
-class ApiTokenAuthBackend(ModelBackend):
-    def authenticate(self, request, username=None, **kwargs):
-        try:
-            user_model = get_user_model()
-            user, _ = user_model.objects.get_or_create(username=username, defaults={"nickname": username})
-        except Exception:
-            return None
-        return user
-
-
-class ApiTokenAuthenticationMiddleware(LoginRequiredMiddleware):
-    def process_view(self, request, view, *args, **kwargs):
-        if "HTTP_X_BKLOG_SPACE_UID" in request.META and "HTTP_X_BKLOG_TOKEN" in request.META:
-            space_uid = request.META["HTTP_X_BKLOG_SPACE_UID"]
-            token = request.META["HTTP_X_BKLOG_TOKEN"]
-            try:
-                record = ApiAuthToken.objects.get(token=token, space_uid=space_uid)
-            except ApiAuthToken.DoesNotExist:
-                record = None
-
-            if not record:
-                return HttpResponseForbidden("not valid token")
-
-            if record.is_expired():
-                return HttpResponseForbidden("token has expired")
-
-            # grafana、as_code场景权限模式:替换请求用户为令牌创建者
-            if record.type.lower() in ["grafana"]:
-                user = auth.authenticate(username="system")
-                auth.login(request, user, backend="apps.middleware.api_token_middleware.ApiTokenAuthBackend")
-                request.skip_check = True
-            else:
-                request.token = token
-            return
-
-        return super(ApiTokenAuthenticationMiddleware, self).process_view(request, view, *args, **kwargs)
diff --git a/bklog/apps/models.py b/bklog/apps/models.py
index f1e76cd26..2f16713e6 100644
--- a/bklog/apps/models.py
+++ b/bklog/apps/models.py
@@ -21,13 +21,14 @@
 """
 import json
 
-from apps.utils.base_crypt import BaseCrypt
-from apps.utils.local import get_request_username
-from django.core import exceptions
 from django.core.serializers.json import DjangoJSONEncoder
-from django.db import models
-from django.utils import timezone
 from django.utils.translation import ugettext_lazy as _
+from django.utils import timezone
+from django.core import exceptions
+from django.db import models
+
+from apps.utils.base_crypt import BaseCrypt
+from apps.utils.local import get_request_username
 
 
 class JSONEncoderSupportSet(DjangoJSONEncoder):
@@ -185,9 +186,7 @@ class OperateRecordQuerySet(models.query.QuerySet):
     """
 
     def update(self, **kwargs):
-        if get_request_username(default=""):
-            # 非用户请求,不对更新人进行修改
-            kwargs.update({"updated_at": timezone.now(), "updated_by": get_request_username()})
+        kwargs.update({"updated_at": timezone.now(), "updated_by": get_request_username()})
 
         super().update(**kwargs)
 
@@ -221,10 +220,8 @@ def save(self, *args, **kwargs):
             self.created_at = timezone.now()
             self.created_by = get_request_username()
 
-        if get_request_username(default="") or not self.updated_by:
-            # 当前是web请求,或者原先没有设置updated_by,则进行更新
-            self.updated_at = timezone.now()
-            self.updated_by = get_request_username()
+        self.updated_at = timezone.now()
+        self.updated_by = get_request_username()
 
         super().save(*args, **kwargs)
 
     class Meta:
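After this revert, `update()` and `save()` stamp `updated_at`/`updated_by` unconditionally again. A self-contained sketch of that audit-stamping pattern; `get_request_username` is a stand-in here for the thread-local helper in `apps.utils.local`:

```python
from django.db import models
from django.utils import timezone


def get_request_username():
    """Stand-in for apps.utils.local.get_request_username."""
    return "admin"


class AuditedQuerySet(models.query.QuerySet):
    """Every bulk update() also refreshes the audit fields."""

    def update(self, **kwargs):
        kwargs.update({"updated_at": timezone.now(), "updated_by": get_request_username()})
        return super().update(**kwargs)


# Usage (assuming a model whose manager uses AuditedQuerySet and which has
# updated_at / updated_by columns):
#   SomeModel.objects.filter(pk=1).update(status="done")
# also sets updated_at=now() and updated_by="admin".
```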
diff --git a/bklog/apps/tests/log_desensitize/test_desensitize_handle.py b/bklog/apps/tests/log_desensitize/test_desensitize_handle.py
index 25e717bbb..e1fdf8399 100644
--- a/bklog/apps/tests/log_desensitize/test_desensitize_handle.py
+++ b/bklog/apps/tests/log_desensitize/test_desensitize_handle.py
@@ -23,39 +23,52 @@
 
 from apps.log_desensitize.constants import DesensitizeOperator
 from apps.log_desensitize.handlers.desensitize import DesensitizeHandler
+from apps.log_desensitize.handlers.entity.desensitize_config_entity import DesensitizeConfigEntity
 
 
 class TestDesensitizeOperator(TestCase):
     """
     脱敏工厂单元测试
     """
+
+    def test_transform_text(self):
+        entity_test1 = DesensitizeConfigEntity(
+            operator=DesensitizeOperator.MASK_SHIELD.value,
+            params={
+                "preserve_head": 3,
+                "preserve_tail": 3,
+            }
+        )
+        text = "13234345678"
+        desensitize_config_list = [entity_test1]
+
+        self.assertEqual(
+            DesensitizeHandler(
+                desensitize_config_list=desensitize_config_list
+            ).transform_text(text), "132*****678")
+
     def test_transform_dict(self):
-        param_1 = {
-            "field_name": "test_field_1",
-            "rule_id": 0,
-            "operator": DesensitizeOperator.MASK_SHIELD.value,
-            "params": {
+        entity_param_1 = DesensitizeConfigEntity(
+            field_name="test_field_1",
+            operator=DesensitizeOperator.MASK_SHIELD.value,
+            params={
                 "preserve_head": 3,
                 "preserve_tail": 3,
-            },
-            "match_pattern": "",
-            "sort_index": 0
-        }
-        param_2 = {
-            "field_name": "test_field_2",
-            "rule_id": 0,
-            "operator": DesensitizeOperator.TEXT_REPLACE.value,
-            "params": {
+            }
+        )
+        entity_param_2 = DesensitizeConfigEntity(
+            field_name="test_field_2",
+            operator=DesensitizeOperator.TEXT_REPLACE.value,
+            params={
                 "template_string": "abc${partNum}defg",
             },
-            "match_pattern": r"\d{3}(?P<partNum>\d{4})\d{4}",
-            "sort_index": 1
-        }
+            match_pattern=r"\d{3}(?P<partNum>\d{4})\d{4}"
+        )
 
         text = {"test_field_1": "13234345678", "test_field_2": "13234345678"}
 
-        desensitize_config_info = [param_1, param_2]
+        desensitize_config_list = [entity_param_1, entity_param_2]
 
-        result = DesensitizeHandler(desensitize_config_info=desensitize_config_info).transform_dict(text)
+        result = DesensitizeHandler(desensitize_config_list=desensitize_config_list).transform_dict(text)
 
         self.assertEqual(result.get("test_field_1"), "132*****678")
         self.assertEqual(result.get("test_field_2"), "abc3434defg")
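A worked illustration of the MASK_SHIELD behaviour the test asserts: keep the first and last N characters and shield the middle. This is a self-contained re-implementation for clarity, not the handler's actual code:

```python
def mask_shield(text: str, preserve_head: int, preserve_tail: int, shield_char: str = "*") -> str:
    """Keep preserve_head leading and preserve_tail trailing chars, mask the rest."""
    middle = max(len(text) - preserve_head - preserve_tail, 0)
    return text[:preserve_head] + shield_char * middle + text[len(text) - preserve_tail:]


# "13234345678" has 11 chars: 3 kept + 5 masked + 3 kept, matching the test.
assert mask_shield("13234345678", preserve_head=3, preserve_tail=3) == "132*****678"
```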
""" +import arrow + +from django.test import TestCase from unittest.mock import patch -import arrow from apps.log_search.constants import LOG_ASYNC_FIELDS from apps.log_search.handlers.search.search_handlers_esquery import SearchHandler -from django.test import TestCase INDEX_SET_ID = 0 SEARCH_DICT = {"size": 100000} @@ -80,11 +81,11 @@ class TestSearchHandler(TestCase): lambda _, index_set_id: "", ) @patch( - "apps.log_search.handlers.search.search_handlers_esquery.SearchHandler.init_time_field", + "apps.log_search.handlers.search.search_handlers_esquery.SearchHandler._init_time_field", lambda _, index_set_id, scenario_id: ("dtEventTimeStamp", "time", "s"), ) @patch( - "apps.log_search.handlers.search.mapping_handlers.MappingHandlers._get_time_field", lambda _: "dtEventTimeStamp" + "apps.log_search.handlers.search.mapping_handlers.MappingHandlers.get_time_field", lambda _: "dtEventTimeStamp" ) def setUp(self) -> None: self.search_handler = SearchHandler(index_set_id=INDEX_SET_ID, search_dict=SEARCH_DICT, pre_check_enable=False) diff --git a/bklog/apps/utils/local.py b/bklog/apps/utils/local.py index 0a99157ee..105354281 100644 --- a/bklog/apps/utils/local.py +++ b/bklog/apps/utils/local.py @@ -23,13 +23,15 @@ """ 记录线程变量 """ -import sys # noqa import uuid # noqa +import sys # noqa from threading import local # noqa -from apps.exceptions import BaseException # noqa from django.conf import settings # noqa +from apps.exceptions import BaseException # noqa + + _local = local() @@ -44,15 +46,13 @@ def activate_request(request, request_id=None): return request -def get_request(peaceful=False): +def get_request(): """ 获取线程请求request """ try: return _local.request except AttributeError: - if peaceful: - return None raise BaseException(u"request thread error!") @@ -66,7 +66,7 @@ def get_request_id(): return str(uuid.uuid4()) -def get_request_username(default="admin"): +def get_request_username(): """ 获取请求的用户名 """ @@ -76,7 +76,7 @@ def get_request_username(default="admin"): with ignored(Exception): username = get_request().user.username if not username and "celery" in sys.argv: - username = default + username = "admin" return username diff --git a/bklog/apps/utils/thread.py b/bklog/apps/utils/thread.py index e6cf776d9..fd7d01e3a 100644 --- a/bklog/apps/utils/thread.py +++ b/bklog/apps/utils/thread.py @@ -34,7 +34,7 @@ class FuncThread: - def __init__(self, func, params, result_key, results, use_request=True): + def __init__(self, func, params, result_key, results, use_request=True, multi_func_params=False): self.func = func self.params = params self.result_key = result_key @@ -45,6 +45,7 @@ def __init__(self, func, params, result_key, results, use_request=True): self.requests = get_request() self.trace_context = get_current() self.timezone = get_local_param("time_zone") + self.multi_func_params = multi_func_params def _init_context(self): with ignored(Exception): @@ -59,7 +60,7 @@ def run(self): if self.use_request and self.requests: activate_request(self.requests) if self.params: - self.results[self.result_key] = self.func(self.params) + self.results[self.result_key] = self.func(self.params) if not self.multi_func_params else self.func(**self.params) else: self.results[self.result_key] = self.func() @@ -78,11 +79,11 @@ def __init__(self, max_workers=None): self.task_list = [] self.max_workers = max_workers - def append(self, result_key, func, params=None, use_request=True): + def append(self, result_key, func, params=None, use_request=True, multi_func_params=False): if result_key in self.results: 
diff --git a/bklog/bk_dataview/grafana/provisioning.py b/bklog/bk_dataview/grafana/provisioning.py
index 7913d143d..3718f53ee 100644
--- a/bklog/bk_dataview/grafana/provisioning.py
+++ b/bklog/bk_dataview/grafana/provisioning.py
@@ -24,7 +24,7 @@
 import logging
 import os.path
 from dataclasses import dataclass
-from typing import Dict, List, Union
+from typing import Dict, Union, List
 
 import yaml
 
@@ -52,7 +52,6 @@ class Datasource:
     version: int = 0
     orgId: int = -1
     is_delete: bool = False
-    secureJsonData: Union[None, Dict] = None
 
 
 @dataclass
diff --git a/bklog/bkm_ipchooser/tools/gse_tool.py b/bklog/bkm_ipchooser/tools/gse_tool.py
index b50fbdd1b..4205057c0 100644
--- a/bklog/bkm_ipchooser/tools/gse_tool.py
+++ b/bklog/bkm_ipchooser/tools/gse_tool.py
@@ -39,9 +39,7 @@ def fill_agent_status(cls, cc_hosts: List[Dict]) -> List[Dict]:
             host_map[f"{bk_cloud_id}:{ip}"] = index
             index += 1
 
-        # 如果没有有效的主机,则直接返回
-        if not hosts:
-            return cc_hosts
+
         try:
             # 添加no_request参数, 多线程调用时,保证用户信息不漏传
             status_map = BkApi.get_agent_status({"hosts": hosts, "no_request": True})
@@ -70,9 +68,7 @@ def fill_agent_status(cls, cc_hosts: List[Dict]) -> List[Dict]:
             agent_id_list.append(bk_agent_id)
             host_map[bk_agent_id] = index
             index += 1
 
-        # 如果没有有效的主机,则直接返回
-        if not agent_id_list:
-            return cc_hosts
+
         try:
             # 添加no_request参数, 多线程调用时,保证用户信息不漏传
             agents = BkApi.get_agent_status_v2({"agent_id_list": agent_id_list, "no_request": True})
diff --git a/bklog/config/default.py b/bklog/config/default.py
index 463a3ce4d..22eb5d0f7 100644
--- a/bklog/config/default.py
+++ b/bklog/config/default.py
@@ -117,9 +117,7 @@
     # Auth middleware
     "blueapps.account.middlewares.BkJwtLoginRequiredMiddleware",
     "blueapps.account.middlewares.WeixinLoginRequiredMiddleware",
-    # "blueapps.account.middlewares.LoginRequiredMiddleware",
-    # 注释掉是因为ApiTokenAuthenticationMiddleware中针对非TOKEN校验的会继承父类
-    "apps.middleware.api_token_middleware.ApiTokenAuthenticationMiddleware",
+    "blueapps.account.middlewares.LoginRequiredMiddleware",
     # exception middleware
     "blueapps.core.exceptions.middleware.AppExceptionMiddleware",
     # 自定义中间件
@@ -474,7 +472,6 @@ def redirect_func(request):
 # ===============================================================================
 AUTH_USER_MODEL = "account.User"
 AUTHENTICATION_BACKENDS = (
-    "apps.middleware.api_token_middleware.ApiTokenAuthBackend",
     "blueapps.account.backends.BkJwtBackend",
     "blueapps.account.backends.UserBackend",
     "django.contrib.auth.backends.ModelBackend",
 )
@@ -833,9 +830,6 @@ def redirect_func(request):
 BKMONITOR_CUSTOM_PROXY_IP = os.environ.get(
     "BKAPP_BKMONITOR_CUSTOM_PROXY_IP", "http://report.bkmonitorv3.service.consul:10205"
 )
-# 蓝鲸监控平台的业务ID
-BKMONITOR_BK_BIZ_ID = os.environ.get("BKAPP_BKMONITOR_BK_BIZ_ID", BLUEKING_BK_BIZ_ID)
-TABLE_TRANSFER = os.environ.get("BKAPP_TABLE_TRANSFER", "pushgateway_transfer_metircs.base")
 
 # ===============================================================================
 # EsQuery
@@ -977,9 +971,6 @@ def redirect_func(request):
 # 容器下发CR全局标签
 CONTAINER_COLLECTOR_CR_LABEL_BKENV: str = os.getenv("BKAPP_CONTAINER_COLLECTOR_CR_LABEL_BKENV", "")
 
-# 是否开启RETAIN_EXTRA_JSON
-RETAIN_EXTRA_JSON = os.getenv("BKAPP_RETAIN_EXTRA_JSON", "off") == "on"
-
 # ==============================================================================
 # Templates
 # ==============================================================================
@@ -1092,10 +1083,6 @@ def redirect_func(request):
 CHECK_COLLECTOR_SWITCH: bool = os.getenv("CHECK_COLLECTOR_SWITCH", "off") == "on"
 # ==============================================================================
 
-# ==============================================================================
-# HTTPS 代理转发
-SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
-# ==============================================================================
 
 """
 以下为框架代码 请勿修改
diff --git a/bklog/docs/apidocs/bk_log.yaml b/bklog/docs/apidocs/bk_log.yaml
index 0c3d8bc71..76ac13a13 100644
--- a/bklog/docs/apidocs/bk_log.yaml
+++ b/bklog/docs/apidocs/bk_log.yaml
@@ -652,28 +652,6 @@
   dest_http_method: POST
   is_hidden: True
 
-- path: /v2/bk_log/grafana/custom_es_datasource/{index_set_id}/_mapping
-  name: custom_es_datasource_mapping
-  label: 日志平台-自定义ES数据源获取Mapping
-  label_en: bk_log es grafana_custom_es_datasource mapping
-  method: GET
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /grafana/custom_es_datasource/{index_set_id}/_mapping
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/grafana/custom_es_datasource/_msearch
-  name: custom_es_datasource_msearch
-  label: 日志平台-自定义ES数据源msearch
-  label_en: bk_log es grafana_custom_es_datasource msearch
-  method: POST
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /grafana/custom_es_datasource/_msearch
-  dest_http_method: POST
-  is_hidden: True
-
 - path: /v2/bk_log/grafana/target_tree/
   name: grafana_target_tree
   label: 日志平台-grafana查询接口
@@ -1038,7 +1016,7 @@
   is_hidden: True
 
 - path: /v2/bk_log/search_index_set/{index_set_id}/search/
-  name: search_log
+  name: search log
   label: 日志平台-日志检索
   label_en: bk_log search log
   method: POST
@@ -1124,69 +1102,3 @@
   dest_path: /api/v1/search/index_set/aggs/union_search/date_histogram/
   dest_http_method: POST
   is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/list_namespace/
-  name: databus_collector_list_bcs_namespace
-  label: 日志平台-获取集群namespace列表
-  label_en: bk_log list bcs cluster namespace
-  method: GET
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/list_namespace/
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/list_workload_type/
-  name: databus_collector_list_bcs_workload_type
-  label: 日志平台-获取集群工作负载类型列表
-  label_en: bk_log list bcs cluster workload type
-  method: GET
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/list_workload_type/
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/get_workload/
-  name: databus_collector_list_bcs_workload
-  label: 日志平台-获取集群工作负载列表
-  label_en: bk_log list bcs cluster workload
-  method: GET
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/get_workload/
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/get_labels/
-  name: databus_collector_list_bcs_labels
-  label: 日志平台-获取集群标签列表
-  label_en: bk_log list bcs cluster labels
-  method: POST
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/get_labels/
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/list_topo/
-  name: databus_collector_list_bcs_topo
-  label: 日志平台-获取集群标签TOPO
-  label_en: bk_log list bcs cluster topo
-  method: POST
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/list_topo/
-  dest_http_method: GET
-  is_hidden: True
-
-- path: /v2/bk_log/databus_collectors/preview_containers/
-  name: databus_collector_list_bcs_container_preview
-  label: 日志平台-获取集群container预览
-  label_en: bk_log list bcs cluster container preview
-  method: POST
-  api_type: query
-  comp_codename: generic.v2.bk_log.bk_log_component
-  dest_path: /api/v1/databus/collectors/preview_containers/
-  dest_http_method: POST
-  is_hidden: True
diff --git a/bklog/home_application/handlers/healthz_metrics/service_module.py b/bklog/home_application/handlers/healthz_metrics/service_module.py
index c97757dfe..f0db8b17b 100644
--- a/bklog/home_application/handlers/healthz_metrics/service_module.py
+++ b/bklog/home_application/handlers/healthz_metrics/service_module.py
@@ -19,17 +19,15 @@
 We undertake not to change the open source license (MIT license) applicable to the current version of
 the project delivered to anyone in the future.
 """
-import logging
+import os
 import time
+import logging
 
 import requests
-from django.conf import settings
+from settings import SERVICE_LISTENING_DOMAIN
 from django.utils.translation import ugettext as _
-from home_application.handlers.metrics import (
-    HealthzMetric,
-    NamespaceData,
-    register_healthz_metric,
-)
+
+from home_application.handlers.metrics import register_healthz_metric, HealthzMetric, NamespaceData
 
 logger = logging.getLogger()
 
@@ -52,19 +50,20 @@ def ping():
         data = []
         result = HealthzMetric(status=False, metric_name="home")
         start_time = time.time()
-        url = settings.BK_IAM_RESOURCE_API_HOST
-        if not url:
+        if not SERVICE_LISTENING_DOMAIN:
             result.status = True
             result.message = _("监听域名未配置, 跳过检查")
             data.append(result)
             return data
+        port = os.environ.get("PORT", 8000)
+        url = f"{SERVICE_LISTENING_DOMAIN}:{port}/"
         try:
             resp = requests.get(url)
             if resp.status_code == 200:
                 result.status = True
             else:
                 result.message = f"failed to call {url}, status_code: {resp.status_code}, msg: {resp.text}"
-                result.suggestion = _("确认服务是否异常, 若无异常, 则检查配置settings.BK_IAM_RESOURCE_API_HOST是否正确")
+                result.suggestion = _("确认服务是否异常, 若无异常, 则检查环境变量SERVICE_LISTENING_DOMAIN是否配置正确")
         except Exception as e:  # pylint: disable=broad-except
             logger.error(f"failed to call {url}, err: {e}")
         return data
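A condensed sketch of the health probe the revert restores: the URL is built from the listening domain plus the `PORT` environment variable, an unset domain means the check is skipped, and HTTP 200 counts as healthy. The function name and return type here are simplified assumptions:

```python
import os

import requests


def ping(domain: str) -> bool:
    """Return True when the service answers 200 on its listening address."""
    if not domain:
        return True  # 监听域名未配置, 跳过检查 (domain not configured: skip)
    url = f"{domain}:{os.environ.get('PORT', 8000)}/"
    try:
        return requests.get(url, timeout=5).status_code == 200
    except requests.RequestException:
        return False
```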
"correct_config_id": null, + "is_open_correct": false, + "id": 447375, + "from_nodes": [ + { + "id": 447364, + "from_result_table_ids": [ + "{{log_count_aggregation.result_table_id}}" + ] + } + ], + "node_type": "realtime", + "frontend_info": { + "x": 513, + "y": 271 + } + }, { "name": "{{log_count_aggregation.log_count_aggregation.table_name}}({{log_count_aggregation.storage_type}})", "result_table_id": "{{log_count_aggregation.log_count_aggregation.result_table_id}}", @@ -72,5 +107,28 @@ "x": 796, "y": 172 } + }, + { + "result_table_id": "{{log_count_aggregation.log_count_pattern_format.result_table_id}}", + "name": "{{log_count_aggregation.log_count_pattern_format.table_name}}(hdfs_storage)", + "bk_biz_id": {{log_count_aggregation.bk_biz_id}}, + "cluster": "{{log_count_aggregation.cluster}}", + "expires": {{log_count_aggregation.hdfs_storage.expires}}, + "dimension_table": false, + "storage_keys": [ ], + "id": 447377, + "from_nodes": [ + { + "id": 447375, + "from_result_table_ids": [ + "{{log_count_aggregation.log_count_pattern_format.result_table_id}}" + ] + } + ], + "node_type": "hdfs_storage", + "frontend_info": { + "x": 796, + "y": 280 + } } ] \ No newline at end of file diff --git a/bklog/templates/flow/predict_flow.json b/bklog/templates/flow/predict_flow.json index 8216be6e0..f8658991d 100644 --- a/bklog/templates/flow/predict_flow.json +++ b/bklog/templates/flow/predict_flow.json @@ -668,7 +668,7 @@ "bk_biz_id": {{predict.bk_biz_id}}, "name": "合并日志", "table_name": "{{predict.merge_log.table_name}}", - "output_name": "{{predict.merge_log.table_name}}", + "output_name": "{{predict.merge_log.result_table_id}}", "description": "合并日志", "config": [], "id": 431246, diff --git a/bklog/templates/flow/predict_flow_bkdata.json b/bklog/templates/flow/predict_flow_bkdata.json index 0d843d2c2..58ff2aaab 100644 --- a/bklog/templates/flow/predict_flow_bkdata.json +++ b/bklog/templates/flow/predict_flow_bkdata.json @@ -668,7 +668,7 @@ "bk_biz_id": {{predict.bk_biz_id}}, "name": "合并日志", "table_name": "{{predict.merge_log.table_name}}", - "output_name": "{{predict.merge_log.table_name}}", + "output_name": "{{predict.merge_log.result_table_id}}", "description": "合并日志", "config": [], "id": 431246, diff --git a/bklog/web/src/common/util.js b/bklog/web/src/common/util.js index caa829807..8e23a49e5 100644 --- a/bklog/web/src/common/util.js +++ b/bklog/web/src/common/util.js @@ -28,7 +28,6 @@ import html2canvas from 'html2canvas'; import JSONBigNumber from 'json-bignumber'; -import store from '../store'; /** * 函数柯里化 * @@ -776,142 +775,3 @@ export const setDefaultSettingSelectFiled = (key, filed) => { selectObj[key] = filed; localStorage.setItem('TABLE_SELECT_FILED', JSON.stringify(selectObj)); }; - -/** - * 防抖装饰器 - * @param delay - */ -export const Debounce = (delay = 200) => (target, key, descriptor) => { - const originFunction = descriptor.value; - const getNewFunction = () => { - let timer; - const newFunction = function (...args) { - if (timer) window.clearTimeout(timer); - timer = setTimeout(() => { - originFunction.call(this, ...args); - }, delay); - }; - return newFunction; - }; - descriptor.value = getNewFunction(); - return descriptor; -}; - -/** - * 获取 row[key] 内容 - * @example return row.a.b || row['a.b'] - * @param {Object} row - * @param {String} key - * @param {String} fieldType - * @param {Boolean} isFormatDate - * @return {String|Number} - */ -export const parseTableRowData = (row, key, fieldType, isFormatDate = store.state.isFormatDate) => { - const keyArr = key.split('.'); - let data; - 
diff --git a/bklog/web/src/common/util.js b/bklog/web/src/common/util.js
index caa829807..8e23a49e5 100644
--- a/bklog/web/src/common/util.js
+++ b/bklog/web/src/common/util.js
@@ -28,7 +28,6 @@
 
 import html2canvas from 'html2canvas';
 import JSONBigNumber from 'json-bignumber';
-import store from '../store';
 
 /**
  * 函数柯里化
  *
@@ -776,142 +775,3 @@ export const setDefaultSettingSelectFiled = (key, filed) => {
   selectObj[key] = filed;
   localStorage.setItem('TABLE_SELECT_FILED', JSON.stringify(selectObj));
 };
-
-/**
- * 防抖装饰器
- * @param delay
- */
-export const Debounce = (delay = 200) => (target, key, descriptor) => {
-  const originFunction = descriptor.value;
-  const getNewFunction = () => {
-    let timer;
-    const newFunction = function (...args) {
-      if (timer) window.clearTimeout(timer);
-      timer = setTimeout(() => {
-        originFunction.call(this, ...args);
-      }, delay);
-    };
-    return newFunction;
-  };
-  descriptor.value = getNewFunction();
-  return descriptor;
-};
-
-/**
- * 获取 row[key] 内容
- * @example return row.a.b || row['a.b']
- * @param {Object} row
- * @param {String} key
- * @param {String} fieldType
- * @param {Boolean} isFormatDate
- * @return {String|Number}
- */
-export const parseTableRowData = (row, key, fieldType, isFormatDate = store.state.isFormatDate) => {
-  const keyArr = key.split('.');
-  let data;
-
-  try {
-    if (keyArr.length === 1) {
-      data = row[key];
-    } else {
-      for (let index = 0; index < keyArr.length; index++) {
-        const item = keyArr[index];
-
-        if (index === 0) {
-          data = row[item];
-          continue;
-        }
-
-        if (data === undefined) {
-          break;
-        }
-
-        if (data[item]) {
-          data = data[item];
-        } else {
-          // 如果 x.y 不存在 返回 x['y.z'] x['y.z.z.z'] ...
-          const validKey = keyArr.splice(index, keyArr.length - index).join('.');
-          data = data[validKey];
-          break;
-        }
-      }
-    }
-  } catch (e) {
-    console.warn('List data analyses error:', e);
-    data = '--';
-  }
-
-  if (isFormatDate && fieldType === 'date') {
-    return formatDate(Number(data)) || data || '--';
-  }
-
-  if (Array.isArray(data)) {
-    return data.toString();
-  }
-
-  if (typeof data === 'object' && data !== null) {
-    return JSON.stringify(data);
-  }
-
-  return (data || data === 0) ? data : '--';
-};
-
-/**
- * @desc: 计算字符串像素长度
- * @param {String} str 字符串
- * @param {String} fontSize 像素大小 默认12px
- * @returns {Number} 两个对象是否相同
- */
-export const getTextPxWidth = (str, fontSize = '12px') => {
-  let result = 10;
-  const ele = document.createElement('span');
-  // 字符串中带有换行符时,会被自动转换成<br>标签,若需要考虑这种情况,可以替换成空格,以获取正确的宽度
-  // str = str.replace(/\\n/g,' ').replace(/\\r/g,' ');
-  ele.innerText = str;
-  // 不同的大小和不同的字体都会导致渲染出来的字符串宽度变化,可以传入尽可能完备的样式信息
-  ele.style.fontSize = fontSize;
-  // 由于父节点的样式会影响子节点,这里可按需添加到指定节点上
-  document.body.append(ele);
-  result = ele.offsetWidth;
-  document.body.removeChild(ele);
-
-  return result;
-};
-
-/**
- * @desc: 计算
- * @param {String} str 字符串
- * @param {String} fontSize 像素大小 默认12px
- * @returns {Number} 长度
- */
-export const calculateTableColsWidth = (field, list) => {
-  // 取首屏前10条日志数据未计算模板
-  const firstLoadList = list.slice(0, 10);
-  // 通过排序获取最大的字段值
-  firstLoadList.sort((a, b) => {
-    return parseTableRowData(b, field.field_name, field.field_type).length
-      - parseTableRowData(a, field.field_name, field.field_type).length;
-  });
-  if (firstLoadList[0]) {
-    // 去掉高亮标签 保证不影响实际展示长度计算
-    const fieldValue = String(parseTableRowData(firstLoadList[0], field.field_name, field.field_type))
-      .replace(/<mark>/g, '')
-      .replace(/<\/mark>/g, '');
-    // 实际字段值长度
-    const fieldValueLen = getTextPxWidth(fieldValue);
-    // 字段名长度 需保证字段名完全显示
-    const fieldNameLen = getTextPxWidth(field.field_name);
-
-    // 600为默认自适应最大宽度
-    if (fieldValueLen > 600) return 600;
-
-    // 当内容长度小于字段名长度 要保证表头字段名显示完整 80为 padding、排序icon、隐藏列icon
-    if (fieldValueLen < fieldNameLen + 80) return fieldNameLen + 80;
-
-    // 默认计算长度 40为padding
-    return fieldValueLen + 40;
-  }
-
-  return field.width;
-};
diff --git a/bklog/web/src/components/biz-menu/index.vue b/bklog/web/src/components/biz-menu/index.vue
index 34d188d6d..40326714d 100644
--- a/bklog/web/src/components/biz-menu/index.vue
+++ b/bklog/web/src/components/biz-menu/index.vue
@@ -52,8 +52,8 @@
           :placeholder="$t('搜索')"
           :clearable="false"
           :value="keyword"
-          @clear="handleBizSearchDebounce"
-          @change="handleBizSearchDebounce"
+          @clear="handleBizSearch"
+          @change="handleBizSearch"
         >
+
+            {{ $t("申请权限") }}
+
diff --git a/bklog/web/src/components/collection-access/components/step-add/config-log-set-item.vue b/bklog/web/src/components/collection-access/components/step-add/config-log-set-item.vue
index 59a03f97e..7bda6f8a7 100644
--- a/bklog/web/src/components/collection-access/components/step-add/config-log-set-item.vue
+++ b/bklog/web/src/components/collection-access/components/step-add/config-log-set-item.vue
@@ -436,16 +436,6 @@ export default {
           name: this.$t('级别'),
           isSelect: false,
         },
-        {
-          id: 'winlog_source',
-          name: this.$t('事件来源'),
-          isSelect: false,
-        },
-        {
-          id: 'winlog_content',
-          name: this.$t('事件内容'),
-          isSelect: false,
-        },
       ],
       eventSettingList: [
         { type: 'winlog_event_id', list: [], isCorrect: true },
@@ -652,9 +642,6 @@
         case 'winlog_level':
          this.eventSettingList[index].isCorrect = item.list.every(Boolean);
          break;
-        default:
-          this.eventSettingList[index].isCorrect = true;
-          break;
       }
     },
     pasteFn(v, index) {
diff --git a/bklog/web/src/components/collection-access/field-table.vue b/bklog/web/src/components/collection-access/field-table.vue
index a63d6c444..7b3d42285 100644
--- a/bklog/web/src/components/collection-access/field-table.vue
+++ b/bklog/web/src/components/collection-access/field-table.vue
@@ -30,15 +30,6 @@
         {{ $t('保留原始日志') }}
       -->
-
-          {{ $t('保留未定义字段') }}
-
-        item.labelSelector = []); // 切换环境清空label
-      }
+      this.formData.configs.forEach(item => item.labelSelector = []); // 切换环境清空label
     },
     handleAddExtraLabel() {
       this.formData.extra_labels.push({ key: '', value: '' });
     },
@@ -2338,7 +2322,7 @@
     .win-content {
       padding-bottom: 20px;
       position: relative;
-      left: 118px;
+      left: 150px;
       width: 76%;
 
       > span {
diff --git a/bklog/web/src/components/collection-access/step-field.vue b/bklog/web/src/components/collection-access/step-field.vue
index 07eaf80ae..02832e901 100644
--- a/bklog/web/src/components/collection-access/step-field.vue
+++ b/bklog/web/src/components/collection-access/step-field.vue
@@ -239,14 +239,11 @@
       :is-edit-json="isUnmodifiable"
       :is-set-disabled="isSetDisabled"
       :extract-method="formData.etl_config"
-      :select-etl-config="params.etl_config"
       :deleted-visible="deletedVisible"
       :fields="formData.fields"
       :retain-original-value="formData.etl_params.retain_original_text"
-      :retain-extra-json="formData.etl_params.retain_extra_json"
       @deleteVisible="visibleHandle"
       @handleKeepLog="handleKeepLog"
-      @handleKeepField="handleKeepField"
       @standard="dialogVisible = true"
       @reset="getDetail">
@@ -520,7 +517,6 @@ export default {
         retain_original_text: true,
         separator_regexp: '',
         separator: '',
-        retain_extra_json: false,
       },
       fields: [],
       visible_type: 'current_biz', // 可见范围单选项
@@ -806,7 +802,6 @@
         retain_original_text: true,
         separator_regexp: '',
         separator: '',
-        retain_extra_json: false,
       }, etl_params ? JSON.parse(JSON.stringify(etl_params)) : {}), // eslint-disable-line
       fields: etl_fields,
       visible_type,
@@ -860,7 +855,6 @@
         retain_original_text: etl_params.retain_original_text,
         separator_regexp: etl_params.separator_regexp,
         separator: etl_params.separator,
-        retain_extra_json: etl_params.retain_extra_json ?? false,
       },
       etl_fields: fields,
       visible_type,
@@ -868,8 +862,7 @@
       /* eslint-disable */
       if (etl_config !== 'bk_log_text') {
         const etlParams = {
-          retain_original_text: etl_params.retain_original_text,
-          retain_extra_json : etl_params.retain_extra_json ?? false
+          retain_original_text: etl_params.retain_original_text
         }
         if (etl_config === 'bk_log_delimiter') {
           etlParams.separator = etl_params.separator
@@ -1101,7 +1094,6 @@
         retain_original_text: true,
         separator_regexp: '',
         separator: '',
-        retain_extra_json: false,
       }, etl_params ? JSON.parse(JSON.stringify(etl_params)) : {}), // eslint-disable-line
       fields: copyFields.filter(item => !item.is_built_in),
       });
@@ -1330,9 +1322,6 @@
     handleKeepLog(value) {
       this.formData.etl_params.retain_original_text = value;
     },
-    handleKeepField(value) {
-      this.formData.etl_params.retain_extra_json = value;
-    },
     judgeNumber(val) {
       const { value } = val;
       if (value === 0) return false;
@@ -1419,7 +1408,6 @@
         retain_original_text: true,
         separator_regexp: '',
         separator: '',
-        retain_extra_json: false,
       }, etl_params ? JSON.parse(JSON.stringify(etl_params)) : {}), // eslint-disable-line
       fields: etl_fields,
       });
diff --git a/bklog/web/src/components/collection-access/step-storage.vue b/bklog/web/src/components/collection-access/step-storage.vue
index 55d889308..579507ef2 100644
--- a/bklog/web/src/components/collection-access/step-storage.vue
+++ b/bklog/web/src/components/collection-access/step-storage.vue
@@ -309,7 +309,6 @@ export default {
       etl_config: 'bk_log_text',
       etl_params: {
         retain_original_text: true,
-        retain_extra_json: false,
         separator_regexp: '',
         separator: '',
         // separator_field_list: ''
@@ -479,7 +478,6 @@
         view_roles,
         etl_params: {
           retain_original_text: etl_params.retain_original_text,
-          retain_extra_json: etl_params.retain_extra_json ?? false,
           separator_regexp: etl_params.separator_regexp,
           separator: etl_params.separator,
         },
@@ -495,8 +493,7 @@
       /* eslint-disable */
       if (etl_config !== 'bk_log_text') {
         const etlParams = {
-          retain_original_text: etl_params.retain_original_text,
-          retain_extra_json: etl_params.retain_extra_json ?? false,
+          retain_original_text: etl_params.retain_original_text
         }
         if (etl_config === 'bk_log_delimiter') {
           etlParams.separator = etl_params.separator
@@ -632,7 +629,6 @@
         etl_config: this.fieldType,
         etl_params: Object.assign({
           retain_original_text: true,
-          retain_extra_json: false,
           separator_regexp: '',
           separator: '',
           // separator_field_list: ''
diff --git a/bklog/web/src/language/lang/en/button.ts b/bklog/web/src/language/lang/en/button.ts
index 83fb74ba8..9f3463803 100644
--- a/bklog/web/src/language/lang/en/button.ts
+++ b/bklog/web/src/language/lang/en/button.ts
@@ -42,12 +42,11 @@ export default {
   "成功": "Success",
   "完成": "Complete",
   "下载": "Download",
-  "复制": "Copy",
+  "复制": "Duplicate",
   "回溯": "Rehydrate",
   "全屏": "Full-Screen",
   "申请": "Apply",
   "搜索": "Search",
-  "关闭": "Close",
   "日志检索": "Explore",
   "下一步": "Next",
   "已收藏": "Saved",
@@ -66,7 +65,7 @@ export default {
   "添加IP": "Add IP",
   "标准输出": "Stdout",
   "全部添加": "Add All",
-  "清洗": "Parsing",
+  "前往清洗": "Parsing",
   "查看所有": "View all",
   "新增索引": "Add Index",
   "新建分组": "New Group",
diff --git a/bklog/web/src/language/lang/en/content.ts b/bklog/web/src/language/lang/en/content.ts
index 62ac3e579..286ea91ec 100644
--- a/bklog/web/src/language/lang/en/content.ts
+++ b/bklog/web/src/language/lang/en/content.ts
@@ -70,6 +70,7 @@ export default {
   "系统出现异常": "System exception occurred",
   "了解接入详情": "Learn about access details",
   "设置显示字段": "Set display fields",
+  "设置排序权重": "Set sorting weight",
   "暂未进行检索": "No search yet",
   "优化查询语句": "Optimize query statements",
   "近24H新增": "Added in the last 24 hours",
@@ -490,6 +491,5 @@ export default {
   "* 表示匹配多个(包括 0 个)任意字符": "* means match multiple (including 0) any characters",
   "例:a*d 可匹配 ad、abcd、a123d": "Example: a*d matches ad, abcd, a123d",
   "? 表示匹配单个任意字符": "? means match any single character",
-  "例:a?d 可匹配 abd、a1d": "Example: a?d matches abd, a1d",
-  "请前往 {0}": "Go to {0}",
+  "例:a?d 可匹配 abd、a1d": "Example: a?d matches abd, a1d"
 }
diff --git a/bklog/web/src/language/lang/en/label.ts b/bklog/web/src/language/lang/en/label.ts
index 5472a6812..e2ff78937 100644
--- a/bklog/web/src/language/lang/en/label.ts
+++ b/bklog/web/src/language/lang/en/label.ts
@@ -186,7 +186,7 @@ export default {
   "执行人": "Executor",
   "用户名": "Username",
   "管控区域": "BK-Net",
-  "是否可聚合": "Is Aggregable",
+  "可聚合": "Aggregable",
   "创建人": "Created By",
   "字段名": "Field Name",
   "空闲率": "Idle Ratio",
@@ -531,11 +531,5 @@ export default {
   "不过滤": "No filter",
   "IP目标": "IP target",
   "使用通配符": "Use wildcards",
-  "文件来源主机": "File source host",
-  "将字段添加至表格中": "Add fields to table",
-  "将字段从表格中移除": "Remove fields from table",
-  "查询结果统计": "Query result statistics",
-  "排序权重": "Sorting weight",
-  "事件来源": "Event source",
-  "事件内容": "Event content"
+  "文件来源主机": "File source host"
 };
diff --git a/bklog/web/src/mixins/drag-mixin.js b/bklog/web/src/mixins/drag-mixin.js
index 273cf78a1..ce8377f53 100644
--- a/bklog/web/src/mixins/drag-mixin.js
+++ b/bklog/web/src/mixins/drag-mixin.js
@@ -55,7 +55,6 @@ export default {
       this.currentScreenX = null;
       window.removeEventListener('mousemove', this.dragMoving);
      window.removeEventListener('mouseup', this.dragStop);
-      this.$store.commit('updateChartSize');
    },
  },
 };
diff --git a/bklog/web/src/mixins/result-table-mixin.js b/bklog/web/src/mixins/result-table-mixin.js
index 81fdf53d6..7564ec4d0 100644
--- a/bklog/web/src/mixins/result-table-mixin.js
+++ b/bklog/web/src/mixins/result-table-mixin.js
@@ -31,7 +31,6 @@
 import RetrieveLoader from '@/skeleton/retrieve-loader';
 import TableColumn from '@/views/retrieve/result-comp/table-column';
 import ExpandView from '@/views/retrieve/result-table-panel/original-log/expand-view.vue';
 import EmptyView from '@/views/retrieve/result-table-panel/original-log/empty-view';
-import TimeFormatterSwitcher from '@/views/retrieve/result-table-panel/original-log/time-formatter-switcher';
 
 export default {
   components: {
@@ -43,7 +42,6 @@
     ExpandView,
     RegisterColumn,
     EmptyView,
-    TimeFormatterSwitcher,
   },
   mixins: [tableRowDeepViewMixin],
   props: {
@@ -146,7 +144,7 @@
       }
 
       list.forEach((el, index) => {
-        el.width = widthObj[index] || el.width;
+        el.width = widthObj[index] === undefined ? 'default' : widthObj[index];
       });
     }
   },
@@ -238,7 +236,6 @@
     // eslint-disable-next-line no-unused-vars
     renderHeaderAliasName(h, { column, $index }) {
       const field = this.visibleFields[$index - 1];
-      const isShowSwitcher = field.field_type === 'date';
       if (field) {
         const fieldName = this.showFieldAlias ? this.fieldAliasMap[field.field_name] : field.field_name;
         const fieldType = field.field_type;
@@ -261,33 +258,6 @@
           ],
         }),
         h('span', { directives: [{ name: 'bk-overflow-tips' }], class: 'title-overflow' }, [fieldName]),
-        h(TimeFormatterSwitcher, {
-          class: 'timer-formatter',
-          style: {
-            display: isShowSwitcher ? 'inline-block' : 'none',
-          },
-        }),
-        h('i', {
-          class: `bk-icon icon-minus-circle-shape toggle-display ${this.visibleFields.length === 1 ? 'is-hidden' : ''}`,
-          directives: [
-            {
-              name: 'bk-tooltips',
-              value: this.$t('将字段从表格中移除'),
-            },
-          ],
-          on: {
-            click: (e) => {
-              e.stopPropagation();
-              const displayFieldNames = [];
-              this.visibleFields.forEach((field) => {
-                if (field.field_name !== fieldName) {
-                  displayFieldNames.push(field.field_name);
-                }
-              });
-              this.$emit('fieldsUpdated', displayFieldNames, undefined, false);
-            },
-          },
-        }),
         ]);
       }
     },
@@ -361,7 +331,7 @@
         descending: 'desc',
       };
       const sortList = !!column ? [[column.columnKey, sortMap[order]]] : [];
-      this.$emit('shouldRetrieve', { sort_list: sortList }, false);
+      this.$emit('shouldRetrieve', { sort_list: sortList });
     },
   },
 };
diff --git a/bklog/web/src/mixins/table-row-deep-view-mixin.js b/bklog/web/src/mixins/table-row-deep-view-mixin.js
index 7f0cfc9f8..396ebe1b5 100644
--- a/bklog/web/src/mixins/table-row-deep-view-mixin.js
+++ b/bklog/web/src/mixins/table-row-deep-view-mixin.js
@@ -20,7 +20,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
  */
 
-import { parseTableRowData } from '@/common/util';
+import { formatDate } from '@/common/util';
 
 export default {
   computed: {
@@ -40,8 +40,53 @@
      * @return {String|Number}
      */
     tableRowDeepView(row, key, fieldType, isFormatDate = this.isFormatDate) {
-      return parseTableRowData(row, key, fieldType, isFormatDate);
-    },
+      const keyArr = key.split('.');
+      let data;
+
+      try {
+        if (keyArr.length === 1) {
+          data = row[key];
+        } else {
+          for (let index = 0; index < keyArr.length; index++) {
+            const item = keyArr[index];
+
+            if (index === 0) {
+              data = row[item];
+              continue;
+            }
+
+            if (data === undefined) {
+              break;
+            }
+
+            if (data[item]) {
+              data = data[item];
+            } else {
+              // 如果 x.y 不存在 返回 x['y.z'] x['y.z.z.z'] ...
+              const validKey = keyArr.splice(index, keyArr.length - index).join('.');
+              data = data[validKey];
+              break;
+            }
+          }
+        }
+      } catch (e) {
+        console.warn('List data analyses error:', e);
+        data = '--';
+      }
+
+      if (isFormatDate && fieldType === 'date') {
+        return formatDate(Number(data)) || data || '--';
+      }
+
+      if (Array.isArray(data)) {
+        return data.toString();
+      }
+
+      if (typeof data === 'object' && data !== null) {
+        return JSON.stringify(data);
+      }
+
+      return (data || data === 0) ? data : '--';
+    },
   },
 };
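The lookup rules that `tableRowDeepView` re-inlines above, sketched in Python for clarity (this is an illustrative translation, not code from the repo): resolve `row["a"]["b"]` level by level, and when an intermediate key is missing, fall back to the flattened remainder such as `row["a"]["b.c"]`:

```python
def deep_view(row: dict, key: str):
    """Resolve nested keys with a fallback to flattened dotted keys."""
    parts = key.split(".")
    data = row.get(parts[0]) if len(parts) > 1 else row.get(key)
    for index in range(1, len(parts)):
        if not isinstance(data, dict):
            break
        item = parts[index]
        if item in data:
            data = data[item]
        else:
            # e.g. x.y missing: try x["y.z"], x["y.z.z.z"], ...
            data = data.get(".".join(parts[index:]))
            break
    return data if (data or data == 0) else "--"


assert deep_view({"a": {"b": 1}}, "a.b") == 1
assert deep_view({"a": {"b.c": 2}}, "a.b.c") == 2
assert deep_view({}, "a.b") == "--"
```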
diff --git a/bklog/web/src/store/index.js b/bklog/web/src/store/index.js
index 7b2364526..9926cb59d 100644
--- a/bklog/web/src/store/index.js
+++ b/bklog/web/src/store/index.js
@@ -90,7 +90,6 @@ const store = new Vuex.Store({
     demoUid: '',
     spaceBgColor: '', // 空间颜色
     isEnLanguage: false,
-    chartSizeNum: 0, // 自定义上报详情拖拽后 表格chart需要自适应新宽度
   },
   // 公共 getters
   getters: {
@@ -113,7 +112,6 @@
       children.some(item => (item.id === 'permissionGroup' && item.project_manage === true))),
     spaceBgColor: state => state.spaceBgColor,
     isEnLanguage: state => state.isEnLanguage,
-    chartSizeNum: state => state.chartSizeNum,
   },
   // 公共 mutations
   mutations: {
@@ -225,9 +223,6 @@
     updateIsEnLanguage(state, val) {
       state.isEnLanguage = val;
     },
-    updateChartSize(state) {
-      state.chartSizeNum += 1;
-    },
   },
   actions: {
     /**
diff --git a/bklog/web/src/views/manage/manage-access/custom-report/components/intro-panel.vue b/bklog/web/src/views/manage/manage-access/custom-report/components/intro-panel.vue
index 8ed7b7656..187a75d97 100644
--- a/bklog/web/src/views/manage/manage-access/custom-report/components/intro-panel.vue
+++ b/bklog/web/src/views/manage/manage-access/custom-report/components/intro-panel.vue
@@ -84,7 +84,6 @@ export default {
     },
     handleActiveDetails(state) {
       this.$emit('handleActiveDetails', state ? state : !this.isOpenWindow);
-      this.$store.commit('updateChartSize');
     },
   },
 };
diff --git a/bklog/web/src/views/manage/manage-access/custom-report/list.vue b/bklog/web/src/views/manage/manage-access/custom-report/list.vue
index c371b6e90..fef220188 100644
--- a/bklog/web/src/views/manage/manage-access/custom-report/list.vue
+++ b/bklog/web/src/views/manage/manage-access/custom-report/list.vue
@@ -145,7 +145,7 @@
               active: !(props.row.permission && props.row.permission[authorityMap.MANAGE_COLLECTION_AUTH])
             }"
             @click="operateHandler(props.row, 'clean')">
-            {{ $t('清洗') }}
+            {{ $t('前往清洗') }}
-
+
-            {{ $t('清洗') }}
+            {{ $t('前往清洗') }}
@@ -557,7 +557,7 @@ export default {
       isAllowedCreate: null,
       columnSetting: {
         fields: settingFields,
-        selectedFields: [...settingFields.slice(3, 8), settingFields[2]],
+        selectedFields: settingFields.slice(1, 8),
       },
       // 是否支持一键检测
       enableCheckCollector: JSON.parse(window.ENABLE_CHECK_COLLECTOR),
diff --git a/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/daily-chart.vue b/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/daily-chart.vue
index 904fd8f30..48fcbc79c 100644
--- a/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/daily-chart.vue
+++ b/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/daily-chart.vue
@@ -85,16 +85,8 @@ export default {
     };
   },
   computed: {
-    ...mapGetters({
-      chartSizeNum: 'chartSizeNum',
-    }),
     ...mapGetters('collect', ['curCollect']),
   },
-  watch: {
-    chartSizeNum() {
-      this.resizeChart();
-    },
-  },
   created() {
     this.fetchChartData();
   },
diff --git a/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/minute-chart.vue b/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/minute-chart.vue
index d23684b25..1389442e5 100644
--- a/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/minute-chart.vue
+++ b/bklog/web/src/views/manage/manage-access/log-collection/collection-item/manage-collection/data-status/minute-chart.vue
@@ -80,16 +80,8 @@ export default {
     };
   },
   computed: {
-    ...mapGetters({
-      chartSizeNum: 'chartSizeNum',
-    }),
     ...mapGetters('collect', ['curCollect']),
   },
-  watch: {
-    chartSizeNum() {
-      this.resizeChart();
-    },
-  },
   created() {
     this.fetchChartData();
   },
diff --git a/bklog/web/src/views/retrieve/condition-comp/agg-chart.vue b/bklog/web/src/views/retrieve/condition-comp/agg-chart.vue
index f48257fbe..e4a2c6a67 100644
--- a/bklog/web/src/views/retrieve/condition-comp/agg-chart.vue
+++ b/bklog/web/src/views/retrieve/condition-comp/agg-chart.vue
@@ -132,7 +132,7 @@ export default {
     getIconPopover(operator, value) {
       if (this.fieldType === '__virtual__') return this.$t('该字段为平台补充 不可检索');
       if (this.filterIsExist(operator, value)) return this.$t('已添加过滤条件');
-      return `${this.fieldName} ${operator} ${value}`;
+      return operator;
     },
     filterIsExist(operator, value) {
       if (this.fieldType === '__virtual__') return true;
diff --git a/bklog/web/src/views/retrieve/condition-comp/field-filter-popover.vue b/bklog/web/src/views/retrieve/condition-comp/field-filter-popover.vue
index 83d0961a1..99ef5b5d3 100644
--- a/bklog/web/src/views/retrieve/condition-comp/field-filter-popover.vue
+++ b/bklog/web/src/views/retrieve/condition-comp/field-filter-popover.vue
@@ -22,39 +22,44 @@
diff --git a/bklog/web/src/views/retrieve/result-table-panel/original-log/index.vue b/bklog/web/src/views/retrieve/result-table-panel/original-log/index.vue
index 447ec07a9..e405c1492 100644
--- a/bklog/web/src/views/retrieve/result-table-panel/original-log/index.vue
+++ b/bklog/web/src/views/retrieve/result-table-panel/original-log/index.vue
@@ -42,7 +42,7 @@
         {{ $t('换行') }}
-
+