Skip to content

Commit

Permalink
--story=120070378 优化审计日志&执行后置初始化脚本 (merge request !2094)
Browse files Browse the repository at this point in the history
Squash merge branch 'bcs-cluster-manager-1030' into 'master'
--story=120070378 优化审计日志&执行后置初始化脚本

 



TAPD: --story=120070378
  • Loading branch information
evanxinli committed Oct 31, 2024
1 parent 7f969b0 commit fcab98b
Show file tree
Hide file tree
Showing 10 changed files with 2,003 additions and 1,896 deletions.
3,797 changes: 1,909 additions & 1,888 deletions bcs-services/bcs-cluster-manager/api/clustermanager/clustermanager.pb.go

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -12729,6 +12729,14 @@ message OperationLogDetail {
title : "resourceName",
description : "资源名称"
}];
bool allowRetry = 11 [(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title : "allowRetry",
description : "如果该操作是异步任务, 控制任务是否允许重试"
}];
bool allowSkip = 12 [(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title : "allowSkip",
description : "如果该操作是异步任务, 控制任务是否允许跳过"
}];
}

message ListTaskStepLogsResponse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16874,6 +16874,18 @@
"type": "string",
"description": "资源名称",
"title": "resourceName"
},
"allowRetry": {
"type": "boolean",
"format": "boolean",
"description": "如果该操作是异步任务, 控制任务是否允许重试",
"title": "allowRetry"
},
"allowSkip": {
"type": "boolean",
"format": "boolean",
"description": "如果该操作是异步任务, 控制任务是否允许跳过",
"title": "allowSkip"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,17 @@ func (ua *ListOperationLogsAction) appendTasks(taskIDs []string) error {
t.End = endTime

t.TaskName = autils.Translate(ua.ctx, t.TaskType, t.TaskName, "")
ua.resp.Data.Results[i].TaskType = t.TaskType

allowRetry := true
// attention: 开启CA节点自动扩缩容的任务不允许手动重试
if utils.SliceContainInString([]string{cloudprovider.UpdateNodeGroupDesiredNode.String(),
cloudprovider.CleanNodeGroupNodes.String()}, t.TaskType) &&
t.GetCommonParams()[cloudprovider.ManualKey.String()] != common.True {
allowRetry = false
}

ua.resp.Data.Results[i].AllowRetry = allowRetry
ua.resp.Data.Results[i].Message = autils.TranslateMsg(ua.ctx, v.ResourceType, v.TaskType, v.Message, t)
ua.resp.Data.Results[i].Task = t
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ func (ua *TaskRecordsAction) fetchTaskRecords() error {

allowRetry := true
// attention: 开启CA节点自动扩缩容不允许手动重试
if utils.StringContainInSlice(task.TaskType, []string{cloudprovider.UpdateNodeGroupDesiredNode.String(),
cloudprovider.CleanNodeGroupNodes.String()}) &&
task.GetCommonParams()[cloudprovider.ManualKey.String()] == common.False {
if utils.SliceContainInString([]string{cloudprovider.UpdateNodeGroupDesiredNode.String(),
cloudprovider.CleanNodeGroupNodes.String()}, task.TaskType) &&
task.GetCommonParams()[cloudprovider.ManualKey.String()] != common.True {
allowRetry = false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ func (ua *RetryAction) validate() error {
if err := ua.req.Validate(); err != nil {
return err
}

retry := allowTaskRetry(ua.task)
if !retry {
return fmt.Errorf("autoscaler task can't manual retry")
}

// check task status
switch ua.task.Status {
case cloudprovider.TaskStatusInit, cloudprovider.TaskStatusRunning, cloudprovider.TaskStatusSuccess:
Expand Down
12 changes: 12 additions & 0 deletions bcs-services/bcs-cluster-manager/internal/actions/task/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,20 @@ import (
"github.com/Tencent/bk-bcs/bcs-services/bcs-cluster-manager/internal/cloudprovider"
"github.com/Tencent/bk-bcs/bcs-services/bcs-cluster-manager/internal/common"
"github.com/Tencent/bk-bcs/bcs-services/bcs-cluster-manager/internal/store"
"github.com/Tencent/bk-bcs/bcs-services/bcs-cluster-manager/internal/utils"
)

// allowTaskRetry reports whether the given task may be retried manually.
//
// Tasks of type UpdateNodeGroupDesiredNode or CleanNodeGroupNodes are
// retryable only when their common params carry ManualKey == common.True;
// every other task type is always retryable.
//
// A task without common params is treated the same as one whose manual flag
// is missing. Indexing a nil map is safe in Go and yields the zero value, so
// no nil guard is needed, and this keeps the helper in agreement with the
// equivalent inline checks in ListOperationLogsAction.appendTasks and
// TaskRecordsAction.fetchTaskRecords.
func allowTaskRetry(task *proto.Task) bool {
	restrictedTypes := []string{
		cloudprovider.UpdateNodeGroupDesiredNode.String(),
		cloudprovider.CleanNodeGroupNodes.String(),
	}
	if !utils.SliceContainInString(restrictedTypes, task.TaskType) {
		return true
	}

	// attention: tasks with CA node auto scaling enabled must not be retried manually
	return task.GetCommonParams()[cloudprovider.ManualKey.String()] == common.True
}

func updateTaskDataStatus(model store.ClusterManagerModel, task *proto.Task) error {
blog.Infof("updateTaskDataStatus[%s] taskType[%s]", task.TaskID, task.TaskType)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package common
import (
"context"
"fmt"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -207,7 +208,7 @@ func ExecuteScriptByJob(ctx context.Context, stepName, bizID, content string, ip
err error
)

servers, err := GetIPCloudIDByNodeIPs(ctx, ips)
servers, err := GetIPCloudIDByNodeIPs(ctx, bizID, ips)
if err != nil {
blog.Errorf("task[%s] ExecuteScriptByJob failed: %v", taskID, err)
return "", err
Expand Down Expand Up @@ -272,20 +273,52 @@ func ExecuteScriptByJob(ctx context.Context, stepName, bizID, content string, ip
return job.GetJobTaskLink(jobID), nil
}

// getBizHosts fetches all hosts of the business identified by bizID from cmdb
// and returns them indexed by BKHostID.
//
// bizID must parse as a decimal integer. A conversion failure or a cmdb query
// failure is logged and returned to the caller as-is.
func getBizHosts(bizID string) (map[int64]cmdb.HostData, error) {
	bizNum, convErr := strconv.Atoi(bizID)
	if convErr != nil {
		blog.Errorf("strconv BusinessID to int failed: %v", convErr)
		return nil, convErr
	}

	bizHosts, fetchErr := cmdb.GetCmdbClient().FetchAllHostsByBizID(bizNum, false)
	if fetchErr != nil {
		blog.Errorf("cmdb FetchAllHostsByBizID failed: %v", fetchErr)
		return nil, fetchErr
	}

	// index the hosts by host ID so callers can do cheap membership lookups
	byHostID := make(map[int64]cmdb.HostData)
	for _, h := range bizHosts {
		byHostID[h.BKHostID] = h
	}
	return byHostID, nil
}

// GetIPCloudIDByNodeIPs get serverIP cloudInfo by cmdb
func GetIPCloudIDByNodeIPs(ctx context.Context, ips []string) ([]job.ServerInfo, error) {
func GetIPCloudIDByNodeIPs(ctx context.Context, bizID string, ips []string) ([]job.ServerInfo, error) {
taskID := cloudprovider.GetTaskIDFromContext(ctx)

// withoutBiz maybe get other biz host
hostDetailData, err := cmdb.GetCmdbClient().QueryAllHostInfoWithoutBiz(ips)
if err != nil {
blog.Errorf("task[%s] GetIPCloudIDByNodeIPs failed: %v", taskID, err)
return nil, err
}

bizHostsMap, err := getBizHosts(bizID)
if err != nil {
return nil, err
}

var (
servers = make([]job.ServerInfo, 0)
)
for _, host := range hostDetailData {
_, ok := bizHostsMap[host.BKHostID]
if !ok {
continue
}

servers = append(servers, job.ServerInfo{
BkCloudID: uint64(host.BkCloudID),
Ip: host.BKHostInnerIP,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ func (d *Daemon) reportClusterHealthStatus(error chan<- error) {
error <- errLocal
return
}
_, err = kubeCli.Discovery().ServerVersion()
if err != nil {
_, errLocal = kubeCli.Discovery().ServerVersion()
if errLocal != nil {
blog.Errorf("reportClusterHealthStatus GetClusterClient failed: %v", errLocal)
// if options.GetEditionInfo().IsCommunicationEdition() {}
_ = d.updateClusterStatus(cls.ClusterID, common.StatusConnectClusterFailed)

metrics.ReportCloudClusterHealthStatus(cls.Provider, cls.ClusterID, 0)
error <- err
error <- errLocal
return
}

Expand Down

0 comments on commit fcab98b

Please sign in to comment.