Skip to content

Commit

Permalink
perf: 容器化日志存储支持参数控制存储总量 TencentBlueKing#2655
Browse files Browse the repository at this point in the history
每类日志至少保留一个文件,防止日志采集丢失
  • Loading branch information
jsonwan committed Jan 8, 2024
1 parent fb82373 commit 8b5ce4d
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.apache.commons.lang3.StringUtils;
import org.slf4j.LoggerFactory;

import java.util.Collections;

/**
* 根据磁盘使用量清理过期日志文件任务
*/
Expand Down Expand Up @@ -84,7 +86,13 @@ public void checkVolumeAndClear() {
}

// Delegates to FileUtil.checkVolumeAndClearOldestFiles for the given log directory and size limit,
// and returns whatever count that call reports.
private int doCheckVolumeAndClear(long maxSizeBytes, String appLogDirPath) {
return FileUtil.checkVolumeAndClearOldestFiles(maxSizeBytes, appLogDirPath);
// Do not delete the log file that is currently being written, so that log collection does not lose part of the logs
String notDeleteFileSuffix = ".log";
return FileUtil.checkVolumeAndClearOldestFiles(
maxSizeBytes,
appLogDirPath,
Collections.singleton(notDeleteFileSuffix)
);
}

private String getAppLogDirPath() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
import com.tencent.bk.job.common.util.Base64Util;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.helpers.FormattingTuple;
import org.slf4j.helpers.MessageFormatter;

Expand Down Expand Up @@ -388,6 +390,20 @@ public static void showVolumeUsage(String path, long maxSizeBytes, long currentS
* @return 被成功清理的文件数量
*/
public static int checkVolumeAndClearOldestFiles(long maxSizeBytes, String targetDirPath) {
    // No suffix is protected here: a null exclusion set means no file is skipped by suffix matching.
    final Set<String> noExceptSuffixs = null;
    return checkVolumeAndClearOldestFiles(maxSizeBytes, targetDirPath, noExceptSuffixs);
}

/**
* 检查指定目录下的磁盘使用是否超出限制,超出限制则清理最旧的文件
*
* @param maxSizeBytes 最大限制字节数
* @param targetDirPath 目标目录路径
* @param exceptSuffixs 不删除的文件后缀名
* @return 被成功清理的文件数量
*/
public static int checkVolumeAndClearOldestFiles(long maxSizeBytes,
String targetDirPath,
Set<String> exceptSuffixs) {
if (null == targetDirPath) {
throw new IllegalArgumentException("TargetDirPath cannot be null");
}
Expand All @@ -406,6 +422,8 @@ public static int checkVolumeAndClearOldestFiles(long maxSizeBytes, String targe
fileList.sort(Comparator.comparingLong(File::lastModified));
// 记录删除失败的文件,下次不再列出
Set<String> deleteFailedFilePathSet = new HashSet<>();
// 记录忽略的文件,下次不再列出
Set<String> ignoredFilePathSet = new HashSet<>();
int count = 0;
while (currentSize > maxSizeBytes) {
if (fileList.isEmpty()) {
Expand All @@ -414,15 +432,29 @@ public static int checkVolumeAndClearOldestFiles(long maxSizeBytes, String targe
if (files == null || files.length == 0) return count;
fileList.addAll(Arrays.stream(files)
.filter(file -> !deleteFailedFilePathSet.contains(file.getAbsolutePath()))
.filter(file -> !ignoredFilePathSet.contains(file.getAbsolutePath()))
.collect(Collectors.toList())
);
fileList.sort(Comparator.comparingLong(File::lastModified));
}
if (fileList.isEmpty()) {
log.warn("Volume still overlimit after clear, deleteFailedFilePathSet={}", deleteFailedFilePathSet);
if (!deleteFailedFilePathSet.isEmpty()) {
log.warn(
"Volume still overlimit after clear, ignoredFilePathSet={}, deleteFailedFilePathSet={}",
ignoredFilePathSet,
deleteFailedFilePathSet
);
} else {
log.info("Volume still overlimit after clear, ignoredFilePathSet={}", ignoredFilePathSet);
}
return count;
}
File oldestFile = fileList.remove(0);
// 符合指定后缀名的文件不删除
if (matchSuffixs(oldestFile.getName(), exceptSuffixs)) {
ignoredFilePathSet.add(oldestFile.getAbsolutePath());
continue;
}
if (deleteFileAndRecordIfFail(oldestFile, deleteFailedFilePathSet)) {
count += 1;
log.info("Delete file {} because of volume overlimit", oldestFile.getAbsolutePath());
Expand All @@ -437,4 +469,26 @@ public static int checkVolumeAndClearOldestFiles(long maxSizeBytes, String targe
}
return count;
}

/**
 * Checks whether a file name ends with any of the given suffixes.
 * <p>
 * Null or empty suffixes in the set are skipped: {@code String.endsWith(null)} throws a
 * NullPointerException, and an empty suffix would match every file name.
 *
 * @param fileName file name to test; a null or blank name never matches
 * @param suffixs  candidate suffixes; a null or empty set never matches
 * @return true if the file name ends with at least one non-empty suffix in the set
 */
private static boolean matchSuffixs(String fileName, Set<String> suffixs) {
    if (CollectionUtils.isEmpty(suffixs) || StringUtils.isBlank(fileName)) {
        return false;
    }
    for (String suffix : suffixs) {
        // Guard against null/empty entries before calling endsWith
        if (StringUtils.isNotEmpty(suffix) && fileName.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

package com.tencent.bk.job.file.worker.task.clear;

import com.tencent.bk.job.common.util.file.FileUtil;
import com.tencent.bk.job.common.util.date.DateUtils;
import com.tencent.bk.job.common.util.file.FileUtil;
import com.tencent.bk.job.file.worker.config.WorkerConfig;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
Expand Down Expand Up @@ -104,7 +104,10 @@ public void checkVolumeAndClear() {

// Checks the worker workspace directory against the configured size limit via
// FileUtil.checkVolumeAndClearOldestFiles and logs how many files were cleared.
private void doCheckVolumeAndClear() {
// Convert the configured limit from GB to bytes (1024L keeps the arithmetic in long)
long maxSizeBytes = workerConfig.getMaxSizeGB() * 1024L * 1024L * 1024L;
FileUtil.checkVolumeAndClearOldestFiles(maxSizeBytes, workerConfig.getWorkspaceDirPath());
int count = FileUtil.checkVolumeAndClearOldestFiles(maxSizeBytes, workerConfig.getWorkspaceDirPath());
// Only log when at least one file was actually cleared
if (count > 0) {
log.info("{} file cleared", count);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
<property name="FILE_LOG_PATTERN"
value="${FILE_LOG_PATTERN:-%d{${LOG_DATEFORMAT_PATTERN:-[yyyy-MM-dd HH:mm:ss.SSS]}} ${LOG_LEVEL_PATTERN:-%5p} ${PID:- } --- [%t] %-40.40logger{39} : %m%n${LOG_EXCEPTION_CONVERSION_WORD:-%wEx}}"/>
<property name="BK_LOG_DIR" value="${job.log.dir:-/data/bkee/logs/job}"/>
<property name="BK_LOG_DIR_FILE_WORKER" value="${BK_LOG_DIR}/${job.file.worker.instance-name:-job-file-worker}"/>
<property name="FILE_WORKER_LOG_FILE" value="${BK_LOG_DIR_FILE_WORKER}/worker.log"/>
<property name="ERROR_LOG_FILE" value="${BK_LOG_DIR_FILE_WORKER}/error.log"/>
<property name="APP_LOG_DIR" value="${BK_LOG_DIR}/${job.file.worker.instance-name:-job-file-worker}" scope="context"/>
<property name="FILE_WORKER_LOG_FILE" value="${APP_LOG_DIR}/worker.log"/>
<property name="ERROR_LOG_FILE" value="${APP_LOG_DIR}/error.log"/>

<springProperty name="LOG_KEEP_HOURS" source="log.keep-hours" defaultValue="168"/>

Expand Down
2 changes: 1 addition & 1 deletion support-files/kubernetes/charts/bk-job/VALUES_LOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ log:
clearByVolumeUsage:
# 是否开启自动清理任务,默认开启
enabled: true
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件),单位支持B、KB、MB、GB、TB、PB,默认40GB
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件,但每类日志文件至少保留一个),单位支持B、KB、MB、GB、TB、PB,默认40GB
maxVolume: 40GB
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,5 +160,5 @@ data:
clear-by-volume-usage:
# 是否开启自动清理任务,默认开启
enabled: {{ .Values.log.clearByVolumeUsage.enabled }}
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件),单位支持B、KB、MB、GB、TB、PB,默认40GB
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件,但每类日志文件至少保留一个),单位支持B、KB、MB、GB、TB、PB,默认40GB
max-volume: {{ .Values.log.clearByVolumeUsage.maxVolume }}
Original file line number Diff line number Diff line change
Expand Up @@ -371,13 +371,4 @@ data:
root: {{ .Values.bkFeedBackUrl }}
deploy:
mode: lite
log:
# 服务后台日志保留的小时数,默认168小时(7天)
keep-hours: 168
# 根据磁盘占用量自动清理后台日志相关配置
clear-by-volume-usage:
# 是否开启自动清理任务,默认开启
enabled: true
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件),单位支持B、KB、MB、GB、TB、PB,默认200GB
max-volume: 200GB
{{- end }}
2 changes: 1 addition & 1 deletion support-files/kubernetes/charts/bk-job/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ log:
clearByVolumeUsage:
# 是否开启自动清理任务,默认开启
enabled: true
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件),单位支持B、KB、MB、GB、TB、PB,默认40GB
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件,但每类日志文件至少保留一个),单位支持B、KB、MB、GB、TB、PB,默认40GB
maxVolume: 40GB

## 蓝鲸日志采集配置
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,5 +168,5 @@ log:
clear-by-volume-usage:
# 是否开启自动清理任务,默认开启
enabled: true
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件),单位支持B、KB、MB、GB、TB、PB,默认200GB
# 服务后台日志可使用的最大磁盘空间(超出后将清理最旧的日志文件,但每类日志文件至少保留一个),单位支持B、KB、MB、GB、TB、PB,默认200GB
max-volume: 200GB

0 comments on commit 8b5ce4d

Please sign in to comment.