Skip to content

Commit

Permalink
fs lock
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuanshenbsj1 authored and codope committed Nov 17, 2023
1 parent 3d0c450 commit 4720012
Show file tree
Hide file tree
Showing 13 changed files with 351 additions and 143 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -118,17 +120,25 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) {

context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned: " + config.getTableName());

Map<String, Pair<Boolean, List<CleanFileInfo>>> cleanOpsWithPartitionMeta = context
.map(partitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean, earliestInstant)), cleanerParallelism)
.stream()
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Map<String, List<HoodieCleanFileInfo>> cleanOps = new HashMap<>();
List<String> partitionsToDelete = new ArrayList<>();
for (int i = 0; i < partitionsToClean.size(); i += cleanerParallelism) {
List<String> subPartitionsToClean = partitionsToClean.subList(i, Math.min(i + cleanerParallelism, partitionsToClean.size()));
Map<String, Pair<Boolean, List<CleanFileInfo>>> cleanOpsWithPartitionMeta = context
.map(subPartitionsToClean, partitionPathToClean -> Pair.of(partitionPathToClean, planner.getDeletePaths(partitionPathToClean, earliestInstant)), cleanerParallelism)
.stream()
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));

Map<String, List<HoodieCleanFileInfo>> cleanOps = cleanOpsWithPartitionMeta.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey,
e -> CleanerUtils.convertToHoodieCleanFileInfoList(e.getValue().getValue())));
Map<String, List<HoodieCleanFileInfo>> subCleanOps = cleanOpsWithPartitionMeta.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey,
e -> CleanerUtils.convertToHoodieCleanFileInfoList(e.getValue().getValue())));

List<String> partitionsToDelete = cleanOpsWithPartitionMeta.entrySet().stream().filter(entry -> entry.getValue().getKey()).map(Map.Entry::getKey)
.collect(Collectors.toList());
List<String> subPartitionsToDelete = cleanOpsWithPartitionMeta.entrySet().stream().filter(entry -> entry.getValue().getKey()).map(Map.Entry::getKey)
.collect(Collectors.toList());

cleanOps.putAll(subCleanOps);
partitionsToDelete.addAll(subPartitionsToDelete);
}

return new HoodieCleanerPlan(earliestInstant
.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -491,10 +491,13 @@ public Pair<Boolean, List<CleanFileInfo>> getDeletePaths(String partitionPath, O
Pair<Boolean, List<CleanFileInfo>> deletePaths;
if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) {
deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath, earliestCommitToRetain);
fileSystemView.cleanFileGroupsInPartitions(Collections.singletonList(partitionPath));
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath);
fileSystemView.cleanFileGroupsInPartitions(Collections.singletonList(partitionPath));
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) {
deletePaths = getFilesToCleanKeepingLatestHours(partitionPath, earliestCommitToRetain);
fileSystemView.cleanFileGroupsInPartitions(Collections.singletonList(partitionPath));
} else {
throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name());
}
Expand Down
Loading

0 comments on commit 4720012

Please sign in to comment.