Skip to content

Commit

Permalink
[HUDI-5253] HoodieMergeOnReadTableInputFormat could have duplicate re…
Browse files Browse the repository at this point in the history
…cords issue if it contains delta files while still splittable (apache#7264)
  • Loading branch information
boneanxs authored and fengjian committed Apr 5, 2023
1 parent 31e4e92 commit 116d1ee
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ public void testLogCompactionOnMORTable() throws Exception {
client.compact(compactionTimeStamp.get());

prevCommitTime = compactionTimeStamp.get();
//TODO: The commits below create duplicates when all the tests are run together, but they pass when run individually.
for (int i = 0; i < 2; i++) {
// Upsert
newCommitTime = HoodieActiveTimeline.createNewInstantTime();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public boolean getBelongsToIncrementalQuery() {
}

/**
 * Reports whether this path may be divided into multiple input splits.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the two return
 * statements below appear to be the removed and the added side of a single-line
 * change (the hunk header reports the same line count before and after).
 *
 * @return {@code true} only when none of the conditions checked below rule out splitting
 */
public boolean isSplitable() {
// Pre-change check: looks only at the path's string form for ".log" and at the bootstrap flag.
return !toString().contains(".log") && !includeBootstrapFilePath();
// Post-change check: additionally requires deltaLogFiles to be empty. Per the commit
// title, a path that carries delta files while still splittable could yield duplicate records.
return !toString().contains(".log") && deltaLogFiles.isEmpty() && !includeBootstrapFilePath();
}

public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.hudi.hadoop.realtime;

import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.PathWithBootstrapFileStatus;

Expand Down Expand Up @@ -65,4 +66,16 @@ void pathNotSplitableForBootstrapScenario() throws IOException {
rtPath.setPathWithBootstrapFileStatus(path);
assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path for bootstrap should not be splitable.");
}

/**
 * Checks both sides of the delta-file rule for realtime paths: a path that carries
 * only a base file is reported as splittable, while the same base file combined
 * with a delta log file is not.
 */
@Test
void pathNotSplitableIfContainsDeltaFiles() throws IOException {
  // Scenario 1: base file only, empty delta log list.
  final String baseFileLocation = Files.createTempFile(tempDir, "target", ".parquet").toUri().toString();
  final HoodieRealtimePath baseOnlyPath =
      new HoodieRealtimePath(new Path("foo"), "bar", baseFileLocation, Collections.emptyList(), "000", false, Option.empty());
  final boolean baseOnlySplitable = new HoodieMergeOnReadTableInputFormat().isSplitable(fs, baseOnlyPath);
  assertTrue(baseOnlySplitable, "Path only contains the base file should be splittable");

  // Scenario 2: same base file, now accompanied by a single delta log file.
  final URI logFileUri = Files.createTempFile(tempDir, ".test", ".log.4_1-149-180").toUri();
  final HoodieLogFile deltaLogFile = new HoodieLogFile(fs.getFileStatus(new Path(logFileUri)));
  final HoodieRealtimePath pathWithDeltaLog =
      new HoodieRealtimePath(new Path("foo"), "bar", baseFileLocation, Collections.singletonList(deltaLogFile), "000", false, Option.empty());
  final boolean withDeltaLogSplitable = new HoodieMergeOnReadTableInputFormat().isSplitable(fs, pathWithDeltaLog);
  assertFalse(withDeltaLogSplitable, "Path contains log files should not be splittable.");
}
}

0 comments on commit 116d1ee

Please sign in to comment.