Skip to content

Commit

Permalink
refactor: polish record size estimation in compaction
Browse files Browse the repository at this point in the history
1. polish record size estimation in compaction

Signed-off-by: TheR1sing3un <[email protected]>
  • Loading branch information
TheR1sing3un committed Sep 27, 2024
1 parent 083b608 commit 25b4c74
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ public String toString() {
+ ", estimatedBaseFileSize=" + estimatedBaseFileSize
+ ", baseFileSorted=" + baseFileSorted
+ ", estimatedLogFileSize=" + estimatedLogFileSize
+ ", logFileNum=" + logFilePaths.size()
+ ", logFilePaths=" + logFilePaths
+ ", logFilesRealSize=" + logFilesRealSize
+ '}';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ public class SampleEstimator<R> implements SizeEstimator<R> {
private int sampleCount;

public SampleEstimator(SizeEstimator<R> underlyingEstimator) {
this(DEFAULT_TRIGGER_SAMPLE_THRESHOLD, DEFAULT_SAMPLE_WRIGHT, underlyingEstimator);
this(underlyingEstimator, DEFAULT_TRIGGER_SAMPLE_THRESHOLD);
}

public SampleEstimator(SizeEstimator<R> underlyingEstimator, int triggerSampleThreshold) {
this(triggerSampleThreshold, DEFAULT_SAMPLE_WRIGHT, underlyingEstimator);
this(underlyingEstimator, triggerSampleThreshold, DEFAULT_SAMPLE_WRIGHT);
}

// TODO: configure the triggerSampleThreshold and sampleWeight in the write config
public SampleEstimator(int triggerSampleThreshold, double sampleWeight, SizeEstimator<R> underlyingEstimator) {
public SampleEstimator(SizeEstimator<R> underlyingEstimator, int triggerSampleThreshold, double sampleWeight) {
this.triggerSampleThreshold = triggerSampleThreshold;
this.sampleWeight = sampleWeight;
this.underlyingEstimator = underlyingEstimator;
Expand All @@ -56,7 +56,7 @@ public SampleEstimator(int triggerSampleThreshold, double sampleWeight, SizeEsti
}

public SampleEstimator<R> newInstance() {
return new SampleEstimator<>(triggerSampleThreshold, sampleWeight, underlyingEstimator);
return new SampleEstimator<>(underlyingEstimator, triggerSampleThreshold, sampleWeight);
}

public long getPerEstimatedSize() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,10 @@ public void close() throws IOException {
}
}

class MemoryCombinedMap {
private class MemoryCombinedMap {
Map<K, V> map;
CombineFunc<K, V, V> combineFunc;
long currentMemorySize;
long estimatedAverageKeyValueSize;

public MemoryCombinedMap(Option<CombineFunc<K, V, V>> func) {
this.map = new HashMap<K, V>();
Expand All @@ -429,20 +429,21 @@ public V insert(K key, V value) {
map.compute(key, (k, oldValue) -> {
if (oldValue == null) {
V initValue = combineFunc.initCombine(key, value);
long estimatedSize = keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(initValue);
currentMemorySize += estimatedSize;
sampleSize(key, initValue);
return initValue;
} else {
V combined = combineFunc.combine(key, value, oldValue);
// NOTE: call size-estimate function to sample, but not add to currentMemorySize
keySizeEstimator.sizeEstimate(key);
valueSizeEstimator.sizeEstimate(combined);
sampleSize(key, combined);
return combined;
}
});
return map.get(key);
}

/**
 * Refreshes {@code estimatedAverageKeyValueSize} with the sum of the key-size estimate
 * and the value-size estimate for the given pair.
 *
 * <p>The field is overwritten, not accumulated, on each call.
 * NOTE(review): presumably the estimators return a sampled/smoothed per-record size
 * rather than the exact size of this particular pair; confirm against their implementation.
 *
 * @param k key handed to {@code keySizeEstimator}
 * @param v value handed to {@code valueSizeEstimator}
 */
private void sampleSize(K k, V v) {
estimatedAverageKeyValueSize = keySizeEstimator.sizeEstimate(k) + valueSizeEstimator.sizeEstimate(v);
}

/**
 * Looks up {@code key} in the backing in-memory {@code map}.
 *
 * @param key the key to look up
 * @return whatever {@code map.get(key)} yields, i.e. {@code null} when the map holds no
 *         mapping for {@code key}
 */
public V get(K key) {
return map.get(key);
}
Expand All @@ -456,7 +457,7 @@ public int size() {
}

public long getCurrentMemorySize() {
return currentMemorySize;
return map.size() * estimatedAverageKeyValueSize;
}

public Iterator<V> getSortedIterator() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class TestSampleEstimator {
@Test
public void testSampleEstimator() {
final AtomicLong returnValue = new AtomicLong(100);
SampleEstimator estimator = new SampleEstimator(100, 0.1, (r) -> returnValue.get());
SampleEstimator estimator = new SampleEstimator((r) -> returnValue.get(), 100, 0.1);
long estimatedSize = 0;
for (int i = 0; i < 1000; i++) {
estimatedSize = estimator.sizeEstimate(i);
Expand Down

0 comments on commit 25b4c74

Please sign in to comment.