Skip to content

Commit

Permalink
apacheGH-40038: [Java] The offset buffer of empty vector with variabl…
Browse files Browse the repository at this point in the history
…e-size layout should not be empty
  • Loading branch information
viirya committed Feb 12, 2024
1 parent d989191 commit 5eb34e1
Show file tree
Hide file tree
Showing 13 changed files with 42 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public void testZeroRowResultSet() throws Exception {
assertNotNull("VectorSchemaRoot from first next() result should never be null", root);
assertEquals("VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount());
assertFalse("hasNext() should return false on empty ResultSets after initial next() call", iter.hasNext());
root.close();
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ public void testCreateSchema() {

assertEquals(1, dictionaryUsed.size());
assertEquals(0, dictionaryUsed.first());

dictVec.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ public BaseLargeVariableWidthVector(Field field, final BufferAllocator allocator
lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
valueCount = 0;
lastSet = -1;
offsetBuffer = allocator.getEmpty();
// According to Arrow spec, the offsets buffer contains length + 1 elements
allocateOffsetBuffer(OFFSET_WIDTH);
validityBuffer = allocator.getEmpty();
valueBuffer = allocator.getEmpty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ public BaseVariableWidthVector(Field field, final BufferAllocator allocator) {
lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
valueCount = 0;
lastSet = -1;
offsetBuffer = allocator.getEmpty();
// According to Arrow spec, the offsets buffer contains length + 1 elements
allocateOffsetBuffer(OFFSET_WIDTH);
validityBuffer = allocator.getEmpty();
valueBuffer = allocator.getEmpty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {

protected ValueVector add(String childName, FieldType fieldType) {
FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack);

if (conflictPolicy == ConflictPolicy.CONFLICT_REPLACE && vectors.containsKey(childName)) {
vectors.getAll(childName).forEach(c -> c.close());
}

putChild(childName, vector);
if (callBack != null) {
callBack.doWork();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ protected BaseRepeatedValueVector(String name, BufferAllocator allocator, CallBa
protected BaseRepeatedValueVector(String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) {
super(allocator);
this.name = name;
this.offsetBuffer = allocator.getEmpty();
// According to Arrow spec, the offsets buffer contains length + 1 elements
allocateOffsetBuffer(OFFSET_WIDTH, true);
this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null");
this.repeatedCallBack = callBack;
this.valueCount = 0;
Expand All @@ -83,7 +84,7 @@ public String getName() {
public boolean allocateNewSafe() {
boolean dataAlloc = false;
try {
allocateOffsetBuffer(offsetAllocationSizeInBytes);
allocateOffsetBuffer(offsetAllocationSizeInBytes, false);
dataAlloc = vector.allocateNewSafe();
} catch (Exception e) {
e.printStackTrace();
Expand All @@ -97,11 +98,13 @@ public boolean allocateNewSafe() {
return dataAlloc;
}

protected void allocateOffsetBuffer(final long size) {
protected void allocateOffsetBuffer(final long size, boolean init) {
final int curSize = (int) size;
offsetBuffer = allocator.buffer(curSize);
offsetBuffer.readerIndex(0);
offsetAllocationSizeInBytes = curSize;
if (!init) {
offsetAllocationSizeInBytes = curSize;
}
offsetBuffer.setZero(0, offsetBuffer.capacity());
}

Expand All @@ -112,7 +115,12 @@ public void reAlloc() {
}

protected void reallocOffsetBuffer() {
final long currentBufferCapacity = offsetBuffer.capacity();
final long currentBufferCapacity;
if (offsetBuffer.capacity() <= OFFSET_WIDTH) {
currentBufferCapacity = 0;
} else {
currentBufferCapacity = offsetBuffer.capacity();
}
long newAllocationSize = currentBufferCapacity * 2;
if (newAllocationSize == 0) {
if (offsetAllocationSizeInBytes > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ public void splitAndTransfer(int startIndex, int length) {
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH, false);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ public void splitAndTransfer(int startIndex, int length) {
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH, false);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public boolean isSet() {
@Override
public void setPosition(int index) {
super.setPosition(index);
if (vector.getOffsetBuffer().capacity() == 0) {
if (vector.getOffsetBuffer().capacity() <= OFFSET_WIDTH) {
currentOffset = 0;
maxOffset = 0;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ public void testGetFieldTypeInfo() throws Exception {
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
assertEquals(1, varcharChild.ordinal);
assertEquals(varcharChild.vector.getField(), children.get(1));

vector.close();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,8 @@ public void testGetTransferPairWithField() {
final ListVector toVector = (ListVector) transferPair.getTo();
// Field inside a new vector created by reusing a field should be the same in memory as the original field.
assertSame(toVector.getField(), fromVector.getField());

toVector.close();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ public void testGetFieldTypeInfo() throws Exception {
VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
assertEquals(1, varcharChild.ordinal);
assertEquals(varcharChild.vector.getField(), children.get(1));

vector.close();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,8 @@ public void testSplitAndTransfer1() {

// split and transfer with slice starting at the beginning: this should not allocate anything new
sourceVector.splitAndTransferTo(0, 2, targetVector);
assertEquals(allocatedMem, allocator.getAllocatedMemory());
// `allocatedMem` contains initial 4 bytes for the offset buffer
assertEquals(allocatedMem - 4, allocator.getAllocatedMemory());
// The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
// Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
// offset buffer is increased as well for the same reason. This amounts to a total of 2.
Expand Down Expand Up @@ -1173,7 +1174,8 @@ public void testSplitAndTransfer2() {

// split and transfer with slice starting at the beginning: this should not allocate anything new
sourceVector.splitAndTransferTo(0, 2, targetVector);
assertEquals(allocatedMem, allocator.getAllocatedMemory());
// `allocatedMem` contains initial 4 bytes for the offset buffer
assertEquals(allocatedMem - 4, allocator.getAllocatedMemory());
// The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
// Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
// offset buffer is increased as well for the same reason. This amounts to a total of 2.
Expand Down Expand Up @@ -1215,7 +1217,8 @@ public void testSplitAndTransfer3() {
final long validitySize =
DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize(
BaseValueVector.getValidityBufferSizeFromCount(2));
assertEquals(allocatedMem + validitySize, allocator.getAllocatedMemory());
// `allocatedMem` contains initial 4 bytes for the offset buffer
assertEquals(allocatedMem + validitySize - 4, allocator.getAllocatedMemory());
// The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
// Since values up to the startIndex are empty/null, the offset buffer doesn't need to be reallocated and
// therefore its refcnt is increased by 1.
Expand Down Expand Up @@ -3216,7 +3219,7 @@ public void testEmptyBufBehavior() {
assertEquals(1, vector.getOffsetBuffer().refCnt());
assertEquals(0, vector.getDataBuffer().capacity());
assertEquals(0, vector.getValidityBuffer().capacity());
assertEquals(0, vector.getOffsetBuffer().capacity());
assertEquals(4, vector.getOffsetBuffer().capacity());

vector.allocateNew(valueCount);
assertEquals(1, vector.getDataBuffer().refCnt());
Expand All @@ -3239,7 +3242,7 @@ public void testEmptyBufBehavior() {
assertEquals(1, vector.getValidityBuffer().refCnt());
assertEquals(1, vector.getOffsetBuffer().refCnt());
assertEquals(0, vector.getValidityBuffer().capacity());
assertEquals(0, vector.getOffsetBuffer().capacity());
assertEquals(4, vector.getOffsetBuffer().capacity());

vector.setValueCount(valueCount);
vector.allocateNewSafe();
Expand Down

0 comments on commit 5eb34e1

Please sign in to comment.