Skip to content

Commit

Permalink
Allow loading of nested lazy RowVector (#5392)
Browse files Browse the repository at this point in the history
Summary:
Followup of: #5140

When a RowVector contains nested lazy children, they should be loaded appropriately when decoding or when calling loadedVector(). This PR implements that loading for lazy vectors nested inside row types.

I'm not sure if it's safe to mutate the RowVector when loading.

Pull Request resolved: #5392

Reviewed By: bikramSingh91

Differential Revision: D47172716

Pulled By: pranjalssh

fbshipit-source-id: c27c3bf5e99a510d88459e276dca03b697bc59da
  • Loading branch information
pranjalssh authored and facebook-github-bot committed Jul 10, 2023
1 parent efeb747 commit b1c4ed5
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 1 deletion.
6 changes: 6 additions & 0 deletions velox/vector/BaseVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,12 @@ bool isLazyNotLoaded(const BaseVector& vector) {
case VectorEncoding::Simple::CONSTANT:
return vector.valueVector() ? isLazyNotLoaded(*vector.valueVector())
: false;
case VectorEncoding::Simple::ROW: {
const auto& children = vector.as<RowVector>()->children();
return std::any_of(children.begin(), children.end(), [](auto it) {
return it != nullptr && isLazyNotLoaded(*it);
});
}
default:
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ class BaseVector {
return this;
}

static VectorPtr loadedVectorShared(VectorPtr);
static VectorPtr loadedVectorShared(VectorPtr vector);

virtual const BufferPtr& values() const {
VELOX_UNSUPPORTED("Only flat vectors have a values buffer");
Expand Down
17 changes: 17 additions & 0 deletions velox/vector/ComplexVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,23 @@ class RowVector : public BaseVector {

uint64_t hashValueAt(vector_size_t index) const override;

/// Loads the children of this row vector in place. Every non-null child is
/// passed through BaseVector::loadedVectorShared(); when that call hands back
/// a different vector, the child slot is overwritten with it. Null children
/// are left untouched. Always returns this vector.
BaseVector* loadedVector() override {
  for (auto idx = 0; idx < childrenSize_; ++idx) {
    auto& slot = children_[idx];
    if (slot) {
      auto loaded = BaseVector::loadedVectorShared(slot);
      // Only rewrite the slot when loading actually produced another vector.
      if (loaded.get() != slot.get()) {
        slot = loaded;
      }
    }
  }
  return this;
}

/// Const overload. Forwards to the non-const loadedVector() through a
/// const_cast, so child vectors of this RowVector may be replaced even though
/// the call is made on a const object.
const BaseVector* loadedVector() const override {
  auto* self = const_cast<RowVector*>(this);
  return self->loadedVector();
}

std::unique_ptr<SimpleVector<uint64_t>> hashAll() const override;

/// Return the number of child vectors.
Expand Down
10 changes: 10 additions & 0 deletions velox/vector/LazyVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "velox/common/base/RawVector.h"
#include "velox/common/base/RuntimeMetrics.h"
#include "velox/common/time/Timer.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/DecodedVector.h"

namespace facebook::velox {
Expand Down Expand Up @@ -109,6 +110,15 @@ void LazyVector::ensureLoadedRows(
SelectivityVector& baseRows) {
decoded.decode(*vector, rows, false);
if (decoded.base()->encoding() != VectorEncoding::Simple::LAZY) {
if (decoded.base()->encoding() == VectorEncoding::Simple::ROW &&
isLazyNotLoaded(*decoded.base())) {
auto children = decoded.base()->asUnchecked<RowVector>()->children();
for (auto& child : children) {
DecodedVector decodedChild;
ensureLoadedRows(child, rows, decodedChild, baseRows);
}
decoded.base()->loadedVector();
}
return;
}
auto lazyVector = decoded.base()->asUnchecked<LazyVector>();
Expand Down
31 changes: 31 additions & 0 deletions velox/vector/tests/DecodedVectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,37 @@ TEST_F(DecodedVectorTest, dictionaryOverLazy) {
}
}

// Builds a RowVector directly on top of two lazy children, decodes it, and
// checks that the decoded base exposes flat children equal to the lazy inputs
// and no longer reports any unloaded lazy vector.
TEST_F(DecodedVectorTest, nestedLazy) {
  constexpr vector_size_t size = 1000;
  auto columnType = ROW({"a", "b"}, {INTEGER(), INTEGER()});

  auto lazyVectorA = vectorMaker_.lazyFlatVector<int32_t>(
      size,
      [](vector_size_t row) { return row % 5; },
      [](vector_size_t row) { return row % 7 == 0; });
  auto lazyVectorB = vectorMaker_.lazyFlatVector<int32_t>(
      size,
      [](vector_size_t row) { return row % 3; },
      [](vector_size_t row) { return row % 11 == 0; });

  std::vector<VectorPtr> children{lazyVectorA, lazyVectorB};
  auto rowVector = std::make_shared<RowVector>(
      pool_.get(), columnType, BufferPtr(nullptr), size, children);
  EXPECT_TRUE(isLazyNotLoaded(*rowVector));

  DecodedVector decoded(*rowVector, true);

  // First column.
  auto firstChild = decoded.base()->as<RowVector>()->childAt(0);
  EXPECT_TRUE(firstChild->isFlatEncoding());
  assertEqualVectors(firstChild, lazyVectorA);

  // Second column.
  auto secondChild = decoded.base()->as<RowVector>()->childAt(1);
  EXPECT_TRUE(secondChild->isFlatEncoding());
  assertEqualVectors(secondChild, lazyVectorB);

  EXPECT_FALSE(isLazyNotLoaded(*decoded.base()));
}

TEST_F(DecodedVectorTest, dictionaryOverConstant) {
testDictionaryOverConstant(10);
testDictionaryOverConstant(12.3);
Expand Down
57 changes: 57 additions & 0 deletions velox/vector/tests/LazyVectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,63 @@ TEST_F(LazyVectorTest, lazyInDictionary) {
assertCopyableVector(wrapped);
}

// Checks that LazyVector::ensureLoadedRows() applied to a RowVector with lazy
// children leaves no unloaded lazy vector behind.
TEST_F(LazyVectorTest, nestedLazy) {
  constexpr vector_size_t size = 1000;

  // The generators capture nothing, so they use empty capture lists.
  auto lazyVectorA = vectorMaker_.lazyFlatVector<int32_t>(
      size,
      [](vector_size_t i) { return i % 5; },
      [](vector_size_t i) { return i % 7 == 0; });
  auto lazyVectorB = vectorMaker_.lazyFlatVector<int32_t>(
      size,
      [](vector_size_t i) { return i % 3; },
      [](vector_size_t i) { return i % 11 == 0; });

  VectorPtr rowVector = makeRowVector({lazyVectorA, lazyVectorB});
  EXPECT_TRUE(isLazyNotLoaded(*rowVector));

  // NOTE(review): the selection is built with `false` and never updated, which
  // presumably means no row is selected; the test still expects the children
  // to end up loaded. Confirm this is the intended coverage.
  SelectivityVector rows(rowVector->size(), false);
  LazyVector::ensureLoadedRows(rowVector, rows);
  EXPECT_FALSE(isLazyNotLoaded(*rowVector));
}

// Checks that loading lazy children through a RowVector honours the row
// selection: each child's value generator must be invoked exactly once per
// selected row, and strictly fewer times than the vector size.
TEST_F(LazyVectorTest, selectiveNestedLazy) {
  constexpr vector_size_t size = 1000;
  // Number of times each child's value generator was actually called.
  int loadedA = 0, loadedB = 0;
  // Number of rows selected below, i.e. the expected generator call counts.
  int expectedLoadedA = 0, expectedLoadedB = 0;

  auto lazyVectorA =
      vectorMaker_.lazyFlatVector<int32_t>(size, [&](vector_size_t i) {
        ++loadedA;
        return i % 5;
      });
  auto lazyVectorB =
      vectorMaker_.lazyFlatVector<int32_t>(size, [&](vector_size_t i) {
        ++loadedB;
        return i % 3;
      });

  VectorPtr rowVector = makeRowVector({lazyVectorA, lazyVectorB});
  EXPECT_TRUE(isLazyNotLoaded(*rowVector));

  // Select every row whose index is not a multiple of 7.
  SelectivityVector rows(rowVector->size(), false);
  for (vector_size_t i = 0; i < size; ++i) {
    if (i % 7 != 0) {
      rows.setValid(i, true);
      ++expectedLoadedA;
      ++expectedLoadedB;
    }
  }
  rows.updateBounds();

  LazyVector::ensureLoadedRows(rowVector, rows);
  EXPECT_FALSE(isLazyNotLoaded(*rowVector));
  // Guard against a vacuous check: the selection must be a strict subset.
  EXPECT_LT(expectedLoadedA, size);
  EXPECT_EQ(loadedA, expectedLoadedA);
  EXPECT_EQ(loadedB, expectedLoadedB);
}

TEST_F(LazyVectorTest, lazyInCostant) {
// Wrap a Lazy vector in a Constant, load some indices and verify the
// results.
Expand Down

0 comments on commit b1c4ed5

Please sign in to comment.