Skip to content

Commit

Permalink
Add tests for array-based joins (#76)
Browse files Browse the repository at this point in the history
Summary:
Add a test for hash join where build-side keys come from a small range
and allow for array-based lookup instead of a hash table.

Pull Request resolved: #76

Reviewed By: amitkdutta

Differential Revision: D30413171

Pulled By: mbasmanova

fbshipit-source-id: 85b5aa1c395e49a1fed163bd318faa465f5825de
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Aug 19, 2021
1 parent 7aa0562 commit 13c0fb2
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
48 changes: 48 additions & 0 deletions velox/exec/tests/HashJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,51 @@ TEST_F(HashJoinTest, lazyVectors) {
{{0, rightFile}, {10, leftFile}},
"SELECT t.c1 + 1 FROM t, u WHERE t.c0 = u.c0");
}

/// Test hash join where build-side keys come from a small range and allow for
/// array-based lookup instead of a hash table.
///
/// The probe side ("t") consists of four batches that differ in the encoding
/// of the join key column (c0): flat, constant with a match, constant without
/// a match, and dictionary. The build side ("u") has a single batch of 100
/// even keys. The result is verified against DuckDB.
TEST_F(HashJoinTest, arrayBasedLookup) {
  // Number of rows in the dictionary-encoded probe batch.
  const vector_size_t dictionarySize = 500;

  // Odd positions 1, 3, ..., 999 of the 1'000-row base vector.
  auto oddIndices =
      makeIndices(dictionarySize, [](auto i) { return 2 * i + 1; });

  auto leftVectors = {
      // Join key vector is flat.
      makeRowVector({
          makeFlatVector<int32_t>(1'000, [](auto row) { return row; }),
          makeFlatVector<int64_t>(1'000, [](auto row) { return row; }),
      }),
      // Join key vector is constant. There is a match in the build side.
      makeRowVector({
          BaseVector::createConstant(4, 2'000, pool_.get()),
          makeFlatVector<int64_t>(2'000, [](auto row) { return row; }),
      }),
      // Join key vector is constant. There is no match.
      makeRowVector({
          BaseVector::createConstant(5, 2'000, pool_.get()),
          makeFlatVector<int64_t>(2'000, [](auto row) { return row; }),
      }),
      // Join key vector is a dictionary. The payload column must have the
      // same number of rows as the dictionary-wrapped key column, not the
      // number of rows in the dictionary's base vector.
      makeRowVector({
          wrapInDictionary(
              oddIndices,
              dictionarySize,
              makeFlatVector<int32_t>(1'000, [](auto row) { return row * 4; })),
          makeFlatVector<int64_t>(dictionarySize, [](auto row) { return row; }),
      })};

  // 100 key values in [0, 198] range.
  auto rightVectors = {makeRowVector(
      {makeFlatVector<int32_t>(100, [](auto row) { return row * 2; })})};

  createDuckDbTable("t", {leftVectors});
  createDuckDbTable("u", {rightVectors});

  // Join t and u on their first columns (c0) and compute c1 + 1 over the
  // probe-side payload column, mirroring the reference SQL query below.
  auto op =
      PlanBuilder(10)
          .values(leftVectors)
          .hashJoin(
              {0}, {0}, PlanBuilder(0).values(rightVectors).planNode(), "", {1})
          .project({"c1 + 1"})
          .planNode();

  assertQuery(op, "SELECT t.c1 + 1 FROM t, u WHERE t.c0 = u.c0");
}
18 changes: 18 additions & 0 deletions velox/exec/tests/OperatorTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,24 @@ class OperatorTestBase : public testing::Test {
size, sizeAt, keyAt, valueAt, isNullAt, valueIsNullAt);
}

/// Wraps 'vector' in a dictionary of 'size' rows using 'indices'. A null
/// buffer is passed for the nulls argument of BaseVector::wrapInDictionary.
static VectorPtr
wrapInDictionary(BufferPtr indices, vector_size_t size, VectorPtr vector) {
  BufferPtr noNulls(nullptr);
  auto wrapped = BaseVector::wrapInDictionary(
      std::move(noNulls), std::move(indices), size, std::move(vector));
  return wrapped;
}

/// Allocates a buffer holding 'size' entries of type vector_size_t from
/// 'pool_' and sets the entry at each position i in [0, size) to indexAt(i).
///
/// @param size Number of indices to generate.
/// @param indexAt Callback that returns the index value for a given position.
/// @return The populated buffer.
BufferPtr makeIndices(
    vector_size_t size,
    const std::function<vector_size_t(vector_size_t)>& indexAt) const {
  BufferPtr buffer = AlignedBuffer::allocate<vector_size_t>(size, pool_.get());
  auto out = buffer->asMutable<vector_size_t>();
  int position = 0;
  while (position < size) {
    out[position] = indexAt(position);
    ++position;
  }
  return buffer;
}

// Helper function for comparing vector results
template <typename T1, typename T2>
bool
Expand Down

0 comments on commit 13c0fb2

Please sign in to comment.