Skip to content

Commit

Permalink
[DAPHNE-#399] Aggregation kernels can return different value types (#402)
Browse files Browse the repository at this point in the history

- AggAll, AggCol and AggRow used to return the same value type as their argument. For some aggregations (e.g., mean, stddev), that did not make sense for integer arguments.
- Extended these template kernels to support different return types.
- Added some test cases.
- Closes #399.
  • Loading branch information
aristotelis96 authored Jun 9, 2023
1 parent 8046dff commit 349bc39
Show file tree
Hide file tree
Showing 7 changed files with 347 additions and 287 deletions.
52 changes: 26 additions & 26 deletions src/runtime/local/kernels/AggAll.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@
// Struct for partial template specialization
// ****************************************************************************

template<class DT>
template<typename VTRes, class DTArg>
struct AggAll {
static typename DT::VT apply(AggOpCode opCode, const DT * arg, DCTX(ctx)) = delete;
static VTRes apply(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) = delete;
};

// ****************************************************************************
// Convenience function
// ****************************************************************************

template<class DT>
typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
return AggAll<DT>::apply(opCode, arg, ctx);
template<typename VTRes, class DTArg>
VTRes aggAll(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) {
return AggAll<VTRes, DTArg>::apply(opCode, arg, ctx);
}

// ****************************************************************************
Expand All @@ -52,33 +52,33 @@ typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
// scalar <- DenseMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggAll<DenseMatrix<VT>> {
static VT apply(AggOpCode opCode, const DenseMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggAll<VTRes, DenseMatrix<VTArg>> {
static VTRes apply(AggOpCode opCode, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

const VT * valuesArg = arg->getValues();
const VTArg * valuesArg = arg->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
VT agg;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
VTRes agg;
if (AggOpCodeUtils::isPureBinaryReduction(opCode)) {
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
agg = AggOpCodeUtils::template getNeutral<VT>(opCode);
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
agg = AggOpCodeUtils::template getNeutral<VTRes>(opCode);
}
else {
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// for MEAN and STDDEV, we need to sum
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
agg = VT(0);
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
agg = VTRes(0);
}

for(size_t r = 0; r < numRows; r++) {
for(size_t c = 0; c < numCols; c++)
agg = func(agg, valuesArg[c], ctx);
agg = func(agg, static_cast<VTRes>(valuesArg[c]), ctx);
valuesArg += arg->getRowSkip();
}
if (AggOpCodeUtils::isPureBinaryReduction(opCode))
Expand All @@ -99,13 +99,13 @@ struct AggAll<DenseMatrix<VT>> {
// scalar <- CSRMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggAll<CSRMatrix<VT>> {
static VT aggArray(const VT * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VT, VT, VT> func, bool isSparseSafe, VT neutral, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggAll<VTRes, CSRMatrix<VTArg>> {
static VTRes aggArray(const VTArg * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func, bool isSparseSafe, VTRes neutral, DCTX(ctx)) {
if(numNonZeros) {
VT agg = values[0];
VTRes agg = static_cast<VTRes>(values[0]);
for(size_t i = 1; i < numNonZeros; i++)
agg = func(agg, values[i], ctx);
agg = func(agg, static_cast<VTRes>(values[i]), ctx);

if(!isSparseSafe && numNonZeros < numCells)
agg = func(agg, 0, ctx);
Expand All @@ -116,30 +116,30 @@ struct AggAll<CSRMatrix<VT>> {
return func(neutral, 0, ctx);
}

static VT apply(AggOpCode opCode, const CSRMatrix<VT> * arg, DCTX(ctx)) {
static VTRes apply(AggOpCode opCode, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
if(AggOpCodeUtils::isPureBinaryReduction(opCode)) {

EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));

return aggArray(
arg->getValues(0),
arg->getNumNonZeros(),
arg->getNumRows() * arg->getNumCols(),
func,
AggOpCodeUtils::isSparseSafe(opCode),
AggOpCodeUtils::template getNeutral<VT>(opCode),
AggOpCodeUtils::template getNeutral<VTRes>(opCode),
ctx
);
}
else { // The op-code is either MEAN or STDDEV.
EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
auto agg = aggArray(
arg->getValues(0),
arg->getNumNonZeros(),
arg->getNumRows() * arg->getNumCols(),
func,
true,
VT(0),
VTRes(0),
ctx
);
if (opCode == AggOpCode::MEAN)
Expand Down
72 changes: 37 additions & 35 deletions src/runtime/local/kernels/AggCol.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,35 +55,37 @@ void aggCol(AggOpCode opCode, DTRes *& res, const DTArg * arg, DCTX(ctx)) {
// DenseMatrix <- DenseMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const DenseMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggCol<DenseMatrix<VTRes>, DenseMatrix<VTArg>> {
static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

if(res == nullptr)
res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, false);
res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, false);

const VT * valuesArg = arg->getValues();
VT * valuesRes = res->getValues();
const VTArg * valuesArg = arg->getValues();
VTRes * valuesRes = res->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
if(AggOpCodeUtils::isPureBinaryReduction(opCode))
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
else
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// for MEAN and STDDEV, we need to sum
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));

memcpy(valuesRes, valuesArg, numCols * sizeof(VT));

// memcpy(valuesRes, valuesArg, numCols * sizeof(VTRes));
// Can't memcpy because the result value type might differ from the argument value type
for (size_t c = 0; c < numCols; c++)
valuesRes[c] = static_cast<VTRes>(valuesArg[c]);
for(size_t r = 1; r < numRows; r++) {
valuesArg += arg->getRowSkip();
for(size_t c = 0; c < numCols; c++)
valuesRes[c] = func(valuesRes[c], valuesArg[c], ctx);
valuesRes[c] = func(valuesRes[c], static_cast<VTRes>(valuesArg[c]), ctx);
}

if(AggOpCodeUtils::isPureBinaryReduction(opCode))
Expand All @@ -97,13 +99,13 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
if(opCode != AggOpCode::STDDEV)
return;

auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
VT * valuesT = tmp->getValues();
auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
VTRes * valuesT = tmp->getValues();
valuesArg = arg->getValues();

for(size_t r = 0; r < numRows; r++) {
for(size_t c = 0; c < numCols; c++) {
VT val = valuesArg[c] - valuesRes[c];
VTRes val = static_cast<VTRes>(valuesArg[c]) - valuesRes[c];
valuesT[c] = valuesT[c] + val * val;
}
valuesArg += arg->getRowSkip();
Expand All @@ -116,46 +118,46 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {

// TODO We could avoid copying by returning tmp and destroying res. But
// that might be wrong if res was not nullptr initially.
memcpy(valuesRes, valuesT, numCols * sizeof(VT));
DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);
}
};

// ----------------------------------------------------------------------------
// DenseMatrix <- CSRMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const CSRMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggCol<DenseMatrix<VTRes>, CSRMatrix<VTArg>> {
static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

if(res == nullptr)
res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);

VT * valuesRes = res->getValues();
VTRes * valuesRes = res->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
if(AggOpCodeUtils::isPureBinaryReduction(opCode))
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
else
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// for MEAN and STDDEV, we need to sum
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));

const VT * valuesArg = arg->getValues(0);
const VTArg * valuesArg = arg->getValues(0);
const size_t * colIdxsArg = arg->getColIdxs(0);

const size_t numNonZeros = arg->getNumNonZeros();

if(AggOpCodeUtils::isSparseSafe(opCode)) {
for(size_t i = 0; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
}
}
else {
Expand All @@ -164,19 +166,19 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
const size_t numNonZerosFirstRowArg = arg->getNumNonZeros(0);
for(size_t i = 0; i < numNonZerosFirstRowArg; i++) {
size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = valuesArg[i];
valuesRes[colIdx] = static_cast<VTRes>(valuesArg[i]);
hist[colIdx]++;
}

if(arg->getNumRows() > 1) {
for(size_t i = numNonZerosFirstRowArg; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
hist[colIdx]++;
}
for(size_t c = 0; c < numCols; c++)
if(hist[c] < numRows)
valuesRes[c] = func(valuesRes[c], 0, ctx);
valuesRes[c] = func(valuesRes[c], VTRes(0), ctx);
}

delete[] hist;
Expand All @@ -193,13 +195,13 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
if(opCode != AggOpCode::STDDEV)
return;

auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
VT * valuesT = tmp->getValues();
auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
VTRes * valuesT = tmp->getValues();

size_t * nnzCol = new size_t[numCols](); // initialized to zeros
for(size_t i = 0; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
VT val = valuesArg[i] - valuesRes[colIdx];
VTRes val = static_cast<VTRes>(valuesArg[i]) - valuesRes[colIdx];
valuesT[colIdx] = valuesT[colIdx] + val * val;
nnzCol[colIdx]++;
}
Expand All @@ -216,8 +218,8 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {

// TODO We could avoid copying by returning tmp and destroying res. But
// that might be wrong if res was not nullptr initially.
memcpy(valuesRes, valuesT, numCols * sizeof(VT));
DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);

}
};
Expand Down
Loading

0 comments on commit 349bc39

Please sign in to comment.