Skip to content

Commit

Permalink
Merge pull request #1710 from CEED/jeremy/split-at-points
Browse files Browse the repository at this point in the history
Split AtPoints basis kernels into separate transpose and non-transpose variants
  • Loading branch information
jeremylt authored Nov 12, 2024
2 parents bc3a688 + 81ae615 commit be8d6f5
Show file tree
Hide file tree
Showing 10 changed files with 586 additions and 470 deletions.
15 changes: 9 additions & 6 deletions backends/cuda-ref/ceed-cuda-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
}

// Get read/write access to u, v
Expand All @@ -220,16 +222,17 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
// Basis action
switch (eval_mode) {
case CEED_EVAL_INTERP: {
void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Cuda(ceed, data->InterpAtPoints, num_elem, block_size, interp_args));
CeedCallBackend(
CeedRunKernel_Cuda(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, interp_args));
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Cuda(ceed, data->GradAtPoints, num_elem, block_size, grad_args));
CeedCallBackend(CeedRunKernel_Cuda(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
} break;
case CEED_EVAL_WEIGHT:
case CEED_EVAL_NONE: /* handled separately below */
Expand Down
2 changes: 2 additions & 0 deletions backends/cuda-ref/ceed-cuda-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ typedef struct {
CUmodule moduleAtPoints;
CeedInt num_points;
CUfunction InterpAtPoints;
CUfunction InterpTransposeAtPoints;
CUfunction GradAtPoints;
CUfunction GradTransposeAtPoints;
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
CeedScalar *d_q_weight_1d;
Expand Down
15 changes: 9 additions & 6 deletions backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,9 @@ static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_ad
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
}

// Get read/write access to u, v
Expand All @@ -321,16 +323,17 @@ static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_ad
// Basis action
switch (eval_mode) {
case CEED_EVAL_INTERP: {
void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Cuda(ceed, data->InterpAtPoints, num_elem, block_size, interp_args));
CeedCallBackend(
CeedRunKernel_Cuda(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, interp_args));
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Cuda(ceed, data->GradAtPoints, num_elem, block_size, grad_args));
CeedCallBackend(CeedRunKernel_Cuda(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
} break;
case CEED_EVAL_WEIGHT:
case CEED_EVAL_NONE: /* handled separately below */
Expand Down
2 changes: 2 additions & 0 deletions backends/cuda-shared/ceed-cuda-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ typedef struct {
CUmodule moduleAtPoints;
CeedInt num_points;
CUfunction InterpAtPoints;
CUfunction InterpTransposeAtPoints;
CUfunction GradAtPoints;
CUfunction GradTransposeAtPoints;
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
CeedScalar *d_collo_grad_1d;
Expand Down
15 changes: 9 additions & 6 deletions backends/hip-ref/ceed-hip-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
}

// Get read/write access to u, v
Expand All @@ -218,16 +220,17 @@ static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const
// Basis action
switch (eval_mode) {
case CEED_EVAL_INTERP: {
void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args));
CeedCallBackend(
CeedRunKernel_Hip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, interp_args));
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args));
CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
} break;
case CEED_EVAL_WEIGHT:
case CEED_EVAL_NONE: /* handled separately below */
Expand Down
2 changes: 2 additions & 0 deletions backends/hip-ref/ceed-hip-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ typedef struct {
hipModule_t moduleAtPoints;
CeedInt num_points;
hipFunction_t InterpAtPoints;
hipFunction_t InterpTransposeAtPoints;
hipFunction_t GradAtPoints;
hipFunction_t GradTransposeAtPoints;
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
CeedScalar *d_q_weight_1d;
Expand Down
15 changes: 9 additions & 6 deletions backends/hip-shared/ceed-hip-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,9 @@ static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
}

// Get read/write access to u, v
Expand All @@ -380,16 +382,17 @@ static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add
// Basis action
switch (eval_mode) {
case CEED_EVAL_INTERP: {
void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args));
CeedCallBackend(
CeedRunKernel_Hip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size, interp_args));
} break;
case CEED_EVAL_GRAD: {
void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);

CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args));
CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
} break;
case CEED_EVAL_WEIGHT:
case CEED_EVAL_NONE: /* handled separately below */
Expand Down
2 changes: 2 additions & 0 deletions backends/hip-shared/ceed-hip-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ typedef struct {
hipModule_t moduleAtPoints;
CeedInt num_points;
hipFunction_t InterpAtPoints;
hipFunction_t InterpTransposeAtPoints;
hipFunction_t GradAtPoints;
hipFunction_t GradTransposeAtPoints;
CeedInt block_sizes[3]; // interp, grad, weight thread block sizes
CeedScalar *d_interp_1d;
CeedScalar *d_grad_1d;
Expand Down
Loading

0 comments on commit be8d6f5

Please sign in to comment.