Skip to content

Commit

Permalink
Style fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
nbeams committed Jun 28, 2023
1 parent 824ef0b commit 1563911
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 113 deletions.
14 changes: 5 additions & 9 deletions backends/hip-ref/ceed-hip-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,7 @@ static int CreatePBRestriction(CeedElemRestriction rstr, CeedElemRestriction *pb
//------------------------------------------------------------------------------
// Assemble diagonal setup
//------------------------------------------------------------------------------
static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op, const bool pointBlock,
CeedInt use_ceedsize_idx) {
static inline int CeedOperatorAssembleDiagonalSetup_Hip(CeedOperator op, const bool pointBlock, CeedInt use_ceedsize_idx) {
Ceed ceed;
CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
CeedQFunction qf;
Expand Down Expand Up @@ -803,8 +802,7 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, CeedVect
CeedCallBackend(CeedVectorGetLength(assembled, &assembled_length));
CeedCallBackend(CeedVectorGetLength(assembledqf, &assembledqf_length));
CeedInt use_ceedsize_idx = 0;
if ((assembled_length > INT_MAX) || (assembledqf_length > INT_MAX))
use_ceedsize_idx = 1;
if ((assembled_length > INT_MAX) || (assembledqf_length > INT_MAX)) use_ceedsize_idx = 1;

// Setup
if (!impl->diag) CeedCallBackend(CeedOperatorAssembleDiagonalSetup_Hip(op, pointBlock, use_ceedsize_idx));
Expand Down Expand Up @@ -991,8 +989,8 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed
asmb->block_size_y = esize;
}
CeedCallBackend(CeedCompile_Hip(ceed, assembly_kernel_source, &asmb->module, 8, "NELEM", nelem, "NUMEMODEIN", num_emode_in, "NUMEMODEOUT",
num_emode_out, "NQPTS", nqpts, "NNODES", esize, "BLOCK_SIZE", block_size, "NCOMP", ncomp,
"CEEDSIZE", use_ceedsize_idx));
num_emode_out, "NQPTS", nqpts, "NNODES", esize, "BLOCK_SIZE", block_size, "NCOMP", ncomp, "CEEDSIZE",
use_ceedsize_idx));
CeedCallBackend(CeedGetKernel_Hip(ceed, asmb->module, fallback ? "linearAssembleFallback" : "linearAssemble", &asmb->linearAssemble));
CeedCallBackend(CeedFree(&assembly_kernel_path));
CeedCallBackend(CeedFree(&assembly_kernel_source));
Expand Down Expand Up @@ -1074,15 +1072,13 @@ static int CeedSingleOperatorAssemble_Hip(CeedOperator op, CeedInt offset, CeedV
CeedCallBackend(CeedVectorGetLength(values, &values_length));
CeedCallBackend(CeedVectorGetLength(assembled_qf, &assembled_qf_length));
CeedInt use_ceedsize_idx = 0;
if ((values_length > INT_MAX) || (assembled_qf_length > INT_MAX))
use_ceedsize_idx = 1;
if ((values_length > INT_MAX) || (assembled_qf_length > INT_MAX)) use_ceedsize_idx = 1;
// Setup
if (!impl->asmb) {
CeedCallBackend(CeedSingleOperatorAssembleSetup_Hip(op, use_ceedsize_idx));
assert(impl->asmb != NULL);
}


// Compute B^T D B
const CeedInt nelem = impl->asmb->nelem; // to satisfy clang-tidy
const CeedInt elemsPerBlock = impl->asmb->elemsPerBlock;
Expand Down
162 changes: 78 additions & 84 deletions backends/hip-ref/ceed-hip-ref-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -460,112 +460,106 @@ static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, CeedScalar *nor
switch (type) {
case CEED_NORM_1: {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
if (num_calls <= 1 )
CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt) length, (float *)d_array, 1, (float *)norm));
else {
float sub_norm = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt) sub_length, (float *)d_array_start, 1, &sub_norm));
*norm += sub_norm;
}
}
if (num_calls <= 1) CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt)length, (float *)d_array, 1, (float *)norm));
else {
float sub_norm = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
*norm += sub_norm;
}
}
} else {
if (num_calls <= 1 )
CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt) length, (double *)d_array, 1, (double *)norm));
else {
double sub_norm = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt) sub_length, (double *)d_array_start, 1, &sub_norm));
*norm += sub_norm;
}
}
if (num_calls <= 1) CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt)length, (double *)d_array, 1, (double *)norm));
else {
double sub_norm = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
*norm += sub_norm;
}
}
}
break;
}
case CEED_NORM_2: {
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
if (num_calls <= 1 )
CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt) length, (float *)d_array, 1, (float *)norm));
else {
float sub_norm = 0.0, norm_sum = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt) sub_length, (float *)d_array_start, 1, &sub_norm));
norm_sum += sub_norm * sub_norm;
}
*norm = sqrt(norm_sum);
}
if (num_calls <= 1) CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt)length, (float *)d_array, 1, (float *)norm));
else {
float sub_norm = 0.0, norm_sum = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
norm_sum += sub_norm * sub_norm;
}
*norm = sqrt(norm_sum);
}
} else {
if (num_calls <= 1 )
CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt) length, (double *)d_array, 1, (double *)norm));
else {
double sub_norm = 0.0, norm_sum = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt) sub_length, (double *)d_array_start, 1, &sub_norm));
norm_sum += sub_norm * sub_norm;
}
*norm = sqrt(norm_sum);
}
if (num_calls <= 1) CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt)length, (double *)d_array, 1, (double *)norm));
else {
double sub_norm = 0.0, norm_sum = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
norm_sum += sub_norm * sub_norm;
}
*norm = sqrt(norm_sum);
}
}
break;
}
case CEED_NORM_MAX: {
CeedInt indx;
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
if (num_calls <= 1 ) {
CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt) length, (float *)d_array, 1, &indx));
if (num_calls <= 1) {
CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt)length, (float *)d_array, 1, &indx));
CeedScalar normNoAbs;
CeedCallHip(ceed, hipMemcpy(&normNoAbs, impl->d_array + indx - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost));
*norm = fabs(normNoAbs);
}
else {
float sub_max = 0.0, current_max = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt) sub_length, (float *)d_array_start, 1, &indx));
} else {
float sub_max = 0.0, current_max = 0.0;
float *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &indx));
CeedCallHip(ceed, hipMemcpy(&sub_max, d_array_start + indx - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost));
if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
}
*norm = current_max;
}
if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
}
*norm = current_max;
}
} else {
if (num_calls <= 1 ) {
CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt) length, (double *)d_array, 1, &indx));
if (num_calls <= 1) {
CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt)length, (double *)d_array, 1, &indx));
CeedScalar normNoAbs;
CeedCallHip(ceed, hipMemcpy(&normNoAbs, impl->d_array + indx - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost));
*norm = fabs(normNoAbs);
}
else {
double sub_max = 0.0, current_max = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i) * INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i) * INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt) sub_length, (double *)d_array_start, 1, &indx));
} else {
double sub_max = 0.0, current_max = 0.0;
double *d_array_start;
for (CeedInt i = 0; i < num_calls; i++) {
d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX;
CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &indx));
CeedCallHip(ceed, hipMemcpy(&sub_max, d_array_start + indx - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost));
if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
}
*norm = current_max;
}
if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
}
*norm = current_max;
}
}
break;
}
Expand Down
24 changes: 12 additions & 12 deletions backends/hip-ref/kernels/hip-ref-vector.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, Ce
// Set value on device memory
//------------------------------------------------------------------------------
extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(setValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length, val);
Expand All @@ -43,9 +43,9 @@ __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) {
// Take vector reciprocal in device memory
//------------------------------------------------------------------------------
extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(rcpValueK, dim3(gridsize), dim3(bsize), 0, 0, d_array, length);
Expand All @@ -65,9 +65,9 @@ __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha,
// Compute x = alpha x on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha, length);
Expand All @@ -87,9 +87,9 @@ __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha,
// Compute y = alpha x + y on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(axpyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, x_array, length);
Expand All @@ -110,9 +110,9 @@ __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha,
// Compute y = alpha x + beta y on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(axpbyValueK, dim3(gridsize), dim3(bsize), 0, 0, y_array, alpha, beta, x_array, length);
Expand All @@ -132,9 +132,9 @@ __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScala
// Compute the pointwise multiplication w = x .* y on device
//------------------------------------------------------------------------------
extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
const int bsize = 512;
const int bsize = 512;
const CeedSize vecsize = length;
int gridsize = vecsize / bsize;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize) gridsize += 1;
hipLaunchKernelGGL(pointwiseMultValueK, dim3(gridsize), dim3(bsize), 0, 0, w_array, x_array, y_array, length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include <ceed.h>

#if CEEDSIZE
typedef CeedSize IndexType;
typedef CeedSize IndexType;
#else
typedef CeedInt IndexType;
typedef CeedInt IndexType;
#endif

//------------------------------------------------------------------------------
Expand Down
12 changes: 6 additions & 6 deletions include/ceed/jit-source/hip/hip-ref-operator-assemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include <ceed.h>

#if CEEDSIZE
typedef CeedSize IndexType;
typedef CeedSize IndexType;
#else
typedef CeedInt IndexType;
typedef CeedInt IndexType;
#endif

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -42,7 +42,7 @@ extern "C" __launch_bounds__(BLOCK_SIZE) __global__
for (IndexType comp_in = 0; comp_in < NCOMP; comp_in++) {
for (IndexType comp_out = 0; comp_out < NCOMP; comp_out++) {
CeedScalar result = 0.0;
IndexType qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e;
IndexType qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e;
for (IndexType emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
IndexType b_in_index = emode_in * NQPTS * NNODES;
for (IndexType emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
Expand All @@ -54,7 +54,7 @@ extern "C" __launch_bounds__(BLOCK_SIZE) __global__
}
} // end of emode_out
} // end of emode_in
IndexType val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
IndexType val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
values_array[val_index] = result;
} // end of out component
} // end of in component
Expand Down Expand Up @@ -90,7 +90,7 @@ extern "C" __launch_bounds__(BLOCK_SIZE) __global__
for (IndexType comp_out = 0; comp_out < NCOMP; comp_out++) {
for (IndexType i = 0; i < NNODES; i++) {
CeedScalar result = 0.0;
IndexType qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e;
IndexType qf_index_comp = qcomp_in_stride * comp_in + qcomp_out_stride * comp_out + qe_stride * e;
for (IndexType emode_in = 0; emode_in < NUMEMODEIN; emode_in++) {
IndexType b_in_index = emode_in * NQPTS * NNODES;
for (IndexType emode_out = 0; emode_out < NUMEMODEOUT; emode_out++) {
Expand All @@ -102,7 +102,7 @@ extern "C" __launch_bounds__(BLOCK_SIZE) __global__
}
} // end of emode_out
} // end of emode_in
IndexType val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
IndexType val_index = comp_in_stride * comp_in + comp_out_stride * comp_out + e_stride * e + NNODES * i + l;
values_array[val_index] = result;
} // end of loop over element node index, i
} // end of out component
Expand Down

0 comments on commit 1563911

Please sign in to comment.