Run BoxReciprocalSums on only charged particles

GOMC-WSU · Sep 1, 2024 · 9f7a9c0 · 9f7a9c0
1 parent 417ba24
commit 9f7a9c0
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 9 deletions.
diff --git a/src/Ewald.cpp b/src/Ewald.cpp
@@ -213,10 +213,12 @@ void Ewald::BoxReciprocalSetup(uint box, XYZArray const &molCoords) {
     while (thisMol != end) {
       MoleculeKind const &thisKind = mols.GetKind(*thisMol);
       double lambdaCoef = GetLambdaCoef(*thisMol, box);
-      for (uint j = 0; j < thisKind.NumAtoms(); j++) {
-        thisBoxCoords.Set(i, molCoords[mols.MolStart(*thisMol) + j]);
-        chargeBox.push_back(thisKind.AtomCharge(j) * lambdaCoef);
-        i++;
+      for (uint j = 0; j < thisKind.NumAtoms(); ++j) {
+        if (thisKind.AtomCharge(j) != 0.0) {
+          thisBoxCoords.Set(i, molCoords[mols.MolStart(*thisMol) + j]);
+          chargeBox.push_back(thisKind.AtomCharge(j) * lambdaCoef);
+          i++;
+        }
       }
       thisMol++;
     }
@@ -301,10 +303,12 @@ void Ewald::BoxReciprocalSums(uint box, XYZArray const &molCoords) {
     while (thisMol != end) {
       MoleculeKind const &thisKind = mols.GetKind(*thisMol);
       double lambdaCoef = GetLambdaCoef(*thisMol, box);
-      for (uint j = 0; j < thisKind.NumAtoms(); j++) {
-        thisBoxCoords.Set(i, molCoords[mols.MolStart(*thisMol) + j]);
-        chargeBox.push_back(thisKind.AtomCharge(j) * lambdaCoef);
-        i++;
+      for (uint j = 0; j < thisKind.NumAtoms(); ++j) {
+        if (thisKind.AtomCharge(j) != 0.0) {
+          thisBoxCoords.Set(i, molCoords[mols.MolStart(*thisMol) + j]);
+          chargeBox.push_back(thisKind.AtomCharge(j) * lambdaCoef);
+          i++;
+        }
       }
       thisMol++;
     }

diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu
@@ -66,6 +66,8 @@ void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords,
   checkLastErrorCUDA(__FILE__, __LINE__);
 #endif
 
+  // Fewer blocks are needed since we are doing one computation per image
+  blocksPerGrid = (imageSize + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
   BoxReciprocalGPU<<<blocksPerGrid, threadsPerBlock>>>(
       vars->gpu_prefact[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
       vars->gpu_recipEnergies, imageSize);
@@ -111,6 +113,8 @@ void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords,
   checkLastErrorCUDA(__FILE__, __LINE__);
 #endif
 
+  // Fewer blocks are needed since we are doing one computation per image
+  blocksPerGrid = (imageSize + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
   BoxReciprocalGPU<<<blocksPerGrid, threadsPerBlock>>>(
       vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
       vars->gpu_recipEnergies, imageSize);
@@ -133,7 +137,7 @@ __global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
                                      double *gpu_sumRnew, double *gpu_sumInew) {
   int image = blockIdx.x;
   double sumR = 0.0, sumI = 0.0;
-#pragma unroll 8
+#pragma unroll 16
   for (int particleID = threadIdx.x; particleID < atomNumber; particleID += THREADS_PER_BLOCK) {
     double dot = DotProductGPU(gpu_kx[image], gpu_ky[image],
                                gpu_kz[image], gpu_x[particleID],