From 2ac1d60f873bbf62c85d1ec8ac560b03c7fd2bf9 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:10:28 +0100 Subject: [PATCH 1/9] core: fft: Made fft state a parameter. --- .../electrostatics_magnetostatics/p3m.cpp | 12 +-- .../electrostatics_magnetostatics/p3m.hpp | 8 +- src/core/fft.cpp | 73 +++++++++---------- src/core/fft.hpp | 11 +-- 4 files changed, 51 insertions(+), 53 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/p3m.cpp b/src/core/electrostatics_magnetostatics/p3m.cpp index 1b5708914a1..69b8dfe5a86 100644 --- a/src/core/electrostatics_magnetostatics/p3m.cpp +++ b/src/core/electrostatics_magnetostatics/p3m.cpp @@ -289,7 +289,7 @@ void p3m_pre_init(void) { p3m.send_grid = nullptr; p3m.recv_grid = nullptr; - fft_pre_init(); + fft_common_pre_init(&fft); } void p3m_free() { @@ -366,8 +366,8 @@ void p3m_init() { (void *)p3m.rs_mesh)); int ca_mesh_size = - fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, - p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum); + fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, + p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, fft); p3m.ks_mesh = Utils::realloc(p3m.ks_mesh, ca_mesh_size * sizeof(double)); P3M_TRACE(fprintf(stderr, "%d: p3m.rs_mesh ADR=%p\n", this_node, @@ -820,7 +820,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { /* and Perform forward 3D FFT (Charge Assignment Mesh). */ if (p3m.sum_q2 > 0) { p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh); + fft_perform_forw(p3m.rs_mesh, fft); } // Note: after these calls, the grids are in the order yzx and not xyz // anymore!!! @@ -904,7 +904,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { } } /* Back FFT force component mesh */ - fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning); + fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, fft); /* redistribute force component mesh */ p3m_spread_force_grid(p3m.rs_mesh); /* Assign force component from mesh to particle */ @@ -2350,7 +2350,7 @@ void p3m_calc_kspace_stress(double *stress) { } p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh); + fft_perform_forw(p3m.rs_mesh, fft); force_prefac = coulomb.prefactor / (2.0 * box_l[0] * box_l[1] * box_l[2]); for (j[0] = 0; j[0] < fft.plan[3].new_mesh[RX]; j[0]++) { diff --git a/src/core/electrostatics_magnetostatics/p3m.hpp b/src/core/electrostatics_magnetostatics/p3m.hpp index ce0dd443af4..e7a5c3fb9cf 100755 --- a/src/core/electrostatics_magnetostatics/p3m.hpp +++ b/src/core/electrostatics_magnetostatics/p3m.hpp @@ -51,14 +51,16 @@ */ #include "config.hpp" + +#ifdef P3M + #include "debug.hpp" +#include "fft.hpp" #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "p3m-common.hpp" #include "utils/math/AS_erfc_part.hpp" -#ifdef P3M - /************************************************ * data types ************************************************/ @@ -117,6 +119,8 @@ typedef struct { double *send_grid; /** Field to store grid points to recv */ double *recv_grid; + + fft_data_struct fft; } p3m_data_struct; /** P3M parameters. */ diff --git a/src/core/fft.cpp b/src/core/fft.cpp index d83c78b28bb..fbdcd69c221 100755 --- a/src/core/fft.cpp +++ b/src/core/fft.cpp @@ -49,7 +49,7 @@ fft_data_struct fft; * \param in input mesh. * \param out output mesh. */ -static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out); +static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_data_struct &fft); /** communicate the grid data according to the given * fft_forw_plan/fft_bakc_plan. \param plan_f communication plan (see \ref @@ -57,13 +57,11 @@ static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out); * \param in input mesh. * \param out output mesh. */ -static void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, - double *in, double *out); +static void +fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out, fft_data_struct &fft); -void fft_pre_init() { fft_common_pre_init(&fft); } - -int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, - int *global_mesh_dim, double *global_mesh_off, int *ks_pnum) { +int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_mesh_dim, double *global_mesh_off, + int *ks_pnum, fft_data_struct &fft) { int i, j; /* helpers */ int mult[3]; @@ -117,9 +115,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, fft.plan[0].new_mesh[i] = ca_mesh_dim[i]; for (i = 1; i < 4; i++) { fft.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - fft.plan[i].group, n_pos[i], my_pos[i]); + {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, + {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], + fft.plan[i].group, n_pos[i], my_pos[i]); if (fft.plan[i].g_size == -1) { /* try permutation */ j = n_grid[i][(fft.plan[i].row_dir + 1) % 3]; @@ -127,9 +125,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, n_grid[i][(fft.plan[i].row_dir + 2) % 3]; n_grid[i][(fft.plan[i].row_dir + 2) % 3] = j; fft.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - fft.plan[i].group, n_pos[i], my_pos[i]); + {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, + {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], + fft.plan[i].group, n_pos[i], my_pos[i]); if (fft.plan[i].g_size == -1) { fprintf(stderr, "%d: INTERNAL ERROR: fft_find_comm_groups error\n", this_node); @@ -138,17 +136,17 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, } fft.plan[i].send_block = Utils::realloc( - fft.plan[i].send_block, 6 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].send_block, 6 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].send_size = Utils::realloc( - fft.plan[i].send_size, 1 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].send_size, 1 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].recv_block = Utils::realloc( - fft.plan[i].recv_block, 6 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].recv_block, 6 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].recv_size = Utils::realloc( - fft.plan[i].recv_size, 1 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].recv_size, 1 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].new_size = fft_calc_local_mesh( - my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, - fft.plan[i].new_mesh, fft.plan[i].start); + my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, + fft.plan[i].new_mesh, fft.plan[i].start); permute_ifield(fft.plan[i].new_mesh, 3, -(fft.plan[i].n_permute)); permute_ifield(fft.plan[i].start, 3, -(fft.plan[i].n_permute)); fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1]; @@ -264,9 +262,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, if (fft.init_tag == 1) fftw_destroy_plan(fft.plan[i].our_fftw_plan); fft.plan[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.plan[i].dir, FFTW_PATIENT); + 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, + fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], + fft.plan[i].dir, FFTW_PATIENT); fft.plan[i].fft_function = fftw_execute; } @@ -279,9 +277,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, if (fft.init_tag == 1) fftw_destroy_plan(fft.back[i].our_fftw_plan); fft.back[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.back[i].dir, FFTW_PATIENT); + 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, + fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], + fft.back[i].dir, FFTW_PATIENT); fft.back[i].fft_function = fftw_execute; fft.back[i].pack_function = fft_pack_block_permute1; @@ -303,7 +301,7 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, return fft.max_mesh_size; } -void fft_perform_forw(double *data) { +void fft_perform_forw(double *data, fft_data_struct &fft) { int i; /* int m,n,o; */ @@ -311,10 +309,10 @@ void fft_perform_forw(double *data) { FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 1:\n", this_node)); fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)fft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *) fft.data_buf; /* communication to current dir row format (in is data) */ - fft_forw_grid_comm(fft.plan[1], data, fft.data_buf); + fft_forw_grid_comm(fft.plan[1], data, fft.data_buf, fft); /* fprintf(stderr,"%d: start grid \n",this_node); @@ -340,13 +338,13 @@ void fft_perform_forw(double *data) { /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 2:\n", this_node)); /* communication to current dir row format (in is data) */ - fft_forw_grid_comm(fft.plan[2], data, fft.data_buf); + fft_forw_grid_comm(fft.plan[2], data, fft.data_buf, fft); /* perform FFT (in/out is fft.data_buf)*/ fftw_execute_dft(fft.plan[2].our_fftw_plan, c_data_buf, c_data_buf); /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 3:\n", this_node)); /* communication to current dir row format (in is fft.data_buf) */ - fft_forw_grid_comm(fft.plan[3], fft.data_buf, data); + fft_forw_grid_comm(fft.plan[3], fft.data_buf, data, fft); /* perform FFT (in/out is data)*/ fftw_execute_dft(fft.plan[3].our_fftw_plan, c_data, c_data); // fft_print_global_fft_mesh(fft.plan[3],data,1,0); @@ -354,11 +352,11 @@ void fft_perform_forw(double *data) { /* REMARK: Result has to be in data. */ } -void fft_perform_back(double *data, bool check_complex) { +void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { int i; fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)fft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *) fft.data_buf; /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 3:\n", this_node)); @@ -366,14 +364,14 @@ void fft_perform_back(double *data, bool check_complex) { /* perform FFT (in is data) */ fftw_execute_dft(fft.back[3].our_fftw_plan, c_data, c_data); /* communicate (in is data)*/ - fft_back_grid_comm(fft.plan[3], fft.back[3], data, fft.data_buf); + fft_back_grid_comm(fft.plan[3], fft.back[3], data, fft.data_buf, fft); /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 2:\n", this_node)); /* perform FFT (in is fft.data_buf) */ fftw_execute_dft(fft.back[2].our_fftw_plan, c_data_buf, c_data_buf); /* communicate (in is fft.data_buf) */ - fft_back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf, data); + fft_back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf, data, fft); /* ===== first direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 1:\n", this_node)); @@ -391,12 +389,12 @@ void fft_perform_back(double *data, bool check_complex) { } } /* communicate (in is fft.data_buf) */ - fft_back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf, data); + fft_back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf, data, fft); /* REMARK: Result has to be in data. */ } -void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out) { +void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_data_struct &fft) { int i; MPI_Status status; double *tmp_ptr; @@ -427,8 +425,7 @@ void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out) { } } -void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, - double *out) { +void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out, fft_data_struct &fft) { int i; MPI_Status status; double *tmp_ptr; diff --git a/src/core/fft.hpp b/src/core/fft.hpp index cc76fbe800a..73049684c6d 100644 --- a/src/core/fft.hpp +++ b/src/core/fft.hpp @@ -53,9 +53,6 @@ extern fft_data_struct fft; /************************************************************/ /*@{*/ -/** Initialize fft data structure. */ -void fft_pre_init(); - /** Initialize everything connected to the 3D-FFT. * \return Maximal size of local fft mesh (needed for allocation of ca_mesh). @@ -66,21 +63,21 @@ void fft_pre_init(); * \param global_mesh_off Pointer to global CA mesh offset. * \param ks_pnum Pointer to number of permutations in k-space. */ -int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, - int *global_mesh_dim, double *global_mesh_off, int *ks_pnum); +int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_mesh_dim, double *global_mesh_off, + int *ks_pnum, fft_data_struct &fft); /** perform the forward 3D FFT. The assigned charges are in \a data. The result is also stored in \a data. \warning The content of \a data is overwritten. \param data Mesh. */ -void fft_perform_forw(double *data); +void fft_perform_forw(double *data, fft_data_struct &fft); /** perform the backward 3D FFT. \warning The content of \a data is overwritten. \param data Mesh. */ -void fft_perform_back(double *data, bool check_complex); +void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft); /*@}*/ #endif From 32f8f99348b68224c10fd81c62e08dfe7506cb80 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:14:15 +0100 Subject: [PATCH 2/9] wip: Unique name for fft global --- .../electrostatics_magnetostatics/p3m.cpp | 64 +++++++++---------- .../p3m_gpu_cuda.cu | 2 +- src/core/fft-common.hpp | 2 +- src/core/fft-dipolar.cpp | 2 +- src/core/fft.cpp | 22 +++---- src/core/fft.hpp | 2 +- .../fd-electrostatics_cuda.cu | 2 +- 7 files changed, 48 insertions(+), 48 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/p3m.cpp b/src/core/electrostatics_magnetostatics/p3m.cpp index 69b8dfe5a86..cfaae0aeb22 100644 --- a/src/core/electrostatics_magnetostatics/p3m.cpp +++ b/src/core/electrostatics_magnetostatics/p3m.cpp @@ -289,7 +289,7 @@ void p3m_pre_init(void) { p3m.send_grid = nullptr; p3m.recv_grid = nullptr; - fft_common_pre_init(&fft); + fft_common_pre_init(&fft_aaaa); } void p3m_free() { @@ -367,7 +367,7 @@ void p3m_init() { int ca_mesh_size = fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, - p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, fft); + p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, fft_aaaa); p3m.ks_mesh = Utils::realloc(p3m.ks_mesh, ca_mesh_size * sizeof(double)); P3M_TRACE(fprintf(stderr, "%d: p3m.rs_mesh ADR=%p\n", this_node, @@ -820,7 +820,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { /* and Perform forward 3D FFT (Charge Assignment Mesh). */ if (p3m.sum_q2 > 0) { p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh, fft); + fft_perform_forw(p3m.rs_mesh, fft_aaaa); } // Note: after these calls, the grids are in the order yzx and not xyz // anymore!!! @@ -835,7 +835,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { Coulomb energy **********************/ - for (i = 0; i < fft.plan[3].new_size; i++) { + for (i = 0; i < fft_aaaa.plan[3].new_size; i++) { // Use the energy optimized influence function for energy! node_k_space_energy += p3m.g_energy[i] * @@ -865,7 +865,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { /* Force preparation */ ind = 0; /* apply the influence function */ - for (i = 0; i < fft.plan[3].new_size; i++) { + for (i = 0; i < fft_aaaa.plan[3].new_size; i++) { p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++; p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; @@ -887,24 +887,24 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { d_rs = (d + p3m.ks_pnum) % 3; /* sqrt(-1)*k differentiation */ ind = 0; - for (j[0] = 0; j[0] < fft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < fft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < fft.plan[3].new_mesh[2]; j[2]++) { + for (j[0] = 0; j[0] < fft_aaaa.plan[3].new_mesh[0]; j[0]++) { + for (j[1] = 0; j[1] < fft_aaaa.plan[3].new_mesh[1]; j[1]++) { + for (j[2] = 0; j[2] < fft_aaaa.plan[3].new_mesh[2]; j[2]++) { /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ p3m.rs_mesh[ind] = -2.0 * PI * (p3m.ks_mesh[ind + 1] * - d_operator[j[d] + fft.plan[3].start[d]]) / + d_operator[j[d] + fft_aaaa.plan[3].start[d]]) / box_l[d_rs]; ind++; p3m.rs_mesh[ind] = 2.0 * PI * p3m.ks_mesh[ind - 1] * - d_operator[j[d] + fft.plan[3].start[d]] / + d_operator[j[d] + fft_aaaa.plan[3].start[d]] / box_l[d_rs]; ind++; } } } /* Back FFT force component mesh */ - fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, fft); + fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, fft_aaaa); /* redistribute force component mesh */ p3m_spread_force_grid(p3m.rs_mesh); /* Assign force component from mesh to particle */ @@ -1180,8 +1180,8 @@ template void calc_influence_function_force() { p3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= fft.plan[3].new_mesh[i]; - end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i]; + size *= fft_aaaa.plan[3].new_mesh[i]; + end[i] = fft_aaaa.plan[3].start[i] + fft_aaaa.plan[3].new_mesh[i]; } p3m.g_force = Utils::realloc(p3m.g_force, size * sizeof(double)); @@ -1195,13 +1195,13 @@ template void calc_influence_function_force() { return; } - for (n[0] = fft.plan[3].start[0]; n[0] < end[0]; n[0]++) { - for (n[1] = fft.plan[3].start[1]; n[1] < end[1]; n[1]++) { - for (n[2] = fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - fft.plan[3].start[2]) + - fft.plan[3].new_mesh[2] * - ((n[1] - fft.plan[3].start[1]) + - (fft.plan[3].new_mesh[1] * (n[0] - fft.plan[3].start[0]))); + for (n[0] = fft_aaaa.plan[3].start[0]; n[0] < end[0]; n[0]++) { + for (n[1] = fft_aaaa.plan[3].start[1]; n[1] < end[1]; n[1]++) { + for (n[2] = fft_aaaa.plan[3].start[2]; n[2] < end[2]; n[2]++) { + ind = (n[2] - fft_aaaa.plan[3].start[2]) + + fft_aaaa.plan[3].new_mesh[2] * + ((n[1] - fft_aaaa.plan[3].start[1]) + + (fft_aaaa.plan[3].new_mesh[1] * (n[0] - fft_aaaa.plan[3].start[0]))); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && (n[KY] % (p3m.params.mesh[RY] / 2) == 0) && @@ -1302,9 +1302,9 @@ template void calc_influence_function_energy() { p3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= fft.plan[3].new_mesh[i]; - end[i] = fft.plan[3].start[i] + fft.plan[3].new_mesh[i]; - start[i] = fft.plan[3].start[i]; + size *= fft_aaaa.plan[3].new_mesh[i]; + end[i] = fft_aaaa.plan[3].start[i] + fft_aaaa.plan[3].new_mesh[i]; + start[i] = fft_aaaa.plan[3].start[i]; } p3m.g_energy = Utils::realloc(p3m.g_energy, size * sizeof(double)); @@ -1319,8 +1319,8 @@ template void calc_influence_function_energy() { for (n[0] = start[0]; n[0] < end[0]; n[0]++) { for (n[1] = start[1]; n[1] < end[1]; n[1]++) { for (n[2] = start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - start[2]) + fft.plan[3].new_mesh[2] * (n[1] - start[1]) + - fft.plan[3].new_mesh[2] * fft.plan[3].new_mesh[1] * + ind = (n[2] - start[2]) + fft_aaaa.plan[3].new_mesh[2] * (n[1] - start[1]) + + fft_aaaa.plan[3].new_mesh[2] * fft_aaaa.plan[3].new_mesh[1] * (n[0] - start[0]); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && (n[KY] % (p3m.params.mesh[RY] / 2) == 0) && @@ -2350,17 +2350,17 @@ void p3m_calc_kspace_stress(double *stress) { } p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh, fft); + fft_perform_forw(p3m.rs_mesh, fft_aaaa); force_prefac = coulomb.prefactor / (2.0 * box_l[0] * box_l[1] * box_l[2]); - for (j[0] = 0; j[0] < fft.plan[3].new_mesh[RX]; j[0]++) { - for (j[1] = 0; j[1] < fft.plan[3].new_mesh[RY]; j[1]++) { - for (j[2] = 0; j[2] < fft.plan[3].new_mesh[RZ]; j[2]++) { - kx = 2.0 * PI * p3m.d_op[RX][j[KX] + fft.plan[3].start[KX]] / + for (j[0] = 0; j[0] < fft_aaaa.plan[3].new_mesh[RX]; j[0]++) { + for (j[1] = 0; j[1] < fft_aaaa.plan[3].new_mesh[RY]; j[1]++) { + for (j[2] = 0; j[2] < fft_aaaa.plan[3].new_mesh[RZ]; j[2]++) { + kx = 2.0 * PI * p3m.d_op[RX][j[KX] + fft_aaaa.plan[3].start[KX]] / box_l[RX]; - ky = 2.0 * PI * p3m.d_op[RY][j[KY] + fft.plan[3].start[KY]] / + ky = 2.0 * PI * p3m.d_op[RY][j[KY] + fft_aaaa.plan[3].start[KY]] / box_l[RY]; - kz = 2.0 * PI * p3m.d_op[RZ][j[KZ] + fft.plan[3].start[KZ]] / + kz = 2.0 * PI * p3m.d_op[RZ][j[KZ] + fft_aaaa.plan[3].start[KZ]] / box_l[RZ]; sqk = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); if (sqk == 0) { diff --git a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu index 57766d51af1..d716d09fc5b 100644 --- a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu +++ b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu @@ -741,7 +741,7 @@ void p3m_gpu_init(int cao, int mesh[3], double alpha) { FFT_PLAN_FORW_FLAG) != CUFFT_SUCCESS || cufftPlan3d(&(p3m_gpu_fft_plans.back_plan), mesh[0], mesh[1], mesh[2], FFT_PLAN_BACK_FLAG) != CUFFT_SUCCESS) { - throw std::string("Unable to create fft plan"); + throw std::string("Unable to create fft_aaaa plan"); } } diff --git a/src/core/fft-common.hpp b/src/core/fft-common.hpp index 2160450f070..a05283359e8 100644 --- a/src/core/fft-common.hpp +++ b/src/core/fft-common.hpp @@ -123,7 +123,7 @@ typedef struct { * DEFINES ************************************************/ -/* MPI tags for the fft communications: */ +/* MPI tags for the fft_aaaa communications: */ /** Tag for communication in fft_init() */ #define REQ_FFT_INIT 300 /** Tag for communication in forw_grid_comm() */ diff --git a/src/core/fft-dipolar.cpp b/src/core/fft-dipolar.cpp index 78761fbb9d1..b8010b69dfc 100644 --- a/src/core/fft-dipolar.cpp +++ b/src/core/fft-dipolar.cpp @@ -408,7 +408,7 @@ void dfft_perform_back(double *data) { dfft.data_buf[i] = data[2 * i]; /* real value */ // Vincent: if (data[2 * i + 1] > 1e-5) { - printf("dipoar fft - Complex value is not zero (i=%d,data=%g)!!!\n", i, + printf("dipoar fft_aaaa - Complex value is not zero (i=%d,data=%g)!!!\n", i, data[2 * i + 1]); if (i > 100) errexit(); diff --git a/src/core/fft.cpp b/src/core/fft.cpp index fbdcd69c221..e06a8eed2ae 100755 --- a/src/core/fft.cpp +++ b/src/core/fft.cpp @@ -42,7 +42,7 @@ using Utils::permute_ifield; /************************************************ * variables ************************************************/ -fft_data_struct fft; +fft_data_struct fft_aaaa; /** communicate the grid data according to the given fft_forw_plan. * \param plan communication plan (see \ref fft_forw_plan). @@ -152,7 +152,7 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1]; /* FFT_TRACE( printf("%d: comm_group( ",this_node )); */ - /* FFT_TRACE( for(j=0; j< fft.plan[i].g_size; j++) printf("%d ")); */ + /* FFT_TRACE( for(j=0; j< fft_aaaa.plan[i].g_size; j++) printf("%d ")); */ /* FFT_TRACE( printf(")\n")); */ /* === send/recv block specifications === */ @@ -215,7 +215,7 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m fft.max_mesh_size = 2 * fft.plan[i].new_size; FFT_TRACE(fprintf(stderr, - "%d: fft.max_comm_size = %d, fft.max_mesh_size = %d\n", + "%d: fft_aaaa.max_comm_size = %d, fft_aaaa.max_mesh_size = %d\n", this_node, fft.max_comm_size, fft.max_mesh_size)); /* === pack function === */ @@ -320,7 +320,7 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { for(m=0;m<8;m++) { for(n=0;n<8;n++) { for(o=0;o<8;o++) { - fprintf(stderr,"%.3f ",fft.data_buf[i++]); + fprintf(stderr,"%.3f ",fft_aaaa.data_buf[i++]); } fprintf(stderr,"\n"); } @@ -328,7 +328,7 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { } */ - /* complexify the real data array (in is fft.data_buf) */ + /* complexify the real data array (in is fft_aaaa.data_buf) */ for (i = 0; i < fft.plan[1].new_size; i++) { data[2 * i] = fft.data_buf[i]; /* real value */ data[(2 * i) + 1] = 0; /* complex value */ @@ -339,15 +339,15 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 2:\n", this_node)); /* communication to current dir row format (in is data) */ fft_forw_grid_comm(fft.plan[2], data, fft.data_buf, fft); - /* perform FFT (in/out is fft.data_buf)*/ + /* perform FFT (in/out is fft_aaaa.data_buf)*/ fftw_execute_dft(fft.plan[2].our_fftw_plan, c_data_buf, c_data_buf); /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 3:\n", this_node)); - /* communication to current dir row format (in is fft.data_buf) */ + /* communication to current dir row format (in is fft_aaaa.data_buf) */ fft_forw_grid_comm(fft.plan[3], fft.data_buf, data, fft); /* perform FFT (in/out is data)*/ fftw_execute_dft(fft.plan[3].our_fftw_plan, c_data, c_data); - // fft_print_global_fft_mesh(fft.plan[3],data,1,0); + // fft_print_global_fft_mesh(fft_aaaa.plan[3],data,1,0); /* REMARK: Result has to be in data. */ } @@ -368,9 +368,9 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 2:\n", this_node)); - /* perform FFT (in is fft.data_buf) */ + /* perform FFT (in is fft_aaaa.data_buf) */ fftw_execute_dft(fft.back[2].our_fftw_plan, c_data_buf, c_data_buf); - /* communicate (in is fft.data_buf) */ + /* communicate (in is fft_aaaa.data_buf) */ fft_back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf, data, fft); /* ===== first direction ===== */ @@ -388,7 +388,7 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { errexit(); } } - /* communicate (in is fft.data_buf) */ + /* communicate (in is fft_aaaa.data_buf) */ fft_back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf, data, fft); /* REMARK: Result has to be in data. */ diff --git a/src/core/fft.hpp b/src/core/fft.hpp index 73049684c6d..0bdb5153a12 100644 --- a/src/core/fft.hpp +++ b/src/core/fft.hpp @@ -47,7 +47,7 @@ #include "fft-common.hpp" -extern fft_data_struct fft; +extern fft_data_struct fft_aaaa; /** \name Exported Functions */ /************************************************************/ diff --git a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu index 2ba23d529f9..c9d45a656e7 100644 --- a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu +++ b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu @@ -94,7 +94,7 @@ FdElectrostatics::FdElectrostatics(InputParameters inputParameters, if (cufftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y, parameters.dim_x, CUFFT_R2C) != CUFFT_SUCCESS) { - throw std::string("Unable to create fft plan"); + throw std::string("Unable to create fft_aaaa plan"); } if (cufftSetStream(plan_fft, cuda_stream) != CUFFT_SUCCESS) { From 456cf5ac7cfcb7c2efb54e343505939a56c9e101 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:16:01 +0100 Subject: [PATCH 3/9] core: fft: Removed fft global --- .../electrostatics_magnetostatics/p3m.cpp | 64 +++++++++---------- src/core/fft.cpp | 5 -- src/core/fft.hpp | 2 - 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/p3m.cpp b/src/core/electrostatics_magnetostatics/p3m.cpp index cfaae0aeb22..754c54a6789 100644 --- a/src/core/electrostatics_magnetostatics/p3m.cpp +++ b/src/core/electrostatics_magnetostatics/p3m.cpp @@ -289,7 +289,7 @@ void p3m_pre_init(void) { p3m.send_grid = nullptr; p3m.recv_grid = nullptr; - fft_common_pre_init(&fft_aaaa); + fft_common_pre_init(&p3m.fft); } void p3m_free() { @@ -367,7 +367,7 @@ void p3m_init() { int ca_mesh_size = fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, - p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, fft_aaaa); + p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, p3m.fft); p3m.ks_mesh = Utils::realloc(p3m.ks_mesh, ca_mesh_size * sizeof(double)); P3M_TRACE(fprintf(stderr, "%d: p3m.rs_mesh ADR=%p\n", this_node, @@ -820,7 +820,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { /* and Perform forward 3D FFT (Charge Assignment Mesh). */ if (p3m.sum_q2 > 0) { p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh, fft_aaaa); + fft_perform_forw(p3m.rs_mesh, p3m.fft); } // Note: after these calls, the grids are in the order yzx and not xyz // anymore!!! @@ -835,7 +835,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { Coulomb energy **********************/ - for (i = 0; i < fft_aaaa.plan[3].new_size; i++) { + for (i = 0; i < p3m.fft.plan[3].new_size; i++) { // Use the energy optimized influence function for energy! node_k_space_energy += p3m.g_energy[i] * @@ -865,7 +865,7 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { /* Force preparation */ ind = 0; /* apply the influence function */ - for (i = 0; i < fft_aaaa.plan[3].new_size; i++) { + for (i = 0; i < p3m.fft.plan[3].new_size; i++) { p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; ind++; p3m.ks_mesh[ind] = p3m.g_force[i] * p3m.rs_mesh[ind]; @@ -887,24 +887,24 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { d_rs = (d + p3m.ks_pnum) % 3; /* sqrt(-1)*k differentiation */ ind = 0; - for (j[0] = 0; j[0] < fft_aaaa.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < fft_aaaa.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < fft_aaaa.plan[3].new_mesh[2]; j[2]++) { + for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[0]; j[0]++) { + for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[1]; j[1]++) { + for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[2]; j[2]++) { /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ p3m.rs_mesh[ind] = -2.0 * PI * (p3m.ks_mesh[ind + 1] * - d_operator[j[d] + fft_aaaa.plan[3].start[d]]) / + d_operator[j[d] + p3m.fft.plan[3].start[d]]) / box_l[d_rs]; ind++; p3m.rs_mesh[ind] = 2.0 * PI * p3m.ks_mesh[ind - 1] * - d_operator[j[d] + fft_aaaa.plan[3].start[d]] / + d_operator[j[d] + p3m.fft.plan[3].start[d]] / box_l[d_rs]; ind++; } } } /* Back FFT force component mesh */ - fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, fft_aaaa); + fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, p3m.fft); /* redistribute force component mesh */ p3m_spread_force_grid(p3m.rs_mesh); /* Assign force component from mesh to particle */ @@ -1180,8 +1180,8 @@ template void calc_influence_function_force() { p3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= fft_aaaa.plan[3].new_mesh[i]; - end[i] = fft_aaaa.plan[3].start[i] + fft_aaaa.plan[3].new_mesh[i]; + size *= p3m.fft.plan[3].new_mesh[i]; + end[i] = p3m.fft.plan[3].start[i] + p3m.fft.plan[3].new_mesh[i]; } p3m.g_force = Utils::realloc(p3m.g_force, size * sizeof(double)); @@ -1195,13 +1195,13 @@ template void calc_influence_function_force() { return; } - for (n[0] = fft_aaaa.plan[3].start[0]; n[0] < end[0]; n[0]++) { - for (n[1] = fft_aaaa.plan[3].start[1]; n[1] < end[1]; n[1]++) { - for (n[2] = fft_aaaa.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - fft_aaaa.plan[3].start[2]) + - fft_aaaa.plan[3].new_mesh[2] * - ((n[1] - fft_aaaa.plan[3].start[1]) + - (fft_aaaa.plan[3].new_mesh[1] * (n[0] - fft_aaaa.plan[3].start[0]))); + for (n[0] = p3m.fft.plan[3].start[0]; n[0] < end[0]; n[0]++) { + for (n[1] = p3m.fft.plan[3].start[1]; n[1] < end[1]; n[1]++) { + for (n[2] = p3m.fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { + ind = (n[2] - p3m.fft.plan[3].start[2]) + + p3m.fft.plan[3].new_mesh[2] * + ((n[1] - p3m.fft.plan[3].start[1]) + + (p3m.fft.plan[3].new_mesh[1] * (n[0] - p3m.fft.plan[3].start[0]))); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && (n[KY] % (p3m.params.mesh[RY] / 2) == 0) && @@ -1302,9 +1302,9 @@ template void calc_influence_function_energy() { p3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= fft_aaaa.plan[3].new_mesh[i]; - end[i] = fft_aaaa.plan[3].start[i] + fft_aaaa.plan[3].new_mesh[i]; - start[i] = fft_aaaa.plan[3].start[i]; + size *= p3m.fft.plan[3].new_mesh[i]; + end[i] = p3m.fft.plan[3].start[i] + p3m.fft.plan[3].new_mesh[i]; + start[i] = p3m.fft.plan[3].start[i]; } p3m.g_energy = Utils::realloc(p3m.g_energy, size * sizeof(double)); @@ -1319,8 +1319,8 @@ template void calc_influence_function_energy() { for (n[0] = start[0]; n[0] < end[0]; n[0]++) { for (n[1] = start[1]; n[1] < end[1]; n[1]++) { for (n[2] = start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - start[2]) + fft_aaaa.plan[3].new_mesh[2] * (n[1] - start[1]) + - fft_aaaa.plan[3].new_mesh[2] * fft_aaaa.plan[3].new_mesh[1] * + ind = (n[2] - start[2]) + p3m.fft.plan[3].new_mesh[2] * (n[1] - start[1]) + + p3m.fft.plan[3].new_mesh[2] * p3m.fft.plan[3].new_mesh[1] * (n[0] - start[0]); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && (n[KY] % (p3m.params.mesh[RY] / 2) == 0) && @@ -2350,17 +2350,17 @@ void p3m_calc_kspace_stress(double *stress) { } p3m_gather_fft_grid(p3m.rs_mesh); - fft_perform_forw(p3m.rs_mesh, fft_aaaa); + fft_perform_forw(p3m.rs_mesh, p3m.fft); force_prefac = coulomb.prefactor / (2.0 * box_l[0] * box_l[1] * box_l[2]); - for (j[0] = 0; j[0] < fft_aaaa.plan[3].new_mesh[RX]; j[0]++) { - for (j[1] = 0; j[1] < fft_aaaa.plan[3].new_mesh[RY]; j[1]++) { - for (j[2] = 0; j[2] < fft_aaaa.plan[3].new_mesh[RZ]; j[2]++) { - kx = 2.0 * PI * p3m.d_op[RX][j[KX] + fft_aaaa.plan[3].start[KX]] / + for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[RX]; j[0]++) { + for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[RY]; j[1]++) { + for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[RZ]; j[2]++) { + kx = 2.0 * PI * p3m.d_op[RX][j[KX] + p3m.fft.plan[3].start[KX]] / box_l[RX]; - ky = 2.0 * PI * p3m.d_op[RY][j[KY] + fft_aaaa.plan[3].start[KY]] / + ky = 2.0 * PI * p3m.d_op[RY][j[KY] + p3m.fft.plan[3].start[KY]] / box_l[RY]; - kz = 2.0 * PI * p3m.d_op[RZ][j[KZ] + fft_aaaa.plan[3].start[KZ]] / + kz = 2.0 * PI * p3m.d_op[RZ][j[KZ] + p3m.fft.plan[3].start[KZ]] / box_l[RZ]; sqk = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); if (sqk == 0) { diff --git a/src/core/fft.cpp b/src/core/fft.cpp index e06a8eed2ae..dab3e448818 100755 --- a/src/core/fft.cpp +++ b/src/core/fft.cpp @@ -39,11 +39,6 @@ using Utils::permute_ifield; #include #include -/************************************************ - * variables - ************************************************/ -fft_data_struct fft_aaaa; - /** communicate the grid data according to the given fft_forw_plan. * \param plan communication plan (see \ref fft_forw_plan). * \param in input mesh. diff --git a/src/core/fft.hpp b/src/core/fft.hpp index 0bdb5153a12..c14886a7fa0 100644 --- a/src/core/fft.hpp +++ b/src/core/fft.hpp @@ -47,8 +47,6 @@ #include "fft-common.hpp" -extern fft_data_struct fft_aaaa; - /** \name Exported Functions */ /************************************************************/ /*@{*/ From 6612aca1968e6a70920616ca4e8b35499101a0cf Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:23:57 +0100 Subject: [PATCH 4/9] core: fft: Removed dfft global --- .../p3m-dipolar.cpp | 161 +++++----- .../p3m-dipolar.hpp | 3 + src/core/fft-dipolar.cpp | 276 +++++++++--------- src/core/fft-dipolar.hpp | 2 +- 4 files changed, 222 insertions(+), 220 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp index 02f3905fd46..770110b4a98 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp @@ -38,7 +38,6 @@ #include "cells.hpp" #include "communication.hpp" #include "domain_decomposition.hpp" -#include "fft-dipolar.hpp" #include "global.hpp" #include "grid.hpp" #include "integrate.hpp" @@ -311,7 +310,7 @@ void dp3m_pre_init(void) { dp3m.energy_correction = 0.0; - dfft_pre_init(); + fft_common_pre_init(&dp3m.fft); } void dp3m_deactivate() { @@ -396,8 +395,8 @@ void dp3m_init() { (void *)dp3m.rs_mesh)); int ca_mesh_size = - dfft_init(&dp3m.rs_mesh, dp3m.local_mesh.dim, dp3m.local_mesh.margin, - dp3m.params.mesh, dp3m.params.mesh_off, &dp3m.ks_pnum); + fft_init(&dp3m.rs_mesh, dp3m.local_mesh.dim, dp3m.local_mesh.margin, + dp3m.params.mesh, dp3m.params.mesh_off, &dp3m.ks_pnum, dp3m.fft); dp3m.ks_mesh = Utils::realloc(dp3m.ks_mesh, ca_mesh_size * sizeof(double)); for (n = 0; n < 3; n++) @@ -444,17 +443,17 @@ double dp3m_average_dipolar_self_energy(double box_l, int mesh) { int size = 1; for (i = 0; i < 3; i++) { - size *= dfft.plan[3].new_mesh[i]; - end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; + size *= dp3m.fft.plan[3].new_mesh[i]; + end[i] = dp3m.fft.plan[3].start[i] + dp3m.fft.plan[3].new_mesh[i]; } - for (n[0] = dfft.plan[3].start[0]; n[0] < end[0]; n[0]++) { - for (n[1] = dfft.plan[3].start[1]; n[1] < end[1]; n[1]++) { - for (n[2] = dfft.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - dfft.plan[3].start[2]) + - dfft.plan[3].new_mesh[2] * - ((n[1] - dfft.plan[3].start[1]) + - (dfft.plan[3].new_mesh[1] * (n[0] - dfft.plan[3].start[0]))); + for (n[0] = dp3m.fft.plan[3].start[0]; n[0] < end[0]; n[0]++) { + for (n[1] = dp3m.fft.plan[3].start[1]; n[1] < end[1]; n[1]++) { + for (n[2] = dp3m.fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { + ind = (n[2] - dp3m.fft.plan[3].start[2]) + + dp3m.fft.plan[3].new_mesh[2] * + ((n[1] - dp3m.fft.plan[3].start[1]) + + (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) node_phi += 0.0; @@ -920,9 +919,9 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { dp3m_gather_fft_grid(dp3m.rs_mesh_dip[0]); dp3m_gather_fft_grid(dp3m.rs_mesh_dip[1]); dp3m_gather_fft_grid(dp3m.rs_mesh_dip[2]); - dfft_perform_forw(dp3m.rs_mesh_dip[0]); - dfft_perform_forw(dp3m.rs_mesh_dip[1]); - dfft_perform_forw(dp3m.rs_mesh_dip[2]); + fft_perform_forw(dp3m.rs_mesh_dip[0], dp3m.fft); + fft_perform_forw(dp3m.rs_mesh_dip[1], dp3m.fft); + fft_perform_forw(dp3m.rs_mesh_dip[2], dp3m.fft); // Note: after these calls, the grids are in the order yzx and not xyz // anymore!!! } @@ -944,23 +943,23 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */ ind = 0; i = 0; - for (j[0] = 0; j[0] < dfft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < dfft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < dfft.plan[3].new_mesh[2]; j[2]++) { + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { node_k_space_energy_dip += dp3m.g_energy[i] * (Utils::sqr(dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]]) + + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]]) + Utils::sqr(dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]])); + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]])); ind += 2; i++; } @@ -1017,24 +1016,24 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { ind = 0; i = 0; - for (j[0] = 0; j[0] < dfft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dfft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dfft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x // tmp0 = Re(mu)*k, tmp1 = Im(mu)*k tmp0 = dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]]; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]]; tmp1 = dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]]; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]]; /* the optimal influence function is the same for torques and energy */ @@ -1051,21 +1050,21 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { for (d = 0; d < 3; d++) { d_rs = (d + dp3m.ks_pnum) % 3; ind = 0; - for (j[0] = 0; j[0] < dfft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < dfft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < dfft.plan[3].new_mesh[2]; j[2]++) { + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { dp3m.rs_mesh[ind] = - dp3m.d_op[j[d] + dfft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; ind++; dp3m.rs_mesh[ind] = - dp3m.d_op[j[d] + dfft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; ind++; } } } /* Back FFT force component mesh */ - dfft_perform_back(dp3m.rs_mesh); + fft_perform_back(dp3m.rs_mesh, false, dp3m.fft); /* redistribute force component mesh */ dp3m_spread_force_grid(dp3m.rs_mesh); /* Assign force component from mesh to particle */ @@ -1088,22 +1087,22 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { /* fill in ks_mesh array for force calculation */ ind = 0; i = 0; - for (j[0] = 0; j[0] < dfft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dfft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dfft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x // tmp0 = Im(mu)*k, tmp1 = -Re(mu)*k tmp0 = dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]]; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]]; tmp1 = dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[j[2] + dfft.plan[3].start[2]] + + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[j[0] + dfft.plan[3].start[0]] + + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] + dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[j[1] + dfft.plan[3].start[1]]; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]]; dp3m.ks_mesh[ind] = tmp0 * dp3m.g_force[i]; dp3m.ks_mesh[ind + 1] = -tmp1 * dp3m.g_force[i]; ind += 2; @@ -1116,35 +1115,35 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { for (d = 0; d < 3; d++) { /* direction in k space: */ d_rs = (d + dp3m.ks_pnum) % 3; ind = 0; - for (j[0] = 0; j[0] < dfft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dfft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dfft.plan[3].new_mesh[2]; + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x tmp0 = - dp3m.d_op[j[d] + dfft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; dp3m.rs_mesh_dip[0][ind] = - dp3m.d_op[j[2] + dfft.plan[3].start[2]] * tmp0; + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] * tmp0; dp3m.rs_mesh_dip[1][ind] = - dp3m.d_op[j[0] + dfft.plan[3].start[0]] * tmp0; + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] * tmp0; dp3m.rs_mesh_dip[2][ind] = - dp3m.d_op[j[1] + dfft.plan[3].start[1]] * tmp0; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]] * tmp0; ind++; tmp0 = - dp3m.d_op[j[d] + dfft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; dp3m.rs_mesh_dip[0][ind] = - dp3m.d_op[j[2] + dfft.plan[3].start[2]] * tmp0; + dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] * tmp0; dp3m.rs_mesh_dip[1][ind] = - dp3m.d_op[j[0] + dfft.plan[3].start[0]] * tmp0; + dp3m.d_op[j[0] + dp3m.fft.plan[3].start[0]] * tmp0; dp3m.rs_mesh_dip[2][ind] = - dp3m.d_op[j[1] + dfft.plan[3].start[1]] * tmp0; + dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]] * tmp0; ind++; } } } /* Back FFT force component mesh */ - dfft_perform_back(dp3m.rs_mesh_dip[0]); - dfft_perform_back(dp3m.rs_mesh_dip[1]); - dfft_perform_back(dp3m.rs_mesh_dip[2]); + fft_perform_back(dp3m.rs_mesh_dip[0], false, dp3m.fft); + fft_perform_back(dp3m.rs_mesh_dip[1], false, dp3m.fft); + fft_perform_back(dp3m.rs_mesh_dip[2], false, dp3m.fft); /* redistribute force component mesh */ dp3m_spread_force_grid(dp3m.rs_mesh_dip[0]); dp3m_spread_force_grid(dp3m.rs_mesh_dip[1]); @@ -1394,20 +1393,20 @@ void dp3m_calc_influence_function_force() { dp3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= dfft.plan[3].new_mesh[i]; - end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; + size *= dp3m.fft.plan[3].new_mesh[i]; + end[i] = dp3m.fft.plan[3].start[i] + dp3m.fft.plan[3].new_mesh[i]; } dp3m.g_force = Utils::realloc(dp3m.g_force, size * sizeof(double)); fak1 = dp3m.params.mesh[0] * dp3m.params.mesh[0] * dp3m.params.mesh[0] * 2.0 / (box_l[0] * box_l[0]); - for (n[0] = dfft.plan[3].start[0]; n[0] < end[0]; n[0]++) - for (n[1] = dfft.plan[3].start[1]; n[1] < end[1]; n[1]++) - for (n[2] = dfft.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - dfft.plan[3].start[2]) + - dfft.plan[3].new_mesh[2] * - ((n[1] - dfft.plan[3].start[1]) + - (dfft.plan[3].new_mesh[1] * (n[0] - dfft.plan[3].start[0]))); + for (n[0] = dp3m.fft.plan[3].start[0]; n[0] < end[0]; n[0]++) + for (n[1] = dp3m.fft.plan[3].start[1]; n[1] < end[1]; n[1]++) + for (n[2] = dp3m.fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { + ind = (n[2] - dp3m.fft.plan[3].start[2]) + + dp3m.fft.plan[3].new_mesh[2] * + ((n[1] - dp3m.fft.plan[3].start[1]) + + (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) dp3m.g_force[ind] = 0.0; @@ -1481,20 +1480,20 @@ void dp3m_calc_influence_function_energy() { dp3m_calc_meshift(); for (i = 0; i < 3; i++) { - size *= dfft.plan[3].new_mesh[i]; - end[i] = dfft.plan[3].start[i] + dfft.plan[3].new_mesh[i]; + size *= dp3m.fft.plan[3].new_mesh[i]; + end[i] = dp3m.fft.plan[3].start[i] + dp3m.fft.plan[3].new_mesh[i]; } dp3m.g_energy = Utils::realloc(dp3m.g_energy, size * sizeof(double)); fak1 = dp3m.params.mesh[0] * dp3m.params.mesh[0] * dp3m.params.mesh[0] * 2.0 / (box_l[0] * box_l[0]); - for (n[0] = dfft.plan[3].start[0]; n[0] < end[0]; n[0]++) - for (n[1] = dfft.plan[3].start[1]; n[1] < end[1]; n[1]++) - for (n[2] = dfft.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - dfft.plan[3].start[2]) + - dfft.plan[3].new_mesh[2] * - ((n[1] - dfft.plan[3].start[1]) + - (dfft.plan[3].new_mesh[1] * (n[0] - dfft.plan[3].start[0]))); + for (n[0] = dp3m.fft.plan[3].start[0]; n[0] < end[0]; n[0]++) + for (n[1] = dp3m.fft.plan[3].start[1]; n[1] < end[1]; n[1]++) + for (n[2] = dp3m.fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { + ind = (n[2] - dp3m.fft.plan[3].start[2]) + + dp3m.fft.plan[3].new_mesh[2] * + ((n[1] - dp3m.fft.plan[3].start[1]) + + (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) dp3m.g_energy[ind] = 0.0; diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp index 8b3160bd3b4..a8fcdba7340 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp @@ -42,6 +42,7 @@ #ifdef DP3M #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "p3m-common.hpp" +#include "fft.hpp" #include "utils/math/AS_erfc_part.hpp" @@ -98,6 +99,8 @@ typedef struct { /* Stores the value of the energy correction due to MS effects */ double energy_correction; + + fft_data_struct fft; } dp3m_data_struct; /** dipolar P3M parameters. */ diff --git a/src/core/fft-dipolar.cpp b/src/core/fft-dipolar.cpp index b8010b69dfc..21c37b29d89 100644 --- a/src/core/fft-dipolar.cpp +++ b/src/core/fft-dipolar.cpp @@ -44,7 +44,7 @@ using Utils::permute_ifield; * variables ************************************************/ -fft_data_struct dfft; +fft_data_struct dfft_aaaa; /** communicate the grid data according to the given fft_forw_plan. * \param plan communication plan (see \ref fft_forw_plan). @@ -63,7 +63,7 @@ void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out); void dfft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out); -void dfft_pre_init() { fft_common_pre_init(&dfft); } +void dfft_pre_init() { fft_common_pre_init(&dfft_aaaa); } int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, int *global_mesh_dim, double *global_mesh_off, int *ks_pnum) { @@ -82,8 +82,8 @@ int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, FFT_TRACE(fprintf(stderr, "%d: dipolar dfft_init():\n", this_node)); - dfft.max_comm_size = 0; - dfft.max_mesh_size = 0; + dfft_aaaa.max_comm_size = 0; + dfft_aaaa.max_mesh_size = 0; for (i = 0; i < 4; i++) { n_id[i] = (int *)Utils::malloc(1 * n_nodes * sizeof(int)); n_pos[i] = (int *)Utils::malloc(3 * n_nodes * sizeof(int)); @@ -105,37 +105,37 @@ int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, /* FFT node grids (n_grid[1 - 3]) */ calc_2d_grid(n_nodes, n_grid[1]); /* resort n_grid[1] dimensions if necessary */ - dfft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult); - dfft.plan[0].n_permute = 0; + dfft_aaaa.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult); + dfft_aaaa.plan[0].n_permute = 0; for (i = 1; i < 4; i++) - dfft.plan[i].n_permute = (dfft.plan[1].row_dir + i) % 3; + dfft_aaaa.plan[i].n_permute = (dfft_aaaa.plan[1].row_dir + i) % 3; for (i = 0; i < 3; i++) { n_grid[2][i] = n_grid[1][(i + 1) % 3]; n_grid[3][i] = n_grid[1][(i + 2) % 3]; } - dfft.plan[2].row_dir = (dfft.plan[1].row_dir - 1) % 3; - dfft.plan[3].row_dir = (dfft.plan[1].row_dir - 2) % 3; + dfft_aaaa.plan[2].row_dir = (dfft_aaaa.plan[1].row_dir - 1) % 3; + dfft_aaaa.plan[3].row_dir = (dfft_aaaa.plan[1].row_dir - 2) % 3; /* === communication groups === */ /* copy local mesh off real space charge assignment grid */ for (i = 0; i < 3; i++) - dfft.plan[0].new_mesh[i] = local_mesh_dim[i]; + dfft_aaaa.plan[0].new_mesh[i] = local_mesh_dim[i]; for (i = 1; i < 4; i++) { - dfft.plan[i].g_size = fft_find_comm_groups( + dfft_aaaa.plan[i].g_size = fft_find_comm_groups( {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - dfft.plan[i].group, n_pos[i], my_pos[i]); - if (dfft.plan[i].g_size == -1) { + dfft_aaaa.plan[i].group, n_pos[i], my_pos[i]); + if (dfft_aaaa.plan[i].g_size == -1) { /* try permutation */ - j = n_grid[i][(dfft.plan[i].row_dir + 1) % 3]; - n_grid[i][(dfft.plan[i].row_dir + 1) % 3] = - n_grid[i][(dfft.plan[i].row_dir + 2) % 3]; - n_grid[i][(dfft.plan[i].row_dir + 2) % 3] = j; - dfft.plan[i].g_size = fft_find_comm_groups( + j = n_grid[i][(dfft_aaaa.plan[i].row_dir + 1) % 3]; + n_grid[i][(dfft_aaaa.plan[i].row_dir + 1) % 3] = + n_grid[i][(dfft_aaaa.plan[i].row_dir + 2) % 3]; + n_grid[i][(dfft_aaaa.plan[i].row_dir + 2) % 3] = j; + dfft_aaaa.plan[i].g_size = fft_find_comm_groups( {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - dfft.plan[i].group, n_pos[i], my_pos[i]); - if (dfft.plan[i].g_size == -1) { + dfft_aaaa.plan[i].group, n_pos[i], my_pos[i]); + if (dfft_aaaa.plan[i].g_size == -1) { fprintf(stderr, "%d: dipolar INTERNAL ERROR: fft_find_comm_groups error\n", this_node); @@ -143,111 +143,111 @@ int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, } } - dfft.plan[i].send_block = Utils::realloc( - dfft.plan[i].send_block, 6 * dfft.plan[i].g_size * sizeof(int)); - dfft.plan[i].send_size = Utils::realloc( - dfft.plan[i].send_size, 1 * dfft.plan[i].g_size * sizeof(int)); - dfft.plan[i].recv_block = Utils::realloc( - dfft.plan[i].recv_block, 6 * dfft.plan[i].g_size * sizeof(int)); - dfft.plan[i].recv_size = Utils::realloc( - dfft.plan[i].recv_size, 1 * dfft.plan[i].g_size * sizeof(int)); + dfft_aaaa.plan[i].send_block = Utils::realloc( + dfft_aaaa.plan[i].send_block, 6 * dfft_aaaa.plan[i].g_size * sizeof(int)); + dfft_aaaa.plan[i].send_size = Utils::realloc( + dfft_aaaa.plan[i].send_size, 1 * dfft_aaaa.plan[i].g_size * sizeof(int)); + dfft_aaaa.plan[i].recv_block = Utils::realloc( + dfft_aaaa.plan[i].recv_block, 6 * dfft_aaaa.plan[i].g_size * sizeof(int)); + dfft_aaaa.plan[i].recv_size = Utils::realloc( + dfft_aaaa.plan[i].recv_size, 1 * dfft_aaaa.plan[i].g_size * sizeof(int)); - dfft.plan[i].new_size = fft_calc_local_mesh( + dfft_aaaa.plan[i].new_size = fft_calc_local_mesh( my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, - dfft.plan[i].new_mesh, dfft.plan[i].start); - permute_ifield(dfft.plan[i].new_mesh, 3, -(dfft.plan[i].n_permute)); - permute_ifield(dfft.plan[i].start, 3, -(dfft.plan[i].n_permute)); - dfft.plan[i].n_ffts = dfft.plan[i].new_mesh[0] * dfft.plan[i].new_mesh[1]; + dfft_aaaa.plan[i].new_mesh, dfft_aaaa.plan[i].start); + permute_ifield(dfft_aaaa.plan[i].new_mesh, 3, -(dfft_aaaa.plan[i].n_permute)); + permute_ifield(dfft_aaaa.plan[i].start, 3, -(dfft_aaaa.plan[i].n_permute)); + dfft_aaaa.plan[i].n_ffts = dfft_aaaa.plan[i].new_mesh[0] * dfft_aaaa.plan[i].new_mesh[1]; /* === send/recv block specifications === */ - for (j = 0; j < dfft.plan[i].g_size; j++) { + for (j = 0; j < dfft_aaaa.plan[i].g_size; j++) { /* send block: this_node to comm-group-node i (identity: node) */ - int node = dfft.plan[i].group[j]; - dfft.plan[i].send_size[j] = fft_calc_send_block( + int node = dfft_aaaa.plan[i].group[j]; + dfft_aaaa.plan[i].send_size[j] = fft_calc_send_block( my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i], - global_mesh_dim, global_mesh_off, &(dfft.plan[i].send_block[6 * j])); - permute_ifield(&(dfft.plan[i].send_block[6 * j]), 3, - -(dfft.plan[i - 1].n_permute)); - permute_ifield(&(dfft.plan[i].send_block[6 * j + 3]), 3, - -(dfft.plan[i - 1].n_permute)); - if (dfft.plan[i].send_size[j] > dfft.max_comm_size) - dfft.max_comm_size = dfft.plan[i].send_size[j]; + global_mesh_dim, global_mesh_off, &(dfft_aaaa.plan[i].send_block[6 * j])); + permute_ifield(&(dfft_aaaa.plan[i].send_block[6 * j]), 3, + -(dfft_aaaa.plan[i - 1].n_permute)); + permute_ifield(&(dfft_aaaa.plan[i].send_block[6 * j + 3]), 3, + -(dfft_aaaa.plan[i - 1].n_permute)); + if (dfft_aaaa.plan[i].send_size[j] > dfft_aaaa.max_comm_size) + dfft_aaaa.max_comm_size = dfft_aaaa.plan[i].send_size[j]; /* First plan send blocks have to be adjusted, since the CA grid may have an additional margin outside the actual domain of the node */ if (i == 1) { for (int k = 0; k < 3; k++) - dfft.plan[1].send_block[6 * j + k] += local_mesh_margin[2 * k]; + dfft_aaaa.plan[1].send_block[6 * j + k] += local_mesh_margin[2 * k]; } /* recv block: this_node from comm-group-node i (identity: node) */ - dfft.plan[i].recv_size[j] = fft_calc_send_block( + dfft_aaaa.plan[i].recv_size[j] = fft_calc_send_block( my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1], - global_mesh_dim, global_mesh_off, &(dfft.plan[i].recv_block[6 * j])); - permute_ifield(&(dfft.plan[i].recv_block[6 * j]), 3, - -(dfft.plan[i].n_permute)); - permute_ifield(&(dfft.plan[i].recv_block[6 * j + 3]), 3, - -(dfft.plan[i].n_permute)); - if (dfft.plan[i].recv_size[j] > dfft.max_comm_size) - dfft.max_comm_size = dfft.plan[i].recv_size[j]; + global_mesh_dim, global_mesh_off, &(dfft_aaaa.plan[i].recv_block[6 * j])); + permute_ifield(&(dfft_aaaa.plan[i].recv_block[6 * j]), 3, + -(dfft_aaaa.plan[i].n_permute)); + permute_ifield(&(dfft_aaaa.plan[i].recv_block[6 * j + 3]), 3, + -(dfft_aaaa.plan[i].n_permute)); + if (dfft_aaaa.plan[i].recv_size[j] > dfft_aaaa.max_comm_size) + dfft_aaaa.max_comm_size = dfft_aaaa.plan[i].recv_size[j]; } for (j = 0; j < 3; j++) - dfft.plan[i].old_mesh[j] = dfft.plan[i - 1].new_mesh[j]; + dfft_aaaa.plan[i].old_mesh[j] = dfft_aaaa.plan[i - 1].new_mesh[j]; if (i == 1) - dfft.plan[i].element = 1; + dfft_aaaa.plan[i].element = 1; else { - dfft.plan[i].element = 2; - for (j = 0; j < dfft.plan[i].g_size; j++) { - dfft.plan[i].send_size[j] *= 2; - dfft.plan[i].recv_size[j] *= 2; + dfft_aaaa.plan[i].element = 2; + for (j = 0; j < dfft_aaaa.plan[i].g_size; j++) { + dfft_aaaa.plan[i].send_size[j] *= 2; + dfft_aaaa.plan[i].recv_size[j] *= 2; } } /* DEBUG */ for (j = 0; j < n_nodes; j++) { /* MPI_Barrier(comm_cart); */ if (j == this_node) { - FFT_TRACE(fft_print_fft_plan(dfft.plan[i])); + FFT_TRACE(fft_print_fft_plan(dfft_aaaa.plan[i])); } } } /* Factor 2 for complex fields */ - dfft.max_comm_size *= 2; - dfft.max_mesh_size = + dfft_aaaa.max_comm_size *= 2; + dfft_aaaa.max_mesh_size = (local_mesh_dim[0] * local_mesh_dim[1] * local_mesh_dim[2]); for (i = 1; i < 4; i++) - if (2 * dfft.plan[i].new_size > dfft.max_mesh_size) - dfft.max_mesh_size = 2 * dfft.plan[i].new_size; + if (2 * dfft_aaaa.plan[i].new_size > dfft_aaaa.max_mesh_size) + dfft_aaaa.max_mesh_size = 2 * dfft_aaaa.plan[i].new_size; FFT_TRACE(fprintf(stderr, "%d: dfft.max_comm_size = %d, dfft.max_mesh_size = %d\n", - this_node, dfft.max_comm_size, dfft.max_mesh_size)); + this_node, dfft_aaaa.max_comm_size, dfft_aaaa.max_mesh_size)); /* === pack function === */ for (i = 1; i < 4; i++) { - dfft.plan[i].pack_function = fft_pack_block_permute2; + dfft_aaaa.plan[i].pack_function = fft_pack_block_permute2; FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 2 \n", this_node, i)); } (*ks_pnum) = 6; - if (dfft.plan[1].row_dir == 2) { - dfft.plan[1].pack_function = fft_pack_block; + if (dfft_aaaa.plan[1].row_dir == 2) { + dfft_aaaa.plan[1].pack_function = fft_pack_block; FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 0 \n", this_node, 1)); (*ks_pnum) = 4; - } else if (dfft.plan[1].row_dir == 1) { - dfft.plan[1].pack_function = fft_pack_block_permute1; + } else if (dfft_aaaa.plan[1].row_dir == 1) { + dfft_aaaa.plan[1].pack_function = fft_pack_block_permute1; FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 1 \n", this_node, 1)); (*ks_pnum) = 5; } /* Factor 2 for complex numbers */ - dfft.send_buf = - Utils::realloc(dfft.send_buf, dfft.max_comm_size * sizeof(double)); - dfft.recv_buf = - Utils::realloc(dfft.recv_buf, dfft.max_comm_size * sizeof(double)); - (*data) = Utils::realloc((*data), dfft.max_mesh_size * sizeof(double)); - dfft.data_buf = - Utils::realloc(dfft.data_buf, dfft.max_mesh_size * sizeof(double)); - if (!(*data) || !dfft.data_buf || !dfft.recv_buf || !dfft.send_buf) { + dfft_aaaa.send_buf = + Utils::realloc(dfft_aaaa.send_buf, dfft_aaaa.max_comm_size * sizeof(double)); + dfft_aaaa.recv_buf = + Utils::realloc(dfft_aaaa.recv_buf, dfft_aaaa.max_comm_size * sizeof(double)); + (*data) = Utils::realloc((*data), dfft_aaaa.max_mesh_size * sizeof(double)); + dfft_aaaa.data_buf = + Utils::realloc(dfft_aaaa.data_buf, dfft_aaaa.max_mesh_size * sizeof(double)); + if (!(*data) || !dfft_aaaa.data_buf || !dfft_aaaa.recv_buf || !dfft_aaaa.send_buf) { fprintf(stderr, "%d: Could not allocate FFT data arays\n", this_node); errexit(); } @@ -256,71 +256,71 @@ int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, /* === FFT Routines (Using FFTW / RFFTW package)=== */ for (i = 1; i < 4; i++) { - dfft.plan[i].dir = FFTW_FORWARD; + dfft_aaaa.plan[i].dir = FFTW_FORWARD; /* FFT plan creation. Attention: destroys contents of c_data/data and c_data_buf/data_buf. */ wisdom_status = FFTW_FAILURE; sprintf(wisdom_file_name, ".dfftw3_1d_wisdom_forw_n%d.file", - dfft.plan[i].new_mesh[2]); + dfft_aaaa.plan[i].new_mesh[2]); if ((wisdom_file = fopen(wisdom_file_name, "r")) != nullptr) { wisdom_status = fftw_import_wisdom_from_file(wisdom_file); fclose(wisdom_file); } - if (dfft.init_tag == 1) - fftw_destroy_plan(dfft.plan[i].our_fftw_plan); + if (dfft_aaaa.init_tag == 1) + fftw_destroy_plan(dfft_aaaa.plan[i].our_fftw_plan); // printf("dfft.plan[%d].n_ffts=%d\n",i,dfft.plan[i].n_ffts); - dfft.plan[i].our_fftw_plan = fftw_plan_many_dft( - 1, &dfft.plan[i].new_mesh[2], dfft.plan[i].n_ffts, c_data, nullptr, 1, - dfft.plan[i].new_mesh[2], c_data, nullptr, 1, dfft.plan[i].new_mesh[2], - dfft.plan[i].dir, FFTW_PATIENT); + dfft_aaaa.plan[i].our_fftw_plan = fftw_plan_many_dft( + 1, &dfft_aaaa.plan[i].new_mesh[2], dfft_aaaa.plan[i].n_ffts, c_data, nullptr, 1, + dfft_aaaa.plan[i].new_mesh[2], c_data, nullptr, 1, dfft_aaaa.plan[i].new_mesh[2], + dfft_aaaa.plan[i].dir, FFTW_PATIENT); if (wisdom_status == FFTW_FAILURE && (wisdom_file = fopen(wisdom_file_name, "w")) != nullptr) { fftw_export_wisdom_to_file(wisdom_file); fclose(wisdom_file); } - dfft.plan[i].fft_function = fftw_execute; + dfft_aaaa.plan[i].fft_function = fftw_execute; } /* === The BACK Direction === */ /* this is needed because slightly different functions are used */ for (i = 1; i < 4; i++) { - dfft.back[i].dir = FFTW_BACKWARD; + dfft_aaaa.back[i].dir = FFTW_BACKWARD; wisdom_status = FFTW_FAILURE; sprintf(wisdom_file_name, ".dfftw3_1d_wisdom_back_n%d.file", - dfft.plan[i].new_mesh[2]); + dfft_aaaa.plan[i].new_mesh[2]); if ((wisdom_file = fopen(wisdom_file_name, "r")) != nullptr) { wisdom_status = fftw_import_wisdom_from_file(wisdom_file); fclose(wisdom_file); } - if (dfft.init_tag == 1) - fftw_destroy_plan(dfft.back[i].our_fftw_plan); - dfft.back[i].our_fftw_plan = fftw_plan_many_dft( - 1, &dfft.plan[i].new_mesh[2], dfft.plan[i].n_ffts, c_data, nullptr, 1, - dfft.plan[i].new_mesh[2], c_data, nullptr, 1, dfft.plan[i].new_mesh[2], - dfft.back[i].dir, FFTW_PATIENT); + if (dfft_aaaa.init_tag == 1) + fftw_destroy_plan(dfft_aaaa.back[i].our_fftw_plan); + dfft_aaaa.back[i].our_fftw_plan = fftw_plan_many_dft( + 1, &dfft_aaaa.plan[i].new_mesh[2], dfft_aaaa.plan[i].n_ffts, c_data, nullptr, 1, + dfft_aaaa.plan[i].new_mesh[2], c_data, nullptr, 1, dfft_aaaa.plan[i].new_mesh[2], + dfft_aaaa.back[i].dir, FFTW_PATIENT); if (wisdom_status == FFTW_FAILURE && (wisdom_file = fopen(wisdom_file_name, "w")) != nullptr) { fftw_export_wisdom_to_file(wisdom_file); fclose(wisdom_file); } - dfft.back[i].fft_function = fftw_execute; - dfft.back[i].pack_function = fft_pack_block_permute1; + dfft_aaaa.back[i].fft_function = fftw_execute; + dfft_aaaa.back[i].pack_function = fft_pack_block_permute1; FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 1 \n", this_node, i)); } - if (dfft.plan[1].row_dir == 2) { - dfft.back[1].pack_function = fft_pack_block; + if (dfft_aaaa.plan[1].row_dir == 2) { + dfft_aaaa.back[1].pack_function = fft_pack_block; FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 0 \n", this_node, 1)); - } else if (dfft.plan[1].row_dir == 1) { - dfft.back[1].pack_function = fft_pack_block_permute2; + } else if (dfft_aaaa.plan[1].row_dir == 1) { + dfft_aaaa.back[1].pack_function = fft_pack_block_permute2; FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 2 \n", this_node, 1)); } - dfft.init_tag = 1; + dfft_aaaa.init_tag = 1; /* free(data); */ for (i = 0; i < 4; i++) { free(n_id[i]); free(n_pos[i]); } - return dfft.max_mesh_size; + return dfft_aaaa.max_mesh_size; } void dfft_perform_forw(double *data) { @@ -331,10 +331,10 @@ void dfft_perform_forw(double *data) { fprintf(stderr, "%d: dipolar fft_perform_forw: dir 1:\n", this_node)); fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)dfft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *)dfft_aaaa.data_buf; /* communication to current dir row format (in is data) */ - dfft_forw_grid_comm(dfft.plan[1], data, dfft.data_buf); + dfft_forw_grid_comm(dfft_aaaa.plan[1], data, dfft_aaaa.data_buf); /* fprintf(stderr,"%d: start grid \n",this_node); @@ -351,26 +351,26 @@ void dfft_perform_forw(double *data) { */ /* complexify the real data array (in is data_buf) */ - for (i = 0; i < dfft.plan[1].new_size; i++) { - data[2 * i] = dfft.data_buf[i]; /* real value */ + for (i = 0; i < dfft_aaaa.plan[1].new_size; i++) { + data[2 * i] = dfft_aaaa.data_buf[i]; /* real value */ data[(2 * i) + 1] = 0; /* complex value */ } /* perform FFT (in/out is data)*/ - fftw_execute_dft(dfft.plan[1].our_fftw_plan, c_data, c_data); + fftw_execute_dft(dfft_aaaa.plan[1].our_fftw_plan, c_data, c_data); /* ===== second direction ===== */ FFT_TRACE( fprintf(stderr, "%d: dipolar fft_perform_forw: dir 2:\n", this_node)); /* communication to current dir row format (in is data) */ - dfft_forw_grid_comm(dfft.plan[2], data, dfft.data_buf); + dfft_forw_grid_comm(dfft_aaaa.plan[2], data, dfft_aaaa.data_buf); /* perform FFT (in/out is data_buf)*/ - fftw_execute_dft(dfft.plan[2].our_fftw_plan, c_data_buf, c_data_buf); + fftw_execute_dft(dfft_aaaa.plan[2].our_fftw_plan, c_data_buf, c_data_buf); /* ===== third direction ===== */ FFT_TRACE( fprintf(stderr, "%d: dipolar fft_perform_forw: dir 3:\n", this_node)); /* communication to current dir row format (in is data_buf) */ - dfft_forw_grid_comm(dfft.plan[3], dfft.data_buf, data); + dfft_forw_grid_comm(dfft_aaaa.plan[3], dfft_aaaa.data_buf, data); /* perform FFT (in/out is data)*/ - fftw_execute_dft(dfft.plan[3].our_fftw_plan, c_data, c_data); + fftw_execute_dft(dfft_aaaa.plan[3].our_fftw_plan, c_data, c_data); // fft_print_global_fft_mesh(dfft.plan[3],data,1,0); /* REMARK: Result has to be in data. */ @@ -380,32 +380,32 @@ void dfft_perform_back(double *data) { int i; fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)dfft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *)dfft_aaaa.data_buf; /* ===== third direction ===== */ FFT_TRACE( fprintf(stderr, "%d: dipolar fft_perform_back: dir 3:\n", this_node)); /* perform FFT (in is data) */ - fftw_execute_dft(dfft.back[3].our_fftw_plan, c_data, c_data); + fftw_execute_dft(dfft_aaaa.back[3].our_fftw_plan, c_data, c_data); /* communicate (in is data)*/ - dfft_back_grid_comm(dfft.plan[3], dfft.back[3], data, dfft.data_buf); + dfft_back_grid_comm(dfft_aaaa.plan[3], dfft_aaaa.back[3], data, dfft_aaaa.data_buf); /* ===== second direction ===== */ FFT_TRACE( fprintf(stderr, "%d: dipolar fft_perform_back: dir 2:\n", this_node)); /* perform FFT (in is data_buf) */ - fftw_execute_dft(dfft.back[2].our_fftw_plan, c_data_buf, c_data_buf); + fftw_execute_dft(dfft_aaaa.back[2].our_fftw_plan, c_data_buf, c_data_buf); /* communicate (in is data_buf) */ - dfft_back_grid_comm(dfft.plan[2], dfft.back[2], dfft.data_buf, data); + dfft_back_grid_comm(dfft_aaaa.plan[2], dfft_aaaa.back[2], dfft_aaaa.data_buf, data); /* ===== first direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 1:\n", this_node)); /* perform FFT (in is data) */ - fftw_execute_dft(dfft.back[1].our_fftw_plan, c_data, c_data); + fftw_execute_dft(dfft_aaaa.back[1].our_fftw_plan, c_data, c_data); /* throw away the (hopefully) empty complex component (in is data)*/ - for (i = 0; i < dfft.plan[1].new_size; i++) { - dfft.data_buf[i] = data[2 * i]; /* real value */ + for (i = 0; i < dfft_aaaa.plan[1].new_size; i++) { + dfft_aaaa.data_buf[i] = data[2 * i]; /* real value */ // Vincent: if (data[2 * i + 1] > 1e-5) { printf("dipoar fft_aaaa - Complex value is not zero (i=%d,data=%g)!!!\n", i, @@ -415,7 +415,7 @@ void dfft_perform_back(double *data) { } } /* communicate (in is data_buf) */ - dfft_back_grid_comm(dfft.plan[1], dfft.back[1], dfft.data_buf, data); + dfft_back_grid_comm(dfft_aaaa.plan[1], dfft_aaaa.back[1], dfft_aaaa.data_buf, data); /* REMARK: Result has to be in data. */ } @@ -426,26 +426,26 @@ void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out) { double *tmp_ptr; for (i = 0; i < plan.g_size; i++) { - plan.pack_function(in, dfft.send_buf, &(plan.send_block[6 * i]), + plan.pack_function(in, dfft_aaaa.send_buf, &(plan.send_block[6 * i]), &(plan.send_block[6 * i + 3]), plan.old_mesh, plan.element); if (plan.group[i] < this_node) { /* send first, receive second */ - MPI_Send(dfft.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], + MPI_Send(dfft_aaaa.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW, comm_cart); - MPI_Recv(dfft.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], + MPI_Recv(dfft_aaaa.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW, comm_cart, &status); } else if (plan.group[i] > this_node) { /* receive first, send second */ - MPI_Recv(dfft.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], + MPI_Recv(dfft_aaaa.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW, comm_cart, &status); - MPI_Send(dfft.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], + MPI_Send(dfft_aaaa.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW, comm_cart); } else { /* Self communication... */ - tmp_ptr = dfft.send_buf; - dfft.send_buf = dfft.recv_buf; - dfft.recv_buf = tmp_ptr; + tmp_ptr = dfft_aaaa.send_buf; + dfft_aaaa.send_buf = dfft_aaaa.recv_buf; + dfft_aaaa.recv_buf = tmp_ptr; } - fft_unpack_block(dfft.recv_buf, out, &(plan.recv_block[6 * i]), + fft_unpack_block(dfft_aaaa.recv_buf, out, &(plan.recv_block[6 * i]), &(plan.recv_block[6 * i + 3]), plan.new_mesh, plan.element); } @@ -463,26 +463,26 @@ void dfft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, for (i = 0; i < plan_f.g_size; i++) { - plan_b.pack_function(in, dfft.send_buf, &(plan_f.recv_block[6 * i]), + plan_b.pack_function(in, dfft_aaaa.send_buf, &(plan_f.recv_block[6 * i]), &(plan_f.recv_block[6 * i + 3]), plan_f.new_mesh, plan_f.element); if (plan_f.group[i] < this_node) { /* send first, receive second */ - MPI_Send(dfft.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], + MPI_Send(dfft_aaaa.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], REQ_FFT_BACK, comm_cart); - MPI_Recv(dfft.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], + MPI_Recv(dfft_aaaa.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], REQ_FFT_BACK, comm_cart, &status); } else if (plan_f.group[i] > this_node) { /* receive first, send second */ - MPI_Recv(dfft.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], + MPI_Recv(dfft_aaaa.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], REQ_FFT_BACK, comm_cart, &status); - MPI_Send(dfft.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], + MPI_Send(dfft_aaaa.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], REQ_FFT_BACK, comm_cart); } else { /* Self communication... */ - tmp_ptr = dfft.send_buf; - dfft.send_buf = dfft.recv_buf; - dfft.recv_buf = tmp_ptr; + tmp_ptr = dfft_aaaa.send_buf; + dfft_aaaa.send_buf = dfft_aaaa.recv_buf; + dfft_aaaa.recv_buf = tmp_ptr; } - fft_unpack_block(dfft.recv_buf, out, &(plan_f.send_block[6 * i]), + fft_unpack_block(dfft_aaaa.recv_buf, out, &(plan_f.send_block[6 * i]), &(plan_f.send_block[6 * i + 3]), plan_f.old_mesh, plan_f.element); } diff --git a/src/core/fft-dipolar.hpp b/src/core/fft-dipolar.hpp index 3f32f8f938f..7cde2fcd45b 100644 --- a/src/core/fft-dipolar.hpp +++ b/src/core/fft-dipolar.hpp @@ -47,7 +47,7 @@ #include "fft-common.hpp" -extern fft_data_struct dfft; +extern fft_data_struct dfft_aaaa; /** \name Exported Functions */ /************************************************************/ From a911743ce34e8915e7f7e9d76a1da7cc2c6fb4cf Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:26:54 +0100 Subject: [PATCH 5/9] core: Removed redundant fft-dipolar --- src/core/CMakeLists.txt | 1 - src/core/fft-dipolar.cpp | 491 --------------------------------------- src/core/fft-dipolar.hpp | 86 ------- 3 files changed, 578 deletions(-) delete mode 100644 src/core/fft-dipolar.cpp delete mode 100644 src/core/fft-dipolar.hpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index ed9e7ecaebf..2eeb09694b9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,7 +15,6 @@ set(EspressoCore_SRC EspressoSystemInterface.cpp fft-common.cpp fft.cpp - fft-dipolar.cpp forcecap.cpp forces.cpp galilei.cpp diff --git a/src/core/fft-dipolar.cpp b/src/core/fft-dipolar.cpp deleted file mode 100644 index 21c37b29d89..00000000000 --- a/src/core/fft-dipolar.cpp +++ /dev/null @@ -1,491 +0,0 @@ -/* - Copyright (C) 2010-2018 The ESPResSo project - Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - Max-Planck-Institute for Polymer Research, Theory Group - - This file is part of ESPResSo. - - ESPResSo is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - ESPResSo is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ -/** \file - * - * Routines, row decomposition, data structures and communication for the - * 3D-FFT. - * - */ - -#include "fft-dipolar.hpp" - -#ifdef DP3M - -#include "communication.hpp" -#include "debug.hpp" -#include "fft-common.hpp" -#include "grid.hpp" - -#include "utils/math/permute_ifield.hpp" -using Utils::permute_ifield; - -#include -#include - -/************************************************ - * variables - ************************************************/ - -fft_data_struct dfft_aaaa; - -/** communicate the grid data according to the given fft_forw_plan. - * \param plan communication plan (see \ref fft_forw_plan). - * \param in input mesh. - * \param out output mesh. - */ -void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out); - -/** communicate the grid data according to the given - * fft_forw_plan/fft_bakc_plan. - * \param plan_f communication plan (see \ref fft_forw_plan). - * \param plan_b additional back plan (see \ref fft_back_plan). - * \param in input mesh. - * \param out output mesh. - */ -void dfft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, - double *out); - -void dfft_pre_init() { fft_common_pre_init(&dfft_aaaa); } - -int dfft_init(double **data, int *local_mesh_dim, int *local_mesh_margin, - int *global_mesh_dim, double *global_mesh_off, int *ks_pnum) { - int i, j; - /* helpers */ - int mult[3]; - - int n_grid[4][3]; /* The four node grids. */ - int my_pos[4][3]; /* The position of this_node in the node grids. */ - int *n_id[4]; /* linear node identity lists for the node grids. */ - int *n_pos[4]; /* positions of nodes in the node grids. */ - /* FFTW WISDOM stuff. */ - char wisdom_file_name[255]; - FILE *wisdom_file; - int wisdom_status; - - FFT_TRACE(fprintf(stderr, "%d: dipolar dfft_init():\n", this_node)); - - dfft_aaaa.max_comm_size = 0; - dfft_aaaa.max_mesh_size = 0; - for (i = 0; i < 4; i++) { - n_id[i] = (int *)Utils::malloc(1 * n_nodes * sizeof(int)); - n_pos[i] = (int *)Utils::malloc(3 * n_nodes * sizeof(int)); - } - - /* === node grids === */ - /* real space node grid (n_grid[0]) */ - for (i = 0; i < 3; i++) { - n_grid[0][i] = node_grid[i]; - my_pos[0][i] = node_pos[i]; - } - for (i = 0; i < n_nodes; i++) { - map_node_array(i, &(n_pos[0][3 * i + 0])); - n_id[0][get_linear_index(n_pos[0][3 * i + 0], n_pos[0][3 * i + 1], - n_pos[0][3 * i + 2], - {n_grid[0][0], n_grid[0][1], n_grid[0][2]})] = i; - } - - /* FFT node grids (n_grid[1 - 3]) */ - calc_2d_grid(n_nodes, n_grid[1]); - /* resort n_grid[1] dimensions if necessary */ - dfft_aaaa.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult); - dfft_aaaa.plan[0].n_permute = 0; - for (i = 1; i < 4; i++) - dfft_aaaa.plan[i].n_permute = (dfft_aaaa.plan[1].row_dir + i) % 3; - for (i = 0; i < 3; i++) { - n_grid[2][i] = n_grid[1][(i + 1) % 3]; - n_grid[3][i] = n_grid[1][(i + 2) % 3]; - } - dfft_aaaa.plan[2].row_dir = (dfft_aaaa.plan[1].row_dir - 1) % 3; - dfft_aaaa.plan[3].row_dir = (dfft_aaaa.plan[1].row_dir - 2) % 3; - - /* === communication groups === */ - /* copy local mesh off real space charge assignment grid */ - for (i = 0; i < 3; i++) - dfft_aaaa.plan[0].new_mesh[i] = local_mesh_dim[i]; - for (i = 1; i < 4; i++) { - dfft_aaaa.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - dfft_aaaa.plan[i].group, n_pos[i], my_pos[i]); - if (dfft_aaaa.plan[i].g_size == -1) { - /* try permutation */ - j = n_grid[i][(dfft_aaaa.plan[i].row_dir + 1) % 3]; - n_grid[i][(dfft_aaaa.plan[i].row_dir + 1) % 3] = - n_grid[i][(dfft_aaaa.plan[i].row_dir + 2) % 3]; - n_grid[i][(dfft_aaaa.plan[i].row_dir + 2) % 3] = j; - dfft_aaaa.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - dfft_aaaa.plan[i].group, n_pos[i], my_pos[i]); - if (dfft_aaaa.plan[i].g_size == -1) { - fprintf(stderr, - "%d: dipolar INTERNAL ERROR: fft_find_comm_groups error\n", - this_node); - errexit(); - } - } - - dfft_aaaa.plan[i].send_block = Utils::realloc( - dfft_aaaa.plan[i].send_block, 6 * dfft_aaaa.plan[i].g_size * sizeof(int)); - dfft_aaaa.plan[i].send_size = Utils::realloc( - dfft_aaaa.plan[i].send_size, 1 * dfft_aaaa.plan[i].g_size * sizeof(int)); - dfft_aaaa.plan[i].recv_block = Utils::realloc( - dfft_aaaa.plan[i].recv_block, 6 * dfft_aaaa.plan[i].g_size * sizeof(int)); - dfft_aaaa.plan[i].recv_size = Utils::realloc( - dfft_aaaa.plan[i].recv_size, 1 * dfft_aaaa.plan[i].g_size * sizeof(int)); - - dfft_aaaa.plan[i].new_size = fft_calc_local_mesh( - my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, - dfft_aaaa.plan[i].new_mesh, dfft_aaaa.plan[i].start); - permute_ifield(dfft_aaaa.plan[i].new_mesh, 3, -(dfft_aaaa.plan[i].n_permute)); - permute_ifield(dfft_aaaa.plan[i].start, 3, -(dfft_aaaa.plan[i].n_permute)); - dfft_aaaa.plan[i].n_ffts = dfft_aaaa.plan[i].new_mesh[0] * dfft_aaaa.plan[i].new_mesh[1]; - - /* === send/recv block specifications === */ - for (j = 0; j < dfft_aaaa.plan[i].g_size; j++) { - /* send block: this_node to comm-group-node i (identity: node) */ - int node = dfft_aaaa.plan[i].group[j]; - dfft_aaaa.plan[i].send_size[j] = fft_calc_send_block( - my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i], - global_mesh_dim, global_mesh_off, &(dfft_aaaa.plan[i].send_block[6 * j])); - permute_ifield(&(dfft_aaaa.plan[i].send_block[6 * j]), 3, - -(dfft_aaaa.plan[i - 1].n_permute)); - permute_ifield(&(dfft_aaaa.plan[i].send_block[6 * j + 3]), 3, - -(dfft_aaaa.plan[i - 1].n_permute)); - if (dfft_aaaa.plan[i].send_size[j] > dfft_aaaa.max_comm_size) - dfft_aaaa.max_comm_size = dfft_aaaa.plan[i].send_size[j]; - /* First plan send blocks have to be adjusted, since the CA grid - may have an additional margin outside the actual domain of the - node */ - if (i == 1) { - for (int k = 0; k < 3; k++) - dfft_aaaa.plan[1].send_block[6 * j + k] += local_mesh_margin[2 * k]; - } - /* recv block: this_node from comm-group-node i (identity: node) */ - dfft_aaaa.plan[i].recv_size[j] = fft_calc_send_block( - my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1], - global_mesh_dim, global_mesh_off, &(dfft_aaaa.plan[i].recv_block[6 * j])); - permute_ifield(&(dfft_aaaa.plan[i].recv_block[6 * j]), 3, - -(dfft_aaaa.plan[i].n_permute)); - permute_ifield(&(dfft_aaaa.plan[i].recv_block[6 * j + 3]), 3, - -(dfft_aaaa.plan[i].n_permute)); - if (dfft_aaaa.plan[i].recv_size[j] > dfft_aaaa.max_comm_size) - dfft_aaaa.max_comm_size = dfft_aaaa.plan[i].recv_size[j]; - } - - for (j = 0; j < 3; j++) - dfft_aaaa.plan[i].old_mesh[j] = dfft_aaaa.plan[i - 1].new_mesh[j]; - if (i == 1) - dfft_aaaa.plan[i].element = 1; - else { - dfft_aaaa.plan[i].element = 2; - for (j = 0; j < dfft_aaaa.plan[i].g_size; j++) { - dfft_aaaa.plan[i].send_size[j] *= 2; - dfft_aaaa.plan[i].recv_size[j] *= 2; - } - } - /* DEBUG */ - for (j = 0; j < n_nodes; j++) { - /* MPI_Barrier(comm_cart); */ - if (j == this_node) { - FFT_TRACE(fft_print_fft_plan(dfft_aaaa.plan[i])); - } - } - } - - /* Factor 2 for complex fields */ - dfft_aaaa.max_comm_size *= 2; - dfft_aaaa.max_mesh_size = - (local_mesh_dim[0] * local_mesh_dim[1] * local_mesh_dim[2]); - for (i = 1; i < 4; i++) - if (2 * dfft_aaaa.plan[i].new_size > dfft_aaaa.max_mesh_size) - dfft_aaaa.max_mesh_size = 2 * dfft_aaaa.plan[i].new_size; - - FFT_TRACE(fprintf(stderr, - "%d: dfft.max_comm_size = %d, dfft.max_mesh_size = %d\n", - this_node, dfft_aaaa.max_comm_size, dfft_aaaa.max_mesh_size)); - - /* === pack function === */ - for (i = 1; i < 4; i++) { - dfft_aaaa.plan[i].pack_function = fft_pack_block_permute2; - FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 2 \n", this_node, i)); - } - (*ks_pnum) = 6; - if (dfft_aaaa.plan[1].row_dir == 2) { - dfft_aaaa.plan[1].pack_function = fft_pack_block; - FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 0 \n", this_node, 1)); - (*ks_pnum) = 4; - } else if (dfft_aaaa.plan[1].row_dir == 1) { - dfft_aaaa.plan[1].pack_function = fft_pack_block_permute1; - FFT_TRACE(fprintf(stderr, "%d: forw plan[%d] permute 1 \n", this_node, 1)); - (*ks_pnum) = 5; - } - - /* Factor 2 for complex numbers */ - dfft_aaaa.send_buf = - Utils::realloc(dfft_aaaa.send_buf, dfft_aaaa.max_comm_size * sizeof(double)); - dfft_aaaa.recv_buf = - Utils::realloc(dfft_aaaa.recv_buf, dfft_aaaa.max_comm_size * sizeof(double)); - (*data) = Utils::realloc((*data), dfft_aaaa.max_mesh_size * sizeof(double)); - dfft_aaaa.data_buf = - Utils::realloc(dfft_aaaa.data_buf, dfft_aaaa.max_mesh_size * sizeof(double)); - if (!(*data) || !dfft_aaaa.data_buf || !dfft_aaaa.recv_buf || !dfft_aaaa.send_buf) { - fprintf(stderr, "%d: Could not allocate FFT data arays\n", this_node); - errexit(); - } - - fftw_complex *c_data = (fftw_complex *)(*data); - - /* === FFT Routines (Using FFTW / RFFTW package)=== */ - for (i = 1; i < 4; i++) { - dfft_aaaa.plan[i].dir = FFTW_FORWARD; - /* FFT plan creation. - Attention: destroys contents of c_data/data and c_data_buf/data_buf. */ - wisdom_status = FFTW_FAILURE; - sprintf(wisdom_file_name, ".dfftw3_1d_wisdom_forw_n%d.file", - dfft_aaaa.plan[i].new_mesh[2]); - if ((wisdom_file = fopen(wisdom_file_name, "r")) != nullptr) { - wisdom_status = fftw_import_wisdom_from_file(wisdom_file); - fclose(wisdom_file); - } - if (dfft_aaaa.init_tag == 1) - fftw_destroy_plan(dfft_aaaa.plan[i].our_fftw_plan); - // printf("dfft.plan[%d].n_ffts=%d\n",i,dfft.plan[i].n_ffts); - dfft_aaaa.plan[i].our_fftw_plan = fftw_plan_many_dft( - 1, &dfft_aaaa.plan[i].new_mesh[2], dfft_aaaa.plan[i].n_ffts, c_data, nullptr, 1, - dfft_aaaa.plan[i].new_mesh[2], c_data, nullptr, 1, dfft_aaaa.plan[i].new_mesh[2], - dfft_aaaa.plan[i].dir, FFTW_PATIENT); - if (wisdom_status == FFTW_FAILURE && - (wisdom_file = fopen(wisdom_file_name, "w")) != nullptr) { - fftw_export_wisdom_to_file(wisdom_file); - fclose(wisdom_file); - } - dfft_aaaa.plan[i].fft_function = fftw_execute; - } - - /* === The BACK Direction === */ - /* this is needed because slightly different functions are used */ - for (i = 1; i < 4; i++) { - dfft_aaaa.back[i].dir = FFTW_BACKWARD; - wisdom_status = FFTW_FAILURE; - sprintf(wisdom_file_name, ".dfftw3_1d_wisdom_back_n%d.file", - dfft_aaaa.plan[i].new_mesh[2]); - if ((wisdom_file = fopen(wisdom_file_name, "r")) != nullptr) { - wisdom_status = fftw_import_wisdom_from_file(wisdom_file); - fclose(wisdom_file); - } - if (dfft_aaaa.init_tag == 1) - fftw_destroy_plan(dfft_aaaa.back[i].our_fftw_plan); - dfft_aaaa.back[i].our_fftw_plan = fftw_plan_many_dft( - 1, &dfft_aaaa.plan[i].new_mesh[2], dfft_aaaa.plan[i].n_ffts, c_data, nullptr, 1, - dfft_aaaa.plan[i].new_mesh[2], c_data, nullptr, 1, dfft_aaaa.plan[i].new_mesh[2], - dfft_aaaa.back[i].dir, FFTW_PATIENT); - if (wisdom_status == FFTW_FAILURE && - (wisdom_file = fopen(wisdom_file_name, "w")) != nullptr) { - fftw_export_wisdom_to_file(wisdom_file); - fclose(wisdom_file); - } - dfft_aaaa.back[i].fft_function = fftw_execute; - dfft_aaaa.back[i].pack_function = fft_pack_block_permute1; - FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 1 \n", this_node, i)); - } - if (dfft_aaaa.plan[1].row_dir == 2) { - dfft_aaaa.back[1].pack_function = fft_pack_block; - FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 0 \n", this_node, 1)); - } else if (dfft_aaaa.plan[1].row_dir == 1) { - dfft_aaaa.back[1].pack_function = fft_pack_block_permute2; - FFT_TRACE(fprintf(stderr, "%d: back plan[%d] permute 2 \n", this_node, 1)); - } - dfft_aaaa.init_tag = 1; - /* free(data); */ - for (i = 0; i < 4; i++) { - free(n_id[i]); - free(n_pos[i]); - } - return dfft_aaaa.max_mesh_size; -} - -void dfft_perform_forw(double *data) { - int i; - /* int m,n,o; */ - /* ===== first direction ===== */ - FFT_TRACE( - fprintf(stderr, "%d: dipolar fft_perform_forw: dir 1:\n", this_node)); - - fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)dfft_aaaa.data_buf; - - /* communication to current dir row format (in is data) */ - dfft_forw_grid_comm(dfft_aaaa.plan[1], data, dfft_aaaa.data_buf); - - /* - fprintf(stderr,"%d: start grid \n",this_node); - i=0; - for(m=0;m<8;m++) { - for(n=0;n<8;n++) { - for(o=0;o<8;o++) { - fprintf(stderr,"%.3f ",data_buf[i++]); - } - fprintf(stderr,"\n"); - } - fprintf(stderr,"\n"); - } - */ - - /* complexify the real data array (in is data_buf) */ - for (i = 0; i < dfft_aaaa.plan[1].new_size; i++) { - data[2 * i] = dfft_aaaa.data_buf[i]; /* real value */ - data[(2 * i) + 1] = 0; /* complex value */ - } - /* perform FFT (in/out is data)*/ - fftw_execute_dft(dfft_aaaa.plan[1].our_fftw_plan, c_data, c_data); - /* ===== second direction ===== */ - FFT_TRACE( - fprintf(stderr, "%d: dipolar fft_perform_forw: dir 2:\n", this_node)); - /* communication to current dir row format (in is data) */ - dfft_forw_grid_comm(dfft_aaaa.plan[2], data, dfft_aaaa.data_buf); - /* perform FFT (in/out is data_buf)*/ - fftw_execute_dft(dfft_aaaa.plan[2].our_fftw_plan, c_data_buf, c_data_buf); - /* ===== third direction ===== */ - FFT_TRACE( - fprintf(stderr, "%d: dipolar fft_perform_forw: dir 3:\n", this_node)); - /* communication to current dir row format (in is data_buf) */ - dfft_forw_grid_comm(dfft_aaaa.plan[3], dfft_aaaa.data_buf, data); - /* perform FFT (in/out is data)*/ - fftw_execute_dft(dfft_aaaa.plan[3].our_fftw_plan, c_data, c_data); - // fft_print_global_fft_mesh(dfft.plan[3],data,1,0); - - /* REMARK: Result has to be in data. */ -} - -void dfft_perform_back(double *data) { - int i; - - fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *)dfft_aaaa.data_buf; - - /* ===== third direction ===== */ - FFT_TRACE( - fprintf(stderr, "%d: dipolar fft_perform_back: dir 3:\n", this_node)); - - /* perform FFT (in is data) */ - fftw_execute_dft(dfft_aaaa.back[3].our_fftw_plan, c_data, c_data); - /* communicate (in is data)*/ - dfft_back_grid_comm(dfft_aaaa.plan[3], dfft_aaaa.back[3], data, dfft_aaaa.data_buf); - - /* ===== second direction ===== */ - FFT_TRACE( - fprintf(stderr, "%d: dipolar fft_perform_back: dir 2:\n", this_node)); - /* perform FFT (in is data_buf) */ - fftw_execute_dft(dfft_aaaa.back[2].our_fftw_plan, c_data_buf, c_data_buf); - /* communicate (in is data_buf) */ - dfft_back_grid_comm(dfft_aaaa.plan[2], dfft_aaaa.back[2], dfft_aaaa.data_buf, data); - - /* ===== first direction ===== */ - FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 1:\n", this_node)); - /* perform FFT (in is data) */ - fftw_execute_dft(dfft_aaaa.back[1].our_fftw_plan, c_data, c_data); - /* throw away the (hopefully) empty complex component (in is data)*/ - for (i = 0; i < dfft_aaaa.plan[1].new_size; i++) { - dfft_aaaa.data_buf[i] = data[2 * i]; /* real value */ - // Vincent: - if (data[2 * i + 1] > 1e-5) { - printf("dipoar fft_aaaa - Complex value is not zero (i=%d,data=%g)!!!\n", i, - data[2 * i + 1]); - if (i > 100) - errexit(); - } - } - /* communicate (in is data_buf) */ - dfft_back_grid_comm(dfft_aaaa.plan[1], dfft_aaaa.back[1], dfft_aaaa.data_buf, data); - - /* REMARK: Result has to be in data. */ -} - -void dfft_forw_grid_comm(fft_forw_plan plan, double *in, double *out) { - int i; - MPI_Status status; - double *tmp_ptr; - - for (i = 0; i < plan.g_size; i++) { - plan.pack_function(in, dfft_aaaa.send_buf, &(plan.send_block[6 * i]), - &(plan.send_block[6 * i + 3]), plan.old_mesh, - plan.element); - - if (plan.group[i] < this_node) { /* send first, receive second */ - MPI_Send(dfft_aaaa.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], - REQ_FFT_FORW, comm_cart); - MPI_Recv(dfft_aaaa.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], - REQ_FFT_FORW, comm_cart, &status); - } else if (plan.group[i] > this_node) { /* receive first, send second */ - MPI_Recv(dfft_aaaa.recv_buf, plan.recv_size[i], MPI_DOUBLE, plan.group[i], - REQ_FFT_FORW, comm_cart, &status); - MPI_Send(dfft_aaaa.send_buf, plan.send_size[i], MPI_DOUBLE, plan.group[i], - REQ_FFT_FORW, comm_cart); - } else { /* Self communication... */ - tmp_ptr = dfft_aaaa.send_buf; - dfft_aaaa.send_buf = dfft_aaaa.recv_buf; - dfft_aaaa.recv_buf = tmp_ptr; - } - fft_unpack_block(dfft_aaaa.recv_buf, out, &(plan.recv_block[6 * i]), - &(plan.recv_block[6 * i + 3]), plan.new_mesh, - plan.element); - } -} - -void dfft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, - double *out) { - int i; - MPI_Status status; - double *tmp_ptr; - - /* Back means: Use the send/receive stuff from the forward plan but - replace the receive blocks by the send blocks and vice - versa. Attention then also new_mesh and old_mesh are exchanged */ - - for (i = 0; i < plan_f.g_size; i++) { - - plan_b.pack_function(in, dfft_aaaa.send_buf, &(plan_f.recv_block[6 * i]), - &(plan_f.recv_block[6 * i + 3]), plan_f.new_mesh, - plan_f.element); - - if (plan_f.group[i] < this_node) { /* send first, receive second */ - MPI_Send(dfft_aaaa.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], - REQ_FFT_BACK, comm_cart); - MPI_Recv(dfft_aaaa.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], - REQ_FFT_BACK, comm_cart, &status); - } else if (plan_f.group[i] > this_node) { /* receive first, send second */ - MPI_Recv(dfft_aaaa.recv_buf, plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], - REQ_FFT_BACK, comm_cart, &status); - MPI_Send(dfft_aaaa.send_buf, plan_f.recv_size[i], MPI_DOUBLE, plan_f.group[i], - REQ_FFT_BACK, comm_cart); - } else { /* Self communication... */ - tmp_ptr = dfft_aaaa.send_buf; - dfft_aaaa.send_buf = dfft_aaaa.recv_buf; - dfft_aaaa.recv_buf = tmp_ptr; - } - fft_unpack_block(dfft_aaaa.recv_buf, out, &(plan_f.send_block[6 * i]), - &(plan_f.send_block[6 * i + 3]), plan_f.old_mesh, - plan_f.element); - } -} - -#endif /* DP3M */ diff --git a/src/core/fft-dipolar.hpp b/src/core/fft-dipolar.hpp deleted file mode 100644 index 7cde2fcd45b..00000000000 --- a/src/core/fft-dipolar.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - Copyright (C) 2010-2018 The ESPResSo project - Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - Max-Planck-Institute for Polymer Research, Theory Group - - This file is part of ESPResSo. - - ESPResSo is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - ESPResSo is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef _FFT_MAGNETOSTATICS_H -#define _FFT_MAGNETOSTATICS_H - -/** \file - * - * Routines, row decomposition, data structures and communication for the - * 3D-FFT. - * - * The 3D-FFT is split into 3 ond dimensional FFTs. The data is - * distributed in such a way, that for the actual direction of the - * FFT each node has a certain number of rows for which it performs a - * 1D-FFT. After performing the FFT on that direction the data is - * redistributed. - * - * For simplicity at the moment I have implemented a full complex to - * complex FFT (even though a real to complex FFT would be - * sufficient) - * - * \todo Combine the forward and backward structures. - * \todo The packing routines could be moved to utils.hpp when they are needed - * elsewhere. - */ - -#include "config.hpp" -#ifdef DP3M - -#include "fft-common.hpp" - -extern fft_data_struct dfft_aaaa; - -/** \name Exported Functions */ -/************************************************************/ -/*@{*/ - -/** Initialize some arrays connected to the 3D-FFT. */ -void dfft_pre_init(); - -/** Initialize everything connected to the 3D-FFT related to the dipole-dipole. - - * \return Maximal size of local fft mesh (needed for allocation of ca_mesh). - * \param data Pointer Pointer to data array. - * \param ca_mesh_dim Pointer to CA mesh dimensions. - * \param ca_mesh_margin Pointer to CA mesh margins. - * \param global_mesh_dim Pointer to global CA mesh dimensions. - * \param global_mesh_off Pointer to global CA mesh offset. - * \param ks_pnum Pointer to number of permutations in k-space. - */ -int dfft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, - int *global_mesh_dim, double *global_mesh_off, int *ks_pnum); - -/** perform the forward 3D FFT for meshes related to the magnetic dipole-dipole - interaction. The assigned charges are in \a data. The result is also stored - in \a data. \warning The content of \a data is overwritten. \param data - DMesh. -*/ -void dfft_perform_forw(double *data); - -/** perform the backward 3D FFT for meshes related to the magnetic dipole-dipole - interaction. \warning The content of \a data is overwritten. \param data - DMesh. -*/ -void dfft_perform_back(double *data); - -#endif /* DP3M */ -#endif /* _FFT_MAGNETOSTATICS_H */ From a5d6dad523d1e8688327da5dbcc74cd8a24d564b Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:34:21 +0100 Subject: [PATCH 6/9] core: Fixed comment regression --- .../p3m_gpu_cuda.cu | 2 +- src/core/fft-common.hpp | 4 ++-- src/core/fft.cpp | 20 +++++++++---------- .../fd-electrostatics_cuda.cu | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu index d716d09fc5b..57766d51af1 100644 --- a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu +++ b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu @@ -741,7 +741,7 @@ void p3m_gpu_init(int cao, int mesh[3], double alpha) { FFT_PLAN_FORW_FLAG) != CUFFT_SUCCESS || cufftPlan3d(&(p3m_gpu_fft_plans.back_plan), mesh[0], mesh[1], mesh[2], FFT_PLAN_BACK_FLAG) != CUFFT_SUCCESS) { - throw std::string("Unable to create fft_aaaa plan"); + throw std::string("Unable to create fft plan"); } } diff --git a/src/core/fft-common.hpp b/src/core/fft-common.hpp index a05283359e8..395acb35c6f 100644 --- a/src/core/fft-common.hpp +++ b/src/core/fft-common.hpp @@ -21,9 +21,9 @@ #ifndef _FFT_COMMON_H #define _FFT_COMMON_H -#include "Vector.hpp" #include "config.hpp" #if defined(P3M) || defined(DP3M) +#include "Vector.hpp" #include @@ -123,7 +123,7 @@ typedef struct { * DEFINES ************************************************/ -/* MPI tags for the fft_aaaa communications: */ +/* MPI tags for the fft communications: */ /** Tag for communication in fft_init() */ #define REQ_FFT_INIT 300 /** Tag for communication in forw_grid_comm() */ diff --git a/src/core/fft.cpp b/src/core/fft.cpp index dab3e448818..c062d6deff6 100755 --- a/src/core/fft.cpp +++ b/src/core/fft.cpp @@ -147,7 +147,7 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1]; /* FFT_TRACE( printf("%d: comm_group( ",this_node )); */ - /* FFT_TRACE( for(j=0; j< fft_aaaa.plan[i].g_size; j++) printf("%d ")); */ + /* FFT_TRACE( for(j=0; j< fft.plan[i].g_size; j++) printf("%d ")); */ /* FFT_TRACE( printf(")\n")); */ /* === send/recv block specifications === */ @@ -210,7 +210,7 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m fft.max_mesh_size = 2 * fft.plan[i].new_size; FFT_TRACE(fprintf(stderr, - "%d: fft_aaaa.max_comm_size = %d, fft_aaaa.max_mesh_size = %d\n", + "%d: fft.max_comm_size = %d, fft.max_mesh_size = %d\n", this_node, fft.max_comm_size, fft.max_mesh_size)); /* === pack function === */ @@ -315,7 +315,7 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { for(m=0;m<8;m++) { for(n=0;n<8;n++) { for(o=0;o<8;o++) { - fprintf(stderr,"%.3f ",fft_aaaa.data_buf[i++]); + fprintf(stderr,"%.3f ",fft.data_buf[i++]); } fprintf(stderr,"\n"); } @@ -323,7 +323,7 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { } */ - /* complexify the real data array (in is fft_aaaa.data_buf) */ + /* complexify the real data array (in is fft.data_buf) */ for (i = 0; i < fft.plan[1].new_size; i++) { data[2 * i] = fft.data_buf[i]; /* real value */ data[(2 * i) + 1] = 0; /* complex value */ @@ -334,15 +334,15 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 2:\n", this_node)); /* communication to current dir row format (in is data) */ fft_forw_grid_comm(fft.plan[2], data, fft.data_buf, fft); - /* perform FFT (in/out is fft_aaaa.data_buf)*/ + /* perform FFT (in/out is fft.data_buf)*/ fftw_execute_dft(fft.plan[2].our_fftw_plan, c_data_buf, c_data_buf); /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 3:\n", this_node)); - /* communication to current dir row format (in is fft_aaaa.data_buf) */ + /* communication to current dir row format (in is fft.data_buf) */ fft_forw_grid_comm(fft.plan[3], fft.data_buf, data, fft); /* perform FFT (in/out is data)*/ fftw_execute_dft(fft.plan[3].our_fftw_plan, c_data, c_data); - // fft_print_global_fft_mesh(fft_aaaa.plan[3],data,1,0); + // fft_print_global_fft_mesh(fft.plan[3],data,1,0); /* REMARK: Result has to be in data. */ } @@ -363,9 +363,9 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 2:\n", this_node)); - /* perform FFT (in is fft_aaaa.data_buf) */ + /* perform FFT (in is fft.data_buf) */ fftw_execute_dft(fft.back[2].our_fftw_plan, c_data_buf, c_data_buf); - /* communicate (in is fft_aaaa.data_buf) */ + /* communicate (in is fft.data_buf) */ fft_back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf, data, fft); /* ===== first direction ===== */ @@ -383,7 +383,7 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { errexit(); } } - /* communicate (in is fft_aaaa.data_buf) */ + /* communicate (in is fft.data_buf) */ fft_back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf, data, fft); /* REMARK: Result has to be in data. */ diff --git a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu index c9d45a656e7..2ba23d529f9 100644 --- a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu +++ b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu @@ -94,7 +94,7 @@ FdElectrostatics::FdElectrostatics(InputParameters inputParameters, if (cufftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y, parameters.dim_x, CUFFT_R2C) != CUFFT_SUCCESS) { - throw std::string("Unable to create fft_aaaa plan"); + throw std::string("Unable to create fft plan"); } if (cufftSetStream(plan_fft, cuda_stream) != CUFFT_SUCCESS) { From cfa17baa141ac3f2f12800ab6b7acf78c01c609a Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 17:35:53 +0100 Subject: [PATCH 7/9] Formatting --- .../p3m-dipolar.cpp | 53 ++++++++++------- .../p3m-dipolar.hpp | 4 +- .../electrostatics_magnetostatics/p3m.cpp | 19 +++--- src/core/fft.cpp | 58 ++++++++++--------- src/core/fft.hpp | 5 +- 5 files changed, 78 insertions(+), 61 deletions(-) mode change 100755 => 100644 src/core/fft.cpp diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp index 770110b4a98..1a1dda2ab96 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp @@ -394,9 +394,9 @@ void dp3m_init() { P3M_TRACE(fprintf(stderr, "%d: dp3m.rs_mesh ADR=%p\n", this_node, (void *)dp3m.rs_mesh)); - int ca_mesh_size = - fft_init(&dp3m.rs_mesh, dp3m.local_mesh.dim, dp3m.local_mesh.margin, - dp3m.params.mesh, dp3m.params.mesh_off, &dp3m.ks_pnum, dp3m.fft); + int ca_mesh_size = fft_init(&dp3m.rs_mesh, dp3m.local_mesh.dim, + dp3m.local_mesh.margin, dp3m.params.mesh, + dp3m.params.mesh_off, &dp3m.ks_pnum, dp3m.fft); dp3m.ks_mesh = Utils::realloc(dp3m.ks_mesh, ca_mesh_size * sizeof(double)); for (n = 0; n < 3; n++) @@ -453,7 +453,8 @@ double dp3m_average_dipolar_self_energy(double box_l, int mesh) { ind = (n[2] - dp3m.fft.plan[3].start[2]) + dp3m.fft.plan[3].new_mesh[2] * ((n[1] - dp3m.fft.plan[3].start[1]) + - (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); + (dp3m.fft.plan[3].new_mesh[1] * + (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) node_phi += 0.0; @@ -1016,9 +1017,11 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { ind = 0; i = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; + j[1]++) { // j[1]=n_z + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; + j[2]++) { // j[2]=n_x // tmp0 = Re(mu)*k, tmp1 = Im(mu)*k tmp0 = dp3m.rs_mesh_dip[0][ind] * @@ -1053,11 +1056,11 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { - dp3m.rs_mesh[ind] = - dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.rs_mesh[ind] = dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * + dp3m.ks_mesh[ind]; ind++; - dp3m.rs_mesh[ind] = - dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + dp3m.rs_mesh[ind] = dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * + dp3m.ks_mesh[ind]; ind++; } } @@ -1087,9 +1090,11 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { /* fill in ks_mesh array for force calculation */ ind = 0; i = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; + j[1]++) { // j[1]=n_z + for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; + j[2]++) { // j[2]=n_x // tmp0 = Im(mu)*k, tmp1 = -Re(mu)*k tmp0 = dp3m.rs_mesh_dip[0][ind + 1] * dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] + @@ -1115,12 +1120,14 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { for (d = 0; d < 3; d++) { /* direction in k space: */ d_rs = (d + dp3m.ks_pnum) % 3; ind = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { // j[1]=n_z + for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; + j[0]++) { // j[0]=n_y + for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; + j[1]++) { // j[1]=n_z for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { // j[2]=n_x - tmp0 = - dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + tmp0 = dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * + dp3m.ks_mesh[ind]; dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] * tmp0; dp3m.rs_mesh_dip[1][ind] = @@ -1128,8 +1135,8 @@ double dp3m_calc_kspace_forces(int force_flag, int energy_flag) { dp3m.rs_mesh_dip[2][ind] = dp3m.d_op[j[1] + dp3m.fft.plan[3].start[1]] * tmp0; ind++; - tmp0 = - dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * dp3m.ks_mesh[ind]; + tmp0 = dp3m.d_op[j[d] + dp3m.fft.plan[3].start[d]] * + dp3m.ks_mesh[ind]; dp3m.rs_mesh_dip[0][ind] = dp3m.d_op[j[2] + dp3m.fft.plan[3].start[2]] * tmp0; dp3m.rs_mesh_dip[1][ind] = @@ -1406,7 +1413,8 @@ void dp3m_calc_influence_function_force() { ind = (n[2] - dp3m.fft.plan[3].start[2]) + dp3m.fft.plan[3].new_mesh[2] * ((n[1] - dp3m.fft.plan[3].start[1]) + - (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); + (dp3m.fft.plan[3].new_mesh[1] * + (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) dp3m.g_force[ind] = 0.0; @@ -1493,7 +1501,8 @@ void dp3m_calc_influence_function_energy() { ind = (n[2] - dp3m.fft.plan[3].start[2]) + dp3m.fft.plan[3].new_mesh[2] * ((n[1] - dp3m.fft.plan[3].start[1]) + - (dp3m.fft.plan[3].new_mesh[1] * (n[0] - dp3m.fft.plan[3].start[0]))); + (dp3m.fft.plan[3].new_mesh[1] * + (n[0] - dp3m.fft.plan[3].start[0]))); if ((n[0] == 0) && (n[1] == 0) && (n[2] == 0)) dp3m.g_energy[ind] = 0.0; diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp index a8fcdba7340..1b34ceab928 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp @@ -40,9 +40,9 @@ #include "config.hpp" #ifdef DP3M +#include "fft.hpp" #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "p3m-common.hpp" -#include "fft.hpp" #include "utils/math/AS_erfc_part.hpp" @@ -99,7 +99,7 @@ typedef struct { /* Stores the value of the energy correction due to MS effects */ double energy_correction; - + fft_data_struct fft; } dp3m_data_struct; diff --git a/src/core/electrostatics_magnetostatics/p3m.cpp b/src/core/electrostatics_magnetostatics/p3m.cpp index 754c54a6789..c78c507a2bd 100644 --- a/src/core/electrostatics_magnetostatics/p3m.cpp +++ b/src/core/electrostatics_magnetostatics/p3m.cpp @@ -366,8 +366,8 @@ void p3m_init() { (void *)p3m.rs_mesh)); int ca_mesh_size = - fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, - p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, p3m.fft); + fft_init(&p3m.rs_mesh, p3m.local_mesh.dim, p3m.local_mesh.margin, + p3m.params.mesh, p3m.params.mesh_off, &p3m.ks_pnum, p3m.fft); p3m.ks_mesh = Utils::realloc(p3m.ks_mesh, ca_mesh_size * sizeof(double)); P3M_TRACE(fprintf(stderr, "%d: p3m.rs_mesh ADR=%p\n", this_node, @@ -904,7 +904,8 @@ double p3m_calc_kspace_forces(int force_flag, int energy_flag) { } } /* Back FFT force component mesh */ - fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, p3m.fft); + fft_perform_back(p3m.rs_mesh, /* check_complex */ !p3m.params.tuning, + p3m.fft); /* redistribute force component mesh */ p3m_spread_force_grid(p3m.rs_mesh); /* Assign force component from mesh to particle */ @@ -1198,10 +1199,11 @@ template void calc_influence_function_force() { for (n[0] = p3m.fft.plan[3].start[0]; n[0] < end[0]; n[0]++) { for (n[1] = p3m.fft.plan[3].start[1]; n[1] < end[1]; n[1]++) { for (n[2] = p3m.fft.plan[3].start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - p3m.fft.plan[3].start[2]) + - p3m.fft.plan[3].new_mesh[2] * - ((n[1] - p3m.fft.plan[3].start[1]) + - (p3m.fft.plan[3].new_mesh[1] * (n[0] - p3m.fft.plan[3].start[0]))); + ind = + (n[2] - p3m.fft.plan[3].start[2]) + + p3m.fft.plan[3].new_mesh[2] * ((n[1] - p3m.fft.plan[3].start[1]) + + (p3m.fft.plan[3].new_mesh[1] * + (n[0] - p3m.fft.plan[3].start[0]))); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && (n[KY] % (p3m.params.mesh[RY] / 2) == 0) && @@ -1319,7 +1321,8 @@ template void calc_influence_function_energy() { for (n[0] = start[0]; n[0] < end[0]; n[0]++) { for (n[1] = start[1]; n[1] < end[1]; n[1]++) { for (n[2] = start[2]; n[2] < end[2]; n[2]++) { - ind = (n[2] - start[2]) + p3m.fft.plan[3].new_mesh[2] * (n[1] - start[1]) + + ind = (n[2] - start[2]) + + p3m.fft.plan[3].new_mesh[2] * (n[1] - start[1]) + p3m.fft.plan[3].new_mesh[2] * p3m.fft.plan[3].new_mesh[1] * (n[0] - start[0]); if ((n[KX] % (p3m.params.mesh[RX] / 2) == 0) && diff --git a/src/core/fft.cpp b/src/core/fft.cpp old mode 100755 new mode 100644 index c062d6deff6..6b38d2593bf --- a/src/core/fft.cpp +++ b/src/core/fft.cpp @@ -44,7 +44,8 @@ using Utils::permute_ifield; * \param in input mesh. * \param out output mesh. */ -static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_data_struct &fft); +static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, + fft_data_struct &fft); /** communicate the grid data according to the given * fft_forw_plan/fft_bakc_plan. \param plan_f communication plan (see \ref @@ -52,11 +53,12 @@ static void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_ * \param in input mesh. * \param out output mesh. */ -static void -fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out, fft_data_struct &fft); +static void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, + double *in, double *out, fft_data_struct &fft); -int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_mesh_dim, double *global_mesh_off, - int *ks_pnum, fft_data_struct &fft) { +int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, + int *global_mesh_dim, double *global_mesh_off, int *ks_pnum, + fft_data_struct &fft) { int i, j; /* helpers */ int mult[3]; @@ -110,9 +112,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m fft.plan[0].new_mesh[i] = ca_mesh_dim[i]; for (i = 1; i < 4; i++) { fft.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - fft.plan[i].group, n_pos[i], my_pos[i]); + {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, + {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], + fft.plan[i].group, n_pos[i], my_pos[i]); if (fft.plan[i].g_size == -1) { /* try permutation */ j = n_grid[i][(fft.plan[i].row_dir + 1) % 3]; @@ -120,9 +122,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m n_grid[i][(fft.plan[i].row_dir + 2) % 3]; n_grid[i][(fft.plan[i].row_dir + 2) % 3] = j; fft.plan[i].g_size = fft_find_comm_groups( - {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, - {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], - fft.plan[i].group, n_pos[i], my_pos[i]); + {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, + {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1], n_id[i], + fft.plan[i].group, n_pos[i], my_pos[i]); if (fft.plan[i].g_size == -1) { fprintf(stderr, "%d: INTERNAL ERROR: fft_find_comm_groups error\n", this_node); @@ -131,17 +133,17 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m } fft.plan[i].send_block = Utils::realloc( - fft.plan[i].send_block, 6 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].send_block, 6 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].send_size = Utils::realloc( - fft.plan[i].send_size, 1 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].send_size, 1 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].recv_block = Utils::realloc( - fft.plan[i].recv_block, 6 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].recv_block, 6 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].recv_size = Utils::realloc( - fft.plan[i].recv_size, 1 * fft.plan[i].g_size * sizeof(int)); + fft.plan[i].recv_size, 1 * fft.plan[i].g_size * sizeof(int)); fft.plan[i].new_size = fft_calc_local_mesh( - my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, - fft.plan[i].new_mesh, fft.plan[i].start); + my_pos[i], n_grid[i], global_mesh_dim, global_mesh_off, + fft.plan[i].new_mesh, fft.plan[i].start); permute_ifield(fft.plan[i].new_mesh, 3, -(fft.plan[i].n_permute)); permute_ifield(fft.plan[i].start, 3, -(fft.plan[i].n_permute)); fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1]; @@ -257,9 +259,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m if (fft.init_tag == 1) fftw_destroy_plan(fft.plan[i].our_fftw_plan); fft.plan[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.plan[i].dir, FFTW_PATIENT); + 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, + fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], + fft.plan[i].dir, FFTW_PATIENT); fft.plan[i].fft_function = fftw_execute; } @@ -272,9 +274,9 @@ int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_m if (fft.init_tag == 1) fftw_destroy_plan(fft.back[i].our_fftw_plan); fft.back[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.back[i].dir, FFTW_PATIENT); + 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, + fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], + fft.back[i].dir, FFTW_PATIENT); fft.back[i].fft_function = fftw_execute; fft.back[i].pack_function = fft_pack_block_permute1; @@ -304,7 +306,7 @@ void fft_perform_forw(double *data, fft_data_struct &fft) { FFT_TRACE(fprintf(stderr, "%d: fft_perform_forw: dir 1:\n", this_node)); fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *) fft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *)fft.data_buf; /* communication to current dir row format (in is data) */ fft_forw_grid_comm(fft.plan[1], data, fft.data_buf, fft); @@ -351,7 +353,7 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { int i; fftw_complex *c_data = (fftw_complex *)data; - fftw_complex *c_data_buf = (fftw_complex *) fft.data_buf; + fftw_complex *c_data_buf = (fftw_complex *)fft.data_buf; /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr, "%d: fft_perform_back: dir 3:\n", this_node)); @@ -389,7 +391,8 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft) { /* REMARK: Result has to be in data. */ } -void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_data_struct &fft) { +void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, + fft_data_struct &fft) { int i; MPI_Status status; double *tmp_ptr; @@ -420,7 +423,8 @@ void fft_forw_grid_comm(fft_forw_plan plan, double *in, double *out, fft_data_st } } -void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, double *out, fft_data_struct &fft) { +void fft_back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, double *in, + double *out, fft_data_struct &fft) { int i; MPI_Status status; double *tmp_ptr; diff --git a/src/core/fft.hpp b/src/core/fft.hpp index c14886a7fa0..8a244d62fde 100644 --- a/src/core/fft.hpp +++ b/src/core/fft.hpp @@ -61,8 +61,9 @@ * \param global_mesh_off Pointer to global CA mesh offset. * \param ks_pnum Pointer to number of permutations in k-space. */ -int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, int *global_mesh_dim, double *global_mesh_off, - int *ks_pnum, fft_data_struct &fft); +int fft_init(double **data, int *ca_mesh_dim, int *ca_mesh_margin, + int *global_mesh_dim, double *global_mesh_off, int *ks_pnum, + fft_data_struct &fft); /** perform the forward 3D FFT. The assigned charges are in \a data. The result is also stored in \a data. From 9478c79022b4ff1c619149412c0cd3123d96ff89 Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Mon, 21 Jan 2019 21:52:05 +0100 Subject: [PATCH 8/9] core: Fixed build w/o fftw --- src/core/electrostatics_magnetostatics/icc.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/electrostatics_magnetostatics/icc.cpp b/src/core/electrostatics_magnetostatics/icc.cpp index 1e9e452abfe..c7c642ed6f8 100644 --- a/src/core/electrostatics_magnetostatics/icc.cpp +++ b/src/core/electrostatics_magnetostatics/icc.cpp @@ -24,6 +24,10 @@ file \ref icc.hpp. */ +#include "icc.hpp" + +#ifdef ELECTROSTATICS + #include #include #include @@ -35,7 +39,6 @@ #include "electrostatics_magnetostatics/mmm2d.hpp" #include "electrostatics_magnetostatics/p3m.hpp" #include "electrostatics_magnetostatics/p3m_gpu.hpp" -#include "icc.hpp" #include "cells.hpp" #include "communication.hpp" @@ -45,12 +48,11 @@ #include "initialize.hpp" #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "particle_data.hpp" +#include "debug.hpp" #include "short_range_loop.hpp" #include "utils/NoOp.hpp" -#ifdef ELECTROSTATICS - iccp3m_struct iccp3m_cfg; /* functions that are used in icc* to compute the electric field acting on the From a25ea547845a6c2c1513b50c435aa98447bdf6af Mon Sep 17 00:00:00 2001 From: Florian Weik Date: Tue, 22 Jan 2019 10:42:47 +0100 Subject: [PATCH 9/9] Formatting --- src/core/electrostatics_magnetostatics/icc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/electrostatics_magnetostatics/icc.cpp b/src/core/electrostatics_magnetostatics/icc.cpp index c7c642ed6f8..51e18a5982b 100644 --- a/src/core/electrostatics_magnetostatics/icc.cpp +++ b/src/core/electrostatics_magnetostatics/icc.cpp @@ -43,12 +43,12 @@ #include "cells.hpp" #include "communication.hpp" #include "config.hpp" +#include "debug.hpp" #include "forces.hpp" #include "global.hpp" #include "initialize.hpp" #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "particle_data.hpp" -#include "debug.hpp" #include "short_range_loop.hpp" #include "utils/NoOp.hpp"