FFT refactoring (#4985)

Fixes #4963 Follow-up to #4969 Description of changes: - separate FFT backend from FFT buffers with type erasure - extract FFT packing functions
espressomd · Aug 21, 2024 · 030d3c8 · 030d3c8
2 parents 8c90c3d + 6d08c8b
commit 030d3c8
Show file tree

Hide file tree

Showing 25 changed files with 752 additions and 491 deletions.
diff --git a/src/core/electrostatics/coulomb.cpp b/src/core/electrostatics/coulomb.cpp
@@ -145,7 +145,7 @@ Solver::calc_pressure_long_range(ParticleRange const &particles) const {
 struct ShortRangeCutoff {
 #ifdef P3M
   auto operator()(std::shared_ptr<CoulombP3M> const &actor) const {
-    return actor->p3m.params.r_cut;
+    return actor->p3m_params.r_cut;
   }
   auto
   operator()(std::shared_ptr<ElectrostaticLayerCorrection> const &actor) const {

diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp
@@ -1011,7 +1011,8 @@ void ElectrostaticLayerCorrection::adapt_solver() {
   std::visit(
       [this](auto &solver) {
         set_prefactor(solver->prefactor);
-        solver->p3m.params.epsilon = P3M_EPSILON_METALLIC;
+        solver->adapt_epsilon_elc();
+        assert(solver->p3m_params.epsilon == P3M_EPSILON_METALLIC);
       },
       base_solver);
 }
@@ -1031,7 +1032,7 @@ void ElectrostaticLayerCorrection::recalc_box_h() {
 void ElectrostaticLayerCorrection::recalc_space_layer() {
   if (elc.dielectric_contrast_on) {
     auto const p3m_r_cut = std::visit(
-        [](auto &solver) { return solver->p3m.params.r_cut; }, base_solver);
+        [](auto &solver) { return solver->p3m_params.r_cut; }, base_solver);
     // recalculate the space layer size:
     // 1. set the space_layer to be 1/3 of the gap size, so that box = layer
     elc.space_layer = (1. / 3.) * elc.gap_size;

diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
@@ -90,6 +90,10 @@
 #include <tuple>
 #include <utility>
 
+#ifdef FFTW3_H
+#error "The FFTW3 library shouldn't be visible in this translation unit"
+#endif
+
 template <typename FloatType, Arch Architecture>
 void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
   auto local_n = 0;
@@ -262,8 +266,11 @@ void CoulombP3MImpl<FloatType, Architecture>::init_cpu_kernels() {
 
   assert(p3m.fft);
   p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
-  p3m.fft->init_halo();
-  p3m.fft->init_fft();
+  p3m.fft_buffers->init_halo();
+  p3m.fft->init(p3m.params);
+  p3m.mesh.ks_pnum = p3m.fft->get_ks_pnum();
+  p3m.fft_buffers->init_meshes(p3m.fft->get_ca_mesh_size());
+  p3m.update_mesh_views();
   p3m.calc_differential_operator();
 
   /* fix box length dependent constants */
@@ -391,8 +398,9 @@ Utils::Vector9d CoulombP3MImpl<FloatType, Architecture>::long_range_pressure(
 
   if (p3m.sum_q2 > 0.) {
     charge_assign(particles);
-    p3m.fft->perform_scalar_halo_gather();
-    p3m.fft->perform_scalar_fwd_fft();
+    p3m.fft_buffers->perform_scalar_halo_gather();
+    p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
+    p3m.update_mesh_views();
 
     auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
     auto const &mesh_stop = p3m.mesh.size;
@@ -457,8 +465,9 @@ double CoulombP3MImpl<FloatType, Architecture>::long_range_kernel(
             system.coulomb.impl->solver)) {
       charge_assign(particles);
     }
-    p3m.fft->perform_scalar_halo_gather();
-    p3m.fft->perform_scalar_fwd_fft();
+    p3m.fft_buffers->perform_scalar_halo_gather();
+    p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
+    p3m.update_mesh_views();
   }
 
   auto p_q_range = ParticlePropertyRange::charge_range(particles);
@@ -515,8 +524,10 @@ double CoulombP3MImpl<FloatType, Architecture>::long_range_kernel(
     auto const check_residuals =
         not p3m.params.tuning and check_complex_residuals;
     p3m.fft->check_complex_residuals = check_residuals;
-    p3m.fft->perform_vector_back_fft();
-    p3m.fft->perform_vector_halo_spread();
+    for (auto &rs_mesh : p3m.fft_buffers->get_vector_mesh()) {
+      p3m.fft->backward_fft(rs_mesh);
+    }
+    p3m.fft_buffers->perform_vector_halo_spread();
     p3m.fft->check_complex_residuals = false;
 
     auto const force_prefac = prefactor / volume;
@@ -785,25 +796,25 @@ void CoulombP3M::sanity_checks_boxl() const {
   auto const &local_geo = *system.local_geo;
   for (auto i = 0u; i < 3u; i++) {
     /* check k-space cutoff */
-    if (p3m.params.cao_cut[i] >= box_geo.length_half()[i]) {
+    if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
       std::stringstream msg;
-      msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
+      msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
           << " is larger than half of box dimension " << box_geo.length()[i];
       throw std::runtime_error(msg.str());
     }
-    if (p3m.params.cao_cut[i] >= local_geo.length()[i]) {
+    if (p3m_params.cao_cut[i] >= local_geo.length()[i]) {
       std::stringstream msg;
-      msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
+      msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
           << " is larger than local box dimension " << local_geo.length()[i];
       throw std::runtime_error(msg.str());
     }
   }
 
-  if (p3m.params.epsilon != P3M_EPSILON_METALLIC) {
+  if (p3m_params.epsilon != P3M_EPSILON_METALLIC) {
     if ((box_geo.length()[0] != box_geo.length()[1]) or
         (box_geo.length()[1] != box_geo.length()[2]) or
-        (p3m.params.mesh[0] != p3m.params.mesh[1]) or
-        (p3m.params.mesh[1] != p3m.params.mesh[2])) {
+        (p3m_params.mesh[0] != p3m_params.mesh[1]) or
+        (p3m_params.mesh[1] != p3m_params.mesh[2])) {
       throw std::runtime_error(
           "CoulombP3M: non-metallic epsilon requires cubic box");
     }
@@ -841,7 +852,8 @@ void CoulombP3M::sanity_checks_node_grid() const {
   }
 }
 
-void CoulombP3M::scaleby_box_l() {
+template <typename FloatType, Arch Architecture>
+void CoulombP3MImpl<FloatType, Architecture>::scaleby_box_l() {
   auto const &box_geo = *get_system().box_geo;
   p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
   p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];

diff --git a/src/core/electrostatics/p3m.hpp b/src/core/electrostatics/p3m.hpp
@@ -50,38 +50,19 @@
 
 #include <cmath>
 #include <numbers>
-#include <stdexcept>
 
 /** @brief P3M solver. */
 struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
-  /** @brief Coulomb P3M parameters. */
-  p3m_data_struct &p3m;
-
-  int tune_timings;
-  bool tune_verbose;
-  bool check_complex_residuals;
-
-protected:
-  bool m_is_tuned;
+  P3MParameters const &p3m_params;
 
 public:
-  CoulombP3M(p3m_data_struct &p3m_data, double prefactor, int tune_timings,
-             bool tune_verbose, bool check_complex_residuals)
-      : p3m{p3m_data}, tune_timings{tune_timings}, tune_verbose{tune_verbose},
-        check_complex_residuals{check_complex_residuals} {
-
-    if (tune_timings <= 0) {
-      throw std::domain_error("Parameter 'timings' must be > 0");
-    }
-    m_is_tuned = not p3m.params.tuning;
-    p3m.params.tuning = false;
-    set_prefactor(prefactor);
-  }
+  CoulombP3M(P3MParameters const &p3m_params) : p3m_params{p3m_params} {}
 
   virtual ~CoulombP3M() = default;
 
-  [[nodiscard]] bool is_tuned() const { return m_is_tuned; }
+  [[nodiscard]] virtual bool is_tuned() const noexcept = 0;
   [[nodiscard]] virtual bool is_gpu() const noexcept = 0;
+  [[nodiscard]] virtual bool is_double_precision() const noexcept = 0;
 
   /** @brief Recalculate all derived parameters. */
   virtual void init() = 0;
@@ -109,6 +90,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
    */
   virtual void count_charged_particles() = 0;
   virtual void count_charged_particles_elc(int, double, double) = 0;
+  virtual void adapt_epsilon_elc() = 0;
 
   /**
    * @brief Tune P3M parameters to desired accuracy.
@@ -119,7 +101,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
    * @ref P3MParameters::r_cut_iL "r_cut_iL" and
    * @ref P3MParameters::alpha_L "alpha_L" are tuned to obtain the target
    * @ref P3MParameters::accuracy "accuracy" in optimal time.
-   * These parameters are stored in the @ref p3m object.
+   * These parameters are stored in the @ref p3m_params object.
    *
    * The function utilizes the analytic expression of the error estimate
    * for the P3M method in @cite hockney88a (eq. (8.23)) in
@@ -163,10 +145,10 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
   /** Calculate real-space contribution of p3m Coulomb pair forces. */
   Utils::Vector3d pair_force(double q1q2, Utils::Vector3d const &d,
                              double dist) const {
-    if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) {
+    if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) {
       return {};
     }
-    auto const alpha = p3m.params.alpha;
+    auto const alpha = p3m_params.alpha;
     auto const adist = alpha * dist;
     auto const exp_adist_sq = exp(-adist * adist);
     auto const dist_sq = dist * dist;
@@ -184,10 +166,10 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
   /** Calculate real-space contribution of Coulomb pair energy. */
   // Eq. (3.6) @cite deserno00b
   double pair_energy(double q1q2, double dist) const {
-    if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) {
+    if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) {
       return {};
     }
-    auto const adist = p3m.params.alpha * dist;
+    auto const adist = p3m_params.alpha * dist;
 #if USE_ERFC_APPROXIMATION
     auto const erfc_part_ri = Utils::AS_erfc_part(adist) / dist;
     return prefactor * q1q2 * erfc_part_ri * exp(-adist * adist);
@@ -216,7 +198,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
   void sanity_checks_periodicity() const;
   void sanity_checks_cell_structure() const;
 
-  virtual void scaleby_box_l();
+  virtual void scaleby_box_l() = 0;
 };
 
 #endif // P3M
diff --git a/src/core/electrostatics/p3m.impl.hpp b/src/core/electrostatics/p3m.impl.hpp
@@ -19,19 +19,6 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-/** @file
- *  P3M algorithm for long-range Coulomb interaction.
- *
- *  We use a P3M (Particle-Particle Particle-Mesh) method based on the
- *  Ewald summation. Details of the used method can be found in
- *  @cite hockney88a and @cite deserno98a @cite deserno98b.
- *
- *  Further reading: @cite ewald21a, @cite hockney88a, @cite deserno98a,
- *  @cite deserno98b, @cite deserno00e, @cite deserno00b, @cite cerda08d.
- *
- *  Implementation in p3m.cpp.
- */
-
 #pragma once
 
 #include "config/config.hpp"
@@ -49,11 +36,13 @@
 #include <utils/Vector.hpp>
 
 #include <memory>
+#include <stdexcept>
+#include <type_traits>
 #include <utility>
 
 template <typename FloatType>
-struct p3m_data_struct_coulomb : public p3m_data_struct_fft<FloatType> {
-  using p3m_data_struct_fft<FloatType>::p3m_data_struct_fft;
+struct p3m_data_struct_coulomb : public p3m_data_struct<FloatType> {
+  using p3m_data_struct<FloatType>::p3m_data_struct;
 
   /** number of charged particles (only on head node). */
   int sum_qpart = 0;
@@ -73,17 +62,34 @@ template <typename FloatType, Arch Architecture>
 struct CoulombP3MImpl : public CoulombP3M {
   ~CoulombP3MImpl() override = default;
 
+  /** @brief Coulomb P3M parameters. */
+  p3m_data_struct_coulomb<FloatType> &p3m;
+
+private:
   /** @brief Coulomb P3M meshes and FFT algorithm. */
   std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
-  // this member overshadows its own base class pointer in the parent class
-  p3m_data_struct_coulomb<FloatType> &p3m;
+  int tune_timings;
+  bool tune_verbose;
+  bool check_complex_residuals;
+  bool m_is_tuned;
 
-  template <typename... Args>
+public:
   CoulombP3MImpl(
       std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
-      Args &&...args)
-      : CoulombP3M(*p3m_handle, std::forward<Args>(args)...),
-        p3m_impl{std::move(p3m_handle)}, p3m{*p3m_impl} {}
+      double prefactor, int tune_timings, bool tune_verbose,
+      bool check_complex_residuals)
+      : CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
+        p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
+        tune_verbose{tune_verbose},
+        check_complex_residuals{check_complex_residuals} {
+
+    if (tune_timings <= 0) {
+      throw std::domain_error("Parameter 'timings' must be > 0");
+    }
+    m_is_tuned = not p3m.params.tuning;
+    p3m.params.tuning = false;
+    set_prefactor(prefactor);
+  }
 
   void init() override {
     if constexpr (Architecture == Arch::CPU) {
@@ -103,10 +109,17 @@ struct CoulombP3MImpl : public CoulombP3M {
     p3m.sum_q2 = sum_q2;
     p3m.square_sum_q = square_sum_q;
   }
+  void adapt_epsilon_elc() override {
+    p3m.params.epsilon = P3M_EPSILON_METALLIC;
+  }
 
+  [[nodiscard]] bool is_tuned() const noexcept override { return m_is_tuned; }
   [[nodiscard]] bool is_gpu() const noexcept override {
     return Architecture == Arch::GPU;
   }
+  [[nodiscard]] bool is_double_precision() const noexcept override {
+    return std::is_same_v<FloatType, double>;
+  }
 
   void on_activation() override {
 #ifdef CUDA
@@ -160,6 +173,7 @@ struct CoulombP3MImpl : public CoulombP3M {
                            ParticleRange const &particles);
   void calc_influence_function_force() override;
   void calc_influence_function_energy() override;
+  void scaleby_box_l() override;
   void init_cpu_kernels();
 #ifdef CUDA
   void init_gpu_kernels();
@@ -170,13 +184,15 @@ struct CoulombP3MImpl : public CoulombP3M {
 };
 
 template <typename FloatType, Arch Architecture,
-          template <typename> class FFTBackendImpl, class... Args>
+          template <typename> class FFTBackendImpl,
+          template <typename> class P3MFFTMeshImpl, class... Args>
 std::shared_ptr<CoulombP3M> new_p3m_handle(P3MParameters &&p3m,
                                            Args &&...args) {
   auto obj = std::make_shared<CoulombP3MImpl<FloatType, Architecture>>(
       std::make_unique<p3m_data_struct_coulomb<FloatType>>(std::move(p3m)),
       std::forward<Args>(args)...);
-  obj->p3m.template make_fft_instance<FFTBackendImpl<FloatType>>(false);
+  obj->p3m.template make_mesh_instance<P3MFFTMeshImpl<FloatType>>();
+  obj->p3m.template make_fft_instance<FFTBackendImpl<FloatType>>();
   return obj;
 }