Skip to content

Commit

Permalink
FFT refactoring (#4985)
Browse files Browse the repository at this point in the history
Fixes #4963
Follow-up to #4969

Description of changes:
- separate FFT backend from FFT buffers with type erasure
- extract FFT packing functions
  • Loading branch information
kodiakhq[bot] authored Aug 21, 2024
2 parents 8c90c3d + 6d08c8b commit 030d3c8
Show file tree
Hide file tree
Showing 25 changed files with 752 additions and 491 deletions.
2 changes: 1 addition & 1 deletion src/core/electrostatics/coulomb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ Solver::calc_pressure_long_range(ParticleRange const &particles) const {
struct ShortRangeCutoff {
#ifdef P3M
auto operator()(std::shared_ptr<CoulombP3M> const &actor) const {
return actor->p3m.params.r_cut;
return actor->p3m_params.r_cut;
}
auto
operator()(std::shared_ptr<ElectrostaticLayerCorrection> const &actor) const {
Expand Down
5 changes: 3 additions & 2 deletions src/core/electrostatics/elc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,8 @@ void ElectrostaticLayerCorrection::adapt_solver() {
std::visit(
[this](auto &solver) {
set_prefactor(solver->prefactor);
solver->p3m.params.epsilon = P3M_EPSILON_METALLIC;
solver->adapt_epsilon_elc();
assert(solver->p3m_params.epsilon == P3M_EPSILON_METALLIC);
},
base_solver);
}
Expand All @@ -1031,7 +1032,7 @@ void ElectrostaticLayerCorrection::recalc_box_h() {
void ElectrostaticLayerCorrection::recalc_space_layer() {
if (elc.dielectric_contrast_on) {
auto const p3m_r_cut = std::visit(
[](auto &solver) { return solver->p3m.params.r_cut; }, base_solver);
[](auto &solver) { return solver->p3m_params.r_cut; }, base_solver);
// recalculate the space layer size:
// 1. set the space_layer to be 1/3 of the gap size, so that box = layer
elc.space_layer = (1. / 3.) * elc.gap_size;
Expand Down
44 changes: 28 additions & 16 deletions src/core/electrostatics/p3m.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
#include <tuple>
#include <utility>

#ifdef FFTW3_H
#error "The FFTW3 library shouldn't be visible in this translation unit"
#endif

template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
auto local_n = 0;
Expand Down Expand Up @@ -262,8 +266,11 @@ void CoulombP3MImpl<FloatType, Architecture>::init_cpu_kernels() {

assert(p3m.fft);
p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
p3m.fft->init_halo();
p3m.fft->init_fft();
p3m.fft_buffers->init_halo();
p3m.fft->init(p3m.params);
p3m.mesh.ks_pnum = p3m.fft->get_ks_pnum();
p3m.fft_buffers->init_meshes(p3m.fft->get_ca_mesh_size());
p3m.update_mesh_views();
p3m.calc_differential_operator();

/* fix box length dependent constants */
Expand Down Expand Up @@ -391,8 +398,9 @@ Utils::Vector9d CoulombP3MImpl<FloatType, Architecture>::long_range_pressure(

if (p3m.sum_q2 > 0.) {
charge_assign(particles);
p3m.fft->perform_scalar_halo_gather();
p3m.fft->perform_scalar_fwd_fft();
p3m.fft_buffers->perform_scalar_halo_gather();
p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
p3m.update_mesh_views();

auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
auto const &mesh_stop = p3m.mesh.size;
Expand Down Expand Up @@ -457,8 +465,9 @@ double CoulombP3MImpl<FloatType, Architecture>::long_range_kernel(
system.coulomb.impl->solver)) {
charge_assign(particles);
}
p3m.fft->perform_scalar_halo_gather();
p3m.fft->perform_scalar_fwd_fft();
p3m.fft_buffers->perform_scalar_halo_gather();
p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
p3m.update_mesh_views();
}

auto p_q_range = ParticlePropertyRange::charge_range(particles);
Expand Down Expand Up @@ -515,8 +524,10 @@ double CoulombP3MImpl<FloatType, Architecture>::long_range_kernel(
auto const check_residuals =
not p3m.params.tuning and check_complex_residuals;
p3m.fft->check_complex_residuals = check_residuals;
p3m.fft->perform_vector_back_fft();
p3m.fft->perform_vector_halo_spread();
for (auto &rs_mesh : p3m.fft_buffers->get_vector_mesh()) {
p3m.fft->backward_fft(rs_mesh);
}
p3m.fft_buffers->perform_vector_halo_spread();
p3m.fft->check_complex_residuals = false;

auto const force_prefac = prefactor / volume;
Expand Down Expand Up @@ -785,25 +796,25 @@ void CoulombP3M::sanity_checks_boxl() const {
auto const &local_geo = *system.local_geo;
for (auto i = 0u; i < 3u; i++) {
/* check k-space cutoff */
if (p3m.params.cao_cut[i] >= box_geo.length_half()[i]) {
if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
std::stringstream msg;
msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
<< " is larger than half of box dimension " << box_geo.length()[i];
throw std::runtime_error(msg.str());
}
if (p3m.params.cao_cut[i] >= local_geo.length()[i]) {
if (p3m_params.cao_cut[i] >= local_geo.length()[i]) {
std::stringstream msg;
msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
<< " is larger than local box dimension " << local_geo.length()[i];
throw std::runtime_error(msg.str());
}
}

if (p3m.params.epsilon != P3M_EPSILON_METALLIC) {
if (p3m_params.epsilon != P3M_EPSILON_METALLIC) {
if ((box_geo.length()[0] != box_geo.length()[1]) or
(box_geo.length()[1] != box_geo.length()[2]) or
(p3m.params.mesh[0] != p3m.params.mesh[1]) or
(p3m.params.mesh[1] != p3m.params.mesh[2])) {
(p3m_params.mesh[0] != p3m_params.mesh[1]) or
(p3m_params.mesh[1] != p3m_params.mesh[2])) {
throw std::runtime_error(
"CoulombP3M: non-metallic epsilon requires cubic box");
}
Expand Down Expand Up @@ -841,7 +852,8 @@ void CoulombP3M::sanity_checks_node_grid() const {
}
}

void CoulombP3M::scaleby_box_l() {
template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::scaleby_box_l() {
auto const &box_geo = *get_system().box_geo;
p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];
Expand Down
40 changes: 11 additions & 29 deletions src/core/electrostatics/p3m.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,38 +50,19 @@

#include <cmath>
#include <numbers>
#include <stdexcept>

/** @brief P3M solver. */
struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
/** @brief Coulomb P3M parameters. */
p3m_data_struct &p3m;

int tune_timings;
bool tune_verbose;
bool check_complex_residuals;

protected:
bool m_is_tuned;
P3MParameters const &p3m_params;

public:
CoulombP3M(p3m_data_struct &p3m_data, double prefactor, int tune_timings,
bool tune_verbose, bool check_complex_residuals)
: p3m{p3m_data}, tune_timings{tune_timings}, tune_verbose{tune_verbose},
check_complex_residuals{check_complex_residuals} {

if (tune_timings <= 0) {
throw std::domain_error("Parameter 'timings' must be > 0");
}
m_is_tuned = not p3m.params.tuning;
p3m.params.tuning = false;
set_prefactor(prefactor);
}
CoulombP3M(P3MParameters const &p3m_params) : p3m_params{p3m_params} {}

virtual ~CoulombP3M() = default;

[[nodiscard]] bool is_tuned() const { return m_is_tuned; }
[[nodiscard]] virtual bool is_tuned() const noexcept = 0;
[[nodiscard]] virtual bool is_gpu() const noexcept = 0;
[[nodiscard]] virtual bool is_double_precision() const noexcept = 0;

/** @brief Recalculate all derived parameters. */
virtual void init() = 0;
Expand Down Expand Up @@ -109,6 +90,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
*/
virtual void count_charged_particles() = 0;
virtual void count_charged_particles_elc(int, double, double) = 0;
virtual void adapt_epsilon_elc() = 0;

/**
* @brief Tune P3M parameters to desired accuracy.
Expand All @@ -119,7 +101,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
* @ref P3MParameters::r_cut_iL "r_cut_iL" and
* @ref P3MParameters::alpha_L "alpha_L" are tuned to obtain the target
* @ref P3MParameters::accuracy "accuracy" in optimal time.
* These parameters are stored in the @ref p3m object.
* These parameters are stored in the @ref p3m_params object.
*
* The function utilizes the analytic expression of the error estimate
* for the P3M method in @cite hockney88a (eq. (8.23)) in
Expand Down Expand Up @@ -163,10 +145,10 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
/** Calculate real-space contribution of p3m Coulomb pair forces. */
Utils::Vector3d pair_force(double q1q2, Utils::Vector3d const &d,
double dist) const {
if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) {
if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) {
return {};
}
auto const alpha = p3m.params.alpha;
auto const alpha = p3m_params.alpha;
auto const adist = alpha * dist;
auto const exp_adist_sq = exp(-adist * adist);
auto const dist_sq = dist * dist;
Expand All @@ -184,10 +166,10 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
/** Calculate real-space contribution of Coulomb pair energy. */
// Eq. (3.6) @cite deserno00b
double pair_energy(double q1q2, double dist) const {
if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) {
if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) {
return {};
}
auto const adist = p3m.params.alpha * dist;
auto const adist = p3m_params.alpha * dist;
#if USE_ERFC_APPROXIMATION
auto const erfc_part_ri = Utils::AS_erfc_part(adist) / dist;
return prefactor * q1q2 * erfc_part_ri * exp(-adist * adist);
Expand Down Expand Up @@ -216,7 +198,7 @@ struct CoulombP3M : public Coulomb::Actor<CoulombP3M> {
void sanity_checks_periodicity() const;
void sanity_checks_cell_structure() const;

virtual void scaleby_box_l();
virtual void scaleby_box_l() = 0;
};

#endif // P3M
62 changes: 39 additions & 23 deletions src/core/electrostatics/p3m.impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/** @file
* P3M algorithm for long-range Coulomb interaction.
*
* We use a P3M (Particle-Particle Particle-Mesh) method based on the
* Ewald summation. Details of the used method can be found in
* @cite hockney88a and @cite deserno98a @cite deserno98b.
*
* Further reading: @cite ewald21a, @cite hockney88a, @cite deserno98a,
* @cite deserno98b, @cite deserno00e, @cite deserno00b, @cite cerda08d.
*
* Implementation in p3m.cpp.
*/

#pragma once

#include "config/config.hpp"
Expand All @@ -49,11 +36,13 @@
#include <utils/Vector.hpp>

#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>

template <typename FloatType>
struct p3m_data_struct_coulomb : public p3m_data_struct_fft<FloatType> {
using p3m_data_struct_fft<FloatType>::p3m_data_struct_fft;
struct p3m_data_struct_coulomb : public p3m_data_struct<FloatType> {
using p3m_data_struct<FloatType>::p3m_data_struct;

/** number of charged particles (only on head node). */
int sum_qpart = 0;
Expand All @@ -73,17 +62,34 @@ template <typename FloatType, Arch Architecture>
struct CoulombP3MImpl : public CoulombP3M {
~CoulombP3MImpl() override = default;

/** @brief Coulomb P3M parameters. */
p3m_data_struct_coulomb<FloatType> &p3m;

private:
/** @brief Coulomb P3M meshes and FFT algorithm. */
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
// this member overshadows its own base class pointer in the parent class
p3m_data_struct_coulomb<FloatType> &p3m;
int tune_timings;
bool tune_verbose;
bool check_complex_residuals;
bool m_is_tuned;

template <typename... Args>
public:
CoulombP3MImpl(
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
Args &&...args)
: CoulombP3M(*p3m_handle, std::forward<Args>(args)...),
p3m_impl{std::move(p3m_handle)}, p3m{*p3m_impl} {}
double prefactor, int tune_timings, bool tune_verbose,
bool check_complex_residuals)
: CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
tune_verbose{tune_verbose},
check_complex_residuals{check_complex_residuals} {

if (tune_timings <= 0) {
throw std::domain_error("Parameter 'timings' must be > 0");
}
m_is_tuned = not p3m.params.tuning;
p3m.params.tuning = false;
set_prefactor(prefactor);
}

void init() override {
if constexpr (Architecture == Arch::CPU) {
Expand All @@ -103,10 +109,17 @@ struct CoulombP3MImpl : public CoulombP3M {
p3m.sum_q2 = sum_q2;
p3m.square_sum_q = square_sum_q;
}
void adapt_epsilon_elc() override {
p3m.params.epsilon = P3M_EPSILON_METALLIC;
}

[[nodiscard]] bool is_tuned() const noexcept override { return m_is_tuned; }
[[nodiscard]] bool is_gpu() const noexcept override {
return Architecture == Arch::GPU;
}
[[nodiscard]] bool is_double_precision() const noexcept override {
return std::is_same_v<FloatType, double>;
}

void on_activation() override {
#ifdef CUDA
Expand Down Expand Up @@ -160,6 +173,7 @@ struct CoulombP3MImpl : public CoulombP3M {
ParticleRange const &particles);
void calc_influence_function_force() override;
void calc_influence_function_energy() override;
void scaleby_box_l() override;
void init_cpu_kernels();
#ifdef CUDA
void init_gpu_kernels();
Expand All @@ -170,13 +184,15 @@ struct CoulombP3MImpl : public CoulombP3M {
};

template <typename FloatType, Arch Architecture,
template <typename> class FFTBackendImpl, class... Args>
template <typename> class FFTBackendImpl,
template <typename> class P3MFFTMeshImpl, class... Args>
std::shared_ptr<CoulombP3M> new_p3m_handle(P3MParameters &&p3m,
Args &&...args) {
auto obj = std::make_shared<CoulombP3MImpl<FloatType, Architecture>>(
std::make_unique<p3m_data_struct_coulomb<FloatType>>(std::move(p3m)),
std::forward<Args>(args)...);
obj->p3m.template make_fft_instance<FFTBackendImpl<FloatType>>(false);
obj->p3m.template make_mesh_instance<P3MFFTMeshImpl<FloatType>>();
obj->p3m.template make_fft_instance<FFTBackendImpl<FloatType>>();
return obj;
}

Expand Down
Loading

0 comments on commit 030d3c8

Please sign in to comment.