Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA exception handling #4095

Merged
merged 22 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cmake/FindCUDACompilerClang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ function(find_gpu_library)
endif()
endfunction(find_gpu_library)

find_gpu_library(VARNAME CUDA_LIBRARY NAMES cuda REQUIRED)
find_gpu_library(VARNAME CUDART_LIBRARY NAMES cudart REQUIRED)
find_gpu_library(VARNAME CUFFT_LIBRARY NAMES cufft REQUIRED)

Expand Down
1 change: 0 additions & 1 deletion cmake/FindCUDACompilerNVCC.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ function(find_gpu_library)
endif()
endfunction(find_gpu_library)

find_gpu_library(VARNAME CUDA_LIBRARY NAMES cuda REQUIRED)
find_gpu_library(VARNAME CUDART_LIBRARY NAMES cudart REQUIRED)
find_gpu_library(VARNAME CUDA_CUFFT_LIBRARIES NAMES cufft REQUIRED)

Expand Down
35 changes: 26 additions & 9 deletions doc/sphinx/system_setup.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,33 @@ For more information please check :class:`espressomd.cuda_init.CudaInitHandle`.
List available CUDA devices
~~~~~~~~~~~~~~~~~~~~~~~~~~~

If you want to list available CUDA devices
you should access :attr:`espressomd.cuda_init.CudaInitHandle.device_list`, e.g., ::
If you want to list available CUDA devices, you should call
:meth:`espressomd.cuda_init.CudaInitHandle.list_devices`::

system = espressomd.System(box_l=[1, 1, 1])

print(system.cuda_init_handle.device_list)
>>> import espressomd
>>> system = espressomd.System(box_l=[1, 1, 1])
>>> print(system.cuda_init_handle.list_devices())
{0: 'GeForce RTX 2080', 1: 'GeForce GT 730'}

This attribute is read only and will return a dictionary containing
This method returns a dictionary with
the device ids as keys and the device names as values.

To get more details on the CUDA devices for each MPI node, call
:meth:`espressomd.cuda_init.CudaInitHandle.list_devices_properties`::

>>> import pprint
>>> import espressomd
>>> system = espressomd.System(box_l=[1, 1, 1])
>>> pprint.pprint(system.cuda_init_handle.list_devices_properties())
{'seraue': {0: {'name': 'GeForce RTX 2080',
'compute_capability': (7, 5),
'cores': 46,
'total_memory': 8370061312},
1: {'name': 'GeForce GT 730',
'compute_capability': (3, 5),
'cores': 2,
'total_memory': 1014104064}}}

.. _Selection of CUDA device:

Selection of CUDA device
Expand All @@ -250,9 +267,9 @@ When you start ``pypresso`` your first GPU should be selected.
If you wanted to use the second GPU, this can be done
by setting :attr:`espressomd.cuda_init.CudaInitHandle.device` as follows::

system = espressomd.System(box_l=[1, 1, 1])

system.cuda_init_handle.device = 1
>>> import espressomd
>>> system = espressomd.System(box_l=[1, 1, 1])
>>> system.cuda_init_handle.device = 1

Setting a device id outside the valid range or a device
which does not meet the minimum requirements will raise
Expand Down
2 changes: 1 addition & 1 deletion maintainer/CI/build_cmake.sh
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ if [ "${run_checks}" = true ]; then

# fail if built with CUDA but no compatible GPU was found
if [ "${with_cuda}" = true ] && [ "${hide_gpu}" != true ]; then
./pypresso -c "import espressomd;assert espressomd.gpu_available(), 'No GPU available'" || exit 1
./pypresso -c "import espressomd.cuda_init as gpu;gpu.CudaInitHandle().device = 0" || exit 1
fi

# unit tests
Expand Down
17 changes: 8 additions & 9 deletions src/core/CellStructure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,22 +459,21 @@ struct CellStructure {

public:
/**
* @brief Set the particle decomposition to
* AtomDecomposition.
* @brief Set the particle decomposition to AtomDecomposition.
*
* @param comm Communicator to use.
* @param box Box Geometry
* @param comm Communicator to use.
* @param box Box Geometry
*/
void set_atom_decomposition(boost::mpi::communicator const &comm,
BoxGeometry const &box);

/**
* @brief Set the particle decomposition to
* DomainDecomposition.
* @brief Set the particle decomposition to DomainDecomposition.
*
* @param comm Cartesian communicator to use.
* @param box Box Geometry
* @param local_geo Geometry of the local box.
* @param comm Cartesian communicator to use.
* @param range Interaction range.
* @param box Box Geometry
* @param local_geo Geometry of the local box.
*/
void set_domain_decomposition(boost::mpi::communicator const &comm,
double range, BoxGeometry const &box,
Expand Down
50 changes: 24 additions & 26 deletions src/core/DomainDecomposition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,10 @@
*
*/
struct DomainDecomposition : public ParticleDecomposition {
/** Grind dimensions per node. */
/** Grid dimensions per node. */
Utils::Vector3i cell_grid = {};
/** cell size. */
/** Cell size. */
Utils::Vector3d cell_size = {};

private:
/** Offset in global grid */
Utils::Vector3i cell_offset = {};
/** linked cell grid with ghost frame. */
Expand Down Expand Up @@ -120,16 +118,16 @@ struct DomainDecomposition : public ParticleDecomposition {
}

private:
/** Fill local_cells list and ghost_cells list for use with domain
/** Fill @c m_local_cells list and @c m_ghost_cells list for use with domain
* decomposition.
*/
void mark_cells();

/** Fill a communication cell pointer list. Fill the cell pointers of
* all cells which are inside a rectangular subgrid of the 3D cell
* grid starting from the
* lower left corner lc up to the high top corner hc. The cell
* pointer list part_lists must already be large enough.
* lower left corner @p lc up to the high top corner @p hc. The cell
* pointer list @p part_lists must already be large enough.
* \param part_lists List of cell pointers to store the result.
* \param lc lower left corner of the subgrid.
* \param hc high up corner of the subgrid.
Expand Down Expand Up @@ -159,10 +157,10 @@ struct DomainDecomposition : public ParticleDecomposition {
/**
* @brief Split particle list by direction.
*
* Moves all particles from src into left
* and right depending if they belong to
* the left or right side from local node
* in direction dir.
* Moves all particles from @p src into @p left
* or @p right depending on whether they belong
* to the left or right side of the local node
* in direction @p dir.
*
* @param src Particles to sort.
* @param left Particles that should go to the left
Expand All @@ -185,36 +183,36 @@ struct DomainDecomposition : public ParticleDecomposition {
* @brief Calculate cell grid dimensions, cell sizes and number of cells.
*
* Calculates the cell grid, based on the local box size and the range.
* If the number of cells is larger than max_num_cells,
* it increases max_range until the number of cells is
* smaller or equal max_num_cells. It sets:
* cell_grid,
* ghost_cell_grid,
* cell_size, and
* inv_cell_size.
* If the number of cells is larger than @c max_num_cells,
* it increases @c max_range until the number of cells is
* smaller or equal to @c max_num_cells. It sets:
* @c cell_grid,
* @c ghost_cell_grid,
* @c cell_size, and
* @c inv_cell_size.
*
* @param range Required interacting range. All pairs closer
* than this distance are found.
* @param range interaction range. All pairs closer
* than this distance are found.
*/
void create_cell_grid(double range);

/** Init cell interactions for cell system domain decomposition.
* Initializes the interacting neighbor cell list of a cell.
* This list of interacting neighbor cells is used by the Verlet
* algorithm.
* algorithm.
*/
void init_cell_interactions();

/** Create communicators for cell structure domain decomposition. (see \ref
* GhostCommunicator)
/** Create communicators for cell structure domain decomposition (see \ref
* GhostCommunicator).
*/
GhostCommunicator prepare_comm();

/** Maximal number of cells per node. In order to avoid memory
* problems due to the cell grid one has to specify the maximal
* problems due to the cell grid, one has to specify the maximal
* number of cells. If the number of cells is larger
* than max_num_cells the cell grid is reduced.
* max_num_cells has to be larger than 27, e.g. one inner cell.
* than @c max_num_cells, the cell grid is reduced.
* @c max_num_cells has to be larger than 27, e.g. one inner cell.
*/
static constexpr int max_num_cells = 32768;
};
Expand Down
2 changes: 1 addition & 1 deletion src/core/EspressoSystemInterface_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include "EspressoSystemInterface.hpp"
#include "cuda_interface.hpp"
#include "cuda_utils.hpp"
#include "cuda_utils.cuh"
#include "errorhandling.hpp"

#include <cuda.h>
Expand Down
25 changes: 18 additions & 7 deletions src/core/actor/DipolarBarnesHut.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "DipolarBarnesHut_cuda.cuh"
#include "SystemInterface.hpp"
#include "cuda_interface.hpp"
#include "cuda_utils.hpp"
#include "electrostatics_magnetostatics/dipole.hpp"
#include "errorhandling.hpp"

Expand All @@ -38,7 +39,7 @@ typedef float dds_float;
class DipolarBarnesHut : public Actor {
public:
DipolarBarnesHut(SystemInterface &s, float epssq, float itolsq) {
k = static_cast<float>(dipole.prefactor);
m_k = static_cast<float>(dipole.prefactor);
m_epssq = epssq;
m_itolsq = itolsq;
setBHPrecision(&m_epssq, &m_itolsq);
Expand All @@ -53,34 +54,44 @@ class DipolarBarnesHut : public Actor {
};

void computeForces(SystemInterface &s) override {
allocBHmemCopy(static_cast<int>(s.npart_gpu()), &m_bh_data);
try {
allocBHmemCopy(static_cast<int>(s.npart_gpu()), &m_bh_data);
} catch (cuda_runtime_error const &err) {
runtimeErrorMsg() << "DipolarBarnesHut: " << err.what();
return;
}

fillConstantPointers(s.rGpuBegin(), s.dipGpuBegin(), m_bh_data);
initBHgpu(m_bh_data.blocks);
buildBoxBH(m_bh_data.blocks);
buildTreeBH(m_bh_data.blocks);
summarizeBH(m_bh_data.blocks);
sortBH(m_bh_data.blocks);
if (forceBH(&m_bh_data, k, s.fGpuBegin(), s.torqueGpuBegin())) {
if (forceBH(&m_bh_data, m_k, s.fGpuBegin(), s.torqueGpuBegin())) {
runtimeErrorMsg() << "kernels encountered a functional error";
}
};
void computeEnergy(SystemInterface &s) override {
allocBHmemCopy(static_cast<int>(s.npart_gpu()), &m_bh_data);
try {
allocBHmemCopy(static_cast<int>(s.npart_gpu()), &m_bh_data);
} catch (cuda_runtime_error const &err) {
runtimeErrorMsg() << "DipolarBarnesHut: " << err.what();
return;
}

fillConstantPointers(s.rGpuBegin(), s.dipGpuBegin(), m_bh_data);
initBHgpu(m_bh_data.blocks);
buildBoxBH(m_bh_data.blocks);
buildTreeBH(m_bh_data.blocks);
summarizeBH(m_bh_data.blocks);
sortBH(m_bh_data.blocks);
if (energyBH(&m_bh_data, k, (&(((CUDA_energy *)s.eGpu())->dipolar)))) {
if (energyBH(&m_bh_data, m_k, (&(((CUDA_energy *)s.eGpu())->dipolar)))) {
runtimeErrorMsg() << "kernels encountered a functional error";
}
};

protected:
float k;
private:
float m_k;
float m_epssq;
float m_itolsq;
BHData m_bh_data = {0, 0, 0, nullptr, nullptr,
Expand Down
10 changes: 4 additions & 6 deletions src/core/actor/DipolarBarnesHut_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
#include "DipolarBarnesHut_cuda.cuh"

#include "cuda_init.hpp"
#include "cuda_utils.hpp"
#include "cuda_utils.cuh"
#include "errorhandling.hpp"

#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
Expand Down Expand Up @@ -1181,11 +1182,8 @@ void allocBHmemCopy(int nbodies, BHData *bh_data) {

bh_data->nbodies = nbodies;

int devID = -1;
EspressoGpuDevice dev;

devID = cuda_get_device();
cuda_get_device_props(devID, dev);
auto const devID = cuda_get_device();
EspressoGpuDevice const dev = cuda_get_device_props(devID);

bh_data->blocks = dev.n_cores;
// Each node corresponds to a split of the cubic box in 3D space to equal
Expand Down
Loading