CMake: Add Open MPI singleton guard
On AMD Ryzen and AMD EPYC CPUs, Open MPI version 4.x does not work
properly in singleton mode if the MCA binding policy is set to "numa".
Print a harmless warning in pypresso/ipypresso when the CPU is affected,
to help users troubleshoot the ORTE error message.
jngrad committed Nov 21, 2022
1 parent bf6229a commit 1da59fe
Showing 5 changed files with 63 additions and 7 deletions.
41 changes: 36 additions & 5 deletions CMakeLists.txt
@@ -111,6 +111,10 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT APPLE)
    "Build with memory sanitizer (experimental; requires a memory-sanitized Python interpreter)"
    OFF)
endif()
option(
  ESPRESSO_ADD_OMPI_SINGLETON_WARNING
  "Add a runtime warning in the pypresso script for NUMA architectures that aren't supported in singleton mode by Open MPI 4.x"
  ON)
option(WARNINGS_ARE_ERRORS "Treat warnings as errors during compilation" OFF)
option(WITH_CCACHE "Use ccache compiler invocation." OFF)
option(WITH_PROFILER "Enable profiler annotations." OFF)
@@ -320,11 +324,38 @@ find_package(MPI 3.0 REQUIRED)
find_package(MpiexecBackend)

# OpenMPI checks the number of processes against the number of CPUs
if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI" AND "${MPIEXEC_BACKEND_VERSION}"
VERSION_GREATER_EQUAL 2.0.0)
set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe")
else()
set(MPIEXEC_OVERSUBSCRIBE "")
set(MPIEXEC_OVERSUBSCRIBE "")
# Open MPI 4.x has a bug on NUMA archs that prevents running in singleton mode
set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA OFF)
set(ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN "AMD (EPYC|Ryzen)")

if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI")
if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 2.0.0)
set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe")
endif()
if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 4.0
AND "${MPIEXEC_BACKEND_VERSION}" VERSION_LESS 5.0)
if(NOT DEFINED ESPRESSO_CPU_MODEL_NAME)
if(CMAKE_SYSTEM_NAME STREQUAL Linux)
if(EXISTS /proc/cpuinfo)
file(READ /proc/cpuinfo ESPRESSO_CPU_INFO)
string(REGEX
REPLACE ".*\n[Mm]odel name[ \t]*:[ \t]+([^\n]+).*" "\\1"
ESPRESSO_CPU_MODEL_NAME_STRING "${ESPRESSO_CPU_INFO}")
else()
set(ESPRESSO_CPU_MODEL_NAME_STRING "__unreadable")
endif()
else()
set(ESPRESSO_CPU_MODEL_NAME_STRING "__unaffected")
endif()
set(ESPRESSO_CPU_MODEL_NAME "${ESPRESSO_CPU_MODEL_NAME_STRING}"
CACHE INTERNAL "")
endif()
if(ESPRESSO_CPU_MODEL_NAME MATCHES
"^${ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN}")
set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA ON)
endif()
endif()
endif()

# OpenMPI cannot run two jobs in parallel in a Docker container, because the
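Since the CPU detection above only runs when ``ESPRESSO_CPU_MODEL_NAME`` is undefined, the cached value can presumably be pre-seeded at configure time to bypass the guard; a hypothetical sketch, reusing the "__unaffected" sentinel from the detection code:

    # force the NUMA singleton guard OFF by seeding a non-matching model name
    cmake .. -D ESPRESSO_CPU_MODEL_NAME="__unaffected"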
3 changes: 3 additions & 0 deletions cmake/unit_test.cmake
@@ -52,6 +52,9 @@ function(UNIT_TEST)
list(APPEND TEST_ENV_VARIABLES "UBSAN_OPTIONS=suppressions=${CMAKE_SOURCE_DIR}/maintainer/CI/ubsan.supp:${SANITIZERS_HALT_ON_ERROR}:print_stacktrace=1")
list(APPEND TEST_ENV_VARIABLES "ASAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}:detect_leaks=0:allocator_may_return_null=1")
list(APPEND TEST_ENV_VARIABLES "MSAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}")
if(NOT TEST_NUM_PROC AND ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA
   AND "${TEST_DEPENDS}" MATCHES "(^|;)([Bb]oost::mpi|MPI::MPI_CXX)($|;)")
  list(APPEND TEST_ENV_VARIABLES "OMPI_MCA_hwloc_base_binding_policy=none")
endif()
set_tests_properties(
${TEST_NAME} PROPERTIES ENVIRONMENT "${TEST_ENV_VARIABLES}")
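The same policy override can be applied by hand when running tests outside of this machinery; a minimal sketch, assuming an already-configured build directory:

    # mirror the per-test environment injected above for singleton MPI tests
    OMPI_MCA_hwloc_base_binding_policy=none ctest --output-on-failure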

14 changes: 14 additions & 0 deletions doc/sphinx/installation.rst
@@ -67,6 +67,15 @@ are required to be able to compile and use |es|:
Other MPI implementations like Intel MPI should also work, although
they are not actively tested in |es| continuous integration.

Open MPI version 4.x is known not to properly support the MCA binding
policy "numa" in singleton mode on a few NUMA architectures.
On affected systems, e.g. AMD Ryzen or AMD EPYC, Open MPI halts with
a fatal error when setting the processor affinity in ``MPI_Init``.
This issue can be resolved by setting the environment variable
``OMPI_MCA_hwloc_base_binding_policy`` to a value other than "numa",
such as "l3cache" to bind to a NUMA shared memory block, or "none"
to disable binding entirely (which can cause performance loss).

Python
|es|'s main user interface relies on Python 3.

@@ -743,6 +752,11 @@ The following options are available:
* ``WITH_VALGRIND_INSTRUMENTATION``: Build with valgrind instrumentation
  markers

* ``ESPRESSO_ADD_OMPI_SINGLETON_WARNING``: Add a runtime warning in the
  pypresso and ipypresso scripts that is triggered in singleton mode
  with Open MPI version 4.x on unsupported NUMA environments
  (see :term:`MPI installation requirements <MPI>` for details).
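A hypothetical configure invocation that disables this warning (standard
out-of-source build layout assumed)::

    cmake .. -D ESPRESSO_ADD_OMPI_SINGLETON_WARNING=OFF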

When the value in the :file:`CMakeLists.txt` file is set to ON, the
corresponding option is created; if the value of the option is set to OFF,
the corresponding option is not created. These options can also be modified
3 changes: 1 addition & 2 deletions doc/sphinx/running.rst
@@ -292,8 +292,7 @@ Parallel computing

Many algorithms in |es| are designed to work with multiple MPI ranks.
However, not all algorithms benefit from MPI parallelization equally.
-Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`), while
-a small subset simply don't support MPI (e.g. :ref:`Dipolar direct sum`).
Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`).
|es| should work with most MPI implementations on the market;
see the :term:`MPI installation requirements <MPI>` for details.
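For instance, a simulation script can be run in parallel through the
wrapper (rank count and script name are placeholders)::

    mpiexec -n 4 ./pypresso simulation.py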

9 changes: 9 additions & 0 deletions src/python/pypresso.cmakein
@@ -14,6 +14,15 @@ else
fi
export PYTHONPATH

# Open MPI 4.x cannot run in singleton mode on some NUMA systems
if [ "@ESPRESSO_ADD_OMPI_SINGLETON_WARNING@" = "ON" ] && [ "@ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA@" = "ON" ]; then
  if [ -z "${OMPI_COMM_WORLD_SIZE}" ] && [ "${OMPI_MCA_hwloc_base_binding_policy}" = "numa" ]; then
    if test -f /proc/cpuinfo && grep --quiet -P "^[Mm]odel name[ \t]*:[ \t]+@ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN@( |$)" /proc/cpuinfo; then
      echo "warning: if Open MPI fails to set processor affinity, set environment variable OMPI_MCA_hwloc_base_binding_policy to \"none\" or \"l3cache\""
    fi
  fi
fi
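# For reference, the CPU check above can be reproduced by hand; a sketch,
# with the @...@ placeholder expanded to the "AMD (EPYC|Ryzen)" pattern
# defined in CMakeLists.txt (GNU grep assumed for -P):
#   grep -P "^[Mm]odel name[ \t]*:[ \t]+AMD (EPYC|Ryzen)( |$)" /proc/cpuinfo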

if [ "@CMAKE_CXX_COMPILER_ID@" != "GNU" ] && [ "@WITH_ASAN@" = "ON" ]; then
asan_lib=$("@CMAKE_CXX_COMPILER@" /dev/null -### -o /dev/null -fsanitize=address 2>&1 | grep -o '[" ][^" ]*libclang_rt.asan[^" ]*[^s][" ]' | sed 's/[" ]//g' | sed 's/\.a$/.so/g')
export DYLD_INSERT_LIBRARIES="$asan_lib"
