diff --git a/CMakeLists.txt b/CMakeLists.txt index 37b891d74ad..7ceede6dec2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,10 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT APPLE) "Build with memory sanitizer (experimental; requires a memory-sanitized Python interpreter)" OFF) endif() +option( + ESPRESSO_ADD_OMPI_SINGLETON_WARNING + "Add a runtime warning in the pypresso script for NUMA architectures that aren't supported in singleton mode by Open MPI 4.x" + ON) option(WARNINGS_ARE_ERRORS "Treat warnings as errors during compilation" OFF) option(WITH_CCACHE "Use ccache compiler invocation." OFF) option(WITH_PROFILER "Enable profiler annotations." OFF) @@ -320,11 +324,38 @@ find_package(MPI 3.0 REQUIRED) find_package(MpiexecBackend) # OpenMPI checks the number of processes against the number of CPUs -if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI" AND "${MPIEXEC_BACKEND_VERSION}" - VERSION_GREATER_EQUAL 2.0.0) - set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe") -else() - set(MPIEXEC_OVERSUBSCRIBE "") +set(MPIEXEC_OVERSUBSCRIBE "") +# Open MPI 4.x has a bug on NUMA archs that prevents running in singleton mode +set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA OFF) +set(ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN "AMD (EPYC|Ryzen)") + +if("${MPIEXEC_BACKEND_NAME}" STREQUAL "OpenMPI") + if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 2.0.0) + set(MPIEXEC_OVERSUBSCRIBE "-oversubscribe") + endif() + if("${MPIEXEC_BACKEND_VERSION}" VERSION_GREATER_EQUAL 4.0 + AND "${MPIEXEC_BACKEND_VERSION}" VERSION_LESS 5.0) + if(NOT DEFINED ESPRESSO_CPU_MODEL_NAME) + if(CMAKE_SYSTEM_NAME STREQUAL Linux) + if(EXISTS /proc/cpuinfo) + file(READ /proc/cpuinfo ESPRESSO_CPU_INFO) + string(REGEX + REPLACE ".*\n[Mm]odel name[ \t]*:[ \t]+([^\n]+).*" "\\1" + ESPRESSO_CPU_MODEL_NAME_STRING "${ESPRESSO_CPU_INFO}") + else() + set(ESPRESSO_CPU_MODEL_NAME_STRING "__unreadable") + endif() + else() + set(ESPRESSO_CPU_MODEL_NAME_STRING "__unaffected") + endif() + 
set(ESPRESSO_CPU_MODEL_NAME "${ESPRESSO_CPU_MODEL_NAME_STRING}" + CACHE INTERNAL "") + endif() + if(ESPRESSO_CPU_MODEL_NAME MATCHES + "^${ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN}") + set(ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA ON) + endif() + endif() endif() # OpenMPI cannot run two jobs in parallel in a Docker container, because the diff --git a/cmake/unit_test.cmake b/cmake/unit_test.cmake index cb993846436..e7b90a1b7e3 100644 --- a/cmake/unit_test.cmake +++ b/cmake/unit_test.cmake @@ -52,6 +52,9 @@ function(UNIT_TEST) list(APPEND TEST_ENV_VARIABLES "UBSAN_OPTIONS=suppressions=${CMAKE_SOURCE_DIR}/maintainer/CI/ubsan.supp:${SANITIZERS_HALT_ON_ERROR}:print_stacktrace=1") list(APPEND TEST_ENV_VARIABLES "ASAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}:detect_leaks=0:allocator_may_return_null=1") list(APPEND TEST_ENV_VARIABLES "MSAN_OPTIONS=${SANITIZERS_HALT_ON_ERROR}") + if(NOT TEST_NUM_PROC AND ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA AND "${TEST_DEPENDS}" MATCHES "(^|;)([Bb]oost::mpi|MPI::MPI_CXX)($|;)") + list(APPEND TEST_ENV_VARIABLES "OMPI_MCA_hwloc_base_binding_policy=none") + endif() set_tests_properties( ${TEST_NAME} PROPERTIES ENVIRONMENT "${TEST_ENV_VARIABLES}") diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst index a13c09be344..325bf0550ce 100644 --- a/doc/sphinx/installation.rst +++ b/doc/sphinx/installation.rst @@ -67,6 +67,15 @@ are required to be able to compile and use |es|: Other MPI implementations like Intel MPI should also work, although they are not actively tested in |es| continuous integration. + Open MPI version 4.x is known to not properly support the MCA binding + policy "numa" in singleton mode on a few NUMA architectures. + On affected systems, e.g. AMD Ryzen or AMD EPYC, Open MPI halts with + a fatal error when setting the processor affinity in ``MPI_Init``. 
+ This issue can be resolved by setting the environment variable + ``OMPI_MCA_hwloc_base_binding_policy`` to a value other than "numa", + such as "l3cache" to bind to a NUMA shared memory block, or to + "none" to disable binding (can cause performance loss). + Python |es|'s main user interface relies on Python 3. @@ -743,6 +752,11 @@ The following options are available: * ``WITH_VALGRIND_INSTRUMENTATION``: Build with valgrind instrumentation markers +* ``ESPRESSO_ADD_OMPI_SINGLETON_WARNING``: Add a runtime warning in the + pypresso and ipypresso scripts that is triggered in singleton mode + with Open MPI version 4.x on unsupported NUMA environments + (see :term:`MPI installation requirements <MPI>` for details). + When the value in the :file:`CMakeLists.txt` file is set to ON, the corresponding option is created; if the value of the option is set to OFF, the corresponding option is not created. These options can also be modified diff --git a/doc/sphinx/running.rst b/doc/sphinx/running.rst index 4e4f6a1680a..1a8ac31b6c4 100644 --- a/doc/sphinx/running.rst +++ b/doc/sphinx/running.rst @@ -292,8 +292,7 @@ Parallel computing Many algorithms in |es| are designed to work with multiple MPI ranks. However, not all algorithms benefit from MPI parallelization equally. -Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`), while -a small subset simply don't support MPI (e.g. :ref:`Dipolar direct sum`). +Several algorithms only use MPI rank 0 (e.g. :ref:`Reaction methods`). |es| should work with most MPI implementations on the market; see the :term:`MPI installation requirements <MPI>` for details.
diff --git a/src/python/pypresso.cmakein b/src/python/pypresso.cmakein index f874fe14974..e41dee10ad3 100755 --- a/src/python/pypresso.cmakein +++ b/src/python/pypresso.cmakein @@ -14,6 +14,15 @@ else fi export PYTHONPATH +# Open MPI 4.x cannot run in singleton mode on some NUMA systems +if [ "@ESPRESSO_ADD_OMPI_SINGLETON_WARNING@" = "ON" ] && [ "@ESPRESSO_MPIEXEC_GUARD_SINGLETON_NUMA@" = "ON" ]; then + if [ -z "${OMPI_COMM_WORLD_SIZE}" ] && [ "${OMPI_MCA_hwloc_base_binding_policy}" = "numa" ]; then + if test -f /proc/cpuinfo && grep --quiet -P "^[Mm]odel name[ \t]*:[ \t]+@ESPRESSO_CPU_MODEL_NAME_OMPI_SINGLETON_NUMA_PATTERN@( |$)" /proc/cpuinfo; then + echo "warning: if Open MPI fails to set processor affinity, set environment variable OMPI_MCA_hwloc_base_binding_policy to \"none\" or \"l3cache\"" + fi + fi +fi + if [ "@CMAKE_CXX_COMPILER_ID@" != "GNU" ] && [ "@WITH_ASAN@" = "ON" ]; then asan_lib=$("@CMAKE_CXX_COMPILER@" /dev/null -### -o /dev/null -fsanitize=address 2>&1 | grep -o '[" ][^" ]*libclang_rt.asan[^" ]*[^s][" ]' | sed 's/[" ]//g' | sed 's/\.a$/.so/g') export DYLD_INSERT_LIBRARIES="$asan_lib"