Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added CUDA 11.0.2 and related recipes, incl. gompic/2020a and iccifortcuda/2020a #11295

Merged
merged 16 commits into from
Oct 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions easybuild/easyconfigs/c/CUDA/CUDA-11.0.2-GCC-9.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Bundle that makes the CUDAcore 11.0.2 installation available to software
# built with the GCC 9.3.0 toolchain; nothing is compiled by this easyconfig.
easyblock = "Bundle"

name = "CUDA"
version = "11.0.2"

homepage = "https://developer.nvidia.com/cuda-toolkit"
description = """CUDA (formerly Compute Unified Device Architecture) is a parallel
computing platform and programming model created by NVIDIA and implemented by the
graphics processing units (GPUs) that they produce. CUDA gives developers access
to the virtual instruction set and memory of the parallel computational elements in CUDA GPUs."""

toolchain = {
    "name": "GCC",
    "version": "9.3.0",
}

# CUDAcore is pinned to the same version as this bundle; the trailing True
# asks EasyBuild to resolve it with the system toolchain instead of GCC.
dependencies = [
    ("CUDAcore", version, "", True),
]

# NOTE(review): expected to make the bundle's root variable point at the
# CUDAcore installation - confirm against the Bundle easyblock documentation.
altroot = "CUDAcore"

moduleclass = "system"
17 changes: 17 additions & 0 deletions easybuild/easyconfigs/c/CUDA/CUDA-11.0.2-iccifort-2020.1.217.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Bundle that makes the CUDAcore 11.0.2 installation available to software
# built with the iccifort 2020.1.217 toolchain; nothing is compiled here.
easyblock = "Bundle"

name = "CUDA"
version = "11.0.2"

homepage = "https://developer.nvidia.com/cuda-toolkit"
description = """CUDA (formerly Compute Unified Device Architecture) is a parallel
computing platform and programming model created by NVIDIA and implemented by the
graphics processing units (GPUs) that they produce. CUDA gives developers access
to the virtual instruction set and memory of the parallel computational elements in CUDA GPUs."""

toolchain = {
    "name": "iccifort",
    "version": "2020.1.217",
}

# CUDAcore is pinned to the same version as this bundle; the trailing True
# asks EasyBuild to resolve it with the system toolchain instead of iccifort.
dependencies = [
    ("CUDAcore", version, "", True),
]

# NOTE(review): expected to make the bundle's root variable point at the
# CUDAcore installation - confirm against the Bundle easyblock documentation.
altroot = "CUDAcore"

moduleclass = "system"
25 changes: 25 additions & 0 deletions easybuild/easyconfigs/c/CUDAcore/CUDAcore-11.0.2.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Installs the CUDA toolkit from NVIDIA's self-extracting ".run" installer
# using the system toolchain (no compiler toolchain involved).
easyblock = "EB_CUDA"
name = 'CUDAcore'
version = '11.0.2'
# Version string embedded in the installer file name
# (presumably the NVIDIA driver version shipped with this toolkit - confirm).
local_nv_version = '450.51.05'

homepage = 'https://developer.nvidia.com/cuda-toolkit'
description = """CUDA (formerly Compute Unified Device Architecture) is a parallel
computing platform and programming model created by NVIDIA and implemented by the
graphics processing units (GPUs) that they produce. CUDA gives developers access
to the virtual instruction set and memory of the parallel computational elements in CUDA GPUs."""

toolchain = SYSTEM

source_urls = ['https://developer.download.nvidia.com/compute/cuda/%(version)s/local_installers/']
# '%%' survives the local '%' formatting as a single '%', so the
# %(version)s and %(cudaarch)s templates are left for EasyBuild to fill in.
sources = ['cuda_%%(version)s_%s_linux%%(cudaarch)s.run' % local_nv_version]
# One checksum per installer file name: the plain "_linux" installer and the
# "_linux_ppc64le" one.
checksums = [
    {
        'cuda_%%(version)s_%s_linux.run' % local_nv_version:
            '48247ada0e3f106051029ae8f70fbd0c238040f58b0880e55026374a959a69c1',
        'cuda_%%(version)s_%s_linux_ppc64le.run' % local_nv_version:
            'db06d0f3fbf6f7aa1f106fc921ad1c86162210a26e8cb65b171c5240a3bf75da',
    }
]

moduleclass = 'system'
37 changes: 37 additions & 0 deletions easybuild/easyconfigs/c/Check/Check-0.15.2-GCCcore-9.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Check 0.15.2 (unit testing framework for C) built from the GitHub release
# tarball with GCCcore 9.3.0 through the configure/make/make install flow.
easyblock = "ConfigureMake"

name = "Check"
version = "0.15.2"

homepage = "https://libcheck.github.io/check/"
description = """
Check is a unit testing framework for C. It features a simple interface for
defining unit tests, putting little in the way of the developer. Tests are
run in a separate address space, so both assertion failures and code errors
that cause segmentation faults or other signals can be caught. Test results
are reportable in the following: Subunit, TAP, XML, and a generic logging
format."""

toolchain = {
    "name": "GCCcore",
    "version": "9.3.0",
}
toolchainopts = {
    "pic": True,
}

# Sources are fetched from the 'libcheck' account on GitHub.
github_account = "libcheck"
source_urls = [GITHUB_LOWER_SOURCE]
sources = ["%(version)s.tar.gz"]
checksums = ["998d355294bb94072f40584272cf4424571c396c631620ce463f6ea97aa67d2e"]

builddependencies = [
    ("binutils", "2.34"),
    ("Autotools", "20180311"),
    ("pkg-config", "0.29.2"),
]

# autoreconf is run ahead of configure (hence the Autotools build dependency).
preconfigopts = "autoreconf -f -i && "
configopts = "--disable-build-docs"

# The installation must provide the checkmk tool plus both the static and
# the shared library.
sanity_check_paths = {
    "files": [
        "bin/checkmk",
        "lib/libcheck.a",
        "lib/libcheck.%s" % SHLIB_EXT,
    ],
    "dirs": [
        "include",
        "share",
    ],
}

moduleclass = "lib"
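58 changes: 58 additions & 0 deletions easybuild/easyconfigs/g/GDRCopy/GDRCopy-2.1-GCCcore-9.3.0-CUDA-11.0.2.eb
Original file line number Diff line number Diff line change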
@@ -0,0 +1,58 @@
# GDRCopy 2.1 built with GCCcore 9.3.0 against CUDAcore 11.0.2.
# The project ships a plain Makefile, so the configure step is skipped.
easyblock = "ConfigureMake"

name = "GDRCopy"
version = "2.1"
local_cudaversion = "11.0.2"
# The CUDA version is made part of the module name.
versionsuffix = "-CUDA-" + local_cudaversion

homepage = "https://github.com/NVIDIA/gdrcopy"
description = "A low-latency GPU memory copy library based on NVIDIA GPUDirect RDMA technology."

toolchain = {
    "name": "GCCcore",
    "version": "9.3.0",
}
toolchainopts = {
    "pic": True,
}

# Sources are fetched from the 'NVIDIA' account on GitHub.
github_account = "NVIDIA"
source_urls = [GITHUB_SOURCE]
sources = ["%(version)s.tar.gz"]
checksums = ["cecc7dcc071107f77396f5553c9109790b6d2298ae29eb2dbbdd52b2a213e4ea"]

builddependencies = [
    ("binutils", "2.34"),
    ("Autotools", "20180311"),
    ("pkg-config", "0.29.2"),
]

dependencies = [
    # trailing True: CUDAcore is resolved with the system toolchain
    ("CUDAcore", local_cudaversion, "", True),
    ("Check", "0.15.2"),
]

# This easyconfig only installs the library and binaries of GDRCopy. Please
# keep in mind that GDRCopy also needs the following kernel modules at runtime:
#
# 1. Kernel module for GDRCopy: improves Host to GPU communication
#    https://github.com/NVIDIA/gdrcopy
#    RPM: 'gdrcopy-kmod', DEB: 'gdrdrv-dkms'
#    Requirements: version of GDRCopy kernel module (gdrdrv.ko) >= 2.0
#
# 2. (optional) Kernel module for GPUDirect RDMA: improves GPU to GPU communication
#    https://github.com/Mellanox/nv_peer_memory
#    RPM: 'nvidia_peer_memory'
#    Requirements: Mellanox HCA with MLNX_OFED 2.1
#
# These kernel modules are not listed as system dependencies to lower the system
# requirements to build this easyconfig, as they are not needed for the build.

skipsteps = ["configure"]

# The same make variables are handed to both the build and the install step.
local_envopts = "PREFIX=%(installdir)s CUDA=$EBROOTCUDACORE"
prebuildopts = "PATH=$PATH:/sbin "  # ensures that ldconfig is found
buildopts = "config lib exes " + local_envopts
installopts = local_envopts

sanity_check_paths = {
    "files": [
        "bin/copybw",
        "bin/copylat",
        "bin/sanity",
        "lib/libgdrapi.%s" % SHLIB_EXT,
    ],
    "dirs": ["include"],
}

moduleclass = "lib"
19 changes: 19 additions & 0 deletions easybuild/easyconfigs/g/gcccuda/gcccuda-2020a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Toolchain definition: GCC 9.3.0 combined with CUDA 11.0.2.
easyblock = 'Toolchain'

name = 'gcccuda'
version = '2020a'

homepage = '(none)'
description = """GNU Compiler Collection (GCC) based compiler toolchain, along with CUDA toolkit."""

toolchain = SYSTEM

# (name, version) of the compiler; doubles as the toolchain CUDA is built with
local_gcc = ('GCC', '9.3.0')

# compiler toolchain dependencies
dependencies = [
    local_gcc,
    ('CUDA', '11.0.2', '', local_gcc),
]

moduleclass = 'toolchain'
21 changes: 21 additions & 0 deletions easybuild/easyconfigs/g/gompic/gompic-2020a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Toolchain definition: gcccuda 2020a (GCC 9.3.0 + CUDA 11.0.2) plus OpenMPI 4.0.3.
easyblock = 'Toolchain'

name = 'gompic'
version = '2020a'

homepage = '(none)'
description = """GNU Compiler Collection (GCC) based compiler toolchain along with CUDA toolkit,
including OpenMPI for MPI support with CUDA features enabled."""

toolchain = SYSTEM

# (name, version) of the compiler; doubles as the toolchain CUDA is built with
local_gcc = ('GCC', '9.3.0')

# compiler toolchain dependencies
dependencies = [
    local_gcc,  # part of gcccuda
    ('CUDA', '11.0.2', '', local_gcc),  # part of gcccuda
    # OpenMPI comes from the gcccuda toolchain of the same version as this one
    ('OpenMPI', '4.0.3', '', ('gcccuda', version)),
]

moduleclass = 'toolchain'
18 changes: 18 additions & 0 deletions easybuild/easyconfigs/i/iccifortcuda/iccifortcuda-2020a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Toolchain definition: Intel compilers (iccifort 2020.1.217) combined with CUDA 11.0.2.
easyblock = "Toolchain"

name = "iccifortcuda"
version = "2020a"

homepage = "(none)"
description = "Intel C, C++ & Fortran compilers with CUDA toolkit"

toolchain = SYSTEM

# (name, version) of the compiler; doubles as the toolchain CUDA is built with
local_comp = ("iccifort", "2020.1.217")

dependencies = [
    local_comp,
    ("CUDA", "11.0.2", "", local_comp),
]

moduleclass = "toolchain"
22 changes: 22 additions & 0 deletions easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.0.3-gcccuda-2020a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# OpenMPI 4.0.3 for the gcccuda 2020a toolchain, linked against the
# CUDA-enabled UCX build.
name = "OpenMPI"
version = "4.0.3"

homepage = "https://www.open-mpi.org/"
description = """The Open MPI Project is an open source MPI-3 implementation."""

toolchain = {
    "name": "gcccuda",
    "version": "2020a",
}

# The download directory is keyed on the major.minor release series.
source_urls = ["https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads"]
sources = [SOURCELOWER_TAR_GZ]
checksums = ["6346bf976001ad274c7e018d6cc35c92bbb9426d8f7754fac00a17ea5ac8eebc"]

dependencies = [
    ("zlib", "1.2.11"),
    ("hwloc", "2.2.0"),
    # third element is the version suffix selecting the CUDA variant of UCX
    ("UCX", "1.8.0", "-CUDA-11.0.2"),
]

# to enable SLURM integration (site-specific)
# configopts = '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'

moduleclass = "mpi"
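22 changes: 22 additions & 0 deletions easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.0.3-iccifortcuda-2020a.eb
Original file line number Diff line number Diff line change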
@@ -0,0 +1,22 @@
# OpenMPI 4.0.3 for the iccifortcuda 2020a toolchain, linked against the
# CUDA-enabled UCX build.
name = "OpenMPI"
version = "4.0.3"

homepage = "https://www.open-mpi.org/"
description = """The Open MPI Project is an open source MPI-3 implementation."""

toolchain = {
    "name": "iccifortcuda",
    "version": "2020a",
}

# The download directory is keyed on the major.minor release series.
source_urls = ["https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads"]
sources = [SOURCELOWER_TAR_GZ]
checksums = ["6346bf976001ad274c7e018d6cc35c92bbb9426d8f7754fac00a17ea5ac8eebc"]

dependencies = [
    ("zlib", "1.2.11"),
    ("hwloc", "2.2.0"),
    # third element is the version suffix selecting the CUDA variant of UCX
    ("UCX", "1.8.0", "-CUDA-11.0.2"),
]

# to enable SLURM integration (site-specific)
# configopts = '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'

moduleclass = "mpi"
63 changes: 63 additions & 0 deletions easybuild/easyconfigs/u/UCX/UCX-1.8.0-GCCcore-9.3.0-CUDA-11.0.2.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Note:
# This is an easyconfig file for EasyBuild, see https://github.com/easybuilders/easybuild
#
# UCX 1.8.0 built with GCCcore 9.3.0, with CUDA and GDRCopy support enabled.
easyblock = 'ConfigureMake'

name = 'UCX'
version = '1.8.0'
local_cudaversion = '11.0.2'
# The CUDA version is made part of the module name.
versionsuffix = '-CUDA-%s' % local_cudaversion

homepage = 'http://www.openucx.org/'
description = """Unified Communication X
An open-source production grade communication framework for data centric
and high-performance applications
"""

toolchain = {'name': 'GCCcore', 'version': '9.3.0'}
toolchainopts = {'pic': True}

source_urls = ['https://github.com/openucx/ucx/releases/download/v%(version)s']
sources = ['%(namelower)s-%(version)s.tar.gz']
patches = [
    'UCX-1.7.0_binutils_2.34_api_fix.patch',
    'UCX-1.8.0_fix-undefined-symbol.patch',
]
# Listed in the same order as the source tarball followed by the patches.
checksums = [
    'e400f7aa5354971c8f5ac6b881dc2846143851df868088c37d432c076445628d',  # ucx-1.8.0.tar.gz
    'c09ebe4d734d520ae23f56d95ba0b91e464a42ccbaf435675424515ebd3fa3a9',  # UCX-1.7.0_binutils_2.34_api_fix.patch
    'eb757242ab3eecd8a851f329cb4baf3c64d46788ab61675f29ab4cc6a0274a45',  # UCX-1.8.0_fix-undefined-symbol.patch
]

builddependencies = [
    ('binutils', '2.34'),
    ('Autotools', '20180311'),
    ('pkg-config', '0.29.2'),
]

osdependencies = [OS_PKG_OPENSSL_DEV]

dependencies = [
    ('numactl', '2.0.13'),
    # trailing True: CUDAcore is resolved with the system toolchain
    ('CUDAcore', local_cudaversion, '', True),
    # GDRCopy variant carrying the same -CUDA-<version> suffix as this build
    ('GDRCopy', '2.1', versionsuffix),
]

# CUDA_CFLAGS set by EB toolchain but also used differently in UCX makefiles
# unset LIBS from EB toolchain to avoid unconditional linking to libcudart:
# it only needs to be linked by the CUDA run-time plugins
preconfigopts = 'autoreconf && unset CUDA_CFLAGS && unset LIBS && '
configopts = '--enable-optimizations --enable-cma --enable-mt --with-verbs '
configopts += '--without-java --disable-doxygen-doc '
configopts += '--with-cuda=$EBROOTCUDACORE --with-gdrcopy=$EBROOTGDRCOPY '

# The same two variables are unset again for the build step.
prebuildopts = 'unset CUDA_CFLAGS && unset LIBS && '
buildopts = 'V=1'

sanity_check_paths = {
    'files': ['bin/ucx_info', 'bin/ucx_perftest', 'bin/ucx_read_profile'],
    'dirs': ['include', 'lib', 'share']
}

sanity_check_commands = ["ucx_info -d"]

moduleclass = 'lib'