Skip to content

Commit

Permalink
add CUDA provider
Browse files Browse the repository at this point in the history
  • Loading branch information
bratpiorka committed Sep 16, 2024
1 parent 9f03d6b commit 460c47f
Show file tree
Hide file tree
Showing 30 changed files with 1,406 additions and 33 deletions.
20 changes: 19 additions & 1 deletion .github/workflows/basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
compiler: [{c: gcc, cxx: g++}]
shared_library: ['OFF']
level_zero_provider: ['ON']
cuda_provider: ['ON']
install_tbb: ['ON']
disable_hwloc: ['OFF']
link_hwloc_statically: ['OFF']
Expand All @@ -31,6 +32,7 @@ jobs:
compiler: {c: gcc-7, cxx: g++-7}
shared_library: 'OFF'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -39,6 +41,7 @@ jobs:
compiler: {c: clang, cxx: clang++}
shared_library: 'OFF'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -47,6 +50,7 @@ jobs:
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -55,15 +59,17 @@ jobs:
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
# test level_zero_provider='OFF'
# test level_zero_provider='OFF' and cuda_provider='OFF'
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: gcc, cxx: g++}
shared_library: 'OFF'
level_zero_provider: 'OFF'
cuda_provider: 'OFF'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -73,6 +79,7 @@ jobs:
compiler: {c: icx, cxx: icpx}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -82,6 +89,7 @@ jobs:
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'OFF'
disable_hwloc: 'OFF'
link_hwloc_statically: 'OFF'
Expand All @@ -90,6 +98,7 @@ jobs:
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'ON'
link_hwloc_statically: 'OFF'
Expand All @@ -98,6 +107,7 @@ jobs:
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'OFF'
link_hwloc_statically: 'ON'
Expand Down Expand Up @@ -149,6 +159,7 @@ jobs:
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
-DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down Expand Up @@ -195,23 +206,27 @@ jobs:
compiler: [{c: cl, cxx: cl}]
shared_library: ['ON', 'OFF']
level_zero_provider: ['ON']
cuda_provider: ['ON']
include:
- os: 'windows-2022'
build_type: Release
compiler: {c: clang-cl, cxx: clang-cl}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
toolset: "-T ClangCL"
- os: 'windows-2022'
build_type: Release
compiler: {c: cl, cxx: cl}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
- os: 'windows-2022'
build_type: Release
compiler: {c: cl, cxx: cl}
shared_library: 'ON'
level_zero_provider: 'OFF'
cuda_provider: 'OFF'

runs-on: ${{matrix.os}}

Expand Down Expand Up @@ -247,6 +262,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
-DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
Expand Down Expand Up @@ -305,6 +321,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_LINK_HWLOC_STATICALLY=ON
Expand Down Expand Up @@ -347,6 +364,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_LINK_HWLOC_STATICALLY=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ jobs:
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ jobs:
-DUMF_BUILD_TESTS=${{matrix.build_tests}}
-DUMF_BUILD_EXAMPLES=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_BUILD_SHARED_LIBRARY=ON
${{matrix.extra_build_options}}
Expand Down
69 changes: 66 additions & 3 deletions .github/workflows/gpu.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# This workflow builds and tests providers using GPU memory. It requires
# "level_zero" labeled self-hosted runners installed on systems with the
# appropriate GPU and drivers.
name: GPU

on: [workflow_call]
Expand Down Expand Up @@ -63,6 +60,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Configure build for Ubuntu
Expand All @@ -84,9 +82,11 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
if: matrix.os == 'Ubuntu'
run: cmake --build ${{env.BUILD_DIR}} --config ${{env.BUILD_TYPE}} -j ${{matrix.number_of_processors}}

- name: Run tests
Expand All @@ -100,3 +100,66 @@ jobs:
- name: Run benchmarks
working-directory: ${{env.BUILD_DIR}}
run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} --exclude-regex umf-bench-multithreaded

gpu-CUDA:
name: Build
env:
BUILD_TYPE: Release
# run only on upstream; forks will not have the HW
# if: github.repository == 'oneapi-src/unified-memory-framework'
strategy:
matrix:
shared_library: ['ON', 'OFF']
# TODO add windows
os: ['Ubuntu']
include:
- os: 'Ubuntu'
compiler: {c: gcc, cxx: g++}
number_of_processors: '$(nproc)'

runs-on: ["DSS-CUDA", "DSS-${{matrix.os}}"]
steps:
- name: Checkout
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 0

- name: Get information about platform
if: matrix.os == 'Ubuntu'
run: .github/scripts/get_system_info.sh

- name: Configure build for Ubuntu
if: matrix.os == 'Ubuntu'
run: >
cmake -B ${{env.BUILD_DIR}}
-DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
-DUMF_BUILD_BENCHMARKS=ON
-DUMF_BUILD_TESTS=ON
-DUMF_BUILD_GPU_TESTS=ON
-DUMF_BUILD_GPU_EXAMPLES=ON
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
run: cmake --build ${{env.BUILD_DIR}} --config ${{env.BUILD_TYPE}} -j ${{matrix.number_of_processors}}

- name: Run tests
working-directory: ${{env.BUILD_DIR}}
run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure --test-dir test

- name: Run examples
working-directory: ${{env.BUILD_DIR}}
run: ctest --output-on-failure --test-dir examples -C ${{env.BUILD_TYPE}}

- name: Run benchmarks
working-directory: ${{env.BUILD_DIR}}
run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} --exclude-regex umf-bench-multithreaded
1 change: 1 addition & 0 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_USE_VALGRIND=1
-DUMF_TESTS_FAIL_ON_SKIP=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pr_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=ON
-DUMF_BUILD_TESTS=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
- name: Check C/C++ formatting
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/sanitizers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ jobs:
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down Expand Up @@ -132,6 +133,7 @@ jobs:
-DUMF_USE_ASAN=${{matrix.sanitizers.asan}}
-DUMF_BUILD_EXAMPLES=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/valgrind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_USE_VALGRIND=1
-DUMF_TESTS_FAIL_ON_SKIP=ON
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ find_package(PkgConfig)
# Build Options
option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON)
option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON)
option(UMF_BUILD_LIBUMF_POOL_DISJOINT
"Build the libumf_pool_disjoint static library" OFF)
option(UMF_BUILD_LIBUMF_POOL_JEMALLOC
Expand Down Expand Up @@ -417,6 +418,11 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER)
add_optional_symbol(umfLevelZeroMemoryProviderOps)
endif()

# Conditional configuration for CUDA provider
if(UMF_BUILD_CUDA_PROVIDER)
add_optional_symbol(umfCUDAMemoryProviderOps)
endif()

if(NOT UMF_DISABLE_HWLOC)
add_optional_symbol(umfOsMemoryProviderOps)
if(LINUX)
Expand Down
26 changes: 22 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ The Unified Memory Framework (UMF) is a library for constructing allocators and
For a quick introduction to UMF usage, please see
[examples](https://oneapi-src.github.io/unified-memory-framework/examples.html)
documentation, which includes the code of the
[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c)
and the more advanced one that allocates
[USM memory from the GPU device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/gpu_shared_memory.c)
using the Level Zero API and UMF Level Zero memory provider.
[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c).
The are also more advanced that allocates USM memory from the
[Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c)
using the Level Zero API and UMF Level Zero memory provider and [CUDA device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c)
using the CUDA API and UMF CUDA memory provider.

## Build

Expand Down Expand Up @@ -101,6 +102,7 @@ List of options provided by CMake:
| - | - | - | - |
| UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF |
| UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON |
| UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON |
| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF |
| UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF |
| UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON |
Expand Down Expand Up @@ -188,6 +190,22 @@ with the `disable_provider_free` parameter set to true.
1) Linux OS
2) A character device file /dev/daxX.Y created in the OS.

#### CUDA memory provider

A memory provider that provides memory from CUDA device.

##### Requirements

1) Linux or Windows OS
2) The `UMF_BUILD_CUDA_PROVIDER` option turned `ON` (by default)

Additionally, required for tests:

3) The `UMF_BUILD_GPU_TESTS` option turned `ON`
4) System with CUDA compatible GPU
5) Required packages:
- nvidia-cuda-dev (Linux) or cuda-sdk (Windows)

### Memory pool managers

#### Proxy pool (part of libumf)
Expand Down
7 changes: 6 additions & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ function(add_umf_benchmark)
target_compile_definitions(${BENCH_NAME}
PRIVATE UMF_BUILD_LEVEL_ZERO_PROVIDER=1)
endif()
if(UMF_BUILD_CUDA_PROVIDER)
target_compile_definitions(${BENCH_NAME}
PRIVATE UMF_BUILD_CUDA_PROVIDER=1)
endif()
if(UMF_BUILD_GPU_TESTS)
target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_GPU_TESTS=1)
endif()
Expand All @@ -103,8 +107,9 @@ endif()
if(LINUX)
set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m)
endif()
if(UMF_BUILD_GPU_TESTS)
if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
set(LIBS_OPTIONAL ${LIBS_OPTIONAL} ze_loader)
# TODO add CUDA
endif()

# BENCHMARKS
Expand Down
Loading

0 comments on commit 460c47f

Please sign in to comment.