Skip to content

Commit

Permalink
add CUDA provider
Browse files Browse the repository at this point in the history
  • Loading branch information
bratpiorka committed Aug 9, 2024
1 parent 3040c31 commit 12c6784
Show file tree
Hide file tree
Showing 27 changed files with 935 additions and 18 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
compiler: [{c: gcc, cxx: g++}]
shared_library: ['OFF']
level_zero_provider: ['ON']
cuda_provider: ['ON']
install_tbb: ['ON']
disable_hwloc: ['OFF']
link_hwloc_statically: ['OFF']
Expand All @@ -31,58 +32,67 @@ jobs:
compiler: {c: gcc-7, cxx: g++-7}
shared_library: 'OFF'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: clang, cxx: clang++}
shared_library: 'OFF'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
- os: 'ubuntu-22.04'
build_type: Debug
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
# test level_zero_provider='OFF'
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: gcc, cxx: g++}
shared_library: 'OFF'
level_zero_provider: 'OFF'
cuda_provider: 'OFF'
install_tbb: 'ON'
# test icx compiler
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: icx, cxx: icpx}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
# test without installing TBB
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'OFF'
- os: 'ubuntu-22.04'
build_type: Debug
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
disable_hwloc: 'ON'
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: gcc, cxx: g++}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
install_tbb: 'ON'
link_hwloc_statically: 'ON'
runs-on: ${{matrix.os}}
Expand Down Expand Up @@ -133,6 +143,7 @@ jobs:
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
-DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down Expand Up @@ -177,23 +188,27 @@ jobs:
compiler: [{c: cl, cxx: cl}]
shared_library: ['ON', 'OFF']
level_zero_provider: ['ON']
cuda_provider: ['ON']
include:
- os: 'windows-2022'
build_type: Release
compiler: {c: clang-cl, cxx: clang-cl}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
toolset: "-T ClangCL"
- os: 'windows-2022'
build_type: Release
compiler: {c: cl, cxx: cl}
shared_library: 'ON'
level_zero_provider: 'ON'
cuda_provider: 'ON'
- os: 'windows-2022'
build_type: Release
compiler: {c: cl, cxx: cl}
shared_library: 'ON'
level_zero_provider: 'OFF'
cuda_provider: 'OFF'

runs-on: ${{matrix.os}}

Expand Down Expand Up @@ -229,6 +244,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
-DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
Expand Down Expand Up @@ -287,6 +303,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_LINK_HWLOC_STATICALLY=ON
Expand Down Expand Up @@ -329,6 +346,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_LINK_HWLOC_STATICALLY=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ jobs:
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ jobs:
-DUMF_BUILD_TESTS=${{matrix.build_tests}}
-DUMF_BUILD_EXAMPLES=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
-DUMF_BUILD_SHARED_LIBRARY=ON
${{matrix.extra_build_options}}
Expand Down
68 changes: 67 additions & 1 deletion .github/workflows/gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ env:
INSTL_DIR : "${{github.workspace}}/../install-dir"

jobs:
gpu:
gpu-L0:
name: Build
env:
BUILD_TYPE: Release
Expand Down Expand Up @@ -63,6 +63,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Configure build for Ubuntu
Expand All @@ -84,6 +85,71 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
run: cmake --build ${{env.BUILD_DIR}} --config ${{env.BUILD_TYPE}} -j ${{matrix.number_of_processors}}

- name: Run tests
working-directory: ${{env.BUILD_DIR}}
run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure --test-dir test

- name: Run examples
working-directory: ${{env.BUILD_DIR}}
run: ctest --output-on-failure --test-dir examples -C ${{env.BUILD_TYPE}}

- name: Run benchmarks
working-directory: ${{env.BUILD_DIR}}
run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} --exclude-regex umf-bench-multithreaded

gpu-CUDA:
name: Build
env:
BUILD_TYPE: Release
# run only on upstream; forks will not have the HW
if: github.repository == 'oneapi-src/unified-memory-framework'
strategy:
matrix:
shared_library: ['ON', 'OFF']
# TODO add windows
os: ['Ubuntu']
include:
- os: 'Ubuntu'
compiler: {c: gcc, cxx: g++}
number_of_processors: '$(nproc)'

runs-on: ["DSS-CUDA", "DSS-${{matrix.os}}"]
steps:
- name: Checkout
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 0

- name: Get information about platform
if: matrix.os == 'Ubuntu'
run: .github/scripts/get_system_info.sh

- name: Configure build for Ubuntu
if: matrix.os == 'Ubuntu'
run: >
cmake
-B ${{env.BUILD_DIR}}
-DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
-DUMF_BUILD_BENCHMARKS=ON
-DUMF_BUILD_TESTS=ON
-DUMF_BUILD_GPU_TESTS=ON
-DUMF_BUILD_GPU_EXAMPLES=ON
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_USE_VALGRIND=1
-DUMF_TESTS_FAIL_ON_SKIP=ON
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pr_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=ON
-DUMF_BUILD_TESTS=OFF
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
- name: Check C/C++ formatting
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/sanitizers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ jobs:
-DCMAKE_C_COMPILER=${{matrix.compiler.c}}
-DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
-DUMF_BUILD_CUDA_PROVIDER=ON
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
Expand Down Expand Up @@ -130,6 +131,7 @@ jobs:
-DUMF_USE_ASAN=${{matrix.sanitizers.asan}}
-DUMF_BUILD_EXAMPLES=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_TESTS_FAIL_ON_SKIP=ON
- name: Build UMF
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/valgrind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
-DUMF_BUILD_CUDA_PROVIDER=OFF
-DUMF_USE_VALGRIND=1
-DUMF_TESTS_FAIL_ON_SKIP=ON
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ find_package(PkgConfig)
# Build Options
option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON)
option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON)
option(UMF_BUILD_LIBUMF_POOL_DISJOINT
"Build the libumf_pool_disjoint static library" OFF)
option(UMF_BUILD_LIBUMF_POOL_JEMALLOC
Expand Down Expand Up @@ -399,6 +400,11 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER)
add_optional_symbol(umfLevelZeroMemoryProviderOps)
endif()

# Conditional configuration for CUDA provider
if(UMF_BUILD_CUDA_PROVIDER)
add_optional_symbol(umfCUDAMemoryProviderOps)
endif()

if(NOT UMF_DISABLE_HWLOC)
add_optional_symbol(umfOsMemoryProviderOps)
endif()
Expand Down
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ For a quick introduction to UMF usage, please see
documentation, which includes the code of the
[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c)
and the more advanced one that allocates
[USM memory from the GPU device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/gpu_shared_memory.c)
[USM memory from the Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c)
using the Level Zero API and UMF Level Zero memory provider.

## Build
Expand Down Expand Up @@ -101,6 +101,7 @@ List of options provided by CMake:
| - | - | - | - |
| UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF |
| UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON |
| UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON |
| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF |
| UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF |
| UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON |
Expand Down Expand Up @@ -172,6 +173,22 @@ Additionally, required for tests:
5) Required packages:
- liblevel-zero-dev (Linux) or level-zero-sdk (Windows)

#### CUDA memory provider

A memory provider that provides memory from CUDA device.

##### Requirements

1) Linux or Windows OS
2) The `UMF_BUILD_CUDA_PROVIDER` option turned `ON` (by default)

Additionally, required for tests:

3) The `UMF_BUILD_GPU_TESTS` option turned `ON`
4) System with CUDA compatible GPU
5) Required packages:
- nvidia-cuda-dev (Linux) or cuda-sdk (Windows)

### Memory pool managers

#### Proxy pool (part of libumf)
Expand Down
4 changes: 4 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ function(add_umf_benchmark)
target_compile_definitions(${BENCH_NAME}
PRIVATE UMF_BUILD_LEVEL_ZERO_PROVIDER=1)
endif()
if(UMF_BUILD_CUDA_PROVIDER)
target_compile_definitions(${BENCH_NAME}
PRIVATE UMF_BUILD_CUDA_PROVIDER=1)
endif()
if(UMF_BUILD_GPU_TESTS)
target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_GPU_TESTS=1)
endif()
Expand Down
2 changes: 2 additions & 0 deletions benchmark/ubench.c
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,8 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) {
}
#endif /* (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */

// TODO add IPC benchmark for CUDA

UBENCH_MAIN()

#if defined(_MSC_VER)
Expand Down
6 changes: 4 additions & 2 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ if(UMF_POOL_SCALABLE_ENABLED)
endif()
endif()

# TODO add CUDA examples

if(UMF_BUILD_GPU_EXAMPLES
AND UMF_BUILD_LIBUMF_POOL_DISJOINT
AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
set(EXAMPLE_NAME umf_example_gpu_shared_memory)
set(EXAMPLE_NAME umf_example_level_zero_shared_memory)

add_umf_executable(
NAME ${EXAMPLE_NAME}
SRCS gpu_shared_memory/gpu_shared_memory.c
SRCS level_zero_shared_memory/level_zero_shared_memory.c
LIBS disjoint_pool ze_loader umf)

target_include_directories(
Expand Down
Loading

0 comments on commit 12c6784

Please sign in to comment.