From 4c760c88bee6a12863aa39cb133346b61c671946 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Fri, 9 Aug 2024 07:47:13 -0700 Subject: [PATCH 1/6] added threading build back Signed-off-by: Phuong Nguyen --- transformer_engine/common/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index b814ef5974..855a50d1aa 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -14,6 +14,12 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) project(transformer_engine LANGUAGES CUDA CXX) +if(NOT DEFINED NVTE_MAX_BUILD_THREADS) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads 4") +else() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${NVTE_MAX_BUILD_THREADS}") +endif() + if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G") endif() From a90721d0cbe882d91416727a0b7df4d9fbbbb15c Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Fri, 9 Aug 2024 15:32:27 -0700 Subject: [PATCH 2/6] add options for pytorch and paddle extensions Signed-off-by: Phuong Nguyen --- build_tools/paddle.py | 3 ++- build_tools/pytorch.py | 2 +- transformer_engine/common/CMakeLists.txt | 5 ++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/build_tools/paddle.py b/build_tools/paddle.py index 163f094fce..b823450f44 100644 --- a/build_tools/paddle.py +++ b/build_tools/paddle.py @@ -6,6 +6,7 @@ from pathlib import Path import setuptools +import os from .utils import cuda_version @@ -62,7 +63,7 @@ def setup_paddle_extension( print("Could not determine CUDA Toolkit version") else: if version >= (11, 2): - nvcc_flags.extend(["--threads", "4"]) + nvcc_flags.extend(["--threads", os.getenv("NVTE_MAX_BUILD_THREADS", "1")]) if version >= (11, 0): nvcc_flags.extend(["-gencode", "arch=compute_80,code=sm_80"]) if version >= (11, 8): diff --git a/build_tools/pytorch.py b/build_tools/pytorch.py index e423ffe907..327f849528 100644 --- a/build_tools/pytorch.py +++ b/build_tools/pytorch.py @@ -68,7 +68,7 @@ def setup_pytorch_extension( print("Could not determine CUDA Toolkit version") else: if version >= (11, 2): - nvcc_flags.extend(["--threads", "4"]) + nvcc_flags.extend(["--threads", os.getenv("NVTE_MAX_BUILD_THREADS", "1")]) if version >= (11, 0): nvcc_flags.extend(["-gencode", "arch=compute_80,code=sm_80"]) if version >= (11, 8): diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index 855a50d1aa..1b97c28b16 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -15,10 +15,9 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) project(transformer_engine LANGUAGES CUDA CXX) if(NOT DEFINED NVTE_MAX_BUILD_THREADS) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads 4") -else() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${NVTE_MAX_BUILD_THREADS}") + set(NVTE_MAX_BUILD_THREADS "1") endif() +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${NVTE_MAX_BUILD_THREADS}") if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G") From 81fea661268106777b20fc6effec74dd30a4b1d3 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Fri, 9 Aug 2024 17:01:58 -0700 Subject: [PATCH 3/6] added messages Signed-off-by: Phuong Nguyen --- transformer_engine/common/CMakeLists.txt | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index 1b97c28b16..4e727e888e 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -14,10 +14,20 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) project(transformer_engine LANGUAGES CUDA CXX) -if(NOT DEFINED NVTE_MAX_BUILD_THREADS) - set(NVTE_MAX_BUILD_THREADS "1") +if(NOT DEFINED ENV{NVTE_MAX_BUILD_THREADS}) + set(ENV{NVTE_MAX_BUILD_THREADS} "1") endif() -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${NVTE_MAX_BUILD_THREADS}") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads $ENV{NVTE_MAX_BUILD_THREADS}") + +if(DEFINED ENV{MAX_JOBS}) + set(JOBS $ENV{MAX_JOBS}) +elseif(DEFINED ENV{NVTE_MAX_BUILD_JOBS}) + set(JOBS $ENV{NVTE_MAX_BUILD_JOBS}) +else() + set(JOBS "max number of") +endif() + +message(STATUS "Parallel build with ${JOBS} jobs and $ENV{NVTE_MAX_BUILD_THREADS} threads per job") if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G") From 5f71ee731ae5baf7b4811d446e883aeeb86023c8 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen <36155692+phu0ngng@users.noreply.github.com> Date: Sat, 10 Aug 2024 07:57:53 -0700 Subject: [PATCH 4/6] Update transformer_engine/common/CMakeLists.txt Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Signed-off-by: Phuong Nguyen <36155692+phu0ngng@users.noreply.github.com> --- transformer_engine/common/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index 4e727e888e..8c2f697218 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -14,10 +14,11 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) project(transformer_engine LANGUAGES CUDA CXX) -if(NOT DEFINED ENV{NVTE_MAX_BUILD_THREADS}) - set(ENV{NVTE_MAX_BUILD_THREADS} "1") +set(BUILD_THREADS_PER_JOB $ENV{NVTE_BUILD_THREADS_PER_JOB}) +if (NOT BUILD_THREADS_PER_JOB) + set(BUILD_THREADS_PER_JOB 1) endif() -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads $ENV{NVTE_MAX_BUILD_THREADS}") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${BUILD_THREADS_PER_JOB}") if(DEFINED ENV{MAX_JOBS}) set(JOBS $ENV{MAX_JOBS}) From 8bc6bd2cd6d54fbc36840f3df622729f5d54fed8 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Mon, 12 Aug 2024 14:10:27 -0700 Subject: [PATCH 5/6] renaming Signed-off-by: Phuong Nguyen --- build_tools/utils.py | 4 ++-- transformer_engine/common/CMakeLists.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/build_tools/utils.py b/build_tools/utils.py index 3230ad35bf..a0837c1c04 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -37,8 +37,8 @@ def get_max_jobs_for_parallel_build() -> int: num_jobs = 0 # Check environment variable - if os.getenv("NVTE_MAX_BUILD_JOBS"): - num_jobs = int(os.getenv("NVTE_MAX_BUILD_JOBS")) + if os.getenv("NVTE_BUILD_MAX_JOBS"): + num_jobs = int(os.getenv("NVTE_BUILD_MAX_JOBS")) elif os.getenv("MAX_JOBS"): num_jobs = int(os.getenv("MAX_JOBS")) diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index 8c2f697218..048e7fd61a 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -22,13 +22,13 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${BUILD_THREADS_PER_JOB}") if(DEFINED ENV{MAX_JOBS}) set(JOBS $ENV{MAX_JOBS}) -elseif(DEFINED ENV{NVTE_MAX_BUILD_JOBS}) - set(JOBS $ENV{NVTE_MAX_BUILD_JOBS}) +elseif(DEFINED ENV{NVTE_BUILD_MAX_JOBS}) + set(JOBS $ENV{NVTE_BUILD_MAX_JOBS}) else() set(JOBS "max number of") endif() -message(STATUS "Parallel build with ${JOBS} jobs and $ENV{NVTE_MAX_BUILD_THREADS} threads per job") +message(STATUS "Parallel build with ${JOBS} jobs and ${BUILD_THREADS_PER_JOB} threads per job") if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G") From e5e44e5fe408ab56f98bcaa02fd8d89211d99d6f Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Mon, 12 Aug 2024 14:12:13 -0700 Subject: [PATCH 6/6] renaming Signed-off-by: Phuong Nguyen --- build_tools/paddle.py | 2 +- build_tools/pytorch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build_tools/paddle.py b/build_tools/paddle.py index b823450f44..f3140cf028 100644 --- a/build_tools/paddle.py +++ b/build_tools/paddle.py @@ -63,7 +63,7 @@ def setup_paddle_extension( print("Could not determine CUDA Toolkit version") else: if version >= (11, 2): - nvcc_flags.extend(["--threads", os.getenv("NVTE_MAX_BUILD_THREADS", "1")]) + nvcc_flags.extend(["--threads", os.getenv("NVTE_BUILD_THREADS_PER_JOB", "1")]) if version >= (11, 0): nvcc_flags.extend(["-gencode", "arch=compute_80,code=sm_80"]) if version >= (11, 8): diff --git a/build_tools/pytorch.py b/build_tools/pytorch.py index 327f849528..9b858653de 100644 --- a/build_tools/pytorch.py +++ b/build_tools/pytorch.py @@ -68,7 +68,7 @@ def setup_pytorch_extension( print("Could not determine CUDA Toolkit version") else: if version >= (11, 2): - nvcc_flags.extend(["--threads", os.getenv("NVTE_MAX_BUILD_THREADS", "1")]) + nvcc_flags.extend(["--threads", os.getenv("NVTE_BUILD_THREADS_PER_JOB", "1")]) if version >= (11, 0): nvcc_flags.extend(["-gencode", "arch=compute_80,code=sm_80"]) if version >= (11, 8):