Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autodetect for ROCm #2238

Merged
merged 1 commit into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gui.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ else
if [ "$RUNPOD" = false ]; then
if [[ "$@" == *"--use-ipex"* ]]; then
REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux_ipex.txt"
elif [[ "$@" == *"--use-rocm"* ]]; then
elif [[ "$@" == *"--use-rocm"* ]] || [ -x "$(command -v rocminfo)" ] || [ -f "/opt/rocm/bin/rocminfo" ]; then
REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux_rocm.txt"
else
REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux.txt"
Expand Down
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ install_python_dependencies() {
python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_runpod.txt
elif [ "$USE_IPEX" = true ]; then
python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux_ipex.txt
elif [ "$USE_ROCM" = true ]; then
elif [ "$USE_ROCM" = true ] || [ -x "$(command -v rocminfo)" ] || [ -f "/opt/rocm/bin/rocminfo" ]; then
python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux_rocm.txt
else
python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux.txt
Expand Down
51 changes: 24 additions & 27 deletions setup/setup_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@
log.addHandler(rh)


def install_requirements_inbulk(requirements_file, show_stdout=True, optional_parm="", upgrade = False):

Check warning on line 221 in setup/setup_common.py

View workflow job for this annotation

GitHub Actions / build

"parm" should be "param" or "pram" or "parma".
if not os.path.exists(requirements_file):
log.error(f'Could not find the requirements file in {requirements_file}.')
return
Expand All @@ -226,12 +226,12 @@
log.info(f'Installing requirements from {requirements_file}...')

if upgrade:
optional_parm += " -U"

Check warning on line 229 in setup/setup_common.py

View workflow job for this annotation

GitHub Actions / build

"parm" should be "param" or "pram" or "parma".

if show_stdout:
run_cmd(f'pip install -r {requirements_file} {optional_parm}')

Check warning on line 232 in setup/setup_common.py

View workflow job for this annotation

GitHub Actions / build

"parm" should be "param" or "pram" or "parma".
else:
run_cmd(f'pip install -r {requirements_file} {optional_parm} --quiet')

Check warning on line 234 in setup/setup_common.py

View workflow job for this annotation

GitHub Actions / build

"parm" should be "param" or "pram" or "parma".
log.info(f'Requirements from {requirements_file} installed.')


Expand Down Expand Up @@ -330,7 +330,7 @@
# This function was adapted from code written by vladmandic: https://github.com/vladmandic/automatic/commits/master
#

# Check for nVidia toolkit or AMD toolkit
# Check for toolkit
if shutil.which('nvidia-smi') is not None or os.path.exists(
os.path.join(
os.environ.get('SystemRoot') or r'C:\Windows',
Expand All @@ -353,29 +353,18 @@
try:
import torch
try:
# Import IPEX / XPU support
import intel_extension_for_pytorch as ipex
if torch.xpu.is_available():
from library.ipex import ipex_init
ipex_init()
os.environ.setdefault('NEOReadDebugKeys', '1')
os.environ.setdefault('ClDeviceGlobalMemSizeAvailablePercent', '100')
except Exception:
pass
log.info(f'Torch {torch.__version__}')

# Check if CUDA is available
if not torch.cuda.is_available():
log.warning('Torch reports CUDA not available')
else:
if torch.cuda.is_available():
if torch.version.cuda:
if hasattr(torch, "xpu") and torch.xpu.is_available():
# Log Intel IPEX OneAPI version
log.info(f'Torch backend: Intel IPEX OneAPI {ipex.__version__}')
else:
# Log nVidia CUDA and cuDNN versions
log.info(
f'Torch backend: nVidia CUDA {torch.version.cuda} cuDNN {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else "N/A"}'
)
# Log nVidia CUDA and cuDNN versions
log.info(
f'Torch backend: nVidia CUDA {torch.version.cuda} cuDNN {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else "N/A"}'
)
elif torch.version.hip:
# Log AMD ROCm HIP version
log.info(f'Torch backend: AMD ROCm HIP {torch.version.hip}')
Expand All @@ -386,15 +375,23 @@
for device in [
torch.cuda.device(i) for i in range(torch.cuda.device_count())
]:
if hasattr(torch, "xpu") and torch.xpu.is_available():
log.info(
f'Torch detected GPU: {torch.xpu.get_device_name(device)} VRAM {round(torch.xpu.get_device_properties(device).total_memory / 1024 / 1024)} Compute Units {torch.xpu.get_device_properties(device).max_compute_units}'
)
else:
log.info(
f'Torch detected GPU: {torch.cuda.get_device_name(device)} VRAM {round(torch.cuda.get_device_properties(device).total_memory / 1024 / 1024)} Arch {torch.cuda.get_device_capability(device)} Cores {torch.cuda.get_device_properties(device).multi_processor_count}'
)
return int(torch.__version__[0])
log.info(
f'Torch detected GPU: {torch.cuda.get_device_name(device)} VRAM {round(torch.cuda.get_device_properties(device).total_memory / 1024 / 1024)} Arch {torch.cuda.get_device_capability(device)} Cores {torch.cuda.get_device_properties(device).multi_processor_count}'
)
# Check if XPU is available
elif hasattr(torch, "xpu") and torch.xpu.is_available():
# Log Intel IPEX version
log.info(f'Torch backend: Intel IPEX {ipex.__version__}')
for device in [
torch.xpu.device(i) for i in range(torch.xpu.device_count())
]:
log.info(
f'Torch detected GPU: {torch.xpu.get_device_name(device)} VRAM {round(torch.xpu.get_device_properties(device).total_memory / 1024 / 1024)} Compute Units {torch.xpu.get_device_properties(device).max_compute_units}'
)
else:
log.warning('Torch reports GPU not available')

return int(torch.__version__[0])
except Exception as e:
# log.warning(f'Could not load torch: {e}')
return 0
Expand Down
2 changes: 1 addition & 1 deletion setup/validate_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
log = setup_logging()

def check_torch():
# Check for nVidia toolkit or AMD toolkit
# Check for toolkit
if shutil.which('nvidia-smi') is not None or os.path.exists(
os.path.join(
os.environ.get('SystemRoot') or r'C:\Windows',
Expand Down
Loading