Patch summary (text before the first "diff --git" header is ignored by patch tools):
  Makefile: inside the LLAMA_HIPBLAS section, change the overridable defaults
            LLAMA_CUDA_DMMV_X 64 -> 128 and LLAMA_CUDA_MMV_Y 2 -> 1, and add
            LLAMA_CUDA_KQUANTS_ITER with a default of 1.
  ggml.c:   close the preceding conditional before the BLAS headers, so that
            GGML_USE_OPENBLAS and GGML_BLAS_USE_MKL each guard their own
            #include instead of sitting in an #elif / nested #if / #else chain.
NOTE(review): the received text had lost its line breaks, its indentation and the
  targets of the two angle-bracket #include directives. <mkl.h> and <cblas.h> are
  restored to match the guarding macros -- confirm against the upstream tree.
  Leading whitespace on the Makefile lines is reconstructed as a single tab; if
  the context does not match, apply with --ignore-whitespace.

diff --git a/Makefile b/Makefile
index b9dec82ad88f1..b92d8910fceaa 100644
--- a/Makefile
+++ b/Makefile
@@ -186,8 +186,9 @@ ifdef LLAMA_HIPBLAS
 	CC := $(ROCM_PATH)/llvm/bin/clang
 	CXX := $(ROCM_PATH)/llvm/bin/clang++
 	GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
-	LLAMA_CUDA_DMMV_X ?= 64
-	LLAMA_CUDA_MMV_Y ?= 2
+	LLAMA_CUDA_DMMV_X ?= 128
+	LLAMA_CUDA_MMV_Y ?= 1
+	LLAMA_CUDA_KQUANTS_ITER ?= 1
 	LLAMA_CUDA_FORCE_DMMV = true
 	CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
diff --git a/ggml.c b/ggml.c
index f98cc229783b9..f6c397adb4cf3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -246,12 +246,12 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
 #endif
-#elif defined(GGML_USE_OPENBLAS)
-#if defined(GGML_BLAS_USE_MKL)
-#include <mkl.h>
-#else
+#endif
+#if defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
 #endif
+#if defined(GGML_BLAS_USE_MKL)
+#include <mkl.h>
 #endif
 #if defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"