diff --git a/src/runtime/contrib/cutlass/fp8_group_gemm.cu b/src/runtime/contrib/cutlass/fp8_group_gemm.cu index c93da6ff5766..4cc6192f8e13 100644 --- a/src/runtime/contrib/cutlass/fp8_group_gemm.cu +++ b/src/runtime/contrib/cutlass/fp8_group_gemm.cu @@ -56,7 +56,7 @@ void tvm_cutlass_fp8_group_gemm(NDArray x, NDArray weight, NDArray indptr, NDArr CHECK_EQ(alpha->dtype.bits, 32); int num_groups = weight->shape[0]; int n = weight->shape[1]; - int k = weight->shape[2]; + int k = x->shape[1]; const float* beta = nullptr; cudaStream_t stream = static_cast((*func)().operator void*()); cutlass_group_gemm(static_cast(x->data), static_cast(weight->data),