From aeafd9e2b5c2dc3a77489e8485b828dec8a6210d Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 2 Apr 2021 09:24:36 -0700 Subject: [PATCH] Fix CUDA HAL bug and enable more execution tests (#5296) --- iree/hal/cuda/graph_command_buffer.c | 8 +++-- iree/test/e2e/cuda_specific/BUILD | 36 ++++++++++++++++++++++ iree/test/e2e/cuda_specific/CMakeLists.txt | 26 ++++++++++++++++ iree/test/e2e/cuda_specific/dot.mlir | 29 +++++++++++++++++ 4 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 iree/test/e2e/cuda_specific/BUILD create mode 100644 iree/test/e2e/cuda_specific/CMakeLists.txt create mode 100644 iree/test/e2e/cuda_specific/dot.mlir diff --git a/iree/hal/cuda/graph_command_buffer.c b/iree/hal/cuda/graph_command_buffer.c index 7fa0822c42e3..af3918533dbe 100644 --- a/iree/hal/cuda/graph_command_buffer.c +++ b/iree/hal/cuda/graph_command_buffer.c @@ -246,7 +246,8 @@ static iree_status_t iree_hal_cuda_graph_command_buffer_fill_buffer( CUDA_MEMSET_NODE_PARAMS params = { .dst = target_device_buffer + target_offset, .elementSize = pattern_length, - .width = length, + // width in number of elements despite what driver documentation says. + .width = length / pattern_length, .height = 1, .value = dword_pattern, }; @@ -327,8 +328,9 @@ static iree_status_t iree_hal_cuda_graph_command_buffer_push_descriptor_set( assert(arg_index < max_binding_count && "binding index larger than the max expected."); CUdeviceptr device_ptr = - iree_hal_cuda_buffer_device_pointer(bindings[i].buffer) + - iree_hal_buffer_byte_offset(bindings[i].buffer); + iree_hal_cuda_buffer_device_pointer( + iree_hal_buffer_allocated_buffer(bindings[i].buffer)) + + iree_hal_buffer_byte_offset(bindings[i].buffer) + bindings[i].offset; *((CUdeviceptr*)command_buffer->current_descriptor[arg_index]) = device_ptr; } return iree_ok_status(); diff --git a/iree/test/e2e/cuda_specific/BUILD b/iree/test/e2e/cuda_specific/BUILD new file mode 100644 index 000000000000..cb55d3d8cb4c --- /dev/null +++ b/iree/test/e2e/cuda_specific/BUILD @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Tests for end-to-end IREE support specific to the CUDA backend to be able to +# incrementally enable features. + +load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_check_single_backend_test_suite( + name = "check_cuda", + srcs = [ + "dot.mlir", + ], + compiler_flags = [ + "-iree-flow-dispatch-linalg-on-tensors", + ], + driver = "cuda", + target_backend = "cuda", +) diff --git a/iree/test/e2e/cuda_specific/CMakeLists.txt b/iree/test/e2e/cuda_specific/CMakeLists.txt new file mode 100644 index 000000000000..aa07629755d5 --- /dev/null +++ b/iree/test/e2e/cuda_specific/CMakeLists.txt @@ -0,0 +1,26 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# iree/test/e2e/cuda_specific/BUILD # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ + +iree_add_all_subdirs() + +iree_check_single_backend_test_suite( + NAME + check_cuda + SRCS + "dot.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda" + COMPILER_FLAGS + "-iree-flow-dispatch-linalg-on-tensors" +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/test/e2e/cuda_specific/dot.mlir b/iree/test/e2e/cuda_specific/dot.mlir new file mode 100644 index 000000000000..caf38c26a0ff --- /dev/null +++ b/iree/test/e2e/cuda_specific/dot.mlir @@ -0,0 +1,29 @@ +func @f32() attributes { iree.module.export } { + %lhs = iree.unfoldable_constant dense<[ + [15.0, 14.0, 13.0], + [12.0, 11.0, 10.0], + [09.0, 08.0, 07.0], + [06.0, 05.0, 04.0], + [03.0, 02.0, 01.0]]> : tensor<5x3xf32> + %rhs = iree.unfoldable_constant dense<[ + [15.0, 14.0, 13.0, 12.0, 11.0], + [10.0, 09.0, 08.0, 07.0, 06.0], + [05.0, 04.0, 03.0, 02.0, 01.0]]> : tensor<3x5xf32> + %res = "mhlo.dot"(%lhs, %rhs) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32> + check.expect_almost_eq_const(%res, dense<[ + [430.0, 388.0, 346.0, 304.0, 262.0], + [340.0, 307.0, 274.0, 241.0, 208.0], + [250.0, 226.0, 202.0, 178.0, 154.0], + [160.0, 145.0, 130.0, 115.0, 100.0], + [70.0, 64.0, 58.0, 52.0, 46.0]]> : tensor<5x5xf32>) : tensor<5x5xf32> + return +} + +// large aligned case that can be vectorized. +func @large_aligned() attributes { iree.module.export } { + %lhs = iree.unfoldable_constant dense<1.0> : tensor<252x1024xf32> + %rhs = iree.unfoldable_constant dense<0.4> : tensor<1024x500xf32> + %res = "mhlo.dot"(%lhs, %rhs) : (tensor<252x1024xf32>, tensor<1024x500xf32>) -> tensor<252x500xf32> + check.expect_almost_eq_const(%res, dense<409.596> : tensor<252x500xf32>) : tensor<252x500xf32> + return +}