Skip to content

Commit

Permalink
Fix CUDA HAL bug and enable more execution tests (#5296)
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasRaoux authored Apr 2, 2021
1 parent 2801780 commit aeafd9e
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 3 deletions.
8 changes: 5 additions & 3 deletions iree/hal/cuda/graph_command_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ static iree_status_t iree_hal_cuda_graph_command_buffer_fill_buffer(
CUDA_MEMSET_NODE_PARAMS params = {
.dst = target_device_buffer + target_offset,
.elementSize = pattern_length,
.width = length,
// width in number of elements despite what driver documentation says.
.width = length / pattern_length,
.height = 1,
.value = dword_pattern,
};
Expand Down Expand Up @@ -327,8 +328,9 @@ static iree_status_t iree_hal_cuda_graph_command_buffer_push_descriptor_set(
assert(arg_index < max_binding_count &&
"binding index larger than the max expected.");
CUdeviceptr device_ptr =
iree_hal_cuda_buffer_device_pointer(bindings[i].buffer) +
iree_hal_buffer_byte_offset(bindings[i].buffer);
iree_hal_cuda_buffer_device_pointer(
iree_hal_buffer_allocated_buffer(bindings[i].buffer)) +
iree_hal_buffer_byte_offset(bindings[i].buffer) + bindings[i].offset;
*((CUdeviceptr*)command_buffer->current_descriptor[arg_index]) = device_ptr;
}
return iree_ok_status();
Expand Down
36 changes: 36 additions & 0 deletions iree/test/e2e/cuda_specific/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# End-to-end IREE tests specific to the CUDA backend, so that features can be
# enabled incrementally.

load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")

package(
default_visibility = ["//visibility:public"],
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)

# Single-backend check-test suite for CUDA: the sources in `srcs` are built
# with target_backend "cuda" and exercised with the "cuda" driver. The exact
# semantics of each attribute are defined by
# iree_check_single_backend_test_suite in //build_tools/bazel:iree_check_test.bzl.
iree_check_single_backend_test_suite(
name = "check_cuda",
# Test sources belonging to this suite; add new .mlir files here as more
# features are enabled for the CUDA backend.
srcs = [
"dot.mlir",
],
# Extra compiler flags applied to this suite.
compiler_flags = [
"-iree-flow-dispatch-linalg-on-tensors",
],
driver = "cuda",
target_backend = "cuda",
)
26 changes: 26 additions & 0 deletions iree/test/e2e/cuda_specific/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
################################################################################
# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
# iree/test/e2e/cuda_specific/BUILD #
# #
# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
# CMake-only content. #
# #
# To disable autogeneration for this file entirely, delete this header. #
################################################################################

iree_add_all_subdirs()

iree_check_single_backend_test_suite(
NAME
check_cuda
SRCS
"dot.mlir"
TARGET_BACKEND
"cuda"
DRIVER
"cuda"
COMPILER_FLAGS
"-iree-flow-dispatch-linalg-on-tensors"
)

### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
29 changes: 29 additions & 0 deletions iree/test/e2e/cuda_specific/dot.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Small f32 matrix product: multiplies a 5x3 tensor by a 3x5 tensor with
// mhlo.dot and compares the 5x5 result against a precomputed constant using
// an approximate-equality check.
func @f32() attributes { iree.module.export } {
// Inputs are wrapped in iree.unfoldable_constant; presumably this keeps the
// compiler from constant-folding the dot away -- confirm against the op docs.
%lhs = iree.unfoldable_constant dense<[
[15.0, 14.0, 13.0],
[12.0, 11.0, 10.0],
[09.0, 08.0, 07.0],
[06.0, 05.0, 04.0],
[03.0, 02.0, 01.0]]> : tensor<5x3xf32>
%rhs = iree.unfoldable_constant dense<[
[15.0, 14.0, 13.0, 12.0, 11.0],
[10.0, 09.0, 08.0, 07.0, 06.0],
[05.0, 04.0, 03.0, 02.0, 01.0]]> : tensor<3x5xf32>
%res = "mhlo.dot"(%lhs, %rhs) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
// Expected values are the row-by-column products of the inputs above,
// e.g. element [0][0] = 15*15 + 14*10 + 13*5 = 430.
check.expect_almost_eq_const(%res, dense<[
[430.0, 388.0, 346.0, 304.0, 262.0],
[340.0, 307.0, 274.0, 241.0, 208.0],
[250.0, 226.0, 202.0, 178.0, 154.0],
[160.0, 145.0, 130.0, 115.0, 100.0],
[70.0, 64.0, 58.0, 52.0, 46.0]]> : tensor<5x5xf32>) : tensor<5x5xf32>
return
}

// Large aligned case that can be vectorized: multiplies a 252x1024 tensor by
// a 1024x500 tensor, both filled with a single constant value.
func @large_aligned() attributes { iree.module.export } {
%lhs = iree.unfoldable_constant dense<1.0> : tensor<252x1024xf32>
%rhs = iree.unfoldable_constant dense<0.4> : tensor<1024x500xf32>
%res = "mhlo.dot"(%lhs, %rhs) : (tensor<252x1024xf32>, tensor<1024x500xf32>) -> tensor<252x500xf32>
// Every output element is the sum of 1024 products of 1.0 and 0.4, i.e.
// 409.6 in exact arithmetic. The expected constant 409.596 is compared
// approximately; NOTE(review): the small difference presumably reflects f32
// rounding -- confirm.
check.expect_almost_eq_const(%res, dense<409.596> : tensor<252x500xf32>) : tensor<252x500xf32>
return
}

0 comments on commit aeafd9e

Please sign in to comment.