diff --git a/.pipelines/ci.yml b/.pipelines/ci.yml index 235e72f48..09a93c5ea 100644 --- a/.pipelines/ci.yml +++ b/.pipelines/ci.yml @@ -558,7 +558,7 @@ stages: name: 'onnxruntime-extensions-Linux-GPU-A10' timeoutInMinutes: 120 variables: - ORT_VERSION: '1.16.2' + ORT_VERSION: '1.16.3' TORCH_VERSION: 'torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118' steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 @@ -584,7 +584,7 @@ stages: userRepository: 'microsoft/onnxruntime' defaultVersionType: 'specificTag' version: 'v$(ORT_VERSION)' - itemPattern: '*-linux-x64-$(ORT_VERSION)*' + itemPattern: '*-linux-x64-gpu-$(ORT_VERSION)*' downloadPath: '$(Build.SourcesDirectory)' displayName: Download the ONNXRuntime prebuilt package. @@ -609,7 +609,7 @@ stages: script: | docker run --gpus all --rm \ --volume $(Build.SourcesDirectory):/onnxruntime-extensions \ - --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \ + --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \ -e CUDA_PATH=/usr/local/cuda-11.8 \ onnxruntime-extensionscuda11build \ /bin/bash -c " @@ -626,7 +626,7 @@ stages: script: | docker run --gpus all --rm \ --volume $(Build.SourcesDirectory):/onnxruntime-extensions \ - --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \ + --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \ -e CUDA_PATH=/usr/local/cuda-11.8 \ onnxruntime-extensionscuda11build \ /bin/bash -c " @@ -644,7 +644,7 @@ stages: script: | docker run --gpus all --rm \ --volume $(Build.SourcesDirectory):/onnxruntime-extensions \ - --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \ + --volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \ -e CUDA_PATH=/usr/local/cuda-11.8 \ onnxruntime-extensionscuda11build \ /bin/bash -c " diff --git 
a/test/data/cuda/test_fastgelu.onnx b/test/data/cuda/test_fastgelu.onnx new file mode 100644 index 000000000..13eb48812 Binary files /dev/null and b/test/data/cuda/test_fastgelu.onnx differ diff --git a/test/data/cuda/test_fastgelu_f16.onnx b/test/data/cuda/test_fastgelu_f16.onnx new file mode 100644 index 000000000..83f90e176 Binary files /dev/null and b/test/data/cuda/test_fastgelu_f16.onnx differ diff --git a/test/data/cuda/test_negpos.onnx b/test/data/cuda/test_negpos.onnx new file mode 100644 index 000000000..b72717e48 Binary files /dev/null and b/test/data/cuda/test_negpos.onnx differ diff --git a/test/shared_test/test_kernel.hpp b/test/shared_test/test_kernel.hpp index 026048b99..549e4d699 100644 --- a/test/shared_test/test_kernel.hpp +++ b/test/shared_test/test_kernel.hpp @@ -52,6 +52,7 @@ void RunSession(Ort::Session& session_object, void TestInference(Ort::Env& env, const ORTCHAR_T* model_uri, const std::vector<TestValue>& inputs, const std::vector<TestValue>& outputs, - OutputValidator output_validator = nullptr); + OutputValidator output_validator = nullptr, + void* cuda_compute_stream = nullptr); void GetTensorMutableDataString(const OrtApi& api, const OrtValue* value, std::vector<std::string>& output); diff --git a/test/shared_test/test_ortops.cc b/test/shared_test/test_ortops.cc index 4c68a1b36..be981cc43 100644 --- a/test/shared_test/test_ortops.cc +++ b/test/shared_test/test_ortops.cc @@ -296,11 +296,31 @@ void ValidateOutputEqual(size_t output_idx, Ort::Value& actual, TestValue expect } } +OrtCUDAProviderOptions CreateDefaultOrtCudaProviderOptionsWithCustomStream(void* cuda_compute_stream) { + OrtCUDAProviderOptions cuda_options; + + cuda_options.device_id = 0; + cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch::OrtCudnnConvAlgoSearchExhaustive; + cuda_options.gpu_mem_limit = std::numeric_limits<size_t>::max(); + cuda_options.arena_extend_strategy = 0; + cuda_options.do_copy_in_default_stream = true; + cuda_options.has_user_compute_stream = cuda_compute_stream != nullptr ? 
1 : 0; + cuda_options.user_compute_stream = cuda_compute_stream; + cuda_options.default_memory_arena_cfg = nullptr; + + return cuda_options; +} + void TestInference(Ort::Env& env, const ORTCHAR_T* model_uri, const std::vector<TestValue>& inputs, const std::vector<TestValue>& outputs, - OutputValidator output_validator) { + OutputValidator output_validator, + void* cuda_compute_stream) { Ort::SessionOptions session_options; +#ifdef USE_CUDA + auto cuda_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(cuda_compute_stream); + session_options.AppendExecutionProvider_CUDA(cuda_options); +#endif auto library_handle = RegisterExtOps(session_options); // if session creation passes, model loads fine diff --git a/test/shared_test/test_ortops_cuda.cc b/test/shared_test/test_ortops_cuda.cc new file mode 100644 index 000000000..dc9ae35b2 --- /dev/null +++ b/test/shared_test/test_ortops_cuda.cc @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include <filesystem> +#include <memory> +#include "gtest/gtest.h" +#include "ocos.h" +#include "test_kernel.hpp" + +#ifdef USE_CUDA + +TEST(CudaOp, test_fastgelu) { + auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default"); + + std::vector<TestValue> inputs(2); + inputs[0].name = "x"; + inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + inputs[0].dims = {6}; + inputs[0].values_float = {0., 1., 2., 3., 4., 5.}; + + inputs[1].name = "bias"; + inputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + inputs[1].dims = {6}; + inputs[1].values_float = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5}; + + std::vector<TestValue> outputs(1); + outputs[0].name = "y"; + outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + outputs[0].dims = {6}; + outputs[0].values_float = {0., 0.9505811, 2.1696784, 3.298689, 4.399991, 5.5}; + + std::filesystem::path model_path = "data/cuda"; + model_path /= "test_fastgelu.onnx"; + 
TestInference(*ort_env, model_path.c_str(), inputs, outputs); +} + +#endif \ No newline at end of file