Skip to content

Commit

Permalink
Add initial CUDA native UT (#625)
Browse files Browse the repository at this point in the history
* Add initial CUDA native UT

* fix the build issue

* fix other build error

* add 30 minutes to the Android packaging pipeline timeout, since the job was timing out early

* undo android pipeline timeout change - move to other PR

* revert ifdef for testing ci

* add if def for cuda

* update ci ORT linux package name

* update the package extraction path

* Update ci.yml

* Update ci.yml

---------

Co-authored-by: Sayan Shaw <[email protected]>
Co-authored-by: Wenbing Li <[email protected]>
Co-authored-by: Wenbing Li <[email protected]>
  • Loading branch information
4 people authored Jan 13, 2024
1 parent a32b932 commit a03eded
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 7 deletions.
10 changes: 5 additions & 5 deletions .pipelines/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ stages:
name: 'onnxruntime-extensions-Linux-GPU-A10'
timeoutInMinutes: 120
variables:
ORT_VERSION: '1.16.2'
ORT_VERSION: '1.16.3'
TORCH_VERSION: 'torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118'
steps:
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
Expand All @@ -584,7 +584,7 @@ stages:
userRepository: 'microsoft/onnxruntime'
defaultVersionType: 'specificTag'
version: 'v$(ORT_VERSION)'
itemPattern: '*-linux-x64-$(ORT_VERSION)*'
itemPattern: '*-linux-x64-gpu-$(ORT_VERSION)*'
downloadPath: '$(Build.SourcesDirectory)'
displayName: Download the ONNXRuntime prebuilt package.

Expand All @@ -609,7 +609,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
Expand All @@ -626,7 +626,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
Expand All @@ -644,7 +644,7 @@ stages:
script: |
docker run --gpus all --rm \
--volume $(Build.SourcesDirectory):/onnxruntime-extensions \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-$(ORT_VERSION):/onnxruntime \
--volume $(Build.SourcesDirectory)/onnxruntime-linux-x64-gpu-$(ORT_VERSION):/onnxruntime \
-e CUDA_PATH=/usr/local/cuda-11.8 \
onnxruntime-extensionscuda11build \
/bin/bash -c "
Expand Down
Binary file added test/data/cuda/test_fastgelu.onnx
Binary file not shown.
Binary file added test/data/cuda/test_fastgelu_f16.onnx
Binary file not shown.
Binary file added test/data/cuda/test_negpos.onnx
Binary file not shown.
3 changes: 2 additions & 1 deletion test/shared_test/test_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ void RunSession(Ort::Session& session_object,
void TestInference(Ort::Env& env, const ORTCHAR_T* model_uri,
const std::vector<TestValue>& inputs,
const std::vector<TestValue>& outputs,
OutputValidator output_validator = nullptr);
OutputValidator output_validator = nullptr,
void* cuda_compute_stream = nullptr);

void GetTensorMutableDataString(const OrtApi& api, const OrtValue* value, std::vector<std::string>& output);
22 changes: 21 additions & 1 deletion test/shared_test/test_ortops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,31 @@ void ValidateOutputEqual(size_t output_idx, Ort::Value& actual, TestValue expect
}
}

// Builds a default set of CUDA execution-provider options, optionally wired to
// a caller-supplied compute stream.
//
// @param cuda_compute_stream  Optional CUDA stream handle; when non-null, the
//                             provider is configured to use it instead of
//                             creating its own stream.
// @return A fully-populated OrtCUDAProviderOptions value for device 0.
OrtCUDAProviderOptions CreateDefaultOrtCudaProviderOptionsWithCustomStream(void* cuda_compute_stream) {
  OrtCUDAProviderOptions opts;

  opts.device_id = 0;
  opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch::OrtCudnnConvAlgoSearchExhaustive;
  // No explicit cap on GPU memory usage.
  opts.gpu_mem_limit = std::numeric_limits<size_t>::max();
  opts.arena_extend_strategy = 0;
  opts.do_copy_in_default_stream = true;
  // Flag mirrors whether a user stream was actually provided (1/0 as int).
  opts.has_user_compute_stream = static_cast<int>(cuda_compute_stream != nullptr);
  opts.user_compute_stream = cuda_compute_stream;
  opts.default_memory_arena_cfg = nullptr;

  return opts;
}

void TestInference(Ort::Env& env, const ORTCHAR_T* model_uri,
const std::vector<TestValue>& inputs,
const std::vector<TestValue>& outputs,
OutputValidator output_validator) {
OutputValidator output_validator,
void* cuda_compute_stream) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
auto cuda_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(cuda_compute_stream);
session_options.AppendExecutionProvider_CUDA(cuda_options);
#endif
auto library_handle = RegisterExtOps(session_options);

// if session creation passes, model loads fine
Expand Down
38 changes: 38 additions & 0 deletions test/shared_test/test_ortops_cuda.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <filesystem>
#include <locale>
#include "gtest/gtest.h"
#include "ocos.h"
#include "test_kernel.hpp"

#ifdef USE_CUDA

// Exercises the FastGelu CUDA custom op end-to-end: feeds a float tensor plus a
// bias tensor through data/cuda/test_fastgelu.onnx and checks the expected
// elementwise output values.
TEST(CudaOp, test_fastgelu) {
  auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default");

  std::vector<TestValue> test_inputs(2);

  // Primary activation input.
  TestValue& x_in = test_inputs[0];
  x_in.name = "x";
  x_in.element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  x_in.dims = {6};
  x_in.values_float = {0., 1., 2., 3., 4., 5.};

  // Bias added before the GELU approximation.
  TestValue& bias_in = test_inputs[1];
  bias_in.name = "bias";
  bias_in.element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  bias_in.dims = {6};
  bias_in.values_float = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5};

  // Expected FastGelu(x + bias) results.
  std::vector<TestValue> test_outputs(1);
  TestValue& y_out = test_outputs[0];
  y_out.name = "y";
  y_out.element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  y_out.dims = {6};
  y_out.values_float = {0., 0.9505811, 2.1696784, 3.298689, 4.399991, 5.5};

  std::filesystem::path model_path = "data/cuda";
  model_path /= "test_fastgelu.onnx";

  TestInference(*ort_env, model_path.c_str(), test_inputs, test_outputs);
}

#endif

0 comments on commit a03eded

Please sign in to comment.