pth2onnx和onnx2trt是没有问题，但是trt推理出现box坐标是特别大的值，标签和得分是正确的 #447

Kingxudong · 2024-09-14T01:56:00Z

pth转换为onnx，用官方的代码转，并且测试是正确。但是onnx转换为trt，用的是tensorRT8.6.1,转换没有问题，但是推理出现box坐标是特别大的值，标签和得分是正确的。
D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\trtexec.exe --onnx=model.onnx --workspace=4096 --avgRuns=100 --shapes=images:1x3x640x640 --saveEngine=model.trt

我的C++推理代码如下

#include
#include
#include
#include <windows.h>
#include
#include
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "NvInferRuntimeCommon.h"
#include
#include <opencv2/core/utils/filesystem.hpp>

using namespace nvinfer1;
using namespace std;

#define CUDA_CHECK(call)
do {
cudaError_t status = call;
if (status != cudaSuccess) {
fprintf(stderr, "CUDA Error in file '%s' in line %d: %s\n",
FILE, LINE, cudaGetErrorString(status));
exit(EXIT_FAILURE);
}
} while (0)

class Logger : public ILogger {
public:
void log(Severity severity, const char* msg) noexcept override {
if (severity != Severity::kINFO)
std::cout << msg << std::endl;
}
} gLogger;
class TensorRTInference {
public:
TensorRTInference(const std::string& enginePath);
~TensorRTInference();
void doInference(const std::vectorcv::String& image_paths);

private:
IRuntime* runtime;
ICudaEngine* engine_;
IExecutionContext* context_;
void* buffers_[5];
int inputIndex1_;
int inputIndex2_;
int outputIndex1_;
int outputIndex2_;
int outputIndex3_;
int batchSize_;
int inputSize1_;
int inputSize2_;
int outputSize1_;
int outputSize2_;
int outputSize3_;

void allocateBuffers();
void preprocess(const cv::Mat& image, float* buffer1, float* buffer2);
void postprocess(float* output1, float* output2, float* output3, cv::Mat& image);

};

int volume(const Dims& dims) {
int vol = 1;
for (int i = 0; i < dims.nbDims; ++i) {
vol *= dims.d[i];
}
return vol;
}
TensorRTInference::TensorRTInference(const std::string& enginePath) : engine_(nullptr), context_(nullptr) {
std::ifstream engineFile(enginePath, std::ios::binary);
if (!engineFile.good()) {
std::cerr << "Error opening engine file: " << enginePath << std::endl;
return;
}

std::stringstream engineStream;
engineStream << engineFile.rdbuf();
engineFile.close();

runtime = createInferRuntime(gLogger);
if (!runtime) {
    std::cerr << "Error creating InferRuntime" << std::endl;
    return;
}

engine_ = runtime->deserializeCudaEngine(engineStream.str().data(), engineStream.str().size(), nullptr);
if (engine_ == nullptr) {
    std::cerr << "Error deserializing the engine file: " << enginePath << std::endl;
    return;
}

context_ = engine_->createExecutionContext();
if (!context_) {
    std::cerr << "Error creating ExecutionContext" << std::endl;
    return;
}

inputIndex1_ = engine_->getBindingIndex("images");
inputIndex2_ = engine_->getBindingIndex("orig_target_sizes");
outputIndex1_ = engine_->getBindingIndex("labels");
outputIndex2_ = engine_->getBindingIndex("boxes");
outputIndex3_ = engine_->getBindingIndex("scores");

// Get input sizes
const Dims& inputDims1 = engine_->getBindingDimensions(inputIndex1_);
const Dims& inputDims2 = engine_->getBindingDimensions(inputIndex2_);
for (int i = 0; i < inputDims2.nbDims; ++i) {
    std::cout << "inputDims2[" << i << "]: " << inputDims2.d[i] << std::endl;
}
const Dims& outputDims1 = engine_->getBindingDimensions(outputIndex1_);
const Dims& outputDims2 = engine_->getBindingDimensions(outputIndex2_);

const Dims& outputDims3 = engine_->getBindingDimensions(outputIndex3_);

batchSize_ = 1; 


inputSize1_ = volume(inputDims1) * batchSize_ * sizeof(float);
inputSize2_ = volume(inputDims2) * sizeof(float);
outputSize1_ = volume(outputDims1) * sizeof(float); 
outputSize2_ = volume(outputDims2) * sizeof(float); 
outputSize3_ = volume(outputDims3) * sizeof(float); 

cout << inputSize2_ << endl;

allocateBuffers();

}

std::vector ToTensor(cv::Mat image) {
if (image.empty()) {
std::cerr << "Error: Empty image" << std::endl;
return {};
}

if (image.channels() != 3) {
    std::cerr << "Error: Image must have 3 channels" << std::endl;
    return {};
}

image.convertTo(image, CV_32FC3, 1.0f / 255.0f);

// Convert image to tensor
std::vector<cv::Mat> channels(3);
cv::split(image, channels);

std::vector<float> tensor(image.total() * image.channels());
int index = 0;


for (int c = 0; c < 3; ++c) {
    for (int i = 0; i < channels[c].rows; ++i) {
        for (int j = 0; j < channels[c].cols; ++j) {
            tensor[index++] = channels[c].at<float>(i, j);
        }
    }
}

return tensor;

}
std::vector ToTensorAndNormalize(cv::Mat image) {
if (image.empty()) {
std::cerr << "Error: Empty image" << std::endl;
return {};
}

if (image.channels() != 3) {
    std::cerr << "Error: Image must have 3 channels" << std::endl;
    return {};
}
cv::resize(image, image, cv::Size(640.f, 640.f));
image.convertTo(image, CV_32FC3, 1.0f / 255.0f);

const float mean[3] = { 0.485f, 0.456f, 0.406f };
const float std[3] = { 0.229f, 0.224f, 0.225f };

std::vector<cv::Mat> channels(3);
cv::split(image, channels);

std::vector<float> input_tensor;
input_tensor.reserve(640 * 640 * 3);  


for (int c = 0; c < 3; ++c) {
    for (int i = 0; i < channels[c].rows; ++i) {
        for (int j = 0; j < channels[c].cols; ++j) {
            float pixel = (channels[c].at<float>(i, j) - mean[c]) / std[c];
            input_tensor.push_back(pixel);
        }
    }
}

return input_tensor;

}

TensorRTInference::~TensorRTInference() {
if (context_) {
context_->destroy();
}
if (engine_) {
engine_->destroy();
}
if (runtime) {
runtime->destroy();
}
for (int i = 0; i < 5; ++i) {
if (buffers_[i]) cudaFree(buffers_[i]);
}
}

void TensorRTInference::preprocess(const cv::Mat& image, float* buffer1, float* buffer2) {
cv::Mat rgb_image;

if (image.channels() == 1) {
    cv::cvtColor(image, rgb_image, cv::COLOR_GRAY2RGB);
}
else if (image.channels() == 4) {
    cv::cvtColor(image, rgb_image, cv::COLOR_BGRA2RGB);
}
else if (image.channels() == 3) {
    cv::cvtColor(image, rgb_image, cv::COLOR_BGR2RGB);
}
else {
    rgb_image = image;  
}

std::vector<float> tensor1 = ToTensorAndNormalize(rgb_image);

//float tensor[1][2] = { {1, 1} };
//float orig_target_sizes[2] = { 640.0f, 640.0f };
float orig_target_sizes[2] = { static_cast<float>(image.cols), static_cast<float>(image.rows) };
std::cout << "Original target sizes: " << orig_target_sizes[0] << ", " << orig_target_sizes[1] << std::endl;

std::cout << "Input tensor size: " << tensor1.size() << std::endl;
//std::cout << "Input tensor size: " << tensor1.size() << std::endl;

std::memcpy(buffer1, tensor1.data(), inputSize1_);

std::memcpy(buffer2, orig_target_sizes, inputSize2_);

}

void TensorRTInference::postprocess(float* output1, float* output2, float* output3, cv::Mat& image) {
float numDetections = outputSize2_ / (4 * sizeof(float));
float confThreshold = 0.5f;

std::cout << "Box " << (output2 + 1 * 4)[0] <<  std::endl;

for (int i = 0; i < numDetections; ++i) {

    float* bbox = output2 + i * 4;
    float labels = output1[i];

    int x1 = static_cast<float>(bbox[0]);
    int y1 = static_cast<float>(bbox[1]);
    int x2 = static_cast<float>(bbox[2]);
    int y2 = static_cast<float>(bbox[3]);
    
    // Draw bounding box
    cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
    std::string label = "label: " + std::to_string(labels);
    cv::putText(image, label, cv::Point(x1, y1 - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);

}

float* additionalData = output3;

std::cout << "First values of output3:" << std::endl;
for (int i = 0; i < 50; ++i) {
    std::cout << additionalData[i] << " ";
}
std::cout << std::endl;

}

std::string replaceFolderName(const std::string& path, const std::string& oldFolder, const std::string& newFolder) {
size_t pos = path.find(oldFolder);
if (pos != std::string::npos) {
std::string newPath = path.substr(0, pos) + newFolder + path.substr(pos + oldFolder.length());
return newPath;
}
else {
return path;
}
}

void softmax(float* output, std::vector& probs) {
probs.clear();
float sum = 0.0f;
for (int i = 0; i < 2; ++i) {
probs.push_back(std::exp(output[i]));
sum += probs.back();
}
for (int i = 0; i < 2; ++i) {
probs[i] /= sum;
}
}
void TensorRTInference::allocateBuffers() {
std::cout << "Allocating buffers..." << std::endl;
CUDA_CHECK(cudaMalloc(&buffers_[inputIndex1_], inputSize1_));
CUDA_CHECK(cudaMalloc(&buffers_[inputIndex2_], inputSize2_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex1_], outputSize1_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex2_], outputSize2_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex3_], outputSize3_));
std::cout << "Buffers allocated successfully." << std::endl;
}
void TensorRTInference::doInference(const std::vectorcv::String& image_paths) {
float* inputBuffer1 = new float[inputSize1_ / sizeof(float)];
float* inputBuffer2 = new float[inputSize2_ / sizeof(float)];

float* outputBuffer1 = new float[outputSize1_ / sizeof(float)];
float* outputBuffer2 = new float[outputSize2_ / sizeof(float)];
float* outputBuffer3 = new float[outputSize3_ / sizeof(float)];

for (const auto& filename : image_paths) {
    std::cout << "Processing image: " << filename << std::endl;
    clock_t start = clock();
    cv::Mat image = cv::imread(filename);
    int height = image.rows;
    int width = image.cols;
    int channels = image.channels();

    // Print the shape of the image
    std::cout << "Image shape: (" << height << ", " << width << ", " << channels << ")" << std::endl;

    if (image.empty()) {
        std::cerr << "Error loading image: " << filename << std::endl;
        continue;
    }

    preprocess(image, inputBuffer1, inputBuffer2);


    clock_t gpuStart = clock();
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex1_], inputBuffer1, inputSize1_, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex2_], inputBuffer2, inputSize2_, cudaMemcpyHostToDevice));

    bool success = context_->executeV2(buffers_);
    if (!success) {
        std::cerr << "TensorRT execution failed." << std::endl;
        continue;
    }

    CUDA_CHECK(cudaMemcpy(outputBuffer1, buffers_[outputIndex1_], outputSize1_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer2, buffers_[outputIndex2_], outputSize2_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer3, buffers_[outputIndex3_], outputSize3_, cudaMemcpyDeviceToHost));

    clock_t gpuEnd = clock();
    std::cout << "GPU inference time: " << (gpuEnd - gpuStart) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;

    postprocess(outputBuffer1, outputBuffer2, outputBuffer3, image);
    std::cout << "First values of outputBuffer1:" << std::endl;
    for (int i = 0; i < 10; ++i) {
        std::cout << outputBuffer2[i] << " ";
    }
    std::cout << std::endl;
    std::string output_path = replaceFolderName(filename, "debug", "debug_out");
    cv::imwrite(output_path, image);

}

delete[] inputBuffer1;
delete[] inputBuffer2;
delete[] outputBuffer1;
delete[] outputBuffer2;
delete[] outputBuffer3;

}

int main(int argc, char** argv) {
try {
cudaSetDevice(0);
TensorRTInference inference("D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\cfg_model.trt");
std::string img_dir = "E:\YOLOv8-main\val2017\debug";
std::vectorcv::String image_paths;
cv::utils::fs::glob(img_dir, "*.png", image_paths);
inference.doInference(image_paths);
}
catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
能帮忙检查一下吗

The text was updated successfully, but these errors were encountered:

lyuwenyu · 2024-09-14T02:01:11Z

可以先用这个脚本测一下导出的结果

https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetrv2_pytorch/references/deploy/rtdetrv2_tensorrt.py

Kingxudong · 2024-09-14T07:57:06Z

可以先用这个脚本测一下导出的结果

https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetrv2_pytorch/references/deploy/rtdetrv2_tensorrt.py

有测试，无论是官方提供的权重文件还是我自己的权重文件，都出向没有“create_execution_context”这个模块，未定义

lyuwenyu · 2024-09-14T08:46:41Z

trt版本是多少

box不对看下是不是数据类型的问题

Kingxudong · 2024-09-14T12:27:51Z

trt版本是多少

box不对看下是不是数据类型的问题

trt版本是8.6.1；
但是标签和得分都是对的

gk966988 · 2024-09-19T03:11:27Z

遇见相同的问题，在onnx转tensorrt后，使用https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetrv2_pytorch/references/deploy/rtdetrv2_tensorrt.py脚本推理结果不对。

Kingxudong assigned lyuwenyu Sep 14, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

pth2onnx和onnx2trt是没有问题，但是trt推理出现box坐标是特别大的值，标签和得分是正确的 #447

pth2onnx和onnx2trt是没有问题，但是trt推理出现box坐标是特别大的值，标签和得分是正确的 #447

Kingxudong commented Sep 14, 2024

lyuwenyu commented Sep 14, 2024

Kingxudong commented Sep 14, 2024

lyuwenyu commented Sep 14, 2024

Kingxudong commented Sep 14, 2024

gk966988 commented Sep 19, 2024

pth2onnx和onnx2trt是没有问题，但是trt推理出现box坐标是特别大的值，标签和得分是正确的 #447

pth2onnx和onnx2trt是没有问题，但是trt推理出现box坐标是特别大的值，标签和得分是正确的 #447

Comments

Kingxudong commented Sep 14, 2024

lyuwenyu commented Sep 14, 2024

Kingxudong commented Sep 14, 2024

lyuwenyu commented Sep 14, 2024

Kingxudong commented Sep 14, 2024

gk966988 commented Sep 19, 2024