Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pth2onnx和onnx2trt是没有问题,但是trt推理出现box坐标是特别大的值,标签和得分是正确的 #447

Open
Kingxudong opened this issue Sep 14, 2024 · 5 comments
Assignees

Comments

@Kingxudong
Copy link

pth转换为onnx,用官方的代码转,并且测试是正确。但是onnx转换为trt,用的是tensorRT8.6.1,转换没有问题,但是推理出现box坐标是特别大的值,标签和得分是正确的。
D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\trtexec.exe --onnx=model.onnx --workspace=4096 --avgRuns=100 --shapes=images:1x3x640x640 --saveEngine=model.trt

我的C++推理代码如下

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <windows.h>

#include <opencv2/opencv.hpp>
#include <opencv2/core/utils/filesystem.hpp>

#include <cuda_runtime_api.h>

#include "NvInfer.h"
#include "NvInferRuntimeCommon.h"

using namespace nvinfer1;
using namespace std;

// Abort with file/line context when a CUDA runtime call fails.
// Wrapped in do { } while (0) so the macro acts as a single statement
// inside unbraced if/else. The pasted version had lost the backslash
// line continuations and the underscores of __FILE__/__LINE__.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t status = (call);                                        \
        if (status != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA Error in file '%s' in line %d: %s\n",     \
                    __FILE__, __LINE__, cudaGetErrorString(status));        \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Minimal TensorRT logger: forwards everything except plain INFO
// messages to stdout. A single global instance (gLogger) is shared by
// the whole program.
class Logger : public ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        // Suppress only the (very chatty) INFO level; warnings and
        // errors are always printed.
        if (severity == Severity::kINFO)
            return;
        std::cout << msg << std::endl;
    }
} gLogger;
class TensorRTInference {
public:
TensorRTInference(const std::string& enginePath);
~TensorRTInference();
void doInference(const std::vectorcv::String& image_paths);

private:
IRuntime* runtime;
ICudaEngine* engine_;
IExecutionContext* context_;
void* buffers_[5];
int inputIndex1_;
int inputIndex2_;
int outputIndex1_;
int outputIndex2_;
int outputIndex3_;
int batchSize_;
int inputSize1_;
int inputSize2_;
int outputSize1_;
int outputSize2_;
int outputSize3_;

void allocateBuffers();
void preprocess(const cv::Mat& image, float* buffer1, float* buffer2);
void postprocess(float* output1, float* output2, float* output3, cv::Mat& image);

};

// Number of elements described by a Dims object: the product of all
// dimension extents (1 for an empty Dims).
int volume(const Dims& dims) {
    int count = 1;
    for (int axis = 0; axis < dims.nbDims; ++axis)
        count *= dims.d[axis];
    return count;
}
// Loads a serialized engine from disk, builds runtime/engine/context,
// resolves the five RT-DETR binding indices and computes the byte size
// of each binding's buffer.
//
// Throws std::runtime_error on any failure (main() already catches
// std::exception). The original returned early instead, leaving a
// zombie object whose doInference() would dereference null pointers
// and whose destructor would cudaFree() indeterminate buffers_.
TensorRTInference::TensorRTInference(const std::string& enginePath) : engine_(nullptr), context_(nullptr) {
    for (int i = 0; i < 5; ++i) {
        buffers_[i] = nullptr;  // so ~TensorRTInference never frees garbage
    }

    std::ifstream engineFile(enginePath, std::ios::binary);
    if (!engineFile.good()) {
        throw std::runtime_error("Error opening engine file: " + enginePath);
    }
    std::stringstream engineStream;
    engineStream << engineFile.rdbuf();
    engineFile.close();
    // Keep a single copy alive; the original called engineStream.str()
    // twice, materializing two temporary strings.
    const std::string engineBlob = engineStream.str();

    runtime = createInferRuntime(gLogger);
    if (!runtime) {
        throw std::runtime_error("Error creating InferRuntime");
    }

    // TRT 8.x signature: the IPluginFactory argument was removed.
    engine_ = runtime->deserializeCudaEngine(engineBlob.data(), engineBlob.size());
    if (engine_ == nullptr) {
        throw std::runtime_error("Error deserializing the engine file: " + enginePath);
    }

    context_ = engine_->createExecutionContext();
    if (!context_) {
        throw std::runtime_error("Error creating ExecutionContext");
    }

    inputIndex1_ = engine_->getBindingIndex("images");
    inputIndex2_ = engine_->getBindingIndex("orig_target_sizes");
    outputIndex1_ = engine_->getBindingIndex("labels");
    outputIndex2_ = engine_->getBindingIndex("boxes");
    outputIndex3_ = engine_->getBindingIndex("scores");
    // getBindingIndex returns -1 for an unknown name; indexing
    // buffers_[-1] later would be UB, so fail loudly here.
    if (inputIndex1_ < 0 || inputIndex2_ < 0 ||
        outputIndex1_ < 0 || outputIndex2_ < 0 || outputIndex3_ < 0) {
        throw std::runtime_error("Engine is missing one of the expected bindings "
                                 "(images / orig_target_sizes / labels / boxes / scores)");
    }

    const Dims inputDims1 = engine_->getBindingDimensions(inputIndex1_);
    const Dims inputDims2 = engine_->getBindingDimensions(inputIndex2_);
    for (int i = 0; i < inputDims2.nbDims; ++i) {
        std::cout << "inputDims2[" << i << "]: " << inputDims2.d[i] << std::endl;
    }
    const Dims outputDims1 = engine_->getBindingDimensions(outputIndex1_);
    const Dims outputDims2 = engine_->getBindingDimensions(outputIndex2_);
    const Dims outputDims3 = engine_->getBindingDimensions(outputIndex3_);

    batchSize_ = 1;

    // NOTE(review): with an explicit-batch engine the binding dims
    // already include the batch dimension, so multiplying by
    // batchSize_ again would over-allocate for batch > 1. It is a
    // no-op here because batchSize_ == 1.
    inputSize1_ = volume(inputDims1) * batchSize_ * sizeof(float);
    // All sizes assume 4-byte elements. That happens to hold even for
    // INT32 bindings (TRT 8.6 narrows ONNX int64 inputs such as
    // orig_target_sizes to INT32), but verify per binding with
    // getBindingDataType() if the model changes.
    inputSize2_ = volume(inputDims2) * sizeof(float);
    outputSize1_ = volume(outputDims1) * sizeof(float);
    outputSize2_ = volume(outputDims2) * sizeof(float);
    outputSize3_ = volume(outputDims3) * sizeof(float);

    cout << inputSize2_ << endl;

    allocateBuffers();
}

// Converts a 3-channel image to planar CHW float data scaled to [0,1]
// (no mean/std normalization). Returns an empty vector on bad input.
// The return type had lost its <float> template argument in the paste.
std::vector<float> ToTensor(cv::Mat image) {
    if (image.empty()) {
        std::cerr << "Error: Empty image" << std::endl;
        return {};
    }
    if (image.channels() != 3) {
        std::cerr << "Error: Image must have 3 channels" << std::endl;
        return {};
    }

    image.convertTo(image, CV_32FC3, 1.0f / 255.0f);  // scale to [0,1]

    // HWC -> CHW: split into per-channel planes, then copy each plane
    // contiguously.
    std::vector<cv::Mat> channels(3);
    cv::split(image, channels);

    std::vector<float> tensor(image.total() * image.channels());
    int index = 0;
    for (int c = 0; c < 3; ++c) {
        for (int i = 0; i < channels[c].rows; ++i) {
            // Row pointer access avoids the per-element bounds logic
            // of cv::Mat::at<>.
            const float* row = channels[c].ptr<float>(i);
            for (int j = 0; j < channels[c].cols; ++j) {
                tensor[index++] = row[j];
            }
        }
    }
    return tensor;
}
// Resizes to the network input (640x640), scales to [0,1], applies the
// ImageNet per-channel mean/std, and emits planar CHW float data.
// Returns an empty vector on bad input.
// The return type had lost its <float> template argument in the paste;
// cv::Size now takes the int arguments it expects instead of floats.
std::vector<float> ToTensorAndNormalize(cv::Mat image) {
    if (image.empty()) {
        std::cerr << "Error: Empty image" << std::endl;
        return {};
    }
    if (image.channels() != 3) {
        std::cerr << "Error: Image must have 3 channels" << std::endl;
        return {};
    }
    cv::resize(image, image, cv::Size(640, 640));
    image.convertTo(image, CV_32FC3, 1.0f / 255.0f);

    // ImageNet normalization constants (RGB order — caller must pass
    // an RGB image).
    const float mean[3] = { 0.485f, 0.456f, 0.406f };
    const float std[3] = { 0.229f, 0.224f, 0.225f };

    // HWC -> CHW via per-channel planes.
    std::vector<cv::Mat> channels(3);
    cv::split(image, channels);

    std::vector<float> input_tensor;
    input_tensor.reserve(640 * 640 * 3);

    for (int c = 0; c < 3; ++c) {
        for (int i = 0; i < channels[c].rows; ++i) {
            const float* row = channels[c].ptr<float>(i);
            for (int j = 0; j < channels[c].cols; ++j) {
                input_tensor.push_back((row[j] - mean[c]) / std[c]);
            }
        }
    }

    return input_tensor;
}

// Tears down TensorRT objects (context before engine before runtime —
// the reverse of construction order) and releases the device buffers.
// NOTE(review): destroy() is deprecated in TRT 8.x (plain `delete` is
// the replacement) but still functional.
// NOTE(review): buffers_ is never zero-initialized by the class, so if
// the constructor returned early the cudaFree() calls below operate on
// indeterminate pointers — confirm buffers_ is nulled before use.
TensorRTInference::~TensorRTInference() {
if (context_) {
context_->destroy();
}
if (engine_) {
engine_->destroy();
}
if (runtime) {
runtime->destroy();
}
// Free whichever device buffers were allocated.
for (int i = 0; i < 5; ++i) {
if (buffers_[i]) cudaFree(buffers_[i]);
}
}

// Fills the two host-side input buffers for one image:
//   buffer1 <- normalized CHW float tensor (1x3x640x640)
//   buffer2 <- original image size [w, h] for "orig_target_sizes"
//
// BUG FIX for the huge-box symptom reported in this issue: the ONNX
// model declares orig_target_sizes as int64, which TensorRT 8.6
// narrows to an INT32 binding. The old code memcpy'd *float* values
// into that binding, so the network rescaled its normalized boxes by
// garbage integers — labels and scores were unaffected because they do
// not depend on this input. The sizes are now written as 32-bit
// integers (bit pattern copied through the float* parameter so the
// method signature is unchanged).
void TensorRTInference::preprocess(const cv::Mat& image, float* buffer1, float* buffer2) {
    cv::Mat rgb_image;

    // Normalize any channel layout to 3-channel RGB.
    if (image.channels() == 1) {
        cv::cvtColor(image, rgb_image, cv::COLOR_GRAY2RGB);
    }
    else if (image.channels() == 4) {
        cv::cvtColor(image, rgb_image, cv::COLOR_BGRA2RGB);
    }
    else if (image.channels() == 3) {
        cv::cvtColor(image, rgb_image, cv::COLOR_BGR2RGB);
    }
    else {
        rgb_image = image;
    }

    std::vector<float> tensor1 = ToTensorAndNormalize(rgb_image);

    // [width, height] — the order used by the official deploy scripts.
    const int32_t orig_target_sizes[2] = {
        static_cast<int32_t>(image.cols),
        static_cast<int32_t>(image.rows)
    };
    std::cout << "Original target sizes: " << orig_target_sizes[0] << ", " << orig_target_sizes[1] << std::endl;
    std::cout << "Input tensor size: " << tensor1.size() << std::endl;

    std::memcpy(buffer1, tensor1.data(), inputSize1_);
    // Copy exactly the 8 bytes of the two int32 values; buffer2 merely
    // stages raw bytes for the INT32 device binding.
    std::memcpy(buffer2, orig_target_sizes, sizeof(orig_target_sizes));
}

// Draws detections whose confidence clears confThreshold onto `image`
// and prints the leading scores for debugging.
//   output1 -> labels  (one value per detection)
//   output2 -> boxes   (x1,y1,x2,y2 per detection, absolute pixels)
//   output3 -> scores  (one confidence per detection)
// NOTE(review): labels and scores are read through float*; if the
// engine exports labels as INT32 this reinterprets the bits — confirm
// the binding dtype with getBindingDataType().
//
// Fixes vs. original: the loop count was a float; confThreshold was
// declared but never applied (every query, typically 300, was drawn);
// coordinates were static_cast<float> into int variables; the debug
// dump read a hard-coded 50 scores, potentially past the end of
// output3; labels printed as "2.000000" instead of "2".
void TensorRTInference::postprocess(float* output1, float* output2, float* output3, cv::Mat& image) {
    const int numDetections = static_cast<int>(outputSize2_ / (4 * sizeof(float)));
    const float confThreshold = 0.5f;

    for (int i = 0; i < numDetections; ++i) {
        // Skip low-confidence queries — most of the raw output is
        // background.
        if (output3[i] < confThreshold) {
            continue;
        }
        const float* bbox = output2 + i * 4;
        const int labelId = static_cast<int>(output1[i]);

        const int x1 = static_cast<int>(bbox[0]);
        const int y1 = static_cast<int>(bbox[1]);
        const int x2 = static_cast<int>(bbox[2]);
        const int y2 = static_cast<int>(bbox[3]);

        cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
        const std::string label = "label: " + std::to_string(labelId);
        cv::putText(image, label, cv::Point(x1, y1 - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
    }

    // Debug dump, bounded by the real detection count instead of a
    // hard-coded 50.
    std::cout << "First values of output3:" << std::endl;
    const int dumpCount = numDetections < 50 ? numDetections : 50;
    for (int i = 0; i < dumpCount; ++i) {
        std::cout << output3[i] << " ";
    }
    std::cout << std::endl;
}

// Returns a copy of `path` with the FIRST occurrence of `oldFolder`
// replaced by `newFolder`; the path comes back unchanged when
// `oldFolder` does not occur.
std::string replaceFolderName(const std::string& path, const std::string& oldFolder, const std::string& newFolder) {
    const size_t pos = path.find(oldFolder);
    if (pos == std::string::npos) {
        return path;  // nothing to replace
    }
    std::string result = path;
    result.replace(pos, oldFolder.length(), newFolder);
    return result;
}

// Softmax over the first `numClasses` logits in `output`:
// probs[i] = exp(output[i]) / sum_j exp(output[j]).
// Fixes the parameter type, which had lost its <float> template
// argument, and generalizes the hard-coded class count 2 into a
// defaulted parameter so existing two-class callers are unaffected.
// NOTE(review): no max-subtraction, so very large logits can overflow
// exp(); fine for the small scores this program handles.
void softmax(float* output, std::vector<float>& probs, int numClasses = 2) {
    probs.clear();
    float sum = 0.0f;
    for (int i = 0; i < numClasses; ++i) {
        probs.push_back(std::exp(output[i]));
        sum += probs.back();
    }
    for (int i = 0; i < numClasses; ++i) {
        probs[i] /= sum;
    }
}
// Allocates one device buffer per binding, indexed by binding index so
// buffers_ can be handed directly to executeV2(). CUDA_CHECK aborts
// the process on allocation failure.
// NOTE(review): assumes the engine exposes exactly the 5 expected
// bindings with indices 0..4; any other index would write outside
// buffers_[5].
void TensorRTInference::allocateBuffers() {
std::cout << "Allocating buffers..." << std::endl;
CUDA_CHECK(cudaMalloc(&buffers_[inputIndex1_], inputSize1_));
CUDA_CHECK(cudaMalloc(&buffers_[inputIndex2_], inputSize2_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex1_], outputSize1_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex2_], outputSize2_));
CUDA_CHECK(cudaMalloc(&buffers_[outputIndex3_], outputSize3_));
std::cout << "Buffers allocated successfully." << std::endl;
}
void TensorRTInference::doInference(const std::vectorcv::String& image_paths) {
float* inputBuffer1 = new float[inputSize1_ / sizeof(float)];
float* inputBuffer2 = new float[inputSize2_ / sizeof(float)];

float* outputBuffer1 = new float[outputSize1_ / sizeof(float)];
float* outputBuffer2 = new float[outputSize2_ / sizeof(float)];
float* outputBuffer3 = new float[outputSize3_ / sizeof(float)];

for (const auto& filename : image_paths) {
    std::cout << "Processing image: " << filename << std::endl;
    clock_t start = clock();
    cv::Mat image = cv::imread(filename);
    int height = image.rows;
    int width = image.cols;
    int channels = image.channels();

    // Print the shape of the image
    std::cout << "Image shape: (" << height << ", " << width << ", " << channels << ")" << std::endl;

    if (image.empty()) {
        std::cerr << "Error loading image: " << filename << std::endl;
        continue;
    }

    preprocess(image, inputBuffer1, inputBuffer2);


    clock_t gpuStart = clock();
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex1_], inputBuffer1, inputSize1_, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex2_], inputBuffer2, inputSize2_, cudaMemcpyHostToDevice));

    bool success = context_->executeV2(buffers_);
    if (!success) {
        std::cerr << "TensorRT execution failed." << std::endl;
        continue;
    }

    CUDA_CHECK(cudaMemcpy(outputBuffer1, buffers_[outputIndex1_], outputSize1_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer2, buffers_[outputIndex2_], outputSize2_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer3, buffers_[outputIndex3_], outputSize3_, cudaMemcpyDeviceToHost));

    clock_t gpuEnd = clock();
    std::cout << "GPU inference time: " << (gpuEnd - gpuStart) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;

    postprocess(outputBuffer1, outputBuffer2, outputBuffer3, image);
    std::cout << "First values of outputBuffer1:" << std::endl;
    for (int i = 0; i < 10; ++i) {
        std::cout << outputBuffer2[i] << " ";
    }
    std::cout << std::endl;
    std::string output_path = replaceFolderName(filename, "debug", "debug_out");
    cv::imwrite(output_path, image);

}

delete[] inputBuffer1;
delete[] inputBuffer2;
delete[] outputBuffer1;
delete[] outputBuffer2;
delete[] outputBuffer3;

}

int main(int argc, char** argv) {
try {
cudaSetDevice(0);
TensorRTInference inference("D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\cfg_model.trt");
std::string img_dir = "E:\YOLOv8-main\val2017\debug";
std::vectorcv::String image_paths;
cv::utils::fs::glob(img_dir, "*.png", image_paths);
inference.doInference(image_paths);
}
catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
能帮忙检查一下吗

@lyuwenyu
Copy link
Owner

@Kingxudong
Copy link
Author

可以先用这个脚本测一下导出的结果

https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetrv2_pytorch/references/deploy/rtdetrv2_tensorrt.py

有测试,无论是官方提供的权重文件还是我自己的权重文件,都出向没有“create_execution_context”这个模块,未定义

@lyuwenyu
Copy link
Owner

trt版本是多少


box不对看下是不是数据类型的问题

@Kingxudong
Copy link
Author

trt版本是多少

box不对看下是不是数据类型的问题

trt版本是8.6.1;
但是标签和得分都是对的

@gk966988
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants