From e48767b62503cbd89765fa4b1619cb9e14cf8731 Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Fri, 26 Jul 2024 09:39:02 -0700 Subject: [PATCH] Optimization: remove extra copy of data buffer in Ogre2GpuRays and Ogre2DepthCamera (#1022) Signed-off-by: Ian Chen --- ogre2/src/Ogre2DepthCamera.cc | 28 +++++--------------- ogre2/src/Ogre2GpuRays.cc | 48 +++++++---------------------------- 2 files changed, 16 insertions(+), 60 deletions(-) diff --git a/ogre2/src/Ogre2DepthCamera.cc b/ogre2/src/Ogre2DepthCamera.cc index db9ad5d99..de756e211 100644 --- a/ogre2/src/Ogre2DepthCamera.cc +++ b/ogre2/src/Ogre2DepthCamera.cc @@ -92,15 +92,13 @@ class Ogre2DepthGaussianNoisePass : public Ogre2GaussianNoisePass /// \brief Private data for the Ogre2DepthCamera class class gz::rendering::Ogre2DepthCameraPrivate { - /// \brief The depth buffer + /// \brief The depth buffer - also the outgoing point cloud data used + /// by newRgbPointCloud event public: float *depthBuffer = nullptr; /// \brief Outgoing depth data, used by newDepthFrame event. public: float *depthImage = nullptr; - /// \brief Outgoing point cloud data, used by newRgbPointCloud event. 
- public: float *pointCloudImage = nullptr; - /// \brief maximum value used for data outside sensor range public: float dataMaxVal = gz::math::INF_D; @@ -316,12 +314,6 @@ void Ogre2DepthCamera::Destroy() this->dataPtr->depthImage = nullptr; } - if (this->dataPtr->pointCloudImage) - { - delete [] this->dataPtr->pointCloudImage; - this->dataPtr->pointCloudImage = nullptr; - } - if (!this->ogreCamera) return; @@ -1195,10 +1187,6 @@ void Ogre2DepthCamera::PostRender() { this->dataPtr->depthImage = new float[len]; } - if (!this->dataPtr->pointCloudImage) - { - this->dataPtr->pointCloudImage = new float[len * channelCount]; - } // fill depth data for (unsigned int i = 0; i < height; ++i) @@ -1216,10 +1204,8 @@ void Ogre2DepthCamera::PostRender() // point cloud data if (this->dataPtr->newRgbPointCloud.ConnectionCount() > 0u) { - memcpy(this->dataPtr->pointCloudImage, - this->dataPtr->depthBuffer, len * channelCount * sizeof(float)); this->dataPtr->newRgbPointCloud( - this->dataPtr->pointCloudImage, width, height, channelCount, + this->dataPtr->depthBuffer, width, height, channelCount, "PF_FLOAT32_RGBA"); // Uncomment to debug color output @@ -1229,7 +1215,7 @@ void Ogre2DepthCamera::PostRender() // for (unsigned int j = 0; j < width; ++j) // { // float color = - // this->dataPtr->pointCloudImage[step + j*channelCount + 3]; + // this->dataPtr->depthBuffer[step + j*channelCount + 3]; // // unpack rgb data // uint32_t *rgba = reinterpret_cast<uint32_t *>(&color); // unsigned int r = *rgba >> 24 & 0xFF; @@ -1246,9 +1232,9 @@ void Ogre2DepthCamera::PostRender() // { // for (unsigned int j = 0; j < width; ++j) // { - // gzdbg << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4] << "]" - // << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+1] << "]" - // << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+2] << "],"; + // gzdbg << "[" << this->dataPtr->depthBuffer[i*width*4+j*4] << "]" + // << "[" << this->dataPtr->depthBuffer[i*width*4+j*4+1] << "]" + // << "[" << 
this->dataPtr->depthBuffer[i*width*4+j*4+2] << "],"; // } // gzdbg << std::endl; // } diff --git a/ogre2/src/Ogre2GpuRays.cc b/ogre2/src/Ogre2GpuRays.cc index 1044dfde0..d07da5a86 100644 --- a/ogre2/src/Ogre2GpuRays.cc +++ b/ogre2/src/Ogre2GpuRays.cc @@ -127,9 +127,6 @@ class GZ_RENDERING_OGRE2_HIDDEN gz::rendering::Ogre2GpuRaysPrivate unsigned int, unsigned int, unsigned int, const std::string &)> newGpuRaysFrame; - /// \brief Raw buffer of gpu rays data. - public: float *gpuRaysBuffer = nullptr; - /// \brief Outgoing gpu rays data, used by newGpuRaysFrame event. public: float *gpuRaysScan = nullptr; @@ -597,12 +594,6 @@ void Ogre2GpuRays::Destroy() if (!this->dataPtr->ogreCamera) return; - if (this->dataPtr->gpuRaysBuffer) - { - delete [] this->dataPtr->gpuRaysBuffer; - this->dataPtr->gpuRaysBuffer = nullptr; - } - if (this->dataPtr->gpuRaysScan) { delete [] this->dataPtr->gpuRaysScan; @@ -1326,12 +1317,6 @@ void Ogre2GpuRays::PostRender() PixelFormat format = PF_FLOAT32_RGBA; unsigned int rawChannelCount = PixelUtil::ChannelCount(format); unsigned int bytesPerChannel = PixelUtil::BytesPerChannel(format); - int rawLen = width * height * rawChannelCount; - - if (!this->dataPtr->gpuRaysBuffer) - { - this->dataPtr->gpuRaysBuffer = new float[rawLen]; - } // blit data from gpu to cpu Ogre::Image2 image; @@ -1339,19 +1324,6 @@ void Ogre2GpuRays::PostRender() Ogre::TextureBox box = image.getData(0u); float *bufferTmp = static_cast<float *>(box.data); - // TODO(anyone): It seems wasteful to have gpuRaysBuffer at all - // We should be able to convert directly from bufferTmp to gpuRaysScan - - // copy data row by row. 
The texture box may not be a contiguous region of - // a texture - for (unsigned int i = 0; i < height; ++i) - { - unsigned int rawDataRowIdx = i * box.bytesPerRow / bytesPerChannel; - unsigned int rowIdx = i * width * rawChannelCount; - memcpy(&this->dataPtr->gpuRaysBuffer[rowIdx], &bufferTmp[rawDataRowIdx], - width * rawChannelCount * bytesPerChannel); - } - // Metal does not support RGB32_FLOAT so the internal texture format is // RGBA32_FLOAT. For backward compatibility, output data is kept in RGB // format instead of RGBA @@ -1364,21 +1336,19 @@ void Ogre2GpuRays::PostRender() // copy data from RGBA buffer to RGB buffer for (unsigned int row = 0; row < height; ++row) { + unsigned int rawDataRowIdx = row * box.bytesPerRow / bytesPerChannel; + unsigned int rowIdx = row * width * this->Channels(); + // the texture box step size could be larger than our image buffer step // size for (unsigned int column = 0; column < width; ++column) { - unsigned int idx = (row * width * this->Channels()) + - column * this->Channels(); - unsigned int rawIdx = (row * width * rawChannelCount) + - column * rawChannelCount; - - this->dataPtr->gpuRaysScan[idx] = - this->dataPtr->gpuRaysBuffer[rawIdx]; - this->dataPtr->gpuRaysScan[idx + 1] = - this->dataPtr->gpuRaysBuffer[rawIdx + 1]; - this->dataPtr->gpuRaysScan[idx + 2] = - this->dataPtr->gpuRaysBuffer[rawIdx + 2]; + unsigned int idx = rowIdx + column * this->Channels(); + unsigned int rawIdx = rawDataRowIdx + column * rawChannelCount; + + this->dataPtr->gpuRaysScan[idx] = bufferTmp[rawIdx]; + this->dataPtr->gpuRaysScan[idx + 1] = bufferTmp[rawIdx + 1]; + this->dataPtr->gpuRaysScan[idx + 2] = bufferTmp[rawIdx + 2]; } }