From bec5c82137c696a2411d9b643804cfe3acc163d9 Mon Sep 17 00:00:00 2001 From: Sylvain Doremus Date: Mon, 29 May 2023 00:08:55 +0200 Subject: [PATCH] Clusters: Added support for lights morton codes sorting. --- .github/workflows/cmake.yml | 5 +- data/vcpkg/ports/rendergraph/portfile.cmake | 8 +- data/vcpkg/ports/shaderwriter/portfile.cmake | 8 +- external/RenderGraph | 2 +- external/ShaderWriter | 2 +- external/vcpkg | 2 +- .../Core/Castor3D/Buffer/GpuBufferOffset.hpp | 30 + .../Render/Clustered/BuildLightsBVH.hpp | 2 +- .../Render/Clustered/ClusteredModule.hpp | 2 + .../Render/Clustered/FrustumClusters.hpp | 44 +- .../Render/Clustered/SortLightsMortonCode.hpp | 18 + .../Core/CastorUtils/Design/DataHolder.hpp | 2 +- .../Core/CastorUtils/Exception/Assertion.hpp | 1 - source/Core/Castor3D/Buffer/GpuBufferPool.cpp | 107 +- source/Core/Castor3D/CMakeLists.txt | 2 + source/Core/Castor3D/DebugDefines.hpp | 1 + .../Clustered/AssignLightsToClusters.cpp | 41 +- .../Render/Clustered/BuildLightsBVH.cpp | 90 +- .../Render/Clustered/ComputeClustersAABB.cpp | 4 +- .../Clustered/ComputeLightsMortonCode.cpp | 36 +- .../Render/Clustered/FrustumClusters.cpp | 55 +- .../Render/Clustered/ReduceLightsAABB.cpp | 10 +- .../Render/Clustered/SortLightsMortonCode.cpp | 970 ++++++++++++++++++ .../VoxelConeTracing/Voxelizer.cpp | 2 +- .../Lighting/ClusteredLightsPipeline.cpp | 2 +- .../Render/Opaque/Lighting/LightsPipeline.cpp | 2 +- .../Render/Opaque/VisibilityReorderPass.cpp | 2 +- source/Core/Castor3D/Render/RenderTarget.cpp | 1 + .../Core/Castor3D/Scene/Light/SpotLight.cpp | 2 +- .../CastorUtils/CastorUtilsPrerequisites.cpp | 1 - tools/CastorTestLauncher/MainFrame.cpp | 2 +- tools/CastorViewer/MainFrame.cpp | 14 +- vcpkg.json | 3 +- 33 files changed, 1386 insertions(+), 87 deletions(-) create mode 100644 include/Core/Castor3D/Render/Clustered/SortLightsMortonCode.hpp create mode 100644 source/Core/Castor3D/Render/Clustered/SortLightsMortonCode.cpp diff --git a/.github/workflows/cmake.yml 
b/.github/workflows/cmake.yml index 1c8fc0b3fd..6f82b4e01d 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -25,6 +25,9 @@ jobs: - name: Checkout submodules run: | git submodule update --init -- "CMake" + git submodule update --init --recursive -- "external/Ashes" + git submodule update --init --recursive -- "external/ShaderWriter" + git submodule update --init --recursive -- "external/RenderGraph" git submodule update --init -- "external/vcpkg" - name: Setup vcpkg uses: lukka/run-vcpkg@v10 @@ -51,7 +54,7 @@ jobs: shell: bash working-directory: ${{runner.workspace}}/build-${{ matrix.buildType }} run: | - cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.buildType }} -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/external/vcpkg/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/package/Castor3D -DPROJECTS_WARNINGS_AS_ERRORS=$PROJ_WAE -DPROJECTS_UNITY_BUILD=ON -DPROJECTS_USE_PRECOMPILED_HEADERS=OFF -DSDW_GENERATE_SOURCE=OFF -DCASTOR_BUILD_PLUGINS=ON -DCASTOR_BUILDGRP_INTEROP=OFF -DCASTOR_BUILDGRP_TEST=OFF -DCASTOR_USE_GLSLANG=ON -DCASTOR_DISABLE_DELAYED_INITIALISATION=ON -DASHES_BUILD_RENDERER_OGL=OFF -DCASTOR_BUILDGRP_SAMPLE=OFF -DCASTOR_BUILDGRP_TOOL=OFF -DCASTOR_FORCE_VCPKG_SUBMODULES=ON + cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.buildType }} -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/external/vcpkg/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/package/Castor3D -DPROJECTS_WARNINGS_AS_ERRORS=$PROJ_WAE -DPROJECTS_UNITY_BUILD=ON -DPROJECTS_USE_PRECOMPILED_HEADERS=OFF -DSDW_GENERATE_SOURCE=OFF -DCASTOR_BUILD_PLUGINS=ON -DCASTOR_BUILDGRP_INTEROP=OFF -DCASTOR_BUILDGRP_TEST=OFF -DCASTOR_USE_GLSLANG=ON -DCASTOR_DISABLE_DELAYED_INITIALISATION=ON -DASHES_BUILD_RENDERER_OGL=OFF -DCASTOR_BUILDGRP_SAMPLE=OFF -DCASTOR_BUILDGRP_TOOL=OFF - name: Build if: steps.configure.conclusion == 'success' id: build diff --git a/data/vcpkg/ports/rendergraph/portfile.cmake 
b/data/vcpkg/ports/rendergraph/portfile.cmake index e8818301af..3f722d50a8 100644 --- a/data/vcpkg/ports/rendergraph/portfile.cmake +++ b/data/vcpkg/ports/rendergraph/portfile.cmake @@ -1,15 +1,15 @@ vcpkg_from_github(OUT_SOURCE_PATH SOURCE_PATH REPO DragonJoker/RenderGraph - REF c5bdda793361901762d273b76f5a2355f1ab5411 + REF e9f6b3243095d832f409e15fbafff4f73af6e401 HEAD_REF master - SHA512 c9549ebb6109d6a3a5673b4180fe6f9491d14f8666714451b3bc6b0cd8b6d4a57c79d5122b4729af0e98580418ff0089700ec71c8882744a342e58b4d19be0e3 + SHA512 3609da52a6f5c5c213fcc56416cfc714403d0bc80742540990cab2973b2ee3e41e9629b04f379db84c2194acff6cecf11c4e86d4167ca78aa8606a85cbcaa433 ) vcpkg_from_github(OUT_SOURCE_PATH CMAKE_SOURCE_PATH REPO DragonJoker/CMakeUtils - REF 89a4c8fd4f0a464403676b6b1f1c5d178f6255b3 + REF 3818effff171f863d0c23e6fbbf79911f03cc6d3 HEAD_REF master - SHA512 98c46a563f2e4a28d9c91f4f255c500118dd0c66a4422d42969bdcc16c1493581db45b709fc3167e1ced8fa628d51880cb459df0d6b3dc013b945f66597ec768 + SHA512 3a13b371adf24f530fdef6005d3a9105185eca131c9b8aba68615a593f76d6aac2c3c7f0ecf6ce430e2b854afca4abd99f1400a1e6665478c9daa34a8dac6f5b ) get_filename_component(SRC_PATH "${CMAKE_SOURCE_PATH}" DIRECTORY) diff --git a/data/vcpkg/ports/shaderwriter/portfile.cmake b/data/vcpkg/ports/shaderwriter/portfile.cmake index 1e09a154b0..aaf33ee3cf 100644 --- a/data/vcpkg/ports/shaderwriter/portfile.cmake +++ b/data/vcpkg/ports/shaderwriter/portfile.cmake @@ -1,15 +1,15 @@ vcpkg_from_github(OUT_SOURCE_PATH SOURCE_PATH REPO DragonJoker/ShaderWriter - REF 42aa67fda5b2a72f21c0fcc9f51c2ed2838e1360 + REF 6aa2e9bb7f51c80df3ebf5f3d7ea5936d87e671f HEAD_REF development - SHA512 37826bf50ae09e2e1eea68e21ee2dcf4461b56c3833cc0d19b4005411bfbc81e5bcce2586e63359c6b5226b92d3c71d4d9129319fa85df57013e0fd025538e60 + SHA512 04c9973c513ff4b912283d4db93e255455e2ed909fa1ebfc09759d39e79bd7e156abf65f5686a8ffa311f7441b9c802c8a346ec41b02eb4e42564b53431042b8 ) vcpkg_from_github(OUT_SOURCE_PATH CMAKE_SOURCE_PATH REPO 
DragonJoker/CMakeUtils - REF 89a4c8fd4f0a464403676b6b1f1c5d178f6255b3 + REF 3818effff171f863d0c23e6fbbf79911f03cc6d3 HEAD_REF master - SHA512 98c46a563f2e4a28d9c91f4f255c500118dd0c66a4422d42969bdcc16c1493581db45b709fc3167e1ced8fa628d51880cb459df0d6b3dc013b945f66597ec768 + SHA512 3a13b371adf24f530fdef6005d3a9105185eca131c9b8aba68615a593f76d6aac2c3c7f0ecf6ce430e2b854afca4abd99f1400a1e6665478c9daa34a8dac6f5b ) file(REMOVE_RECURSE "${SOURCE_PATH}/CMake") diff --git a/external/RenderGraph b/external/RenderGraph index 31396b687f..e9f6b32430 160000 --- a/external/RenderGraph +++ b/external/RenderGraph @@ -1 +1 @@ -Subproject commit 31396b687fc090e9fc9a1840f1dc3593a6ccf31c +Subproject commit e9f6b3243095d832f409e15fbafff4f73af6e401 diff --git a/external/ShaderWriter b/external/ShaderWriter index 42aa67fda5..6aa2e9bb7f 160000 --- a/external/ShaderWriter +++ b/external/ShaderWriter @@ -1 +1 @@ -Subproject commit 42aa67fda5b2a72f21c0fcc9f51c2ed2838e1360 +Subproject commit 6aa2e9bb7f51c80df3ebf5f3d7ea5936d87e671f diff --git a/external/vcpkg b/external/vcpkg index 6accd15d64..1c5a340f6e 160000 --- a/external/vcpkg +++ b/external/vcpkg @@ -1 +1 @@ -Subproject commit 6accd15d644e93cec849ea346a147828437928b3 +Subproject commit 1c5a340f6e10985e2d92af174a68dbd15c1fa4e1 diff --git a/include/Core/Castor3D/Buffer/GpuBufferOffset.hpp b/include/Core/Castor3D/Buffer/GpuBufferOffset.hpp index 0ceb99f98c..b2622e1ca8 100644 --- a/include/Core/Castor3D/Buffer/GpuBufferOffset.hpp +++ b/include/Core/Castor3D/Buffer/GpuBufferOffset.hpp @@ -10,6 +10,36 @@ See LICENSE file in root folder namespace castor3d { + C3D_API void createUniformPassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ); + C3D_API void createInputStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , 
VkDeviceSize offset + , VkDeviceSize size ); + C3D_API void createInOutStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ); + C3D_API void createOutputStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ); + C3D_API void createClearableOutputStorageBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ); C3D_API void createUniformPassBinding( crg::FramePass & pass , uint32_t binding , std::string const & name diff --git a/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp b/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp index 3d8112b5af..2fe76545c8 100644 --- a/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp +++ b/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp @@ -9,7 +9,7 @@ See LICENSE file in root folder namespace castor3d { C3D_API crg::FramePass const & createBuildLightsBVHPass( crg::FramePassGroup & graph - , crg::FramePass const * previousPass + , crg::FramePassArray const & previousPasses , RenderDevice const & device , CameraUbo const & cameraUbo , FrustumClusters & clusters ); diff --git a/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp b/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp index 8d4bda12c1..05aea91cd0 100644 --- a/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp +++ b/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp @@ -40,6 +40,8 @@ namespace castor3d using OnClustersBuffersChanged = castor::SignalT< ClustersBuffersChangedFunction >; using OnClustersBuffersChangedConnection = castor::ConnectionT< ClustersBuffersChangedFunction >; + C3D_API u32 
getLightsMortonCodeChunkCount( u32 lightCount ); + //@} //@} } diff --git a/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp b/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp index 9d6fa718ab..abea8aed90 100644 --- a/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp +++ b/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp @@ -140,9 +140,24 @@ namespace castor3d return *m_spotBVHBuffer; } - void swapLightMortonIndicesIO() + void initPointLightMortonIndicesIO()noexcept { - m_lightMortonIndicesInput = 1u - m_lightMortonIndicesInput; + m_pointLightMortonIndicesInput = 1u; + } + + void initSpotLightMortonIndicesIO()noexcept + { + m_spotLightMortonIndicesInput = 1u; + } + + void swapPointLightMortonIndicesIO()noexcept + { + m_pointLightMortonIndicesInput = 1u - m_pointLightMortonIndicesInput; + } + + void swapSpotLightMortonIndicesIO()noexcept + { + m_spotLightMortonIndicesInput = 1u - m_spotLightMortonIndicesInput; } ashes::BufferBase & getPointLightIndicesBuffer( uint32_t index )const noexcept @@ -167,42 +182,47 @@ namespace castor3d ashes::BufferBase & getInputPointLightIndicesBuffer()const noexcept { - return getPointLightIndicesBuffer( m_lightMortonIndicesInput ); + return getPointLightIndicesBuffer( m_pointLightMortonIndicesInput ); } ashes::BufferBase & getInputSpotLightIndicesBuffer()const noexcept { - return getSpotLightIndicesBuffer( m_lightMortonIndicesInput ); + return getSpotLightIndicesBuffer( m_spotLightMortonIndicesInput ); } ashes::BufferBase & getOutputPointLightIndicesBuffer()const noexcept { - return getPointLightIndicesBuffer( 1u - m_lightMortonIndicesInput ); + return getPointLightIndicesBuffer( 1u - m_pointLightMortonIndicesInput ); } ashes::BufferBase & getOutputSpotLightIndicesBuffer()const noexcept { - return getSpotLightIndicesBuffer( 1u - m_lightMortonIndicesInput ); + return getSpotLightIndicesBuffer( 1u - m_spotLightMortonIndicesInput ); } ashes::BufferBase & getInputPointLightMortonCodesBuffer()const 
noexcept { - return getPointLightMortonCodesBuffer( m_lightMortonIndicesInput ); + return getPointLightMortonCodesBuffer( m_pointLightMortonIndicesInput ); } ashes::BufferBase & getInputSpotLightMortonCodesBuffer()const noexcept { - return getSpotLightMortonCodesBuffer( m_lightMortonIndicesInput ); + return getSpotLightMortonCodesBuffer( m_spotLightMortonIndicesInput ); } ashes::BufferBase & getOutputPointLightMortonCodesBuffer()const noexcept { - return getPointLightMortonCodesBuffer( 1u - m_lightMortonIndicesInput ); + return getPointLightMortonCodesBuffer( 1u - m_pointLightMortonIndicesInput ); } ashes::BufferBase & getOutputSpotLightMortonCodesBuffer()const noexcept { - return getSpotLightMortonCodesBuffer( 1u - m_lightMortonIndicesInput ); + return getSpotLightMortonCodesBuffer( 1u - m_spotLightMortonIndicesInput ); + } + + ashes::BufferBase & getMergePathPartitionsBuffer()const noexcept + { + return *m_mergePathPartitionsBuffer; } auto & getCamera()const noexcept @@ -229,7 +249,8 @@ namespace castor3d bool m_clustersDirty{ true }; bool m_lightsDirty{ true }; bool m_first{ true }; - uint32_t m_lightMortonIndicesInput{ 1u }; + uint32_t m_pointLightMortonIndicesInput{ 1u }; + uint32_t m_spotLightMortonIndicesInput{ 1u }; castor::GroupChangeTracked< castor::Point3ui > m_dimensions; castor::GroupChangeTracked< uint32_t > m_clusterSize; castor::GroupChangeTracked< castor::Matrix4x4f > m_cameraProjection; @@ -242,6 +263,7 @@ namespace castor3d ashes::BufferBasePtr m_pointLightClusterIndexBuffer; ashes::BufferBasePtr m_spotLightClusterIndexBuffer; ashes::BufferBasePtr m_lightsAABBBuffer; + ashes::BufferBasePtr m_mergePathPartitionsBuffer; std::array< ashes::BufferBasePtr, 2u > m_pointMortonCodesBuffers; std::array< ashes::BufferBasePtr, 2u > m_spotMortonCodesBuffers; std::array< ashes::BufferBasePtr, 2u > m_pointIndicesBuffers; diff --git a/include/Core/Castor3D/Render/Clustered/SortLightsMortonCode.hpp 
b/include/Core/Castor3D/Render/Clustered/SortLightsMortonCode.hpp new file mode 100644 index 0000000000..63dc97f7c2 --- /dev/null +++ b/include/Core/Castor3D/Render/Clustered/SortLightsMortonCode.hpp @@ -0,0 +1,18 @@ +/* +See LICENSE file in root folder +*/ +#ifndef ___C3D_SortLightsMortonCode_H___ +#define ___C3D_SortLightsMortonCode_H___ + +#include "ClusteredModule.hpp" + +namespace castor3d +{ + C3D_API crg::FramePassArray createSortLightsMortonCodePass( crg::FramePassGroup & graph + , crg::FramePass const * previousPass + , RenderDevice const & device + , CameraUbo const & cameraUbo + , FrustumClusters & clusters ); +} + +#endif diff --git a/include/Core/CastorUtils/Design/DataHolder.hpp b/include/Core/CastorUtils/Design/DataHolder.hpp index 2d4e0d49af..e3ffab0a88 100644 --- a/include/Core/CastorUtils/Design/DataHolder.hpp +++ b/include/Core/CastorUtils/Design/DataHolder.hpp @@ -14,7 +14,7 @@ namespace castor public: DataHolderT()noexcept = default; - DataHolderT( Data d )noexcept + explicit DataHolderT( Data d )noexcept : m_data{ std::move( d ) } { } diff --git a/include/Core/CastorUtils/Exception/Assertion.hpp b/include/Core/CastorUtils/Exception/Assertion.hpp index 7f54bf7d52..93e8bce396 100644 --- a/include/Core/CastorUtils/Exception/Assertion.hpp +++ b/include/Core/CastorUtils/Exception/Assertion.hpp @@ -11,7 +11,6 @@ See LICENSE file in root folder namespace castor { CU_API void cuLogError( char const * const description ); - [[ noreturn ]] CU_API void cuFailure( char const * const description ); } diff --git a/source/Core/Castor3D/Buffer/GpuBufferPool.cpp b/source/Core/Castor3D/Buffer/GpuBufferPool.cpp index 09feef5e5e..26eba01e93 100644 --- a/source/Core/Castor3D/Buffer/GpuBufferPool.cpp +++ b/source/Core/Castor3D/Buffer/GpuBufferPool.cpp @@ -15,6 +15,93 @@ namespace castor3d { //********************************************************************************************* + namespace gpupol + { + static crg::VkBufferArray makeVkArray( std::vector< 
ashes::BufferBase const * > const & buffers ) + { + crg::VkBufferArray result; + + for ( auto buffer : buffers ) + { + result.push_back( *buffer ); + } + + return result; + } + } + + //********************************************************************************************* + + void createUniformPassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ) + { + crg::VkBufferArray vkBuffers = gpupol::makeVkArray( buffers ); + pass.addUniformBuffer( { vkBuffers, name } + , binding + , offset + , size ); + } + + void createInputStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ) + { + crg::VkBufferArray vkBuffers = gpupol::makeVkArray( buffers ); + pass.addInputStorageBuffer( { vkBuffers, name } + , binding + , offset + , size ); + } + + void createInOutStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ) + { + crg::VkBufferArray vkBuffers = gpupol::makeVkArray( buffers ); + pass.addInOutStorageBuffer( { vkBuffers, name } + , binding + , offset + , size ); + } + + void createOutputStoragePassBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ) + { + crg::VkBufferArray vkBuffers = gpupol::makeVkArray( buffers ); + pass.addOutputStorageBuffer( { vkBuffers, name } + , binding + , offset + , size ); + } + + void createClearableOutputStorageBinding( crg::FramePass & pass + , uint32_t binding + , std::string const & name + , std::vector< ashes::BufferBase const * > buffers + , VkDeviceSize offset + , VkDeviceSize size ) + { + crg::VkBufferArray 
vkBuffers = gpupol::makeVkArray( buffers ); + pass.addClearableOutputStorageBuffer( { vkBuffers, name } + , binding + , offset + , size ); + } + void createUniformPassBinding( crg::FramePass & pass , uint32_t binding , std::string const & name @@ -22,8 +109,10 @@ namespace castor3d , VkDeviceSize offset , VkDeviceSize size ) { - pass.addUniformBuffer( { buffer, name } + createUniformPassBinding( pass , binding + , name + , { &buffer } , offset , size ); } @@ -35,8 +124,10 @@ namespace castor3d , VkDeviceSize offset , VkDeviceSize size ) { - pass.addInputStorageBuffer( { buffer, name } + createInputStoragePassBinding( pass , binding + , name + , { &buffer } , offset , size ); } @@ -48,8 +139,10 @@ namespace castor3d , VkDeviceSize offset , VkDeviceSize size ) { - pass.addInOutStorageBuffer( { buffer, name } + createInOutStoragePassBinding( pass , binding + , name + , { &buffer } , offset , size ); } @@ -61,8 +154,10 @@ namespace castor3d , VkDeviceSize offset , VkDeviceSize size ) { - pass.addOutputStorageBuffer( { buffer, name } + createOutputStoragePassBinding( pass , binding + , name + , { &buffer } , offset , size ); } @@ -74,8 +169,10 @@ namespace castor3d , VkDeviceSize offset , VkDeviceSize size ) { - pass.addClearableOutputStorageBuffer( { buffer, name } + createClearableOutputStorageBinding( pass , binding + , name + , { &buffer } , offset , size ); } diff --git a/source/Core/Castor3D/CMakeLists.txt b/source/Core/Castor3D/CMakeLists.txt index e8fc40c6bc..07585c20ae 100644 --- a/source/Core/Castor3D/CMakeLists.txt +++ b/source/Core/Castor3D/CMakeLists.txt @@ -1138,6 +1138,7 @@ set( ${PROJECT_NAME}_FOLDER_SRC_FILES ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/ComputeLightsMortonCode.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/FrustumClusters.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/ReduceLightsAABB.cpp + 
${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/SortLightsMortonCode.cpp ) set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/AssignLightsToClusters.hpp @@ -1147,6 +1148,7 @@ set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/ComputeLightsMortonCode.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/FrustumClusters.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/ReduceLightsAABB.hpp + ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/SortLightsMortonCode.hpp ) set( ${PROJECT_NAME}_SRC_FILES ${${PROJECT_NAME}_SRC_FILES} diff --git a/source/Core/Castor3D/DebugDefines.hpp b/source/Core/Castor3D/DebugDefines.hpp index f5970f3f1e..a222885256 100644 --- a/source/Core/Castor3D/DebugDefines.hpp +++ b/source/Core/Castor3D/DebugDefines.hpp @@ -16,5 +16,6 @@ See LICENSE file in root folder #define C3D_DebugDisableSafeBands 0 #define C3D_DebugUseLightsBVH 1 +#define C3D_DebugSortLightsMortonCode 1 #endif diff --git a/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp b/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp index 4ad5400dca..7c945b8118 100644 --- a/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp +++ b/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp @@ -538,15 +538,42 @@ namespace castor3d , crg::ComputePass{framePass , context , graph - , crg::ru::Config{} +#if C3D_DebugUseLightsBVH && C3D_DebugSortLightsMortonCode + , crg::ru::Config{ 2u } +#else + , crg::ru::Config{ 1u } +#endif , config + .getPassIndex( RunnablePass::GetPassIndexCallback( [this](){ return doGetPassIndex(); } ) ) .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( CreateInfoHolder::getData() ) ) .enabled( &clusters.needsLightsUpdate() ) .end( RecordCallback{ [this]( crg::RecordContext & ctx, VkCommandBuffer cb, uint32_t idx ) { 
doPostRecord( ctx, cb, idx ); } } ) } + , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } { } private: + uint32_t doGetPassIndex() + { +#if C3D_DebugUseLightsBVH && C3D_DebugSortLightsMortonCode + u32 result = {}; + + auto pointLightsCount = m_lightCache.getLightsCount( LightType::ePoint ); + auto spoLightsCount = m_lightCache.getLightsCount( LightType::eSpot ); + auto totalValues = std::max( pointLightsCount, spoLightsCount ); + auto numChunks = getLightsMortonCodeChunkCount( totalValues ); + + if ( numChunks > 1u ) + { + result = ( ( numChunks - 1u ) % 2u ); + } + + return result; +#else + return 0u; +#endif + } + void doPostRecord( crg::RecordContext & context , VkCommandBuffer commandBuffer , uint32_t index ) @@ -559,10 +586,10 @@ namespace castor3d && attach.isStorageBuffer() && attach.isClearableBuffer() ) { - auto currentState = context.getAccessState( buffer.buffer.buffer + auto currentState = context.getAccessState( buffer.buffer.buffer( index ) , buffer.range ); context.memoryBarrier( commandBuffer - , buffer.buffer.buffer + , buffer.buffer.buffer( index ) , buffer.range , currentState.access , currentState.pipelineStage @@ -570,6 +597,9 @@ namespace castor3d } } } + + private: + LightCache const & m_lightCache; }; } @@ -610,8 +640,13 @@ namespace castor3d #if C3D_DebugUseLightsBVH createInputStoragePassBinding( pass, uint32_t( dspclst::ePointLightBVH ), "C3D_PointLightsBVH", clusters.getPointLightBVHBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( pass, uint32_t( dspclst::eSpotLightBVH ), "C3D_SpotLightsBVH", clusters.getSpotLightBVHBuffer(), 0u, ashes::WholeSize ); +# if C3D_DebugSortLightsMortonCode + createInputStoragePassBinding( pass, uint32_t( dspclst::ePointLightIndices ), "C3D_PointLightIndices", { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() }, 0u, ashes::WholeSize ); + createInputStoragePassBinding( pass, uint32_t( dspclst::eSpotLightIndices ), "C3D_SpotLightIndices", 
{ &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() }, 0u, ashes::WholeSize ); +# else createInputStoragePassBinding( pass, uint32_t( dspclst::ePointLightIndices ), "C3D_PointLightIndices", clusters.getInputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( pass, uint32_t( dspclst::eSpotLightIndices ), "C3D_SpotLightIndices", clusters.getInputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); +# endif #endif return pass; } diff --git a/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp b/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp index 4c03577a58..59142c1f6f 100644 --- a/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp +++ b/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp @@ -1,5 +1,6 @@ #include "Castor3D/Render/Clustered/BuildLightsBVH.hpp" +#include "Castor3D/DebugDefines.hpp" #include "Castor3D/Engine.hpp" #include "Castor3D/Cache/LightCache.hpp" #include "Castor3D/Render/RenderDevice.hpp" @@ -24,6 +25,8 @@ #include #include +#include + namespace castor3d { //********************************************************************************************* @@ -42,6 +45,7 @@ namespace castor3d }; static uint32_t constexpr NumThreads = 32u * 16u; + static float constexpr FltMax = std::numeric_limits< float >::max(); static ShaderPtr createShader( bool bottomLevel ) { @@ -149,8 +153,8 @@ namespace castor3d } ELSE { - aabbMin = vec4( sdw::Float{ FLT_MAX }, FLT_MAX, FLT_MAX, 1.0f ); - aabbMax = vec4( sdw::Float{ -FLT_MAX }, -FLT_MAX, -FLT_MAX, 1.0f ); + aabbMin = vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ); + aabbMax = vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ); } FI; @@ -197,8 +201,8 @@ namespace castor3d } ELSE { - aabbMin = vec4( sdw::Float{ FLT_MAX }, FLT_MAX, FLT_MAX, 1.0f ); - aabbMax = vec4( sdw::Float{ -FLT_MAX }, -FLT_MAX, -FLT_MAX, 1.0f ); + aabbMin = vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ); + aabbMax = vec4( sdw::Float{ -FltMax 
}, -FltMax, -FltMax, 1.0f ); } FI; @@ -247,8 +251,8 @@ namespace castor3d } ELSE { - aabbMin = vec4( sdw::Float{ FLT_MAX }, FLT_MAX, FLT_MAX, 1.0f ); - aabbMax = vec4( sdw::Float{ -FLT_MAX }, -FLT_MAX, -FLT_MAX, 1.0f ); + aabbMin = vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ); + aabbMax = vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ); } FI; @@ -289,8 +293,8 @@ namespace castor3d } ELSE { - aabbMin = vec4( sdw::Float{ FLT_MAX }, FLT_MAX, FLT_MAX, 1.0f ); - aabbMax = vec4( sdw::Float{ -FLT_MAX }, -FLT_MAX, -FLT_MAX, 1.0f ); + aabbMin = vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ); + aabbMax = vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ); } FI; @@ -337,13 +341,18 @@ namespace castor3d , { [this]( uint32_t index ){ doInitialise( index ); } , GetPipelineStateCallback( [](){ return crg::getPipelineState( VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ); } ) , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t i ){ doRecordInto( recContext, cb, i ); } - , GetPassIndexCallback( [](){ return 0u; } ) + , GetPassIndexCallback( [this](){ return doGetPassIndex(); } ) , IsEnabledCallback( [this](){ return doIsEnabled(); } ) , IsComputePassCallback( [](){ return true; } ) } +#if C3D_DebugSortLightsMortonCode + , crg::ru::Config{ 2u, true /* resettable */ } } +#else , crg::ru::Config{ 1u, true /* resettable */ } } +#endif + , m_clusters{ clusters } , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } - , m_bottom{ framePass, context, graph, device, true, clusters } - , m_top{ framePass, context, graph, device, false, clusters } + , m_bottom{ framePass, context, graph, device, true, this } + , m_top{ framePass, context, graph, device, false, this } { } @@ -371,12 +380,13 @@ namespace castor3d , crg::RunnableGraph & graph , RenderDevice const & device , bool bottomLevel - , FrustumClusters const & clusters ) + , FramePass * parent ) : shader{ VK_SHADER_STAGE_COMPUTE_BIT, "BuildLightsBVH", createShader( bottomLevel ) } , createInfo{ 
ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, shader ) } } - , cpConfig{ crg::defaultV< uint32_t const * > - , &clusters.needsLightsUpdate() + , cpConfig{ crg::getDefaultV< InitialiseCallback >() + , &parent->m_clusters.needsLightsUpdate() , crg::getDefaultV< IsEnabledCallback >() + , GetPassIndexCallback( [parent]() { return parent->doGetPassIndex(); } ) , crg::getDefaultV< RecordCallback >() , crg::getDefaultV< RecordCallback >() , 1u @@ -386,15 +396,20 @@ namespace castor3d , context , graph , crg::pp::Config{} - .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) - .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 4u } ) + .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) + .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 4u } ) , VK_PIPELINE_BIND_POINT_COMPUTE +#if C3D_DebugSortLightsMortonCode + , 2u } +#else , 1u } +#endif { } }; private: + FrustumClusters const & m_clusters; LightCache const & m_lightCache; Pipeline m_bottom; Pipeline m_top; @@ -408,6 +423,27 @@ namespace castor3d doCreatePipeline( index, m_top ); } + uint32_t doGetPassIndex() + { +#if C3D_DebugSortLightsMortonCode + u32 result = {}; + + auto pointLightsCount = m_lightCache.getLightsCount( LightType::ePoint ); + auto spoLightsCount = m_lightCache.getLightsCount( LightType::eSpot ); + auto totalValues = std::max( pointLightsCount, spoLightsCount ); + auto numChunks = getLightsMortonCodeChunkCount( totalValues ); + + if ( numChunks > 1u ) + { + result = ( ( numChunks - 1u ) % 2u ); + } + + return result; +#else + return 0u; +#endif + } + bool doIsEnabled()const { return ( m_bottom.cpConfig.isEnabled ? 
( *m_bottom.cpConfig.isEnabled )() : false ) @@ -422,11 +458,11 @@ namespace castor3d auto pointLightsCount = m_lightCache.getLightsCount( LightType::ePoint ); auto spoLightsCount = m_lightCache.getLightsCount( LightType::eSpot ); auto maxLeaves = std::max( pointLightsCount, spoLightsCount ); - auto numThreadGroups = uint32_t( std::ceil( float( maxLeaves ) / float( 32u * 16u ) ) ); + auto numThreadGroups = uint32_t( std::ceil( float( maxLeaves ) / float( NumThreads ) ) ); m_bottom.pipeline.recordInto( context, commandBuffer, index ); m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); uint32_t maxLevels = FrustumClusters::getNumLevels( maxLeaves ); - doBarriers( context, commandBuffer, 0 ); + doBarriers( context, commandBuffer, index, 0 ); // Now build upper levels of the BVH. if ( maxLevels > 1u ) @@ -435,7 +471,7 @@ namespace castor3d for ( uint32_t level = maxLevels - 1u; level > 0; --level ) { - doBarriers( context, commandBuffer, 1 ); + doBarriers( context, commandBuffer, index, 1 ); m_context.vkCmdPushConstants( commandBuffer, m_top.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 4u, &level ); uint32_t numChildNodes = FrustumClusters::getNumLevelNodes( level ); numThreadGroups = uint32_t( std::ceil( float( numChildNodes ) / float( NumThreads ) ) ); @@ -443,11 +479,12 @@ namespace castor3d } } - doBarriers( context, commandBuffer, 2 ); + doBarriers( context, commandBuffer, index, 2 ); } void doBarriers( crg::RecordContext & context , VkCommandBuffer commandBuffer + , uint32_t passIndex , int idx ) { for ( auto & attach : m_pass.buffers ) @@ -458,10 +495,10 @@ namespace castor3d && attach.isStorageBuffer() && attach.isClearableBuffer() ) { - auto currentState = context.getAccessState( buffer.buffer.buffer + auto currentState = context.getAccessState( buffer.buffer.buffer( passIndex ) , buffer.range ); context.memoryBarrier( commandBuffer - , buffer.buffer.buffer + , buffer.buffer.buffer( passIndex ) , buffer.range , 
currentState.access , currentState.pipelineStage @@ -492,7 +529,7 @@ namespace castor3d //********************************************************************************************* crg::FramePass const & createBuildLightsBVHPass( crg::FramePassGroup & graph - , crg::FramePass const * previousPass + , crg::FramePassArray const & previousPasses , RenderDevice const & device , CameraUbo const & cameraUbo , FrustumClusters & clusters ) @@ -511,13 +548,18 @@ namespace castor3d , result->getTimer() ); return result; } ); - pass.addDependency( *previousPass ); + pass.addDependencies( previousPasses ); cameraUbo.createPassBinding( pass, lgtbvh::eCamera ); auto & lights = clusters.getCamera().getScene()->getLightCache(); lights.createPassBinding( pass, lgtbvh::eLights ); clusters.getClustersUbo().createPassBinding( pass, lgtbvh::eClusters ); +#if C3D_DebugSortLightsMortonCode + createInputStoragePassBinding( pass, uint32_t( lgtbvh::ePointLightIndices ), "C3D_PointLightIndices", { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() }, 0u, ashes::WholeSize ); + createInputStoragePassBinding( pass, uint32_t( lgtbvh::eSpotLightIndices ), "C3D_SpotLightIndices", { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() }, 0u, ashes::WholeSize ); +#else createInputStoragePassBinding( pass, uint32_t( lgtbvh::ePointLightIndices ), "C3D_PointLightIndices", clusters.getInputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( pass, uint32_t( lgtbvh::eSpotLightIndices ), "C3D_SpotLightIndices", clusters.getInputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); +#endif createClearableOutputStorageBinding( pass, uint32_t( lgtbvh::ePointLightBVH ), "C3D_PointLightBVH", clusters.getPointLightBVHBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( pass, uint32_t( lgtbvh::eSpotLightBVH ), "C3D_SpotLightBVH", clusters.getSpotLightBVHBuffer(), 0u, ashes::WholeSize ); return 
pass; diff --git a/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp b/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp index afc2e08506..1db32ca1bf 100644 --- a/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp +++ b/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp @@ -186,10 +186,10 @@ namespace castor3d && attach.isStorageBuffer() && attach.isClearableBuffer() ) { - auto currentState = context.getAccessState( buffer.buffer.buffer + auto currentState = context.getAccessState( buffer.buffer.buffer( index ) , buffer.range ); context.memoryBarrier( commandBuffer - , buffer.buffer.buffer + , buffer.buffer.buffer( index ) , buffer.range , currentState.access , currentState.pipelineStage diff --git a/source/Core/Castor3D/Render/Clustered/ComputeLightsMortonCode.cpp b/source/Core/Castor3D/Render/Clustered/ComputeLightsMortonCode.cpp index 46e83a1e9b..e5d06af710 100644 --- a/source/Core/Castor3D/Render/Clustered/ComputeLightsMortonCode.cpp +++ b/source/Core/Castor3D/Render/Clustered/ComputeLightsMortonCode.cpp @@ -1,5 +1,6 @@ #include "Castor3D/Render/Clustered/ComputeLightsMortonCode.hpp" +#include "Castor3D/DebugDefines.hpp" #include "Castor3D/Engine.hpp" #include "Castor3D/Cache/LightCache.hpp" #include "Castor3D/Render/RenderDevice.hpp" @@ -107,7 +108,7 @@ namespace castor3d bitShift += 2u; } - return mortonCode; + writer.returnStmt( mortonCode ); } , sdw::InUVec3{ writer, "quantizedCoord" } ); @@ -175,11 +176,22 @@ namespace castor3d using ShaderHolder = DataHolderT< ShaderModule >; using CreateInfoHolder = DataHolderT< ashes::PipelineShaderStageCreateInfoArray >; + void doInitClustersBuffersIndices()const + { +#if C3D_DebugSortLightsMortonCode + m_clusters.initPointLightMortonIndicesIO(); + m_clusters.initSpotLightMortonIndicesIO(); + m_clusters.swapPointLightMortonIndicesIO(); + m_clusters.swapSpotLightMortonIndicesIO(); +#endif + } + public: FramePass( crg::FramePass const & framePass , crg::GraphContext & context 
, crg::RunnableGraph & graph , RenderDevice const & device + , FrustumClusters & clusters , crg::cp::Config config ) : ShaderHolder{ ShaderModule{ VK_SHADER_STAGE_COMPUTE_BIT, "ComputeLightsMortonCode", createShader() } } , CreateInfoHolder{ ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, ShaderHolder::getData() ) } } @@ -188,9 +200,13 @@ namespace castor3d , graph , crg::ru::Config{} , config + .getPassIndex( GetPassIndexCallback{ [this]() { doInitClustersBuffersIndices(); return 0u; } } ) + .initialise( InitialiseCallback{ [this]( uint32_t idx ) { doInitClustersBuffersIndices(); } } ) .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( CreateInfoHolder::getData() ) ) .end( RecordCallback{ [this]( crg::RecordContext & ctx, VkCommandBuffer cb, uint32_t idx ) { doPostRecord( ctx, cb, idx ); } } ) } + , m_clusters{ clusters } { + doInitClustersBuffersIndices(); } private: @@ -206,17 +222,21 @@ namespace castor3d && attach.isStorageBuffer() && attach.isClearableBuffer() ) { - auto currentState = context.getAccessState( buffer.buffer.buffer + auto currentState = context.getAccessState( buffer.buffer.buffer( index ) , buffer.range ); context.memoryBarrier( commandBuffer - , buffer.buffer.buffer + , buffer.buffer.buffer( index ) , buffer.range , currentState.access , currentState.pipelineStage , { VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT } ); } } + + doInitClustersBuffersIndices(); } + + FrustumClusters & m_clusters; }; } @@ -238,6 +258,7 @@ namespace castor3d , context , graph , device + , clusters , crg::cp::Config{} .groupCountX( numThreadGroups ) .enabled( &clusters.needsClustersUpdate() ) ); @@ -251,11 +272,18 @@ namespace castor3d lights.createPassBinding( pass, cmpmrt::eLights ); clusters.getClustersUbo().createPassBinding( pass, cmpmrt::eClusters ); createInputStoragePassBinding( pass, uint32_t( cmpmrt::eLightsAABB ), "C3D_LightsAABB", clusters.getLightsAABBBuffer(), 0u, ashes::WholeSize ); +#if 
C3D_DebugSortLightsMortonCode + clusters.initPointLightMortonIndicesIO(); + clusters.initSpotLightMortonIndicesIO(); +#endif createClearableOutputStorageBinding( pass, uint32_t( cmpmrt::ePointLightMortonCodes ), "C3D_PointLightMortonCodes", clusters.getOutputPointLightMortonCodesBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( pass, uint32_t( cmpmrt::eSpotLightMortonCodes ), "C3D_SpotLightMortonCodes", clusters.getOutputSpotLightMortonCodesBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( pass, uint32_t( cmpmrt::ePointLightIndices ), "C3D_PointLightIndices", clusters.getOutputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( pass, uint32_t( cmpmrt::eSpotLightIndices ), "C3D_SpotLightIndices", clusters.getOutputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); - clusters.swapLightMortonIndicesIO(); +#if C3D_DebugSortLightsMortonCode + clusters.swapPointLightMortonIndicesIO(); + clusters.swapSpotLightMortonIndicesIO(); +#endif return pass; } diff --git a/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp b/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp index b0785a357a..e78c23a43e 100644 --- a/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp +++ b/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp @@ -10,6 +10,7 @@ #include "Castor3D/Render/Clustered/ComputeClustersAABB.hpp" #include "Castor3D/Render/Clustered/ComputeLightsMortonCode.hpp" #include "Castor3D/Render/Clustered/ReduceLightsAABB.hpp" +#include "Castor3D/Render/Clustered/SortLightsMortonCode.hpp" #include "Castor3D/Scene/Camera.hpp" #include "Castor3D/Scene/Scene.hpp" #include "Castor3D/Scene/Light/DirectionalLight.hpp" @@ -118,6 +119,40 @@ namespace castor3d , "C3D_SpotLightBVH" ) } #endif { +#if C3D_DebugUseLightsBVH +# if C3D_DebugSortLightsMortonCode + static uint32_t constexpr NumThreadsPerThreadGroup = 256u; + static uint32_t constexpr ElementsPerThread = 8u; + + // The maximum number of 
elements that need to be sorted. + uint32_t maxElements = MaxLightsCount; + + // Radix sort will sort Morton codes (keys) into chunks of NumThreadsPerThreadGroup elements. + uint32_t chunkSize = NumThreadsPerThreadGroup; + // The number of chunks that need to be merge sorted after Radix sort finishes. + uint32_t numChunks = uint32_t( std::ceil( float( maxElements ) / float( chunkSize ) ) ); + // The number of sort groups that are needed to sort the first set of chunks. + // Each sort group will sort 2 chunks. So the maximum number of sort groups is 1/2 of the + // number of chunks. + uint32_t maxSortGroups = numChunks / 2u; + // The number of merge path partitions per sort group is the total values + // to be sorted per sort group (2 chunks) divided by the number of elements + // that can be sorted per thread group (ElementsPerThread * NumThreadsPerThreadGroup). + // One is added to account for the merge path partition at the end of the sort group. + uint32_t numMergePathPartitionsPerSortGroup = uint32_t( std::ceil( float( chunkSize * 2u ) / float( ElementsPerThread * NumThreadsPerThreadGroup ) ) ) + 1u; + + // The maximum number of merge path partitions is the number of merge path partitions + // needed by a single sort group multiplied by the maximum number of sort groups.
+ uint32_t maxMergePathPartitions = numMergePathPartitionsPerSortGroup * maxSortGroups; + + m_mergePathPartitionsBuffer = makeBufferBase( m_device + , sizeof( u32 ) * maxMergePathPartitions + , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , "C3D_MergePathPartitions" ); +# endif +#endif + doUpdate(); } @@ -145,17 +180,21 @@ namespace castor3d , CameraUbo const & cameraUbo ) { auto & graph = parentGraph.createPassGroup( "Clusters" ); - auto lastPass = &createComputeClustersAABBPass( graph, previousPass - , m_device, cameraUbo, *this ); + crg::FramePassArray lastPasses = { &createComputeClustersAABBPass( graph, previousPass + , m_device, cameraUbo, *this ) }; #if C3D_DebugUseLightsBVH - lastPass = &createReduceLightsAABBPass( graph, lastPass - , m_device, cameraUbo, *this ); - lastPass = &createComputeLightsMortonCodePass( graph, lastPass - , m_device, cameraUbo, *this ); - lastPass = &createBuildLightsBVHPass( graph, lastPass + lastPasses = { &createReduceLightsAABBPass( graph, lastPasses.front() + , m_device, cameraUbo, *this ) }; + lastPasses = { &createComputeLightsMortonCodePass( graph, lastPasses.front() + , m_device, cameraUbo, *this ) }; +# if C3D_DebugSortLightsMortonCode + lastPasses = createSortLightsMortonCodePass( graph, lastPasses.front() , m_device, cameraUbo, *this ); +# endif + lastPasses = { &createBuildLightsBVHPass( graph, lastPasses + , m_device, cameraUbo, *this ) }; #endif - return createAssignLightsToClustersPass( graph, lastPass + return createAssignLightsToClustersPass( graph, lastPasses.front() , m_device, cameraUbo, *this ); } diff --git a/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp b/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp index 53c7b70987..af7e6be5cd 100644 --- a/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp +++ b/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp @@ -25,6 +25,8 @@ #include #include +#include + #define 
C3D_DebugEnableWarpOptimisation 0 namespace castor3d @@ -42,6 +44,7 @@ namespace castor3d }; static uint32_t constexpr NumThreads = 512u; + static float constexpr FltMax = std::numeric_limits< float >::max(); static ShaderPtr createShader( bool first ) { @@ -151,9 +154,9 @@ namespace castor3d auto threadIndex = in.globalInvocationID.x(); auto aabbMin = writer.declLocale( "aabbMin" - , vec4( sdw::Float{ FLT_MAX }, FLT_MAX, FLT_MAX, 1.0f ) ); + , vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ) ); auto aabbMax = writer.declLocale( "aabbMax" - , vec4( sdw::Float{ -FLT_MAX }, -FLT_MAX, -FLT_MAX, 1.0f ) ); + , vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ) ); if ( first ) { @@ -277,9 +280,10 @@ namespace castor3d , FrustumClusters const & clusters ) : shader{ VK_SHADER_STAGE_COMPUTE_BIT, "ReduceLightsAABB" + ( first ? std::string{ "/First" } : std::string{ "/Second" } ), createShader( first ) } , createInfo{ ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, shader ) } } - , cpConfig{ crg::defaultV< uint32_t const * > + , cpConfig{ crg::getDefaultV< InitialiseCallback >() , &clusters.needsLightsUpdate() , crg::getDefaultV< IsEnabledCallback >() + , crg::getDefaultV< GetPassIndexCallback >() , crg::getDefaultV< RecordCallback >() , crg::getDefaultV< RecordCallback >() , 1u diff --git a/source/Core/Castor3D/Render/Clustered/SortLightsMortonCode.cpp b/source/Core/Castor3D/Render/Clustered/SortLightsMortonCode.cpp new file mode 100644 index 0000000000..470c8e21d0 --- /dev/null +++ b/source/Core/Castor3D/Render/Clustered/SortLightsMortonCode.cpp @@ -0,0 +1,970 @@ +#include "Castor3D/Render/Clustered/SortLightsMortonCode.hpp" + +#include "Castor3D/Engine.hpp" +#include "Castor3D/Cache/LightCache.hpp" +#include "Castor3D/Render/RenderDevice.hpp" +#include "Castor3D/Render/RenderSystem.hpp" +#include "Castor3D/Render/Clustered/FrustumClusters.hpp" +#include "Castor3D/Scene/Camera.hpp" +#include "Castor3D/Scene/Scene.hpp" +#include 
"Castor3D/Scene/Light/PointLight.hpp" +#include "Castor3D/Scene/Light/SpotLight.hpp" +#include "Castor3D/Shader/Program.hpp" +#include "Castor3D/Shader/Shaders/GlslAABB.hpp" +#include "Castor3D/Shader/Shaders/GlslAppendBuffer.hpp" +#include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" +#include "Castor3D/Shader/Shaders/GlslLight.hpp" +#include "Castor3D/Shader/Shaders/GlslUtils.hpp" +#include "Castor3D/Shader/Ubos/CameraUbo.hpp" +#include "Castor3D/Shader/Ubos/ClustersUbo.hpp" + +#include + +#include + +#include +#include + +namespace castor3d +{ + //********************************************************************************************* + + namespace srtmrt + { + static uint32_t constexpr NumThreads = 256u; + static uint32_t constexpr NumThreadsPerThreadGroup = 256u; + static uint32_t constexpr NumValuesPerThread = 8u; + static uint32_t constexpr NumValuesPerThreadGroup = NumThreads * NumValuesPerThread; + + struct DispatchData + { + u32 numElements; + u32 chunkSize; + }; + + using InMortonCodesCallback = ashes::BufferBase & ( FrustumClusters::* )()const noexcept; + using OutMortonCodesCallback = ashes::BufferBase & ( FrustumClusters::* )()const noexcept; + using InIndicesCallback = ashes::BufferBase & ( FrustumClusters::* )()const noexcept; + using OutIndicesCallback = ashes::BufferBase & ( FrustumClusters::* )()const noexcept; + + struct LightData + { + LightType lightType; + std::string name; + InMortonCodesCallback m_inMortonCodes; + OutMortonCodesCallback m_outMortonCodes; + InIndicesCallback m_inIndices; + OutIndicesCallback m_outIndices; + + ashes::BufferBase & inMortonCodes( FrustumClusters & clusters ) + { + return ( clusters.*m_inMortonCodes )(); + } + + ashes::BufferBase & outMortonCodes( FrustumClusters & clusters ) + { + return ( clusters.*m_outMortonCodes )(); + } + + ashes::BufferBase & inIndices( FrustumClusters & clusters ) + { + return ( clusters.*m_inIndices )(); + } + + ashes::BufferBase & outIndices( FrustumClusters & clusters ) + { 
+ return ( clusters.*m_outIndices )(); + } + }; + + //***************************************************************************************** + + namespace radix + { + enum BindingPoints + { + eInputKeys, + eInputValues, + eOutputKeys, + eOutputValues, + }; + + static ShaderPtr createShader() + { + sdw::ComputeWriter writer; + + auto inputKeysBuffer = writer.declStorageBuffer( "c3d_inputKeysBuffer" + , uint32_t( eInputKeys ) + , 0u ); + auto c3d_inputKeys = inputKeysBuffer.declMemberArray< sdw::UInt >( "ik" ); + inputKeysBuffer.end(); + + auto inputValuesBuffer = writer.declStorageBuffer( "c3d_inputValuesBuffer" + , uint32_t( eInputValues ) + , 0u ); + auto c3d_inputValues = inputValuesBuffer.declMemberArray< sdw::UInt >( "iv" ); + inputValuesBuffer.end(); + + auto outputKeysBuffer = writer.declStorageBuffer( "c3d_outputKeysBuffer" + , uint32_t( eOutputKeys ) + , 0u ); + auto c3d_outputKeys = outputKeysBuffer.declMemberArray< sdw::UInt >( "ok" ); + outputKeysBuffer.end(); + + auto outputValuesBuffer = writer.declStorageBuffer( "c3d_outputValuesBuffer" + , uint32_t( eOutputValues ) + , 0u ); + auto c3d_outputValues = outputValuesBuffer.declMemberArray< sdw::UInt >( "ov" ); + outputValuesBuffer.end(); + + sdw::PushConstantBuffer pcb{ writer, "C3D_DispatchData", "c3d_dispatchData" }; + auto c3d_numElements = pcb.declMember< sdw::UInt >( "c3d_numElements" ); + auto c3d_chunkSize = pcb.declMember< sdw::UInt >( "c3d_chunkSize" ); + pcb.end(); + + auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", NumThreads ); // A temporary buffer to store the input keys. (1,024 Bytes) + auto gsValues = writer.declSharedVariable< sdw::UInt >( "gsValues", NumThreads ); // A temporary buffer to store the input values. 
(1,024 Bytes) + auto gsE = writer.declSharedVariable< sdw::UInt >( "gsE", NumThreads ); // Set a 1 for all false sort keys (b == 0) and a 0 for all true sort keys (b == 1) (1,024 Bytes) + auto gsF = writer.declSharedVariable< sdw::UInt >( "gsF", NumThreads ); // Scan the splits. This results in the output index of all false sort keys (b == 0) (1,024 Bytes) + auto gsD = writer.declSharedVariable< sdw::UInt >( "gsD", NumThreads ); // The destination index for the output key and value. (1,024 Bytes) + auto gsTotalFalses = writer.declSharedVariable< sdw::UInt >( "gsTotalFalses" ); // The result of gsE[NumThreads - 1] + gsF[NumThreads - 1]; (4 Bytes) + + writer.implementMainT< sdw::VoidT >( NumThreads + , [&]( sdw::ComputeIn in ) + { + // The number of bits to consider sorting. + // In this case, the input keys are 30-bit morton codes. + const u32 NumBits = 30u; + + auto groupIndex = in.localInvocationIndex; + auto threadIndex = in.globalInvocationID.x(); + + // Store the input key and values into shared memory. + gsKeys[groupIndex] = writer.ternary( threadIndex < c3d_numElements, c3d_inputKeys[threadIndex], sdw::UInt{ UINT_MAX } ); + gsValues[groupIndex] = writer.ternary( threadIndex < c3d_numElements, c3d_inputValues[threadIndex], sdw::UInt{ UINT_MAX } ); + + // Loop over the bits starting at the least-significant bit. + FOR( writer, sdw::UInt, b, 0_u, b < NumBits, ++b ) + { + // 1. In a temporary buffer in shared memory, we set a 1 for all false + // sort keys (bit b is 0) and a 0 for all true sort keys. + gsE[groupIndex] = writer.ternary( ( ( gsKeys[groupIndex] >> b ) & 1_u ) == 0_u + , 1_u + , 0_u ); + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + IF( writer, groupIndex == 0_u ) + { + gsF[groupIndex] = 0_u; + } + ELSE + { + gsF[groupIndex] = gsE[groupIndex - 1_u]; + } + FI; + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + auto temp = writer.declLocale( "temp", 0_u ); + + // 2.
We then scan (prefix sum) this buffer. This is the enumerate operation; + // each false sort key now contains its destination address in the scan + // output, which we will call f. These first two steps are equivalent to + // a stream compaction operation on all false sort keys. + for ( u32 i = 1; i < NumThreads; i <<= 1u ) + { + temp = gsF[groupIndex]; + + IF( writer, groupIndex > i ) + { + temp += gsF[groupIndex - i]; + } + FI; + + // Sync group shared memory reads before writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + gsF[groupIndex] = temp; + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + } + + // 3. The scan is exclusive, so the total number of false sort keys is + // the last element of gsE plus the last element of gsF. We write this + // value to a shared variable, gsTotalFalses. + IF ( writer, groupIndex == 0_u ) + { + gsTotalFalses = gsE[NumThreads - 1u] + gsF[NumThreads - 1u]; + } + FI; + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + // 4. Now we compute the destination address for the true sort keys. For + // a sort key at index i, this address is t = i - f + totalFalses. We + // then select between t and f depending on the value of b to get the + // destination address d of each sort key. + gsD[groupIndex] = writer.ternary( gsE[groupIndex] == 1u + , gsF[groupIndex] + , groupIndex - gsF[groupIndex] + gsTotalFalses ); + + // 5. Finally, we scatter the original sort keys to destination address + // d. The scatter pattern is a perfect permutation of the input, so + // we see no write conflicts with this scatter. + auto key = writer.declLocale( "key", gsKeys[groupIndex] ); + auto value = writer.declLocale( "value", gsValues[groupIndex] ); + + // Sync group shared memory reads before writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + gsKeys[gsD[groupIndex]] = key; + gsValues[gsD[groupIndex]] = value; + + // Sync group shared memory writes.
+ shader::groupMemoryBarrierWithGroupSync( writer ); + } + ROF; + + // Now commit the results to global memory. + c3d_outputKeys[threadIndex] = gsKeys[groupIndex]; + c3d_outputValues[threadIndex] = gsValues[groupIndex]; + } ); + return std::make_unique< ast::Shader >( std::move( writer.getShader() ) ); + } + + class FramePass + : public crg::RunnablePass + { + public: + FramePass( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , FrustumClusters & clusters + , LightData lightData ) + : crg::RunnablePass{ framePass + , context + , graph + , { [this]( uint32_t index ){ doInitialise( index ); } + , GetPipelineStateCallback( [](){ return crg::getPipelineState( VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ); } ) + , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t i ){ doRecordInto( recContext, cb, i ); } + , GetPassIndexCallback( [](){ return 0u; } ) + , IsEnabledCallback( [this](){ return doIsEnabled(); } ) + , IsComputePassCallback( [](){ return true; } ) } + , crg::ru::Config{ 1u, true /* resettable */ } } + , m_clusters{ clusters } + , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } + , m_lightData{ std::move( lightData ) } + , m_pipeline{ framePass, context, graph, device, this } + { + } + + CRG_API void resetPipeline( crg::VkPipelineShaderStageCreateInfoArray config + , uint32_t index ) + { + resetCommandBuffer( index ); + m_pipeline.pipeline.resetPipeline( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( m_pipeline.createInfo ), index ); + doCreatePipeline( index, m_pipeline ); + reRecordCurrent(); + } + + private: + struct Pipeline + { + ShaderModule shader; + ashes::PipelineShaderStageCreateInfoArray createInfo; + crg::cp::ConfigData cpConfig; + crg::PipelineHolder pipeline; + + Pipeline( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , FramePass * parent ) + : 
shader{ VK_SHADER_STAGE_COMPUTE_BIT, "RadixSort", createShader() } + , createInfo{ ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, shader ) } } + , cpConfig{ crg::getDefaultV< InitialiseCallback >() + , nullptr + , IsEnabledCallback( [parent]() { return parent->doIsEnabled(); } ) + , GetPassIndexCallback( []() { return 0u; } ) + , crg::getDefaultV< RecordCallback >() + , crg::getDefaultV< RecordCallback >() + , 1u + , 1u + , 1u } + , pipeline{ framePass + , context + , graph + , crg::pp::Config{} + .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) + .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u } ) + , VK_PIPELINE_BIND_POINT_COMPUTE + , 1u } + { + } + }; + + private: + FrustumClusters & m_clusters; + LightCache const & m_lightCache; + LightData m_lightData; + Pipeline m_pipeline; + + private: + void doInitialise( uint32_t index ) + { + m_pipeline.pipeline.initialise(); + doCreatePipeline( index, m_pipeline ); + } + + bool doIsEnabled()const + { + return m_clusters.needsLightsUpdate() + && m_lightCache.getLightsCount( m_lightData.lightType ) > 0; + } + + void doRecordInto( crg::RecordContext & context + , VkCommandBuffer commandBuffer + , uint32_t index ) + { + // Dispatch the radix sort shader: one thread group per NumThreadsPerThreadGroup lights.
+ auto lightsCount = m_lightCache.getLightsCount( m_lightData.lightType ); + auto numThreadGroups = uint32_t( std::ceil( float( lightsCount ) / float( NumThreadsPerThreadGroup ) ) ); + DispatchData data{ lightsCount, 0u }; + m_pipeline.pipeline.recordInto( context, commandBuffer, index ); + m_context.vkCmdPushConstants( commandBuffer, m_pipeline.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); + m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); + } + + void doCreatePipeline( uint32_t index + , Pipeline & pipeline ) + { + auto & program = pipeline.pipeline.getProgram( index ); + VkComputePipelineCreateInfo createInfo{ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO + , nullptr + , 0u + , program.front() + , pipeline.pipeline.getPipelineLayout() + , VkPipeline{} + , 0u }; + pipeline.pipeline.createPipeline( index, createInfo ); + } + }; + } + + namespace merge + { + enum BindingPoints + { + eInputKeys, + eInputValues, + eInputMergePathPartitions, + eOutputKeys, + eOutputValues, + eOutputMergePathPartitions, + }; + + static ShaderPtr createShader( bool mergePathPartitions ) + { + sdw::ComputeWriter writer; + + auto inputKeysBuffer = writer.declStorageBuffer( "c3d_inputKeysBuffer" + , uint32_t( eInputKeys ) + , 0u ); + auto c3d_inputKeys = inputKeysBuffer.declMemberArray< sdw::UInt >( "ik" ); + inputKeysBuffer.end(); + + auto inputValuesBuffer = writer.declStorageBuffer( "c3d_inputValuesBuffer" + , uint32_t( eInputValues ) + , 0u ); + auto c3d_inputValues = inputValuesBuffer.declMemberArray< sdw::UInt >( "iv" ); + inputValuesBuffer.end(); + + auto inputMergePathPartitionsBuffer = writer.declStorageBuffer( "c3d_inputMergePathPartitionsBuffer" + , uint32_t( eInputMergePathPartitions ) + , 0u ); + auto c3d_inputMergePathPartitions = inputMergePathPartitionsBuffer.declMemberArray< sdw::Int >( "ip" ); + inputMergePathPartitionsBuffer.end(); + + auto outputKeysBuffer = writer.declStorageBuffer( "c3d_outputKeysBuffer" + , uint32_t( 
eOutputKeys ) + , 0u ); + auto c3d_outputKeys = outputKeysBuffer.declMemberArray< sdw::UInt >( "ok" ); + outputKeysBuffer.end(); + + auto outputValuesBuffer = writer.declStorageBuffer( "c3d_outputValuesBuffer" + , uint32_t( eOutputValues ) + , 0u ); + auto c3d_outputValues = outputValuesBuffer.declMemberArray< sdw::UInt >( "ov" ); + outputValuesBuffer.end(); + + auto outputMergePathPartitionsBuffer = writer.declStorageBuffer( "c3d_outputMergePathPartitionsBuffer" + , uint32_t( eOutputMergePathPartitions ) + , 0u ); + auto c3d_outputMergePathPartitions = outputMergePathPartitionsBuffer.declMemberArray< sdw::Int >( "op" ); + outputMergePathPartitionsBuffer.end(); + + sdw::PushConstantBuffer pcb{ writer, "C3D_DispatchData", "c3d_dispatchData" }; + auto c3d_numElements = pcb.declMember< sdw::UInt >( "c3d_numElements" ); + auto c3d_chunkSize = pcb.declMember< sdw::UInt >( "c3d_chunkSize" ); + pcb.end(); + + auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", NumValuesPerThreadGroup ); // Intermediate keys. (8,192 Bytes) + auto gsValues = writer.declSharedVariable< sdw::UInt >( "gsValues", NumValuesPerThreadGroup ); // Intermediate values. (8,192 Bytes) + + /** + * MergePath is a binary search over two sorted arrays that finds the + * point in list A and list B to begin a merge operation. + * Based on: https://moderngpu.github.io/bulkinsert.html#mergepath + * Retrieved on: Aug 9, 2016. + * + * @param a0 The first element in list A. + * @param aCount The number of elements in A. + * @param b0 The first element in list B. + * @param bCount The number of elements in B. + * @param diag The cross diagonal of the merge matrix where the merge path is computed. + * @param bUseSharedMem Whether to read from shared memory or global memory. 
+ * @return + */ + auto mergePath = writer.implementFunction< sdw::Int >( "c3d_mergePath" + , [&]( sdw::Int a0, sdw::Int aCount + , sdw::Int b0, sdw::Int bCount + , sdw::Int diag, sdw::Boolean bUseSharedMem ) + { + auto begin = writer.declLocale( "begin", max( 0_i, diag - bCount ) ); + auto end = writer.declLocale( "end", min( diag, aCount ) ); + + WHILE( writer, begin < end ) + { + // Find the mid-point to start searching from. + auto mid = writer.declLocale( "mid", ( begin + end ) >> 1_i ); + auto a = writer.declLocale( "a", writer.ternary( bUseSharedMem, gsKeys[a0 + mid], c3d_inputKeys[a0 + mid] ) ); + auto b = writer.declLocale( "b", writer.ternary( bUseSharedMem, gsKeys[b0 + diag - 1 - mid], c3d_inputKeys[b0 + diag - 1 - mid] ) ); + + IF( writer, a < b ) + { + begin = mid + 1_i; + } + ELSE + { + end = mid; + } + FI; + } + ELIHW; + + writer.returnStmt( begin ); + } + , sdw::InInt{ writer, "a0" } + , sdw::InInt{ writer, "aCount" } + , sdw::InInt{ writer, "b0" } + , sdw::InInt{ writer, "bCount" } + , sdw::InInt{ writer, "diag" } + , sdw::InBoolean{ writer, "bUseSharedMem" } ); + /** + * Perform a serial merge using shared memory. Write results to global memory. 
+ */ + auto serialMerge = writer.implementFunction< sdw::Void >( "c3d_serialMerge" + , [&]( sdw::Int a0, sdw::Int a1 + , sdw::Int b0, sdw::Int b1 + , sdw::Int diag + , sdw::Int numValues, sdw::Int out0 ) + { + auto aKey = writer.declLocale( "aKey", gsKeys[a0] ); + auto bKey = writer.declLocale( "bKey", gsKeys[b0] ); + + auto aValue = writer.declLocale( "aValue", gsValues[a0] ); + auto bValue = writer.declLocale( "bValue", gsValues[b0] ); + + FOR( writer, sdw::Int, i, 0_i, i < int( NumValuesPerThread ) && diag + i < numValues, ++i ) + { + IF( writer, b0 >= b1 || ( a0 < a1 && aKey < bKey ) ) + { + c3d_outputKeys[out0 + diag + i] = aKey; + c3d_outputValues[out0 + diag + i] = aValue; + + ++a0; + + aKey = gsKeys[a0]; + aValue = gsValues[a0]; + } + ELSE + { + c3d_outputKeys[out0 + diag + i] = bKey; + c3d_outputValues[out0 + diag + i] = bValue; + + ++b0; + + bKey = gsKeys[b0]; + bValue = gsValues[b0]; + } + FI; + } + ROF; + } + , sdw::InInt{ writer, "a0" } + , sdw::InInt{ writer, "a1" } + , sdw::InInt{ writer, "b0" } + , sdw::InInt{ writer, "b1" } + , sdw::InInt{ writer, "diag" } + , sdw::InInt{ writer, "numValues" } + , sdw::InInt{ writer, "out0" } ); + + writer.implementMainT< sdw::VoidT >( NumThreads + , [&]( sdw::ComputeIn in ) + { + auto threadIndex = in.globalInvocationID.x(); + auto chunkSize = c3d_chunkSize; + // Number of chunks to sort. + auto numChunks = writer.declLocale( "numChunks", writer.cast< sdw::UInt >( ceil( writer.cast< sdw::Float >( c3d_numElements ) / writer.cast< sdw::Float >( chunkSize ) ) ) ); + // Num values to sort per sort group. + auto numValuesPerSortGroup = writer.declLocale( "numValuesPerSortGroup", min( chunkSize * 2_u, c3d_numElements ) ); + + if ( mergePathPartitions ) + { + // Number of sort groups needed to sort all chunks. + auto numSortGroups = writer.declLocale( "numSortGroups", numChunks / 2_u ); + + // Total number of partitions per sort group. 
+ auto numPartitionsPerSortGroup = writer.declLocale( "numPartitionsPerSortGroup", writer.cast< sdw::UInt >( ceil( writer.cast< sdw::Float >( numValuesPerSortGroup ) / float( NumValuesPerThreadGroup ) ) ) + 1_u ); + // The sort group this thread is operating on. + auto sortGroup = writer.declLocale( "sortGroup", threadIndex / numPartitionsPerSortGroup ); + // The partition this thread is computing within the sort group. + auto partitionInSortGroup = writer.declLocale( "partitionInSortGroup", threadIndex % numPartitionsPerSortGroup ); + + // The partition across all sort groups. + auto globalPartition = writer.declLocale( "globalPartition", ( sortGroup * numPartitionsPerSortGroup ) + partitionInSortGroup ); + // Compute the maximum number of partitions to compute. + auto maxPartitions = writer.declLocale( "maxPartitions", numSortGroups * numPartitionsPerSortGroup ); + + IF( writer, globalPartition < maxPartitions ) + { + auto a0 = writer.declLocale( "a0", writer.cast< sdw::Int >( sortGroup * numValuesPerSortGroup ) ); + auto a1 = writer.declLocale( "a1", min( a0 + writer.cast< sdw::Int >( chunkSize ), writer.cast< sdw::Int >( c3d_numElements ) ) ); + auto aCount = writer.declLocale( "aCount", a1 - a0 ); + auto b0 = writer.declLocale( "b0", a1 ); + auto b1 = writer.declLocale( "b1", min( b0 + writer.cast< sdw::Int >( chunkSize ), writer.cast< sdw::Int >( c3d_numElements ) ) ); + auto bCount = writer.declLocale( "bCount", b1 - b0 ); + // Number of values to sort in this sort group. + auto numValues = writer.declLocale( "numValues", aCount + bCount ); + // The diagonal in the merge matrix of this sort group. + auto diag = writer.declLocale( "diag", min( writer.cast< sdw::Int >( partitionInSortGroup * NumValuesPerThreadGroup ), numValues ) ); + + // Find the merge path for this partition using global memory. + auto mergPath = writer.declLocale( "mergePath", mergePath( a0, aCount, b0, bCount, diag, 0_b ) ); + + // Write the merge path to global memory. 
+ c3d_outputMergePathPartitions[globalPartition] = mergPath; + } + FI; + } + else + { + auto groupID = in.workGroupID.x(); + auto groupIndex = in.localInvocationIndex; + + // Number of sort groups needed to sort all chunks. + auto numSortGroups = writer.declLocale( "numSortGroups", max( numChunks / 2_u, 1_u ) ); + + // Compute the number of thread groups required to sort a single sort group. + auto numThreadGroupsPerSortGroup = writer.declLocale( "numThreadGroupsPerSortGroup", writer.cast< sdw::UInt >( ceil( writer.cast< sdw::Float >( numValuesPerSortGroup ) / float( NumValuesPerThreadGroup ) ) ) ); + // The number of partitions per sort group. + // We add 1 to account for the merge path partition at the end of the sort group. + auto numPartitionsPerSortGroup = writer.declLocale( "numPartitionsPerSortGroup", numThreadGroupsPerSortGroup + 1_u ); + + // Compute the sort group that this thread is operating on. + auto sortGroup = writer.declLocale( "sortGroup", groupID / numThreadGroupsPerSortGroup ); + // The merge path partition within the sort group. + auto partition = writer.declLocale( "partition", groupID % numThreadGroupsPerSortGroup ); + + auto globalPartition = writer.declLocale( "globalPartition", ( sortGroup * numPartitionsPerSortGroup ) + partition ); + + // Load the keys into shared memory based on the mergepath for this thread group. + auto mergePath0 = writer.declLocale( "mergePath0", c3d_inputMergePathPartitions[globalPartition] ); + auto mergePath1 = writer.declLocale( "mergePath1", c3d_inputMergePathPartitions[globalPartition + 1_u] ); + auto diag0 = writer.declLocale( "diag0", writer.cast< sdw::Int >( min( partition * NumValuesPerThreadGroup, numValuesPerSortGroup ) ) ); + auto diag1 = writer.declLocale( "diag1", writer.cast< sdw::Int >( min( ( partition + 1_u ) * NumValuesPerThreadGroup, numValuesPerSortGroup ) ) ); + + // Compute the chunk ranges in the input set. 
+ auto chunkOffsetA0 = writer.declLocale( "chunkOffsetA0", writer.cast< sdw::Int >( min( sortGroup * numValuesPerSortGroup, c3d_numElements ) ) ); + auto chunkOffsetA1 = writer.declLocale( "chunkOffsetA1", min( chunkOffsetA0 + writer.cast< sdw::Int >( chunkSize ), writer.cast< sdw::Int >( c3d_numElements ) ) ); + auto chunkSizeA = writer.declLocale( "chunkSizeA", chunkOffsetA1 - chunkOffsetA0 ); + + auto chunkOffsetB0 = writer.declLocale( "chunkOffsetB0", chunkOffsetA1 ); + auto chunkOffsetB1 = writer.declLocale( "chunkOffsetB1", min( chunkOffsetB0 + writer.cast< sdw::Int >( chunkSize ), writer.cast< sdw::Int >( c3d_numElements ) ) ); + auto chunkSizeB = writer.declLocale( "chunkSizeB", chunkOffsetB1 - chunkOffsetB0 ); + + // The total number of values to be sorted. + auto numValues = writer.declLocale( "numValues", chunkSizeA + chunkSizeB ); + + auto a0 = writer.declLocale( "a0", mergePath0 ); + auto a1 = writer.declLocale( "a1", mergePath1 ); + auto numA = writer.declLocale( "numA", min( a1 - a0, chunkSizeA ) ); + + auto b0 = writer.declLocale( "b0", diag0 - mergePath0 ); + auto b1 = writer.declLocale( "b1", diag1 - mergePath1 ); + auto numB = writer.declLocale( "numB", min( b1 - b0, chunkSizeB ) ); + + // Compute the diagonal for this thread within the threadgroup. + auto diag = writer.declLocale( "diag", writer.cast< sdw::Int >( groupIndex * NumValuesPerThread ) ); + + auto a = writer.declLocale( "a", 0_i ); + auto b = writer.declLocale( "b", 0_i ); + auto key = writer.declLocale( "key", 0_u ); + auto value = writer.declLocale( "value", 0_u ); + + // Load the keys and values into shared memory. 
+ for ( s32 i = 0; i < s32( NumValuesPerThread ); ++i ) + { + a = a0 + diag + i; + b = b0 + ( a - a1 ); + + IF ( writer, a < a1 ) + { + key = c3d_inputKeys[chunkOffsetA0 + a]; + value = c3d_inputValues[chunkOffsetA0 + a]; + } + ELSE + { + key = c3d_inputKeys[chunkOffsetB0 + b]; + value = c3d_inputValues[chunkOffsetB0 + b]; + } + FI; + + gsKeys[diag + i] = key; + gsValues[diag + i] = value; + } + + // Sync loading of keys/values in shared memory. + shader::groupMemoryBarrierWithGroupSync( writer ); + + // Compute the mergepath for this thread using shared memory. + auto mergPath = writer.declLocale( "mergePath", mergePath( 0_i, numA, numA, numB, diag, 1_b ) ); + + // Perform the serial merge using shared memory. + serialMerge( mergPath, numA, numA + diag - mergPath, numA + numB, diag0 + diag, numValues, chunkOffsetA0 ); + } + } ); + return std::make_unique< ast::Shader >( std::move( writer.getShader() ) ); + } + + /** Runnable pass recording the merge path partitions and merge sort compute dispatches for the light type held in its LightData. */ + class FramePass + : public crg::RunnablePass + { + public: + /** Registers doInitialise, doRecordInto and doIsEnabled as the pass callbacks, flags the pass as a compute pass, and builds the two compute pipelines (merge path partitions first, merge sort second). */ + FramePass( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , FrustumClusters & clusters + , LightData lightData ) + : crg::RunnablePass{ framePass + , context + , graph + , { [this]( uint32_t index ){ doInitialise( index ); } + , GetPipelineStateCallback( [](){ return crg::getPipelineState( VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ); } ) + , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t i ){ doRecordInto( recContext, cb, i ); } + , crg::getDefaultV< GetPassIndexCallback >() + , IsEnabledCallback( [this](){ return doIsEnabled(); } ) + , IsComputePassCallback( [](){ return true; } ) } + , crg::ru::Config{ 1u, true /* resettable */ } } + , m_clusters{ clusters } + , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } + , m_lightData{ std::move( lightData ) } + , m_partitions{ framePass, context, graph, device, true, this } + , m_merge{ framePass, context, graph, device, false, this } +
{ + } + + /** Resets the command buffer for the given index, resets both pipeline holders from their own stored shader stages, recreates both compute pipelines and re-records the current pass. The config argument is not used by this implementation. */ + CRG_API void resetPipeline( crg::VkPipelineShaderStageCreateInfoArray config + , uint32_t index ) + { + resetCommandBuffer( index ); + m_partitions.pipeline.resetPipeline( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( m_partitions.createInfo ), index ); + m_merge.pipeline.resetPipeline( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( m_merge.createInfo ), index ); + doCreatePipeline( index, m_partitions ); + doCreatePipeline( index, m_merge ); + reRecordCurrent(); + } + + private: + /** Bundles what one of the two compute programs needs: shader module, shader stage create info, compute pass config and pipeline holder. The mergePathPartitions flag selects between the MergePathPartitions and the MergeSort shader; the pipeline declares an 8 bytes compute push constant range. */ + struct Pipeline + { + ShaderModule shader; + ashes::PipelineShaderStageCreateInfoArray createInfo; + crg::cp::ConfigData cpConfig; + crg::PipelineHolder pipeline; + + Pipeline( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , bool mergePathPartitions + , FramePass * parent ) + : shader{ VK_SHADER_STAGE_COMPUTE_BIT, mergePathPartitions ? std::string{ "MergePathPartitions" } : std::string{ "MergeSort" }, createShader( mergePathPartitions ) } + , createInfo{ ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, shader ) } } + , cpConfig{ crg::getDefaultV< InitialiseCallback >() + , nullptr + , IsEnabledCallback( [parent](){ return parent->doIsEnabled();} ) + , crg::getDefaultV< GetPassIndexCallback >() + , crg::getDefaultV< RecordCallback >() + , crg::getDefaultV< RecordCallback >() + , 1u + , 1u + , 1u } + , pipeline{ framePass + , context + , graph + , crg::pp::Config{} + .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) + .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u } ) + , VK_PIPELINE_BIND_POINT_COMPUTE + , 2u } + { + } + }; + + private: + FrustumClusters & m_clusters; + LightCache const & m_lightCache; + LightData m_lightData; + Pipeline m_partitions; + Pipeline m_merge; + + private: + /** Initialises both pipeline holders, then creates both compute pipelines for the given index. */ + void doInitialise( uint32_t index ) + { + m_partitions.pipeline.initialise(); + m_merge.pipeline.initialise(); +
doCreatePipeline( index, m_partitions ); + doCreatePipeline( index, m_merge ); + } + + /** The pass only runs when the clusters report that lights need an update and at least one light of this pass' light type exists. */ + bool doIsEnabled()const + { + return m_clusters.needsLightsUpdate() + && m_lightCache.getLightsCount( m_lightData.lightType ) > 0; + } + + /** Records, for each merge level, a merge path partitions dispatch, a barrier, then a merge sort dispatch; the chunk size doubles and the pass index is swapped after each level, until a single chunk remains. Nothing is recorded when there is at most one chunk. */ + void doRecordInto( crg::RecordContext & context + , VkCommandBuffer commandBuffer + , uint32_t index ) + { + auto totalValues = m_lightCache.getLightsCount( m_lightData.lightType ); + auto chunkSize = NumThreadsPerThreadGroup; + + // The total number of chunks to sort (rounded up, so the last chunk may be partial). + auto numChunks = getLightsMortonCodeChunkCount( totalValues ); + DispatchData data{ totalValues, 0u }; + + while ( numChunks > 1u ) + { + data.chunkSize = chunkSize; + + // Number of sort groups required to sort all chunks. + // Each sort group merge sorts 2 chunks into a single chunk. + auto numSortGroups = numChunks / 2u; + + // Compute merge path partitions per thread group. + { + m_partitions.pipeline.recordInto( context, commandBuffer, index ); + + // The number of thread groups that are required per sort group. + auto numThreadGroupsPerSortGroup = uint32_t( std::ceil( float( chunkSize * 2u ) / float( NumValuesPerThreadGroup ) ) ); + + // The number of merge path partitions that need to be computed. + auto numMergePathPartitionsPerSortGroup = numThreadGroupsPerSortGroup + 1u; + auto totalMergePathPartitions = numMergePathPartitionsPerSortGroup * numSortGroups; + + // The number of thread groups needed to compute all merge path partitions. + auto numThreadGroups = uint32_t( std::ceil( float( totalMergePathPartitions ) / float( NumThreadsPerThreadGroup ) ) ); + + m_context.vkCmdPushConstants( commandBuffer, m_partitions.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); + m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); + + // Add an explicit barrier for MergePathPartitions. 
+ // This is required since the merge path partitions storage buffer is written + // by the MergePathPartitions compute shader and then read by the MergeSort + // compute shader. Both dispatches are recorded one after the other in this + // same pass, so without a barrier between them MergeSort + // will likely not see the correct merge path partitions. + // To resolve this, an explicit memory barrier is recorded for the resource. + doBarriers( context, commandBuffer, index ); + } + + // Perform merge sort using merge path partitions computed from the previous step. + { + m_merge.pipeline.recordInto( context, commandBuffer, index ); + + // The number of values that each sort group will sort. + // Each sort group merges 2 chunks into 1. + auto numValuesPerSortGroup = std::min( chunkSize * 2u, totalValues ); + + // The number of thread groups required to sort all values. + auto numThreadGroupsPerSortGroup = uint32_t( std::ceil( float( numValuesPerSortGroup ) / float( NumValuesPerThreadGroup ) ) ); + + m_context.vkCmdPushConstants( commandBuffer, m_merge.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); + m_context.vkCmdDispatch( commandBuffer, numThreadGroupsPerSortGroup * numSortGroups, 1u, 1u ); + } + + // Ping-pong the buffers + index = 1u - index; + + chunkSize *= 2; + numChunks = uint32_t( std::ceil( float( totalValues ) / float( chunkSize ) ) ); + } + } + + /** Records a memory barrier towards compute shader read/write access on every buffer attachment of the pass that is a clearable storage buffer and is not flagged as no-transition, using the buffer selected by passIndex. */ + void doBarriers( crg::RecordContext & context + , VkCommandBuffer commandBuffer + , uint32_t passIndex ) + { + for ( auto & attach : m_pass.buffers ) + { + auto buffer = attach.buffer; + + if ( !attach.isNoTransition() + && attach.isStorageBuffer() + && attach.isClearableBuffer() ) + { + auto currentState = context.getAccessState( buffer.buffer.buffer( passIndex ) + , buffer.range ); + context.memoryBarrier( commandBuffer + , buffer.buffer.buffer( passIndex ) + , buffer.range + , currentState.access + , currentState.pipelineStage + , 
crg::AccessState{ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT } + , true ); + } + } + } + + /** Creates the compute pipeline of the given Pipeline for the given index, from the first shader stage of its program and its pipeline layout. */ + void doCreatePipeline( uint32_t index + , Pipeline & pipeline ) + { + auto & program = pipeline.pipeline.getProgram( index ); + VkComputePipelineCreateInfo createInfo{ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO + , nullptr + , 0u + , program.front() + , pipeline.pipeline.getPipelineLayout() + , VkPipeline{} + , 0u }; + pipeline.pipeline.createPipeline( index, createInfo ); + } + }; + } + + //********************************************************************************************* + + /** Creates the radix sort pass then the merge sort pass for the light type described by lightData, and returns the merge sort pass. previousPass is dereferenced without a check, so it must not be null. cameraUbo is not used by this function. */ + static crg::FramePass const & createSortLightMortonCodesPasses( crg::FramePassGroup & graph + , crg::FramePass const * previousPass + , RenderDevice const & device + , CameraUbo const & cameraUbo + , FrustumClusters & clusters + , LightData lightData ) + { + // Create Radix sort pass: it depends on previousPass, takes the input keys/values buffers and outputs to the output keys/values buffers. + auto & radix = graph.createPass( "RadixSort" + lightData.name + , [&clusters, &device, lightData]( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph ) + { + auto result = std::make_unique< radix::FramePass >( framePass + , context + , graph + , device + , clusters + , lightData ); + device.renderSystem.getEngine()->registerTimer( framePass.getFullName() + , result->getTimer() ); + return result; + } ); + radix.addDependency( *previousPass ); + createInputStoragePassBinding( radix, uint32_t( radix::eInputKeys ), "C3D_In" + lightData.name + "LightMortonCodes", lightData.inMortonCodes( clusters ), 0u, ashes::WholeSize ); + createInputStoragePassBinding( radix, uint32_t( radix::eInputValues ), "C3D_In" + lightData.name + "LightIndices", lightData.inIndices( clusters ), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( radix, uint32_t( radix::eOutputKeys ), "C3D_Out" + lightData.name + "LightMortonCodes", lightData.outMortonCodes( clusters ), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( 
radix, uint32_t( radix::eOutputValues ), "C3D_Out" + lightData.name + "LightIndices", lightData.outIndices( clusters ), 0u, ashes::WholeSize ); + + // Create Merge sort pass: it depends on the radix sort pass; each keys/values binding lists both buffers, in opposite orders for input and output, and the merge path partitions buffer is bound both as input and as clearable output. + auto & mergeSort = graph.createPass( "MergeSort" + lightData.name + , [&clusters, &device, lightData]( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph ) + { + auto result = std::make_unique< merge::FramePass >( framePass + , context + , graph + , device + , clusters + , lightData ); + device.renderSystem.getEngine()->registerTimer( framePass.getFullName() + , result->getTimer() ); + return result; + } ); + mergeSort.addDependency( radix ); + createInOutStoragePassBinding( mergeSort, uint32_t( merge::eInputKeys ), "C3D_In" + lightData.name + "MortonCodes", { &lightData.outMortonCodes( clusters ), &lightData.inMortonCodes( clusters ) }, 0u, ashes::WholeSize ); + createInOutStoragePassBinding( mergeSort, uint32_t( merge::eInputValues ), "C3D_In" + lightData.name + "LightIndices", { &lightData.outIndices( clusters ), &lightData.inIndices( clusters ) }, 0u, ashes::WholeSize ); + createInputStoragePassBinding( mergeSort, uint32_t( merge::eInputMergePathPartitions ), "C3D_In" + lightData.name + "MergePathPartitions", clusters.getMergePathPartitionsBuffer(), 0u, ashes::WholeSize ); + createInOutStoragePassBinding( mergeSort, uint32_t( merge::eOutputKeys ), "C3D_Out" + lightData.name + "MortonCodes", { &lightData.inMortonCodes( clusters ), &lightData.outMortonCodes( clusters ) }, 0u, ashes::WholeSize ); + createInOutStoragePassBinding( mergeSort, uint32_t( merge::eOutputValues ), "C3D_Out" + lightData.name + "LightIndices", { &lightData.inIndices( clusters ), &lightData.outIndices( clusters ) }, 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( mergeSort, uint32_t( merge::eOutputMergePathPartitions ), "C3D_Out" + lightData.name + "MergePathPartitions", clusters.getMergePathPartitionsBuffer(), 0u, ashes::WholeSize ); + + return mergeSort; + }
+ } + + //********************************************************************************************* + + /** Returns the number of chunks of srtmrt::NumThreadsPerThreadGroup lights needed to hold lightCount lights (0 when there is no light); the last chunk may be partial. */ + u32 getLightsMortonCodeChunkCount( u32 lightCount ) + { + auto const chunkSize = u32( srtmrt::NumThreadsPerThreadGroup ); + + // Round up using integer arithmetic: exact for every u32 count and free of overflow, whereas a float division loses precision above 2^24 lights. + auto numChunks = lightCount / chunkSize + ( ( lightCount % chunkSize ) != 0u ? 1u : 0u ); + + return numChunks; + } + + /** Creates the sort passes for point lights and for spot lights, both chains starting after previousPass, and returns the last pass of each chain (point lights first, then spot lights). */ + crg::FramePassArray createSortLightsMortonCodePass( crg::FramePassGroup & graph + , crg::FramePass const * previousPass + , RenderDevice const & device + , CameraUbo const & cameraUbo + , FrustumClusters & clusters ) + { + // Point lights: radix sort then merge sort passes. + auto & point = srtmrt::createSortLightMortonCodesPasses( graph + , previousPass + , device + , cameraUbo + , clusters + , { LightType::ePoint + , "Point" + , &FrustumClusters::getInputPointLightMortonCodesBuffer + , & FrustumClusters::getOutputPointLightMortonCodesBuffer + , &FrustumClusters::getInputPointLightIndicesBuffer + , &FrustumClusters::getOutputPointLightIndicesBuffer } ); + + // Spot lights: radix sort then merge sort passes. + auto & spot = srtmrt::createSortLightMortonCodesPasses( graph + , previousPass + , device + , cameraUbo + , clusters + , { LightType::eSpot + , "Spot" + , &FrustumClusters::getInputSpotLightMortonCodesBuffer + , & FrustumClusters::getOutputSpotLightMortonCodesBuffer + , &FrustumClusters::getInputSpotLightIndicesBuffer + , &FrustumClusters::getOutputSpotLightIndicesBuffer } ); + + return { &point, &spot }; + } + + //********************************************************************************************* +} diff --git a/source/Core/Castor3D/Render/GlobalIllumination/VoxelConeTracing/Voxelizer.cpp b/source/Core/Castor3D/Render/GlobalIllumination/VoxelConeTracing/Voxelizer.cpp index 3bdce89ee6..fec3661527 100644 --- a/source/Core/Castor3D/Render/GlobalIllumination/VoxelConeTracing/Voxelizer.cpp +++ b/source/Core/Castor3D/Render/GlobalIllumination/VoxelConeTracing/Voxelizer.cpp @@ -102,7 +102,7 @@ namespace castor3d for ( auto & attach : m_pass.buffers ) 
{ m_context.vkCmdFillBuffer( commandBuffer - , attach.buffer.buffer.buffer + , attach.buffer.buffer.buffer( index ) , attach.buffer.range.offset , attach.buffer.range.size , 0u ); diff --git a/source/Core/Castor3D/Render/Opaque/Lighting/ClusteredLightsPipeline.cpp b/source/Core/Castor3D/Render/Opaque/Lighting/ClusteredLightsPipeline.cpp index 85ba7e0897..c0e509044c 100644 --- a/source/Core/Castor3D/Render/Opaque/Lighting/ClusteredLightsPipeline.cpp +++ b/source/Core/Castor3D/Render/Opaque/Lighting/ClusteredLightsPipeline.cpp @@ -168,7 +168,7 @@ namespace castor3d if ( attach.view.size() == 1u ) { auto view = attach.view.front(); - context.setLayoutState( view + context.setLayoutState( resolveView( view, passIndex ) , { attach.output , crg::getAccessMask( attach.output ) , crg::getStageMask( attach.output ) } ); diff --git a/source/Core/Castor3D/Render/Opaque/Lighting/LightsPipeline.cpp b/source/Core/Castor3D/Render/Opaque/Lighting/LightsPipeline.cpp index f8f7bd4ed3..7304bd6df3 100644 --- a/source/Core/Castor3D/Render/Opaque/Lighting/LightsPipeline.cpp +++ b/source/Core/Castor3D/Render/Opaque/Lighting/LightsPipeline.cpp @@ -445,7 +445,7 @@ namespace castor3d if ( attach.view.size() == 1u ) { auto view = attach.view.front(); - context.setLayoutState( view + context.setLayoutState( resolveView( view, passIndex ) , { attach.output , crg::getAccessMask( attach.output ) , crg::getStageMask( attach.output ) } ); diff --git a/source/Core/Castor3D/Render/Opaque/VisibilityReorderPass.cpp b/source/Core/Castor3D/Render/Opaque/VisibilityReorderPass.cpp index 3c4330ceae..7376c0fc73 100644 --- a/source/Core/Castor3D/Render/Opaque/VisibilityReorderPass.cpp +++ b/source/Core/Castor3D/Render/Opaque/VisibilityReorderPass.cpp @@ -354,7 +354,7 @@ namespace castor3d { auto buffer = attach.buffer.buffer; m_context.vkCmdFillBuffer( commandBuffer - , buffer.buffer + , buffer.buffer( index ) , 0u , ashes::WholeSize , 0u ); diff --git a/source/Core/Castor3D/Render/RenderTarget.cpp 
b/source/Core/Castor3D/Render/RenderTarget.cpp index 0eb1c743c4..12b7128741 100644 --- a/source/Core/Castor3D/Render/RenderTarget.cpp +++ b/source/Core/Castor3D/Render/RenderTarget.cpp @@ -565,6 +565,7 @@ namespace castor3d doCleanupCombineProgram(); m_culler.reset(); m_hdrConfigUbo.reset(); + m_frustumClusters.reset(); } void RenderTarget::update( CpuUpdater & updater ) diff --git a/source/Core/Castor3D/Scene/Light/SpotLight.cpp b/source/Core/Castor3D/Scene/Light/SpotLight.cpp index 77455aff25..7f339edc35 100644 --- a/source/Core/Castor3D/Scene/Light/SpotLight.cpp +++ b/source/Core/Castor3D/Scene/Light/SpotLight.cpp @@ -26,7 +26,7 @@ namespace castor3d castor::Point3f min{ points[0] }; castor::Point3f max{ points[0] }; - for ( auto & cur : castor::makeArrayView( &points[1], points.size() - 1u ) ) + for ( auto & cur : castor::makeArrayView( &points[1], uint64_t( points.size() - 1u ) ) ) { max[0] = std::max( cur[0], max[0] ); max[1] = std::max( cur[1], max[1] ); diff --git a/source/Core/CastorUtils/CastorUtilsPrerequisites.cpp b/source/Core/CastorUtils/CastorUtilsPrerequisites.cpp index a5cd31fffd..41c5d1c60c 100644 --- a/source/Core/CastorUtils/CastorUtilsPrerequisites.cpp +++ b/source/Core/CastorUtils/CastorUtilsPrerequisites.cpp @@ -82,7 +82,6 @@ namespace castor Logger::logError( description ); } - [[ noreturn ]] void cuFailure( char const * const description ) { std::stringstream stream; diff --git a/tools/CastorTestLauncher/MainFrame.cpp b/tools/CastorTestLauncher/MainFrame.cpp index 678ac7353f..7dfe3688bc 100644 --- a/tools/CastorTestLauncher/MainFrame.cpp +++ b/tools/CastorTestLauncher/MainFrame.cpp @@ -262,7 +262,7 @@ namespace test_launcher , m_engine{ engine } , m_maxFrameCount{ maxFrameCount } { - SetClientSize( FromDIP( wxSize{ 800, 600 } ) ); + SetClientSize( wxSize{ 800, 600 } ); } bool MainFrame::initialise() diff --git a/tools/CastorViewer/MainFrame.cpp b/tools/CastorViewer/MainFrame.cpp index 37764e21a9..00bc0c9923 100644 --- 
a/tools/CastorViewer/MainFrame.cpp +++ b/tools/CastorViewer/MainFrame.cpp @@ -265,7 +265,11 @@ namespace CastorViewer SetBackgroundColour( GuiCommon::PANEL_BACKGROUND_COLOUR ); SetForegroundColour( GuiCommon::PANEL_FOREGROUND_COLOUR ); - auto size = FromDIP( wxSize{ 800 + m_propertiesWidth, 600 + m_logsHeight } ); +#if wxCHECK_VERSION( 3, 1, 0 ) + auto size = this->FromDIP( wxSize{ 800 + m_propertiesWidth, 600 + m_logsHeight } ); +#else + auto size = wxSize{ 800 + m_propertiesWidth, 600 + m_logsHeight }; +#endif SetClientSize( size ); #if wxCHECK_VERSION( 2, 9, 0 ) SetMinClientSize( size ); @@ -690,14 +694,18 @@ namespace CastorViewer return; } - auto size = FromDIP( GuiCommon::make_wxSize( target->getSize() ) ); +#if wxCHECK_VERSION( 3, 1, 0 ) + auto size = this->FromDIP( GuiCommon::make_wxSize( target->getSize() ) ); +#else + auto size = GuiCommon::make_wxSize( target->getSize() ); +#endif if ( IsMaximized() ) { Maximize( false ); } - SetPosition( FromDIP( wxPoint{} ) ); + SetPosition( wxPoint{} ); SetClientSize( size ); #if wxCHECK_VERSION( 2, 9, 0 ) SetMinClientSize( size ); diff --git a/vcpkg.json b/vcpkg.json index 9868ae42ec..30fe633775 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -2,7 +2,7 @@ "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", "name": "castor3d", "version": "0.15.0", - "builtin-baseline": "6accd15d644e93cec849ea346a147828437928b3", + "builtin-baseline": "1c5a340f6e10985e2d92af174a68dbd15c1fa4e1", "dependencies": [ "convectionkernels", "freetype", @@ -15,7 +15,6 @@ "assimp", "freeimage", "glsl", - "tools", "vkfft" ], "vcpkg-configuration": {